diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 6ca0fac8..00000000 --- a/.coveragerc +++ /dev/null @@ -1,2 +0,0 @@ -[run] -omit = datasette/_version.py, datasette/utils/shutil_backport.py diff --git a/.dockerignore b/.dockerignore index 5078bf47..938173e9 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,11 +3,10 @@ .eggs .gitignore .ipynb_checkpoints +.travis.yml build *.spec *.egg-info dist scratchpad venv -*.db -*.sqlite diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs deleted file mode 100644 index 84e574fd..00000000 --- a/.git-blame-ignore-revs +++ /dev/null @@ -1,4 +0,0 @@ -# Applying Black -35d6ee2790e41e96f243c1ff58be0c9c0519a8ce -368638555160fb9ac78f462d0f79b1394163fa30 -2b344f6a34d2adaa305996a1a580ece06397f6e4 diff --git a/.gitattributes b/.gitattributes index 744258eb..e5e5865f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ +datasette/_version.py export-subst datasette/static/codemirror-* linguist-vendored diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index f0bcdbe0..00000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1 +0,0 @@ -github: [simonw] diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 88bb03b1..00000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: 2 -updates: -- package-ecosystem: pip - directory: "/" - schedule: - interval: daily - time: "13:00" - groups: - python-packages: - patterns: - - "*" diff --git a/.github/workflows/deploy-branch-preview.yml b/.github/workflows/deploy-branch-preview.yml deleted file mode 100644 index e56d9c27..00000000 --- a/.github/workflows/deploy-branch-preview.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Deploy a Datasette branch preview to Vercel - -on: - workflow_dispatch: - inputs: - branch: - description: "Branch to deploy" - required: true - type: string - -jobs: - deploy-branch-preview: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - name: Install dependencies - run: | - pip install datasette-publish-vercel - - name: Deploy the preview - env: - VERCEL_TOKEN: ${{ secrets.BRANCH_PREVIEW_VERCEL_TOKEN }} - run: | - export BRANCH="${{ github.event.inputs.branch }}" - wget https://latest.datasette.io/fixtures.db - datasette publish vercel fixtures.db \ - --branch $BRANCH \ - --project "datasette-preview-$BRANCH" \ - --token $VERCEL_TOKEN \ - --scope datasette \ - --about "Preview of $BRANCH" \ - --about_url "https://github.com/simonw/datasette/tree/$BRANCH" diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml deleted file mode 100644 index 9f53b01e..00000000 --- a/.github/workflows/deploy-latest.yml +++ /dev/null @@ -1,132 +0,0 @@ -name: Deploy latest.datasette.io - -on: - workflow_dispatch: - push: - branches: - - main - # - 1.0-dev - -permissions: - contents: read - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - name: Check out datasette - uses: actions/checkout@v5 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: "3.13" - cache: pip - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - python -m pip install -e .[test] - python -m pip install -e .[docs] - python -m pip install sphinx-to-sqlite==0.1a1 - - name: Run tests - if: ${{ github.ref == 'refs/heads/main' }} - run: | - pytest -n auto -m "not serial" - pytest -m "serial" - - name: Build fixtures.db and other files 
needed to deploy the demo - run: |- - python tests/fixtures.py \ - fixtures.db \ - fixtures-config.json \ - fixtures-metadata.json \ - plugins \ - --extra-db-filename extra_database.db - - name: Build docs.db - if: ${{ github.ref == 'refs/heads/main' }} - run: |- - cd docs - DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build - sphinx-to-sqlite ../docs.db _build - cd .. - - name: Set up the alternate-route demo - run: | - echo ' - from datasette import hookimpl - - @hookimpl - def startup(datasette): - db = datasette.get_database("fixtures2") - db.route = "alternative-route" - ' > plugins/alternative_route.py - cp fixtures.db fixtures2.db - - name: And the counters writable canned query demo - run: | - cat > plugins/counters.py < metadata.json - # cat metadata.json - - id: auth - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v3 - with: - credentials_json: ${{ secrets.GCP_SA_KEY }} - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v3 - - name: Deploy to Cloud Run - env: - LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }} - run: |- - gcloud config set run/region us-central1 - gcloud config set project datasette-222320 - export SUFFIX="-${GITHUB_REF#refs/heads/}" - export SUFFIX=${SUFFIX#-main} - # Replace 1.0 with one-dot-zero in SUFFIX - export SUFFIX=${SUFFIX//1.0/one-dot-zero} - datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \ - -m fixtures-metadata.json \ - --plugins-dir=plugins \ - --branch=$GITHUB_SHA \ - --version-note=$GITHUB_SHA \ - --extra-options="--setting template_debug 1 --setting trace_debug 1 --crossdb" \ - --install 'datasette-ephemeral-tables>=0.2.2' \ - --service "datasette-latest$SUFFIX" \ - --secret $LATEST_DATASETTE_SECRET - - name: Deploy to docs as well (only for main) - if: ${{ github.ref == 'refs/heads/main' }} - run: |- - # Deploy docs.db to a different service - datasette publish cloudrun docs.db \ - --branch=$GITHUB_SHA \ - --version-note=$GITHUB_SHA \ - --extra-options="--setting template_debug 1" \ - --service=datasette-docs-latest diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml deleted file mode 100644 index a54bd83a..00000000 --- a/.github/workflows/documentation-links.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Read the Docs Pull Request Preview -on: - pull_request_target: - types: - - opened - -permissions: - pull-requests: write - -jobs: - documentation-links: - runs-on: ubuntu-latest - steps: - - uses: readthedocs/actions/preview@v1 - with: - project-slug: "datasette" diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml deleted file mode 100644 index 77cce7d1..00000000 --- a/.github/workflows/prettier.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Check JavaScript for conformance with Prettier - -on: [push] - -permissions: - contents: read - -jobs: - prettier: - runs-on: ubuntu-latest - steps: - - name: Check out repo - uses: actions/checkout@v4 - - uses: actions/cache@v4 - name: Configure npm caching - with: - path: ~/.npm - key: ${{ runner.OS }}-npm-${{ hashFiles('**/package-lock.json') }} - restore-keys: | - ${{ runner.OS }}-npm- - - name: Install dependencies - run: npm ci - - name: Run prettier - run: |- - npm run prettier -- --check diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index e94d0bdd..00000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,109 +0,0 @@ -name: Publish Python Package - -on: - release: - types: [created] - 
-permissions: - contents: read - -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - cache: pip - cache-dependency-path: pyproject.toml - - name: Install dependencies - run: | - pip install -e '.[test]' - - name: Run tests - run: | - pytest - - deploy: - runs-on: ubuntu-latest - needs: [test] - environment: release - permissions: - id-token: write - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.13' - cache: pip - cache-dependency-path: pyproject.toml - - name: Install dependencies - run: | - pip install setuptools wheel build - - name: Build - run: | - python -m build - - name: Publish - uses: pypa/gh-action-pypi-publish@release/v1 - - deploy_static_docs: - runs-on: ubuntu-latest - needs: [deploy] - if: "!github.event.release.prerelease" - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: pyproject.toml - - name: Install dependencies - run: | - python -m pip install -e .[docs] - python -m pip install sphinx-to-sqlite==0.1a1 - - name: Build docs.db - run: |- - cd docs - DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build - sphinx-to-sqlite ../docs.db _build - cd .. - - id: auth - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - credentials_json: ${{ secrets.GCP_SA_KEY }} - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v3 - - name: Deploy stable-docs.datasette.io to Cloud Run - run: |- - gcloud config set run/region us-central1 - gcloud config set project datasette-222320 - datasette publish cloudrun docs.db \ - --service=datasette-docs-stable - - deploy_docker: - runs-on: ubuntu-latest - needs: [deploy] - if: "!github.event.release.prerelease" - steps: - - uses: actions/checkout@v4 - - name: Build and push to Docker Hub - env: - DOCKER_USER: ${{ secrets.DOCKER_USER }} - DOCKER_PASS: ${{ secrets.DOCKER_PASS }} - run: |- - sleep 60 # Give PyPI time to make the new release available - docker login -u $DOCKER_USER -p $DOCKER_PASS - export REPO=datasetteproject/datasette - docker build -f Dockerfile \ - -t $REPO:${GITHUB_REF#refs/tags/} \ - --build-arg VERSION=${GITHUB_REF#refs/tags/} . - docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest - docker push $REPO:${GITHUB_REF#refs/tags/} - docker push $REPO:latest diff --git a/.github/workflows/push_docker_tag.yml b/.github/workflows/push_docker_tag.yml deleted file mode 100644 index afe8d6b2..00000000 --- a/.github/workflows/push_docker_tag.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Push specific Docker tag - -on: - workflow_dispatch: - inputs: - version_tag: - description: Tag to build and push - -permissions: - contents: read - -jobs: - deploy_docker: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Build and push to Docker Hub - env: - DOCKER_USER: ${{ secrets.DOCKER_USER }} - DOCKER_PASS: ${{ secrets.DOCKER_PASS }} - VERSION_TAG: ${{ github.event.inputs.version_tag }} - run: |- - docker login -u $DOCKER_USER -p $DOCKER_PASS - export REPO=datasetteproject/datasette - docker build -f Dockerfile \ - -t $REPO:${VERSION_TAG} \ - --build-arg VERSION=${VERSION_TAG} . 
- docker push $REPO:${VERSION_TAG} diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml deleted file mode 100644 index 7c5370ce..00000000 --- a/.github/workflows/spellcheck.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Check spelling in documentation - -on: [push, pull_request] - -permissions: - contents: read - -jobs: - spellcheck: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.11' - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Install dependencies - run: | - pip install -e '.[docs]' - - name: Check spelling - run: | - codespell README.md --ignore-words docs/codespell-ignore-words.txt - codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt - codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt - codespell tests --ignore-words docs/codespell-ignore-words.txt diff --git a/.github/workflows/stable-docs.yml b/.github/workflows/stable-docs.yml deleted file mode 100644 index 3119d617..00000000 --- a/.github/workflows/stable-docs.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: Update Stable Docs - -on: - release: - types: [published] - push: - branches: - - main - -permissions: - contents: write - -jobs: - update_stable_docs: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - fetch-depth: 0 # We need all commits to find docs/ changes - - name: Set up Git user - run: | - git config user.name "Automated" - git config user.email "actions@users.noreply.github.com" - - name: Create stable branch if it does not yet exist - run: | - if ! git ls-remote --heads origin stable | grep -qE '\bstable\b'; then - # Make sure we have all tags locally - git fetch --tags --quiet - - # Latest tag that is just numbers and dots (optionally prefixed with 'v') - # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc. - LATEST_RELEASE=$( - git tag -l --sort=-v:refname \ - | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \ - | head -n1 - ) - - git checkout -b stable - - # If there are any stable releases, copy docs/ from the most recent - if [ -n "$LATEST_RELEASE" ]; then - rm -rf docs/ - git checkout "$LATEST_RELEASE" -- docs/ || true - fi - - git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes" - git push -u origin stable - fi - - name: Handle Release - if: github.event_name == 'release' && !github.event.release.prerelease - run: | - git fetch --all - git checkout stable - git reset --hard ${GITHUB_REF#refs/tags/} - git push origin stable --force - - name: Handle Commit to Main - if: contains(github.event.head_commit.message, '!stable-docs') - run: | - git fetch origin - git checkout -b stable origin/stable - # Get the list of modified files in docs/ from the current commit - FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/) - # Check if the list of files is non-empty - if [[ -n "$FILES" ]]; then - # Checkout those files to the stable branch to over-write with their contents - for FILE in $FILES; do - git checkout ${{ github.sha }} -- $FILE - done - git add docs/ - git commit -m "Doc changes from ${{ github.sha }}" - git push origin stable - else - echo "No changes to docs/ in this commit." 
- exit 0 - fi diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml deleted file mode 100644 index 8d73b64d..00000000 --- a/.github/workflows/test-coverage.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Calculate test coverage - -on: - push: - branches: - - main - pull_request: - branches: - - main -permissions: - contents: read - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Check out datasette - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.12' - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - python -m pip install -e .[test] - python -m pip install pytest-cov - - name: Run tests - run: |- - ls -lah - cat .coveragerc - pytest -m "not serial" --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term -x - ls -lah - - name: Upload coverage report - uses: codecov/codecov-action@v1 - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage.xml diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml deleted file mode 100644 index b490a9bf..00000000 --- a/.github/workflows/test-pyodide.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Test in Pyodide with shot-scraper - -on: - push: - pull_request: - workflow_dispatch: - -permissions: - contents: read - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10 - uses: actions/setup-python@v6 - with: - python-version: "3.10" - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Cache Playwright browsers - uses: actions/cache@v4 - with: - path: ~/.cache/ms-playwright/ - key: ${{ runner.os }}-browsers - - name: Install Playwright dependencies - run: | - pip install shot-scraper build - shot-scraper install - - name: Run test - run: | - ./test-in-pyodide-with-shot-scraper.sh diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml deleted file mode 100644 index 76ea138a..00000000 --- a/.github/workflows/test-sqlite-support.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Test SQLite versions - -on: [push, pull_request] - -permissions: - contents: read - -jobs: - test: - runs-on: ${{ matrix.platform }} - continue-on-error: true - strategy: - matrix: - platform: [ubuntu-latest] - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] - sqlite-version: [ - #"3", # latest version - "3.46", - #"3.45", - #"3.27", - #"3.26", - "3.25", - #"3.25.3", # 2018-09-25, window functions breaks test_upsert for some reason on 3.10, skip for now - #"3.24", # 2018-06-04, added UPSERT support - #"3.23.1" # 2018-04-10, before UPSERT - ] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - allow-prereleases: true - cache: pip - cache-dependency-path: pyproject.toml - - name: Set up SQLite ${{ matrix.sqlite-version }} - uses: asg017/sqlite-versions@71ea0de37ae739c33e447af91ba71dda8fcf22e6 - with: - version: ${{ matrix.sqlite-version }} - cflags: "-DSQLITE_ENABLE_DESERIALIZE -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1" - - run: python3 -c "import sqlite3; print(sqlite3.sqlite_version)" - - run: echo $LD_LIBRARY_PATH - - name: Build extension for --load-extension test - run: |- - (cd tests && gcc ext.c -fPIC -shared -o 
ext.so) - - name: Install dependencies - run: | - pip install -e '.[test]' - pip freeze - - name: Run tests - run: | - pytest -n auto -m "not serial" - pytest -m "serial" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 1e5e03d2..00000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Test - -on: [push, pull_request] - -permissions: - contents: read - -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - allow-prereleases: true - cache: pip - cache-dependency-path: pyproject.toml - - name: Build extension for --load-extension test - run: |- - (cd tests && gcc ext.c -fPIC -shared -o ext.so) - - name: Install dependencies - run: | - pip install -e '.[test]' - pip freeze - - name: Run tests - run: | - pytest -n auto -m "not serial" - pytest -m "serial" - # And the test that exceeds a localhost HTTPS server - tests/test_datasette_https_server.sh - - name: Install docs dependencies - run: | - pip install -e '.[docs]' - - name: Black - run: black --check . - - name: Check if cog needs to be run - run: | - cog --check docs/*.rst - - name: Check if blacken-docs needs to be run - run: | - # This fails on syntax errors, or a diff was applied - blacken-docs -l 60 docs/*.rst - - name: Test DATASETTE_LOAD_PLUGINS - run: | - pip install datasette-init datasette-json-html - tests/test-datasette-load-plugins.sh diff --git a/.github/workflows/tmate-mac.yml b/.github/workflows/tmate-mac.yml deleted file mode 100644 index fcee0f21..00000000 --- a/.github/workflows/tmate-mac.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: tmate session mac - -on: - workflow_dispatch: - -permissions: - contents: read - -jobs: - build: - runs-on: macos-latest - steps: - - uses: actions/checkout@v2 - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 diff --git a/.github/workflows/tmate.yml b/.github/workflows/tmate.yml deleted file mode 100644 index 123f6c71..00000000 --- a/.github/workflows/tmate.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: tmate session - -on: - workflow_dispatch: - -permissions: - contents: read - models: read - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 70e6bbeb..47418755 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,6 @@ scratchpad .vscode -uv.lock -data.db - # We don't use Pipfile, so ignore them Pipfile Pipfile.lock @@ -119,11 +116,3 @@ ENV/ # macOS files .DS_Store -node_modules -.*.swp - -# In case someone compiled tests/ext.c for test_load_extensions, don't -# include it in source control. -tests/*.dylib -tests/*.so -tests/*.dll diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index 222861c3..00000000 --- a/.prettierrc +++ /dev/null @@ -1,4 +0,0 @@ -{ - "tabWidth": 2, - "useTabs": false -} diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 5b30e75a..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 - -build: - os: ubuntu-20.04 - tools: - python: "3.11" - -sphinx: - configuration: docs/conf.py - -python: - install: - - method: pip - path: . 
- extra_requirements: - - docs diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..29388bc1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,55 @@ +language: python +dist: xenial + +# 3.6 is listed first so it gets used for the later build stages +python: + - "3.6" + - "3.7" + - "3.5" + +# Executed for 3.5 AND 3.5 as the first "test" stage: +script: + - pip install -U pip wheel + - pip install .[test] + - pytest + +cache: + directories: + - $HOME/.cache/pip + +# This defines further stages that execute after the tests +jobs: + include: + - stage: deploy latest.datasette.io + if: branch = master AND type = push + script: + - pip install .[test] + - npm install -g now + - python tests/fixtures.py fixtures.db fixtures.json + - export ALIAS=`echo $TRAVIS_COMMIT | cut -c 1-7` + - datasette publish nowv1 fixtures.db -m fixtures.json --token=$NOW_TOKEN --branch=$TRAVIS_COMMIT --version-note=$TRAVIS_COMMIT --name=datasette-latest-$ALIAS --alias=latest.datasette.io --alias=$ALIAS.datasette.io + - stage: release tagged version + if: tag IS present + python: 3.6 + deploy: + - provider: pypi + user: simonw + distributions: bdist_wheel + password: ${PYPI_PASSWORD} + on: + branch: master + tags: true + - stage: publish docker image + if: tag IS present + python: 3.6 + script: + - npm install -g now + - export ALIAS=`echo $TRAVIS_COMMIT | cut -c 1-7` + - export TAG=`echo $TRAVIS_TAG | sed 's/\./-/g' | sed 's/.*/v&/'` + - now alias $ALIAS.datasette.io $TAG.datasette.io --token=$NOW_TOKEN + # Build and release to Docker Hub + - docker login -u $DOCKER_USER -p $DOCKER_PASS + - export REPO=datasetteproject/datasette + - docker build -f Dockerfile -t $REPO:$TRAVIS_TAG . + - docker tag $REPO:$TRAVIS_TAG $REPO:latest + - docker push $REPO diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 14d4c567..00000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,128 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. - -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. 
- -## Our Standards - -Examples of behavior that contributes to a positive environment for our -community include: - -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -* Focusing on what is best not just for us as individuals, but for the - overall community - -Examples of unacceptable behavior include: - -* The use of sexualized language or imagery, and sexual attention or - advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email - address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Enforcement Responsibilities - -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. - -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. - -## Scope - -This Code of Conduct applies within all community spaces, and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -`swillison+datasette-code-of-conduct@gmail.com`. -All complaints will be reviewed and investigated promptly and fairly. - -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. - -## Enforcement Guidelines - -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: - -### 1. Correction - -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. - -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. - -### 2. Warning - -**Community Impact**: A violation through a single incident or series -of actions. - -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period of time. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. - -### 3. Temporary Ban - -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. 
- -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period of time. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. -Violating these terms may lead to a permanent ban. - -### 4. Permanent Ban - -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. - -**Consequence**: A permanent ban from any sort of public interaction within -the community. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. - -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. diff --git a/Dockerfile b/Dockerfile index 9a8f06cf..08639e52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,42 @@ -FROM python:3.11.0-slim-bullseye as build +FROM python:3.7.2-slim-stretch as build -# Version of Datasette to install, e.g. 0.55 -# docker build . -t datasette --build-arg VERSION=0.55 -ARG VERSION +# Setup build dependencies +RUN apt update \ +&& apt install -y python3-dev build-essential wget libxml2-dev libproj-dev libgeos-dev libsqlite3-dev zlib1g-dev pkg-config git \ + && apt clean -RUN apt-get update && \ - apt-get install -y --no-install-recommends libsqlite3-mod-spatialite && \ - apt clean && \ - rm -rf /var/lib/apt && \ - rm -rf /var/lib/dpkg/info/* -RUN pip install https://github.com/simonw/datasette/archive/refs/tags/${VERSION}.zip && \ - find /usr/local/lib -name '__pycache__' | xargs rm -r && \ - rm -rf /root/.cache/pip +RUN wget "https://www.sqlite.org/2018/sqlite-autoconf-3260000.tar.gz" && tar xzf sqlite-autoconf-3260000.tar.gz \ + && cd sqlite-autoconf-3260000 && ./configure --disable-static --enable-fts5 --enable-json1 CFLAGS="-g -O2 -DSQLITE_ENABLE_FTS3=1 -DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_RTREE=1 -DSQLITE_ENABLE_JSON1" \ + && make && make install + +RUN wget "https://www.gaia-gis.it/gaia-sins/freexl-1.0.5.tar.gz" && tar zxf freexl-1.0.5.tar.gz \ + && cd freexl-1.0.5 && ./configure && make && make install + +RUN wget "https://www.gaia-gis.it/gaia-sins/libspatialite-4.4.0-RC0.tar.gz" && tar zxf libspatialite-4.4.0-RC0.tar.gz \ + && cd libspatialite-4.4.0-RC0 && ./configure && make && make install + +RUN wget "https://www.gaia-gis.it/gaia-sins/readosm-1.1.0.tar.gz" && tar zxf readosm-1.1.0.tar.gz && cd readosm-1.1.0 && ./configure && make && make install + +RUN wget "https://www.gaia-gis.it/gaia-sins/spatialite-tools-4.4.0-RC0.tar.gz" && tar zxf spatialite-tools-4.4.0-RC0.tar.gz \ + && cd spatialite-tools-4.4.0-RC0 && ./configure && make && make install + + +# Add local code to the image instead of fetching from pypi. +COPY . 
/datasette + +RUN pip install /datasette + +FROM python:3.7.2-slim-stretch + +# Copy python dependencies and spatialite libraries +COPY --from=build /usr/local/lib/ /usr/local/lib/ +# Copy executables +COPY --from=build /usr/local/bin /usr/local/bin +# Copy spatial extensions +COPY --from=build /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu + +ENV LD_LIBRARY_PATH=/usr/local/lib EXPOSE 8001 CMD ["datasette"] diff --git a/Justfile b/Justfile deleted file mode 100644 index a47662c3..00000000 --- a/Justfile +++ /dev/null @@ -1,56 +0,0 @@ -export DATASETTE_SECRET := "not_a_secret" - -# Run tests and linters -@default: test lint - -# Setup project -@init: - uv sync --extra test --extra docs - -# Run pytest with supplied options -@test *options: init - uv run pytest -n auto {{options}} - -@codespell: - uv run codespell README.md --ignore-words docs/codespell-ignore-words.txt - uv run codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt - uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt - uv run codespell tests --ignore-words docs/codespell-ignore-words.txt - -# Run linters: black, flake8, mypy, cog -@lint: codespell - uv run black . --check - uv run flake8 - uv run --extra test cog --check README.md docs/*.rst - -# Rebuild docs with cog -@cog: - uv run --extra test cog -r README.md docs/*.rst - -# Serve live docs on localhost:8000 -@docs: cog blacken-docs - uv run --extra docs make -C docs livehtml - -# Build docs as static HTML -@docs-build: cog blacken-docs - rm -rf docs/_build && cd docs && uv run make html - -# Apply Black -@black: - uv run black . - -# Apply blacken-docs -@blacken-docs: - uv run blacken-docs -l 60 docs/*.rst - -# Apply prettier -@prettier: - npm run fix - -# Format code with both black and prettier -@format: black prettier blacken-docs - -@serve *options: - uv run sqlite-utils create-database data.db - uv run sqlite-utils create-table data.db docs id integer title text --pk id --ignore - uv run python -m datasette data.db --root --reload {{options}} diff --git a/MANIFEST.in b/MANIFEST.in index 8c5e3ee6..cca501c9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,3 @@ recursive-include datasette/static * -recursive-include datasette/templates * include versioneer.py include datasette/_version.py -include LICENSE diff --git a/README.md b/README.md index 393e8e5c..59a6649e 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,73 @@ -Datasette +# Datasette [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) -[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/latest/changelog.html) -[![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) -[![Tests](https://github.com/simonw/datasette/workflows/Test/badge.svg)](https://github.com/simonw/datasette/actions?query=workflow%3ATest) -[![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) -[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) -[![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) -[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord) +[![Travis 
CI](https://travis-ci.org/simonw/datasette.svg?branch=master)](https://travis-ci.org/simonw/datasette) +[![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](http://datasette.readthedocs.io/en/latest/?badge=latest) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/master/LICENSE) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://black.readthedocs.io/en/stable/) -*An open source multi-tool for exploring and publishing data* +*A tool for exploring and publishing data* Datasette is a tool for exploring and publishing data. It helps people take data of any shape or size and publish that as an interactive, explorable website and accompanying API. -Datasette is aimed at data journalists, museum curators, archivists, local governments, scientists, researchers and anyone else who has data that they wish to share with the world. +Datasette is aimed at data journalists, museum curators, archivists, local governments and anyone else who has data that they wish to share with the world. -[Explore a demo](https://datasette.io/global-power-plants/global-power-plants), watch [a video about the project](https://simonwillison.net/2021/Feb/7/video/) or try it out [on GitHub Codespaces](https://github.com/datasette/datasette-studio). +[Explore a demo](https://fivethirtyeight.datasettes.com/fivethirtyeight), watch [a video about the project](https://www.youtube.com/watch?v=pTr1uLQTJNE) or try it out by [uploading and publishing your own CSV data](https://simonwillison.net/2019/Apr/23/datasette-glitch/). -* [datasette.io](https://datasette.io/) is the official project website -* Latest [Datasette News](https://datasette.io/news) -* Comprehensive documentation: https://docs.datasette.io/ -* Examples: https://datasette.io/examples -* Live demo of current `main` branch: https://latest.datasette.io/ -* Questions, feedback or want to talk about the project? Join our [Discord](https://datasette.io/discord) +* Comprehensive documentation: http://datasette.readthedocs.io/ +* Examples: https://github.com/simonw/datasette/wiki/Datasettes +* Live demo of current master: https://latest.datasette.io/ -Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. +## News + + * 13th July 2019: [Single sign-on against GitHub using ASGI middleware](https://simonwillison.net/2019/Jul/14/sso-asgi/) talks about the implementation of [datasette-auth-github](https://github.com/simonw/datasette-auth-github) in more detail. + * 7th July 2019: [Datasette 0.29](https://datasette.readthedocs.io/en/stable/changelog.html#v0-29) - ASGI, new plugin hooks, facet by date and much, much more... + * [datasette-auth-github](https://github.com/simonw/datasette-auth-github) - a new plugin for Datasette 0.29 that lets you require users to authenticate against GitHub before accessing your Datasette instance. You can whitelist specific users, or you can restrict access to members of specific GitHub organizations or teams. + * [datasette-cors](https://github.com/simonw/datasette-cors) - a plugin that lets you configure CORS access from a list of domains (or a set of domain wildcards) so you can make JavaScript calls to a Datasette instance from a specific set of other hosts. 
+ * 23rd June 2019: [Porting Datasette to ASGI, and Turtles all the way down](https://simonwillison.net/2019/Jun/23/datasette-asgi/) + * 21st May 2019: The anonymized raw data from [the Stack Overflow Developer Survey 2019](https://stackoverflow.blog/2019/05/21/public-data-release-of-stack-overflows-2019-developer-survey/) has been [published in partnership with Glitch](https://glitch.com/culture/discover-insights-explore-developer-survey-results-2019/), powered by Datasette. + * 19th May 2019: [Datasette 0.28](https://datasette.readthedocs.io/en/stable/changelog.html#v0-28) - a salmagundi of new features! + * No longer immutable! Datasette now supports [databases that change](https://datasette.readthedocs.io/en/stable/changelog.html#supporting-databases-that-change). + * [Faceting improvements](https://datasette.readthedocs.io/en/stable/changelog.html#faceting-improvements-and-faceting-plugins) including facet-by-JSON-array and the ability to define custom faceting using plugins. + * [datasette publish cloudrun](https://datasette.readthedocs.io/en/stable/changelog.html#datasette-publish-cloudrun) lets you publish databases to Google's new Cloud Run hosting service. + * New [register_output_renderer](https://datasette.readthedocs.io/en/stable/changelog.html#register-output-renderer-plugins) plugin hook for adding custom output extensions to Datasette in addition to the default `.json` and `.csv`. + * Dozens of other smaller features and tweaks - see [the release notes](https://datasette.readthedocs.io/en/stable/changelog.html#v0-28) for full details. + * Read more about this release here: [Datasette 0.28—and why master should always be releasable](https://simonwillison.net/2019/May/19/datasette-0-28/) + * 24th February 2019: [ +sqlite-utils: a Python library and CLI tool for building SQLite databases](https://simonwillison.net/2019/Feb/25/sqlite-utils/) - a partner tool for easily creating SQLite databases for use with Datasette. + * 31st January 2019: [Datasette 0.27](https://datasette.readthedocs.io/en/latest/changelog.html#v0-27) - `datasette plugins` command, newline-delimited JSON export option, new documentation on [The Datasette Ecosystem](https://datasette.readthedocs.io/en/latest/ecosystem.html). + * 10th January 2019: [Datasette 0.26.1](http://datasette.readthedocs.io/en/latest/changelog.html#v0-26-1) - SQLite upgrade in Docker image, `/-/versions` now shows SQLite compile options. + * 2nd January 2019: [Datasette 0.26](http://datasette.readthedocs.io/en/latest/changelog.html#v0-26) - minor bug fixes, `datasette publish now --alias` argument. +* 18th December 2018: [Fast Autocomplete Search for Your Website](https://24ways.org/2018/fast-autocomplete-search-for-your-website/) - a new tutorial on using Datasette to build a JavaScript autocomplete search engine. +* 3rd October 2018: [The interesting ideas in Datasette](https://simonwillison.net/2018/Oct/4/datasette-ideas/) - a write-up of some of the less obvious interesting ideas embedded in the Datasette project. +* 19th September 2018: [Datasette 0.25](http://datasette.readthedocs.io/en/latest/changelog.html#v0-25) - New plugin hooks, improved database view support and an easier way to use more recent versions of SQLite. 
+* 23rd July 2018: [Datasette 0.24](http://datasette.readthedocs.io/en/latest/changelog.html#v0-24) - a number of small new features +* 29th June 2018: [datasette-vega](https://github.com/simonw/datasette-vega), a new plugin for visualizing data as bar, line or scatter charts +* 21st June 2018: [Datasette 0.23.1](http://datasette.readthedocs.io/en/latest/changelog.html#v0-23-1) - minor bug fixes +* 18th June 2018: [Datasette 0.23: CSV, SpatiaLite and more](http://datasette.readthedocs.io/en/latest/changelog.html#v0-23) - CSV export, foreign key expansion in JSON and CSV, new config options, improved support for SpatiaLite and a bunch of other improvements +* 23rd May 2018: [Datasette 0.22.1 bugfix](https://github.com/simonw/datasette/releases/tag/0.22.1) plus we now use [versioneer](https://github.com/warner/python-versioneer) +* 20th May 2018: [Datasette 0.22: Datasette Facets](https://simonwillison.net/2018/May/20/datasette-facets) +* 5th May 2018: [Datasette 0.21: New _shape=, new _size=, search within columns](https://github.com/simonw/datasette/releases/tag/0.21) +* 25th April 2018: [Exploring the UK Register of Members Interests with SQL and Datasette](https://simonwillison.net/2018/Apr/25/register-members-interests/) - a tutorial describing how [register-of-members-interests.datasettes.com](https://register-of-members-interests.datasettes.com/) was built ([source code here](https://github.com/simonw/register-of-members-interests)) +* 20th April 2018: [Datasette plugins, and building a clustered map visualization](https://simonwillison.net/2018/Apr/20/datasette-plugins/) - introducing Datasette's new plugin system and [datasette-cluster-map](https://pypi.org/project/datasette-cluster-map/), a plugin for visualizing data on a map +* 20th April 2018: [Datasette 0.20: static assets and templates for plugins](https://github.com/simonw/datasette/releases/tag/0.20) +* 16th April 2018: [Datasette 0.19: plugins preview](https://github.com/simonw/datasette/releases/tag/0.19) +* 14th April 2018: [Datasette 0.18: units](https://github.com/simonw/datasette/releases/tag/0.18) +* 9th April 2018: [Datasette 0.15: sort by column](https://github.com/simonw/datasette/releases/tag/0.15) +* 28th March 2018: [Baltimore Sun Public Salary Records](https://simonwillison.net/2018/Mar/28/datasette-in-the-wild/) - a data journalism project from the Baltimore Sun powered by Datasette - source code [is available here](https://github.com/baltimore-sun-data/salaries-datasette) +* 27th March 2018: [Cloud-first: Rapid webapp deployment using containers](https://wwwf.imperial.ac.uk/blog/research-software-engineering/2018/03/27/cloud-first-rapid-webapp-deployment-using-containers/) - a tutorial covering deploying Datasette using Microsoft Azure by the Research Software Engineering team at Imperial College London +* 28th January 2018: [Analyzing my Twitter followers with Datasette](https://simonwillison.net/2018/Jan/28/analyzing-my-twitter-followers/) - a tutorial on using Datasette to analyze follower data pulled from the Twitter API +* 17th January 2018: [Datasette Publish: a web app for publishing CSV files as an online database](https://simonwillison.net/2018/Jan/17/datasette-publish/) +* 12th December 2017: [Building a location to time zone API with SpatiaLite, OpenStreetMap and Datasette](https://simonwillison.net/2017/Dec/12/building-a-location-time-zone-api/) +* 9th December 2017: [Datasette 0.14: customization edition](https://github.com/simonw/datasette/releases/tag/0.14) +* 25th November 2017: [New in 
Datasette: filters, foreign keys and search](https://simonwillison.net/2017/Nov/25/new-in-datasette/) +* 13th November 2017: [Datasette: instantly create and publish an API for your SQLite databases](https://simonwillison.net/2017/Nov/13/datasette/) ## Installation -If you are on a Mac, [Homebrew](https://brew.sh/) is the easiest way to install Datasette: + pip3 install datasette - brew install datasette - -You can also install it using `pip` or `pipx`: - - pip install datasette - -Datasette requires Python 3.8 or higher. We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. +Datasette requires Python 3.5 or higher. We also have [detailed installation instructions](https://datasette.readthedocs.io/en/stable/installation.html) covering other options such as Docker. ## Basic usage @@ -48,12 +79,41 @@ This will start a web server on port 8001 - visit http://localhost:8001/ to acce Use Chrome on OS X? You can run datasette against your browser history like so: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock + datasette ~/Library/Application\ Support/Google/Chrome/Default/History Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: ![Downloads table rendered by datasette](https://static.simonwillison.net/static/2017/datasette-downloads.png) +## datasette serve options + + $ datasette serve --help + + Usage: datasette serve [OPTIONS] [FILES]... + + Serve up specified SQLite database files with a web UI + + Options: + -i, --immutable PATH Database files to open in immutable mode + -h, --host TEXT host for server, defaults to 127.0.0.1 + -p, --port INTEGER port for server, defaults to 8001 + --debug Enable debug mode - useful for development + --reload Automatically reload if database or code change detected - + useful for development + --cors Enable CORS by serving Access-Control-Allow-Origin: * + --load-extension PATH Path to a SQLite extension to load + --inspect-file TEXT Path to JSON file created using "datasette inspect" + -m, --metadata FILENAME Path to JSON file containing license/source metadata + --template-dir DIRECTORY Path to directory containing custom templates + --plugins-dir DIRECTORY Path to directory containing custom plugins + --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --memory Make :memory: database available + --config CONFIG Set config option using configname:value + datasette.readthedocs.io/en/latest/config.html + --version-note TEXT Additional note to show on /-/versions + --help-config Show available config options + --help Show this message and exit. 
+ ## metadata.json If you want to include licensing and source information in the generated datasette website you can do so using a JSON file that looks something like this: @@ -74,7 +134,7 @@ The license and source information will be displayed on the index page and in th ## datasette publish -If you have [Heroku](https://heroku.com/) or [Google Cloud Run](https://cloud.google.com/run/) configured, Datasette can deploy one or more SQLite databases to the internet with a single command: +If you have [Heroku](https://heroku.com/), [Google Cloud Run](https://cloud.google.com/run/) or [Zeit Now v1](https://zeit.co/now) configured, Datasette can deploy one or more SQLite databases to the internet with a single command: datasette publish heroku database.db @@ -84,8 +144,4 @@ Or: This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Heroku or Cloud Run and give you a URL to access the resulting website and API. -See [Publishing data](https://docs.datasette.io/en/stable/publish.html) in the documentation for more details. - -## Datasette Lite - -[Datasette Lite](https://lite.datasette.io/) is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. Read more about that in the [Datasette Lite documentation](https://github.com/simonw/datasette-lite/blob/main/README.md). +See [Publishing data](https://datasette.readthedocs.io/en/stable/publish.html) in the documentation for more details. diff --git a/_config.yml b/_config.yml new file mode 100644 index 00000000..3397c9a4 --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-architect \ No newline at end of file diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index bfdc9877..00000000 --- a/codecov.yml +++ /dev/null @@ -1,8 +0,0 @@ -coverage: - status: - project: - default: - informational: true - patch: - default: - informational: true diff --git a/datasette/__init__.py b/datasette/__init__.py index 47d2b4f6..0e59760a 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,8 +1,3 @@ -from datasette.permissions import Permission # noqa from datasette.version import __version_info__, __version__ # noqa -from datasette.events import Event # noqa -from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa -from datasette.utils import actor_matches_allow # noqa -from datasette.views import Context # noqa from .hookspecs import hookimpl # noqa from .hookspecs import hookspec # noqa diff --git a/datasette/_version.py b/datasette/_version.py new file mode 100644 index 00000000..a12f24aa --- /dev/null +++ b/datasette/_version.py @@ -0,0 +1,556 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. 
+ # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "datasette-" + cfg.versionfile_source = "datasette/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
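# A minimal worked example of the inversion described above, assuming a
# hypothetical checkout path: with cfg.versionfile_source set to
# "datasette/_version.py", split("/") yields two components, so the loop below
# applies os.path.dirname() twice and
#     /home/user/checkout/datasette/_version.py  ->  /home/user/checkout
# leaving root pointing at the directory where .git is expected to live.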
+ for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/datasette/actor_auth_cookie.py b/datasette/actor_auth_cookie.py deleted file mode 100644 index 368213af..00000000 --- a/datasette/actor_auth_cookie.py +++ /dev/null @@ -1,23 +0,0 @@ -from datasette import hookimpl -from itsdangerous import BadSignature -from datasette.utils import baseconv -import time - - -@hookimpl -def actor_from_request(datasette, request): - if "ds_actor" not in request.cookies: - return None - try: - decoded = datasette.unsign(request.cookies["ds_actor"], "actor") - # If it has "e" and "a" keys process the "e" expiry - if not isinstance(decoded, dict) or "a" not in decoded: - return None - expires_at = decoded.get("e") - if expires_at: - timestamp = int(baseconv.base62.decode(expires_at)) - if time.time() > timestamp: - return None - return decoded["a"] - except BadSignature: - return None diff --git a/datasette/app.py b/datasette/app.py index b9955925..2ee32dc8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,1473 +1,352 @@ -from __future__ import annotations - -from asgi_csrf import Errors import asyncio -import contextvars -from typing import TYPE_CHECKING, Any, Dict, Iterable, List - -if TYPE_CHECKING: - from datasette.permissions import AllowedResource, Resource -import asgi_csrf import collections -import dataclasses -import datetime -import functools -import glob import hashlib -import httpx -import importlib.metadata -import inspect -from itsdangerous import BadSignature -import json import os -import re -import secrets import sys import threading -import time -import types +import traceback import urllib.parse from concurrent import futures from pathlib import Path -from markupsafe import Markup, escape -from itsdangerous import URLSafeSerializer -from jinja2 import ( - ChoiceLoader, - Environment, - FileSystemLoader, - PrefixLoader, -) -from jinja2.environment import Template -from jinja2.exceptions import TemplateNotFound +import click +from markupsafe import Markup +from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from .events import Event -from .views import Context -from .views.database import database_download, DatabaseView, TableCreateView, QueryView +from .views.base import DatasetteError, ureg, AsgiRouter +from .views.database import DatabaseDownload, DatabaseView from .views.index import IndexView -from .views.special import ( - JsonDataView, - PatternPortfolioView, - AuthTokenView, - ApiExplorerView, - CreateTokenView, - LogoutView, - AllowDebugView, - PermissionsDebugView, - MessagesDebugView, - AllowedResourcesView, - PermissionRulesView, - PermissionCheckView, - TablesView, - InstanceSchemaView, - DatabaseSchemaView, - TableSchemaView, -) -from .views.table import ( - TableInsertView, - TableUpsertView, - TableDropView, - table_view, -) -from .views.row import RowView, RowDeleteView, RowUpdateView +from .views.special import JsonDataView 
+from .views.table import RowView, TableView from .renderer import json_renderer -from .url_builder import Urls -from .database import Database, QueryInterrupted +from .database import Database from .utils import ( - PaginatedResources, - PrefixedUrlString, - SPATIALITE_FUNCTIONS, - StartupError, - async_call_with_supported_arguments, - await_me_maybe, - baseconv, - call_with_supported_arguments, - detect_json1, - display_actor, + QueryInterrupted, + Results, escape_css_string, escape_sqlite, - find_spatialite, - format_bytes, + get_plugins, module_from_path, - move_plugins_and_allow, - move_table_config, - parse_metadata, - resolve_env_secrets, - resolve_routes, - tilde_decode, - tilde_encode, + sqlite3, + sqlite_timelimit, to_css_class, - urlsafe_components, - redact_keys, - row_sql_params_pks, ) from .utils.asgi import ( AsgiLifespan, - Forbidden, NotFound, - DatabaseNotFound, - TableNotFound, - RowNotFound, - Request, - Response, - AsgiRunOnFirstRequest, asgi_static, asgi_send, - asgi_send_file, + asgi_send_html, + asgi_send_json, asgi_send_redirect, ) -from .utils.internal_db import init_internal_db, populate_schema_tables -from .utils.sqlite import ( - sqlite3, - using_pysqlite3, -) -from .tracer import AsgiTracer -from .plugins import pm, DEFAULT_PLUGINS, get_plugins +from .tracer import trace, AsgiTracer +from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ -from .resources import DatabaseResource, TableResource - app_root = Path(__file__).parent.parent +connections = threading.local() +MEMORY = object() -# Context variable to track when code is executing within a datasette.client request -_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) - - -class _DatasetteClientContext: - """Context manager to mark code as executing within a datasette.client request.""" - - def __enter__(self): - self.token = _in_datasette_client.set(True) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - _in_datasette_client.reset(self.token) - return False - - -@dataclasses.dataclass -class PermissionCheck: - """Represents a logged permission check for debugging purposes.""" - - when: str - actor: Dict[str, Any] | None - action: str - parent: str | None - child: str | None - result: bool - - -# https://github.com/simonw/datasette/issues/283#issuecomment-781591015 -SQLITE_LIMIT_ATTACHED = 10 - -INTERNAL_DB_NAME = "__INTERNAL__" - -Setting = collections.namedtuple("Setting", ("name", "default", "help")) -SETTINGS = ( - Setting("default_page_size", 100, "Default page size for the table view"), - Setting( +ConfigOption = collections.namedtuple("ConfigOption", ("name", "default", "help")) +CONFIG_OPTIONS = ( + ConfigOption("default_page_size", 100, "Default page size for the table view"), + ConfigOption( "max_returned_rows", 1000, "Maximum rows that can be returned from a table or custom query", ), - Setting( - "max_insert_rows", - 100, - "Maximum rows that can be inserted at a time using the bulk insert API", - ), - Setting( + ConfigOption( "num_sql_threads", 3, "Number of threads in the thread pool for executing SQLite queries", ), - Setting("sql_time_limit_ms", 1000, "Time limit for a SQL query in milliseconds"), - Setting( + ConfigOption( + "sql_time_limit_ms", 1000, "Time limit for a SQL query in milliseconds" + ), + ConfigOption( "default_facet_size", 30, "Number of values to return for requested facets" ), - Setting("facet_time_limit_ms", 200, "Time limit for calculating a requested facet"), - Setting( + ConfigOption( + 
"facet_time_limit_ms", 200, "Time limit for calculating a requested facet" + ), + ConfigOption( "facet_suggest_time_limit_ms", 50, "Time limit for calculating a suggested facet", ), - Setting( + ConfigOption( + "hash_urls", + False, + "Include DB file contents hash in URLs, for far-future caching", + ), + ConfigOption( "allow_facet", True, "Allow users to specify columns to facet using ?_facet= parameter", ), - Setting( + ConfigOption( "allow_download", True, "Allow users to download the original SQLite database files", ), - Setting( - "allow_signed_tokens", - True, - "Allow users to create and use signed API tokens", - ), - Setting( - "default_allow_sql", - True, - "Allow anyone to run arbitrary SQL queries", - ), - Setting( - "max_signed_tokens_ttl", - 0, - "Maximum allowed expiry time for signed API tokens", - ), - Setting("suggest_facets", True, "Calculate and display suggested facets"), - Setting( + ConfigOption("suggest_facets", True, "Calculate and display suggested facets"), + ConfigOption("allow_sql", True, "Allow arbitrary SQL queries via ?sql= parameter"), + ConfigOption( "default_cache_ttl", 5, "Default HTTP cache TTL (used in Cache-Control: max-age= header)", ), - Setting("cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)"), - Setting( + ConfigOption( + "default_cache_ttl_hashed", + 365 * 24 * 60 * 60, + "Default HTTP cache TTL for hashed URL pages", + ), + ConfigOption( + "cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)" + ), + ConfigOption( "allow_csv_stream", True, "Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)", ), - Setting( + ConfigOption( "max_csv_mb", 100, "Maximum size allowed for CSV export in MB - set 0 to disable this limit", ), - Setting( + ConfigOption( "truncate_cells_html", 2048, "Truncate cells longer than this in HTML table view - set 0 to disable", ), - Setting( + ConfigOption( "force_https_urls", False, "Force URLs in API output to always use https:// protocol", ), - Setting( - "template_debug", - False, - "Allow display of template debug information with ?_context=1", - ), - Setting( - "trace_debug", - False, - "Allow display of SQL trace debug information with ?_trace=1", - ), - Setting("base_url", "/", "Datasette URLs should use this base path"), ) -_HASH_URLS_REMOVED = "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead" -OBSOLETE_SETTINGS = { - "hash_urls": _HASH_URLS_REMOVED, - "default_cache_ttl_hashed": _HASH_URLS_REMOVED, -} -DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} - -FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" - -DEFAULT_NOT_SET = object() +DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} -ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) - - -async def favicon(request, send): - await asgi_send_file( - send, - str(FAVICON_PATH), - content_type="image/png", - headers={"Cache-Control": "max-age=3600, immutable, public"}, - ) - - -ResolvedTable = collections.namedtuple("ResolvedTable", ("db", "table", "is_view")) -ResolvedRow = collections.namedtuple( - "ResolvedRow", ("db", "table", "sql", "params", "pks", "pk_values", "row") -) - - -def _to_string(value): - if isinstance(value, str): - return value - else: - return json.dumps(value, default=str) +async def favicon(scope, receive, send): + await asgi_send(send, "", 200) class Datasette: - # Message constants: - INFO = 1 - WARNING = 2 - ERROR = 3 - def __init__( self, - files=None, + files, 
immutables=None, cache_headers=True, cors=False, inspect_data=None, - config=None, metadata=None, sqlite_extensions=None, template_dir=None, plugins_dir=None, static_mounts=None, memory=False, - settings=None, - secret=None, + config=None, version_note=None, - config_dir=None, - pdb=False, - crossdb=False, - nolock=False, - internal=None, - default_deny=False, + extra_serve_options=None, ): - self._startup_invoked = False - assert config_dir is None or isinstance( - config_dir, Path - ), "config_dir= should be a pathlib.Path" - self.config_dir = config_dir - self.pdb = pdb - self._secret = secret or secrets.token_hex(32) - if files is not None and isinstance(files, str): - raise ValueError("files= must be a list of paths, not a string") - self.files = tuple(files or []) + tuple(immutables or []) - if config_dir: - db_files = [] - for ext in ("db", "sqlite", "sqlite3"): - db_files.extend(config_dir.glob("*.{}".format(ext))) - self.files += tuple(str(f) for f in db_files) - if ( - config_dir - and (config_dir / "inspect-data.json").exists() - and not inspect_data - ): - inspect_data = json.loads((config_dir / "inspect-data.json").read_text()) - if not immutables: - immutable_filenames = [i["file"] for i in inspect_data.values()] - immutables = [ - f for f in self.files if Path(f).name in immutable_filenames - ] + immutables = immutables or [] + self.files = tuple(files) + tuple(immutables) + self.immutables = set(immutables) + if not self.files: + self.files = [MEMORY] + elif memory: + self.files = (MEMORY,) + self.files + self.extra_serve_options = extra_serve_options or {} + self._databases = {} self.inspect_data = inspect_data - self.immutables = set(immutables or []) - self.databases = collections.OrderedDict() - self.actions = {} # .invoke_startup() will populate this - try: - self._refresh_schemas_lock = asyncio.Lock() - except RuntimeError as rex: - # Workaround for intermittent test failure, see: - # https://github.com/simonw/datasette/issues/1802 - if "There is no current event loop in thread" in str(rex): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - self._refresh_schemas_lock = asyncio.Lock() - else: - raise - self.crossdb = crossdb - self.nolock = nolock - if memory or crossdb or not self.files: - self.add_database( - Database(self, is_mutable=False, is_memory=True), name="_memory" - ) for file in self.files: - self.add_database( - Database(self, file, is_mutable=file not in self.immutables) - ) - - self.internal_db_created = False - if internal is None: - self._internal_database = Database(self, memory_name=secrets.token_hex()) - else: - self._internal_database = Database(self, path=internal, mode="rwc") - self._internal_database.name = INTERNAL_DB_NAME - + path = file + is_memory = False + if file is MEMORY: + path = None + is_memory = True + is_mutable = path not in self.immutables + db = Database(self, path, is_mutable=is_mutable, is_memory=is_memory) + if db.name in self.databases: + raise Exception("Multiple files with same stem: {}".format(db.name)) + self._databases[db.name] = db self.cache_headers = cache_headers self.cors = cors - config_files = [] - metadata_files = [] - if config_dir: - metadata_files = [ - config_dir / filename - for filename in ("metadata.json", "metadata.yaml", "metadata.yml") - if (config_dir / filename).exists() - ] - config_files = [ - config_dir / filename - for filename in ("datasette.json", "datasette.yaml", "datasette.yml") - if (config_dir / filename).exists() - ] - if config_dir and metadata_files and not metadata: - 
with metadata_files[0].open() as fp: - metadata = parse_metadata(fp.read()) - - if config_dir and config_files and not config: - with config_files[0].open() as fp: - config = parse_metadata(fp.read()) - - # Move any "plugins" and "allow" settings from metadata to config - updates them in place - metadata = metadata or {} - config = config or {} - metadata, config = move_plugins_and_allow(metadata, config) - # Now migrate any known table configuration settings over as well - metadata, config = move_table_config(metadata, config) - - self._metadata_local = metadata or {} - self.sqlite_extensions = [] - for extension in sqlite_extensions or []: - # Resolve spatialite, if requested - if extension == "spatialite": - # Could raise SpatialiteNotFound - self.sqlite_extensions.append(find_spatialite()) - else: - self.sqlite_extensions.append(extension) - if config_dir and (config_dir / "templates").is_dir() and not template_dir: - template_dir = str((config_dir / "templates").resolve()) + self._metadata = metadata or {} + self.sqlite_functions = [] + self.sqlite_extensions = sqlite_extensions or [] self.template_dir = template_dir - if config_dir and (config_dir / "plugins").is_dir() and not plugins_dir: - plugins_dir = str((config_dir / "plugins").resolve()) self.plugins_dir = plugins_dir - if config_dir and (config_dir / "static").is_dir() and not static_mounts: - static_mounts = [("static", str((config_dir / "static").resolve()))] self.static_mounts = static_mounts or [] - if config_dir and (config_dir / "datasette.json").exists() and not config: - config = json.loads((config_dir / "datasette.json").read_text()) - - config = config or {} - config_settings = config.get("settings") or {} - - # Validate settings from config file - for key, value in config_settings.items(): - if key not in DEFAULT_SETTINGS: - raise StartupError(f"Invalid setting '{key}' in config file") - # Validate type matches expected type from DEFAULT_SETTINGS - if value is not None: # Allow None/null values - expected_type = type(DEFAULT_SETTINGS[key]) - actual_type = type(value) - if actual_type != expected_type: - raise StartupError( - f"Setting '{key}' in config file has incorrect type. " - f"Expected {expected_type.__name__}, got {actual_type.__name__}. " - f"Value: {value!r}. " - f"Hint: In YAML/JSON config files, remove quotes from boolean and integer values." - ) - - # Validate settings from constructor parameter - if settings: - for key, value in settings.items(): - if key not in DEFAULT_SETTINGS: - raise StartupError(f"Invalid setting '{key}' in settings parameter") - if value is not None: - expected_type = type(DEFAULT_SETTINGS[key]) - actual_type = type(value) - if actual_type != expected_type: - raise StartupError( - f"Setting '{key}' in settings parameter has incorrect type. " - f"Expected {expected_type.__name__}, got {actual_type.__name__}. 
" - f"Value: {value!r}" - ) - - self.config = config - # CLI settings should overwrite datasette.json settings - self._settings = dict(DEFAULT_SETTINGS, **(config_settings), **(settings or {})) - self.renderers = {} # File extension -> (renderer, can_render) functions + self._config = dict(DEFAULT_CONFIG, **(config or {})) + self.renderers = {} # File extension -> renderer function self.version_note = version_note - if self.setting("num_sql_threads") == 0: - self.executor = None - else: - self.executor = futures.ThreadPoolExecutor( - max_workers=self.setting("num_sql_threads") - ) - self.max_returned_rows = self.setting("max_returned_rows") - self.sql_time_limit_ms = self.setting("sql_time_limit_ms") - self.page_size = self.setting("default_page_size") + self.executor = futures.ThreadPoolExecutor( + max_workers=self.config("num_sql_threads") + ) + self.max_returned_rows = self.config("max_returned_rows") + self.sql_time_limit_ms = self.config("sql_time_limit_ms") + self.page_size = self.config("default_page_size") # Execute plugins in constructor, to ensure they are available # when the rest of `datasette inspect` executes if self.plugins_dir: - for filepath in glob.glob(os.path.join(self.plugins_dir, "*.py")): - if not os.path.isfile(filepath): - continue - mod = module_from_path(filepath, name=os.path.basename(filepath)) + for filename in os.listdir(self.plugins_dir): + filepath = os.path.join(self.plugins_dir, filename) + mod = module_from_path(filepath, name=filename) try: pm.register(mod) except ValueError: # Plugin already registered pass - # Configure Jinja - default_templates = str(app_root / "datasette" / "templates") - template_paths = [] - if self.template_dir: - template_paths.append(self.template_dir) - plugin_template_paths = [ - plugin["templates_path"] - for plugin in get_plugins() - if plugin["templates_path"] - ] - template_paths.extend(plugin_template_paths) - template_paths.append(default_templates) - template_loader = ChoiceLoader( - [ - FileSystemLoader(template_paths), - # Support {% extends "default:table.html" %}: - PrefixLoader( - {"default": FileSystemLoader(default_templates)}, delimiter=":" - ), - ] - ) - environment = Environment( - loader=template_loader, - autoescape=True, - enable_async=True, - # undefined=StrictUndefined, - ) - environment.filters["escape_css_string"] = escape_css_string - environment.filters["quote_plus"] = urllib.parse.quote_plus - self._jinja_env = environment - environment.filters["escape_sqlite"] = escape_sqlite - environment.filters["to_css_class"] = to_css_class - self._register_renderers() - self._permission_checks = collections.deque(maxlen=200) - self._root_token = secrets.token_hex(32) - self.root_enabled = False - self.default_deny = default_deny - self.client = DatasetteClient(self) - - async def apply_metadata_json(self): - # Apply any metadata entries from metadata.json to the internal tables - # step 1: top-level metadata - for key in self._metadata_local or {}: - if key == "databases": - continue - value = self._metadata_local[key] - await self.set_instance_metadata(key, _to_string(value)) - - # step 2: database-level metadata - for dbname, db in self._metadata_local.get("databases", {}).items(): - for key, value in db.items(): - if key in ("tables", "queries"): - continue - await self.set_database_metadata(dbname, key, _to_string(value)) - - # step 3: table-level metadata - for tablename, table in db.get("tables", {}).items(): - for key, value in table.items(): - if key == "columns": - continue - await 
self.set_resource_metadata( - dbname, tablename, key, _to_string(value) - ) - - # step 4: column-level metadata (only descriptions in metadata.json) - for columnname, column_description in table.get("columns", {}).items(): - await self.set_column_metadata( - dbname, tablename, columnname, "description", column_description - ) - - # TODO(alex) is metadata.json was loaded in, and --internal is not memory, then log - # a warning to user that they should delete their metadata.json file - - def get_jinja_environment(self, request: Request = None) -> Environment: - environment = self._jinja_env - if request: - for environment in pm.hook.jinja2_environment_from_request( - datasette=self, request=request, env=environment - ): - pass - return environment - - def get_action(self, name_or_abbr: str): - """ - Returns an Action object for the given name or abbreviation. Returns None if not found. - """ - if name_or_abbr in self.actions: - return self.actions[name_or_abbr] - # Try abbreviation - for action in self.actions.values(): - if action.abbr == name_or_abbr: - return action - return None - - async def refresh_schemas(self): - if self._refresh_schemas_lock.locked(): - return - async with self._refresh_schemas_lock: - await self._refresh_schemas() - - async def _refresh_schemas(self): - internal_db = self.get_internal_database() - if not self.internal_db_created: - await init_internal_db(internal_db) - await self.apply_metadata_json() - self.internal_db_created = True - current_schema_versions = { - row["database_name"]: row["schema_version"] - for row in await internal_db.execute( - "select database_name, schema_version from catalog_databases" - ) - } - # Delete stale entries for databases that are no longer attached - stale_databases = set(current_schema_versions.keys()) - set( - self.databases.keys() - ) - for stale_db_name in stale_databases: - await internal_db.execute_write( - "DELETE FROM catalog_databases WHERE database_name = ?", - [stale_db_name], - ) - for database_name, db in self.databases.items(): - schema_version = (await db.execute("PRAGMA schema_version")).first()[0] - # Compare schema versions to see if we should skip it - if schema_version == current_schema_versions.get(database_name): - continue - placeholders = "(?, ?, ?, ?)" - values = [database_name, str(db.path), db.is_memory, schema_version] - if db.path is None: - placeholders = "(?, null, ?, ?)" - values = [database_name, db.is_memory, schema_version] - await internal_db.execute_write( - """ - INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) - VALUES {} - """.format( - placeholders - ), - values, - ) - await populate_schema_tables(internal_db, db) - @property - def urls(self): - return Urls(self) + def databases(self): + databases = dict(self._databases) + # pylint: disable=no-member + for pairs in pm.hook.available_databases(datasette=self): + databases.update(pairs) + return databases - @property - def pm(self): - """ - Return the global plugin manager instance. - - This provides access to the pluggy PluginManager that manages all - Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to - call plugin hooks. 
- """ - return pm - - async def invoke_startup(self): - # This must be called for Datasette to be in a usable state - if self._startup_invoked: - return - # Register event classes - event_classes = [] - for hook in pm.hook.register_events(datasette=self): - extra_classes = await await_me_maybe(hook) - if extra_classes: - event_classes.extend(extra_classes) - self.event_classes = tuple(event_classes) - - # Register actions, but watch out for duplicate name/abbr - action_names = {} - action_abbrs = {} - for hook in pm.hook.register_actions(datasette=self): - if hook: - for action in hook: - if ( - action.name in action_names - and action != action_names[action.name] - ): - raise StartupError( - "Duplicate action name: {}".format(action.name) + async def run_sanity_checks(self): + # Only one check right now, for Spatialite + for database_name, database in self.databases.items(): + # Run pragma_info on every table + for table in await database.table_names(): + try: + await self.execute( + database_name, + "PRAGMA table_info({});".format(escape_sqlite(table)), + ) + except sqlite3.OperationalError as e: + if e.args[0] == "no such module: VirtualSpatialIndex": + raise click.UsageError( + "It looks like you're trying to load a SpatiaLite" + " database without first loading the SpatiaLite module." + "\n\nRead more: https://datasette.readthedocs.io/en/latest/spatialite.html" ) - if ( - action.abbr - and action.abbr in action_abbrs - and action != action_abbrs[action.abbr] - ): - raise StartupError( - "Duplicate action abbr: {}".format(action.abbr) - ) - action_names[action.name] = action - if action.abbr: - action_abbrs[action.abbr] = action - self.actions[action.name] = action + else: + raise - for hook in pm.hook.prepare_jinja2_environment( - env=self._jinja_env, datasette=self - ): - await await_me_maybe(hook) - for hook in pm.hook.startup(datasette=self): - await await_me_maybe(hook) - self._startup_invoked = True + def config(self, key): + return self._config.get(key, None) - def sign(self, value, namespace="default"): - return URLSafeSerializer(self._secret, namespace).dumps(value) + def config_dict(self): + # Returns a fully resolved config dictionary, useful for templates + return {option.name: self.config(option.name) for option in CONFIG_OPTIONS} - def unsign(self, signed, namespace="default"): - return URLSafeSerializer(self._secret, namespace).loads(signed) - - def in_client(self) -> bool: - """Check if the current code is executing within a datasette.client request. - - Returns: - bool: True if currently executing within a datasette.client request, False otherwise. 
+ def metadata(self, key=None, database=None, table=None, fallback=True): """ - return _in_datasette_client.get() - - def create_token( - self, - actor_id: str, - *, - expires_after: int | None = None, - restrict_all: Iterable[str] | None = None, - restrict_database: Dict[str, Iterable[str]] | None = None, - restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, - ): - token = {"a": actor_id, "t": int(time.time())} - if expires_after: - token["d"] = expires_after - - def abbreviate_action(action): - # rename to abbr if possible - action_obj = self.actions.get(action) - if not action_obj: - return action - return action_obj.abbr or action - - if expires_after: - token["d"] = expires_after - if restrict_all or restrict_database or restrict_resource: - token["_r"] = {} - if restrict_all: - token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] - if restrict_database: - token["_r"]["d"] = {} - for database, actions in restrict_database.items(): - token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] - if restrict_resource: - token["_r"]["r"] = {} - for database, resources in restrict_resource.items(): - for resource, actions in resources.items(): - token["_r"]["r"].setdefault(database, {})[resource] = [ - abbreviate_action(a) for a in actions - ] - return "dstok_{}".format(self.sign(token, namespace="token")) - - def get_database(self, name=None, route=None): - if route is not None: - matches = [db for db in self.databases.values() if db.route == route] - if not matches: - raise KeyError - return matches[0] - if name is None: - name = [key for key in self.databases.keys()][0] - return self.databases[name] - - def add_database(self, db, name=None, route=None): - new_databases = self.databases.copy() - if name is None: - # Pick a unique name for this database - suggestion = db.suggest_name() - name = suggestion + Looks up metadata, cascading backwards from specified level. + Returns None if metadata value is not found. + """ + assert not ( + database is None and table is not None + ), "Cannot call metadata() with table= specified but not database=" + databases = self._metadata.get("databases") or {} + search_list = [] + if database is not None: + search_list.append(databases.get(database) or {}) + if table is not None: + table_metadata = ((databases.get(database) or {}).get("tables") or {}).get( + table + ) or {} + search_list.insert(0, table_metadata) + search_list.append(self._metadata) + if not fallback: + # No fallback allowed, so just use the first one in the list + search_list = search_list[:1] + if key is not None: + for item in search_list: + if key in item: + return item[key] + return None else: - suggestion = name - i = 2 - while name in self.databases: - name = "{}_{}".format(suggestion, i) - i += 1 - db.name = name - db.route = route or name - new_databases[name] = db - # don't mutate! 
that causes race conditions with live import - self.databases = new_databases - return db - - def add_memory_database(self, memory_name, name=None, route=None): - return self.add_database( - Database(self, memory_name=memory_name), name=name, route=route - ) - - def remove_database(self, name): - self.get_database(name).close() - new_databases = self.databases.copy() - new_databases.pop(name) - self.databases = new_databases - - def setting(self, key): - return self._settings.get(key, None) - - def settings_dict(self): - # Returns a fully resolved settings dictionary, useful for templates - return {option.name: self.setting(option.name) for option in SETTINGS} - - def _metadata_recursive_update(self, orig, updated): - if not isinstance(orig, dict) or not isinstance(updated, dict): - return orig - - for key, upd_value in updated.items(): - if isinstance(upd_value, dict) and isinstance(orig.get(key), dict): - orig[key] = self._metadata_recursive_update(orig[key], upd_value) - else: - orig[key] = upd_value - return orig - - async def get_instance_metadata(self): - rows = await self.get_internal_database().execute( - """ - SELECT - key, - value - FROM metadata_instance - """ - ) - return dict(rows) - - async def get_database_metadata(self, database_name: str): - rows = await self.get_internal_database().execute( - """ - SELECT - key, - value - FROM metadata_databases - WHERE database_name = ? - """, - [database_name], - ) - return dict(rows) - - async def get_resource_metadata(self, database_name: str, resource_name: str): - rows = await self.get_internal_database().execute( - """ - SELECT - key, - value - FROM metadata_resources - WHERE database_name = ? - AND resource_name = ? - """, - [database_name, resource_name], - ) - return dict(rows) - - async def get_column_metadata( - self, database_name: str, resource_name: str, column_name: str - ): - rows = await self.get_internal_database().execute( - """ - SELECT - key, - value - FROM metadata_columns - WHERE database_name = ? - AND resource_name = ? - AND column_name = ? - """, - [database_name, resource_name, column_name], - ) - return dict(rows) - - async def set_instance_metadata(self, key: str, value: str): - # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) - await self.get_internal_database().execute_write( - """ - INSERT INTO metadata_instance(key, value) - VALUES(?, ?) - ON CONFLICT(key) DO UPDATE SET value = excluded.value; - """, - [key, value], - ) - - async def set_database_metadata(self, database_name: str, key: str, value: str): - # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) - await self.get_internal_database().execute_write( - """ - INSERT INTO metadata_databases(database_name, key, value) - VALUES(?, ?, ?) - ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; - """, - [database_name, key, value], - ) - - async def set_resource_metadata( - self, database_name: str, resource_name: str, key: str, value: str - ): - # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) - await self.get_internal_database().execute_write( - """ - INSERT INTO metadata_resources(database_name, resource_name, key, value) - VALUES(?, ?, ?, ?) 
- ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; - """, - [database_name, resource_name, key, value], - ) - - async def set_column_metadata( - self, - database_name: str, - resource_name: str, - column_name: str, - key: str, - value: str, - ): - # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) - await self.get_internal_database().execute_write( - """ - INSERT INTO metadata_columns(database_name, resource_name, column_name, key, value) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; - """, - [database_name, resource_name, column_name, key, value], - ) - - def get_internal_database(self): - return self._internal_database + # Return the merged list + m = {} + for item in search_list: + m.update(item) + return m def plugin_config(self, plugin_name, database=None, table=None, fallback=True): - """Return config for plugin, falling back from specified database/table""" - if database is None and table is None: - config = self._plugin_config_top(plugin_name) - else: - config = self._plugin_config_nested(plugin_name, database, table, fallback) - - return resolve_env_secrets(config, os.environ) - - def _plugin_config_top(self, plugin_name): - """Returns any top-level plugin configuration for the specified plugin.""" - return ((self.config or {}).get("plugins") or {}).get(plugin_name) - - def _plugin_config_nested(self, plugin_name, database, table=None, fallback=True): - """Returns any database or table-level plugin configuration for the specified plugin.""" - db_config = ((self.config or {}).get("databases") or {}).get(database) - - # if there's no db-level configuration, then return early, falling back to top-level if needed - if not db_config: - return self._plugin_config_top(plugin_name) if fallback else None - - db_plugin_config = (db_config.get("plugins") or {}).get(plugin_name) - - if table: - table_plugin_config = ( - ((db_config.get("tables") or {}).get(table) or {}).get("plugins") or {} - ).get(plugin_name) - - # fallback to db_config or top-level config, in that order, if needed - if table_plugin_config is None and fallback: - return db_plugin_config or self._plugin_config_top(plugin_name) - - return table_plugin_config - - # fallback to top-level if needed - if db_plugin_config is None and fallback: - self._plugin_config_top(plugin_name) - - return db_plugin_config + "Return config for plugin, falling back from specified database/table" + plugins = self.metadata( + "plugins", database=database, table=table, fallback=fallback + ) + if plugins is None: + return None + plugin_config = plugins.get(plugin_name) + # Resolve any $file and $env keys + if isinstance(plugin_config, dict): + # Create a copy so we don't mutate the version visible at /-/metadata.json + plugin_config_copy = dict(plugin_config) + for key, value in plugin_config_copy.items(): + if isinstance(value, dict): + if list(value.keys()) == ["$env"]: + plugin_config_copy[key] = os.environ.get( + list(value.values())[0] + ) + elif list(value.keys()) == ["$file"]: + plugin_config_copy[key] = open(list(value.values())[0]).read() + return plugin_config_copy + return plugin_config def app_css_hash(self): if not hasattr(self, "_app_css_hash"): - with open(os.path.join(str(app_root), "datasette/static/app.css")) as fp: - self._app_css_hash = hashlib.sha1(fp.read().encode("utf8")).hexdigest()[ - :6 - ] + self._app_css_hash = hashlib.sha1( + open(os.path.join(str(app_root), "datasette/static/app.css")) + .read() + 
.encode("utf8") + ).hexdigest()[:6] return self._app_css_hash - async def get_canned_queries(self, database_name, actor): - queries = {} - for more_queries in pm.hook.canned_queries( - datasette=self, - database=database_name, - actor=actor, - ): - more_queries = await await_me_maybe(more_queries) - queries.update(more_queries or {}) - # Fix any {"name": "select ..."} queries to be {"name": {"sql": "select ..."}} - for key in queries: - if not isinstance(queries[key], dict): - queries[key] = {"sql": queries[key]} - # Also make sure "name" is available: - queries[key]["name"] = key - return queries + def get_canned_queries(self, database_name): + queries = self.metadata("queries", database=database_name, fallback=False) or {} + names = queries.keys() + return [self.get_canned_query(database_name, name) for name in names] - async def get_canned_query(self, database_name, query_name, actor): - queries = await self.get_canned_queries(database_name, actor) + def get_canned_query(self, database_name, query_name): + queries = self.metadata("queries", database=database_name, fallback=False) or {} query = queries.get(query_name) if query: + if not isinstance(query, dict): + query = {"sql": query} + query["name"] = query_name return query - def _prepare_connection(self, conn, database): + def update_with_inherited_metadata(self, metadata): + # Fills in source/license with defaults, if available + metadata.update( + { + "source": metadata.get("source") or self.metadata("source"), + "source_url": metadata.get("source_url") or self.metadata("source_url"), + "license": metadata.get("license") or self.metadata("license"), + "license_url": metadata.get("license_url") + or self.metadata("license_url"), + "about": metadata.get("about") or self.metadata("about"), + "about_url": metadata.get("about_url") or self.metadata("about_url"), + } + ) + + def prepare_connection(self, conn): conn.row_factory = sqlite3.Row conn.text_factory = lambda x: str(x, "utf-8", "replace") - if self.sqlite_extensions and database != INTERNAL_DB_NAME: + for name, num_args, func in self.sqlite_functions: + conn.create_function(name, num_args, func) + if self.sqlite_extensions: conn.enable_load_extension(True) for extension in self.sqlite_extensions: - # "extension" is either a string path to the extension - # or a 2-item tuple that specifies which entrypoint to load. 
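# For illustration (path and entrypoint name are hypothetical), the branch
# below accepts either form of a sqlite_extensions entry:
#     "/usr/lib/mod_spatialite.so"
#     ("/usr/lib/mod_spatialite.so", "sqlite3_modspatialite_init")
# A bare string is passed straight to load_extension(path); a two-item tuple
# also supplies the entrypoint name as the second argument.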
- if isinstance(extension, tuple): - path, entrypoint = extension - conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) - else: - conn.execute("SELECT load_extension(?)", [extension]) - if self.setting("cache_size_kb"): - conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}") + conn.execute("SELECT load_extension('{}')".format(extension)) + if self.config("cache_size_kb"): + conn.execute("PRAGMA cache_size=-{}".format(self.config("cache_size_kb"))) # pylint: disable=no-member - if database != INTERNAL_DB_NAME: - pm.hook.prepare_connection(conn=conn, database=database, datasette=self) - # If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases - if self.crossdb and database == "_memory": - count = 0 - for db_name, db in self.databases.items(): - if count >= SQLITE_LIMIT_ATTACHED or db.is_memory: - continue - sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format( - path=db.path, - qs="mode=ro" if db.is_mutable else "immutable=1", - name=db_name, - ) - conn.execute(sql) - count += 1 + pm.hook.prepare_connection(conn=conn) - def add_message(self, request, message, type=INFO): - if not hasattr(request, "_messages"): - request._messages = [] - request._messages_should_clear = False - request._messages.append((message, type)) - - def _write_messages_to_response(self, request, response): - if getattr(request, "_messages", None): - # Set those messages - response.set_cookie("ds_messages", self.sign(request._messages, "messages")) - elif getattr(request, "_messages_should_clear", False): - response.set_cookie("ds_messages", "", expires=0, max_age=0) - - def _show_messages(self, request): - if getattr(request, "_messages", None): - request._messages_should_clear = True - messages = request._messages - request._messages = [] - return messages - else: - return [] - - async def _crumb_items(self, request, table=None, database=None): - crumbs = [] - actor = None - if request: - actor = request.actor - # Top-level link - if await self.allowed(action="view-instance", actor=actor): - crumbs.append({"href": self.urls.instance(), "label": "home"}) - # Database link - if database: - if await self.allowed( - action="view-database", - resource=DatabaseResource(database=database), - actor=actor, - ): - crumbs.append( - { - "href": self.urls.database(database), - "label": database, - } - ) - # Table link - if table: - assert database, "table= requires database=" - if await self.allowed( - action="view-table", - resource=TableResource(database=database, table=table), - actor=actor, - ): - crumbs.append( - { - "href": self.urls.table(database, table), - "label": table, - } - ) - return crumbs - - async def actors_from_ids( - self, actor_ids: Iterable[str | int] - ) -> Dict[int | str, Dict]: - result = pm.hook.actors_from_ids(datasette=self, actor_ids=actor_ids) - if result is None: - # Do the default thing - return {actor_id: {"id": actor_id} for actor_id in actor_ids} - result = await await_me_maybe(result) - return result - - async def track_event(self, event: Event): - assert isinstance(event, self.event_classes), "Invalid event type: {}".format( - type(event) - ) - for hook in pm.hook.track_event(datasette=self, event=event): - await await_me_maybe(hook) - - def resource_for_action(self, action: str, parent: str | None, child: str | None): - """ - Create a Resource instance for the given action with parent/child values. - - Looks up the action's resource_class and instantiates it with the - provided parent and child identifiers. 
- - Args: - action: The action name (e.g., "view-table", "view-query") - parent: The parent resource identifier (e.g., database name) - child: The child resource identifier (e.g., table/query name) - - Returns: - A Resource instance of the appropriate subclass - - Raises: - ValueError: If the action is unknown - """ - from datasette.permissions import Resource - - action_obj = self.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - resource_class = action_obj.resource_class - instance = object.__new__(resource_class) - Resource.__init__(instance, parent=parent, child=child) - return instance - - async def check_visibility( - self, - actor: dict, - action: str, - resource: "Resource" | None = None, - ): - """ - Check if actor can see a resource and if it's private. - - Returns (visible, private) tuple: - - visible: bool - can the actor see it? - - private: bool - if visible, can anonymous users NOT see it? - """ - from datasette.permissions import Resource - - # Validate that resource is a Resource object or None - if resource is not None and not isinstance(resource, Resource): - raise TypeError(f"resource must be a Resource subclass instance or None.") - - # Check if actor can see it - if not await self.allowed(action=action, resource=resource, actor=actor): - return False, False - - # Check if anonymous user can see it (for "private" flag) - if not await self.allowed(action=action, resource=resource, actor=None): - # Actor can see it but anonymous cannot - it's private - return True, True - - # Both actor and anonymous can see it - it's public - return True, False - - async def allowed_resources_sql( - self, - *, - action: str, - actor: dict | None = None, - parent: str | None = None, - include_is_private: bool = False, - ) -> ResourcesSQL: - """ - Build SQL query to get all resources the actor can access for the given action. - - Args: - action: The action name (e.g., "view-table") - actor: The actor dict (or None for unauthenticated) - parent: Optional parent filter (e.g., database name) to limit results - include_is_private: If True, include is_private column showing if anonymous cannot access - - Returns a namedtuple of (query: str, params: dict) that can be executed against the internal database. - The query returns rows with (parent, child, reason) columns, plus is_private if requested. - - Example: - query, params = await datasette.allowed_resources_sql( - action="view-table", - actor=actor, - parent="mydb", - include_is_private=True - ) - result = await datasette.get_internal_database().execute(query, params) - """ - from datasette.utils.actions_sql import build_allowed_resources_sql - - action_obj = self.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - sql, params = await build_allowed_resources_sql( - self, actor, action, parent=parent, include_is_private=include_is_private - ) - return ResourcesSQL(sql, params) - - async def allowed_resources( - self, - action: str, - actor: dict | None = None, - *, - parent: str | None = None, - include_is_private: bool = False, - include_reasons: bool = False, - limit: int = 100, - next: str | None = None, - ) -> PaginatedResources: - """ - Return paginated resources the actor can access for the given action. - - Uses SQL with keyset pagination to efficiently filter resources. - Returns PaginatedResources with list of Resource instances and pagination metadata. 
- - Args: - action: The action name (e.g., "view-table") - actor: The actor dict (or None for unauthenticated) - parent: Optional parent filter (e.g., database name) to limit results - include_is_private: If True, adds a .private attribute to each Resource - include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons - limit: Maximum number of results to return (1-1000, default 100) - next: Keyset token from previous page for pagination - - Returns: - PaginatedResources with: - - resources: List of Resource objects for this page - - next: Token for next page (None if no more results) - - Example: - # Get first page of tables - page = await datasette.allowed_resources("view-table", actor, limit=50) - for table in page.resources: - print(f"{table.parent}/{table.child}") - - # Get next page - if page.next: - next_page = await datasette.allowed_resources( - "view-table", actor, limit=50, next=page.next - ) - - # With reasons for debugging - page = await datasette.allowed_resources( - "view-table", actor, include_reasons=True - ) - for table in page.resources: - print(f"{table.child}: {table.reasons}") - - # Iterate through all results with async generator - page = await datasette.allowed_resources("view-table", actor) - async for table in page.all(): - print(table.child) - """ - - action_obj = self.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - # Validate and cap limit - limit = min(max(1, limit), 1000) - - # Get base SQL query - query, params = await self.allowed_resources_sql( - action=action, - actor=actor, - parent=parent, - include_is_private=include_is_private, - ) - - # Add keyset pagination WHERE clause if next token provided - if next: - try: - components = urlsafe_components(next) - if len(components) >= 2: - last_parent, last_child = components[0], components[1] - # Keyset condition: (parent > last) OR (parent = last AND child > last) - keyset_where = """ - (parent > :keyset_parent OR - (parent = :keyset_parent AND child > :keyset_child)) - """ - # Wrap original query and add keyset filter - query = f"SELECT * FROM ({query}) WHERE {keyset_where}" - params["keyset_parent"] = last_parent - params["keyset_child"] = last_child - except (ValueError, KeyError): - # Invalid token - ignore and start from beginning - pass - - # Add LIMIT (fetch limit+1 to detect if there are more results) - # Note: query from allowed_resources_sql() already includes ORDER BY parent, child - query = f"{query} LIMIT :limit" - params["limit"] = limit + 1 - - # Execute query - result = await self.get_internal_database().execute(query, params) - rows = list(result.rows) - - # Check if truncated (got more than limit rows) - truncated = len(rows) > limit - if truncated: - rows = rows[:limit] # Remove the extra row - - # Build Resource objects with optional attributes - resources = [] - for row in rows: - # row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested) - resource = self.resource_for_action(action, parent=row[0], child=row[1]) - - # Add reasons if requested - if include_reasons: - reason_json = row[2] - try: - reasons_array = ( - json.loads(reason_json) if isinstance(reason_json, str) else [] - ) - resource.reasons = [r for r in reasons_array if r is not None] - except (json.JSONDecodeError, TypeError): - resource.reasons = [reason_json] if reason_json else [] - - # Add private flag if requested - if include_is_private: - resource.private = bool(row[3]) - - resources.append(resource) - - # Generate next token if 
there are more results - next_token = None - if truncated and resources: - last_resource = resources[-1] - # Use tilde-encoding like table pagination - next_token = "{},{}".format( - tilde_encode(str(last_resource.parent)), - tilde_encode(str(last_resource.child)), - ) - - return PaginatedResources( - resources=resources, - next=next_token, - _datasette=self, - _action=action, - _actor=actor, - _parent=parent, - _include_is_private=include_is_private, - _include_reasons=include_reasons, - _limit=limit, - ) - - async def allowed( - self, - *, - action: str, - resource: "Resource" = None, - actor: dict | None = None, - ) -> bool: - """ - Check if actor can perform action on specific resource. - - Uses SQL to check permission for a single resource without fetching all resources. - This is efficient - it does NOT call allowed_resources() and check membership. - - For global actions, resource should be None (or omitted). - - Example: - from datasette.resources import TableResource - can_view = await datasette.allowed( - action="view-table", - resource=TableResource(database="analytics", table="users"), - actor=actor - ) - - # For global actions, resource can be omitted: - can_debug = await datasette.allowed(action="permissions-debug", actor=actor) - """ - from datasette.utils.actions_sql import check_permission_for_resource - - # For global actions, resource remains None - - # Check if this action has also_requires - if so, check that action first - action_obj = self.actions.get(action) - if action_obj and action_obj.also_requires: - # Must have the required action first - if not await self.allowed( - action=action_obj.also_requires, - resource=resource, - actor=actor, - ): - return False - - # For global actions, resource is None - parent = resource.parent if resource else None - child = resource.child if resource else None - - result = await check_permission_for_resource( - datasette=self, - actor=actor, - action=action, - parent=parent, - child=child, - ) - - # Log the permission check for debugging - self._permission_checks.append( - PermissionCheck( - when=datetime.datetime.now(datetime.timezone.utc).isoformat(), - actor=actor, - action=action, - parent=parent, - child=child, - result=result, - ) - ) - - return result - - async def ensure_permission( - self, - *, - action: str, - resource: "Resource" = None, - actor: dict | None = None, - ): - """ - Check if actor can perform action on resource, raising Forbidden if not. - - This is a convenience wrapper around allowed() that raises Forbidden - instead of returning False. Use this when you want to enforce a permission - check and halt execution if it fails. 
- - Example: - from datasette.resources import TableResource - - # Will raise Forbidden if actor cannot view the table - await datasette.ensure_permission( - action="view-table", - resource=TableResource(database="analytics", table="users"), - actor=request.actor - ) - - # For instance-level actions, resource can be omitted: - await datasette.ensure_permission( - action="permissions-debug", - actor=request.actor - ) - """ - if not await self.allowed(action=action, resource=resource, actor=actor): - raise Forbidden(action) - - async def execute( - self, - db_name, - sql, - params=None, - truncate=False, - custom_time_limit=None, - page_size=None, - log_sql_errors=True, - ): - return await self.databases[db_name].execute( - sql, - params=params, - truncate=truncate, - custom_time_limit=custom_time_limit, - page_size=page_size, - log_sql_errors=log_sql_errors, - ) - - async def expand_foreign_keys(self, actor, database, table, column, values): - """Returns dict mapping (column, value) -> label""" + async def expand_foreign_keys(self, database, table, column, values): + "Returns dict mapping (column, value) -> label" labeled_fks = {} db = self.databases[database] foreign_keys = await db.foreign_keys_for_table(table) @@ -1480,19 +359,7 @@ class Datasette: ][0] except IndexError: return {} - # Ensure user has permission to view the referenced table - from datasette.resources import TableResource - - other_table = fk["other_table"] - other_column = fk["other_column"] - visible, _ = await self.check_visibility( - actor, - action="view-table", - resource=TableResource(database=database, table=other_table), - ) - if not visible: - return {} - label_column = await db.label_column_for_table(other_table) + label_column = await db.label_column_for_table(fk["other_table"]) if not label_column: return {(fk["column"], value): str(value) for value in values} labeled_fks = {} @@ -1501,9 +368,9 @@ class Datasette: from {other_table} where {other_column} in ({placeholders}) """.format( - other_column=escape_sqlite(other_column), + other_column=escape_sqlite(fk["other_column"]), label_column=escape_sqlite(label_column), - other_table=escape_sqlite(other_table), + other_table=escape_sqlite(fk["other_table"]), placeholders=", ".join(["?"] * len(set(values))), ) try: @@ -1517,30 +384,35 @@ class Datasette: def absolute_url(self, request, path): url = urllib.parse.urljoin(request.url, path) - if url.startswith("http://") and self.setting("force_https_urls"): + if url.startswith("http://") and self.config("force_https_urls"): url = "https://" + url[len("http://") :] return url - def _connected_databases(self): + def register_custom_units(self): + "Register any custom units defined in the metadata.json with Pint" + for unit in self.metadata("custom_units") or []: + ureg.define(unit) + + def connected_databases(self): return [ { "name": d.name, - "route": d.route, "path": d.path, "size": d.size, "is_mutable": d.is_mutable, "is_memory": d.is_memory, "hash": d.hash, } - for name, d in self.databases.items() + for d in sorted(self.databases.values(), key=lambda d: d.name) ] - def _versions(self): + def versions(self): conn = sqlite3.connect(":memory:") - self._prepare_connection(conn, "_memory") + self.prepare_connection(conn) sqlite_version = conn.execute("select sqlite_version()").fetchone()[0] - sqlite_extensions = {"json1": detect_json1(conn)} + sqlite_extensions = {} for extension, testsql, hasversion in ( + ("json1", "SELECT json('{}')", False), ("spatialite", "SELECT spatialite_version()", True), ): try: @@ 
-1551,17 +423,6 @@ class Datasette: sqlite_extensions[extension] = None except Exception: pass - # More details on SpatiaLite - if "spatialite" in sqlite_extensions: - spatialite_details = {} - for fn in SPATIALITE_FUNCTIONS: - try: - result = conn.execute("select {}()".format(fn)) - spatialite_details[fn] = result.fetchone()[0] - except Exception as e: - spatialite_details[fn] = {"error": str(e)} - sqlite_extensions["spatialite"] = spatialite_details - # Figure out supported FTS versions fts_versions = [] for fts in ("FTS5", "FTS4", "FTS3"): @@ -1575,23 +436,13 @@ class Datasette: datasette_version = {"version": __version__} if self.version_note: datasette_version["note"] = self.version_note - - try: - # Optional import to avoid breaking Pyodide - # https://github.com/simonw/datasette/issues/1733#issuecomment-1115268245 - import uvicorn - - uvicorn_version = uvicorn.__version__ - except ImportError: - uvicorn_version = None - info = { + return { "python": { "version": ".".join(map(str, sys.version_info[:3])), "full": sys.version, }, "datasette": datasette_version, "asgi": "3.0", - "uvicorn": uvicorn_version, "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, @@ -1601,278 +452,155 @@ class Datasette: ], }, } - if using_pysqlite3: - for package in ("pysqlite3", "pysqlite3-binary"): - try: - info["pysqlite3"] = importlib.metadata.version(package) - break - except importlib.metadata.PackageNotFoundError: - pass - return info - def _plugins(self, request=None, all=False): - ps = list(get_plugins()) - should_show_all = False - if request is not None: - should_show_all = request.args.get("all") - else: - should_show_all = all - if not should_show_all: + def plugins(self, show_all=False): + ps = list(get_plugins(pm)) + if not show_all: ps = [p for p in ps if p["name"] not in DEFAULT_PLUGINS] - ps.sort(key=lambda p: p["name"]) return [ { "name": p["name"], "static": p["static_path"] is not None, "templates": p["templates_path"] is not None, "version": p.get("version"), - "hooks": list(sorted(set(p["hooks"]))), } for p in ps ] - def _threads(self): - if self.setting("num_sql_threads") == 0: - return {"num_threads": 0, "threads": []} - threads = list(threading.enumerate()) - d = { - "num_threads": len(threads), - "threads": [ - {"name": t.name, "ident": t.ident, "daemon": t.daemon} for t in threads - ], - } - tasks = asyncio.all_tasks() - d.update( - { - "num_tasks": len(tasks), - "tasks": [_cleaner_task_str(t) for t in tasks], - } - ) - return d - - def _actor(self, request): - return {"actor": request.actor} - - def _actions(self): - return [ - { - "name": action.name, - "abbr": action.abbr, - "description": action.description, - "takes_parent": action.takes_parent, - "takes_child": action.takes_child, - "resource_class": ( - action.resource_class.__name__ if action.resource_class else None - ), - "also_requires": action.also_requires, - } - for action in sorted(self.actions.values(), key=lambda a: a.name) - ] - - async def table_config(self, database: str, table: str) -> dict: - """Return dictionary of configuration for specified table""" + def table_metadata(self, database, table): + "Fetch table-specific metadata." 
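The extension and FTS detection above amounts to "run a test statement and treat failure as absence". A standalone sketch of that probe using plain sqlite3; the test statements here are illustrative, not necessarily the exact ones Datasette runs:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    probes = {
        "json1": "SELECT json('{}')",
        "fts5": "CREATE VIRTUAL TABLE fts5_probe USING fts5(content)",
    }
    available = {}
    for name, testsql in probes.items():
        try:
            conn.execute(testsql)
            available[name] = True
        except sqlite3.Error:
            # "no such function" / "no such module" means the capability is missing
            available[name] = False
    print(available)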
return ( - (self.config or {}) - .get("databases", {}) + (self.metadata("databases") or {}) .get(database, {}) .get("tables", {}) .get(table, {}) ) - def _register_renderers(self): - """Register output renderers which output data in custom formats.""" + async def execute_against_connection_in_thread(self, db_name, fn): + def in_thread(): + conn = getattr(connections, db_name, None) + if not conn: + conn = self.databases[db_name].connect() + self.prepare_connection(conn) + setattr(connections, db_name, conn) + return fn(conn) + + return await asyncio.get_event_loop().run_in_executor(self.executor, in_thread) + + async def execute( + self, + db_name, + sql, + params=None, + truncate=False, + custom_time_limit=None, + page_size=None, + log_sql_errors=True, + ): + """Executes sql against db_name in a thread""" + page_size = page_size or self.page_size + + def sql_operation_in_thread(conn): + time_limit_ms = self.sql_time_limit_ms + if custom_time_limit and custom_time_limit < time_limit_ms: + time_limit_ms = custom_time_limit + + with sqlite_timelimit(conn, time_limit_ms): + try: + cursor = conn.cursor() + cursor.execute(sql, params or {}) + max_returned_rows = self.max_returned_rows + if max_returned_rows == page_size: + max_returned_rows += 1 + if max_returned_rows and truncate: + rows = cursor.fetchmany(max_returned_rows + 1) + truncated = len(rows) > max_returned_rows + rows = rows[:max_returned_rows] + else: + rows = cursor.fetchall() + truncated = False + except sqlite3.OperationalError as e: + if e.args == ("interrupted",): + raise QueryInterrupted(e, sql, params) + if log_sql_errors: + print( + "ERROR: conn={}, sql = {}, params = {}: {}".format( + conn, repr(sql), params, e + ) + ) + raise + + if truncate: + return Results(rows, truncated, cursor.description) + + else: + return Results(rows, False, cursor.description) + + with trace("sql", database=db_name, sql=sql.strip(), params=params): + results = await self.execute_against_connection_in_thread( + db_name, sql_operation_in_thread + ) + return results + + def register_renderers(self): + """ Register output renderers which output data in custom formats. 
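The truncate handling in execute() above relies on fetching one row more than max_returned_rows so callers can distinguish "exactly N rows" from "more than N" (the real method also wraps the query in a sqlite_timelimit time limit). A self-contained sketch of the over-fetch pattern, with an illustrative query:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cursor = conn.execute(
        "WITH RECURSIVE c(x) AS (SELECT 1 UNION ALL SELECT x + 1 FROM c LIMIT 10) SELECT x FROM c"
    )
    max_returned_rows = 5
    rows = cursor.fetchmany(max_returned_rows + 1)
    truncated = len(rows) > max_returned_rows
    rows = rows[:max_returned_rows]
    print(len(rows), truncated)  # 5 True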
""" # Built-in renderers - self.renderers["json"] = (json_renderer, lambda: True) + self.renderers["json"] = json_renderer # Hooks hook_renderers = [] # pylint: disable=no-member for hook in pm.hook.register_output_renderer(datasette=self): - if type(hook) is list: + if type(hook) == list: hook_renderers += hook else: hook_renderers.append(hook) for renderer in hook_renderers: - self.renderers[renderer["extension"]] = ( - # It used to be called "callback" - remove this in Datasette 1.0 - renderer.get("render") or renderer["callback"], - renderer.get("can_render") or (lambda: True), - ) + self.renderers[renderer["extension"]] = renderer["callback"] - async def render_template( - self, - templates: List[str] | str | Template, - context: Dict[str, Any] | Context | None = None, - request: Request | None = None, - view_name: str | None = None, - ): - if not self._startup_invoked: - raise Exception("render_template() called before await ds.invoke_startup()") - context = context or {} - if isinstance(templates, Template): - template = templates - else: - if isinstance(templates, str): - templates = [templates] - template = self.get_jinja_environment(request).select_template(templates) - if dataclasses.is_dataclass(context): - context = dataclasses.asdict(context) - body_scripts = [] - # pylint: disable=no-member - for extra_script in pm.hook.extra_body_script( - template=template.name, - database=context.get("database"), - table=context.get("table"), - columns=context.get("columns"), - view_name=view_name, - request=request, - datasette=self, - ): - extra_script = await await_me_maybe(extra_script) - if isinstance(extra_script, dict): - script = extra_script["script"] - module = bool(extra_script.get("module")) - else: - script = extra_script - module = False - body_scripts.append({"script": Markup(script), "module": module}) - - extra_template_vars = {} - # pylint: disable=no-member - for extra_vars in pm.hook.extra_template_vars( - template=template.name, - database=context.get("database"), - table=context.get("table"), - columns=context.get("columns"), - view_name=view_name, - request=request, - datasette=self, - ): - extra_vars = await await_me_maybe(extra_vars) - assert isinstance(extra_vars, dict), "extra_vars is of type {}".format( - type(extra_vars) - ) - extra_template_vars.update(extra_vars) - - async def menu_links(): - links = [] - for hook in pm.hook.menu_links( - datasette=self, - actor=request.actor if request else None, - request=request or None, - ): - extra_links = await await_me_maybe(hook) - if extra_links: - links.extend(extra_links) - return links - - template_context = { - **context, - **{ - "request": request, - "crumb_items": self._crumb_items, - "urls": self.urls, - "actor": request.actor if request else None, - "menu_links": menu_links, - "display_actor": display_actor, - "show_logout": request is not None - and "ds_actor" in request.cookies - and request.actor, - "app_css_hash": self.app_css_hash(), - "zip": zip, - "body_scripts": body_scripts, - "format_bytes": format_bytes, - "show_messages": lambda: self._show_messages(request), - "extra_css_urls": await self._asset_urls( - "extra_css_urls", template, context, request, view_name - ), - "extra_js_urls": await self._asset_urls( - "extra_js_urls", template, context, request, view_name - ), - "base_url": self.setting("base_url"), - "csrftoken": request.scope["csrftoken"] if request else lambda: "", - "datasette_version": __version__, - }, - **extra_template_vars, - } - if request and request.args.get("_context") and 
self.setting("template_debug"): - return "<pre>{}</pre>
".format( - escape(json.dumps(template_context, default=repr, indent=4)) - ) - - return await template.render_async(template_context) - - def set_actor_cookie( - self, response: Response, actor: dict, expire_after: int | None = None - ): - data = {"a": actor} - if expire_after: - expires_at = int(time.time()) + (24 * 60 * 60) - data["e"] = baseconv.base62.encode(expires_at) - response.set_cookie("ds_actor", self.sign(data, "actor")) - - def delete_actor_cookie(self, response: Response): - response.set_cookie("ds_actor", "", expires=0, max_age=0) - - async def _asset_urls(self, key, template, context, request, view_name): - # Flatten list-of-lists from plugins: - seen_urls = set() - collected = [] - for hook in getattr(pm.hook, key)( - template=template.name, - database=context.get("database"), - table=context.get("table"), - columns=context.get("columns"), - view_name=view_name, - request=request, - datasette=self, - ): - hook = await await_me_maybe(hook) - collected.extend(hook) - collected.extend((self.config or {}).get(key) or []) - output = [] - for url_or_dict in collected: - if isinstance(url_or_dict, dict): - url = url_or_dict["url"] - sri = url_or_dict.get("sri") - module = bool(url_or_dict.get("module")) - else: - url = url_or_dict - sri = None - module = False - if url in seen_urls: - continue - seen_urls.add(url) - if url.startswith("/"): - # Take base_url into account: - url = self.urls.path(url) - script = {"url": url} - if sri: - script["sri"] = sri - if module: - script["module"] = True - output.append(script) - return output - - def _config(self): - return redact_keys( - self.config, ("secret", "key", "password", "token", "hash", "dsn") + def app(self): + "Returns an ASGI app function that serves the whole of Datasette" + default_templates = str(app_root / "datasette" / "templates") + template_paths = [] + if self.template_dir: + template_paths.append(self.template_dir) + template_paths.extend( + [ + plugin["templates_path"] + for plugin in get_plugins(pm) + if plugin["templates_path"] + ] ) + template_paths.append(default_templates) + template_loader = ChoiceLoader( + [ + FileSystemLoader(template_paths), + # Support {% extends "default:table.html" %}: + PrefixLoader( + {"default": FileSystemLoader(default_templates)}, delimiter=":" + ), + ] + ) + self.jinja_env = Environment(loader=template_loader, autoescape=True) + self.jinja_env.filters["escape_css_string"] = escape_css_string + self.jinja_env.filters["quote_plus"] = lambda u: urllib.parse.quote_plus(u) + self.jinja_env.filters["escape_sqlite"] = escape_sqlite + self.jinja_env.filters["to_css_class"] = to_css_class + # pylint: disable=no-member + pm.hook.prepare_jinja2_environment(env=self.jinja_env) + + self.register_renderers() - def _routes(self): routes = [] - for routes_to_add in pm.hook.register_routes(datasette=self): - for regex, view_fn in routes_to_add: - routes.append((regex, wrap_view(view_fn, self))) - def add_route(view, regex): routes.append((regex, view)) - add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") - add_route(IndexView.as_view(self), r"/-/(\.(?Pjsono?))?$") - add_route(permanent_redirect("/-/"), r"/-$") + # Generate a regex snippet to match all registered renderer file extensions + renderer_regex = "|".join(r"\." 
+ key for key in self.renderers.keys()) + + add_route(IndexView.as_asgi(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") @@ -1883,657 +611,110 @@ class Datasette: add_route(asgi_static(dirname), r"/" + path + "/(?P.*)$") # Mount any plugin static/ directories - for plugin in get_plugins(): + for plugin in get_plugins(pm): if plugin["static_path"]: - add_route( - asgi_static(plugin["static_path"]), - f"/-/static-plugins/{plugin['name']}/(?P.*)$", - ) - # Support underscores in name in addition to hyphens, see https://github.com/simonw/datasette/issues/611 - add_route( - asgi_static(plugin["static_path"]), - "/-/static-plugins/{}/(?P.*)$".format( - plugin["name"].replace("-", "_") - ), - ) + modpath = "/-/static-plugins/{}/(?P.*)$".format(plugin["name"]) + add_route(asgi_static(plugin["static_path"]), modpath) add_route( - permanent_redirect( - "/_memory", forward_query_string=True, forward_rest=True - ), - r"/:memory:(?P.*)$", + JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata), + r"/-/metadata(?P(\.json)?)$", ) add_route( - JsonDataView.as_view(self, "versions.json", self._versions), - r"/-/versions(\.(?Pjson))?$", + JsonDataView.as_asgi(self, "versions.json", self.versions), + r"/-/versions(?P(\.json)?)$", ) add_route( - JsonDataView.as_view( - self, "plugins.json", self._plugins, needs_request=True - ), - r"/-/plugins(\.(?Pjson))?$", + JsonDataView.as_asgi(self, "plugins.json", self.plugins), + r"/-/plugins(?P(\.json)?)$", ) add_route( - JsonDataView.as_view(self, "settings.json", lambda: self._settings), - r"/-/settings(\.(?Pjson))?$", + JsonDataView.as_asgi(self, "config.json", lambda: self._config), + r"/-/config(?P(\.json)?)$", ) add_route( - JsonDataView.as_view(self, "config.json", lambda: self._config()), - r"/-/config(\.(?Pjson))?$", + JsonDataView.as_asgi(self, "databases.json", self.connected_databases), + r"/-/databases(?P(\.json)?)$", ) add_route( - JsonDataView.as_view(self, "threads.json", self._threads), - r"/-/threads(\.(?Pjson))?$", + DatabaseDownload.as_asgi(self), r"/(?P[^/]+?)(?P\.db)$" ) add_route( - JsonDataView.as_view(self, "databases.json", self._connected_databases), - r"/-/databases(\.(?Pjson))?$", + DatabaseView.as_asgi(self), + r"/(?P[^/]+?)(?P" + + renderer_regex + + r"|.jsono|\.csv)?$", ) add_route( - JsonDataView.as_view( - self, "actor.json", self._actor, needs_request=True, permission=None - ), - r"/-/actor(\.(?Pjson))?$", + TableView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?$)", ) add_route( - JsonDataView.as_view( - self, - "actions.json", - self._actions, - template="debug_actions.html", - permission="permissions-debug", - ), - r"/-/actions(\.(?Pjson))?$", + RowView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?)/(?P[^/]+?)(?P" + + renderer_regex + + r")?$", ) - add_route( - AuthTokenView.as_view(self), - r"/-/auth-token$", - ) - add_route( - CreateTokenView.as_view(self), - r"/-/create-token$", - ) - add_route( - ApiExplorerView.as_view(self), - r"/-/api$", - ) - add_route( - TablesView.as_view(self), - r"/-/tables(\.(?Pjson))?$", - ) - add_route( - InstanceSchemaView.as_view(self), - r"/-/schema(\.(?Pjson|md))?$", - ) - add_route( - LogoutView.as_view(self), - r"/-/logout$", - ) - add_route( - PermissionsDebugView.as_view(self), - r"/-/permissions$", - ) - add_route( - AllowedResourcesView.as_view(self), - r"/-/allowed(\.(?Pjson))?$", - ) - add_route( - PermissionRulesView.as_view(self), - r"/-/rules(\.(?Pjson))?$", - ) - add_route( - 
PermissionCheckView.as_view(self), - r"/-/check(\.(?P<format>json))?$", - ) - add_route( - MessagesDebugView.as_view(self), - r"/-/messages$", - ) - add_route( - AllowDebugView.as_view(self), - r"/-/allow-debug$", - ) - add_route( - wrap_view(PatternPortfolioView, self), - r"/-/patterns$", - ) - add_route( - wrap_view(database_download, self), - r"/(?P<database>[^\/\.]+)\.db$", - ) - add_route( - wrap_view(DatabaseView, self), - r"/(?P<database>[^\/\.]+)(\.(?P<format>\w+))?$", - ) - add_route(TableCreateView.as_view(self), r"/(?P<database>[^\/\.]+)/-/create$") - add_route( - DatabaseSchemaView.as_view(self), - r"/(?P<database>[^\/\.]+)/-/schema(\.(?P<format>json|md))?$", - ) - add_route( - wrap_view(QueryView, self), - r"/(?P<database>[^\/\.]+)/-/query(\.(?P<format>\w+))?$", - ) - add_route( - wrap_view(table_view, self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)(\.(?P<format>\w+))?$", - ) - add_route( - RowView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P<pks>[^/]+?)(\.(?P<format>\w+))?$", - ) - add_route( - TableInsertView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/insert$", - ) - add_route( - TableUpsertView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/upsert$", - ) - add_route( - TableDropView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/drop$", - ) - add_route( - TableSchemaView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/schema(\.(?P<format>json|md))?$", - ) - add_route( - RowDeleteView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P<pks>[^/]+?)/-/delete$", - ) - add_route( - RowUpdateView.as_view(self), - r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P[^/]+?)/-/update$", - ) - return [ - # Compile any strings to regular expressions - ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) - for pattern, view in routes - ] - - async def resolve_database(self, request): - database_route = tilde_decode(request.url_vars["database"]) - try: - return self.get_database(route=database_route) - except KeyError: - raise DatabaseNotFound(database_route) - - async def resolve_table(self, request): - db = await self.resolve_database(request) - table_name = tilde_decode(request.url_vars["table"]) - # Table must exist - is_view = False - table_exists = await db.table_exists(table_name) - if not table_exists: - is_view = await db.view_exists(table_name) - if not (table_exists or is_view): - raise TableNotFound(db.name, table_name) - return ResolvedTable(db, table_name, is_view) - - async def resolve_row(self, request): - db, table_name, _ = await self.resolve_table(request) - pk_values = urlsafe_components(request.url_vars["pks"]) - sql, params, pks = await row_sql_params_pks(db, table_name, pk_values) - results = await db.execute(sql, params, truncate=True) - row = results.first() - if row is None: - raise RowNotFound(db.name, table_name, pk_values) - return ResolvedRow(db, table_name, sql, params, pks, pk_values, results.first()) - - def app(self): - """Returns an ASGI app function that serves the whole of Datasette""" - routes = self._routes() + self.register_custom_units() async def setup_db(): # First time server starts up, calculate table counts for immutable databases - for database in self.databases.values(): + for dbname, database in self.databases.items(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - async def custom_csrf_error(scope, send, message_id): - await asgi_send( - send, - content=await self.render_template( - "csrf_error.html", - {"message_id": message_id, "message_name": Errors(message_id).name}, - ), - status=403, - content_type="text/html; charset=utf-8", - ) - - asgi = asgi_csrf.asgi_csrf( - DatasetteRouter(self, routes), - signing_secret=self._secret, - cookie_name="ds_csrftoken", - skip_if_scope=lambda scope: any( - pm.hook.skip_csrf(datasette=self, scope=scope) - ), - send_csrf_failed=custom_csrf_error, + asgi = AsgiLifespan( + AsgiTracer(DatasetteRouter(self, routes)), on_startup=setup_db ) - if self.setting("trace_debug"): - asgi = AsgiTracer(asgi) - asgi = AsgiLifespan(asgi) - asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) return asgi -class DatasetteRouter: +class DatasetteRouter(AsgiRouter): def __init__(self, datasette, routes): self.ds = datasette - self.routes = routes or [] + super().__init__(routes) - async def __call__(self, scope, receive, send): - # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves - path = scope["path"] - raw_path = scope.get("raw_path") - if raw_path: - path = raw_path.decode("ascii") - path = path.partition("?")[0] - return await self.route_path(scope, receive, send, path) - - async def route_path(self, scope, receive, send, path): - # Strip off base_url if present before routing - base_url = self.ds.setting("base_url") - if base_url != "/" and path.startswith(base_url): - path = "/" + path[len(base_url) :] - scope = dict(scope, route_path=path) - request = Request(scope, receive) - # Populate request_messages if ds_messages cookie is present - try: - request._messages = self.ds.unsign( - request.cookies.get("ds_messages", ""), "messages" - ) - except BadSignature: - pass - - scope_modifications = {} - # Apply force_https_urls, if set - if ( - self.ds.setting("force_https_urls") - and scope["type"] == "http" - and scope.get("scheme") != "https" - ): - scope_modifications["scheme"] = "https" - # Handle authentication - default_actor = scope.get("actor") or None - actor = None - for actor in pm.hook.actor_from_request(datasette=self.ds, request=request): - actor = await await_me_maybe(actor) - if actor: - break - scope_modifications["actor"] = actor or default_actor - scope = dict(scope, **scope_modifications) - - match, view = resolve_routes(self.routes, path) - - if match is None: - return await self.handle_404(request, send) - - new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - request.scope = new_scope - try: - response = await view(request, send) - if response: - self.ds._write_messages_to_response(request, response) - await response.asgi_send(send) - return - except NotFound as exception: - return await self.handle_404(request, send, exception) - except Forbidden as exception: - # Try the forbidden() plugin hook - for custom_response in pm.hook.forbidden( - datasette=self.ds, request=request, message=exception.args[0] - ): - custom_response = await await_me_maybe(custom_response) - assert ( - custom_response - ), "Default forbidden() hook should have been called" - return await custom_response.asgi_send(send) - except Exception as exception: - return await self.handle_exception(request, send, exception) - - async def handle_404(self, request, send, exception=None): - # If path contains % encoding, redirect to tilde encoding - if "%" in request.path: - # Try the same path but with "%" replaced by "~" - # and "~" replaced with "~7E" - # and "." replaced with "~2E" - new_path = ( - request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E") - ) - if request.query_string: - new_path += "?{}".format(request.query_string) - await asgi_send_redirect(send, new_path) - return + async def handle_404(self, scope, receive, send): # If URL has a trailing slash, redirect to URL without it - path = request.scope.get( - "raw_path", request.scope["path"].encode("utf8") - ).partition(b"?")[0] - context = {} + path = scope.get("raw_path", scope["path"].encode("utf8")) if path.endswith(b"/"): path = path.rstrip(b"/") - if request.scope["query_string"]: - path += b"?" + request.scope["query_string"] + if scope["query_string"]: + path += b"?" + scope["query_string"] await asgi_send_redirect(send, path.decode("latin1")) else: - # Is there a pages/* template matching this path? 
- route_path = request.scope.get("route_path", request.scope["path"]) - # Jinja requires template names to use "/" even on Windows - template_name = "pages" + route_path + ".html" - # Build a list of pages/blah/{name}.html matching expressions - environment = self.ds.get_jinja_environment(request) - pattern_templates = [ - filepath - for filepath in environment.list_templates() - if "{" in filepath and filepath.startswith("pages/") - ] - page_routes = [ - (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) - for filepath in pattern_templates - ] - try: - template = environment.select_template([template_name]) - except TemplateNotFound: - template = None - if template is None: - # Try for a pages/blah/{name}.html template match - for regex, wildcard_template in page_routes: - match = regex.match(route_path) - if match is not None: - context.update(match.groupdict()) - template = wildcard_template - break + await super().handle_404(scope, receive, send) - if template: - headers = {} - status = [200] - - def custom_header(name, value): - headers[name] = value - return "" - - def custom_status(code): - status[0] = code - return "" - - def custom_redirect(location, code=302): - status[0] = code - headers["Location"] = location - return "" - - def raise_404(message=""): - raise NotFoundExplicit(message) - - context.update( - { - "custom_header": custom_header, - "custom_status": custom_status, - "custom_redirect": custom_redirect, - "raise_404": raise_404, - } - ) - try: - body = await self.ds.render_template( - template, - context, - request=request, - view_name="page", - ) - except NotFoundExplicit as e: - await self.handle_exception(request, send, e) - return - # Pull content-type out into separate parameter - content_type = "text/html; charset=utf-8" - matches = [k for k in headers if k.lower() == "content-type"] - if matches: - content_type = headers[matches[0]] - await asgi_send( - send, - body, - status=status[0], - headers=headers, - content_type=content_type, - ) - else: - await self.handle_exception(request, send, exception or NotFound("404")) - - async def handle_exception(self, request, send, exception): - responses = [] - for hook in pm.hook.handle_exception( - datasette=self.ds, - request=request, - exception=exception, - ): - response = await await_me_maybe(hook) - if response is not None: - responses.append(response) - - assert responses, "Default exception handler should have returned something" - # Even if there are multiple responses use just the first one - response = responses[0] - await response.asgi_send(send) - - -_cleaner_task_str_re = re.compile(r"\S*site-packages/") - - -def _cleaner_task_str(task): - s = str(task) - # This has something like the following in it: - # running at /Users/simonw/Dropbox/Development/datasette/venv-3.7.5/lib/python3.7/site-packages/uvicorn/main.py:361> - # Clean up everything up to and including site-packages - return _cleaner_task_str_re.sub("", s) - - -def wrap_view(view_fn_or_class, datasette): - is_function = isinstance(view_fn_or_class, types.FunctionType) - if is_function: - return wrap_view_function(view_fn_or_class, datasette) - else: - if not isinstance(view_fn_or_class, type): - raise ValueError("view_fn_or_class must be a function or a class") - return wrap_view_class(view_fn_or_class, datasette) - - -def wrap_view_class(view_class, datasette): - async def async_view_for_class(request, send): - instance = view_class() - if inspect.iscoroutinefunction(instance.__call__): - return await 
async_call_with_supported_arguments( - instance.__call__, - scope=request.scope, - receive=request.receive, - send=send, - request=request, - datasette=datasette, - ) + async def handle_500(self, scope, receive, send, exception): + title = None + if isinstance(exception, NotFound): + status = 404 + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.messagge_is_html: + message = Markup(message) + title = exception.title else: - return call_with_supported_arguments( - instance.__call__, - scope=request.scope, - receive=request.receive, - send=send, - request=request, - datasette=datasette, - ) - - async_view_for_class.view_class = view_class - return async_view_for_class - - -def wrap_view_function(view_fn, datasette): - @functools.wraps(view_fn) - async def async_view_fn(request, send): - if inspect.iscoroutinefunction(view_fn): - response = await async_call_with_supported_arguments( - view_fn, - scope=request.scope, - receive=request.receive, - send=send, - request=request, - datasette=datasette, - ) + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = ["500.html"] + if status != 500: + templates = ["{}.html".format(status)] + templates + info.update({"ok": False, "error": message, "status": status, "title": title}) + headers = {} + if self.ds.cors: + headers["Access-Control-Allow-Origin"] = "*" + if scope["path"].split("?")[0].endswith(".json"): + await asgi_send_json(send, info, status=status, headers=headers) else: - response = call_with_supported_arguments( - view_fn, - scope=request.scope, - receive=request.receive, - send=send, - request=request, - datasette=datasette, + template = self.ds.jinja_env.select_template(templates) + await asgi_send_html( + send, template.render(info), status=status, headers=headers ) - if response is not None: - return response - - return async_view_fn - - -def permanent_redirect(path, forward_query_string=False, forward_rest=False): - return wrap_view( - lambda request, send: Response.redirect( - path - + (request.url_vars["rest"] if forward_rest else "") - + ( - ("?" + request.query_string) - if forward_query_string and request.query_string - else "" - ), - status=301, - ), - datasette=None, - ) - - -_curly_re = re.compile(r"({.*?})") - - -def route_pattern_from_filepath(filepath): - # Drop the ".html" suffix - if filepath.endswith(".html"): - filepath = filepath[: -len(".html")] - re_bits = ["/"] - for bit in _curly_re.split(filepath): - if _curly_re.match(bit): - re_bits.append(f"(?P<{bit[1:-1]}>[^/]*)") - else: - re_bits.append(re.escape(bit)) - return re.compile("^" + "".join(re_bits) + "$") - - -class NotFoundExplicit(NotFound): - pass - - -class DatasetteClient: - """Internal HTTP client for making requests to a Datasette instance. - - Used for testing and for internal operations that need to make HTTP requests - to the Datasette app without going through an actual HTTP server. 
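route_pattern_from_filepath() defined above is what turns a pages/ template with {curly} placeholders into a route with named groups. A quick usage sketch, assuming the helper can be imported from datasette.app where it is defined:

    from datasette.app import route_pattern_from_filepath  # assumption: module-level helper

    pattern = route_pattern_from_filepath("about/{topic}.html")
    match = pattern.match("/about/datasette")
    print(match.groupdict())  # {'topic': 'datasette'}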
- """ - - def __init__(self, ds): - self.ds = ds - - @property - def app(self): - return self.ds.app() - - def actor_cookie(self, actor): - # Utility method, mainly for tests - return self.ds.sign({"a": actor}, "actor") - - def _fix(self, path, avoid_path_rewrites=False): - if not isinstance(path, PrefixedUrlString) and not avoid_path_rewrites: - path = self.ds.urls.path(path) - if path.startswith("/"): - path = f"http://localhost{path}" - return path - - async def _request(self, method, path, skip_permission_checks=False, **kwargs): - from datasette.permissions import SkipPermissions - - with _DatasetteClientContext(): - if skip_permission_checks: - with SkipPermissions(): - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) - - async def get(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "get", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def options(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "options", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def head(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "head", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def post(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "post", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def put(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "put", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def patch(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "patch", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def delete(self, path, skip_permission_checks=False, **kwargs): - return await self._request( - "delete", path, skip_permission_checks=skip_permission_checks, **kwargs - ) - - async def request(self, method, path, skip_permission_checks=False, **kwargs): - """Make an HTTP request with the specified method. 
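The handle_404() logic shown a little earlier rewrites percent-encoded paths into Datasette's tilde encoding before redirecting. The order of the replace() calls matters: literal tildes must be escaped first, otherwise the tildes introduced for percent signs would be double-escaped. A small illustration of that chain:

    def tilde_redirect_path(path):
        # Same replacement chain as handle_404() above
        return path.replace("~", "~7E").replace("%", "~").replace(".", "~2E")

    print(tilde_redirect_path("/db/table/key%2Fwith%2Fslashes"))  # /db/table/key~2Fwith~2Fslashes
    print(tilde_redirect_path("/db/file.csv~x"))                  # /db/file~2Ecsv~7Ex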
- - Args: - method: HTTP method (e.g., "GET", "POST", "PUT") - path: The path to request - skip_permission_checks: If True, bypass all permission checks for this request - **kwargs: Additional arguments to pass to httpx - - Returns: - httpx.Response: The response from the request - """ - from datasette.permissions import SkipPermissions - - avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - with _DatasetteClientContext(): - if skip_permission_checks: - with SkipPermissions(): - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py deleted file mode 100644 index 4d8c6bea..00000000 --- a/datasette/blob_renderer.py +++ /dev/null @@ -1,61 +0,0 @@ -from datasette import hookimpl -from datasette.utils.asgi import Response, BadRequest -from datasette.utils import to_css_class -import hashlib - -_BLOB_COLUMN = "_blob_column" -_BLOB_HASH = "_blob_hash" - - -async def render_blob(datasette, database, rows, columns, request, table, view_name): - if _BLOB_COLUMN not in request.args: - raise BadRequest(f"?{_BLOB_COLUMN}= is required") - blob_column = request.args[_BLOB_COLUMN] - if blob_column not in columns: - raise BadRequest(f"{blob_column} is not a valid column") - - # If ?_blob_hash= provided, use that to select the row - otherwise use first row - blob_hash = None - if _BLOB_HASH in request.args: - blob_hash = request.args[_BLOB_HASH] - for row in rows: - value = row[blob_column] - if hashlib.sha256(value).hexdigest() == blob_hash: - break - else: - # Loop did not break - raise BadRequest( - "Link has expired - the requested binary content has changed or could not be found." 
- ) - else: - row = rows[0] - - value = row[blob_column] - filename_bits = [] - if table: - filename_bits.append(to_css_class(table)) - if "pks" in request.url_vars: - filename_bits.append(request.url_vars["pks"]) - filename_bits.append(to_css_class(blob_column)) - if blob_hash: - filename_bits.append(blob_hash[:6]) - filename = "-".join(filename_bits) + ".blob" - headers = { - "X-Content-Type-Options": "nosniff", - "Content-Disposition": f'attachment; filename="{filename}"', - } - return Response( - body=value or b"", - status=200, - headers=headers, - content_type="application/binary", - ) - - -@hookimpl -def register_output_renderer(): - return { - "extension": "blob", - "render": render_blob, - "can_render": lambda: False, - } diff --git a/datasette/cli.py b/datasette/cli.py index 21420491..f478d496 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -2,156 +2,84 @@ import asyncio import uvicorn import click from click import formatting -from click.types import CompositeParamType from click_default_group import DefaultGroup -import functools import json import os -import pathlib -from runpy import run_module import shutil from subprocess import call import sys -import textwrap -import webbrowser -from .app import ( - Datasette, - DEFAULT_SETTINGS, - SETTINGS, - SQLITE_LIMIT_ATTACHED, - pm, -) +from .app import Datasette, DEFAULT_CONFIG, CONFIG_OPTIONS, pm from .utils import ( - LoadExtension, - StartupError, - check_connection, - deep_dict_update, - find_spatialite, - parse_metadata, - ConnectionProblem, - SpatialiteConnectionProblem, - initial_path_for_datasette, - pairs_to_nested_config, temporary_docker_directory, value_as_boolean, - SpatialiteNotFound, StaticMount, ValueAsBooleanError, ) -from .utils.sqlite import sqlite3 -from .utils.testing import TestClient -from .version import __version__ -def run_sync(coro_func): - """Run an async callable to completion on a fresh event loop.""" - loop = asyncio.new_event_loop() - try: - asyncio.set_event_loop(loop) - return loop.run_until_complete(coro_func()) - finally: - asyncio.set_event_loop(None) - loop.close() - - -# Use Rich for tracebacks if it is installed -try: - from rich.traceback import install - - install(show_locals=True) -except ImportError: - pass - - -class Setting(CompositeParamType): - name = "setting" - arity = 2 +class Config(click.ParamType): + name = "config" def convert(self, config, param, ctx): - name, value = config - if name in DEFAULT_SETTINGS: - # For backwards compatibility with how this worked prior to - # Datasette 1.0, we turn bare setting names into setting.name - # Type checking for those older settings - default = DEFAULT_SETTINGS[name] - name = "settings.{}".format(name) - if isinstance(default, bool): - try: - return name, "true" if value_as_boolean(value) else "false" - except ValueAsBooleanError: - self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) - elif isinstance(default, int): - if not value.isdigit(): - self.fail(f'"{name}" should be an integer', param, ctx) - return name, value - elif isinstance(default, str): - return name, value - else: - # Should never happen: - self.fail("Invalid option") - return name, value - - -def sqlite_extensions(fn): - fn = click.option( - "sqlite_extensions", - "--load-extension", - type=LoadExtension(), - envvar="DATASETTE_LOAD_EXTENSION", - multiple=True, - help="Path to a SQLite extension to load, and optional entrypoint", - )(fn) - - # Wrap it in a custom error handler - @functools.wraps(fn) - def wrapped(*args, **kwargs): - try: - return 
fn(*args, **kwargs) - except AttributeError as e: - if "enable_load_extension" in str(e): - raise click.ClickException( - textwrap.dedent( - """ - Your Python installation does not have the ability to load SQLite extensions. - - More information: https://datasette.io/help/extensions - """ - ).strip() + if ":" not in config: + self.fail('"{}" should be name:value'.format(config), param, ctx) + return + name, value = config.split(":") + if name not in DEFAULT_CONFIG: + self.fail( + "{} is not a valid option (--help-config to see all)".format(name), + param, + ctx, + ) + return + # Type checking + default = DEFAULT_CONFIG[name] + if isinstance(default, bool): + try: + return name, value_as_boolean(value) + except ValueAsBooleanError: + self.fail( + '"{}" should be on/off/true/false/1/0'.format(name), param, ctx ) - raise - - return wrapped + return + elif isinstance(default, int): + if not value.isdigit(): + self.fail('"{}" should be an integer'.format(name), param, ctx) + return + return name, int(value) + else: + # Should never happen: + self.fail("Invalid option") @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) -@click.version_option(version=__version__) +@click.version_option() def cli(): """ - Datasette is an open source multi-tool for exploring and publishing data - - \b - About Datasette: https://datasette.io/ - Full documentation: https://docs.datasette.io/ + Datasette! """ @cli.command() @click.argument("files", type=click.Path(exists=True), nargs=-1) @click.option("--inspect-file", default="-") -@sqlite_extensions +@click.option( + "sqlite_extensions", + "--load-extension", + envvar="SQLITE_EXTENSIONS", + multiple=True, + type=click.Path(exists=True, resolve_path=True), + help="Path to a SQLite extension to load", +) def inspect(files, inspect_file, sqlite_extensions): - """ - Generate JSON summary of provided database files - - This can then be passed to "datasette --inspect-file" to speed up count - operations against immutable database files. 
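The wrapper above exists because sqlite3 connections only expose enable_load_extension() when Python was compiled with extension-loading support; on other builds the attribute is simply missing. A hedged sketch of the failure mode it catches (the extension path is hypothetical):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    try:
        conn.enable_load_extension(True)
        conn.load_extension("/usr/lib/mod_spatialite.so")  # hypothetical extension path
        conn.enable_load_extension(False)
    except AttributeError:
        # Raised when this Python was built without --enable-loadable-sqlite-extensions
        print("This Python installation cannot load SQLite extensions")
    except sqlite3.OperationalError as ex:
        print("Extension loading is supported, but this extension failed:", ex)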
- """ - inspect_data = run_sync(lambda: inspect_(files, sqlite_extensions)) + app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) if inspect_file == "-": - sys.stdout.write(json.dumps(inspect_data, indent=2)) + out = sys.stdout else: - with open(inspect_file, "w") as fp: - fp.write(json.dumps(inspect_data, indent=2)) + out = open(inspect_file, "w") + loop = asyncio.get_event_loop() + inspect_data = loop.run_until_complete(inspect_(files, sqlite_extensions)) + out.write(json.dumps(inspect_data, indent=2)) async def inspect_(files, sqlite_extensions): @@ -171,9 +99,18 @@ async def inspect_(files, sqlite_extensions): return data -@cli.group() +class PublishAliases(click.Group): + aliases = {"now": "nowv1"} + + def get_command(self, ctx, cmd_name): + if cmd_name in self.aliases: + return click.Group.get_command(self, ctx, self.aliases[cmd_name]) + return click.Group.get_command(self, ctx, cmd_name) + + +@cli.group(cls=PublishAliases) def publish(): - """Publish specified SQLite database files to the internet along with a Datasette-powered interface and API""" + "Publish specified SQLite database files to the internet along with a Datasette-powered interface and API" pass @@ -183,23 +120,15 @@ pm.hook.publish_subcommand(publish=publish) @cli.command() @click.option("--all", help="Include built-in default plugins", is_flag=True) -@click.option( - "--requirements", help="Output requirements.txt of installed plugins", is_flag=True -) @click.option( "--plugins-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), help="Path to directory containing custom plugins", ) -def plugins(all, requirements, plugins_dir): - """List currently installed plugins""" +def plugins(all, plugins_dir): + "List currently available plugins" app = Datasette([], plugins_dir=plugins_dir) - if requirements: - for plugin in app._plugins(): - if plugin["version"]: - click.echo("{}=={}".format(plugin["name"], plugin["version"])) - else: - click.echo(json.dumps(app._plugins(all=all), indent=4)) + click.echo(json.dumps(app.plugins(all), indent=4)) @cli.command() @@ -213,10 +142,10 @@ def plugins(all, requirements, plugins_dir): "-m", "--metadata", type=click.File(mode="r"), - help="Path to JSON/YAML file containing metadata to publish", + help="Path to JSON file containing metadata to publish", ) @click.option("--extra-options", help="Extra options to pass to datasette serve") -@click.option("--branch", help="Install datasette from a GitHub branch e.g. main") +@click.option("--branch", help="Install datasette from a GitHub branch e.g. 
master") @click.option( "--template-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), @@ -230,7 +159,7 @@ def plugins(all, requirements, plugins_dir): @click.option( "--static", type=StaticMount(), - help="Serve static files from this directory at /MOUNT/...", + help="mountpoint:path-to-directory for serving static files", multiple=True, ) @click.option( @@ -238,19 +167,6 @@ def plugins(all, requirements, plugins_dir): ) @click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension") @click.option("--version-note", help="Additional note to show on /-/versions") -@click.option( - "--secret", - help="Secret used for signing secure values, such as signed cookies", - envvar="DATASETTE_PUBLISH_SECRET", - default=lambda: os.urandom(32).hex(), -) -@click.option( - "-p", - "--port", - default=8001, - type=click.IntRange(1, 65535), - help="Port to run the server on, defaults to 8001", -) @click.option("--title", help="Title for metadata") @click.option("--license", help="License label for metadata") @click.option("--license_url", help="License URL for metadata") @@ -270,11 +186,9 @@ def package( install, spatialite, version_note, - secret, - port, **extra_metadata, ): - """Package SQLite files into a Datasette Docker container""" + "Package specified SQLite files into a new datasette Docker container" if not shutil.which("docker"): click.secho( ' The package command requires "docker" to be installed and configured ', @@ -287,18 +201,16 @@ def package( with temporary_docker_directory( files, "datasette", - metadata=metadata, - extra_options=extra_options, - branch=branch, - template_dir=template_dir, - plugins_dir=plugins_dir, - static=static, - install=install, - spatialite=spatialite, - version_note=version_note, - secret=secret, - extra_metadata=extra_metadata, - port=port, + metadata, + extra_options, + branch, + template_dir, + plugins_dir, + static, + install, + spatialite, + version_note, + extra_metadata, ): args = ["docker", "build"] if tag: @@ -308,49 +220,15 @@ def package( call(args) -@cli.command() -@click.argument("packages", nargs=-1) -@click.option( - "-U", "--upgrade", is_flag=True, help="Upgrade packages to latest version" -) -@click.option( - "-r", - "--requirement", - type=click.Path(exists=True), - help="Install from requirements file", -) -@click.option( - "-e", - "--editable", - help="Install a project in editable mode from this path", -) -def install(packages, upgrade, requirement, editable): - """Install plugins and packages from PyPI into the same environment as Datasette""" - if not packages and not requirement and not editable: - raise click.UsageError("Please specify at least one package to install") - args = ["pip", "install"] - if upgrade: - args += ["--upgrade"] - if editable: - args += ["--editable", editable] - if requirement: - args += ["-r", requirement] - args += list(packages) - sys.argv = args - run_module("pip", run_name="__main__") +def extra_serve_options(serve): + for options in pm.hook.extra_serve_options(): + for option in reversed(options): + serve = option(serve) + return serve @cli.command() -@click.argument("packages", nargs=-1, required=True) -@click.option("-y", "--yes", is_flag=True, help="Don't ask for confirmation") -def uninstall(packages, yes): - """Uninstall plugins and Python packages from the Datasette environment""" - sys.argv = ["pip", "uninstall"] + list(packages) + (["-y"] if yes else []) - run_module("pip", run_name="__main__") - - -@cli.command() -@click.argument("files", type=click.Path(), nargs=-1) 
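The install and uninstall commands above avoid spawning a subprocess: they rewrite sys.argv and run pip's own entry point in-process via runpy. A minimal sketch of that trick; the package name is just an example:

    import sys
    from runpy import run_module

    # Roughly what `datasette install --upgrade datasette-vega` does internally
    sys.argv = ["pip", "install", "--upgrade", "datasette-vega"]
    run_module("pip", run_name="__main__")  # executes pip's CLI in the current environment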
+@click.argument("files", type=click.Path(exists=True), nargs=-1) @click.option( "-i", "--immutable", @@ -359,35 +237,28 @@ def uninstall(packages, yes): multiple=True, ) @click.option( - "-h", - "--host", - default="127.0.0.1", - help=( - "Host for server. Defaults to 127.0.0.1 which means only connections " - "from the local machine will be allowed. Use 0.0.0.0 to listen to " - "all IPs and allow access from other machines." - ), + "-h", "--host", default="127.0.0.1", help="host for server, defaults to 127.0.0.1" ) +@click.option("-p", "--port", default=8001, help="port for server, defaults to 8001") @click.option( - "-p", - "--port", - default=8001, - type=click.IntRange(0, 65535), - help="Port for server, defaults to 8001. Use -p 0 to automatically assign an available port.", -) -@click.option( - "--uds", - help="Bind to a Unix domain socket", + "--debug", is_flag=True, help="Enable debug mode - useful for development" ) @click.option( "--reload", is_flag=True, - help="Automatically reload if code or metadata change detected - useful for development", + help="Automatically reload if database or code change detected - useful for development", ) @click.option( "--cors", is_flag=True, help="Enable CORS by serving Access-Control-Allow-Origin: *" ) -@sqlite_extensions +@click.option( + "sqlite_extensions", + "--load-extension", + envvar="SQLITE_EXTENSIONS", + multiple=True, + type=click.Path(exists=True, resolve_path=True), + help="Path to a SQLite extension to load", +) @click.option( "--inspect-file", help='Path to JSON file created using "datasette inspect"' ) @@ -395,7 +266,7 @@ def uninstall(packages, yes): "-m", "--metadata", type=click.File(mode="r"), - help="Path to JSON/YAML file containing license/source metadata", + help="Path to JSON file containing license/source metadata", ) @click.option( "--template-dir", @@ -410,102 +281,25 @@ def uninstall(packages, yes): @click.option( "--static", type=StaticMount(), - help="Serve static files from this directory at /MOUNT/...", + help="mountpoint:path-to-directory for serving static files", multiple=True, ) -@click.option("--memory", is_flag=True, help="Make /_memory database available") +@click.option("--memory", is_flag=True, help="Make :memory: database available") @click.option( - "-c", "--config", - type=click.File(mode="r"), - help="Path to JSON/YAML Datasette configuration file", -) -@click.option( - "-s", - "--setting", - "settings", - type=Setting(), - help="nested.key, value setting to use in Datasette configuration", + type=Config(), + help="Set config option using configname:value datasette.readthedocs.io/en/latest/config.html", multiple=True, ) -@click.option( - "--secret", - help="Secret used for signing secure values, such as signed cookies", - envvar="DATASETTE_SECRET", -) -@click.option( - "--root", - help="Output URL that sets a cookie authenticating the root user", - is_flag=True, -) -@click.option( - "--default-deny", - help="Deny all permissions by default", - is_flag=True, -) -@click.option( - "--get", - help="Run an HTTP GET request against this path, print results and exit", -) -@click.option( - "--headers", - is_flag=True, - help="Include HTTP headers in --get output", -) -@click.option( - "--token", - help="API token to send with --get requests", -) -@click.option( - "--actor", - help="Actor to use for --get requests (JSON string)", -) @click.option("--version-note", help="Additional note to show on /-/versions") -@click.option("--help-settings", is_flag=True, help="Show available settings") 
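The -s/--setting option above accepts dotted nested.key / value pairs, which serve() later folds into the configuration dictionary (via pairs_to_nested_config and deep_dict_update). A rough sketch of the effect, not the actual helpers:

    def fold_settings(pairs):
        # Approximates what the nested-key setting pairs turn into
        config = {}
        for dotted_key, value in pairs:
            node = config
            *parents, leaf = dotted_key.split(".")
            for part in parents:
                node = node.setdefault(part, {})
            node[leaf] = value
        return config

    print(fold_settings([
        ("settings.sql_time_limit_ms", "3000"),
        ("databases.mydb.tables.users.allow.id", "root"),
    ]))
    # {'settings': {'sql_time_limit_ms': '3000'},
    #  'databases': {'mydb': {'tables': {'users': {'allow': {'id': 'root'}}}}}}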
-@click.option("--pdb", is_flag=True, help="Launch debugger on any errors") -@click.option( - "-o", - "--open", - "open_browser", - is_flag=True, - help="Open Datasette in your web browser", -) -@click.option( - "--create", - is_flag=True, - help="Create database files if they do not exist", -) -@click.option( - "--crossdb", - is_flag=True, - help="Enable cross-database joins using the /_memory database", -) -@click.option( - "--nolock", - is_flag=True, - help="Ignore locking, open locked files in read-only mode", -) -@click.option( - "--ssl-keyfile", - help="SSL key file", - envvar="DATASETTE_SSL_KEYFILE", -) -@click.option( - "--ssl-certfile", - help="SSL certificate file", - envvar="DATASETTE_SSL_CERTFILE", -) -@click.option( - "--internal", - type=click.Path(), - help="Path to a persistent Datasette internal SQLite database", -) +@click.option("--help-config", is_flag=True, help="Show available config options") +@extra_serve_options def serve( files, immutable, host, port, - uds, + debug, reload, cors, sqlite_extensions, @@ -516,34 +310,18 @@ def serve( static, memory, config, - settings, - secret, - root, - default_deny, - get, - headers, - token, - actor, version_note, - help_settings, - pdb, - open_browser, - create, - crossdb, - nolock, - ssl_keyfile, - ssl_certfile, - internal, - return_instance=False, + help_config, + **extra_serve_options, ): """Serve up specified SQLite database files with a web UI""" - if help_settings: + if help_config: formatter = formatting.HelpFormatter() - with formatter.section("Settings"): + with formatter.section("Config options"): formatter.write_dl( [ - (option.name, f"{option.help} (default={option.default})") - for option in SETTINGS + (option.name, "{} (default={})".format(option.help, option.default)) + for option in CONFIG_OPTIONS ] ) click.echo(formatter.getvalue()) @@ -552,342 +330,39 @@ def serve( import hupper reloader = hupper.start_reloader("datasette.cli.serve") - if immutable: - reloader.watch_files(immutable) - if config: - reloader.watch_files([config.name]) + reloader.watch_files(files) if metadata: reloader.watch_files([metadata.name]) inspect_data = None if inspect_file: - with open(inspect_file) as fp: - inspect_data = json.load(fp) + inspect_data = json.load(open(inspect_file)) metadata_data = None if metadata: - metadata_data = parse_metadata(metadata.read()) + metadata_data = json.loads(metadata.read()) - config_data = None - if config: - config_data = parse_metadata(config.read()) - - config_data = config_data or {} - - # Merge in settings from -s/--setting - if settings: - settings_updates = pairs_to_nested_config(settings) - # Merge recursively, to avoid over-writing nested values - # https://github.com/simonw/datasette/issues/2389 - deep_dict_update(config_data, settings_updates) - - kwargs = dict( + click.echo( + "Serve! 
files={} (immutables={}) on port {}".format(files, immutable, port) + ) + ds = Datasette( + files, immutables=immutable, - cache_headers=not reload, + cache_headers=not debug and not reload, cors=cors, inspect_data=inspect_data, - config=config_data, metadata=metadata_data, sqlite_extensions=sqlite_extensions, template_dir=template_dir, plugins_dir=plugins_dir, static_mounts=static, - settings=None, # These are passed in config= now + config=dict(config), memory=memory, - secret=secret, version_note=version_note, - pdb=pdb, - crossdb=crossdb, - nolock=nolock, - internal=internal, - default_deny=default_deny, + extra_serve_options=extra_serve_options, ) - - # Separate directories from files - directories = [f for f in files if os.path.isdir(f)] - file_paths = [f for f in files if not os.path.isdir(f)] - - # Handle config_dir - only one directory allowed - if len(directories) > 1: - raise click.ClickException( - "Cannot pass multiple directories. Pass a single directory as config_dir." - ) - elif len(directories) == 1: - kwargs["config_dir"] = pathlib.Path(directories[0]) - - # Verify list of files, create if needed (and --create) - for file in file_paths: - if not pathlib.Path(file).exists(): - if create: - sqlite3.connect(file).execute("vacuum") - else: - raise click.ClickException( - "Invalid value for '[FILES]...': Path '{}' does not exist.".format( - file - ) - ) - - # Check for duplicate files by resolving all paths to their absolute forms - # Collect all database files that will be loaded (explicit files + config_dir files) - all_db_files = [] - - # Add explicit files - for file in file_paths: - all_db_files.append((file, pathlib.Path(file).resolve())) - - # Add config_dir databases if config_dir is set - if "config_dir" in kwargs: - config_dir = kwargs["config_dir"] - for ext in ("db", "sqlite", "sqlite3"): - for db_file in config_dir.glob(f"*.{ext}"): - all_db_files.append((str(db_file), db_file.resolve())) - - # Check for duplicates - seen = {} - for original_path, resolved_path in all_db_files: - if resolved_path in seen: - raise click.ClickException( - f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' " - f"both refer to {resolved_path}" - ) - seen[resolved_path] = original_path - - files = file_paths - - try: - ds = Datasette(files, **kwargs) - except SpatialiteNotFound: - raise click.ClickException("Could not find SpatiaLite extension") - except StartupError as e: - raise click.ClickException(e.args[0]) - - if return_instance: - # Private utility mechanism for writing unit tests - return ds - - # Run the "startup" plugin hooks - run_sync(ds.invoke_startup) - - # Run async soundness checks - but only if we're not under pytest - run_sync(lambda: check_databases(ds)) - - if headers and not get: - raise click.ClickException("--headers can only be used with --get") - - if token and not get: - raise click.ClickException("--token can only be used with --get") - - if get: - client = TestClient(ds) - request_headers = {} - if token: - request_headers["Authorization"] = "Bearer {}".format(token) - cookies = {} - if actor: - cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) - response = client.get(get, headers=request_headers, cookies=cookies) - - if headers: - # Output HTTP status code, headers, two newlines, then the response body - click.echo(f"HTTP/1.1 {response.status}") - for key, value in response.headers.items(): - click.echo(f"{key}: {value}") - if response.text: - click.echo() - click.echo(response.text) - else: - click.echo(response.text) - - 
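The duplicate check above compares resolved absolute paths, so the same file passed twice under different spellings is rejected. A standalone illustration of the idea, with hypothetical file names:

    from pathlib import Path

    candidate_paths = ["data.db", "./subdir/../data.db"]  # both point at the same file
    seen = {}
    for original in candidate_paths:
        resolved = Path(original).resolve()
        if resolved in seen:
            raise SystemExit(
                f"Duplicate database file: '{original}' and '{seen[resolved]}' both refer to {resolved}"
            )
        seen[resolved] = original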
exit_code = 0 if response.status == 200 else 1 - sys.exit(exit_code) - return + # Run async sanity checks - but only if we're not under pytest + asyncio.get_event_loop().run_until_complete(ds.run_sanity_checks()) # Start the server - url = None - if root: - ds.root_enabled = True - url = "http://{}:{}{}?token={}".format( - host, port, ds.urls.path("-/auth-token"), ds._root_token - ) - click.echo(url) - if open_browser: - if url is None: - # Figure out most convenient URL - to table, database or homepage - path = run_sync(lambda: initial_path_for_datasette(ds)) - url = f"http://{host}:{port}{path}" - webbrowser.open(url) - uvicorn_kwargs = dict( - host=host, port=port, log_level="info", lifespan="on", workers=1 - ) - if uds: - uvicorn_kwargs["uds"] = uds - if ssl_keyfile: - uvicorn_kwargs["ssl_keyfile"] = ssl_keyfile - if ssl_certfile: - uvicorn_kwargs["ssl_certfile"] = ssl_certfile - uvicorn.run(ds.app(), **uvicorn_kwargs) - - -@cli.command() -@click.argument("id") -@click.option( - "--secret", - help="Secret used for signing the API tokens", - envvar="DATASETTE_SECRET", - required=True, -) -@click.option( - "-e", - "--expires-after", - help="Token should expire after this many seconds", - type=int, -) -@click.option( - "alls", - "-a", - "--all", - type=str, - metavar="ACTION", - multiple=True, - help="Restrict token to this action", -) -@click.option( - "databases", - "-d", - "--database", - type=(str, str), - metavar="DB ACTION", - multiple=True, - help="Restrict token to this action on this database", -) -@click.option( - "resources", - "-r", - "--resource", - type=(str, str, str), - metavar="DB RESOURCE ACTION", - multiple=True, - help="Restrict token to this action on this database resource (a table, SQL view or named query)", -) -@click.option( - "--debug", - help="Show decoded token", - is_flag=True, -) -@click.option( - "--plugins-dir", - type=click.Path(exists=True, file_okay=False, dir_okay=True), - help="Path to directory containing custom plugins", -) -def create_token( - id, secret, expires_after, alls, databases, resources, debug, plugins_dir -): - """ - Create a signed API token for the specified actor ID - - Example: - - datasette create-token root --secret mysecret - - To allow only "view-database-download" for all databases: - - \b - datasette create-token root --secret mysecret \\ - --all view-database-download - - To allow "create-table" against a specific database: - - \b - datasette create-token root --secret mysecret \\ - --database mydb create-table - - To allow "insert-row" against a specific table: - - \b - datasette create-token root --secret myscret \\ - --resource mydb mytable insert-row - - Restricted actions can be specified multiple times using - multiple --all, --database, and --resource options. - - Add --debug to see a decoded version of the token. 
- """ - ds = Datasette(secret=secret, plugins_dir=plugins_dir) - - # Run ds.invoke_startup() in an event loop - run_sync(ds.invoke_startup) - - # Warn about any unknown actions - actions = [] - actions.extend(alls) - actions.extend([p[1] for p in databases]) - actions.extend([p[2] for p in resources]) - for action in actions: - if not ds.actions.get(action): - click.secho( - f" Unknown permission: {action} ", - fg="red", - err=True, - ) - - restrict_database = {} - for database, action in databases: - restrict_database.setdefault(database, []).append(action) - restrict_resource = {} - for database, resource, action in resources: - restrict_resource.setdefault(database, {}).setdefault(resource, []).append( - action - ) - - token = ds.create_token( - id, - expires_after=expires_after, - restrict_all=alls, - restrict_database=restrict_database, - restrict_resource=restrict_resource, - ) - click.echo(token) - if debug: - encoded = token[len("dstok_") :] - click.echo("\nDecoded:\n") - click.echo(json.dumps(ds.unsign(encoded, namespace="token"), indent=2)) - - -pm.hook.register_commands(cli=cli) - - -async def check_databases(ds): - # Run check_connection against every connected database - # to confirm they are all usable - for database in list(ds.databases.values()): - try: - await database.execute_fn(check_connection) - except SpatialiteConnectionProblem: - suggestion = "" - try: - find_spatialite() - suggestion = "\n\nTry adding the --load-extension=spatialite option." - except SpatialiteNotFound: - pass - raise click.UsageError( - "It looks like you're trying to load a SpatiaLite" - + " database without first loading the SpatiaLite module." - + suggestion - + "\n\nRead more: https://docs.datasette.io/en/stable/spatialite.html" - ) - except ConnectionProblem as e: - raise click.UsageError( - f"Connection to {database.path} failed check: {str(e.args[0])}" - ) - # If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning - if ( - ds.crossdb - and len([db for db in ds.databases.values() if not db.is_memory]) - > SQLITE_LIMIT_ATTACHED - ): - msg = ( - "Warning: --crossdb only works with the first {} attached databases".format( - SQLITE_LIMIT_ATTACHED - ) - ) - click.echo(click.style(msg, bold=True, fg="yellow"), err=True) + uvicorn.run(ds.app(), host=host, port=port, log_level="info") diff --git a/datasette/database.py b/datasette/database.py index e5858128..06d1c2ad 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,380 +1,62 @@ -import asyncio -from collections import namedtuple from pathlib import Path -import janus -import queue -import sqlite_utils -import sys -import threading -import uuid -from .tracer import trace from .utils import ( + QueryInterrupted, detect_fts, detect_primary_keys, detect_spatialite, get_all_foreign_keys, get_outbound_foreign_keys, - md5_not_usedforsecurity, - sqlite_timelimit, sqlite3, table_columns, - table_column_details, ) -from .utils.sqlite import sqlite_version from .inspect import inspect_hash -connections = threading.local() - -AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) - class Database: - # For table counts stop at this many rows: - count_limit = 10000 - _thread_local_id_counter = 1 - def __init__( - self, - ds, - path=None, - is_mutable=True, - is_memory=False, - memory_name=None, - mode=None, + self, ds, path=None, name=None, is_mutable=False, is_memory=False, comment=None ): - self.name = None - self._thread_local_id = f"x{self._thread_local_id_counter}" - Database._thread_local_id_counter += 1 - 
self.route = None self.ds = ds + self._name = name self.path = path self.is_mutable = is_mutable self.is_memory = is_memory - self.memory_name = memory_name - if memory_name is not None: - self.is_memory = True - self.cached_hash = None + self.hash = None self.cached_size = None - self._cached_table_counts = None - self._write_thread = None - self._write_queue = None - # These are used when in non-threaded mode: - self._read_connection = None - self._write_connection = None - # This is used to track all file connections so they can be closed - self._all_file_connections = [] - self.mode = mode + self.cached_table_counts = None + self.comment = comment + if not self.is_mutable and path is not None: + p = Path(path) + self.hash = inspect_hash(p) + self.cached_size = p.stat().st_size + # Maybe use self.ds.inspect_data to populate cached_table_counts + if self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_table_counts = { + key: value["count"] + for key, value in self.ds.inspect_data[self.name]["tables"].items() + } - @property - def cached_table_counts(self): - if self._cached_table_counts is not None: - return self._cached_table_counts - # Maybe use self.ds.inspect_data to populate cached_table_counts - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self._cached_table_counts = { - key: value["count"] - for key, value in self.ds.inspect_data[self.name]["tables"].items() - } - return self._cached_table_counts - - @property - def color(self): - if self.hash: - return self.hash[:6] - return md5_not_usedforsecurity(self.name)[:6] - - def suggest_name(self): - if self.path: - return Path(self.path).stem - elif self.memory_name: - return self.memory_name - else: - return "db" - - def connect(self, write=False): - extra_kwargs = {} - if write: - extra_kwargs["isolation_level"] = "IMMEDIATE" - if self.memory_name: - uri = "file:{}?mode=memory&cache=shared".format(self.memory_name) - conn = sqlite3.connect( - uri, uri=True, check_same_thread=False, **extra_kwargs - ) - if not write: - conn.execute("PRAGMA query_only=1") - return conn + def connect(self): if self.is_memory: - return sqlite3.connect(":memory:", uri=True) - + return sqlite3.connect(":memory:") # mode=ro or immutable=1? 
if self.is_mutable: - qs = "?mode=ro" - if self.ds.nolock: - qs += "&nolock=1" + qs = "mode=ro" else: - qs = "?immutable=1" - assert not (write and not self.is_mutable) - if write: - qs = "" - if self.mode is not None: - qs = f"?mode={self.mode}" - conn = sqlite3.connect( - f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs + qs = "immutable=1" + return sqlite3.connect( + "file:{}?{}".format(self.path, qs), uri=True, check_same_thread=False ) - self._all_file_connections.append(conn) - return conn - - def close(self): - # Close all connections - useful to avoid running out of file handles in tests - for connection in self._all_file_connections: - connection.close() - - async def execute_write(self, sql, params=None, block=True): - def _inner(conn): - return conn.execute(sql, params or []) - - with trace("sql", database=self.name, sql=sql.strip(), params=params): - results = await self.execute_write_fn(_inner, block=block) - return results - - async def execute_write_script(self, sql, block=True): - def _inner(conn): - return conn.executescript(sql) - - with trace("sql", database=self.name, sql=sql.strip(), executescript=True): - results = await self.execute_write_fn( - _inner, block=block, transaction=False - ) - return results - - async def execute_write_many(self, sql, params_seq, block=True): - def _inner(conn): - count = 0 - - def count_params(params): - nonlocal count - for param in params: - count += 1 - yield param - - return conn.executemany(sql, count_params(params_seq)), count - - with trace( - "sql", database=self.name, sql=sql.strip(), executemany=True - ) as kwargs: - results, count = await self.execute_write_fn(_inner, block=block) - kwargs["count"] = count - return results - - async def execute_isolated_fn(self, fn): - # Open a new connection just for the duration of this function - # blocking the write queue to avoid any writes occurring during it - if self.ds.executor is None: - # non-threaded mode - isolated_connection = self.connect(write=True) - try: - result = fn(isolated_connection) - finally: - isolated_connection.close() - try: - self._all_file_connections.remove(isolated_connection) - except ValueError: - # Was probably a memory connection - pass - return result - else: - # Threaded mode - send to write thread - return await self._send_to_write_thread(fn, isolated_connection=True) - - async def execute_write_fn(self, fn, block=True, transaction=True): - if self.ds.executor is None: - # non-threaded mode - if self._write_connection is None: - self._write_connection = self.connect(write=True) - self.ds._prepare_connection(self._write_connection, self.name) - if transaction: - with self._write_connection: - return fn(self._write_connection) - else: - return fn(self._write_connection) - else: - return await self._send_to_write_thread( - fn, block=block, transaction=transaction - ) - - async def _send_to_write_thread( - self, fn, block=True, isolated_connection=False, transaction=True - ): - if self._write_queue is None: - self._write_queue = queue.Queue() - if self._write_thread is None: - self._write_thread = threading.Thread( - target=self._execute_writes, daemon=True - ) - self._write_thread.name = "_execute_writes for database {}".format( - self.name - ) - self._write_thread.start() - task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") - reply_queue = janus.Queue() - self._write_queue.put( - WriteTask(fn, task_id, reply_queue, isolated_connection, transaction) - ) - if block: - result = await reply_queue.async_q.get() - if 
isinstance(result, Exception): - raise result - else: - return result - else: - return task_id - - def _execute_writes(self): - # Infinite looping thread that protects the single write connection - # to this database - conn_exception = None - conn = None - try: - conn = self.connect(write=True) - self.ds._prepare_connection(conn, self.name) - except Exception as e: - conn_exception = e - while True: - task = self._write_queue.get() - if conn_exception is not None: - result = conn_exception - else: - if task.isolated_connection: - isolated_connection = self.connect(write=True) - try: - result = task.fn(isolated_connection) - except Exception as e: - sys.stderr.write("{}\n".format(e)) - sys.stderr.flush() - result = e - finally: - isolated_connection.close() - try: - self._all_file_connections.remove(isolated_connection) - except ValueError: - # Was probably a memory connection - pass - else: - try: - if task.transaction: - with conn: - result = task.fn(conn) - else: - result = task.fn(conn) - except Exception as e: - sys.stderr.write("{}\n".format(e)) - sys.stderr.flush() - result = e - task.reply_queue.sync_q.put(result) - - async def execute_fn(self, fn): - if self.ds.executor is None: - # non-threaded mode - if self._read_connection is None: - self._read_connection = self.connect() - self.ds._prepare_connection(self._read_connection, self.name) - return fn(self._read_connection) - - # threaded mode - def in_thread(): - conn = getattr(connections, self._thread_local_id, None) - if not conn: - conn = self.connect() - self.ds._prepare_connection(conn, self.name) - setattr(connections, self._thread_local_id, conn) - return fn(conn) - - return await asyncio.get_event_loop().run_in_executor( - self.ds.executor, in_thread - ) - - async def execute( - self, - sql, - params=None, - truncate=False, - custom_time_limit=None, - page_size=None, - log_sql_errors=True, - ): - """Executes sql against db_name in a thread""" - page_size = page_size or self.ds.page_size - - def sql_operation_in_thread(conn): - time_limit_ms = self.ds.sql_time_limit_ms - if custom_time_limit and custom_time_limit < time_limit_ms: - time_limit_ms = custom_time_limit - - with sqlite_timelimit(conn, time_limit_ms): - try: - cursor = conn.cursor() - cursor.execute(sql, params if params is not None else {}) - max_returned_rows = self.ds.max_returned_rows - if max_returned_rows == page_size: - max_returned_rows += 1 - if max_returned_rows and truncate: - rows = cursor.fetchmany(max_returned_rows + 1) - truncated = len(rows) > max_returned_rows - rows = rows[:max_returned_rows] - else: - rows = cursor.fetchall() - truncated = False - except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: - if e.args == ("interrupted",): - raise QueryInterrupted(e, sql, params) - if log_sql_errors: - sys.stderr.write( - "ERROR: conn={}, sql = {}, params = {}: {}\n".format( - conn, repr(sql), params, e - ) - ) - sys.stderr.flush() - raise - - if truncate: - return Results(rows, truncated, cursor.description) - - else: - return Results(rows, False, cursor.description) - - with trace("sql", database=self.name, sql=sql.strip(), params=params): - results = await self.execute_fn(sql_operation_in_thread) - return results - - @property - def hash(self): - if self.cached_hash is not None: - return self.cached_hash - elif self.is_mutable or self.is_memory: - return None - elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.cached_hash = self.ds.inspect_data[self.name]["hash"] - return self.cached_hash - else: - p = 
Path(self.path) - self.cached_hash = inspect_hash(p) - return self.cached_hash @property def size(self): + if self.is_memory or self.path is None: + return 0 if self.cached_size is not None: return self.cached_size - elif self.is_memory: - return 0 - elif self.is_mutable: - return Path(self.path).stat().st_size - elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.cached_size = self.ds.inspect_data[self.name]["size"] - return self.cached_size else: - self.cached_size = Path(self.path).stat().st_size - return self.cached_size + return Path(self.path).stat().st_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: @@ -384,225 +66,109 @@ class Database: for table in await self.table_names(): try: table_count = ( - await self.execute( - f"select count(*) from (select * from [{table}] limit {self.count_limit + 1})", + await self.ds.execute( + self.name, + "select count(*) from [{}]".format(table), custom_time_limit=limit, ) ).rows[0][0] counts[table] = table_count # In some cases I saw "SQL Logic Error" here in addition to # QueryInterrupted - so we catch that too: - except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError): + except (QueryInterrupted, sqlite3.OperationalError): counts[table] = None if not self.is_mutable: - self._cached_table_counts = counts + self.cached_table_counts = counts return counts @property def mtime_ns(self): - if self.is_memory: - return None return Path(self.path).stat().st_mtime_ns - async def attached_databases(self): - # This used to be: - # select seq, name, file from pragma_database_list() where seq > 0 - # But SQLite prior to 3.16.0 doesn't support pragma functions - results = await self.execute("PRAGMA database_list;") - # {'seq': 0, 'name': 'main', 'file': ''} - return [ - AttachedDatabase(*row) - for row in results.rows - # Filter out the SQLite internal "temp" database, refs #2557 - if row["seq"] > 0 and row["name"] != "temp" - ] + @property + def name(self): + if self._name: + return self._name + if self.is_memory: + return ":memory:" + else: + return Path(self.path).stem async def table_exists(self, table): - results = await self.execute( - "select 1 from sqlite_master where type='table' and name=?", params=(table,) - ) - return bool(results.rows) - - async def view_exists(self, table): - results = await self.execute( - "select 1 from sqlite_master where type='view' and name=?", params=(table,) + results = await self.ds.execute( + self.name, + "select 1 from sqlite_master where type='table' and name=?", + params=(table,), ) return bool(results.rows) async def table_names(self): - results = await self.execute( - "select name from sqlite_master where type='table'" + results = await self.ds.execute( + self.name, "select name from sqlite_master where type='table'" ) return [r[0] for r in results.rows] async def table_columns(self, table): - return await self.execute_fn(lambda conn: table_columns(conn, table)) - - async def table_column_details(self, table): - return await self.execute_fn(lambda conn: table_column_details(conn, table)) + return await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: table_columns(conn, table) + ) async def primary_keys(self, table): - return await self.execute_fn(lambda conn: detect_primary_keys(conn, table)) + return await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: detect_primary_keys(conn, table) + ) async def fts_table(self, table): - return await self.execute_fn(lambda conn: 
detect_fts(conn, table)) + return await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: detect_fts(conn, table) + ) async def label_column_for_table(self, table): - explicit_label_column = (await self.ds.table_config(self.name, table)).get( + explicit_label_column = self.ds.table_metadata(self.name, table).get( "label_column" ) if explicit_label_column: return explicit_label_column - - def column_details(conn): - # Returns {column_name: (type, is_unique)} - db = sqlite_utils.Database(conn) - columns = db[table].columns_dict - indexes = db[table].indexes - details = {} - for name in columns: - is_unique = any( - index - for index in indexes - if index.columns == [name] and index.unique - ) - details[name] = (columns[name], is_unique) - return details - - column_details = await self.execute_fn(column_details) - # Is there just one unique column that's text? - unique_text_columns = [ - name - for name, (type_, is_unique) in column_details.items() - if is_unique and type_ is str - ] - if len(unique_text_columns) == 1: - return unique_text_columns[0] - - column_names = list(column_details.keys()) + # If a table has two columns, one of which is ID, then label_column is the other one + column_names = await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: table_columns(conn, table) + ) # Is there a name or title column? - name_or_title = [c for c in column_names if c.lower() in ("name", "title")] + name_or_title = [c for c in column_names if c in ("name", "title")] if name_or_title: return name_or_title[0] - # If a table has two columns, one of which is ID, then label_column is the other one if ( column_names and len(column_names) == 2 and ("id" in column_names or "pk" in column_names) - and not set(column_names) == {"id", "pk"} ): return [c for c in column_names if c not in ("id", "pk")][0] # Couldn't find a label: return None async def foreign_keys_for_table(self, table): - return await self.execute_fn( - lambda conn: get_outbound_foreign_keys(conn, table) + return await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: get_outbound_foreign_keys(conn, table) ) async def hidden_table_names(self): - hidden_tables = [] - # Add any tables marked as hidden in config - db_config = self.ds.config.get("databases", {}).get(self.name, {}) - if "tables" in db_config: - hidden_tables += [ - t for t in db_config["tables"] if db_config["tables"][t].get("hidden") - ] - - if sqlite_version()[1] >= 37: - hidden_tables += [ - x[0] - for x in await self.execute( - """ - with shadow_tables as ( - select name - from pragma_table_list - where [type] = 'shadow' - order by name - ), - core_tables as ( - select name - from sqlite_master - WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - combined as ( - select name from shadow_tables - union all - select name from core_tables - ) - select name from combined order by 1 + # Mark tables 'hidden' if they relate to FTS virtual tables + hidden_tables = [ + r[0] + for r in ( + await self.ds.execute( + self.name, """ + select name from sqlite_master + where rootpage = 0 + and sql like '%VIRTUAL TABLE%USING FTS%' + """, ) - ] - else: - hidden_tables += [ - x[0] - for x in await self.execute( - """ - WITH base AS ( - SELECT name - FROM sqlite_master - WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - fts_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_data'), ('_idx'), 
('_docsize'), ('_content'), ('_config')) - ), - fts5_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' - ), - fts5_shadow_tables AS ( - SELECT - printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name - FROM fts5_names - JOIN fts_suffixes - ), - fts3_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) - ), - fts3_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' - OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' - ), - fts3_shadow_tables AS ( - SELECT - printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name - FROM fts3_names - JOIN fts3_suffixes - ), - final AS ( - SELECT name FROM base - UNION ALL - SELECT name FROM fts5_shadow_tables - UNION ALL - SELECT name FROM fts3_shadow_tables - ) - SELECT name FROM final ORDER BY 1 - """ - ) - ] - # Also hide any FTS tables that have a content= argument - hidden_tables += [ - x[0] - for x in await self.execute( - """ - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%' - AND sql LIKE '%USING FTS%' - AND sql LIKE '%content=%' - """ - ) + ).rows ] - - has_spatialite = await self.execute_fn(detect_spatialite) + has_spatialite = await self.ds.execute_against_connection_in_thread( + self.name, detect_spatialite + ) if has_spatialite: # Also hide Spatialite internal tables hidden_tables += [ @@ -615,51 +181,64 @@ class Database: "sqlite_sequence", "views_geometry_columns", "virts_geometry_columns", - "data_licenses", - "KNN", - "KNN2", ] + [ r[0] for r in ( - await self.execute( + await self.ds.execute( + self.name, """ select name from sqlite_master where name like "idx_%" and type = "table" - """ + """, ) ).rows ] + # Add any from metadata.json + db_metadata = self.ds.metadata(database=self.name) + if "tables" in db_metadata: + hidden_tables += [ + t + for t in db_metadata["tables"] + if db_metadata["tables"][t].get("hidden") + ] + # Also mark as hidden any tables which start with the name of a hidden table + # e.g. 
"searchable_fts" implies "searchable_fts_content" should be hidden + for table_name in await self.table_names(): + for hidden_table in hidden_tables[:]: + if table_name.startswith(hidden_table): + hidden_tables.append(table_name) + continue return hidden_tables async def view_names(self): - results = await self.execute("select name from sqlite_master where type='view'") + results = await self.ds.execute( + self.name, "select name from sqlite_master where type='view'" + ) return [r[0] for r in results.rows] async def get_all_foreign_keys(self): - return await self.execute_fn(get_all_foreign_keys) + return await self.ds.execute_against_connection_in_thread( + self.name, get_all_foreign_keys + ) + + async def get_outbound_foreign_keys(self, table): + return await self.ds.execute_against_connection_in_thread( + self.name, lambda conn: get_outbound_foreign_keys(conn, table) + ) async def get_table_definition(self, table, type_="table"): table_definition_rows = list( - await self.execute( + await self.ds.execute( + self.name, "select sql from sqlite_master where name = :n and type=:t", {"n": table, "t": type_}, ) ) if not table_definition_rows: return None - bits = [table_definition_rows[0][0] + ";"] - # Add on any indexes - index_rows = list( - await self.execute( - "select sql from sqlite_master where tbl_name = :n and type='index' and sql is not null", - {"n": table}, - ) - ) - for index_row in index_rows: - bits.append(index_row[0] + ";") - return "\n".join(bits) + return table_definition_rows[0][0] async def get_view_definition(self, view): return await self.get_table_definition(view, "view") @@ -671,67 +250,10 @@ class Database: if self.is_memory: tags.append("memory") if self.hash: - tags.append(f"hash={self.hash}") + tags.append("hash={}".format(self.hash)) if self.size is not None: - tags.append(f"size={self.size}") + tags.append("size={}".format(self.size)) tags_str = "" if tags: - tags_str = f" ({', '.join(tags)})" - return f"" - - -class WriteTask: - __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") - - def __init__(self, fn, task_id, reply_queue, isolated_connection, transaction): - self.fn = fn - self.task_id = task_id - self.reply_queue = reply_queue - self.isolated_connection = isolated_connection - self.transaction = transaction - - -class QueryInterrupted(Exception): - def __init__(self, e, sql, params): - self.e = e - self.sql = sql - self.params = params - - def __str__(self): - return "QueryInterrupted: {}".format(self.e) - - -class MultipleValues(Exception): - pass - - -class Results: - def __init__(self, rows, truncated, description): - self.rows = rows - self.truncated = truncated - self.description = description - - @property - def columns(self): - return [d[0] for d in self.description] - - def first(self): - if self.rows: - return self.rows[0] - else: - return None - - def single_value(self): - if self.rows and 1 == len(self.rows) and 1 == len(self.rows[0]): - return self.rows[0][0] - else: - raise MultipleValues - - def dicts(self): - return [dict(row) for row in self.rows] - - def __iter__(self): - return iter(self.rows) - - def __len__(self): - return len(self.rows) + tags_str = " ({})".format(", ".join(tags)) + return "".format(self.name, tags_str) diff --git a/datasette/default_actions.py b/datasette/default_actions.py deleted file mode 100644 index 87d98fac..00000000 --- a/datasette/default_actions.py +++ /dev/null @@ -1,101 +0,0 @@ -from datasette import hookimpl -from datasette.permissions import Action -from datasette.resources 
import ( - DatabaseResource, - TableResource, - QueryResource, -) - - -@hookimpl -def register_actions(): - """Register the core Datasette actions.""" - return ( - # Global actions (no resource_class) - Action( - name="view-instance", - abbr="vi", - description="View Datasette instance", - ), - Action( - name="permissions-debug", - abbr="pd", - description="Access permission debug tool", - ), - Action( - name="debug-menu", - abbr="dm", - description="View debug menu items", - ), - # Database-level actions (parent-level) - Action( - name="view-database", - abbr="vd", - description="View database", - resource_class=DatabaseResource, - ), - Action( - name="view-database-download", - abbr="vdd", - description="Download database file", - resource_class=DatabaseResource, - also_requires="view-database", - ), - Action( - name="execute-sql", - abbr="es", - description="Execute read-only SQL queries", - resource_class=DatabaseResource, - also_requires="view-database", - ), - Action( - name="create-table", - abbr="ct", - description="Create tables", - resource_class=DatabaseResource, - ), - # Table-level actions (child-level) - Action( - name="view-table", - abbr="vt", - description="View table", - resource_class=TableResource, - ), - Action( - name="insert-row", - abbr="ir", - description="Insert rows", - resource_class=TableResource, - ), - Action( - name="delete-row", - abbr="dr", - description="Delete rows", - resource_class=TableResource, - ), - Action( - name="update-row", - abbr="ur", - description="Update rows", - resource_class=TableResource, - ), - Action( - name="alter-table", - abbr="at", - description="Alter tables", - resource_class=TableResource, - ), - Action( - name="drop-table", - abbr="dt", - description="Drop tables", - resource_class=TableResource, - ), - # Query-level actions (child-level) - Action( - name="view-query", - abbr="vq", - description="View named query results", - resource_class=QueryResource, - ), - ) diff --git a/datasette/default_magic_parameters.py b/datasette/default_magic_parameters.py deleted file mode 100644 index 91c1c5aa..00000000 --- a/datasette/default_magic_parameters.py +++ /dev/null @@ -1,57 +0,0 @@ -from datasette import hookimpl -import datetime -import os -import time - - -def header(key, request): - key = key.replace("_", "-").encode("utf-8") - headers_dict = dict(request.scope["headers"]) - return headers_dict.get(key, b"").decode("utf-8") - - -def actor(key, request): - if request.actor is None: - raise KeyError - return request.actor[key] - - -def cookie(key, request): - return request.cookies[key] - - -def now(key, request): - if key == "epoch": - return int(time.time()) - elif key == "date_utc": - return datetime.datetime.now(datetime.timezone.utc).date().isoformat() - elif key == "datetime_utc": - return ( - datetime.datetime.now(datetime.timezone.utc).strftime(r"%Y-%m-%dT%H:%M:%S") - + "Z" - ) - else: - raise KeyError - - -def random(key, request): - if key.startswith("chars_") and key.split("chars_")[-1].isdigit(): - num_chars = int(key.split("chars_")[-1]) - if num_chars % 2 == 1: - urandom_len = (num_chars + 1) / 2 - else: - urandom_len = num_chars / 2 - return os.urandom(int(urandom_len)).hex()[:num_chars] - else: - raise KeyError - - -@hookimpl -def register_magic_parameters(): - return [ - ("header", header), - ("actor", actor), - ("cookie", cookie), - ("now", now), - ("random", random), - ] diff --git a/datasette/default_menu_links.py b/datasette/default_menu_links.py deleted file mode 100644 index 85032387..00000000 --- 
a/datasette/default_menu_links.py +++ /dev/null @@ -1,41 +0,0 @@ -from datasette import hookimpl - - -@hookimpl -def menu_links(datasette, actor): - async def inner(): - if not await datasette.allowed(action="debug-menu", actor=actor): - return [] - - return [ - {"href": datasette.urls.path("/-/databases"), "label": "Databases"}, - { - "href": datasette.urls.path("/-/plugins"), - "label": "Installed plugins", - }, - { - "href": datasette.urls.path("/-/versions"), - "label": "Version info", - }, - { - "href": datasette.urls.path("/-/settings"), - "label": "Settings", - }, - { - "href": datasette.urls.path("/-/permissions"), - "label": "Debug permissions", - }, - { - "href": datasette.urls.path("/-/messages"), - "label": "Debug messages", - }, - { - "href": datasette.urls.path("/-/allow-debug"), - "label": "Debug allow rules", - }, - {"href": datasette.urls.path("/-/threads"), "label": "Debug threads"}, - {"href": datasette.urls.path("/-/actor"), "label": "Debug actor"}, - {"href": datasette.urls.path("/-/patterns"), "label": "Pattern portfolio"}, - ] - - return inner diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py deleted file mode 100644 index 4c82d705..00000000 --- a/datasette/default_permissions/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Default permission implementations for Datasette. - -This module provides the built-in permission checking logic through implementations -of the permission_resources_sql hook. The hooks are organized by their purpose: - -1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens -2. Root User - Grants full access when --root flag is used -3. Config Rules - Applies permissions from datasette.yaml -4. Default Settings - Enforces default_allow_sql and default view permissions - -IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL -UNION/INTERSECT operations. The order of evaluation is: - - restriction_sql fields are INTERSECTed (all must match) - - Regular sql fields are UNIONed and evaluated with cascading priority -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl - -# Re-export all hooks and public utilities -from .restrictions import ( - actor_restrictions_sql, - restrictions_allow_action, - ActorRestrictions, -) -from .root import root_user_permissions_sql -from .config import config_permissions_sql -from .defaults import ( - default_allow_sql_check, - default_action_permissions_sql, - DEFAULT_ALLOW_ACTIONS, -) -from .tokens import actor_from_signed_api_token - - -@hookimpl -def skip_csrf(scope) -> Optional[bool]: - """Skip CSRF check for JSON content-type requests.""" - if scope["type"] == "http": - headers = scope.get("headers") or {} - if dict(headers).get(b"content-type") == b"application/json": - return True - return None - - -@hookimpl -def canned_queries(datasette: "Datasette", database: str, actor) -> dict: - """Return canned queries defined in datasette.yaml configuration.""" - queries = ( - ((datasette.config or {}).get("databases") or {}).get(database) or {} - ).get("queries") or {} - return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py deleted file mode 100644 index aab87c1c..00000000 --- a/datasette/default_permissions/config.py +++ /dev/null @@ -1,442 +0,0 @@ -""" -Config-based permission handling for Datasette. 
- -Applies permission rules from datasette.yaml configuration. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL -from datasette.utils import actor_matches_allow - -from .helpers import PermissionRowCollector, get_action_name_variants - - -class ConfigPermissionProcessor: - """ - Processes permission rules from datasette.yaml configuration. - - Configuration structure: - - permissions: # Root-level permissions block - view-instance: - id: admin - - databases: - mydb: - permissions: # Database-level permissions - view-database: - id: admin - allow: # Database-level allow block (for view-*) - id: viewer - allow_sql: # execute-sql allow block - id: analyst - tables: - users: - permissions: # Table-level permissions - view-table: - id: admin - allow: # Table-level allow block - id: viewer - queries: - my_query: - permissions: # Query-level permissions - view-query: - id: admin - allow: # Query-level allow block - id: viewer - """ - - def __init__( - self, - datasette: "Datasette", - actor: Optional[dict], - action: str, - ): - self.datasette = datasette - self.actor = actor - self.action = action - self.config = datasette.config or {} - self.collector = PermissionRowCollector(prefix="cfg") - - # Pre-compute action variants - self.action_checks = get_action_name_variants(datasette, action) - self.action_obj = datasette.actions.get(action) - - # Parse restrictions if present - self.has_restrictions = actor and "_r" in actor if actor else False - self.restrictions = actor.get("_r", {}) if actor else {} - - # Pre-compute restriction info for efficiency - self.restricted_databases: Set[str] = set() - self.restricted_tables: Set[Tuple[str, str]] = set() - - if self.has_restrictions: - self.restricted_databases = { - db_name - for db_name, db_actions in (self.restrictions.get("d") or {}).items() - if self.action_checks.intersection(db_actions) - } - self.restricted_tables = { - (db_name, table_name) - for db_name, tables in (self.restrictions.get("r") or {}).items() - for table_name, table_actions in tables.items() - if self.action_checks.intersection(table_actions) - } - # Tables implicitly reference their parent databases - self.restricted_databases.update(db for db, _ in self.restricted_tables) - - def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: - """Evaluate an allow block against the current actor.""" - if allow_block is None: - return None - return actor_matches_allow(self.actor, allow_block) - - def is_in_restriction_allowlist( - self, - parent: Optional[str], - child: Optional[str], - ) -> bool: - """Check if resource is allowed by actor restrictions.""" - if not self.has_restrictions: - return True # No restrictions, all resources allowed - - # Check global allowlist - if self.action_checks.intersection(self.restrictions.get("a", [])): - return True - - # Check database-level allowlist - if parent and self.action_checks.intersection( - self.restrictions.get("d", {}).get(parent, []) - ): - return True - - # Check table-level allowlist - if parent: - table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) - if child: - table_actions = table_restrictions.get(child, []) - if self.action_checks.intersection(table_actions): - return True - else: - # Parent query should proceed if any child in this database is allowlisted - for table_actions in 
table_restrictions.values(): - if self.action_checks.intersection(table_actions): - return True - - # Parent/child both None: include if any restrictions exist for this action - if parent is None and child is None: - if self.action_checks.intersection(self.restrictions.get("a", [])): - return True - if self.restricted_databases: - return True - if self.restricted_tables: - return True - - return False - - def add_permissions_rule( - self, - parent: Optional[str], - child: Optional[str], - permissions_block: Optional[dict], - scope_desc: str, - ) -> None: - """Add a rule from a permissions:{action} block.""" - if permissions_block is None: - return - - action_allow_block = permissions_block.get(self.action) - result = self.evaluate_allow_block(action_allow_block) - - self.collector.add( - parent=parent, - child=child, - allow=result, - reason=f"config {'allow' if result else 'deny'} {scope_desc}", - if_not_none=True, - ) - - def add_allow_block_rule( - self, - parent: Optional[str], - child: Optional[str], - allow_block: Any, - scope_desc: str, - ) -> None: - """ - Add rules from an allow:{} block. - - For allow blocks, if the block exists but doesn't match the actor, - this is treated as a deny. We also handle the restriction-gate logic. - """ - if allow_block is None: - return - - # Skip if resource is not in restriction allowlist - if not self.is_in_restriction_allowlist(parent, child): - return - - result = self.evaluate_allow_block(allow_block) - bool_result = bool(result) - - self.collector.add( - parent, - child, - bool_result, - f"config {'allow' if result else 'deny'} {scope_desc}", - ) - - # Handle restriction-gate: add explicit denies for restricted resources - self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) - - def _add_restriction_gate_denies( - self, - parent: Optional[str], - child: Optional[str], - is_allowed: bool, - scope_desc: str, - ) -> None: - """ - When a config rule denies at a higher level, add explicit denies - for restricted resources to prevent child-level allows from - incorrectly granting access. 
- """ - if is_allowed or child is not None or not self.has_restrictions: - return - - if not self.action_obj: - return - - reason = f"config deny {scope_desc} (restriction gate)" - - if parent is None: - # Root-level deny: add denies for all restricted resources - if self.action_obj.takes_parent: - for db_name in self.restricted_databases: - self.collector.add(db_name, None, False, reason) - if self.action_obj.takes_child: - for db_name, table_name in self.restricted_tables: - self.collector.add(db_name, table_name, False, reason) - else: - # Database-level deny: add denies for tables in that database - if self.action_obj.takes_child: - for db_name, table_name in self.restricted_tables: - if db_name == parent: - self.collector.add(db_name, table_name, False, reason) - - def process(self) -> Optional[PermissionSQL]: - """Process all config rules and return combined PermissionSQL.""" - self._process_root_permissions() - self._process_databases() - self._process_root_allow_blocks() - - return self.collector.to_permission_sql() - - def _process_root_permissions(self) -> None: - """Process root-level permissions block.""" - root_perms = self.config.get("permissions") or {} - self.add_permissions_rule( - None, - None, - root_perms, - f"permissions for {self.action}", - ) - - def _process_databases(self) -> None: - """Process database-level and nested configurations.""" - databases = self.config.get("databases") or {} - - for db_name, db_config in databases.items(): - self._process_database(db_name, db_config or {}) - - def _process_database(self, db_name: str, db_config: dict) -> None: - """Process a single database's configuration.""" - # Database-level permissions block - db_perms = db_config.get("permissions") or {} - self.add_permissions_rule( - db_name, - None, - db_perms, - f"permissions for {self.action} on {db_name}", - ) - - # Process tables - for table_name, table_config in (db_config.get("tables") or {}).items(): - self._process_table(db_name, table_name, table_config or {}) - - # Process queries - for query_name, query_config in (db_config.get("queries") or {}).items(): - self._process_query(db_name, query_name, query_config) - - # Database-level allow blocks - self._process_database_allow_blocks(db_name, db_config) - - def _process_table( - self, - db_name: str, - table_name: str, - table_config: dict, - ) -> None: - """Process a single table's configuration.""" - # Table-level permissions block - table_perms = table_config.get("permissions") or {} - self.add_permissions_rule( - db_name, - table_name, - table_perms, - f"permissions for {self.action} on {db_name}/{table_name}", - ) - - # Table-level allow block (for view-table) - if self.action == "view-table": - self.add_allow_block_rule( - db_name, - table_name, - table_config.get("allow"), - f"allow for {self.action} on {db_name}/{table_name}", - ) - - def _process_query( - self, - db_name: str, - query_name: str, - query_config: Any, - ) -> None: - """Process a single query's configuration.""" - # Query config can be a string (just SQL) or dict - if not isinstance(query_config, dict): - return - - # Query-level permissions block - query_perms = query_config.get("permissions") or {} - self.add_permissions_rule( - db_name, - query_name, - query_perms, - f"permissions for {self.action} on {db_name}/{query_name}", - ) - - # Query-level allow block (for view-query) - if self.action == "view-query": - self.add_allow_block_rule( - db_name, - query_name, - query_config.get("allow"), - f"allow for {self.action} on {db_name}/{query_name}", 
- ) - - def _process_database_allow_blocks( - self, - db_name: str, - db_config: dict, - ) -> None: - """Process database-level allow/allow_sql blocks.""" - # view-database allow block - if self.action == "view-database": - self.add_allow_block_rule( - db_name, - None, - db_config.get("allow"), - f"allow for {self.action} on {db_name}", - ) - - # execute-sql allow_sql block - if self.action == "execute-sql": - self.add_allow_block_rule( - db_name, - None, - db_config.get("allow_sql"), - f"allow_sql for {db_name}", - ) - - # view-table uses database-level allow for inheritance - if self.action == "view-table": - self.add_allow_block_rule( - db_name, - None, - db_config.get("allow"), - f"allow for {self.action} on {db_name}", - ) - - # view-query uses database-level allow for inheritance - if self.action == "view-query": - self.add_allow_block_rule( - db_name, - None, - db_config.get("allow"), - f"allow for {self.action} on {db_name}", - ) - - def _process_root_allow_blocks(self) -> None: - """Process root-level allow/allow_sql blocks.""" - root_allow = self.config.get("allow") - - if self.action == "view-instance": - self.add_allow_block_rule( - None, - None, - root_allow, - "allow for view-instance", - ) - - if self.action == "view-database": - self.add_allow_block_rule( - None, - None, - root_allow, - "allow for view-database", - ) - - if self.action == "view-table": - self.add_allow_block_rule( - None, - None, - root_allow, - "allow for view-table", - ) - - if self.action == "view-query": - self.add_allow_block_rule( - None, - None, - root_allow, - "allow for view-query", - ) - - if self.action == "execute-sql": - self.add_allow_block_rule( - None, - None, - self.config.get("allow_sql"), - "allow_sql", - ) - - -@hookimpl(specname="permission_resources_sql") -async def config_permissions_sql( - datasette: "Datasette", - actor: Optional[dict], - action: str, -) -> Optional[List[PermissionSQL]]: - """ - Apply permission rules from datasette.yaml configuration. - - This processes: - - permissions: blocks at root, database, table, and query levels - - allow: blocks for view-* actions - - allow_sql: blocks for execute-sql action - """ - processor = ConfigPermissionProcessor(datasette, actor, action) - result = processor.process() - - if result is None: - return [] - - return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py deleted file mode 100644 index f5a6a270..00000000 --- a/datasette/default_permissions/defaults.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Default permission settings for Datasette. - -Provides default allow rules for standard view/execute actions. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL - - -# Actions that are allowed by default (unless --default-deny is used) -DEFAULT_ALLOW_ACTIONS = frozenset( - { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } -) - - -@hookimpl(specname="permission_resources_sql") -async def default_allow_sql_check( - datasette: "Datasette", - actor: Optional[dict], - action: str, -) -> Optional[PermissionSQL]: - """ - Enforce the default_allow_sql setting. - - When default_allow_sql is false (the default), execute-sql is denied - unless explicitly allowed by config or other rules. 
- """ - if action == "execute-sql": - if not datasette.setting("default_allow_sql"): - return PermissionSQL.deny(reason="default_allow_sql is false") - - return None - - -@hookimpl(specname="permission_resources_sql") -async def default_action_permissions_sql( - datasette: "Datasette", - actor: Optional[dict], - action: str, -) -> Optional[PermissionSQL]: - """ - Provide default allow rules for standard view/execute actions. - - These defaults are skipped when datasette is started with --default-deny. - The restriction_sql mechanism (from actor_restrictions_sql) will still - filter these results if the actor has restrictions. - """ - if datasette.default_deny: - return None - - if action in DEFAULT_ALLOW_ACTIONS: - reason = f"default allow for {action}".replace("'", "''") - return PermissionSQL.allow(reason=reason) - - return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py deleted file mode 100644 index 47e03569..00000000 --- a/datasette/default_permissions/helpers.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -Shared helper utilities for default permission implementations. -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional, Set - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette.permissions import PermissionSQL - - -def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: - """ - Get all name variants for an action (full name and abbreviation). - - Example: - get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} - """ - variants = {action} - action_obj = datasette.actions.get(action) - if action_obj and action_obj.abbr: - variants.add(action_obj.abbr) - return variants - - -def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: - """Check if an action (or its abbreviation) is in a list.""" - return bool(get_action_name_variants(datasette, action).intersection(action_list)) - - -@dataclass -class PermissionRow: - """A single permission rule row.""" - - parent: Optional[str] - child: Optional[str] - allow: bool - reason: str - - -class PermissionRowCollector: - """Collects permission rows and converts them to PermissionSQL.""" - - def __init__(self, prefix: str = "row"): - self.rows: List[PermissionRow] = [] - self.prefix = prefix - - def add( - self, - parent: Optional[str], - child: Optional[str], - allow: Optional[bool], - reason: str, - if_not_none: bool = False, - ) -> None: - """Add a permission row. 
If if_not_none=True, only add if allow is not None.""" - if if_not_none and allow is None: - return - self.rows.append(PermissionRow(parent, child, allow, reason)) - - def to_permission_sql(self) -> Optional[PermissionSQL]: - """Convert collected rows to a PermissionSQL object.""" - if not self.rows: - return None - - parts = [] - params = {} - - for idx, row in enumerate(self.rows): - key = f"{self.prefix}_{idx}" - parts.append( - f"SELECT :{key}_parent AS parent, :{key}_child AS child, " - f":{key}_allow AS allow, :{key}_reason AS reason" - ) - params[f"{key}_parent"] = row.parent - params[f"{key}_child"] = row.child - params[f"{key}_allow"] = 1 if row.allow else 0 - params[f"{key}_reason"] = row.reason - - sql = "\nUNION ALL\n".join(parts) - return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py deleted file mode 100644 index a22cd7e5..00000000 --- a/datasette/default_permissions/restrictions.py +++ /dev/null @@ -1,195 +0,0 @@ -""" -Actor restriction handling for Datasette permissions. - -This module handles the _r (restrictions) key in actor dictionaries, which -contains allowlists of resources the actor can access. -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional, Set, Tuple - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL - -from .helpers import action_in_list, get_action_name_variants - - -@dataclass -class ActorRestrictions: - """Parsed actor restrictions from the _r key.""" - - global_actions: List[str] # _r.a - globally allowed actions - database_actions: dict # _r.d - {db_name: [actions]} - table_actions: dict # _r.r - {db_name: {table: [actions]}} - - @classmethod - def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: - """Parse restrictions from actor dict. Returns None if no restrictions.""" - if not actor: - return None - assert isinstance(actor, dict), "actor must be a dictionary" - - restrictions = actor.get("_r") - if restrictions is None: - return None - - return cls( - global_actions=restrictions.get("a", []), - database_actions=restrictions.get("d", {}), - table_actions=restrictions.get("r", {}), - ) - - def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: - """Check if action is in the global allowlist.""" - return action_in_list(datasette, action, self.global_actions) - - def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: - """Get database names where this action is allowed.""" - allowed = set() - for db_name, db_actions in self.database_actions.items(): - if action_in_list(datasette, action, db_actions): - allowed.add(db_name) - return allowed - - def get_allowed_tables( - self, datasette: "Datasette", action: str - ) -> Set[Tuple[str, str]]: - """Get (database, table) pairs where this action is allowed.""" - allowed = set() - for db_name, tables in self.table_actions.items(): - for table_name, table_actions in tables.items(): - if action_in_list(datasette, action, table_actions): - allowed.add((db_name, table_name)) - return allowed - - -@hookimpl(specname="permission_resources_sql") -async def actor_restrictions_sql( - datasette: "Datasette", - actor: Optional[dict], - action: str, -) -> Optional[List[PermissionSQL]]: - """ - Handle actor restriction-based permission rules. 
- - When an actor has an "_r" key, it contains an allowlist of resources they - can access. This function returns restriction_sql that filters the final - results to only include resources in that allowlist. - - The _r structure: - { - "a": ["vi", "pd"], # Global actions allowed - "d": {"mydb": ["vt", "es"]}, # Database-level actions - "r": {"mydb": {"users": ["vt"]}} # Table-level actions - } - """ - if not actor: - return None - - restrictions = ActorRestrictions.from_actor(actor) - - if restrictions is None: - # No restrictions - all resources allowed - return [] - - # If globally allowed, no filtering needed - if restrictions.is_action_globally_allowed(datasette, action): - return [] - - # Build restriction SQL - allowed_dbs = restrictions.get_allowed_databases(datasette, action) - allowed_tables = restrictions.get_allowed_tables(datasette, action) - - # If nothing is allowed for this action, return empty-set restriction - if not allowed_dbs and not allowed_tables: - return [ - PermissionSQL( - params={"deny": f"actor restrictions: {action} not in allowlist"}, - restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", - ) - ] - - # Build UNION of allowed resources - selects = [] - params = {} - counter = 0 - - # Database-level entries (parent, NULL) - allows all children - for db_name in allowed_dbs: - key = f"restr_{counter}" - counter += 1 - selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") - params[f"{key}_parent"] = db_name - - # Table-level entries (parent, child) - for db_name, table_name in allowed_tables: - key = f"restr_{counter}" - counter += 1 - selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") - params[f"{key}_parent"] = db_name - params[f"{key}_child"] = table_name - - restriction_sql = "\nUNION ALL\n".join(selects) - - return [PermissionSQL(params=params, restriction_sql=restriction_sql)] - - -def restrictions_allow_action( - datasette: "Datasette", - restrictions: dict, - action: str, - resource: Optional[str | Tuple[str, str]], -) -> bool: - """ - Check if restrictions allow the requested action on the requested resource. - - This is a synchronous utility function for use by other code that needs - to quickly check restriction allowlists. - - Args: - datasette: The Datasette instance - restrictions: The _r dict from an actor - action: The action name to check - resource: None for global, str for database, (db, table) tuple for table - - Returns: - True if allowed, False if denied - """ - # Does this action have an abbreviation? 
- to_check = get_action_name_variants(datasette, action) - - # Check global level (any resource) - all_allowed = restrictions.get("a") - if all_allowed is not None: - assert isinstance(all_allowed, list) - if to_check.intersection(all_allowed): - return True - - # Check database level - if resource: - if isinstance(resource, str): - database_name = resource - else: - database_name = resource[0] - database_allowed = restrictions.get("d", {}).get(database_name) - if database_allowed is not None: - assert isinstance(database_allowed, list) - if to_check.intersection(database_allowed): - return True - - # Check table/resource level - if resource is not None and not isinstance(resource, str) and len(resource) == 2: - database, table = resource - table_allowed = restrictions.get("r", {}).get(database, {}).get(table) - if table_allowed is not None: - assert isinstance(table_allowed, list) - if to_check.intersection(table_allowed): - return True - - # This action is not explicitly allowed, so reject it - return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py deleted file mode 100644 index 4931f7ff..00000000 --- a/datasette/default_permissions/root.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Root user permission handling for Datasette. - -Grants full permissions to the root user when --root flag is used. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL - - -@hookimpl(specname="permission_resources_sql") -async def root_user_permissions_sql( - datasette: "Datasette", - actor: Optional[dict], -) -> Optional[PermissionSQL]: - """ - Grant root user full permissions when --root flag is used. - """ - if not datasette.root_enabled: - return None - if actor is not None and actor.get("id") == "root": - return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py deleted file mode 100644 index 474b0c23..00000000 --- a/datasette/default_permissions/tokens.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -Token authentication for Datasette. - -Handles signed API tokens (dstok_ prefix). -""" - -from __future__ import annotations - -import time -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from datasette.app import Datasette - -import itsdangerous - -from datasette import hookimpl - - -@hookimpl(specname="actor_from_request") -def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: - """ - Authenticate requests using signed API tokens (dstok_ prefix). 
- - Token structure (signed JSON): - { - "a": "actor_id", # Actor ID - "t": 1234567890, # Timestamp (Unix epoch) - "d": 3600, # Optional: Duration in seconds - "_r": {...} # Optional: Restrictions - } - """ - prefix = "dstok_" - - # Check if tokens are enabled - if not datasette.setting("allow_signed_tokens"): - return None - - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - - # Get authorization header - authorization = request.headers.get("authorization") - if not authorization: - return None - if not authorization.startswith("Bearer "): - return None - - token = authorization[len("Bearer ") :] - if not token.startswith(prefix): - return None - - # Remove prefix and verify signature - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - - # Validate timestamp - if "t" not in decoded: - return None - created = decoded["t"] - if not isinstance(created, int): - return None - - # Handle duration/expiry - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - return None - - # Apply max TTL if configured - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - - # Check expiry - if duration: - if time.time() - created > duration: - return None - - # Build actor dict - actor = {"id": decoded["a"], "token": "dstok"} - - # Copy restrictions if present - if "_r" in decoded: - actor["_r"] = decoded["_r"] - - # Add expiry timestamp if applicable - if duration: - actor["token_expires"] = created + duration - - return actor diff --git a/datasette/events.py b/datasette/events.py deleted file mode 100644 index 5cd5ba3d..00000000 --- a/datasette/events.py +++ /dev/null @@ -1,235 +0,0 @@ -from abc import ABC, abstractproperty -from dataclasses import asdict, dataclass, field -from datasette.hookspecs import hookimpl -from datetime import datetime, timezone - - -@dataclass -class Event(ABC): - @abstractproperty - def name(self): - pass - - created: datetime = field( - init=False, default_factory=lambda: datetime.now(timezone.utc) - ) - actor: dict | None - - def properties(self): - properties = asdict(self) - properties.pop("actor", None) - properties.pop("created", None) - return properties - - -@dataclass -class LoginEvent(Event): - """ - Event name: ``login`` - - A user (represented by ``event.actor``) has logged in. - """ - - name = "login" - - -@dataclass -class LogoutEvent(Event): - """ - Event name: ``logout`` - - A user (represented by ``event.actor``) has logged out. - """ - - name = "logout" - - -@dataclass -class CreateTokenEvent(Event): - """ - Event name: ``create-token`` - - A user created an API token. - - :ivar expires_after: Number of seconds after which this token will expire. - :type expires_after: int or None - :ivar restrict_all: Restricted permissions for this token. - :type restrict_all: list - :ivar restrict_database: Restricted database permissions for this token. - :type restrict_database: dict - :ivar restrict_resource: Restricted resource permissions for this token. - :type restrict_resource: dict - """ - - name = "create-token" - expires_after: int | None - restrict_all: list - restrict_database: dict - restrict_resource: dict - - -@dataclass -class CreateTableEvent(Event): - """ - Event name: ``create-table`` - - A new table has been created in the database. 
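        # Illustrative sketch (not code from this module): the Event base class above
        # timestamps itself on creation and serializes to a properties dict that drops
        # "actor" and "created". DemoEvent below is a hypothetical subclass following
        # the same pattern as the event classes in this file.
        from dataclasses import asdict, dataclass, field
        from datetime import datetime, timezone


        @dataclass
        class DemoEvent:
            actor: dict
            database: str
            created: datetime = field(
                init=False, default_factory=lambda: datetime.now(timezone.utc)
            )

            def properties(self):
                properties = asdict(self)
                properties.pop("actor", None)
                properties.pop("created", None)
                return properties


        event = DemoEvent(actor={"id": "root"}, database="mydb")
        assert event.properties() == {"database": "mydb"}
        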
- - :ivar database: The name of the database where the table was created. - :type database: str - :ivar table: The name of the table that was created - :type table: str - :ivar schema: The SQL schema definition for the new table. - :type schema: str - """ - - name = "create-table" - database: str - table: str - schema: str - - -@dataclass -class DropTableEvent(Event): - """ - Event name: ``drop-table`` - - A table has been dropped from the database. - - :ivar database: The name of the database where the table was dropped. - :type database: str - :ivar table: The name of the table that was dropped - :type table: str - """ - - name = "drop-table" - database: str - table: str - - -@dataclass -class AlterTableEvent(Event): - """ - Event name: ``alter-table`` - - A table has been altered. - - :ivar database: The name of the database where the table was altered - :type database: str - :ivar table: The name of the table that was altered - :type table: str - :ivar before_schema: The table's SQL schema before the alteration - :type before_schema: str - :ivar after_schema: The table's SQL schema after the alteration - :type after_schema: str - """ - - name = "alter-table" - database: str - table: str - before_schema: str - after_schema: str - - -@dataclass -class InsertRowsEvent(Event): - """ - Event name: ``insert-rows`` - - Rows were inserted into a table. - - :ivar database: The name of the database where the rows were inserted. - :type database: str - :ivar table: The name of the table where the rows were inserted. - :type table: str - :ivar num_rows: The number of rows that were requested to be inserted. - :type num_rows: int - :ivar ignore: Was ignore set? - :type ignore: bool - :ivar replace: Was replace set? - :type replace: bool - """ - - name = "insert-rows" - database: str - table: str - num_rows: int - ignore: bool - replace: bool - - -@dataclass -class UpsertRowsEvent(Event): - """ - Event name: ``upsert-rows`` - - Rows were upserted into a table. - - :ivar database: The name of the database where the rows were inserted. - :type database: str - :ivar table: The name of the table where the rows were inserted. - :type table: str - :ivar num_rows: The number of rows that were requested to be inserted. - :type num_rows: int - """ - - name = "upsert-rows" - database: str - table: str - num_rows: int - - -@dataclass -class UpdateRowEvent(Event): - """ - Event name: ``update-row`` - - A row was updated in a table. - - :ivar database: The name of the database where the row was updated. - :type database: str - :ivar table: The name of the table where the row was updated. - :type table: str - :ivar pks: The primary key values of the updated row. - """ - - name = "update-row" - database: str - table: str - pks: list - - -@dataclass -class DeleteRowEvent(Event): - """ - Event name: ``delete-row`` - - A row was deleted from a table. - - :ivar database: The name of the database where the row was deleted. - :type database: str - :ivar table: The name of the table where the row was deleted. - :type table: str - :ivar pks: The primary key values of the deleted row. 
- """ - - name = "delete-row" - database: str - table: str - pks: list - - -@hookimpl -def register_events(): - return [ - LoginEvent, - LogoutEvent, - CreateTableEvent, - CreateTokenEvent, - AlterTableEvent, - DropTableEvent, - InsertRowsEvent, - UpsertRowsEvent, - UpdateRowEvent, - DeleteRowEvent, - ] diff --git a/datasette/facets.py b/datasette/facets.py index dd149424..365d9c65 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -1,18 +1,20 @@ import json import urllib +import re from datasette import hookimpl -from datasette.database import QueryInterrupted from datasette.utils import ( escape_sqlite, path_with_added_args, path_with_removed_args, detect_json1, + QueryInterrupted, + InvalidSql, sqlite3, ) -def load_facet_configs(request, table_config): - # Given a request and the configuration for a table, return +def load_facet_configs(request, table_metadata): + # Given a request and the metadata configuration for a table, return # a dictionary of selected facets, their lists of configs and for each # config whether it came from the request or the metadata. # @@ -20,21 +22,21 @@ def load_facet_configs(request, table_config): # {"source": "metadata", "config": config1}, # {"source": "request", "config": config2}]} facet_configs = {} - table_config = table_config or {} - table_facet_configs = table_config.get("facets", []) - for facet_config in table_facet_configs: - if isinstance(facet_config, str): + table_metadata = table_metadata or {} + metadata_facets = table_metadata.get("facets", []) + for metadata_config in metadata_facets: + if isinstance(metadata_config, str): type = "column" - facet_config = {"simple": facet_config} + metadata_config = {"simple": metadata_config} else: assert ( - len(facet_config.values()) == 1 + len(metadata_config.values()) == 1 ), "Metadata config dicts should be {type: config}" - type, facet_config = list(facet_config.items())[0] - if isinstance(facet_config, str): - facet_config = {"simple": facet_config} + type, metadata_config = metadata_config.items()[0] + if isinstance(metadata_config, str): + metadata_config = {"simple": metadata_config} facet_configs.setdefault(type, []).append( - {"source": "metadata", "config": facet_config} + {"source": "metadata", "config": metadata_config} ) qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True) for key, values in qs_pairs.items(): @@ -45,12 +47,13 @@ def load_facet_configs(request, table_config): elif key.startswith("_facet_"): type = key[len("_facet_") :] for value in values: - # The value is the facet_config - either JSON or not - facet_config = ( - json.loads(value) if value.startswith("{") else {"simple": value} - ) + # The value is the config - either JSON or not + if value.startswith("{"): + config = json.loads(value) + else: + config = {"simple": value} facet_configs.setdefault(type, []).append( - {"source": "request", "config": facet_config} + {"source": "request", "config": config} ) return facet_configs @@ -65,8 +68,6 @@ def register_facet_classes(): class Facet: type = None - # How many rows to consider when suggesting facets: - suggest_consider = 1000 def __init__( self, @@ -76,7 +77,7 @@ class Facet: sql=None, table=None, params=None, - table_config=None, + metadata=None, row_count=None, ): assert table or sql, "Must provide either table= or sql=" @@ -85,14 +86,14 @@ class Facet: self.database = database # For foreign key expansion. Can be None for e.g. 
canned SQL queries: self.table = table - self.sql = sql or f"select * from [{table}]" + self.sql = sql or "select * from [{}]".format(table) self.params = params or [] - self.table_config = table_config + self.metadata = metadata # row_count can be None, in which case we calculate it ourselves: self.row_count = row_count def get_configs(self): - configs = load_facet_configs(self.request, self.table_config) + configs = load_facet_configs(self.request, self.metadata) return configs.get(self.type) or [] def get_querystring_pairs(self): @@ -100,36 +101,6 @@ class Facet: # [('_foo', 'bar'), ('_foo', '2'), ('empty', '')] return urllib.parse.parse_qsl(self.request.query_string, keep_blank_values=True) - def get_facet_size(self): - facet_size = self.ds.setting("default_facet_size") - max_returned_rows = self.ds.setting("max_returned_rows") - table_facet_size = None - if self.table: - config_facet_size = ( - self.ds.config.get("databases", {}) - .get(self.database, {}) - .get("tables", {}) - .get(self.table, {}) - .get("facet_size") - ) - if config_facet_size: - table_facet_size = config_facet_size - custom_facet_size = self.request.args.get("_facet_size") - if custom_facet_size: - if custom_facet_size == "max": - facet_size = max_returned_rows - elif custom_facet_size.isdigit(): - facet_size = int(custom_facet_size) - else: - # Invalid value, ignore it - custom_facet_size = None - if table_facet_size and not custom_facet_size: - if table_facet_size == "max": - facet_size = max_returned_rows - else: - facet_size = table_facet_size - return min(facet_size, max_returned_rows) - async def suggest(self): return [] @@ -143,10 +114,21 @@ class Facet: # Detect column names using the "limit 0" trick return ( await self.ds.execute( - self.database, f"select * from ({sql}) limit 0", params or [] + self.database, "select * from ({}) limit 0".format(sql), params or [] ) ).columns + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + "select count(*) from ({})".format(self.sql), + self.params, + ) + ).rows[0][0] + return self.row_count + class ColumnFacet(Facet): type = "column" @@ -154,23 +136,19 @@ class ColumnFacet(Facet): async def suggest(self): row_count = await self.get_row_count() columns = await self.get_columns(self.sql, self.params) - facet_size = self.get_facet_size() + facet_size = self.ds.config("default_facet_size") suggested_facets = [] already_enabled = [c["config"]["simple"] for c in self.get_configs()] for column in columns: if column in already_enabled: continue suggested_facet_sql = """ - with limited as (select * from ({sql}) limit {suggest_consider}) - select {column} as value, count(*) as n from limited - where value is not null - group by value + select distinct {column} from ( + {sql} + ) where {column} is not null limit {limit} """.format( - column=escape_sqlite(column), - sql=self.sql, - limit=facet_size + 1, - suggest_consider=self.suggest_consider, + column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 ) distinct_values = None try: @@ -179,25 +157,21 @@ class ColumnFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), ) num_distinct_values = len(distinct_values) if ( - 1 < num_distinct_values < row_count + num_distinct_values + and num_distinct_values > 1 and num_distinct_values <= facet_size - # And at least one has n > 1 - and any(r["n"] > 1 for r in 
distinct_values) + and num_distinct_values < row_count ): suggested_facets.append( { "name": column, "toggle_url": self.ds.absolute_url( self.request, - self.ds.urls.path( - path_with_added_args( - self.request, {"_facet": column} - ) - ), + path_with_added_args(self.request, {"_facet": column}), ), } ) @@ -205,24 +179,13 @@ class ColumnFacet(Facet): continue return suggested_facets - async def get_row_count(self): - if self.row_count is None: - self.row_count = ( - await self.ds.execute( - self.database, - f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})", - self.params, - ) - ).rows[0][0] - return self.row_count - async def facet_results(self): - facet_results = [] + facet_results = {} facets_timed_out = [] qs_pairs = self.get_querystring_pairs() - facet_size = self.get_facet_size() + facet_size = self.ds.config("default_facet_size") for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -232,7 +195,7 @@ class ColumnFacet(Facet): {sql} ) where {col} is not null - group by {col} order by count desc, value limit {limit} + group by {col} order by count desc limit {limit} """.format( col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 ) @@ -242,42 +205,37 @@ class ColumnFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_time_limit_ms"), + custom_time_limit=self.ds.config("facet_time_limit_ms"), ) facet_results_values = [] - facet_results.append( - { - "name": column, - "type": self.type, - "hideable": source != "metadata", - "toggle_url": self.ds.urls.path( - path_with_removed_args(self.request, {"_facet": column}) - ), - "results": facet_results_values, - "truncated": len(facet_rows_results) > facet_size, - } - ) + facet_results[column] = { + "name": column, + "type": self.type, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet": column} + ), + "results": facet_results_values, + "truncated": len(facet_rows_results) > facet_size, + } facet_rows = facet_rows_results.rows[:facet_size] if self.table: # Attempt to expand foreign keys into labels values = [row["value"] for row in facet_rows] expanded = await self.ds.expand_foreign_keys( - self.request.actor, self.database, self.table, column, values + self.database, self.table, column, values ) else: expanded = {} for row in facet_rows: - column_qs = column - if column.startswith("_"): - column_qs = "{}__exact".format(column) - selected = (column_qs, str(row["value"])) in qs_pairs + selected = (column, str(row["value"])) in qs_pairs if selected: toggle_path = path_with_removed_args( - self.request, {column_qs: str(row["value"])} + self.request, {column: str(row["value"])} ) else: toggle_path = path_with_added_args( - self.request, {column_qs: row["value"]} + self.request, {column: row["value"]} ) facet_results_values.append( { @@ -285,7 +243,7 @@ class ColumnFacet(Facet): "label": expanded.get((column, row["value"]), row["value"]), "count": row["count"], "toggle_url": self.ds.absolute_url( - self.request, self.ds.urls.path(toggle_path) + self.request, toggle_path ), "selected": selected, } @@ -299,16 +257,6 @@ class ColumnFacet(Facet): class ArrayFacet(Facet): type = "array" - def _is_json_array_of_strings(self, json_string): - try: - array = json.loads(json_string) - except ValueError: - return False - for item in array: - if not isinstance(item, str): - return False - return True - async def suggest(self): columns = await 
self.get_columns(self.sql, self.params) suggested_facets = [] @@ -318,14 +266,10 @@ class ArrayFacet(Facet): continue # Is every value in this column either null or a JSON array? suggested_facet_sql = """ - with limited as (select * from ({sql}) limit {suggest_consider}) select distinct json_type({column}) - from limited - where {column} is not null and {column} != '' + from ({sql}) """.format( - column=escape_sqlite(column), - sql=self.sql, - suggest_consider=self.suggest_consider, + column=escape_sqlite(column), sql=self.sql ) try: results = await self.ds.execute( @@ -333,86 +277,44 @@ class ArrayFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), log_sql_errors=False, ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - # Now check that first 100 arrays contain only strings - first_100 = [ - v[0] - for v in await self.ds.execute( - self.database, - ( - "select {column} from ({sql}) " - "where {column} is not null " - "and {column} != '' " - "and json_array_length({column}) > 0 " - "limit 100" - ).format(column=escape_sqlite(column), sql=self.sql), - self.params, - truncate=False, - custom_time_limit=self.ds.setting( - "facet_suggest_time_limit_ms" - ), - log_sql_errors=False, - ) - ] - if first_100 and all( - self._is_json_array_of_strings(r) for r in first_100 - ): - suggested_facets.append( - { - "name": column, - "type": "array", - "toggle_url": self.ds.absolute_url( - self.request, - self.ds.urls.path( - path_with_added_args( - self.request, {"_facet_array": column} - ) - ), + suggested_facets.append( + { + "name": column, + "type": "array", + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet_array": column} ), - } - ) + ), + } + ) except (QueryInterrupted, sqlite3.OperationalError): continue return suggested_facets async def facet_results(self): # self.configs should be a plain list of columns - facet_results = [] + facet_results = {} facets_timed_out = [] - facet_size = self.get_facet_size() + facet_size = self.ds.config("default_facet_size") for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] column = config.get("column") or config["simple"] - # https://github.com/simonw/datasette/issues/448 facet_sql = """ - with inner as ({sql}), - deduped_array_items as ( - select - distinct j.value, - inner.* - from - json_each([inner].{col}) j - join inner - ) - select - value as value, - count(*) as count - from - deduped_array_items - group by - value - order by - count(*) desc, value limit {limit} + select j.value as value, count(*) as count from ( + {sql} + ) join json_each({col}) j + group by j.value order by count desc limit {limit} """.format( - col=escape_sqlite(column), - sql=self.sql, - limit=facet_size + 1, + col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 ) try: facet_rows_results = await self.ds.execute( @@ -420,35 +322,31 @@ class ArrayFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_time_limit_ms"), + custom_time_limit=self.ds.config("facet_time_limit_ms"), ) facet_results_values = [] - facet_results.append( - { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": self.ds.urls.path( - path_with_removed_args( - self.request, {"_facet_array": column} 
- ) - ), - "truncated": len(facet_rows_results) > facet_size, - } - ) + facet_results[column] = { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_array": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } facet_rows = facet_rows_results.rows[:facet_size] pairs = self.get_querystring_pairs() for row in facet_rows: value = str(row["value"]) - selected = (f"{column}__arraycontains", value) in pairs + selected = ("{}__arraycontains".format(column), value) in pairs if selected: toggle_path = path_with_removed_args( - self.request, {f"{column}__arraycontains": value} + self.request, {"{}__arraycontains".format(column): value} ) else: toggle_path = path_with_added_args( - self.request, {f"{column}__arraycontains": value} + self.request, {"{}__arraycontains".format(column): value} ) facet_results_values.append( { @@ -480,8 +378,8 @@ class DateFacet(Facet): # Does this column contain any dates in the first 100 rows? suggested_facet_sql = """ select date({column}) from ( - select * from ({sql}) limit 100 - ) where {column} glob "????-??-*" + {sql} + ) where {column} glob "????-??-*" limit 100; """.format( column=escape_sqlite(column), sql=self.sql ) @@ -491,7 +389,7 @@ class DateFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), log_sql_errors=False, ) values = tuple(r[0] for r in results.rows) @@ -502,10 +400,8 @@ class DateFacet(Facet): "type": "date", "toggle_url": self.ds.absolute_url( self.request, - self.ds.urls.path( - path_with_added_args( - self.request, {"_facet_date": column} - ) + path_with_added_args( + self.request, {"_facet_date": column} ), ), } @@ -515,10 +411,10 @@ class DateFacet(Facet): return suggested_facets async def facet_results(self): - facet_results = [] + facet_results = {} facets_timed_out = [] args = dict(self.get_querystring_pairs()) - facet_size = self.get_facet_size() + facet_size = self.ds.config("default_facet_size") for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -529,7 +425,7 @@ class DateFacet(Facet): {sql} ) where date({col}) is not null - group by date({col}) order by count desc, value limit {limit} + group by date({col}) order by count desc limit {limit} """.format( col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 ) @@ -539,31 +435,31 @@ class DateFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.setting("facet_time_limit_ms"), + custom_time_limit=self.ds.config("facet_time_limit_ms"), ) facet_results_values = [] - facet_results.append( - { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_date": column} - ), - "truncated": len(facet_rows_results) > facet_size, - } - ) + facet_results[column] = { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_date": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } facet_rows = facet_rows_results.rows[:facet_size] for row in facet_rows: - selected = str(args.get(f"{column}__date")) == str(row["value"]) + selected = 
str(args.get("{}__date".format(column))) == str( + row["value"] + ) if selected: toggle_path = path_with_removed_args( - self.request, {f"{column}__date": str(row["value"])} + self.request, {"{}__date".format(column): str(row["value"])} ) else: toggle_path = path_with_added_args( - self.request, {f"{column}__date": row["value"]} + self.request, {"{}__date".format(column): row["value"]} ) facet_results_values.append( { diff --git a/datasette/filters.py b/datasette/filters.py index 95cc5f37..efe014ae 100644 --- a/datasette/filters.py +++ b/datasette/filters.py @@ -1,173 +1,7 @@ -from datasette import hookimpl -from datasette.resources import DatabaseResource -from datasette.views.base import DatasetteError -from datasette.utils.asgi import BadRequest import json -from .utils import detect_json1, escape_sqlite, path_with_removed_args +import numbers - -@hookimpl(specname="filters_from_request") -def where_filters(request, database, datasette): - # This one deals with ?_where= - async def inner(): - where_clauses = [] - extra_wheres_for_ui = [] - if "_where" in request.args: - if not await datasette.allowed( - action="execute-sql", - resource=DatabaseResource(database=database), - actor=request.actor, - ): - raise DatasetteError("_where= is not allowed", status=403) - else: - where_clauses.extend(request.args.getlist("_where")) - extra_wheres_for_ui = [ - { - "text": text, - "remove_url": path_with_removed_args(request, {"_where": text}), - } - for text in request.args.getlist("_where") - ] - - return FilterArguments( - where_clauses, - extra_context={ - "extra_wheres_for_ui": extra_wheres_for_ui, - }, - ) - - return inner - - -@hookimpl(specname="filters_from_request") -def search_filters(request, database, table, datasette): - # ?_search= and _search_colname= - async def inner(): - where_clauses = [] - params = {} - human_descriptions = [] - extra_context = {} - - # Figure out which fts_table to use - table_metadata = await datasette.table_config(database, table) - db = datasette.get_database(database) - fts_table = request.args.get("_fts_table") - fts_table = fts_table or table_metadata.get("fts_table") - fts_table = fts_table or await db.fts_table(table) - fts_pk = request.args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) - search_args = { - key: request.args[key] - for key in request.args - if key.startswith("_search") and key != "_searchmode" - } - search = "" - search_mode_raw = table_metadata.get("searchmode") == "raw" - # Or set search mode from the querystring - qs_searchmode = request.args.get("_searchmode") - if qs_searchmode == "escaped": - search_mode_raw = False - if qs_searchmode == "raw": - search_mode_raw = True - - extra_context["supports_search"] = bool(fts_table) - - if fts_table and search_args: - if "_search" in search_args: - # Simple ?_search=xxx - search = search_args["_search"] - where_clauses.append( - "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format( - fts_table=escape_sqlite(fts_table), - fts_pk=escape_sqlite(fts_pk), - match_clause=( - ":search" if search_mode_raw else "escape_fts(:search)" - ), - ) - ) - human_descriptions.append(f'search matches "{search}"') - params["search"] = search - extra_context["search"] = search - else: - # More complex: search against specific columns - for i, (key, search_text) in enumerate(search_args.items()): - search_col = key.split("_search_", 1)[1] - if search_col not in await db.table_columns(fts_table): - raise BadRequest("Cannot search by that column") - - 
where_clauses.append( - "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( - fts_table=escape_sqlite(fts_table), - search_col=escape_sqlite(search_col), - match_clause=( - ":search_{}".format(i) - if search_mode_raw - else "escape_fts(:search_{})".format(i) - ), - ) - ) - human_descriptions.append( - f'search column "{search_col}" matches "{search_text}"' - ) - params[f"search_{i}"] = search_text - extra_context["search"] = search_text - - return FilterArguments(where_clauses, params, human_descriptions, extra_context) - - return inner - - -@hookimpl(specname="filters_from_request") -def through_filters(request, database, table, datasette): - # ?_search= and _search_colname= - async def inner(): - where_clauses = [] - params = {} - human_descriptions = [] - extra_context = {} - - # Support for ?_through={table, column, value} - if "_through" in request.args: - for through in request.args.getlist("_through"): - through_data = json.loads(through) - through_table = through_data["table"] - other_column = through_data["column"] - value = through_data["value"] - db = datasette.get_database(database) - outgoing_foreign_keys = await db.foreign_keys_for_table(through_table) - try: - fk_to_us = [ - fk for fk in outgoing_foreign_keys if fk["other_table"] == table - ][0] - except IndexError: - raise DatasetteError( - "Invalid _through - could not find corresponding foreign key" - ) - param = f"p{len(params)}" - where_clauses.append( - "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( - through_table=escape_sqlite(through_table), - our_pk=escape_sqlite(fk_to_us["other_column"]), - our_column=escape_sqlite(fk_to_us["column"]), - other_column=escape_sqlite(other_column), - param=param, - ) - ) - params[param] = value - human_descriptions.append(f'{through_table}.{other_column} = "{value}"') - - return FilterArguments(where_clauses, params, human_descriptions, extra_context) - - return inner - - -class FilterArguments: - def __init__( - self, where_clauses, params=None, human_descriptions=None, extra_context=None - ): - self.where_clauses = where_clauses - self.params = params or {} - self.human_descriptions = human_descriptions or [] - self.extra_context = extra_context or {} +from .utils import detect_json1, escape_sqlite class Filter: @@ -209,7 +43,7 @@ class TemplatedFilter(Filter): kwargs = {"c": column} converted = None else: - kwargs = {"c": column, "p": f"p{param_counter}", "t": table} + kwargs = {"c": column, "p": "p{}".format(param_counter), "t": table} return self.sql_template.format(**kwargs), converted def human_clause(self, column, value): @@ -235,26 +69,12 @@ class InFilter(Filter): def where_clause(self, table, column, value, param_counter): values = self.split_value(value) - params = [f":p{param_counter + i}" for i in range(len(values))] - sql = f"{escape_sqlite(column)} in ({', '.join(params)})" + params = [":p{}".format(param_counter + i) for i in range(len(values))] + sql = "{} in ({})".format(escape_sqlite(column), ", ".join(params)) return sql, values def human_clause(self, column, value): - return f"{column} in {json.dumps(self.split_value(value))}" - - -class NotInFilter(InFilter): - key = "notin" - display = "not in" - - def where_clause(self, table, column, value, param_counter): - values = self.split_value(value) - params = [f":p{param_counter + i}" for i in range(len(values))] - sql = f"{escape_sqlite(column)} not in ({', '.join(params)})" - return sql, values - - def human_clause(self, 
column, value): - return f"{column} not in {json.dumps(self.split_value(value))}" + return "{} in {}".format(column, json.dumps(self.split_value(value))) class Filters: @@ -280,13 +100,6 @@ class Filters: '{c} contains "{v}"', format="%{}%", ), - TemplatedFilter( - "notcontains", - "does not contain", - '"{c}" not like :{p}', - '{c} does not contain "{v}"', - format="%{}%", - ), TemplatedFilter( "endswith", "ends with", @@ -310,27 +123,20 @@ class Filters: "lte", "\u2264", '"{c}" <= :{p}', "{c} \u2264 {v}", numeric=True ), TemplatedFilter("like", "like", '"{c}" like :{p}', '{c} like "{v}"'), - TemplatedFilter( - "notlike", "not like", '"{c}" not like :{p}', '{c} not like "{v}"' - ), TemplatedFilter("glob", "glob", '"{c}" glob :{p}', '{c} glob "{v}"'), InFilter(), - NotInFilter(), ] + ( [ TemplatedFilter( "arraycontains", "array contains", - """:{p} in (select value from json_each([{t}].[{c}]))""", + """rowid in ( + select {t}.rowid from {t}, json_each({t}.{c}) j + where j.value = :{p} + )""", '{c} contains "{v}"', - ), - TemplatedFilter( - "arraynotcontains", - "array does not contain", - """:{p} not in (select value from json_each([{t}].[{c}]))""", - '{c} does not contain "{v}"', - ), + ) ] if detect_json1() else [] @@ -367,11 +173,13 @@ class Filters: ) _filters_by_key = {f.key: f for f in _filters} - def __init__(self, pairs): + def __init__(self, pairs, units={}, ureg=None): self.pairs = pairs + self.units = units + self.ureg = ureg def lookups(self): - """Yields (lookup, display, no_argument) pairs""" + "Yields (lookup, display, no_argument) pairs" for filter in self._filters: yield filter.key, filter.display, filter.no_argument @@ -393,10 +201,10 @@ class Filters: s = " and ".join(and_bits) if not s: return "" - return f"where {s}" + return "where {}".format(s) def selections(self): - """Yields (column, lookup, value) tuples""" + "Yields (column, lookup, value) tuples" for key, value in self.pairs: if "__" in key: column, lookup = key.rsplit("__", 1) @@ -408,6 +216,20 @@ class Filters: def has_selections(self): return bool(self.pairs) + def convert_unit(self, column, value): + "If the user has provided a unit in the query, convert it into the column unit, if present." 
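        # A note on the unit handling added below: self.ureg is assumed to be a
        # pint-style unit registry (pint.UnitRegistry()) and self.units maps column
        # names to unit strings. Parsing a value such as "5 km" and converting it into
        # the column's unit then looks like:
        #
        #     ureg = pint.UnitRegistry()
        #     ureg("5 km").to(ureg("m")).magnitude   # 5000.0
        #     ureg("50")                             # bare number, returned unchanged
        #
        # which mirrors the value.to(column_unit).magnitude call in the method body
        # that follows; bare numbers short-circuit via the numbers.Number check.
        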
+ if column not in self.units: + return value + + # Try to interpret the value as a unit + value = self.ureg(value) + if isinstance(value, numbers.Number): + # It's just a bare number, assume it's the column unit + return value + + column_unit = self.ureg(self.units[column]) + return value.to(column_unit).magnitude + def build_where_clauses(self, table): sql_bits = [] params = {} @@ -415,13 +237,15 @@ class Filters: for column, lookup, value in self.selections(): filter = self._filters_by_key.get(lookup, None) if filter: - sql_bit, param = filter.where_clause(table, column, value, i) + sql_bit, param = filter.where_clause( + table, column, self.convert_unit(column, value), i + ) sql_bits.append(sql_bit) if param is not None: if not isinstance(param, list): param = [param] for individual_param in param: - param_id = f"p{i}" + param_id = "p{}".format(i) params[param_id] = individual_param i += 1 return sql_bits, params diff --git a/datasette/forbidden.py b/datasette/forbidden.py deleted file mode 100644 index 41c48396..00000000 --- a/datasette/forbidden.py +++ /dev/null @@ -1,19 +0,0 @@ -from datasette import hookimpl, Response - - -@hookimpl(trylast=True) -def forbidden(datasette, request, message): - async def inner(): - return Response.html( - await datasette.render_template( - "error.html", - { - "title": "Forbidden", - "error": message, - }, - request=request, - ), - status=403, - ) - - return inner diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py deleted file mode 100644 index 96398a4c..00000000 --- a/datasette/handle_exception.py +++ /dev/null @@ -1,77 +0,0 @@ -from datasette import hookimpl, Response -from .utils import add_cors_headers -from .utils.asgi import ( - Base400, -) -from .views.base import DatasetteError -from markupsafe import Markup -import traceback - -try: - import ipdb as pdb -except ImportError: - import pdb - -try: - import rich -except ImportError: - rich = None - - -@hookimpl(trylast=True) -def handle_exception(datasette, request, exception): - async def inner(): - if datasette.pdb: - pdb.post_mortem(exception.__traceback__) - - if rich is not None: - rich.get_console().print_exception(show_locals=True) - - title = None - if isinstance(exception, Base400): - status = exception.status - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.message_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = [f"{status}.html", "error.html"] - info.update( - { - "ok": False, - "error": message, - "status": status, - "title": title, - } - ) - headers = {} - if datasette.cors: - add_cors_headers(headers) - if request.path.split("?")[0].endswith(".json"): - return Response.json(info, status=status, headers=headers) - else: - environment = datasette.get_jinja_environment(request) - template = environment.select_template(templates) - return Response.html( - await template.render_async( - dict( - info, - urls=datasette.urls, - app_css_hash=datasette.app_css_hash(), - menu_links=lambda: [], - ) - ), - status=status, - headers=headers, - ) - - return inner diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 3f6a1425..780b7732 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -5,218 +5,66 @@ hookspec = HookspecMarker("datasette") hookimpl = HookimplMarker("datasette") -@hookspec -def 
startup(datasette): - """Fires directly after Datasette first starts running""" - - @hookspec def asgi_wrapper(datasette): - """Returns an ASGI middleware callable to wrap our ASGI application with""" + "Returns an ASGI middleware callable to wrap our ASGI application with" @hookspec -def prepare_connection(conn, database, datasette): - """Modify SQLite connection in some way e.g. register custom SQL functions""" +def prepare_connection(conn): + "Modify SQLite connection in some way e.g. register custom SQL functions" @hookspec -def prepare_jinja2_environment(env, datasette): - """Modify Jinja2 template environment e.g. register custom template tags""" +def prepare_jinja2_environment(env): + "Modify Jinja2 template environment e.g. register custom template tags" @hookspec -def extra_css_urls(template, database, table, columns, view_name, request, datasette): - """Extra CSS URLs added by this plugin""" +def extra_css_urls(template, database, table, datasette): + "Extra CSS URLs added by this plugin" @hookspec -def extra_js_urls(template, database, table, columns, view_name, request, datasette): - """Extra JavaScript URLs added by this plugin""" +def extra_js_urls(template, database, table, datasette): + "Extra JavaScript URLs added by this plugin" @hookspec -def extra_body_script( - template, database, table, columns, view_name, request, datasette -): - """Extra JavaScript code to be included in diff --git a/datasette/templates/_codemirror.html b/datasette/templates/_codemirror.html index c4629aeb..237d6907 100644 --- a/datasette/templates/_codemirror.html +++ b/datasette/templates/_codemirror.html @@ -1,16 +1,7 @@ - - + + + diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html index a624c8a4..4b55bf8d 100644 --- a/datasette/templates/_codemirror_foot.html +++ b/datasette/templates/_codemirror_foot.html @@ -1,42 +1,13 @@ diff --git a/datasette/templates/_crumbs.html b/datasette/templates/_crumbs.html deleted file mode 100644 index bd1ff0da..00000000 --- a/datasette/templates/_crumbs.html +++ /dev/null @@ -1,15 +0,0 @@ -{% macro nav(request, database=None, table=None) -%} -{% if crumb_items is defined %} - {% set items=crumb_items(request=request, database=database, table=table) %} - {% if items %} -

- {% for item in items %} - {{ item.label }} - {% if not loop.last %} - / - {% endif %} - {% endfor %} -

- {% endif %} -{% endif %} -{%- endmacro %} diff --git a/datasette/templates/_debug_common_functions.html b/datasette/templates/_debug_common_functions.html deleted file mode 100644 index d988a2f3..00000000 --- a/datasette/templates/_debug_common_functions.html +++ /dev/null @@ -1,50 +0,0 @@ - diff --git a/datasette/templates/_description_source_license.html b/datasette/templates/_description_source_license.html index f852268f..3327706e 100644 --- a/datasette/templates/_description_source_license.html +++ b/datasette/templates/_description_source_license.html @@ -1,6 +1,6 @@ -{% if metadata.get("description_html") or metadata.get("description") %} +{% if metadata.description_html or metadata.description %}
- - - {% for column in display_columns %} - + {% endfor %} + + + + {% for row in display_rows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} + +
- {% if not column.sortable %} - {{ column.name }} + + + + {% for column in display_columns %} + - {% endfor %} - - - - {% for row in display_rows %} - - {% for cell in row %} - - {% endfor %} - - {% endfor %} - -
+ {% if not column.sortable %} + {{ column.name }} + {% else %} + {% if column.name == sort %} + {{ column.name }} ▼ {% else %} - {% if column.name == sort %} - {{ column.name }} ▼ - {% else %} - {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} - {% endif %} + {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} {% endif %} -
{{ cell.value }}
- -{% else %} -

0 records

-{% endif %} + {% endif %} +
{{ cell.value }}
diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html deleted file mode 100644 index 1ecc92df..00000000 --- a/datasette/templates/allow_debug.html +++ /dev/null @@ -1,61 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Debug allow rules{% endblock %} - -{% block extra_head %} - -{% endblock %} - -{% block content %} - -

        -Debug allow rules
        -{% set current_tab = "allow_debug" %}
        -{% include "_permissions_debug_tabs.html" %}
        -Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.
        -{% if error %}
        -{{ error }}
        -{% endif %}
        -{% if result == "True" %}
        -Result: allow
        -{% endif %}
        -{% if result == "False" %}
        -Result: deny
        

{% endif %} - -{% endblock %} diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html deleted file mode 100644 index dc393c20..00000000 --- a/datasette/templates/api_explorer.html +++ /dev/null @@ -1,208 +0,0 @@ -{% extends "base.html" %} - -{% block title %}API Explorer{% endblock %} - -{% block extra_head %} - -{% endblock %} - -{% block content %} - -

API Explorer{% if private %} 🔒{% endif %}

- -

Use this tool to try out the - {% if datasette_version %} - Datasette API. - {% else %} - Datasette API. - {% endif %} -

-
- GET -
-
- - - -
-
-
-
- POST -
-
- - -
-
- - -
-

-
-
- - - - - -{% if example_links %} -

API endpoints

-
    - {% for database in example_links %} -
  • Database: {{ database.name }}
  • -
      - {% for link in database.links %} -
    • {{ link.path }} - {{ link.label }}
    • - {% endfor %} - {% for table in database.tables %} -
    • {{ table.name }} -
        - {% for link in table.links %} -
      • {{ link.path }} - {{ link.label }}
      • - {% endfor %} -
      -
    • - {% endfor %} -
    - {% endfor %} -
-{% endif %} - -{% endblock %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 0d89e11c..d26043f8 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -1,78 +1,32 @@ -{% import "_crumbs.html" as crumbs with context %} - + + {% block title %}{% endblock %} - + {% for url in extra_css_urls %} - + {% endfor %} - - {% for url in extra_js_urls %} - + {% endfor %} -{%- if alternate_url_json -%} - -{%- endif -%} -{%- block extra_head %}{% endblock -%} +{% block extra_head %}{% endblock %} - -
{% block footer %}{% include "_footer.html" %}{% endblock %}
-{% include "_close_open_menus.html" %} +
{% block footer %}{% include "_footer.html" %}{% endblock %}
{% for body_script in body_scripts %} - {{ body_script.script }} + {% endfor %} {% if select_templates %}{% endif %} - - diff --git a/datasette/templates/create_token.html b/datasette/templates/create_token.html deleted file mode 100644 index ad7c71b6..00000000 --- a/datasette/templates/create_token.html +++ /dev/null @@ -1,124 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Create an API token{% endblock %} - -{% block extra_head %} - -{% endblock %} - -{% block content %} - -

Create an API token

- -

This token will allow API access with the same abilities as your current user, {{ request.actor.id }}
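        Once created, the token is sent back to Datasette in the Authorization header using the Bearer dstok_ convention handled by actor_from_signed_api_token earlier in this diff. A minimal client sketch; the base URL and the /-/actor.json path are illustrative:

        import urllib.request

        token = "dstok_..."  # paste the token generated by this page
        request = urllib.request.Request(
            "http://127.0.0.1:8001/-/actor.json",  # illustrative URL and endpoint
            headers={"Authorization": "Bearer {}".format(token)},
        )
        with urllib.request.urlopen(request) as response:
            print(response.read().decode("utf-8"))
        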

- -{% if token %} -
-

Your API token

-
- - -
- -
- Token details -
{{ token_bits|tojson(4) }}
-
-
-

Create another token

-{% endif %} - -{% if errors %} - {% for error in errors %} -

{{ error }}

- {% endfor %} -{% endif %} - -
-
-
- -
- - - - -
- Restrict actions that can be performed using this token -

All databases and tables

-
    - {% for permission in all_actions %} -
  • - {% endfor %} -
- - {% for database in database_with_tables %} -

All tables in "{{ database.name }}"

-
    - {% for permission in database_actions %} -
  • - {% endfor %} -
- {% endfor %} -

Specific tables

- {% for database in database_with_tables %} - {% for table in database.tables %} -

{{ database.name }}: {{ table.name }}

-
    - {% for permission in child_actions %} -
  • - {% endfor %} -
- {% endfor %} - {% endfor %} -
- - -
- - - -{% endblock %} diff --git a/datasette/templates/csrf_error.html b/datasette/templates/csrf_error.html deleted file mode 100644 index 7cd4b42b..00000000 --- a/datasette/templates/csrf_error.html +++ /dev/null @@ -1,13 +0,0 @@ -{% extends "base.html" %} -{% block title %}CSRF check failed){% endblock %} -{% block content %} -

        -Form origin check failed
        -Your request's origin could not be validated. Please return to the form and submit it again.
        -Technical details
        -Developers: consult Datasette's CSRF protection documentation.
        -Error code is {{ message_name }}.
        
-
- -{% endblock %} diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 42b4ca0b..a934f336 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -3,87 +3,67 @@ {% block title %}{{ database }}{% endblock %} {% block extra_head %} -{{- super() -}} +{{ super() }} {% include "_codemirror.html" %} {% endblock %} {% block body_class %}db db-{{ database|to_css_class }}{% endblock %} -{% block crumbs %} -{{ crumbs.nav(request=request, database=database) }} +{% block nav %} +

+ home +

+ {{ super() }} {% endblock %} {% block content %} - -{% set action_links, action_title = database_actions(), "Database actions" %} -{% include "_action_menu.html" %} -{{ top_database() }} +

{{ metadata.title or database }}

{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} -{% if allow_execute_sql %} -
+{% if config.allow_sql %} +

Custom SQL query

-

- - -

+

{% endif %} -{% if attached_databases %} -
-

The following databases are attached to this connection, and can be used for cross-database joins:

-
    - {% for db_name in attached_databases %} -
  • {{ db_name }} - tables
  • - {% endfor %} -
-
-{% endif %} - -{% if queries %} -

Queries

- -{% endif %} - -{% if tables %} -

Tables schema

-{% endif %} - {% for table in tables %} {% if show_hidden or not table.hidden %}
-

{{ table.name }}{% if table.private %} 🔒{% endif %}{% if table.hidden %} (hidden){% endif %}

-

{% for column in table.columns %}{{ column }}{% if not loop.last %}, {% endif %}{% endfor %}

-

{% if table.count is none %}Many rows{% elif table.count == count_limit + 1 %}>{{ "{:,}".format(count_limit) }} rows{% else %}{{ "{:,}".format(table.count) }} row{% if table.count == 1 %}{% else %}s{% endif %}{% endif %}

+

{{ table.name }}{% if table.hidden %} (hidden){% endif %}

+

{% for column in table.columns[:9] %}{{ column }}{% if not loop.last %}, {% endif %}{% endfor %}{% if table.columns|length > 9 %}...{% endif %}

+

{% if table.count is none %}Many rows{% else %}{{ "{:,}".format(table.count) }} row{% if table.count == 1 %}{% else %}s{% endif %}{% endif %}

{% endif %} {% endfor %} {% if hidden_count and not show_hidden %} -

... and {{ "{:,}".format(hidden_count) }} hidden table{% if hidden_count == 1 %}{% else %}s{% endif %}

+

... and {{ "{:,}".format(hidden_count) }} hidden table{% if hidden_count == 1 %}{% else %}s{% endif %}

{% endif %} {% if views %} -

Views

-
    +

    Views

    + +{% endif %} + +{% if queries %} +

    Queries

    + {% endif %} {% if allow_download %} -

    Download SQLite DB: {{ database }}.db {{ format_bytes(size) }}

    +

    Download SQLite DB: {{ database }}.db {{ format_bytes(size) }}

    {% endif %} {% include "_codemirror_foot.html" %} diff --git a/datasette/templates/debug_actions.html b/datasette/templates/debug_actions.html deleted file mode 100644 index 0ef7b329..00000000 --- a/datasette/templates/debug_actions.html +++ /dev/null @@ -1,43 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Registered Actions{% endblock %} - -{% block content %} -

    Registered actions

    - -{% set current_tab = "actions" %} -{% include "_permissions_debug_tabs.html" %} - -

    - This Datasette instance has registered {{ data|length }} action{{ data|length != 1 and "s" or "" }}. - Actions are used by the permission system to control access to different features. -

    - - - - - - - - - - - - - - - {% for action in data %} - - - - - - - - - - {% endfor %} - -
        Name | Abbr | Description | Resource | Takes Parent | Takes Child | Also Requires
        {{ action.name }} | {% if action.abbr %}{{ action.abbr }}{% endif %} | {{ action.description or "" }} | {% if action.resource_class %}{{ action.resource_class }}{% endif %} | {% if action.takes_parent %}✓{% endif %} | {% if action.takes_child %}✓{% endif %} | {% if action.also_requires %}{{ action.also_requires }}{% endif %}
        
    - -{% endblock %} diff --git a/datasette/templates/debug_allowed.html b/datasette/templates/debug_allowed.html deleted file mode 100644 index add3154a..00000000 --- a/datasette/templates/debug_allowed.html +++ /dev/null @@ -1,229 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Allowed Resources{% endblock %} - -{% block extra_head %} - -{% include "_permission_ui_styles.html" %} -{% include "_debug_common_functions.html" %} -{% endblock %} - -{% block content %} -

    Allowed resources

    - -{% set current_tab = "allowed" %} -{% include "_permissions_debug_tabs.html" %} - -

    Use this tool to check which resources the current actor is allowed to access for a given permission action. It queries the /-/allowed.json API endpoint.
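        A sketch of calling that endpoint directly from Python; the base URL is illustrative and the query parameter names (action, parent, child, n) are assumptions inferred from the form fields described below:

        import json
        import urllib.parse
        import urllib.request

        base_url = "http://127.0.0.1:8001"  # illustrative
        # Parameter names are assumptions based on this form, not confirmed API documentation
        params = {"action": "view-table", "parent": "mydb", "n": "20"}
        url = base_url + "/-/allowed.json?" + urllib.parse.urlencode(params)
        with urllib.request.urlopen(url) as response:
            print(json.dumps(json.load(response), indent=2))
        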

    - -{% if request.actor %} -

    Current actor: {{ request.actor.get("id", "anonymous") }}

    -{% else %} -

    Current actor: anonymous (not logged in)

    -{% endif %} - -
    -
    -
    - - - Only certain actions are supported by this endpoint -
    - -
    - - - Filter results to a specific parent resource -
    - -
    - - - Filter results to a specific child resource (requires parent to be set) -
    - -
    - - - Number of results per page (max 200) -
    - -
    - -
    -
    -
    - - - - - -{% endblock %} diff --git a/datasette/templates/debug_check.html b/datasette/templates/debug_check.html deleted file mode 100644 index c2e7997f..00000000 --- a/datasette/templates/debug_check.html +++ /dev/null @@ -1,270 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Permission Check{% endblock %} - -{% block extra_head %} - -{% include "_permission_ui_styles.html" %} -{% include "_debug_common_functions.html" %} - -{% endblock %} - -{% block content %} -

    Permission check

    - -{% set current_tab = "check" %} -{% include "_permissions_debug_tabs.html" %} - -

    Use this tool to test permission checks for the current actor. It queries the /-/check.json API endpoint.

    - -{% if request.actor %} -

    Current actor: {{ request.actor.get("id", "anonymous") }}

    -{% else %} -

    Current actor: anonymous (not logged in)

    -{% endif %} - -
    -
    -
    - - - The permission action to check -
    - -
    - - - For database-level permissions, specify the database name -
    - -
    - - - For table-level permissions, specify the table name (requires parent) -
    - -
    - -
    -
    -
    - - - - - -{% endblock %} diff --git a/datasette/templates/debug_permissions_playground.html b/datasette/templates/debug_permissions_playground.html deleted file mode 100644 index 91ce1fcf..00000000 --- a/datasette/templates/debug_permissions_playground.html +++ /dev/null @@ -1,166 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Debug permissions{% endblock %} - -{% block extra_head %} -{% include "_permission_ui_styles.html" %} - -{% endblock %} - -{% block content %} -

    Permission playground

    - -{% set current_tab = "permissions" %} -{% include "_permissions_debug_tabs.html" %} - -

    This tool lets you simulate an actor and a permission check for that actor.
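        An actor pasted into this form can include the "_r" restrictions block documented earlier in this diff (global "a", per-database "d", per-table "r"). A minimal sketch of how such an allowlist is evaluated, mirroring the restrictions_allow_action logic shown above; the helper name and the abbreviation pairs are illustrative:

        # Sketch of the three-level "_r" allowlist check; restrictions_allow_action in
        # datasette/default_permissions (shown earlier in this diff) is the real thing.
        def allowlisted(restrictions, names, database=None, table=None):
            # names is the set of equivalent names for one action, e.g. {"view-table", "vt"}
            if names & set(restrictions.get("a") or []):
                return True  # allowed globally
            if database and names & set((restrictions.get("d") or {}).get(database) or []):
                return True  # allowed for this database
            if database and table:
                per_table = ((restrictions.get("r") or {}).get(database) or {}).get(table) or []
                if names & set(per_table):
                    return True  # allowed for this table
            return False  # not explicitly allowed, so denied


        restrictions = {"a": ["vi"], "d": {"mydb": ["vt", "es"]}, "r": {"mydb": {"users": ["vt"]}}}
        assert allowlisted(restrictions, {"view-instance", "vi"})
        assert allowlisted(restrictions, {"execute-sql", "es"}, database="mydb")
        assert allowlisted(restrictions, {"view-table", "vt"}, database="mydb", table="users")
        assert not allowlisted(restrictions, {"drop-table", "dt"}, database="mydb", table="users")
        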

    - -
    -
    - -
    -
    - - -
    -
    -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    -
    - -
    -
    
    -    
    -
    - - - -

    Recent permissions checks

    - -

    - {% if filter != "all" %}All{% else %}All{% endif %}, - {% if filter != "exclude-yours" %}Exclude yours{% else %}Exclude yours{% endif %}, - {% if filter != "only-yours" %}Only yours{% else %}Only yours{% endif %} -

    - -{% if permission_checks %} - - - - - - - - - - - - - {% for check in permission_checks %} - - - - - - - - - {% endfor %} - -
        When | Action | Parent | Child | Actor | Result
        {{ check.when.split('T', 1)[0] }} {{ check.when.split('T', 1)[1].split('+', 1)[0].split('-', 1)[0].split('Z', 1)[0] }} | {{ check.action }} | {{ check.parent or '—' }} | {{ check.child or '—' }} | {% if check.actor %}{{ check.actor|tojson }}{% else %}anonymous{% endif %} | {% if check.result %}Allowed{% elif check.result is none %}No opinion{% else %}Denied{% endif %}
        
    -{% else %} -

    No permission checks have been recorded yet.

    -{% endif %} - -{% endblock %} diff --git a/datasette/templates/debug_rules.html b/datasette/templates/debug_rules.html deleted file mode 100644 index 9a290803..00000000 --- a/datasette/templates/debug_rules.html +++ /dev/null @@ -1,203 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Permission Rules{% endblock %} - -{% block extra_head %} - -{% include "_permission_ui_styles.html" %} -{% include "_debug_common_functions.html" %} -{% endblock %} - -{% block content %} -

    Permission rules

    - -{% set current_tab = "rules" %} -{% include "_permissions_debug_tabs.html" %} - -

    Use this tool to view the permission rules that allow the current actor to access resources for a given permission action. It queries the /-/rules.json API endpoint.

    - -{% if request.actor %} -

    Current actor: {{ request.actor.get("id", "anonymous") }}

    -{% else %} -

    Current actor: anonymous (not logged in)

    -{% endif %} - -
    -
    -
    - - - The permission action to check -
    - -
    - - - Number of results per page (max 200) -
    - -
    - -
    -
    -
    - - - - - -{% endblock %} diff --git a/datasette/templates/index.html b/datasette/templates/index.html index 03349279..69a34808 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -2,26 +2,18 @@ {% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %} -{% block extra_head %} -{% if noindex %}{% endif %} -{% endblock %} - {% block body_class %}index{% endblock %} {% block content %} -

    {{ metadata.title or "Datasette" }}{% if private %} 🔒{% endif %}

    - -{% set action_links, action_title = homepage_actions, "Homepage actions" %} -{% include "_action_menu.html" %} - -{{ top_homepage() }} +

    {{ metadata.title or "Datasette" }}

    {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} {% for database in databases %} -

    {{ database.name }}{% if database.private %} 🔒{% endif %}

    +

    {{ database.name }}

    + {% if database.comment %}

    {{ database.comment }}

    {% endif %}

    - {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.hidden_tables_count %}, {% endif -%} + {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif -%} {% if database.hidden_tables_count -%} {% if database.show_table_row_counts %}{{ "{:,}".format(database.hidden_table_rows_sum) }} rows in {% endif %}{{ database.hidden_tables_count }} hidden table{% if database.hidden_tables_count != 1 %}s{% endif -%} {% endif -%} @@ -30,7 +22,8 @@ {{ "{:,}".format(database.views_count) }} view{% if database.views_count != 1 %}s{% endif %} {% endif %}

    -

    {% for table in database.tables_and_views_truncated %}{{ table.name }}{% if table.private %} 🔒{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

    +

    {% for table in database.tables_and_views_truncated %}{{ table.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

    {% endfor %} {% endblock %} diff --git a/datasette/templates/logout.html b/datasette/templates/logout.html deleted file mode 100644 index c8fc642a..00000000 --- a/datasette/templates/logout.html +++ /dev/null @@ -1,18 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Log out{% endblock %} - -{% block content %} - -

    Log out

    - -

    You are logged in as {{ display_actor(actor) }}

    - -
    -
    - - -
    -
    - -{% endblock %} diff --git a/datasette/templates/messages_debug.html b/datasette/templates/messages_debug.html deleted file mode 100644 index 2940cd69..00000000 --- a/datasette/templates/messages_debug.html +++ /dev/null @@ -1,27 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Debug messages{% endblock %} - -{% block content %} - -

    Debug messages

    - -

    Set a message:

    - -
    -
    - -
    - -
    - - -
    -
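The form above exercises Datasette's messages mechanism. As a rough sketch, a plugin can set the same kind of message through datasette.add_message(); the /-/hello route and wording here are made up for illustration:

    from datasette import hookimpl
    from datasette.utils.asgi import Response


    @hookimpl
    def register_routes():
        return [(r"^/-/hello$", hello)]


    async def hello(datasette, request):
        # Stores a message that base.html renders once on the next page load;
        # datasette.INFO, datasette.WARNING and datasette.ERROR control the styling.
        datasette.add_message(request, "Hello from a plugin!", datasette.INFO)
        return Response.redirect("/")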
    - -{% endblock %} diff --git a/datasette/templates/patterns.html b/datasette/templates/patterns.html deleted file mode 100644 index 7770f7d4..00000000 --- a/datasette/templates/patterns.html +++ /dev/null @@ -1,507 +0,0 @@ - - - - Datasette: Pattern Portfolio - - - - - - - -
    - -
    -

    Pattern Portfolio

    -
    - - - - -

    Header for /database/table/row and Messages

    - -
    - -
    - -

    Example message

    -

    Example message

    -

    Example message

    - -

    .bd for /

    -
    -

    Datasette Fixtures

    - -

    - Data license: - Apache License 2.0 - · - Data source: - - tests/fixtures.py - · - About: - - About Datasette -

    -

    fixtures

    -

    - 1,258 rows in 24 tables, 206 rows in 5 hidden tables, 4 views -

    -

    compound_three_primary_keys, sortable, facetable, roadside_attraction_characteristics, simple_primary_key, ...

    -

    data

    -

    - 6 rows in 2 tables -

    -

    names, foo

    -
    - -

    .bd for /database

    -
    - -
    - -
    - - -

    - Data license: - Apache License 2.0 - · - Data source: - - tests/fixtures.py - · - About: - - About Datasette -

    -
    -

    Custom SQL query

    -

    -

    - - -

    -
    -
    -

    123_starts_with_digits

    -

    content

    -

    0 rows

    -
    -
    -

    Table With Space In Name

    -

    pk, content

    -

    0 rows

    -
    -
    -

    attraction_characteristic

    -

    pk, name

    -

    2 rows

    -
    -
    - -

    .bd for /database/table

    - -
    - -
    - -
    - -

    - Data license: - Apache License 2.0 - · - Data source: - - tests/fixtures.py - · - About: - - About Datasette -

    -

    3 rows - where characteristic_id = 2 -

    -
    -
    -
    -
    - -
    -
    - -
    -
    -
    -
    - -
    -
    - -
    -
    -
    -
    - -
    - - -
    -
    - -
    -

    2 extra where clauses

    - -
    - -

    View and edit SQL

    - - - -

    - Suggested facets: tags, created (date), tags (array) -

    - -
    - -
    -

    - tags (array) - - - -

    - -
    - -
    -

    - created - - - -

    - -
    - -
    -

    - city_id - - - -

    - -
    - -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
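The facet panels above ("tags (array)", "created", "city_id") correspond to Datasette's facet query-string parameters. A hedged sketch of requesting the same facets from the JSON API, assuming a local instance serving the fixtures database shown in this pattern portfolio (the shape of the response varies between Datasette versions):

    import json
    import urllib.request
    from urllib.parse import urlencode

    params = urlencode([
        ("_facet", "city_id"),        # plain column facet
        ("_facet_date", "created"),   # date facet
        ("_facet_array", "tags"),     # facet across JSON array values
    ])
    url = "http://127.0.0.1:8001/fixtures/facetable.json?" + params
    data = json.load(urllib.request.urlopen(url))
    print(json.dumps(data.get("facet_results"), indent=2))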
    - Link - - rowid ▼ - - attraction_id - - characteristic_id -
    1The Mystery Spot 1Paranormal 2
    2Winchester Mystery House 2Paranormal 2
    3Bigfoot Discovery Museum 4Paranormal 2
    -
    -

    Advanced export

    -

    JSON shape: - default, - array, - newline-delimited -

    -
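The "JSON shape" links above and the CSV options that follow map onto query-string options of the .json and .csv endpoints. A sketch, assuming a local instance with the fixtures database (the key names in the default JSON shape differ between Datasette versions, so ?_shape=array is used here):

    import csv
    import io
    import json
    import urllib.request

    base = "http://127.0.0.1:8001/fixtures/facetable"

    # ?_shape=array returns a bare JSON array of row objects
    rows = json.load(urllib.request.urlopen(base + ".json?_shape=array"))
    print(len(rows), "rows")

    # The CSV export can stream every matching row rather than a single page
    response = urllib.request.urlopen(base + ".csv?_stream=on")
    reader = csv.reader(io.TextIOWrapper(response, encoding="utf-8"))
    print(next(reader))  # header row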
    -

    - CSV options: - - - - - -

    -
    -
    -
    CREATE TABLE roadside_attraction_characteristics (
    -    attraction_id INTEGER REFERENCES roadside_attractions(pk),
    -    characteristic_id INTEGER REFERENCES attraction_characteristic(pk)
    -);
    -
    - -

    .bd for /database/table/row

    -
    -

    roadside_attractions: 2

    -

    This data as json

    - - - - - - - - - - - - - - - - - - - -
    - pk - - name - - address - - latitude - - longitude -
    2Winchester Mystery House525 South Winchester Boulevard, San Jose, CA 9512837.3184-121.9511
    -

    Links from other tables

    -
      -
    • - - 1 row - from attraction_id in roadside_attraction_characteristics -
    • -
    -
    - -

    .ft

    - - - -{% include "_close_open_menus.html" %} - - - diff --git a/datasette/templates/query.html b/datasette/templates/query.html index a6e9a3aa..7c6c59f3 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -3,7 +3,7 @@ {% block title %}{{ database }}{% if query and query.sql %}: {{ query.sql }}{% endif %}{% endblock %} {% block extra_head %} -{{- super() -}} +{{ super() }} {% if columns %} {% endblock %} {% block body_class %}table db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %} -{% block crumbs %} -{{ crumbs.nav(request=request, database=database, table=table) }} +{% block nav %} +

    + home / + {{ database }} +

    + {{ super() }} {% endblock %} {% block content %} - -{% set action_links, action_title = actions(), "View actions" if is_view else "Table actions" %} -{% include "_action_menu.html" %} -{{ top_table() }} +

    {{ metadata.title or table }}{% if is_view %} (view){% endif %}

    {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} -{% if metadata.get("columns") %} -
    - {% for column_name, column_description in metadata.columns.items() %} -
    {{ column_name }}
    {{ column_description }}
    - {% endfor %} -
    -{% endif %} - -{% if count or human_description_en %} -

    - {% if count == count_limit + 1 %}>{{ "{:,}".format(count_limit) }} rows - {% if allow_execute_sql and query.sql %} count all{% endif %} - {% elif count or count == 0 %}{{ "{:,}".format(count) }} row{% if count == 1 %}{% else %}s{% endif %}{% endif %} +{% if filtered_table_rows_count or human_description_en %} +

    {% if filtered_table_rows_count or filtered_table_rows_count == 0 %}{{ "{:,}".format(filtered_table_rows_count) }} row{% if filtered_table_rows_count == 1 %}{% else %}s{% endif %}{% endif %} {% if human_description_en %}{{ human_description_en }}{% endif %}

    {% endif %} -
    + {% if supports_search %}
    {% endif %} @@ -81,7 +69,7 @@
    @@ -100,6 +88,9 @@ {% endif %} + {% for facet in sorted_facet_results %} + + {% endfor %} {% for key, value in form_hidden_args %} {% endfor %} @@ -118,14 +109,16 @@ {% endif %} -{% if query.sql and allow_execute_sql %} -

    View and edit SQL

    +{% if query.sql and config.allow_sql %} +

    View and edit SQL

    {% endif %} {% if suggested_facets %} - {% include "_suggested_facets.html" %} +

    + Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if facet.type %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %} +

    {% endif %} {% if facets_timed_out %} @@ -133,7 +126,30 @@ {% endif %} {% if facet_results %} - {% include "_facet_results.html" %} +
    + {% for facet_info in sorted_facet_results %} +
    +

    + {{ facet_info.name }}{% if facet_info.type != "column" %} ({{ facet_info.type }}){% endif %} + {% if facet_info.hideable %} + + {% endif %} +

    +
      + {% for facet_value in facet_info.results %} + {% if not facet_value.selected %} +
    • {{ (facet_value.label if facet_value.label is not none else "_") }} {{ "{:,}".format(facet_value.count) }}
    • + {% else %} +
    • {{ facet_value.label }} · {{ "{:,}".format(facet_value.count) }}
    • + {% endif %} + {% endfor %} + {% if facet_info.truncated %} +
    • ...
    • + {% endif %} +
    +
    + {% endfor %} +
    {% endif %} {% include custom_table_templates %} @@ -152,12 +168,12 @@ object {% endif %}

    - +

    CSV options: {% if expandable_columns %}{% endif %} - {% if next_url and settings.allow_csv_stream %}{% endif %} + {% if next_url and config.allow_csv_stream %}{% endif %} {% for key, value in url_csv_hidden_args %} @@ -175,41 +191,4 @@

    {{ view_definition }}
    {% endif %} -{% if allow_execute_sql and query.sql %} - -{% endif %} - {% endblock %} diff --git a/datasette/tracer.py b/datasette/tracer.py index 9e66613b..e46a6fda 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -1,7 +1,5 @@ import asyncio from contextlib import contextmanager -from contextvars import ContextVar -from markupsafe import escape import time import json import traceback @@ -10,59 +8,40 @@ tracers = {} TRACE_RESERVED_KEYS = {"type", "start", "end", "duration_ms", "traceback"} -trace_task_id = ContextVar("trace_task_id", default=None) - def get_task_id(): - current = trace_task_id.get(None) - if current is not None: - return current try: loop = asyncio.get_event_loop() except RuntimeError: return None - return id(asyncio.current_task(loop=loop)) + return id(asyncio.Task.current_task(loop=loop)) @contextmanager -def trace_child_tasks(): - token = trace_task_id.set(get_task_id()) - yield - trace_task_id.reset(token) - - -@contextmanager -def trace(trace_type, **kwargs): +def trace(type, **kwargs): assert not TRACE_RESERVED_KEYS.intersection( kwargs.keys() - ), f".trace() keyword parameters cannot include {TRACE_RESERVED_KEYS}" + ), ".trace() keyword parameters cannot include {}".format(TRACE_RESERVED_KEYS) task_id = get_task_id() if task_id is None: - yield kwargs + yield return tracer = tracers.get(task_id) if tracer is None: - yield kwargs + yield return - start = time.perf_counter() - captured_error = None - try: - yield kwargs - except Exception as ex: - captured_error = ex - raise - finally: - end = time.perf_counter() - trace_info = { - "type": trace_type, - "start": start, - "end": end, - "duration_ms": (end - start) * 1000, - "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), - "error": str(captured_error) if captured_error else None, - } - trace_info.update(kwargs) - tracer.append(trace_info) + start = time.time() + yield + end = time.time() + trace_info = { + "type": type, + "start": start, + "end": end, + "duration_ms": (end - start) * 1000, + "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), + } + trace_info.update(kwargs) + tracer.append(trace_info) @contextmanager @@ -88,7 +67,7 @@ class AsgiTracer: if b"_trace=1" not in scope.get("query_string", b"").split(b"&"): await self.app(scope, receive, send) return - trace_start = time.perf_counter() + trace_start = time.time() traces = [] accumulated_body = b"" @@ -97,7 +76,6 @@ class AsgiTracer: async def wrapped_send(message): nonlocal accumulated_body, size_limit_exceeded, response_headers - if message["type"] == "http.response.start": response_headers = message["headers"] await send(message) @@ -110,12 +88,11 @@ class AsgiTracer: # Accumulate body until the end or until size is exceeded accumulated_body += message["body"] if len(accumulated_body) > self.max_body_bytes: - # Send what we have accumulated so far await send( { "type": "http.response.body", "body": accumulated_body, - "more_body": bool(message.get("more_body")), + "more_body": True, } ) size_limit_exceeded = True @@ -125,7 +102,7 @@ class AsgiTracer: # We have all the body - modify it and send the result # TODO: What to do about Content-Type or other cases? 
         trace_info = {
-            "request_duration_ms": 1000 * (time.perf_counter() - trace_start),
+            "request_duration_ms": 1000 * (time.time() - trace_start),
             "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces),
             "num_traces": len(traces),
             "traces": traces,
@@ -139,8 +116,8 @@ class AsgiTracer:
         except IndexError:
             content_type = ""
         if "text/html" in content_type and b"</body>" in accumulated_body:
-            extra = escape(json.dumps(trace_info, indent=2))
-            extra_html = f"<pre>{extra}</pre>".encode("utf8")
+            extra = json.dumps(trace_info, indent=2)
+            extra_html = "<pre>{}</pre>
    ".format(extra).encode("utf8") accumulated_body = accumulated_body.replace(b"", extra_html) elif "json" in content_type and accumulated_body.startswith(b"{"): data = json.loads(accumulated_body.decode("utf8")) diff --git a/datasette/url_builder.py b/datasette/url_builder.py deleted file mode 100644 index 16b3d42b..00000000 --- a/datasette/url_builder.py +++ /dev/null @@ -1,61 +0,0 @@ -from .utils import tilde_encode, path_with_format, PrefixedUrlString -import urllib - - -class Urls: - def __init__(self, ds): - self.ds = ds - - def path(self, path, format=None): - if not isinstance(path, PrefixedUrlString): - if path.startswith("/"): - path = path[1:] - path = self.ds.setting("base_url") + path - if format is not None: - path = path_with_format(path=path, format=format) - return PrefixedUrlString(path) - - def instance(self, format=None): - return self.path("", format=format) - - def static(self, path): - return self.path(f"-/static/{path}") - - def static_plugins(self, plugin, path): - return self.path(f"-/static-plugins/{plugin}/{path}") - - def logout(self): - return self.path("-/logout") - - def database(self, database, format=None): - db = self.ds.get_database(database) - return self.path(tilde_encode(db.route), format=format) - - def database_query(self, database, sql, format=None): - path = f"{self.database(database)}/-/query?" + urllib.parse.urlencode( - {"sql": sql} - ) - return self.path(path, format=format) - - def table(self, database, table, format=None): - path = f"{self.database(database)}/{tilde_encode(table)}" - if format is not None: - path = path_with_format(path=path, format=format) - return PrefixedUrlString(path) - - def query(self, database, query, format=None): - path = f"{self.database(database)}/{tilde_encode(query)}" - if format is not None: - path = path_with_format(path=path, format=format) - return PrefixedUrlString(path) - - def row(self, database, table, row_path, format=None): - path = f"{self.table(database, table)}/{row_path}" - if format is not None: - path = path_with_format(path=path, format=format) - return PrefixedUrlString(path) - - def row_blob(self, database, table, row_path, column): - return self.table(database, table) + "/{}.blob?_blob_column={}".format( - row_path, urllib.parse.quote_plus(column) - ) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index ac2c74da..87147baf 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1,85 +1,24 @@ -import asyncio from contextlib import contextmanager -import aiofiles -import click -from collections import OrderedDict, namedtuple, Counter -import copy -import dataclasses +from collections import OrderedDict import base64 +import click import hashlib -import inspect import json -import markupsafe -import mergedeep import os +import pkg_resources import re import shlex import tempfile -import typing import time import types -import secrets import shutil -from typing import Iterable, List, Tuple import urllib -import yaml -from .shutil_backport import copytree -from .sqlite import sqlite3, supports_table_xinfo - -if typing.TYPE_CHECKING: - from datasette.database import Database - from datasette.permissions import Resource - - -@dataclasses.dataclass -class PaginatedResources: - """Paginated results from allowed_resources query.""" - - resources: List["Resource"] - next: str | None # Keyset token for next page (None if no more results) - _datasette: typing.Any = dataclasses.field(default=None, repr=False) - _action: str = dataclasses.field(default=None, 
repr=False) - _actor: typing.Any = dataclasses.field(default=None, repr=False) - _parent: str | None = dataclasses.field(default=None, repr=False) - _include_is_private: bool = dataclasses.field(default=False, repr=False) - _include_reasons: bool = dataclasses.field(default=False, repr=False) - _limit: int = dataclasses.field(default=100, repr=False) - - async def all(self): - """ - Async generator that yields all resources across all pages. - - Automatically handles pagination under the hood. This is useful when you need - to iterate through all results without manually managing pagination tokens. - - Yields: - Resource objects one at a time - - Example: - page = await datasette.allowed_resources("view-table", actor) - async for table in page.all(): - print(f"{table.parent}/{table.child}") - """ - # Yield all resources from current page - for resource in self.resources: - yield resource - - # Continue fetching subsequent pages if there are more - next_token = self.next - while next_token: - page = await self._datasette.allowed_resources( - self._action, - self._actor, - parent=self._parent, - include_is_private=self._include_is_private, - include_reasons=self._include_reasons, - limit=self._limit, - next=next_token, - ) - for resource in page.resources: - yield resource - next_token = page.next +import numbers +try: + import pysqlite3 as sqlite3 +except ImportError: + import sqlite3 # From https://www.sqlite.org/lang_keywords.html reserved_words = set( @@ -99,85 +38,43 @@ reserved_words = set( ).split() ) -APT_GET_DOCKERFILE_EXTRAS = r""" +SPATIALITE_DOCKERFILE_EXTRAS = r""" RUN apt-get update && \ - apt-get install -y {} && \ + apt-get install -y python3-dev gcc libsqlite3-mod-spatialite && \ rm -rf /var/lib/apt/lists/* +ENV SQLITE_EXTENSIONS /usr/lib/x86_64-linux-gnu/mod_spatialite.so """ -# Can replace with sqlite-utils when I add that dependency -SPATIALITE_PATHS = ( - "/usr/lib/x86_64-linux-gnu/mod_spatialite.so", - "/usr/local/lib/mod_spatialite.dylib", - "/usr/local/lib/mod_spatialite.so", - "/opt/homebrew/lib/mod_spatialite.dylib", -) -# Used to display /-/versions.json SpatiaLite information -SPATIALITE_FUNCTIONS = ( - "spatialite_version", - "spatialite_target_cpu", - "check_strict_sql_quoting", - "freexl_version", - "proj_version", - "geos_version", - "rttopo_version", - "libxml2_version", - "HasIconv", - "HasMathSQL", - "HasGeoCallbacks", - "HasProj", - "HasProj6", - "HasGeos", - "HasGeosAdvanced", - "HasGeosTrunk", - "HasGeosReentrant", - "HasGeosOnlyReentrant", - "HasMiniZip", - "HasRtTopo", - "HasLibXML2", - "HasEpsg", - "HasFreeXL", - "HasGeoPackage", - "HasGCP", - "HasTopology", - "HasKNN", - "HasRouting", -) -# Length of hash subset used in hashed URLs: -HASH_LENGTH = 7 + +class QueryInterrupted(Exception): + pass -# Can replace this with Column from sqlite_utils when I add that dependency -Column = namedtuple( - "Column", ("cid", "name", "type", "notnull", "default_value", "is_pk", "hidden") -) +class Results: + def __init__(self, rows, truncated, description): + self.rows = rows + self.truncated = truncated + self.description = description -functions_marked_as_documented = [] + @property + def columns(self): + return [d[0] for d in self.description] + def __iter__(self): + return iter(self.rows) -def documented(fn): - functions_marked_as_documented.append(fn) - return fn - - -@documented -async def await_me_maybe(value: typing.Any) -> typing.Any: - "If value is callable, call it. If awaitable, await it. Otherwise return it." 
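PaginatedResources.all() above hides the keyset pagination; this sketch (assuming a configured Datasette instance and an actor dictionary) shows the equivalent manual loop using the next token:

    # Sketch only - mirrors what PaginatedResources.all() does internally
    async def list_allowed_tables(datasette, actor):
        seen = []
        next_token = None
        while True:
            page = await datasette.allowed_resources(
                "view-table", actor, limit=50, next=next_token
            )
            for resource in page.resources:
                seen.append((resource.parent, resource.child))
            next_token = page.next
            if next_token is None:
                return seen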
- if callable(value): - value = value() - if asyncio.iscoroutine(value): - value = await value - return value + def __len__(self): + return len(self.rows) def urlsafe_components(token): - """Splits token on commas and tilde-decodes each component""" - return [tilde_decode(b) for b in token.split(",")] + "Splits token on commas and URL decodes each component" + return [urllib.parse.unquote_plus(b) for b in token.split(",")] def path_from_row_pks(row, pks, use_rowid, quote=True): - """Generate an optionally tilde-encoded unique identifier - for a row from its primary keys.""" + """ Generate an optionally URL-quoted unique identifier + for a row from its primary keys.""" if use_rowid: bits = [row["rowid"]] else: @@ -185,7 +82,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True): row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks ] if quote: - bits = [tilde_encode(str(bit)) for bit in bits] + bits = [urllib.parse.quote_plus(str(bit)) for bit in bits] else: bits = [str(bit) for bit in bits] @@ -209,10 +106,13 @@ def compound_keys_after_sql(pks, start_index=0): last = pks_left[-1] rest = pks_left[:-1] and_clauses = [ - f"{escape_sqlite(pk)} = :p{i + start_index}" for i, pk in enumerate(rest) + "{} = :p{}".format(escape_sqlite(pk), (i + start_index)) + for i, pk in enumerate(rest) ] - and_clauses.append(f"{escape_sqlite(last)} > :p{len(rest) + start_index}") - or_clauses.append(f"({' and '.join(and_clauses)})") + and_clauses.append( + "{} > :p{}".format(escape_sqlite(last), (len(rest) + start_index)) + ) + or_clauses.append("({})".format(" and ".join(and_clauses))) pks_left.pop() or_clauses.reverse() return "({})".format("\n or\n".join(or_clauses)) @@ -238,18 +138,17 @@ class CustomJSONEncoder(json.JSONEncoder): @contextmanager def sqlite_timelimit(conn, ms): - deadline = time.perf_counter() + (ms / 1000) + deadline = time.time() + (ms / 1000) # n is the number of SQLite virtual machine instructions that will be - # executed between each check. It takes about 0.08ms to execute 1000. - # https://github.com/simonw/datasette/issues/1679 + # executed between each check. It's hard to know what to pick here. 
+ # After some experimentation, I've decided to go with 1000 by default and + # 1 for time limits that are less than 50ms n = 1000 - if ms <= 20: - # This mainly happens while executing our test suite + if ms < 50: n = 1 def handler(): - if time.perf_counter() >= deadline: - # Returning 1 terminates the query with an error + if time.time() >= deadline: return 1 conn.set_progress_handler(handler, n) @@ -263,59 +162,16 @@ class InvalidSql(Exception): pass -# Allow SQL to start with a /* */ or -- comment -comment_re = ( - # Start of string, then any amount of whitespace - r"^\s*(" - + - # Comment that starts with -- and ends at a newline - r"(?:\-\-.*?\n\s*)" - + - # Comment that starts with /* and ends with */ - but does not have */ in it - r"|(?:\/\*((?!\*\/)[\s\S])*\*\/)" - + - # Whitespace - r"\s*)*\s*" -) - allowed_sql_res = [ - re.compile(comment_re + r"select\b"), - re.compile(comment_re + r"explain\s+select\b"), - re.compile(comment_re + r"explain\s+query\s+plan\s+select\b"), - re.compile(comment_re + r"with\b"), - re.compile(comment_re + r"explain\s+with\b"), - re.compile(comment_re + r"explain\s+query\s+plan\s+with\b"), -] - -allowed_pragmas = ( - "database_list", - "foreign_key_list", - "function_list", - "index_info", - "index_list", - "index_xinfo", - "page_count", - "max_page_count", - "page_size", - "schema_version", - "table_info", - "table_xinfo", - "table_list", -) -disallawed_sql_res = [ - ( - re.compile(f"pragma(?!_({'|'.join(allowed_pragmas)}))"), - "Statement contained a disallowed PRAGMA. Allowed pragma functions are {}".format( - ", ".join("pragma_{}()".format(pragma) for pragma in allowed_pragmas) - ), - ) + re.compile(r"^select\b"), + re.compile(r"^explain select\b"), + re.compile(r"^explain query plan select\b"), + re.compile(r"^with\b"), ] +disallawed_sql_res = [(re.compile("pragma"), "Statement may not contain PRAGMA")] def validate_sql_select(sql): - sql = "\n".join( - line for line in sql.split("\n") if not line.strip().startswith("--") - ) sql = sql.strip().lower() if not any(r.match(sql) for r in allowed_sql_res): raise InvalidSql("Statement must be a SELECT") @@ -326,7 +182,7 @@ def validate_sql_select(sql): def append_querystring(url, querystring): op = "&" if ("?" in url) else "?" 
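The sqlite_timelimit() context manager above is built on SQLite's progress handler; here is a standalone sketch of the same interruption trick:

    import sqlite3
    import time

    conn = sqlite3.connect(":memory:")
    deadline = time.perf_counter() + 0.05  # 50ms budget

    def handler():
        # A non-zero return value tells SQLite to abort the running statement
        return 1 if time.perf_counter() >= deadline else 0

    conn.set_progress_handler(handler, 1000)  # called every 1000 VM instructions
    try:
        conn.execute(
            "with recursive c(x) as (select 1 union all select x + 1 from c) "
            "select count(*) from c"
        ).fetchone()
    except sqlite3.OperationalError as ex:
        print("interrupted:", ex)
    finally:
        conn.set_progress_handler(None, 1000)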
- return f"{url}{op}{querystring}" + return "{}{}{}".format(url, op, querystring) def path_with_added_args(request, args, path=None): @@ -341,7 +197,7 @@ def path_with_added_args(request, args, path=None): current.extend([(key, value) for key, value in args if value is not None]) query_string = urllib.parse.urlencode(current) if query_string: - query_string = f"?{query_string}" + query_string = "?{}".format(query_string) return path + query_string @@ -370,7 +226,7 @@ def path_with_removed_args(request, args, path=None): current.append((key, value)) query_string = urllib.parse.urlencode(current) if query_string: - query_string = f"?{query_string}" + query_string = "?{}".format(query_string) return path + query_string @@ -386,7 +242,7 @@ def path_with_replaced_args(request, args, path=None): current.extend([p for p in args if p[1] is not None]) query_string = urllib.parse.urlencode(current) if query_string: - query_string = f"?{query_string}" + query_string = "?{}".format(query_string) return path + query_string @@ -395,17 +251,14 @@ _boring_keyword_re = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") def escape_css_string(s): - return _css_re.sub( - lambda m: "\\" + (f"{ord(m.group()):X}".zfill(6)), - s.replace("\r\n", "\n"), - ) + return _css_re.sub(lambda m: "\\{:X}".format(ord(m.group())), s) def escape_sqlite(s): if _boring_keyword_re.match(s) and (s.lower() not in reserved_words): return s else: - return f"[{s}]" + return "[{}]".format(s) def make_dockerfile( @@ -419,77 +272,58 @@ def make_dockerfile( install, spatialite, version_note, - secret, environment_variables=None, - port=8001, - apt_get_extras=None, ): cmd = ["datasette", "serve", "--host", "0.0.0.0"] - environment_variables = environment_variables or {} - environment_variables["DATASETTE_SECRET"] = secret - apt_get_extras = apt_get_extras or [] for filename in files: cmd.extend(["-i", filename]) cmd.extend(["--cors", "--inspect-file", "inspect-data.json"]) if metadata_file: - cmd.extend(["--metadata", f"{metadata_file}"]) + cmd.extend(["--metadata", "{}".format(metadata_file)]) if template_dir: cmd.extend(["--template-dir", "templates/"]) if plugins_dir: cmd.extend(["--plugins-dir", "plugins/"]) if version_note: - cmd.extend(["--version-note", f"{version_note}"]) + cmd.extend(["--version-note", "{}".format(version_note)]) if static: for mount_point, _ in static: - cmd.extend(["--static", f"{mount_point}:{mount_point}"]) + cmd.extend(["--static", "{}:{}".format(mount_point, mount_point)]) if extra_options: for opt in extra_options.split(): - cmd.append(f"{opt}") + cmd.append("{}".format(opt)) cmd = [shlex.quote(part) for part in cmd] # port attribute is a (fixed) env variable and should not be quoted cmd.extend(["--port", "$PORT"]) cmd = " ".join(cmd) if branch: - install = [f"https://github.com/simonw/datasette/archive/{branch}.zip"] + list( - install - ) + install = [ + "https://github.com/simonw/datasette/archive/{}.zip".format(branch) + ] + list(install) else: install = ["datasette"] + list(install) - apt_get_extras_ = [] - apt_get_extras_.extend(apt_get_extras) - apt_get_extras = apt_get_extras_ - if spatialite: - apt_get_extras.extend(["python3-dev", "gcc", "libsqlite3-mod-spatialite"]) - environment_variables["SQLITE_EXTENSIONS"] = ( - "/usr/lib/x86_64-linux-gnu/mod_spatialite.so" - ) return """ -FROM python:3.11.0-slim-bullseye +FROM python:3.6 COPY . 
/app WORKDIR /app -{apt_get_extras} +{spatialite_extras} {environment_variables} RUN pip install -U {install_from} RUN datasette inspect {files} --inspect-file inspect-data.json -ENV PORT {port} -EXPOSE {port} +ENV PORT 8001 +EXPOSE 8001 CMD {cmd}""".format( - apt_get_extras=( - APT_GET_DOCKERFILE_EXTRAS.format(" ".join(apt_get_extras)) - if apt_get_extras - else "" - ), environment_variables="\n".join( [ "ENV {} '{}'".format(key, value) - for key, value in environment_variables.items() + for key, value in (environment_variables or {}).items() ] ), - install_from=" ".join(install), files=" ".join(files), - port=port, cmd=cmd, + install_from=" ".join(install), + spatialite_extras=SPATIALITE_DOCKERFILE_EXTRAS if spatialite else "", ).strip() @@ -506,11 +340,8 @@ def temporary_docker_directory( install, spatialite, version_note, - secret, extra_metadata=None, environment_variables=None, - port=8001, - apt_get_extras=None, ): extra_metadata = extra_metadata or {} tmp = tempfile.TemporaryDirectory() @@ -521,14 +352,12 @@ def temporary_docker_directory( file_paths = [os.path.join(saved_cwd, file_path) for file_path in files] file_names = [os.path.split(f)[-1] for f in files] if metadata: - metadata_content = parse_metadata(metadata.read()) + metadata_content = json.load(metadata) else: metadata_content = {} - # Merge in the non-null values in extra_metadata - mergedeep.merge( - metadata_content, - {key: value for key, value in extra_metadata.items() if value is not None}, - ) + for key, value in extra_metadata.items(): + if value: + metadata_content[key] = value try: dockerfile = make_dockerfile( file_names, @@ -541,17 +370,12 @@ def temporary_docker_directory( install, spatialite, version_note, - secret, environment_variables, - port=port, - apt_get_extras=apt_get_extras, ) os.chdir(datasette_dir) if metadata_content: - with open("metadata.json", "w") as fp: - fp.write(json.dumps(metadata_content, indent=2)) - with open("Dockerfile", "w") as fp: - fp.write(dockerfile) + open("metadata.json", "w").write(json.dumps(metadata_content, indent=2)) + open("Dockerfile", "w").write(dockerfile) for path, filename in zip(file_paths, file_names): link_or_copy(path, os.path.join(datasette_dir, filename)) if template_dir: @@ -575,39 +399,26 @@ def temporary_docker_directory( def detect_primary_keys(conn, table): - """Figure out primary keys for a table.""" - columns = table_column_details(conn, table) - pks = [column for column in columns if column.is_pk] - pks.sort(key=lambda column: column.is_pk) - return [column.name for column in pks] + " Figure out primary keys for a table. 
" + table_info_rows = [ + row + for row in conn.execute('PRAGMA table_info("{}")'.format(table)).fetchall() + if row[-1] + ] + table_info_rows.sort(key=lambda row: row[-1]) + return [str(r[1]) for r in table_info_rows] def get_outbound_foreign_keys(conn, table): - infos = conn.execute(f"PRAGMA foreign_key_list([{table}])").fetchall() + infos = conn.execute("PRAGMA foreign_key_list([{}])".format(table)).fetchall() fks = [] for info in infos: if info is not None: id, seq, table_name, from_, to_, on_update, on_delete, match = info fks.append( - { - "column": from_, - "other_table": table_name, - "other_column": to_, - "id": id, - "seq": seq, - } + {"other_table": table_name, "column": from_, "other_column": to_} ) - # Filter out compound foreign keys by removing any where "id" is not unique - id_counts = Counter(fk["id"] for fk in fks) - return [ - { - "column": fk["column"], - "other_table": fk["other_table"], - "other_column": fk["other_column"], - } - for fk in fks - if id_counts[fk["id"]] == 1 - ] + return fks def get_all_foreign_keys(conn): @@ -618,21 +429,20 @@ def get_all_foreign_keys(conn): for table in tables: table_to_foreign_keys[table] = {"incoming": [], "outgoing": []} for table in tables: - fks = get_outbound_foreign_keys(conn, table) - for fk in fks: - table_name = fk["other_table"] - from_ = fk["column"] - to_ = fk["other_column"] - if table_name not in table_to_foreign_keys: - # Weird edge case where something refers to a table that does - # not actually exist - continue - table_to_foreign_keys[table_name]["incoming"].append( - {"other_table": table, "column": to_, "other_column": from_} - ) - table_to_foreign_keys[table]["outgoing"].append( - {"other_table": table_name, "column": from_, "other_column": to_} - ) + infos = conn.execute("PRAGMA foreign_key_list([{}])".format(table)).fetchall() + for info in infos: + if info is not None: + id, seq, table_name, from_, to_, on_update, on_delete, match = info + if table_name not in table_to_foreign_keys: + # Weird edge case where something refers to a table that does + # not actually exist + continue + table_to_foreign_keys[table_name]["incoming"].append( + {"other_table": table, "column": to_, "other_column": from_} + ) + table_to_foreign_keys[table]["outgoing"].append( + {"other_table": table_name, "column": from_, "other_column": to_} + ) return table_to_foreign_keys @@ -645,7 +455,7 @@ def detect_spatialite(conn): def detect_fts(conn, table): - """Detect if table has a corresponding FTS virtual table and return it""" + "Detect if table has a corresponding FTS virtual table and return it" rows = conn.execute(detect_fts_sql(table)).fetchall() if len(rows) == 0: return None @@ -659,14 +469,13 @@ def detect_fts_sql(table): where rootpage = 0 and ( sql like '%VIRTUAL TABLE%USING FTS%content="{table}"%' - or sql like '%VIRTUAL TABLE%USING FTS%content=[{table}]%' or ( tbl_name = "{table}" and sql like '%VIRTUAL TABLE%USING FTS%' ) ) """.format( - table=table.replace("'", "''") + table=table ) @@ -681,26 +490,12 @@ def detect_json1(conn=None): def table_columns(conn, table): - return [column.name for column in table_column_details(conn, table)] - - -def table_column_details(conn, table): - if supports_table_xinfo(): - # table_xinfo was added in 3.26.0 - return [ - Column(*r) - for r in conn.execute( - f"PRAGMA table_xinfo({escape_sqlite(table)});" - ).fetchall() - ] - else: - # Treat hidden as 0 for all columns - return [ - Column(*(list(r) + [0])) - for r in conn.execute( - f"PRAGMA table_info({escape_sqlite(table)});" - ).fetchall() 
- ] + return [ + r[1] + for r in conn.execute( + "PRAGMA table_info({});".format(escape_sqlite(table)) + ).fetchall() + ] filter_column_re = re.compile(r"^_filter_column_\d+$") @@ -715,7 +510,9 @@ def filters_should_redirect(special_args): if "__" in filter_op: filter_op, filter_value = filter_op.split("__", 1) if filter_column: - redirect_params.append((f"{filter_column}__{filter_op}", filter_value)) + redirect_params.append( + ("{}__{}".format(filter_column, filter_op), filter_value) + ) for key in ("_filter_column", "_filter_op", "_filter_value"): if key in special_args: redirect_params.append((key, None)) @@ -724,17 +521,17 @@ def filters_should_redirect(special_args): for column_key in column_keys: number = column_key.split("_")[-1] column = special_args[column_key] - op = special_args.get(f"_filter_op_{number}") or "exact" - value = special_args.get(f"_filter_value_{number}") or "" + op = special_args.get("_filter_op_{}".format(number)) or "exact" + value = special_args.get("_filter_value_{}".format(number)) or "" if "__" in op: op, value = op.split("__", 1) if column: - redirect_params.append((f"{column}__{op}", value)) + redirect_params.append(("{}__{}".format(column, op), value)) redirect_params.extend( [ - (f"_filter_column_{number}", None), - (f"_filter_op_{number}", None), - (f"_filter_value_{number}", None), + ("_filter_column_{}".format(number), None), + ("_filter_op_{}".format(number), None), + ("_filter_value_{}".format(number), None), ] ) return redirect_params @@ -744,7 +541,7 @@ whitespace_re = re.compile(r"\s") def is_url(value): - """Must start with http:// or https:// and contain JUST a URL""" + "Must start with http:// or https:// and contain JUST a URL" if not isinstance(value, str): return False if not value.startswith("http://") and not value.startswith("https://"): @@ -770,7 +567,7 @@ def to_css_class(s): """ if css_class_re.match(s): return s - md5_suffix = md5_not_usedforsecurity(s)[:6] + md5_suffix = hashlib.md5(s.encode("utf8")).hexdigest()[:6] # Strip leading _, - s = s.lstrip("_").lstrip("-") # Replace any whitespace with hyphens @@ -794,9 +591,9 @@ def link_or_copy(src, dst): def link_or_copy_directory(src, dst): try: - copytree(src, dst, copy_function=os.link, dirs_exist_ok=True) + shutil.copytree(src, dst, copy_function=os.link) except OSError: - copytree(src, dst, dirs_exist_ok=True) + shutil.copytree(src, dst) def module_from_path(path, name): @@ -809,25 +606,65 @@ def module_from_path(path, name): return mod -def path_with_format( - *, request=None, path=None, format=None, extra_qs=None, replace_format=None -): +def get_plugins(pm): + plugins = [] + plugin_to_distinfo = dict(pm.list_plugin_distinfo()) + for plugin in pm.get_plugins(): + static_path = None + templates_path = None + try: + if pkg_resources.resource_isdir(plugin.__name__, "static"): + static_path = pkg_resources.resource_filename(plugin.__name__, "static") + if pkg_resources.resource_isdir(plugin.__name__, "templates"): + templates_path = pkg_resources.resource_filename( + plugin.__name__, "templates" + ) + except (KeyError, ImportError): + # Caused by --plugins_dir= plugins - KeyError/ImportError thrown in Py3.5 + pass + plugin_info = { + "name": plugin.__name__, + "static_path": static_path, + "templates_path": templates_path, + } + distinfo = plugin_to_distinfo.get(plugin) + if distinfo: + plugin_info["version"] = distinfo.version + plugins.append(plugin_info) + return plugins + + +async def resolve_table_and_format(table_and_format, table_exists, allowed_formats=[]): + if "." 
in table_and_format: + # Check if a table exists with this exact name + it_exists = await table_exists(table_and_format) + if it_exists: + return table_and_format, None + + # Check if table ends with a known format + formats = list(allowed_formats) + ["csv", "jsono"] + for _format in formats: + if table_and_format.endswith(".{}".format(_format)): + table = table_and_format[: -(len(_format) + 1)] + return table, _format + return table_and_format, None + + +def path_with_format(request, format, extra_qs=None): qs = extra_qs or {} - path = request.path if request else path - if replace_format and path.endswith(f".{replace_format}"): - path = path[: -(1 + len(replace_format))] - if "." in path: + path = request.path + if "." in request.path: qs["_format"] = format else: - path = f"{path}.{format}" + path = "{}.{}".format(path, format) if qs: extra = urllib.parse.urlencode(sorted(qs.items())) - if request and request.query_string: - path = f"{path}?{request.query_string}&{extra}" + if request.query_string: + path = "{}?{}&{}".format(path, request.query_string, extra) else: - path = f"{path}?{extra}" - elif request and request.query_string: - path = f"{path}?{request.query_string}" + path = "{}?{}".format(path, extra) + elif request.query_string: + path = "{}?{}".format(path, request.query_string) return path @@ -873,65 +710,38 @@ class LimitedWriter: async def write(self, bytes): self.bytes_count += len(bytes) if self.limit_bytes and (self.bytes_count > self.limit_bytes): - raise WriteLimitExceeded(f"CSV contains more than {self.limit_bytes} bytes") + raise WriteLimitExceeded( + "CSV contains more than {} bytes".format(self.limit_bytes) + ) await self.writer.write(bytes) -class EscapeHtmlWriter: - def __init__(self, writer): - self.writer = writer - - async def write(self, content): - await self.writer.write(markupsafe.escape(content)) - - _infinities = {float("inf"), float("-inf")} def remove_infinites(row): - to_check = row - if isinstance(row, dict): - to_check = row.values() - if not any((c in _infinities) if isinstance(c, float) else 0 for c in to_check): - return row - if isinstance(row, dict): - return { - k: (None if (isinstance(v, float) and v in _infinities) else v) - for k, v in row.items() - } - else: + if any((c in _infinities) if isinstance(c, float) else 0 for c in row): return [None if (isinstance(c, float) and c in _infinities) else c for c in row] + return row class StaticMount(click.ParamType): - name = "mount:directory" + name = "static mount" def convert(self, value, param, ctx): if ":" not in value: self.fail( - f'"{value}" should be of format mountpoint:directory', + '"{}" should be of format mountpoint:directory'.format(value), param, ctx, ) path, dirpath = value.split(":", 1) dirpath = os.path.abspath(dirpath) if not os.path.exists(dirpath) or not os.path.isdir(dirpath): - self.fail(f"{value} is not a valid directory path", param, ctx) + self.fail("%s is not a valid directory path" % value, param, ctx) return path, dirpath -# The --load-extension parameter can optionally include a specific entrypoint. -# This is done by appending ":entrypoint_name" after supplying the path to the extension -class LoadExtension(click.ParamType): - name = "path:entrypoint?" 
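remove_infinites() above exists because JSON has no way to represent infinity; assuming it is importable from datasette.utils, the behaviour looks like this:

    from datasette.utils import remove_infinites

    row = [1.5, float("inf"), "ok", float("-inf")]
    print(remove_infinites(row))  # [1.5, None, 'ok', None]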
- - def convert(self, value, param, ctx): - if ":" not in value: - return value - path, entrypoint = value.split(":", 1) - return path, entrypoint - - def format_bytes(bytes): current = float(bytes) for unit in ("bytes", "KB", "MB", "GB", "TB"): @@ -939,577 +749,19 @@ def format_bytes(bytes): break current = current / 1024 if unit == "bytes": - return f"{int(current)} {unit}" + return "{} {}".format(int(current), unit) else: - return f"{current:.1f} {unit}" + return "{:.1f} {}".format(current, unit) -_escape_fts_re = re.compile(r'\s+|(".*?")') - - -def escape_fts(query): - # If query has unbalanced ", add one at end - if query.count('"') % 2: - query += '"' - bits = _escape_fts_re.split(query) - bits = [b for b in bits if b and b != '""'] - return " ".join( - '"{}"'.format(bit) if not bit.startswith('"') else bit for bit in bits - ) - - -class MultiParams: - def __init__(self, data): - # data is a dictionary of key => [list, of, values] or a list of [["key", "value"]] pairs - if isinstance(data, dict): - for key in data: - assert isinstance( - data[key], (list, tuple) - ), "dictionary data should be a dictionary of key => [list]" - self._data = data - elif isinstance(data, list) or isinstance(data, tuple): - new_data = {} - for item in data: - assert ( - isinstance(item, (list, tuple)) and len(item) == 2 - ), "list data should be a list of [key, value] pairs" - key, value = item - new_data.setdefault(key, []).append(value) - self._data = new_data - - def __repr__(self): - return f"" - - def __contains__(self, key): - return key in self._data - - def __getitem__(self, key): - return self._data[key][0] - - def keys(self): - return self._data.keys() - - def __iter__(self): - yield from self._data.keys() - - def __len__(self): - return len(self._data) - +class RequestParameters(dict): def get(self, name, default=None): - """Return first value in the list, if available""" + "Return first value in the list, if available" try: - return self._data.get(name)[0] + return super().get(name)[0] except (KeyError, TypeError): return default - def getlist(self, name): - """Return full list""" - return self._data.get(name) or [] - - -class ConnectionProblem(Exception): - pass - - -class SpatialiteConnectionProblem(ConnectionProblem): - pass - - -def check_connection(conn): - tables = [ - r[0] - for r in conn.execute( - "select name from sqlite_master where type='table'" - ).fetchall() - ] - for table in tables: - try: - conn.execute( - f"PRAGMA table_info({escape_sqlite(table)});", - ) - except sqlite3.OperationalError as e: - if e.args[0] == "no such module: VirtualSpatialIndex": - raise SpatialiteConnectionProblem(e) - else: - raise ConnectionProblem(e) - - -class BadMetadataError(Exception): - pass - - -@documented -def parse_metadata(content: str) -> dict: - "Detects if content is JSON or YAML and parses it appropriately." 
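MultiParams above is the multi-value mapping Datasette uses for request.args; a quick sketch of its first-value and getlist behaviour, assuming it is importable from datasette.utils:

    from datasette.utils import MultiParams

    args = MultiParams({"tag": ["python", "sqlite"], "q": ["datasette"]})
    print(args["tag"])                  # "python" - first value wins
    print(args.getlist("tag"))          # ["python", "sqlite"]
    print(args.get("missing", "n/a"))   # "n/a"
    print("q" in args, len(args))       # True 2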
- # content can be JSON or YAML - try: - return json.loads(content) - except json.JSONDecodeError: - try: - return yaml.safe_load(content) - except yaml.YAMLError: - raise BadMetadataError("Metadata is not valid JSON or YAML") - - -def _gather_arguments(fn, kwargs): - parameters = inspect.signature(fn).parameters.keys() - call_with = [] - for parameter in parameters: - if parameter not in kwargs: - raise TypeError( - "{} requires parameters {}, missing: {}".format( - fn, tuple(parameters), set(parameters) - set(kwargs.keys()) - ) - ) - call_with.append(kwargs[parameter]) - return call_with - - -def call_with_supported_arguments(fn, **kwargs): - call_with = _gather_arguments(fn, kwargs) - return fn(*call_with) - - -async def async_call_with_supported_arguments(fn, **kwargs): - call_with = _gather_arguments(fn, kwargs) - return await fn(*call_with) - - -def actor_matches_allow(actor, allow): - if allow is True: - return True - if allow is False: - return False - if actor is None and allow and allow.get("unauthenticated") is True: - return True - if allow is None: - return True - actor = actor or {} - for key, values in allow.items(): - if values == "*" and key in actor: - return True - if not isinstance(values, list): - values = [values] - actor_values = actor.get(key) - if actor_values is None: - continue - if not isinstance(actor_values, list): - actor_values = [actor_values] - actor_values = set(actor_values) - if actor_values.intersection(values): - return True - return False - - -def resolve_env_secrets(config, environ): - """Create copy that recursively replaces {"$env": "NAME"} with values from environ""" - if isinstance(config, dict): - if list(config.keys()) == ["$env"]: - return environ.get(list(config.values())[0]) - elif list(config.keys()) == ["$file"]: - with open(list(config.values())[0]) as fp: - return fp.read() - else: - return { - key: resolve_env_secrets(value, environ) - for key, value in config.items() - } - elif isinstance(config, list): - return [resolve_env_secrets(value, environ) for value in config] - else: - return config - - -def display_actor(actor): - for key in ("display", "name", "username", "login", "id"): - if actor.get(key): - return actor[key] - return str(actor) - - -class SpatialiteNotFound(Exception): - pass - - -# Can replace with sqlite-utils when I add that dependency -def find_spatialite(): - for path in SPATIALITE_PATHS: - if os.path.exists(path): - return path - raise SpatialiteNotFound - - -async def initial_path_for_datasette(datasette): - """Return suggested path for opening this Datasette, based on number of DBs and tables""" - databases = dict([p for p in datasette.databases.items() if p[0] != "_internal"]) - if len(databases) == 1: - db_name = next(iter(databases.keys())) - path = datasette.urls.database(db_name) - # Does this DB only have one table? 
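actor_matches_allow() above is the core of Datasette's allow blocks; a short sketch of its matching rules:

    from datasette.utils import actor_matches_allow

    allow = {"id": ["simon", "cleo"], "role": "staff"}
    print(actor_matches_allow({"id": "simon"}, allow))             # True - id is listed
    print(actor_matches_allow({"role": ["staff", "dev"]}, allow))  # True - any overlap matches
    print(actor_matches_allow({"id": "other"}, allow))             # False
    print(actor_matches_allow(None, {"unauthenticated": True}))    # True - anonymous explicitly allowed
    print(actor_matches_allow({"id": "x"}, {"id": "*"}))           # True - wildcard on a key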
- db = next(iter(databases.values())) - tables = await db.table_names() - if len(tables) == 1: - path = datasette.urls.table(db_name, tables[0]) - else: - path = datasette.urls.instance() - return path - - -class PrefixedUrlString(str): - def __add__(self, other): - return type(self)(super().__add__(other)) - - def __str__(self): - return super().__str__() - - def __getattribute__(self, name): - if not name.startswith("__") and name in dir(str): - - def method(self, *args, **kwargs): - value = getattr(super(), name)(*args, **kwargs) - if isinstance(value, str): - return type(self)(value) - elif isinstance(value, list): - return [type(self)(i) for i in value] - elif isinstance(value, tuple): - return tuple(type(self)(i) for i in value) - else: - return value - - return method.__get__(self) - else: - return super().__getattribute__(name) - - -class StartupError(Exception): - pass - - -_single_line_comment_re = re.compile(r"--.*") -_multi_line_comment_re = re.compile(r"/\*.*?\*/", re.DOTALL) -_single_quote_re = re.compile(r"'(?:''|[^'])*'") -_double_quote_re = re.compile(r'"(?:\"\"|[^"])*"') -_named_param_re = re.compile(r":(\w+)") - - -@documented -def named_parameters(sql: str) -> List[str]: - """ - Given a SQL statement, return a list of named parameters that are used in the statement - - e.g. for ``select * from foo where id=:id`` this would return ``["id"]`` - """ - sql = _single_line_comment_re.sub("", sql) - sql = _multi_line_comment_re.sub("", sql) - sql = _single_quote_re.sub("", sql) - sql = _double_quote_re.sub("", sql) - # Extract parameters from what is left - return _named_param_re.findall(sql) - - -async def derive_named_parameters(db: "Database", sql: str) -> List[str]: - """ - This undocumented but stable method exists for backwards compatibility - with plugins that were using it before it switched to named_parameters() - """ - return named_parameters(sql) - - -def add_cors_headers(headers): - headers["Access-Control-Allow-Origin"] = "*" - headers["Access-Control-Allow-Headers"] = "Authorization, Content-Type" - headers["Access-Control-Expose-Headers"] = "Link" - headers["Access-Control-Allow-Methods"] = "GET, POST, HEAD, OPTIONS" - headers["Access-Control-Max-Age"] = "3600" - - -_TILDE_ENCODING_SAFE = frozenset( - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"abcdefghijklmnopqrstuvwxyz" - b"0123456789_-" - # This is the same as Python percent-encoding but I removed - # '.' and '~' -) - -_space = ord(" ") - - -class TildeEncoder(dict): - # Keeps a cache internally, via __missing__ - def __missing__(self, b): - # Handle a cache miss, store encoded string in cache and return. 
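named_parameters() above pulls :name placeholders out of a SQL statement while ignoring comments and string literals; assuming it is importable from datasette.utils:

    from datasette.utils import named_parameters

    sql = """
    select * from facetable
    where state = :state
      and on_earth = :on_earth
      -- :commented_out is ignored
    """
    print(named_parameters(sql))  # ['state', 'on_earth']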
- if b in _TILDE_ENCODING_SAFE: - res = chr(b) - elif b == _space: - res = "+" - else: - res = "~{:02X}".format(b) - self[b] = res - return res - - -_tilde_encoder = TildeEncoder().__getitem__ - - -@documented -def tilde_encode(s: str) -> str: - "Returns tilde-encoded string - for example ``/foo/bar`` -> ``~2Ffoo~2Fbar``" - return "".join(_tilde_encoder(char) for char in s.encode("utf-8")) - - -@documented -def tilde_decode(s: str) -> str: - "Decodes a tilde-encoded string, so ``~2Ffoo~2Fbar`` -> ``/foo/bar``" - # Avoid accidentally decoding a %2f style sequence - temp = secrets.token_hex(16) - s = s.replace("%", temp) - decoded = urllib.parse.unquote_plus(s.replace("~", "%")) - return decoded.replace(temp, "%") - - -def resolve_routes(routes, path): - for regex, view in routes: - match = regex.match(path) - if match is not None: - return match, view - return None, None - - -def truncate_url(url, length): - if (not length) or (len(url) <= length): - return url - bits = url.rsplit(".", 1) - if len(bits) == 2 and 1 <= len(bits[1]) <= 4 and "/" not in bits[1]: - rest, ext = bits - return rest[: length - 1 - len(ext)] + "…." + ext - return url[: length - 1] + "…" - - -async def row_sql_params_pks(db, table, pk_values): - pks = await db.primary_keys(table) - use_rowid = not pks - select = "*" - if use_rowid: - select = "rowid, *" - pks = ["rowid"] - wheres = [f'"{pk}"=:p{i}' for i, pk in enumerate(pks)] - sql = f"select {select} from {escape_sqlite(table)} where {' AND '.join(wheres)}" - params = {} - for i, pk_value in enumerate(pk_values): - params[f"p{i}"] = pk_value - return sql, params, pks - - -def _handle_pair(key: str, value: str) -> dict: - """ - Turn a key-value pair into a nested dictionary. - foo, bar => {'foo': 'bar'} - foo.bar, baz => {'foo': {'bar': 'baz'}} - foo.bar, [1, 2, 3] => {'foo': {'bar': [1, 2, 3]}} - foo.bar, "baz" => {'foo': {'bar': 'baz'}} - foo.bar, '{"baz": "qux"}' => {'foo': {'bar': "{'baz': 'qux'}"}} - """ - try: - value = json.loads(value) - except json.JSONDecodeError: - # If it doesn't parse as JSON, treat it as a string - pass - - keys = key.split(".") - result = current_dict = {} - - for k in keys[:-1]: - current_dict[k] = {} - current_dict = current_dict[k] - - current_dict[keys[-1]] = value - return result - - -def _combine(base: dict, update: dict) -> dict: - """ - Recursively merge two dictionaries. - """ - for key, value in update.items(): - if isinstance(value, dict) and key in base and isinstance(base[key], dict): - base[key] = _combine(base[key], value) - else: - base[key] = value - return base - - -def pairs_to_nested_config(pairs: typing.List[typing.Tuple[str, typing.Any]]) -> dict: - """ - Parse a list of key-value pairs into a nested dictionary. - """ - result = {} - for key, value in pairs: - parsed_pair = _handle_pair(key, value) - result = _combine(result, parsed_pair) - return result - - -def make_slot_function(name, datasette, request, **kwargs): - from datasette.plugins import pm - - method = getattr(pm.hook, name, None) - assert method is not None, "No hook found for {}".format(name) - - async def inner(): - html_bits = [] - for hook in method(datasette=datasette, request=request, **kwargs): - html = await await_me_maybe(hook) - if html is not None: - html_bits.append(html) - return markupsafe.Markup("".join(html_bits)) - - return inner - - -def prune_empty_dicts(d: dict): - """ - Recursively prune all empty dictionaries from a given dictionary. 
- """ - for key, value in list(d.items()): - if isinstance(value, dict): - prune_empty_dicts(value) - if value == {}: - d.pop(key, None) - - -def move_plugins_and_allow(source: dict, destination: dict) -> Tuple[dict, dict]: - """ - Move 'plugins' and 'allow' keys from source to destination dictionary. Creates - hierarchy in destination if needed. After moving, recursively remove any keys - in the source that are left empty. - """ - source = copy.deepcopy(source) - destination = copy.deepcopy(destination) - - def recursive_move(src, dest, path=None): - if path is None: - path = [] - for key, value in list(src.items()): - new_path = path + [key] - if key in ("plugins", "allow"): - # Navigate and create the hierarchy in destination if needed - d = dest - for step in path: - d = d.setdefault(step, {}) - # Move the plugins - d[key] = value - # Remove the plugins from source - src.pop(key, None) - elif isinstance(value, dict): - recursive_move(value, dest, new_path) - # After moving, check if the current dictionary is empty and remove it if so - if not value: - src.pop(key, None) - - recursive_move(source, destination) - prune_empty_dicts(source) - return source, destination - - -_table_config_keys = ( - "hidden", - "sort", - "sort_desc", - "size", - "sortable_columns", - "label_column", - "facets", - "fts_table", - "fts_pk", - "searchmode", -) - - -def move_table_config(metadata: dict, config: dict): - """ - Move all known table configuration keys from metadata to config. - """ - if "databases" not in metadata: - return metadata, config - metadata = copy.deepcopy(metadata) - config = copy.deepcopy(config) - for database_name, database in metadata["databases"].items(): - if "tables" not in database: - continue - for table_name, table in database["tables"].items(): - for key in _table_config_keys: - if key in table: - config.setdefault("databases", {}).setdefault( - database_name, {} - ).setdefault("tables", {}).setdefault(table_name, {})[ - key - ] = table.pop( - key - ) - prune_empty_dicts(metadata) - return metadata, config - - -def redact_keys(original: dict, key_patterns: Iterable) -> dict: - """ - Recursively redact sensitive keys in a dictionary based on given patterns - - :param original: The original dictionary - :param key_patterns: A list of substring patterns to redact - :return: A copy of the original dictionary with sensitive values redacted - """ - - def redact(data): - if isinstance(data, dict): - return { - k: ( - redact(v) - if not any(pattern in k for pattern in key_patterns) - else "***" - ) - for k, v in data.items() - } - elif isinstance(data, list): - return [redact(item) for item in data] - else: - return data - - return redact(original) - - -def md5_not_usedforsecurity(s): - try: - return hashlib.md5(s.encode("utf8"), usedforsecurity=False).hexdigest() - except TypeError: - # For Python 3.8 which does not support usedforsecurity=False - return hashlib.md5(s.encode("utf8")).hexdigest() - - -_etag_cache = {} - - -async def calculate_etag(filepath, chunk_size=4096): - if filepath in _etag_cache: - return _etag_cache[filepath] - - hasher = hashlib.md5() - async with aiofiles.open(filepath, "rb") as f: - while True: - chunk = await f.read(chunk_size) - if not chunk: - break - hasher.update(chunk) - - etag = f'"{hasher.hexdigest()}"' - _etag_cache[filepath] = etag - - return etag - - -def deep_dict_update(dict1, dict2): - for key, value in dict2.items(): - if isinstance(value, dict): - dict1[key] = deep_dict_update(dict1.get(key, type(value)()), value) - else: - dict1[key] = 
value - return dict1 + def getlist(self, name, default=None): + "Return full list" + return super().get(name, default) diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py deleted file mode 100644 index 9c2add0e..00000000 --- a/datasette/utils/actions_sql.py +++ /dev/null @@ -1,587 +0,0 @@ -""" -SQL query builder for hierarchical permission checking. - -This module implements a cascading permission system based on the pattern -from https://github.com/simonw/research/tree/main/sqlite-permissions-poc - -It builds SQL queries that: - -1. Start with all resources of a given type (from resource_type.resources_sql()) -2. Gather permission rules from plugins (via permission_resources_sql hook) -3. Apply cascading logic: child → parent → global -4. Apply DENY-beats-ALLOW at each level - -The core pattern is: -- Resources are identified by (parent, child) tuples -- Rules are evaluated at three levels: - - child: exact match on (parent, child) - - parent: match on (parent, NULL) - - global: match on (NULL, NULL) -- At the same level, DENY (allow=0) beats ALLOW (allow=1) -- Across levels, child beats parent beats global -""" - -from typing import TYPE_CHECKING - -from datasette.utils.permissions import gather_permission_sql_from_hooks - -if TYPE_CHECKING: - from datasette.app import Datasette - - -async def build_allowed_resources_sql( - datasette: "Datasette", - actor: dict | None, - action: str, - *, - parent: str | None = None, - include_is_private: bool = False, -) -> tuple[str, dict]: - """ - Build a SQL query that returns all resources the actor can access for this action. - - Args: - datasette: The Datasette instance - actor: The actor dict (or None for unauthenticated) - action: The action name (e.g., "view-table", "view-database") - parent: Optional parent filter to limit results (e.g., database name) - include_is_private: If True, add is_private column showing if anonymous cannot access - - Returns: - A tuple of (sql_query, params_dict) - - The returned SQL query will have three columns (or four with include_is_private): - - parent: The parent resource identifier (or NULL) - - child: The child resource identifier (or NULL) - - reason: The reason from the rule that granted access - - is_private: (if include_is_private) 1 if anonymous cannot access, 0 otherwise - - Example: - For action="view-table", this might return: - SELECT parent, child, reason FROM ... WHERE is_allowed = 1 - - Results would be like: - ('analytics', 'users', 'role-based: analysts can access analytics DB') - ('analytics', 'events', 'role-based: analysts can access analytics DB') - ('production', 'orders', 'business-exception: allow production.orders for carol') - """ - # Get the Action object - action_obj = datasette.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - # If this action also_requires another action, we need to combine the queries - if action_obj.also_requires: - # Build both queries - main_sql, main_params = await _build_single_action_sql( - datasette, - actor, - action, - parent=parent, - include_is_private=include_is_private, - ) - required_sql, required_params = await _build_single_action_sql( - datasette, - actor, - action_obj.also_requires, - parent=parent, - include_is_private=False, - ) - - # Merge parameters - they should have identical values for :actor, :actor_id, etc. 
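The module docstring above describes the cascade: child rules beat parent rules beat global rules, and at any single level DENY beats ALLOW. The following toy illustration of that pattern uses a throwaway SQLite database; it is not the SQL Datasette actually generates:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript("""
    create table resources (parent text, child text);
    insert into resources values
      ('analytics', 'users'), ('analytics', 'events'),
      ('production', 'orders'), ('production', 'invoices');

    -- allow = 1 grants, allow = 0 denies; NULL parent/child widens the scope
    create table rules (parent text, child text, allow integer, reason text);
    insert into rules values
      (null, null, 1, 'global: everyone may view'),
      ('production', null, 0, 'parent: production locked down'),
      ('production', 'orders', 1, 'child: orders is an exception');
    """)

    sql = """
    select r.parent, r.child,
      coalesce(
        -- most specific level with any rule wins; min() makes DENY beat ALLOW
        (select min(allow) from rules where parent = r.parent and child = r.child),
        (select min(allow) from rules where parent = r.parent and child is null),
        (select min(allow) from rules where parent is null and child is null),
        0
      ) as is_allowed
    from resources r
    """
    for parent, child, is_allowed in conn.execute(sql):
        print(parent, child, bool(is_allowed))
    # analytics/users and analytics/events allowed via the global rule,
    # production/orders allowed by the child-level exception,
    # production/invoices denied by the parent-level rule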
- all_params = {**main_params, **required_params} - if parent is not None: - all_params["filter_parent"] = parent - - # Combine with INNER JOIN - only resources allowed by both actions - combined_sql = f""" -WITH -main_allowed AS ( -{main_sql} -), -required_allowed AS ( -{required_sql} -) -SELECT m.parent, m.child, m.reason""" - - if include_is_private: - combined_sql += ", m.is_private" - - combined_sql += """ -FROM main_allowed m -INNER JOIN required_allowed r - ON ((m.parent = r.parent) OR (m.parent IS NULL AND r.parent IS NULL)) - AND ((m.child = r.child) OR (m.child IS NULL AND r.child IS NULL)) -""" - - if parent is not None: - combined_sql += "WHERE m.parent = :filter_parent\n" - - combined_sql += "ORDER BY m.parent, m.child" - - return combined_sql, all_params - - # No also_requires, build single action query - return await _build_single_action_sql( - datasette, actor, action, parent=parent, include_is_private=include_is_private - ) - - -async def _build_single_action_sql( - datasette: "Datasette", - actor: dict | None, - action: str, - *, - parent: str | None = None, - include_is_private: bool = False, -) -> tuple[str, dict]: - """ - Build SQL for a single action (internal helper for build_allowed_resources_sql). - - This contains the original logic from build_allowed_resources_sql, extracted - to allow combining multiple actions when also_requires is used. - """ - # Get the Action object - action_obj = datasette.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - # Get base resources SQL from the resource class - base_resources_sql = await action_obj.resource_class.resources_sql(datasette) - - permission_sqls = await gather_permission_sql_from_hooks( - datasette=datasette, - actor=actor, - action=action, - ) - - # If permission_sqls is the sentinel, skip all permission checks - # Return SQL that allows all resources - from datasette.utils.permissions import SKIP_PERMISSION_CHECKS - - if permission_sqls is SKIP_PERMISSION_CHECKS: - cols = "parent, child, 'skip_permission_checks' AS reason" - if include_is_private: - cols += ", 0 AS is_private" - return f"SELECT {cols} FROM ({base_resources_sql})", {} - - all_params = {} - rule_sqls = [] - restriction_sqls = [] - - for permission_sql in permission_sqls: - # Always collect params (even from restriction-only plugins) - all_params.update(permission_sql.params or {}) - - # Collect restriction SQL filters - if permission_sql.restriction_sql: - restriction_sqls.append(permission_sql.restriction_sql) - - # Skip plugins that only provide restriction_sql (no permission rules) - if permission_sql.sql is None: - continue - rule_sqls.append( - f""" - SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( - {permission_sql.sql} - ) - """.strip() - ) - - # If no rules, return empty result (deny all) - if not rule_sqls: - empty_cols = "NULL AS parent, NULL AS child, NULL AS reason" - if include_is_private: - empty_cols += ", NULL AS is_private" - return f"SELECT {empty_cols} WHERE 0", {} - - # Build the cascading permission query - rules_union = " UNION ALL ".join(rule_sqls) - - # Build the main query - query_parts = [ - "WITH", - "base AS (", - f" {base_resources_sql}", - "),", - "all_rules AS (", - f" {rules_union}", - "),", - ] - - # If include_is_private, we need to build anonymous permissions too - if include_is_private: - anon_permission_sqls = await gather_permission_sql_from_hooks( - datasette=datasette, - actor=None, - action=action, - ) - anon_sqls_rewritten = [] - 
anon_params = {} - - for permission_sql in anon_permission_sqls: - # Skip plugins that only provide restriction_sql (no permission rules) - if permission_sql.sql is None: - continue - rewritten_sql = permission_sql.sql - for key, value in (permission_sql.params or {}).items(): - anon_key = f"anon_{key}" - anon_params[anon_key] = value - rewritten_sql = rewritten_sql.replace(f":{key}", f":{anon_key}") - anon_sqls_rewritten.append(rewritten_sql) - - all_params.update(anon_params) - - if anon_sqls_rewritten: - anon_rules_union = " UNION ALL ".join(anon_sqls_rewritten) - query_parts.extend( - [ - "anon_rules AS (", - f" {anon_rules_union}", - "),", - ] - ) - - # Continue with the cascading logic - query_parts.extend( - [ - "child_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", - " FROM base b", - " LEFT JOIN all_rules ar ON ar.parent = b.parent AND ar.child = b.child", - " GROUP BY b.parent, b.child", - "),", - "parent_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", - " FROM base b", - " LEFT JOIN all_rules ar ON ar.parent = b.parent AND ar.child IS NULL", - " GROUP BY b.parent, b.child", - "),", - "global_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", - " FROM base b", - " LEFT JOIN all_rules ar ON ar.parent IS NULL AND ar.child IS NULL", - " GROUP BY b.parent, b.child", - "),", - ] - ) - - # Add anonymous decision logic if needed - if include_is_private: - query_parts.extend( - [ - "anon_child_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow", - " FROM base b", - " LEFT JOIN anon_rules ar ON ar.parent = b.parent AND ar.child = b.child", - " GROUP BY b.parent, b.child", - "),", - "anon_parent_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow", - " FROM base b", - " LEFT JOIN anon_rules ar ON ar.parent = b.parent AND ar.child IS NULL", - " GROUP BY b.parent, b.child", - "),", - "anon_global_lvl AS (", - " SELECT b.parent, b.child,", - " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", - " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow", - " FROM base b", - " LEFT JOIN anon_rules ar ON ar.parent IS NULL AND ar.child IS NULL", - " GROUP BY b.parent, b.child", - "),", - "anon_decisions AS (", - " SELECT", - " b.parent, b.child,", - " CASE", - " WHEN acl.any_deny = 1 THEN 0", - " WHEN acl.any_allow = 1 THEN 1", 
- " WHEN apl.any_deny = 1 THEN 0", - " WHEN apl.any_allow = 1 THEN 1", - " WHEN agl.any_deny = 1 THEN 0", - " WHEN agl.any_allow = 1 THEN 1", - " ELSE 0", - " END AS anon_is_allowed", - " FROM base b", - " JOIN anon_child_lvl acl ON b.parent = acl.parent AND (b.child = acl.child OR (b.child IS NULL AND acl.child IS NULL))", - " JOIN anon_parent_lvl apl ON b.parent = apl.parent AND (b.child = apl.child OR (b.child IS NULL AND apl.child IS NULL))", - " JOIN anon_global_lvl agl ON b.parent = agl.parent AND (b.child = agl.child OR (b.child IS NULL AND agl.child IS NULL))", - "),", - ] - ) - - # Final decisions - query_parts.extend( - [ - "decisions AS (", - " SELECT", - " b.parent, b.child,", - " -- Cascading permission logic: child → parent → global, DENY beats ALLOW at each level", - " -- Priority order:", - " -- 1. Child-level deny (most specific, blocks access)", - " -- 2. Child-level allow (most specific, grants access)", - " -- 3. Parent-level deny (intermediate, blocks access)", - " -- 4. Parent-level allow (intermediate, grants access)", - " -- 5. Global-level deny (least specific, blocks access)", - " -- 6. Global-level allow (least specific, grants access)", - " -- 7. Default deny (no rules match)", - " CASE", - " WHEN cl.any_deny = 1 THEN 0", - " WHEN cl.any_allow = 1 THEN 1", - " WHEN pl.any_deny = 1 THEN 0", - " WHEN pl.any_allow = 1 THEN 1", - " WHEN gl.any_deny = 1 THEN 0", - " WHEN gl.any_allow = 1 THEN 1", - " ELSE 0", - " END AS is_allowed,", - " CASE", - " WHEN cl.any_deny = 1 THEN cl.deny_reasons", - " WHEN cl.any_allow = 1 THEN cl.allow_reasons", - " WHEN pl.any_deny = 1 THEN pl.deny_reasons", - " WHEN pl.any_allow = 1 THEN pl.allow_reasons", - " WHEN gl.any_deny = 1 THEN gl.deny_reasons", - " WHEN gl.any_allow = 1 THEN gl.allow_reasons", - " ELSE '[]'", - " END AS reason", - ] - ) - - if include_is_private: - query_parts.append( - " , CASE WHEN ad.anon_is_allowed = 0 THEN 1 ELSE 0 END AS is_private" - ) - - query_parts.extend( - [ - " FROM base b", - " JOIN child_lvl cl ON b.parent = cl.parent AND (b.child = cl.child OR (b.child IS NULL AND cl.child IS NULL))", - " JOIN parent_lvl pl ON b.parent = pl.parent AND (b.child = pl.child OR (b.child IS NULL AND pl.child IS NULL))", - " JOIN global_lvl gl ON b.parent = gl.parent AND (b.child = gl.child OR (b.child IS NULL AND gl.child IS NULL))", - ] - ) - - if include_is_private: - query_parts.append( - " JOIN anon_decisions ad ON b.parent = ad.parent AND (b.child = ad.child OR (b.child IS NULL AND ad.child IS NULL))" - ) - - query_parts.append(")") - - # Add restriction list CTE if there are restrictions - if restriction_sqls: - # Wrap each restriction_sql in a subquery to avoid operator precedence issues - # with UNION ALL inside the restriction SQL statements - restriction_intersect = "\nINTERSECT\n".join( - f"SELECT * FROM ({sql})" for sql in restriction_sqls - ) - query_parts.extend( - [",", "restriction_list AS (", f" {restriction_intersect}", ")"] - ) - - # Final SELECT - select_cols = "parent, child, reason" - if include_is_private: - select_cols += ", is_private" - - query_parts.append(f"SELECT {select_cols}") - query_parts.append("FROM decisions") - query_parts.append("WHERE is_allowed = 1") - - # Add restriction filter if there are restrictions - if restriction_sqls: - query_parts.append( - """ - AND EXISTS ( - SELECT 1 FROM restriction_list r - WHERE (r.parent = decisions.parent OR r.parent IS NULL) - AND (r.child = decisions.child OR r.child IS NULL) - )""" - ) - - # Add parent filter if specified - if parent is not 
None: - query_parts.append(" AND parent = :filter_parent") - all_params["filter_parent"] = parent - - query_parts.append("ORDER BY parent, child") - - query = "\n".join(query_parts) - return query, all_params - - -async def build_permission_rules_sql( - datasette: "Datasette", actor: dict | None, action: str -) -> tuple[str, dict]: - """ - Build the UNION SQL and params for all permission rules for a given actor and action. - - Returns: - A tuple of (sql, params) where sql is a UNION ALL query that returns - (parent, child, allow, reason, source_plugin) rows. - """ - # Get the Action object - action_obj = datasette.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - permission_sqls = await gather_permission_sql_from_hooks( - datasette=datasette, - actor=actor, - action=action, - ) - - # If permission_sqls is the sentinel, skip all permission checks - # Return SQL that allows everything - from datasette.utils.permissions import SKIP_PERMISSION_CHECKS - - if permission_sqls is SKIP_PERMISSION_CHECKS: - return ( - "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'skip_permission_checks' AS reason, 'skip' AS source_plugin", - {}, - [], - ) - - if not permission_sqls: - return ( - "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", - {}, - [], - ) - - union_parts = [] - all_params = {} - restriction_sqls = [] - - for permission_sql in permission_sqls: - all_params.update(permission_sql.params or {}) - - # Collect restriction SQL filters - if permission_sql.restriction_sql: - restriction_sqls.append(permission_sql.restriction_sql) - - # Skip plugins that only provide restriction_sql (no permission rules) - if permission_sql.sql is None: - continue - - union_parts.append( - f""" - SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( - {permission_sql.sql} - ) - """.strip() - ) - - rules_union = " UNION ALL ".join(union_parts) - return rules_union, all_params, restriction_sqls - - -async def check_permission_for_resource( - *, - datasette: "Datasette", - actor: dict | None, - action: str, - parent: str | None, - child: str | None, -) -> bool: - """ - Check if an actor has permission for a specific action on a specific resource. - - Args: - datasette: The Datasette instance - actor: The actor dict (or None) - action: The action name - parent: The parent resource identifier (e.g., database name, or None) - child: The child resource identifier (e.g., table name, or None) - - Returns: - True if the actor is allowed, False otherwise - - This builds the cascading permission query and checks if the specific - resource is in the allowed set. 
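To make the precedence described here concrete, the following is a minimal pure-Python restatement of the policy (child beats parent beats global, and a deny beats an allow at the same level). It is an illustration of the rules only, not the SQL implementation that follows.

```python
# Minimal sketch of the cascade policy used by the SQL in this module.
def resolve(rules, parent, child):
    # rules: dicts with "parent", "child" and "allow" keys (None = wildcard)
    def depth(rule):
        if rule["child"] is not None:
            return 2  # child-level: most specific
        if rule["parent"] is not None:
            return 1  # parent-level
        return 0  # global

    matched = [
        r
        for r in rules
        if r["parent"] in (None, parent) and r["child"] in (None, child)
    ]
    if not matched:
        return False  # implicit deny when no rule matches
    top = max(depth(r) for r in matched)
    at_top = [r for r in matched if depth(r) == top]
    # At the winning level, any deny beats any allow
    return not any(r["allow"] == 0 for r in at_top)


rules = [
    {"parent": None, "child": None, "allow": 0},  # global deny
    {"parent": "analytics", "child": None, "allow": 1},  # parent-level allow
    {"parent": "analytics", "child": "secrets", "allow": 0},  # child-level deny
]
assert resolve(rules, "analytics", "users") is True
assert resolve(rules, "analytics", "secrets") is False
assert resolve(rules, "production", "orders") is False
```

The SQL below expresses the same ordering with depth DESC plus a deny-first tiebreak and LIMIT 1.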
- """ - rules_union, all_params, restriction_sqls = await build_permission_rules_sql( - datasette, actor, action - ) - - # If no rules (empty SQL), default deny - if not rules_union: - return False - - # Add parameters for the resource we're checking - all_params["_check_parent"] = parent - all_params["_check_child"] = child - - # If there are restriction filters, check if the resource passes them first - if restriction_sqls: - # Check if resource is in restriction allowlist - # Database-level restrictions (parent, NULL) should match all children (parent, *) - # Wrap each restriction_sql in a subquery to avoid operator precedence issues - restriction_check = "\nINTERSECT\n".join( - f"SELECT * FROM ({sql})" for sql in restriction_sqls - ) - restriction_query = f""" -WITH restriction_list AS ( - {restriction_check} -) -SELECT EXISTS ( - SELECT 1 FROM restriction_list - WHERE (parent = :_check_parent OR parent IS NULL) - AND (child = :_check_child OR child IS NULL) -) AS in_allowlist -""" - result = await datasette.get_internal_database().execute( - restriction_query, all_params - ) - if result.rows and not result.rows[0][0]: - # Resource not in restriction allowlist - deny - return False - - query = f""" -WITH -all_rules AS ( - {rules_union} -), -matched_rules AS ( - SELECT ar.*, - CASE - WHEN ar.child IS NOT NULL THEN 2 -- child-level (most specific) - WHEN ar.parent IS NOT NULL THEN 1 -- parent-level - ELSE 0 -- root/global - END AS depth - FROM all_rules ar - WHERE (ar.parent IS NULL OR ar.parent = :_check_parent) - AND (ar.child IS NULL OR ar.child = :_check_child) -), -winner AS ( - SELECT * - FROM matched_rules - ORDER BY - depth DESC, -- specificity first (higher depth wins) - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow - source_plugin -- stable tie-break - LIMIT 1 -) -SELECT COALESCE((SELECT allow FROM winner), 0) AS is_allowed -""" - - # Execute the query against the internal database - result = await datasette.get_internal_database().execute(query, all_params) - if result.rows: - return bool(result.rows[0][0]) - return False diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 7f3329a6..eaf3428d 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,66 +1,20 @@ import json -from datasette.utils import MultiParams, calculate_etag +from datasette.utils import RequestParameters from mimetypes import guess_type -from urllib.parse import parse_qs, urlunparse, parse_qsl +from urllib.parse import parse_qs, urlunparse from pathlib import Path -from http.cookies import SimpleCookie, Morsel -import aiofiles -import aiofiles.os +from html import escape import re - -# Workaround for adding samesite support to pre 3.8 python -Morsel._reserved["samesite"] = "SameSite" -# Thanks, Starlette: -# https://github.com/encode/starlette/blob/519f575/starlette/responses.py#L17 +import aiofiles -class Base400(Exception): - status = 400 - - -class NotFound(Base400): - status = 404 - - -class DatabaseNotFound(NotFound): - def __init__(self, database_name): - self.database_name = database_name - super().__init__("Database not found") - - -class TableNotFound(NotFound): - def __init__(self, database_name, table): - super().__init__("Table not found") - self.database_name = database_name - self.table = table - - -class RowNotFound(NotFound): - def __init__(self, database_name, table, pk_values): - super().__init__("Row not found") - self.database_name = database_name - self.table_name = table - self.pk_values = pk_values - - -class Forbidden(Base400): - status 
= 403 - - -class BadRequest(Base400): - status = 400 - - -SAMESITE_VALUES = ("strict", "lax", "none") +class NotFound(Exception): + pass class Request: - def __init__(self, scope, receive): + def __init__(self, scope): self.scope = scope - self.receive = receive - - def __repr__(self): - return ''.format(self.method, self.url) @property def method(self): @@ -72,89 +26,103 @@ class Request: (self.scheme, self.host, self.path, None, self.query_string, None) ) - @property - def url_vars(self): - return (self.scope.get("url_route") or {}).get("kwargs") or {} - @property def scheme(self): return self.scope.get("scheme") or "http" @property def headers(self): - return { - k.decode("latin-1").lower(): v.decode("latin-1") - for k, v in self.scope.get("headers") or [] - } + return dict( + [ + (k.decode("latin-1").lower(), v.decode("latin-1")) + for k, v in self.scope.get("headers") or [] + ] + ) @property def host(self): return self.headers.get("host") or "localhost" - @property - def cookies(self): - cookies = SimpleCookie() - cookies.load(self.headers.get("cookie", "")) - return {key: value.value for key, value in cookies.items()} - @property def path(self): - if self.scope.get("raw_path") is not None: - return self.scope["raw_path"].decode("latin-1").partition("?")[0] + if "raw_path" in self.scope: + return self.scope["raw_path"].decode("latin-1") else: - path = self.scope["path"] - if isinstance(path, str): - return path - else: - return path.decode("utf-8") + return self.scope["path"].decode("utf-8") @property def query_string(self): return (self.scope.get("query_string") or b"").decode("latin-1") - @property - def full_path(self): - qs = self.query_string - return "{}{}".format(self.path, ("?" + qs) if qs else "") - @property def args(self): - return MultiParams(parse_qs(qs=self.query_string, keep_blank_values=True)) + return RequestParameters(parse_qs(qs=self.query_string)) @property - def actor(self): - return self.scope.get("actor", None) - - async def post_body(self): - body = b"" - more_body = True - while more_body: - message = await self.receive() - assert message["type"] == "http.request", message - body += message.get("body", b"") - more_body = message.get("more_body", False) - return body - - async def post_vars(self): - body = await self.post_body() - return dict(parse_qsl(body.decode("utf-8"), keep_blank_values=True)) + def raw_args(self): + return {key: value[0] for key, value in self.args.items()} @classmethod - def fake(cls, path_with_query_string, method="GET", scheme="http", url_vars=None): - """Useful for constructing Request objects for tests""" + def fake(cls, path_with_query_string, method="GET", scheme="http"): + "Useful for constructing Request objects for tests" path, _, query_string = path_with_query_string.partition("?") scope = { "http_version": "1.1", "method": method, "path": path, - "raw_path": path_with_query_string.encode("latin-1"), + "raw_path": path.encode("latin-1"), "query_string": query_string.encode("latin-1"), "scheme": scheme, "type": "http", } - if url_vars: - scope["url_route"] = {"kwargs": url_vars} - return cls(scope, None) + return cls(scope) + + +class AsgiRouter: + def __init__(self, routes=None): + routes = routes or [] + self.routes = [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def __call__(self, scope, receive, send): + # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves + path = scope["path"] + raw_path = scope.get("raw_path") + if raw_path: + path = raw_path.decode("ascii") + for regex, view in self.routes: + match = regex.match(path) + if match is not None: + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + try: + return await view(new_scope, receive, send) + except Exception as exception: + return await self.handle_500(scope, receive, send, exception) + return await self.handle_404(scope, receive, send) + + async def handle_404(self, scope, receive, send): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"
<h1>404</h1>
    "}) + + async def handle_500(self, scope, receive, send, exception): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + html = "
<h1>500</h1><pre>{}</pre>
    ".format(escape(repr(exception))) + await send({"type": "http.response.body", "body": html.encode("latin-1")}) class AsgiLifespan: @@ -186,6 +154,33 @@ class AsgiLifespan: await self.app(scope, receive, send) +class AsgiView: + def dispatch_request(self, request, *args, **kwargs): + handler = getattr(self, request.method.lower(), None) + return handler(request, *args, **kwargs) + + @classmethod + def as_asgi(cls, *class_args, **class_kwargs): + async def view(scope, receive, send): + # Uses scope to create a request object, then dispatches that to + # self.get(...) or self.options(...) along with keyword arguments + # that were already tucked into scope["url_route"]["kwargs"] by + # the router, similar to how Django Channels works: + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + request = Request(scope) + self = view.view_class(*class_args, **class_kwargs) + response = await self.dispatch_request( + request, **scope["url_route"]["kwargs"] + ) + await response.asgi_send(send) + + view.view_class = cls + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.__name__ = cls.__name__ + return view + + class AsgiStream: def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): self.stream_fn = stream_fn @@ -195,7 +190,9 @@ class AsgiStream: async def asgi_send(self, send): # Remove any existing content-type header - headers = {k: v for k, v in self.headers.items() if k.lower() != "content-type"} + headers = dict( + [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] + ) headers["content-type"] = self.content_type await send( { @@ -220,7 +217,7 @@ class AsgiWriter: await self.send( { "type": "http.response.body", - "body": chunk.encode("utf-8"), + "body": chunk.encode("latin-1"), "more_body": True, } ) @@ -240,36 +237,29 @@ async def asgi_send_json(send, info, status=200, headers=None): async def asgi_send_html(send, html, status=200, headers=None): headers = headers or {} await asgi_send( - send, - html, - status=status, - headers=headers, - content_type="text/html; charset=utf-8", + send, html, status=status, headers=headers, content_type="text/html" ) async def asgi_send_redirect(send, location, status=302): - # Prevent open redirect vulnerability: strip multiple leading slashes - # //example.com would be interpreted as a protocol-relative URL (e.g., https://example.com/) - location = re.sub(r"^/+", "/", location) await asgi_send( send, "", status=status, headers={"Location": location}, - content_type="text/html; charset=utf-8", + content_type="text/html", ) async def asgi_send(send, content, status, headers=None, content_type="text/plain"): await asgi_start(send, status, headers, content_type) - await send({"type": "http.response.body", "body": content.encode("utf-8")}) + await send({"type": "http.response.body", "body": content.encode("latin-1")}) async def asgi_start(send, status, headers=None, content_type="text/plain"): headers = headers or {} # Remove any existing content-type header - headers = {k: v for k, v in headers.items() if k.lower() != "content-type"} + headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) headers["content-type"] = content_type await send( { @@ -284,14 +274,12 @@ async def asgi_start(send, status, headers=None, content_type="text/plain"): async def asgi_send_file( - send, filepath, filename=None, content_type=None, chunk_size=4096, headers=None + send, filepath, filename=None, content_type=None, chunk_size=4096 ): - headers = headers or 
{} + headers = {} if filename: - headers["content-disposition"] = f'attachment; filename="{filename}"' - + headers["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) first = True - headers["content-length"] = str((await aiofiles.os.stat(str(filepath))).st_size) async with aiofiles.open(str(filepath), mode="rb") as fp: if first: await asgi_start( @@ -311,41 +299,23 @@ async def asgi_send_file( def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None): - root_path = Path(root_path) - static_headers = {} - - if headers: - static_headers = headers.copy() - - async def inner_static(request, send): - path = request.scope["url_route"]["kwargs"]["path"] - headers = static_headers.copy() + async def inner_static(scope, receive, send): + path = scope["url_route"]["kwargs"]["path"] try: - full_path = (root_path / path).resolve().absolute() + full_path = (Path(root_path) / path).resolve().absolute() except FileNotFoundError: - await asgi_send_html(send, "404: Directory not found", 404) - return - if full_path.is_dir(): - await asgi_send_html(send, "403: Directory listing is not allowed", 403) + await asgi_send_html(send, "404", 404) return # Ensure full_path is within root_path to avoid weird "../" tricks try: - full_path.relative_to(root_path.resolve()) + full_path.relative_to(root_path) except ValueError: - await asgi_send_html(send, "404: Path not inside root path", 404) + await asgi_send_html(send, "404", 404) return try: - # Calculate ETag for filepath - etag = await calculate_etag(full_path, chunk_size=chunk_size) - headers["ETag"] = etag - if_none_match = request.headers.get("if-none-match") - if if_none_match and if_none_match == etag: - return await asgi_send(send, "", 304) - await asgi_send_file( - send, full_path, chunk_size=chunk_size, headers=headers - ) + await asgi_send_file(send, full_path, chunk_size=chunk_size) except FileNotFoundError: - await asgi_send_html(send, "404: File not found", 404) + await asgi_send_html(send, "404", 404) return return inner_static @@ -356,24 +326,20 @@ class Response: self.body = body self.status = status self.headers = headers or {} - self._set_cookie_headers = [] self.content_type = content_type async def asgi_send(self, send): headers = {} headers.update(self.headers) headers["content-type"] = self.content_type - raw_headers = [ - [key.encode("utf-8"), value.encode("utf-8")] - for key, value in headers.items() - ] - for set_cookie in self._set_cookie_headers: - raw_headers.append([b"set-cookie", set_cookie.encode("utf-8")]) await send( { "type": "http.response.start", "status": self.status, - "headers": raw_headers, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], } ) body = self.body @@ -381,37 +347,6 @@ class Response: body = body.encode("utf-8") await send({"type": "http.response.body", "body": body}) - def set_cookie( - self, - key, - value="", - max_age=None, - expires=None, - path="/", - domain=None, - secure=False, - httponly=False, - samesite="lax", - ): - assert samesite in SAMESITE_VALUES, "samesite should be one of {}".format( - SAMESITE_VALUES - ) - cookie = SimpleCookie() - cookie[key] = value - for prop_name, prop_value in ( - ("max_age", max_age), - ("expires", expires), - ("path", path), - ("domain", domain), - ("samesite", samesite), - ): - if prop_value is not None: - cookie[key][prop_name.replace("_", "-")] = prop_value - for prop_name, prop_value in (("secure", secure), ("httponly", httponly)): - if prop_value: - cookie[key][prop_name] = True 
- self._set_cookie_headers.append(cookie.output(header="").strip()) - @classmethod def html(cls, body, status=200, headers=None): return cls( @@ -424,21 +359,12 @@ class Response: @classmethod def text(cls, body, status=200, headers=None): return cls( - str(body), + body, status=status, headers=headers, content_type="text/plain; charset=utf-8", ) - @classmethod - def json(cls, body, status=200, headers=None, default=None): - return cls( - json.dumps(body, default=default), - status=status, - headers=headers, - content_type="application/json; charset=utf-8", - ) - @classmethod def redirect(cls, path, status=302, headers=None): headers = headers or {} @@ -448,37 +374,11 @@ class Response: class AsgiFileDownload: def __init__( - self, - filepath, - filename=None, - content_type="application/octet-stream", - headers=None, + self, filepath, filename=None, content_type="application/octet-stream" ): - self.headers = headers or {} self.filepath = filepath self.filename = filename self.content_type = content_type async def asgi_send(self, send): - return await asgi_send_file( - send, - self.filepath, - filename=self.filename, - content_type=self.content_type, - headers=self.headers, - ) - - -class AsgiRunOnFirstRequest: - def __init__(self, asgi, on_startup): - assert isinstance(on_startup, list) - self.asgi = asgi - self.on_startup = on_startup - self._started = False - - async def __call__(self, scope, receive, send): - if not self._started: - self._started = True - for hook in self.on_startup: - await hook() - return await self.asgi(scope, receive, send) + return await asgi_send_file(send, self.filepath, content_type=self.content_type) diff --git a/datasette/utils/baseconv.py b/datasette/utils/baseconv.py deleted file mode 100644 index c4b64908..00000000 --- a/datasette/utils/baseconv.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Convert numbers from base 10 integers to base X strings and back again. 
- -Sample usage: - ->>> base20 = BaseConverter('0123456789abcdefghij') ->>> base20.from_decimal(1234) -'31e' ->>> base20.to_decimal('31e') -1234 - -Originally shared here: https://www.djangosnippets.org/snippets/1431/ -""" - - -class BaseConverter(object): - decimal_digits = "0123456789" - - def __init__(self, digits): - self.digits = digits - - def encode(self, i): - return self.convert(i, self.decimal_digits, self.digits) - - def decode(self, s): - return int(self.convert(s, self.digits, self.decimal_digits)) - - def convert(number, fromdigits, todigits): - # Based on http://code.activestate.com/recipes/111286/ - if str(number)[0] == "-": - number = str(number)[1:] - neg = 1 - else: - neg = 0 - - # make an integer out of the number - x = 0 - for digit in str(number): - x = x * len(fromdigits) + fromdigits.index(digit) - - # create the result in base 'len(todigits)' - if x == 0: - res = todigits[0] - else: - res = "" - while x > 0: - digit = x % len(todigits) - res = todigits[digit] + res - x = int(x / len(todigits)) - if neg: - res = "-" + res - return res - - convert = staticmethod(convert) - - -bin = BaseConverter("01") -hexconv = BaseConverter("0123456789ABCDEF") -base62 = BaseConverter("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz") diff --git a/datasette/utils/check_callable.py b/datasette/utils/check_callable.py deleted file mode 100644 index a0997d20..00000000 --- a/datasette/utils/check_callable.py +++ /dev/null @@ -1,25 +0,0 @@ -import inspect -import types -from typing import NamedTuple, Any - - -class CallableStatus(NamedTuple): - is_callable: bool - is_async_callable: bool - - -def check_callable(obj: Any) -> CallableStatus: - if not callable(obj): - return CallableStatus(False, False) - - if isinstance(obj, type): - # It's a class - return CallableStatus(True, False) - - if isinstance(obj, types.FunctionType): - return CallableStatus(True, inspect.iscoroutinefunction(obj)) - - if hasattr(obj, "__call__"): - return CallableStatus(True, inspect.iscoroutinefunction(obj.__call__)) - - assert False, "obj {} is somehow callable with no __call__ method".format(repr(obj)) diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py deleted file mode 100644 index a3afbab2..00000000 --- a/datasette/utils/internal_db.py +++ /dev/null @@ -1,244 +0,0 @@ -import textwrap -from datasette.utils import table_column_details - - -async def init_internal_db(db): - create_tables_sql = textwrap.dedent( - """ - CREATE TABLE IF NOT EXISTS catalog_databases ( - database_name TEXT PRIMARY KEY, - path TEXT, - is_memory INTEGER, - schema_version INTEGER - ); - CREATE TABLE IF NOT EXISTS catalog_tables ( - database_name TEXT, - table_name TEXT, - rootpage INTEGER, - sql TEXT, - PRIMARY KEY (database_name, table_name), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name) - ); - CREATE TABLE IF NOT EXISTS catalog_views ( - database_name TEXT, - view_name TEXT, - rootpage INTEGER, - sql TEXT, - PRIMARY KEY (database_name, view_name), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name) - ); - CREATE TABLE IF NOT EXISTS catalog_columns ( - database_name TEXT, - table_name TEXT, - cid INTEGER, - name TEXT, - type TEXT, - "notnull" INTEGER, - default_value TEXT, -- renamed from dflt_value - is_pk INTEGER, -- renamed from pk - hidden INTEGER, - PRIMARY KEY (database_name, table_name, name), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name), - FOREIGN KEY (database_name, table_name) REFERENCES 
catalog_tables(database_name, table_name) - ); - CREATE TABLE IF NOT EXISTS catalog_indexes ( - database_name TEXT, - table_name TEXT, - seq INTEGER, - name TEXT, - "unique" INTEGER, - origin TEXT, - partial INTEGER, - PRIMARY KEY (database_name, table_name, name), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name), - FOREIGN KEY (database_name, table_name) REFERENCES catalog_tables(database_name, table_name) - ); - CREATE TABLE IF NOT EXISTS catalog_foreign_keys ( - database_name TEXT, - table_name TEXT, - id INTEGER, - seq INTEGER, - "table" TEXT, - "from" TEXT, - "to" TEXT, - on_update TEXT, - on_delete TEXT, - match TEXT, - PRIMARY KEY (database_name, table_name, id, seq), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name), - FOREIGN KEY (database_name, table_name) REFERENCES catalog_tables(database_name, table_name) - ); - """ - ).strip() - await db.execute_write_script(create_tables_sql) - await initialize_metadata_tables(db) - - -async def initialize_metadata_tables(db): - await db.execute_write_script( - textwrap.dedent( - """ - CREATE TABLE IF NOT EXISTS metadata_instance ( - key text, - value text, - unique(key) - ); - - CREATE TABLE IF NOT EXISTS metadata_databases ( - database_name text, - key text, - value text, - unique(database_name, key) - ); - - CREATE TABLE IF NOT EXISTS metadata_resources ( - database_name text, - resource_name text, - key text, - value text, - unique(database_name, resource_name, key) - ); - - CREATE TABLE IF NOT EXISTS metadata_columns ( - database_name text, - resource_name text, - column_name text, - key text, - value text, - unique(database_name, resource_name, column_name, key) - ); - """ - ) - ) - - -async def populate_schema_tables(internal_db, db): - database_name = db.name - - def delete_everything(conn): - conn.execute( - "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] - ) - conn.execute( - "DELETE FROM catalog_views WHERE database_name = ?", [database_name] - ) - conn.execute( - "DELETE FROM catalog_columns WHERE database_name = ?", [database_name] - ) - conn.execute( - "DELETE FROM catalog_foreign_keys WHERE database_name = ?", - [database_name], - ) - conn.execute( - "DELETE FROM catalog_indexes WHERE database_name = ?", [database_name] - ) - - await internal_db.execute_write_fn(delete_everything) - - tables = (await db.execute("select * from sqlite_master WHERE type = 'table'")).rows - views = (await db.execute("select * from sqlite_master WHERE type = 'view'")).rows - - def collect_info(conn): - tables_to_insert = [] - views_to_insert = [] - columns_to_insert = [] - foreign_keys_to_insert = [] - indexes_to_insert = [] - - for view in views: - view_name = view["name"] - views_to_insert.append( - (database_name, view_name, view["rootpage"], view["sql"]) - ) - - for table in tables: - table_name = table["name"] - tables_to_insert.append( - (database_name, table_name, table["rootpage"], table["sql"]) - ) - columns = table_column_details(conn, table_name) - columns_to_insert.extend( - { - **{"database_name": database_name, "table_name": table_name}, - **column._asdict(), - } - for column in columns - ) - foreign_keys = conn.execute( - f"PRAGMA foreign_key_list([{table_name}])" - ).fetchall() - foreign_keys_to_insert.extend( - { - **{"database_name": database_name, "table_name": table_name}, - **dict(foreign_key), - } - for foreign_key in foreign_keys - ) - indexes = conn.execute(f"PRAGMA index_list([{table_name}])").fetchall() - indexes_to_insert.extend( - { - 
**{"database_name": database_name, "table_name": table_name}, - **dict(index), - } - for index in indexes - ) - return ( - tables_to_insert, - views_to_insert, - columns_to_insert, - foreign_keys_to_insert, - indexes_to_insert, - ) - - ( - tables_to_insert, - views_to_insert, - columns_to_insert, - foreign_keys_to_insert, - indexes_to_insert, - ) = await db.execute_fn(collect_info) - - await internal_db.execute_write_many( - """ - INSERT INTO catalog_tables (database_name, table_name, rootpage, sql) - values (?, ?, ?, ?) - """, - tables_to_insert, - ) - await internal_db.execute_write_many( - """ - INSERT INTO catalog_views (database_name, view_name, rootpage, sql) - values (?, ?, ?, ?) - """, - views_to_insert, - ) - await internal_db.execute_write_many( - """ - INSERT INTO catalog_columns ( - database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden - ) VALUES ( - :database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden - ) - """, - columns_to_insert, - ) - await internal_db.execute_write_many( - """ - INSERT INTO catalog_foreign_keys ( - database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match - ) VALUES ( - :database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match - ) - """, - foreign_keys_to_insert, - ) - await internal_db.execute_write_many( - """ - INSERT INTO catalog_indexes ( - database_name, table_name, seq, name, "unique", origin, partial - ) VALUES ( - :database_name, :table_name, :seq, :name, :unique, :origin, :partial - ) - """, - indexes_to_insert, - ) diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py deleted file mode 100644 index 6c30a12a..00000000 --- a/datasette/utils/permissions.py +++ /dev/null @@ -1,439 +0,0 @@ -# perm_utils.py -from __future__ import annotations - -import json -from typing import Any, Dict, Iterable, List, Sequence, Tuple -import sqlite3 - -from datasette.permissions import PermissionSQL -from datasette.plugins import pm -from datasette.utils import await_me_maybe - - -# Sentinel object to indicate permission checks should be skipped -SKIP_PERMISSION_CHECKS = object() - - -async def gather_permission_sql_from_hooks( - *, datasette, actor: dict | None, action: str -) -> List[PermissionSQL] | object: - """Collect PermissionSQL objects from the permission_resources_sql hook. - - Ensures that each returned PermissionSQL has a populated ``source``. - - Returns SKIP_PERMISSION_CHECKS sentinel if skip_permission_checks context variable - is set, signaling that all permission checks should be bypassed. 
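For context, each provider for this hook contributes rows with parent, child, allow and reason columns. Below is a hypothetical plugin sketch; the PermissionSQL keyword arguments (sql, params, source) are assumed from the attributes this module reads, and the actor "roles" field is invented for the example.

```python
# Hypothetical plugin sketch, assuming PermissionSQL(sql=..., params=..., source=...).
from datasette import hookimpl
from datasette.permissions import PermissionSQL


@hookimpl
def permission_resources_sql(datasette, actor, action):
    # Only contribute rules for view-table checks made by analysts
    if action != "view-table" or not actor or "analyst" not in actor.get("roles", []):
        return None  # None results are ignored by the gathering code above
    return PermissionSQL(
        # Parent-level allow: every table in the analytics database
        sql=(
            "SELECT 'analytics' AS parent, NULL AS child, "
            "1 AS allow, 'analysts may view analytics tables' AS reason"
        ),
        params={},
        source="example_analyst_plugin",
    )
```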
- """ - from datasette.permissions import _skip_permission_checks - - # Check if we should skip permission checks BEFORE calling hooks - # This avoids creating unawaited coroutines - if _skip_permission_checks.get(): - return SKIP_PERMISSION_CHECKS - - hook_caller = pm.hook.permission_resources_sql - hookimpls = hook_caller.get_hookimpls() - hook_results = list(hook_caller(datasette=datasette, actor=actor, action=action)) - - collected: List[PermissionSQL] = [] - actor_json = json.dumps(actor) if actor is not None else None - actor_id = actor.get("id") if isinstance(actor, dict) else None - - for index, result in enumerate(hook_results): - hookimpl = hookimpls[index] - resolved = await await_me_maybe(result) - default_source = _plugin_name_from_hookimpl(hookimpl) - for permission_sql in _iter_permission_sql_from_result(resolved, action=action): - if not permission_sql.source: - permission_sql.source = default_source - params = permission_sql.params or {} - params.setdefault("action", action) - params.setdefault("actor", actor_json) - params.setdefault("actor_id", actor_id) - collected.append(permission_sql) - - return collected - - -def _plugin_name_from_hookimpl(hookimpl) -> str: - if getattr(hookimpl, "plugin_name", None): - return hookimpl.plugin_name - plugin = getattr(hookimpl, "plugin", None) - if hasattr(plugin, "__name__"): - return plugin.__name__ - return repr(plugin) - - -def _iter_permission_sql_from_result( - result: Any, *, action: str -) -> Iterable[PermissionSQL]: - if result is None: - return [] - if isinstance(result, PermissionSQL): - return [result] - if isinstance(result, (list, tuple)): - collected: List[PermissionSQL] = [] - for item in result: - collected.extend(_iter_permission_sql_from_result(item, action=action)) - return collected - if callable(result): - permission_sql = result(action) # type: ignore[call-arg] - return _iter_permission_sql_from_result(permission_sql, action=action) - raise TypeError( - "Plugin providers must return PermissionSQL instances, sequences, or callables" - ) - - -# ----------------------------- -# Plugin interface & utilities -# ----------------------------- - - -def build_rules_union( - actor: dict | None, plugins: Sequence[PermissionSQL] -) -> Tuple[str, Dict[str, Any]]: - """ - Compose plugin SQL into a UNION ALL. - - Returns: - union_sql: a SELECT with columns (parent, child, allow, reason, source_plugin) - params: dict of bound parameters including :actor (JSON), :actor_id, and plugin params - - Note: Plugins are responsible for ensuring their parameter names don't conflict. - The system reserves these parameter names: :actor, :actor_id, :action, :filter_parent - Plugin parameters should be prefixed with a unique identifier (e.g., source name). 
- """ - parts: List[str] = [] - actor_json = json.dumps(actor) if actor else None - actor_id = actor.get("id") if actor else None - params: Dict[str, Any] = {"actor": actor_json, "actor_id": actor_id} - - for p in plugins: - # No namespacing - just use plugin params as-is - params.update(p.params or {}) - - # Skip plugins that only provide restriction_sql (no permission rules) - if p.sql is None: - continue - - parts.append( - f""" - SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( - {p.sql} - ) - """.strip() - ) - - if not parts: - # Empty UNION that returns no rows - union_sql = "SELECT NULL parent, NULL child, NULL allow, NULL reason, 'none' source_plugin WHERE 0" - else: - union_sql = "\nUNION ALL\n".join(parts) - - return union_sql, params - - -# ----------------------------------------------- -# Core resolvers (no temp tables, no custom UDFs) -# ----------------------------------------------- - - -async def resolve_permissions_from_catalog( - db, - actor: dict | None, - plugins: Sequence[Any], - action: str, - candidate_sql: str, - candidate_params: Dict[str, Any] | None = None, - *, - implicit_deny: bool = True, -) -> List[Dict[str, Any]]: - """ - Resolve permissions by embedding the provided *candidate_sql* in a CTE. - - Expectations: - - candidate_sql SELECTs: parent TEXT, child TEXT - (Use child=NULL for parent-scoped actions like "execute-sql".) - - *db* exposes: rows = await db.execute(sql, params) - where rows is an iterable of sqlite3.Row - - plugins: hook results handled by await_me_maybe - can be sync/async, - single PermissionSQL, list, or callable returning PermissionSQL - - actor is the actor dict (or None), made available as :actor (JSON), :actor_id, and :action - - Decision policy: - 1) Specificity first: child (depth=2) > parent (depth=1) > root (depth=0) - 2) Within the same depth: deny (0) beats allow (1) - 3) If no matching rule: - - implicit_deny=True -> treat as allow=0, reason='implicit deny' - - implicit_deny=False -> allow=None, reason=None - - Returns: list of dict rows - - parent, child, allow, reason, source_plugin, depth - - resource (rendered "/parent/child" or "/parent" or "/") - """ - resolved_plugins: List[PermissionSQL] = [] - restriction_sqls: List[str] = [] - - for plugin in plugins: - if callable(plugin) and not isinstance(plugin, PermissionSQL): - resolved = plugin(action) # type: ignore[arg-type] - else: - resolved = plugin # type: ignore[assignment] - if not isinstance(resolved, PermissionSQL): - raise TypeError("Plugin providers must return PermissionSQL instances") - resolved_plugins.append(resolved) - - # Collect restriction SQL filters - if resolved.restriction_sql: - restriction_sqls.append(resolved.restriction_sql) - - union_sql, rule_params = build_rules_union(actor, resolved_plugins) - all_params = { - **(candidate_params or {}), - **rule_params, - "action": action, - } - - sql = f""" - WITH - cands AS ( - {candidate_sql} - ), - rules AS ( - {union_sql} - ), - matched AS ( - SELECT - c.parent, c.child, - r.allow, r.reason, r.source_plugin, - CASE - WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) - WHEN r.parent IS NOT NULL THEN 1 -- parent-level - ELSE 0 -- root/global - END AS depth - FROM cands c - JOIN rules r - ON (r.parent IS NULL OR r.parent = c.parent) - AND (r.child IS NULL OR r.child = c.child) - ), - ranked AS ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY parent, child - ORDER BY - depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same 
depth - source_plugin -- stable tie-break - ) AS rn - FROM matched - ), - winner AS ( - SELECT parent, child, - allow, reason, source_plugin, depth - FROM ranked WHERE rn = 1 - ) - SELECT - c.parent, c.child, - COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) AS allow, - COALESCE(w.reason, CASE WHEN :implicit_deny THEN 'implicit deny' ELSE NULL END) AS reason, - w.source_plugin, - COALESCE(w.depth, -1) AS depth, - :action AS action, - CASE - WHEN c.parent IS NULL THEN '/' - WHEN c.child IS NULL THEN '/' || c.parent - ELSE '/' || c.parent || '/' || c.child - END AS resource - FROM cands c - LEFT JOIN winner w - ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) - AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) - ORDER BY c.parent, c.child - """ - - # If there are restriction filters, wrap the query with INTERSECT - # This ensures only resources in the restriction allowlist are returned - if restriction_sqls: - # Start with the main query, but select only parent/child for the INTERSECT - main_query_for_intersect = f""" - WITH - cands AS ( - {candidate_sql} - ), - rules AS ( - {union_sql} - ), - matched AS ( - SELECT - c.parent, c.child, - r.allow, r.reason, r.source_plugin, - CASE - WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) - WHEN r.parent IS NOT NULL THEN 1 -- parent-level - ELSE 0 -- root/global - END AS depth - FROM cands c - JOIN rules r - ON (r.parent IS NULL OR r.parent = c.parent) - AND (r.child IS NULL OR r.child = c.child) - ), - ranked AS ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY parent, child - ORDER BY - depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth - source_plugin -- stable tie-break - ) AS rn - FROM matched - ), - winner AS ( - SELECT parent, child, - allow, reason, source_plugin, depth - FROM ranked WHERE rn = 1 - ), - permitted_resources AS ( - SELECT c.parent, c.child - FROM cands c - LEFT JOIN winner w - ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) - AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) - WHERE COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) = 1 - ) - SELECT parent, child FROM permitted_resources - """ - - # Build restriction list with INTERSECT (all must match) - # Then filter to resources that match hierarchically - # Wrap each restriction_sql in a subquery to avoid operator precedence issues - # with UNION ALL inside the restriction SQL statements - restriction_intersect = "\nINTERSECT\n".join( - f"SELECT * FROM ({sql})" for sql in restriction_sqls - ) - - # Combine: resources allowed by permissions AND in restriction allowlist - # Database-level restrictions (parent, NULL) should match all children (parent, *) - filtered_resources = f""" - WITH restriction_list AS ( - {restriction_intersect} - ), - permitted AS ( - {main_query_for_intersect} - ), - filtered AS ( - SELECT p.parent, p.child - FROM permitted p - WHERE EXISTS ( - SELECT 1 FROM restriction_list r - WHERE (r.parent = p.parent OR r.parent IS NULL) - AND (r.child = p.child OR r.child IS NULL) - ) - ) - """ - - # Now join back to get full results for only the filtered resources - sql = f""" - {filtered_resources} - , cands AS ( - {candidate_sql} - ), - rules AS ( - {union_sql} - ), - matched AS ( - SELECT - c.parent, c.child, - r.allow, r.reason, r.source_plugin, - CASE - WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) - WHEN r.parent IS NOT NULL THEN 1 -- parent-level - ELSE 0 
-- root/global - END AS depth - FROM cands c - JOIN rules r - ON (r.parent IS NULL OR r.parent = c.parent) - AND (r.child IS NULL OR r.child = c.child) - ), - ranked AS ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY parent, child - ORDER BY - depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth - source_plugin -- stable tie-break - ) AS rn - FROM matched - ), - winner AS ( - SELECT parent, child, - allow, reason, source_plugin, depth - FROM ranked WHERE rn = 1 - ) - SELECT - c.parent, c.child, - COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) AS allow, - COALESCE(w.reason, CASE WHEN :implicit_deny THEN 'implicit deny' ELSE NULL END) AS reason, - w.source_plugin, - COALESCE(w.depth, -1) AS depth, - :action AS action, - CASE - WHEN c.parent IS NULL THEN '/' - WHEN c.child IS NULL THEN '/' || c.parent - ELSE '/' || c.parent || '/' || c.child - END AS resource - FROM filtered c - LEFT JOIN winner w - ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) - AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) - ORDER BY c.parent, c.child - """ - - rows_iter: Iterable[sqlite3.Row] = await db.execute( - sql, - {**all_params, "implicit_deny": 1 if implicit_deny else 0}, - ) - return [dict(r) for r in rows_iter] - - -async def resolve_permissions_with_candidates( - db, - actor: dict | None, - plugins: Sequence[Any], - candidates: List[Tuple[str, str | None]], - action: str, - *, - implicit_deny: bool = True, -) -> List[Dict[str, Any]]: - """ - Resolve permissions without any external candidate table by embedding - the candidates as a UNION of parameterized SELECTs in a CTE. - - candidates: list of (parent, child) where child can be None for parent-scoped actions. - actor: actor dict (or None), made available as :actor (JSON), :actor_id, and :action - """ - # Build a small CTE for candidates. - cand_rows_sql: List[str] = [] - cand_params: Dict[str, Any] = {} - for i, (parent, child) in enumerate(candidates): - pkey = f"cand_p_{i}" - ckey = f"cand_c_{i}" - cand_params[pkey] = parent - cand_params[ckey] = child - cand_rows_sql.append(f"SELECT :{pkey} AS parent, :{ckey} AS child") - candidate_sql = ( - "\nUNION ALL\n".join(cand_rows_sql) - if cand_rows_sql - else "SELECT NULL AS parent, NULL AS child WHERE 0" - ) - - return await resolve_permissions_from_catalog( - db, - actor, - plugins, - action, - candidate_sql=candidate_sql, - candidate_params=cand_params, - implicit_deny=implicit_deny, - ) diff --git a/datasette/utils/shutil_backport.py b/datasette/utils/shutil_backport.py deleted file mode 100644 index d1fd1bd7..00000000 --- a/datasette/utils/shutil_backport.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -Backported from Python 3.8. 
- -This code is licensed under the Python License: -https://github.com/python/cpython/blob/v3.8.3/LICENSE -""" - -import os -from shutil import copy, copy2, copystat, Error - - -def _copytree( - entries, - src, - dst, - symlinks, - ignore, - copy_function, - ignore_dangling_symlinks, - dirs_exist_ok=False, -): - if ignore is not None: - ignored_names = ignore(src, set(os.listdir(src))) - else: - ignored_names = set() - - os.makedirs(dst, exist_ok=dirs_exist_ok) - errors = [] - use_srcentry = copy_function is copy2 or copy_function is copy - - for srcentry in entries: - if srcentry.name in ignored_names: - continue - srcname = os.path.join(src, srcentry.name) - dstname = os.path.join(dst, srcentry.name) - srcobj = srcentry if use_srcentry else srcname - try: - if srcentry.is_symlink(): - linkto = os.readlink(srcname) - if symlinks: - os.symlink(linkto, dstname) - copystat(srcobj, dstname, follow_symlinks=not symlinks) - else: - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - if srcentry.is_dir(): - copytree( - srcobj, - dstname, - symlinks, - ignore, - copy_function, - dirs_exist_ok=dirs_exist_ok, - ) - else: - copy_function(srcobj, dstname) - elif srcentry.is_dir(): - copytree( - srcobj, - dstname, - symlinks, - ignore, - copy_function, - dirs_exist_ok=dirs_exist_ok, - ) - else: - copy_function(srcentry, dstname) - except Error as err: - errors.extend(err.args[0]) - except OSError as why: - errors.append((srcname, dstname, str(why))) - try: - copystat(src, dst) - except OSError as why: - # Copying file access times may fail on Windows - if getattr(why, "winerror", None) is None: - errors.append((src, dst, str(why))) - if errors: - raise Error(errors) - return dst - - -def copytree( - src, - dst, - symlinks=False, - ignore=None, - copy_function=copy2, - ignore_dangling_symlinks=False, - dirs_exist_ok=False, -): - with os.scandir(src) as entries: - return _copytree( - entries=entries, - src=src, - dst=dst, - symlinks=symlinks, - ignore=ignore, - copy_function=copy_function, - ignore_dangling_symlinks=ignore_dangling_symlinks, - dirs_exist_ok=dirs_exist_ok, - ) diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py deleted file mode 100644 index 342ff3fa..00000000 --- a/datasette/utils/sqlite.py +++ /dev/null @@ -1,39 +0,0 @@ -using_pysqlite3 = False -try: - import pysqlite3 as sqlite3 - - using_pysqlite3 = True -except ImportError: - import sqlite3 - -if hasattr(sqlite3, "enable_callback_tracebacks"): - sqlite3.enable_callback_tracebacks(True) - -_cached_sqlite_version = None - - -def sqlite_version(): - global _cached_sqlite_version - if _cached_sqlite_version is None: - _cached_sqlite_version = _sqlite_version() - return _cached_sqlite_version - - -def _sqlite_version(): - return tuple( - map( - int, - sqlite3.connect(":memory:") - .execute("select sqlite_version()") - .fetchone()[0] - .split("."), - ) - ) - - -def supports_table_xinfo(): - return sqlite_version() >= (3, 26, 0) - - -def supports_generated_columns(): - return sqlite_version() >= (3, 31, 0) diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py deleted file mode 100644 index 1606da05..00000000 --- a/datasette/utils/testing.py +++ /dev/null @@ -1,181 +0,0 @@ -from asgiref.sync import async_to_sync -from urllib.parse import urlencode -import json - -# These wrapper classes pre-date the introduction of -# datasette.client and httpx to Datasette. They could -# be removed if the Datasette tests are modified to -# call datasette.client directly. 
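The comment above names the replacement: tests can call the httpx-based datasette.client directly instead of going through these wrappers. A minimal sketch, assuming pytest-asyncio is installed:

```python
# Sketch of a test that bypasses TestClient and uses datasette.client directly.
import pytest
from datasette.app import Datasette


@pytest.mark.asyncio
async def test_versions_endpoint():
    datasette = Datasette(memory=True)
    # datasette.client wraps httpx, so this returns an httpx.Response
    response = await datasette.client.get("/-/versions.json")
    assert response.status_code == 200
    assert "python" in response.json()
```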
- - -class TestResponse: - def __init__(self, httpx_response): - self.httpx_response = httpx_response - - @property - def status(self): - return self.httpx_response.status_code - - # Supports both for test-writing convenience - @property - def status_code(self): - return self.status - - @property - def headers(self): - return self.httpx_response.headers - - @property - def body(self): - return self.httpx_response.content - - @property - def content(self): - return self.body - - @property - def cookies(self): - return dict(self.httpx_response.cookies) - - @property - def json(self): - return json.loads(self.text) - - @property - def text(self): - return self.body.decode("utf8") - - -class TestClient: - max_redirects = 5 - - def __init__(self, ds): - self.ds = ds - - def actor_cookie(self, actor): - return self.ds.sign({"a": actor}, "actor") - - @async_to_sync - async def get( - self, - path, - follow_redirects=False, - redirect_count=0, - method="GET", - params=None, - cookies=None, - if_none_match=None, - headers=None, - ): - if params: - path += "?" + urlencode(params, doseq=True) - return await self._request( - path=path, - follow_redirects=follow_redirects, - redirect_count=redirect_count, - method=method, - cookies=cookies, - if_none_match=if_none_match, - headers=headers, - ) - - @async_to_sync - async def post( - self, - path, - post_data=None, - body=None, - follow_redirects=False, - redirect_count=0, - content_type="application/x-www-form-urlencoded", - cookies=None, - headers=None, - csrftoken_from=None, - ): - cookies = cookies or {} - post_data = post_data or {} - assert not (post_data and body), "Provide one or other of body= or post_data=" - # Maybe fetch a csrftoken first - if csrftoken_from is not None: - assert body is None, "body= is not compatible with csrftoken_from=" - if csrftoken_from is True: - csrftoken_from = path - token_response = await self._request(csrftoken_from, cookies=cookies) - csrftoken = token_response.cookies["ds_csrftoken"] - cookies["ds_csrftoken"] = csrftoken - post_data["csrftoken"] = csrftoken - if post_data: - body = urlencode(post_data, doseq=True) - return await self._request( - path=path, - follow_redirects=follow_redirects, - redirect_count=redirect_count, - method="POST", - cookies=cookies, - headers=headers, - post_body=body, - content_type=content_type, - ) - - @async_to_sync - async def request( - self, - path, - follow_redirects=True, - redirect_count=0, - method="GET", - cookies=None, - headers=None, - post_body=None, - content_type=None, - if_none_match=None, - ): - return await self._request( - path, - follow_redirects=follow_redirects, - redirect_count=redirect_count, - method=method, - cookies=cookies, - headers=headers, - post_body=post_body, - content_type=content_type, - if_none_match=if_none_match, - ) - - async def _request( - self, - path, - follow_redirects=True, - redirect_count=0, - method="GET", - cookies=None, - headers=None, - post_body=None, - content_type=None, - if_none_match=None, - ): - await self.ds.invoke_startup() - headers = headers or {} - if content_type: - headers["content-type"] = content_type - if if_none_match: - headers["if-none-match"] = if_none_match - httpx_response = await self.ds.client.request( - method, - path, - follow_redirects=follow_redirects, - avoid_path_rewrites=True, - cookies=cookies, - headers=headers, - content=post_body, - ) - response = TestResponse(httpx_response) - if follow_redirects and response.status in (301, 302): - assert ( - redirect_count < self.max_redirects - ), f"Redirected 
{redirect_count} times, max_redirects={self.max_redirects}" - location = response.headers["Location"] - return await self._request( - location, follow_redirects=True, redirect_count=redirect_count + 1 - ) - return response diff --git a/datasette/version.py b/datasette/version.py index fff37a72..e1fed2c4 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,6 @@ -__version__ = "1.0a23" +from ._version import get_versions + +__version__ = get_versions()["version"] +del get_versions + __version_info__ = tuple(__version__.split(".")) diff --git a/datasette/views/__init__.py b/datasette/views/__init__.py index 88106737..e69de29b 100644 --- a/datasette/views/__init__.py +++ b/datasette/views/__init__.py @@ -1,2 +0,0 @@ -class Context: - "Base class for all documented contexts" diff --git a/datasette/views/base.py b/datasette/views/base.py index 5216924f..4d783a07 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -1,36 +1,43 @@ import asyncio import csv -import hashlib -import json -import sys -import textwrap +import itertools +import re import time import urllib -from markupsafe import escape +import jinja2 +import pint -from datasette.database import QueryInterrupted -from datasette.utils.asgi import Request +from html import escape + +from datasette import __version__ +from datasette.plugins import pm from datasette.utils import ( - add_cors_headers, - await_me_maybe, - EscapeHtmlWriter, + QueryInterrupted, InvalidSql, LimitedWriter, - call_with_supported_arguments, - path_from_row_pks, + format_bytes, + is_url, path_with_added_args, path_with_removed_args, path_with_format, + resolve_table_and_format, sqlite3, + to_css_class, ) from datasette.utils.asgi import ( AsgiStream, + AsgiWriter, + AsgiRouter, + AsgiView, NotFound, Response, - BadRequest, ) +ureg = pint.UnitRegistry() + +HASH_LENGTH = 7 + class DatasetteError(Exception): def __init__( @@ -40,237 +47,390 @@ class DatasetteError(Exception): error_dict=None, status=500, template=None, - message_is_html=False, + messagge_is_html=False, ): self.message = message self.title = title self.error_dict = error_dict or {} self.status = status - self.message_is_html = message_is_html + self.messagge_is_html = messagge_is_html -class View: - async def head(self, request, datasette): - if not hasattr(self, "get"): - return await self.method_not_allowed(request) - response = await self.get(request, datasette) - response.body = "" - return response - - async def method_not_allowed(self, request): - if ( - request.path.endswith(".json") - or request.headers.get("content-type") == "application/json" - ): - response = Response.json( - {"ok": False, "error": "Method not allowed"}, status=405 - ) - else: - response = Response.text("Method not allowed", status=405) - return response - - async def options(self, request, datasette): - response = Response.text("ok") - response.headers["allow"] = ", ".join( - method.upper() - for method in ("head", "get", "post", "put", "patch", "delete") - if hasattr(self, method) - ) - return response - - async def __call__(self, request, datasette): - try: - handler = getattr(self, request.method.lower()) - except AttributeError: - return await self.method_not_allowed(request) - return await handler(request, datasette) - - -class BaseView: +class BaseView(AsgiView): ds = None - has_json_alternate = True - - def __init__(self, datasette): - self.ds = datasette async def head(self, *args, **kwargs): response = await self.get(*args, **kwargs) response.body = b"" return response - async def 
method_not_allowed(self, request): - if ( - request.path.endswith(".json") - or request.headers.get("content-type") == "application/json" + def _asset_urls(self, key, template, context): + # Flatten list-of-lists from plugins: + seen_urls = set() + for url_or_dict in itertools.chain( + itertools.chain.from_iterable( + getattr(pm.hook, key)( + template=template.name, + database=context.get("database"), + table=context.get("table"), + datasette=self.ds, + ) + ), + (self.ds.metadata(key) or []), ): - response = Response.json( - {"ok": False, "error": "Method not allowed"}, status=405 - ) + if isinstance(url_or_dict, dict): + url = url_or_dict["url"] + sri = url_or_dict.get("sri") + else: + url = url_or_dict + sri = None + if url in seen_urls: + continue + seen_urls.add(url) + if sri: + yield {"url": url, "sri": sri} + else: + yield {"url": url} + + def database_url(self, database): + db = self.ds.databases[database] + if self.ds.config("hash_urls") and db.hash: + return "/{}-{}".format(database, db.hash[:HASH_LENGTH]) else: - response = Response.text("Method not allowed", status=405) - return response + return "/{}".format(database) - async def options(self, request, *args, **kwargs): - return Response.text("ok") + def database_color(self, database): + return "ff0000" - async def get(self, request, *args, **kwargs): - return await self.method_not_allowed(request) + async def render(self, templates, request, context): + template = self.ds.jinja_env.select_template(templates) + select_templates = [ + "{}{}".format("*" if template_name == template.name else "", template_name) + for template_name in templates + ] + body_scripts = [] + # pylint: disable=no-member + for script in pm.hook.extra_body_script( + template=template.name, + database=context.get("database"), + table=context.get("table"), + view_name=self.name, + datasette=self.ds, + ): + body_scripts.append(jinja2.Markup(script)) - async def post(self, request, *args, **kwargs): - return await self.method_not_allowed(request) - - async def put(self, request, *args, **kwargs): - return await self.method_not_allowed(request) - - async def patch(self, request, *args, **kwargs): - return await self.method_not_allowed(request) - - async def delete(self, request, *args, **kwargs): - return await self.method_not_allowed(request) - - async def dispatch_request(self, request): - if self.ds: - await self.ds.refresh_schemas() - handler = getattr(self, request.method.lower(), None) - response = await handler(request) - if self.ds.cors: - add_cors_headers(response.headers) - return response - - async def render(self, templates, request, context=None): - context = context or {} - environment = self.ds.get_jinja_environment(request) - template = environment.select_template(templates) - template_context = { - **context, - **{ - "select_templates": [ - f"{'*' if template_name == template.name else ''}{template_name}" - for template_name in templates - ], - }, - } - headers = {} - if self.has_json_alternate: - alternate_url_json = self.ds.absolute_url( - request, - self.ds.urls.path(path_with_format(request=request, format="json")), + extra_template_vars = {} + # pylint: disable=no-member + for extra_vars in pm.hook.extra_template_vars( + template=template.name, + database=context.get("database"), + table=context.get("table"), + view_name=self.name, + request=request, + datasette=self.ds, + ): + if callable(extra_vars): + extra_vars = extra_vars() + if asyncio.iscoroutine(extra_vars): + extra_vars = await extra_vars + assert isinstance(extra_vars, dict), 
"extra_vars is of type {}".format( + type(extra_vars) ) - template_context["alternate_url_json"] = alternate_url_json - headers.update( + extra_template_vars.update(extra_vars) + + return Response.html( + template.render( { - "Link": '<{}>; rel="alternate"; type="application/json+datasette"'.format( - alternate_url_json - ) + **context, + **{ + "app_css_hash": self.ds.app_css_hash(), + "select_templates": select_templates, + "zip": zip, + "body_scripts": body_scripts, + "extra_css_urls": self._asset_urls( + "extra_css_urls", template, context + ), + "extra_js_urls": self._asset_urls( + "extra_js_urls", template, context + ), + "format_bytes": format_bytes, + "database_url": self.database_url, + "database_color": self.database_color, + }, + **extra_template_vars, } ) - return Response.html( - await self.ds.render_template( - template, - template_context, - request=request, - view_name=self.name, - ), - headers=headers, ) - @classmethod - def as_view(cls, *class_args, **class_kwargs): - async def view(request, send): - self = view.view_class(*class_args, **class_kwargs) - return await self.dispatch_request(request) - - view.view_class = cls - view.__doc__ = cls.__doc__ - view.__module__ = cls.__module__ - view.__name__ = cls.__name__ - return view - class DataView(BaseView): name = "" + re_named_parameter = re.compile(":([a-zA-Z0-9_]+)") + + def __init__(self, datasette): + self.ds = datasette + + def options(self, request, *args, **kwargs): + r = Response.text("ok") + if self.ds.cors: + r.headers["Access-Control-Allow-Origin"] = "*" + return r def redirect(self, request, path, forward_querystring=True, remove_args=None): if request.query_string and "?" not in path and forward_querystring: - path = f"{path}?{request.query_string}" + path = "{}?{}".format(path, request.query_string) if remove_args: path = path_with_removed_args(request, remove_args, path=path) r = Response.redirect(path) - r.headers["Link"] = f"<{path}>; rel=preload" + r.headers["Link"] = "<{}>; rel=preload".format(path) if self.ds.cors: - add_cors_headers(r.headers) + r.headers["Access-Control-Allow-Origin"] = "*" return r - async def data(self, request): + async def data(self, request, database, hash, **kwargs): raise NotImplementedError - async def as_csv(self, request, database): - return await stream_csv(self.ds, self.data, request, database) - - async def get(self, request): - db = await self.ds.resolve_database(request) - database = db.name - database_route = db.route - - _format = request.url_vars["format"] - data_kwargs = {} - - if _format == "csv": - return await self.as_csv(request, database_route) - - if _format is None: - # HTML views default to expanding all foreign key labels - data_kwargs["default_labels"] = True - - extra_template_data = {} - start = time.perf_counter() - status_code = None - templates = [] + async def resolve_db_name(self, request, db_name, **kwargs): + hash = None + name = None + if "-" in db_name: + # Might be name-and-hash, or might just be + # a name with a hyphen in it + name, hash = db_name.rsplit("-", 1) + if name not in self.ds.databases: + # Try the whole name + name = db_name + hash = None + else: + name = db_name + if "%" in name: + name = urllib.parse.unquote_plus(name) + # Verify the hash try: - response_or_template_contexts = await self.data(request, **data_kwargs) + db = self.ds.databases[name] + except KeyError: + raise NotFound("Database not found: {}".format(name)) + + expected = "000" + if db.hash is not None: + expected = db.hash[:HASH_LENGTH] + correct_hash_provided = 
expected == hash + + if not correct_hash_provided: + if "table_and_format" in kwargs: + + async def async_table_exists(t): + return await db.table_exists(t) + + table, _format = await resolve_table_and_format( + table_and_format=urllib.parse.unquote_plus( + kwargs["table_and_format"] + ), + table_exists=async_table_exists, + allowed_formats=self.ds.renderers.keys(), + ) + kwargs["table"] = table + if _format: + kwargs["as_format"] = ".{}".format(_format) + elif kwargs.get("table"): + kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"]) + + should_redirect = "/{}-{}".format(name, expected) + if kwargs.get("table"): + should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"]) + if kwargs.get("pk_path"): + should_redirect += "/" + kwargs["pk_path"] + if kwargs.get("as_format"): + should_redirect += kwargs["as_format"] + if kwargs.get("as_db"): + should_redirect += kwargs["as_db"] + + if ( + (self.ds.config("hash_urls") or "_hash" in request.args) + and + # Redirect only if database is immutable + not self.ds.databases[name].is_mutable + ): + return name, expected, correct_hash_provided, should_redirect + + return name, expected, correct_hash_provided, None + + def get_templates(self, database, table=None): + assert NotImplemented + + async def get(self, request, db_name, **kwargs): + database, hash, correct_hash_provided, should_redirect = await self.resolve_db_name( + request, db_name, **kwargs + ) + if should_redirect: + return self.redirect(request, should_redirect, remove_args={"_hash"}) + + return await self.view_get( + request, database, hash, correct_hash_provided, **kwargs + ) + + async def as_csv(self, request, database, hash, **kwargs): + stream = request.args.get("_stream") + if stream: + # Some quick sanity checks + if not self.ds.config("allow_csv_stream"): + raise DatasetteError("CSV streaming is disabled", status=400) + if request.args.get("_next"): + raise DatasetteError("_next not allowed for CSV streaming", status=400) + kwargs["_size"] = "max" + # Fetch the first page + try: + response_or_template_contexts = await self.data( + request, database, hash, **kwargs + ) if isinstance(response_or_template_contexts, Response): return response_or_template_contexts - # If it has four items, it includes an HTTP status code - if len(response_or_template_contexts) == 4: - ( - data, - extra_template_data, - templates, - status_code, - ) = response_or_template_contexts else: - data, extra_template_data, templates = response_or_template_contexts - except QueryInterrupted as ex: - raise DatasetteError( - textwrap.dedent( - """ -
    SQL query took too long. The time limit is controlled by the - sql_time_limit_ms - configuration option.
    - - - """.format( - escape(ex.sql) - ) - ).strip(), - title="SQL Interrupted", - status=400, - message_is_html=True, - ) + data, _, _ = response_or_template_contexts except (sqlite3.OperationalError, InvalidSql) as e: raise DatasetteError(str(e), title="Invalid SQL", status=400) - except sqlite3.OperationalError as e: + except (sqlite3.OperationalError) as e: raise DatasetteError(str(e)) except DatasetteError: raise - end = time.perf_counter() + # Convert rows and columns to CSV + headings = data["columns"] + # if there are expanded_columns we need to add additional headings + expanded_columns = set(data.get("expanded_columns") or []) + if expanded_columns: + headings = [] + for column in data["columns"]: + headings.append(column) + if column in expanded_columns: + headings.append("{}_label".format(column)) + + async def stream_fn(r): + nonlocal data + writer = csv.writer(LimitedWriter(r, self.ds.config("max_csv_mb"))) + first = True + next = None + while first or (next and stream): + try: + if next: + kwargs["_next"] = next + if not first: + data, _, _ = await self.data(request, database, hash, **kwargs) + if first: + await writer.writerow(headings) + first = False + next = data.get("next") + for row in data["rows"]: + if not expanded_columns: + # Simple path + await writer.writerow(row) + else: + # Look for {"value": "label": } dicts and expand + new_row = [] + for cell in row: + if isinstance(cell, dict): + new_row.append(cell["value"]) + new_row.append(cell["label"]) + else: + new_row.append(cell) + await writer.writerow(new_row) + except Exception as e: + print("caught this", e) + await r.write(str(e)) + return + + content_type = "text/plain; charset=utf-8" + headers = {} + if self.ds.cors: + headers["Access-Control-Allow-Origin"] = "*" + if request.args.get("_dl", None): + content_type = "text/csv; charset=utf-8" + disposition = 'attachment; filename="{}.csv"'.format( + kwargs.get("table", database) + ) + headers["Content-Disposition"] = disposition + + return AsgiStream(stream_fn, headers=headers, content_type=content_type) + + async def get_format(self, request, database, args): + """ Determine the format of the response from the request, from URL + parameters or from a file extension. + + `args` is a dict of the path components parsed from the URL by the router. 
+ """ + # If ?_format= is provided, use that as the format + _format = request.args.get("_format", None) + if not _format: + _format = (args.pop("as_format", None) or "").lstrip(".") + if "table_and_format" in args: + db = self.ds.databases[database] + + async def async_table_exists(t): + return await db.table_exists(t) + + table, _ext_format = await resolve_table_and_format( + table_and_format=urllib.parse.unquote_plus(args["table_and_format"]), + table_exists=async_table_exists, + allowed_formats=self.ds.renderers.keys(), + ) + _format = _format or _ext_format + args["table"] = table + del args["table_and_format"] + elif "table" in args: + args["table"] = urllib.parse.unquote_plus(args["table"]) + return _format, args + + async def view_get(self, request, database, hash, correct_hash_provided, **kwargs): + _format, kwargs = await self.get_format(request, database, kwargs) + + if _format == "csv": + return await self.as_csv(request, database, hash, **kwargs) + + if _format is None: + # HTML views default to expanding all foriegn key labels + kwargs["default_labels"] = True + + extra_template_data = {} + start = time.time() + status_code = 200 + templates = [] + try: + response_or_template_contexts = await self.data( + request, database, hash, **kwargs + ) + if isinstance(response_or_template_contexts, Response): + return response_or_template_contexts + + else: + data, extra_template_data, templates = response_or_template_contexts + except QueryInterrupted: + raise DatasetteError( + """ + SQL query took too long. The time limit is controlled by the + sql_time_limit_ms + configuration option. + """, + title="SQL Interrupted", + status=400, + messagge_is_html=True, + ) + except (sqlite3.OperationalError, InvalidSql) as e: + raise DatasetteError(str(e), title="Invalid SQL", status=400) + + except (sqlite3.OperationalError) as e: + raise DatasetteError(str(e)) + + except DatasetteError: + raise + + end = time.time() data["query_ms"] = (end - start) * 1000 + for key in ("source", "source_url", "license", "license_url"): + value = self.ds.metadata(key) + if value: + data[key] = value # Special case for .jsono extension - redirect to _shape=objects if _format == "jsono": @@ -287,41 +447,15 @@ class DataView(BaseView): if _format in self.ds.renderers.keys(): # Dispatch request to the correct output format renderer # (CSV is not handled here due to streaming) - result = call_with_supported_arguments( - self.ds.renderers[_format][0], - datasette=self.ds, - columns=data.get("columns") or [], - rows=data.get("rows") or [], - sql=data.get("query", {}).get("sql", None), - query_name=data.get("query_name"), - database=database, - table=data.get("table"), - request=request, - view_name=self.name, - truncated=False, # TODO: support this - error=data.get("error"), - # These will be deprecated in Datasette 1.0: - args=request.args, - data=data, - ) - if asyncio.iscoroutine(result): - result = await result + result = self.ds.renderers[_format](request.args, data, self.name) if result is None: raise NotFound("No data") - if isinstance(result, dict): - r = Response( - body=result.get("body"), - status=result.get("status_code", status_code or 200), - content_type=result.get("content_type", "text/plain"), - headers=result.get("headers"), - ) - elif isinstance(result, Response): - r = result - if status_code is not None: - # Over-ride the status code - r.status = status_code - else: - assert False, f"{result} should be dict or Response" + + r = Response( + body=result.get("body"), + 
status=result.get("status_code", 200), + content_type=result.get("content_type", "text/plain"), + ) else: extras = {} if callable(extra_template_data): @@ -334,32 +468,12 @@ class DataView(BaseView): if data.get("expandable_columns"): url_labels_extra = {"_labels": "on"} - renderers = {} - for key, (_, can_render) in self.ds.renderers.items(): - it_can_render = call_with_supported_arguments( - can_render, - datasette=self.ds, - columns=data.get("columns") or [], - rows=data.get("rows") or [], - sql=data.get("query", {}).get("sql", None), - query_name=data.get("query_name"), - database=database, - table=data.get("table"), - request=request, - view_name=self.name, - ) - it_can_render = await await_me_maybe(it_can_render) - if it_can_render: - renderers[key] = self.ds.urls.path( - path_with_format( - request=request, format=key, extra_qs={**url_labels_extra} - ) - ) - + renderers = { + key: path_with_format(request, key, {**url_labels_extra}) + for key in self.ds.renderers.keys() + } url_csv_args = {"_size": "max", **url_labels_extra} - url_csv = self.ds.urls.path( - path_with_format(request=request, format="csv", extra_qs=url_csv_args) - ) + url_csv = path_with_format(request, "csv", url_csv_args) url_csv_path = url_csv.split("?")[0] context = { **data, @@ -374,18 +488,21 @@ class DataView(BaseView): if key not in ("_labels", "_facet", "_size") ] + [("_size", "max")], - "settings": self.ds.settings_dict(), + "datasette_version": __version__, + "config": self.ds.config_dict(), }, } if "metadata" not in context: - context["metadata"] = await self.ds.get_instance_metadata() + context["metadata"] = self.ds.metadata r = await self.render(templates, request=request, context=context) - if status_code is not None: - r.status = status_code + r.status = status_code ttl = request.args.get("_ttl", None) if ttl is None or not ttl.isdigit(): - ttl = self.ds.setting("default_cache_ttl") + if correct_hash_provided: + ttl = self.ds.config("default_cache_ttl_hashed") + else: + ttl = self.ds.config("default_cache_ttl") return self.set_response_headers(r, ttl) @@ -396,180 +513,110 @@ class DataView(BaseView): if ttl == 0: ttl_header = "no-cache" else: - ttl_header = f"max-age={ttl}" + ttl_header = "max-age={}".format(ttl) response.headers["Cache-Control"] = ttl_header response.headers["Referrer-Policy"] = "no-referrer" if self.ds.cors: - add_cors_headers(response.headers) + response.headers["Access-Control-Allow-Origin"] = "*" return response + async def custom_sql( + self, + request, + database, + hash, + sql, + editable=True, + canned_query=None, + metadata=None, + _size=None, + ): + params = request.raw_args + if "sql" in params: + params.pop("sql") + if "_shape" in params: + params.pop("_shape") + # Extract any :named parameters + named_parameters = self.re_named_parameter.findall(sql) + named_parameter_values = { + named_parameter: params.get(named_parameter) or "" + for named_parameter in named_parameters + } -def _error(messages, status=400): - return Response.json({"ok": False, "errors": messages}, status=status) + # Set to blank string if missing from params + for named_parameter in named_parameters: + if named_parameter not in params: + params[named_parameter] = "" - -async def stream_csv(datasette, fetch_data, request, database): - kwargs = {} - stream = request.args.get("_stream") - # Do not calculate facets or counts: - extra_parameters = [ - "{}=1".format(key) - for key in ("_nofacet", "_nocount") - if not request.args.get(key) - ] - if extra_parameters: - # Replace request object with a new one 
with modified scope - if not request.query_string: - new_query_string = "&".join(extra_parameters) - else: - new_query_string = request.query_string + "&" + "&".join(extra_parameters) - new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1")) - receive = request.receive - request = Request(new_scope, receive) - if stream: - # Some quick soundness checks - if not datasette.setting("allow_csv_stream"): - raise BadRequest("CSV streaming is disabled") - if request.args.get("_next"): - raise BadRequest("_next not allowed for CSV streaming") - kwargs["_size"] = "max" - # Fetch the first page - try: - response_or_template_contexts = await fetch_data(request) - if isinstance(response_or_template_contexts, Response): - return response_or_template_contexts - elif len(response_or_template_contexts) == 4: - data, _, _, _ = response_or_template_contexts - else: - data, _, _ = response_or_template_contexts - except (sqlite3.OperationalError, InvalidSql) as e: - raise DatasetteError(str(e), title="Invalid SQL", status=400) - - except sqlite3.OperationalError as e: - raise DatasetteError(str(e)) - - except DatasetteError: - raise - - # Convert rows and columns to CSV - headings = data["columns"] - # if there are expanded_columns we need to add additional headings - expanded_columns = set(data.get("expanded_columns") or []) - if expanded_columns: - headings = [] - for column in data["columns"]: - headings.append(column) - if column in expanded_columns: - headings.append(f"{column}_label") - - content_type = "text/plain; charset=utf-8" - preamble = "" - postamble = "" - - trace = request.args.get("_trace") - if trace: - content_type = "text/html; charset=utf-8" - preamble = ( - "CSV debug" - '" + columns = [r[0] for r in results.description] - async def stream_fn(r): - nonlocal data, trace - limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb")) - if trace: - await limited_writer.write(preamble) - writer = csv.writer(EscapeHtmlWriter(limited_writer)) - else: - writer = csv.writer(limited_writer) - first = True - next = None - while first or (next and stream): - try: - kwargs = {} - if next: - kwargs["_next"] = next - if not first: - data, _, _ = await fetch_data(request, **kwargs) - if first: - if request.args.get("_header") != "off": - await writer.writerow(headings) - first = False - next = data.get("next") - for row in data["rows"]: - if any(isinstance(r, bytes) for r in row): - new_row = [] - for column, cell in zip(headings, row): - if isinstance(cell, bytes): - # If this is a table page, use .urls.row_blob() - if data.get("table"): - pks = data.get("primary_keys") or [] - cell = datasette.absolute_url( - request, - datasette.urls.row_blob( - database, - data["table"], - path_from_row_pks(row, pks, not pks), - column, - ), - ) - else: - # Otherwise generate URL for this query - url = datasette.absolute_url( - request, - path_with_format( - request=request, - format="blob", - extra_qs={ - "_blob_column": column, - "_blob_hash": hashlib.sha256( - cell - ).hexdigest(), - }, - replace_format="csv", - ), - ) - cell = url.replace("&_nocount=1", "").replace( - "&_nofacet=1", "" - ) - new_row.append(cell) - row = new_row - if not expanded_columns: - # Simple path - await writer.writerow(row) + templates = ["query-{}.html".format(to_css_class(database)), "query.html"] + if canned_query: + templates.insert( + 0, + "query-{}-{}.html".format( + to_css_class(database), to_css_class(canned_query) + ), + ) + + async def extra_template(): + display_rows = [] + for row in 
results.rows: + display_row = [] + for column, value in zip(results.columns, row): + display_value = value + # Let the plugins have a go + # pylint: disable=no-member + plugin_value = pm.hook.render_cell( + value=value, + column=column, + table=None, + database=database, + datasette=self.ds, + ) + if plugin_value is not None: + display_value = plugin_value else: - # Look for {"value": "label": } dicts and expand - new_row = [] - for heading, cell in zip(data["columns"], row): - if heading in expanded_columns: - if cell is None: - new_row.extend(("", "")) - else: - if not isinstance(cell, dict): - new_row.extend((cell, "")) - else: - new_row.append(cell["value"]) - new_row.append(cell["label"]) - else: - new_row.append(cell) - await writer.writerow(new_row) - except Exception as ex: - sys.stderr.write("Caught this error: {}\n".format(ex)) - sys.stderr.flush() - await r.write(str(ex)) - return - await limited_writer.write(postamble) + if value in ("", None): + display_value = jinja2.Markup(" ") + elif is_url(str(display_value).strip()): + display_value = jinja2.Markup( + '{url}'.format( + url=jinja2.escape(value.strip()) + ) + ) + display_row.append(display_value) + display_rows.append(display_row) + return { + "display_rows": display_rows, + "custom_sql": True, + "named_parameter_values": named_parameter_values, + "editable": editable, + "canned_query": canned_query, + "metadata": metadata, + "config": self.ds.config_dict(), + "request": request, + "path_with_added_args": path_with_added_args, + "path_with_removed_args": path_with_removed_args, + "hide_sql": "_hide_sql" in params, + } - headers = {} - if datasette.cors: - add_cors_headers(headers) - if request.args.get("_dl", None): - if not trace: - content_type = "text/csv; charset=utf-8" - disposition = 'attachment; filename="{}.csv"'.format( - request.url_vars.get("table", database) + return ( + { + "database": database, + "rows": results.rows, + "truncated": results.truncated, + "columns": columns, + "query": {"sql": sql, "params": params}, + }, + extra_template, + templates, ) - headers["content-disposition"] = disposition - - return AsgiStream(stream_fn, headers=headers, content_type=content_type) diff --git a/datasette/views/database.py b/datasette/views/database.py index 51c752a0..ce9498c5 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,1256 +1,86 @@ -from dataclasses import dataclass, field -from urllib.parse import parse_qsl, urlencode -import asyncio -import hashlib -import itertools -import json -import markupsafe import os -import re -import sqlite_utils -import textwrap -from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent -from datasette.database import QueryInterrupted -from datasette.resources import DatabaseResource, QueryResource -from datasette.utils import ( - add_cors_headers, - await_me_maybe, - call_with_supported_arguments, - named_parameters as derive_named_parameters, - format_bytes, - make_slot_function, - tilde_decode, - to_css_class, - validate_sql_select, - is_url, - path_with_added_args, - path_with_format, - path_with_removed_args, - sqlite3, - truncate_url, - InvalidSql, -) -from datasette.utils.asgi import AsgiFileDownload, NotFound, Response, Forbidden -from datasette.plugins import pm +from datasette.utils import to_css_class, validate_sql_select +from datasette.utils.asgi import AsgiFileDownload -from .base import BaseView, DatasetteError, View, _error, stream_csv -from . 
import Context +from .base import DatasetteError, DataView -class DatabaseView(View): - async def get(self, request, datasette): - format_ = request.url_vars.get("format") or "html" +class DatabaseView(DataView): + name = "database" - await datasette.refresh_schemas() + async def data(self, request, database, hash, default_labels=False, _size=None): + if request.args.get("sql"): + if not self.ds.config("allow_sql"): + raise DatasetteError("sql= is not allowed", status=400) + sql = request.raw_args.pop("sql") + validate_sql_select(sql) + return await self.custom_sql(request, database, hash, sql, _size=_size) - db = await datasette.resolve_database(request) - database = db.name + db = self.ds.databases[database] - visible, private = await datasette.check_visibility( - request.actor, - action="view-database", - resource=DatabaseResource(database=database), - ) - if not visible: - raise Forbidden("You do not have permission to view this database") + table_counts = await db.table_counts(5) + views = await db.view_names() + hidden_table_names = set(await db.hidden_table_names()) + all_foreign_keys = await db.get_all_foreign_keys() - sql = (request.args.get("sql") or "").strip() - if sql: - redirect_url = "/" + request.url_vars.get("database") + "/-/query" - if request.url_vars.get("format"): - redirect_url += "." + request.url_vars.get("format") - redirect_url += "?" + request.query_string - return Response.redirect(redirect_url) - return await QueryView()(request, datasette) + metadata = (self.ds.metadata("databases") or {}).get(database, {}) + self.ds.update_with_inherited_metadata(metadata) - if format_ not in ("html", "json"): - raise NotFound("Invalid format: {}".format(format_)) + tables = [] + for table in table_counts: + table_columns = await db.table_columns(table) + tables.append( + { + "name": table, + "columns": table_columns, + "primary_keys": await db.primary_keys(table), + "count": table_counts[table], + "hidden": table in hidden_table_names, + "fts_table": await db.fts_table(table), + "foreign_keys": all_foreign_keys[table], + } + ) - metadata = await datasette.get_database_metadata(database) - - # Get all tables/views this actor can see in bulk with private flag - allowed_tables_page = await datasette.allowed_resources( - "view-table", - request.actor, - parent=database, - include_is_private=True, - limit=1000, - ) - # Create lookup dict for quick access - allowed_dict = {r.child: r for r in allowed_tables_page.resources} - - # Filter to just views - view_names_set = set(await db.view_names()) - sql_views = [ - {"name": name, "private": allowed_dict[name].private} - for name in allowed_dict - if name in view_names_set - ] - - tables = await get_tables(datasette, request, db, allowed_dict) - - # Get allowed queries using the new permission system - allowed_query_page = await datasette.allowed_resources( - "view-query", - request.actor, - parent=database, - include_is_private=True, - limit=1000, - ) - - # Build canned_queries list by looking up each allowed query - all_queries = await datasette.get_canned_queries(database, request.actor) - canned_queries = [] - for query_resource in allowed_query_page.resources: - query_name = query_resource.child - if query_name in all_queries: - canned_queries.append( - dict(all_queries[query_name], private=query_resource.private) - ) - - async def database_actions(): - links = [] - for hook in pm.hook.database_actions( - datasette=datasette, - database=database, - actor=request.actor, - request=request, - ): - extra_links = await 
await_me_maybe(hook) - if extra_links: - links.extend(extra_links) - return links - - attached_databases = [d.name for d in await db.attached_databases()] - - allow_execute_sql = await datasette.allowed( - action="execute-sql", - resource=DatabaseResource(database=database), - actor=request.actor, - ) - json_data = { - "database": database, - "private": private, - "path": datasette.urls.database(database), - "size": db.size, - "tables": tables, - "hidden_count": len([t for t in tables if t["hidden"]]), - "views": sql_views, - "queries": canned_queries, - "allow_execute_sql": allow_execute_sql, - "table_columns": ( - await _table_columns(datasette, database) if allow_execute_sql else {} - ), - "metadata": await datasette.get_database_metadata(database), - } - - if format_ == "json": - response = Response.json(json_data) - if datasette.cors: - add_cors_headers(response.headers) - return response - - assert format_ == "html" - alternate_url_json = datasette.absolute_url( - request, - datasette.urls.path(path_with_format(request=request, format="json")), - ) - templates = (f"database-{to_css_class(database)}.html", "database.html") - environment = datasette.get_jinja_environment(request) - template = environment.select_template(templates) - return Response.html( - await datasette.render_template( - templates, - DatabaseContext( - database=database, - private=private, - path=datasette.urls.database(database), - size=db.size, - tables=tables, - hidden_count=len([t for t in tables if t["hidden"]]), - views=sql_views, - queries=canned_queries, - allow_execute_sql=allow_execute_sql, - table_columns=( - await _table_columns(datasette, database) - if allow_execute_sql - else {} - ), - metadata=metadata, - database_color=db.color, - database_actions=database_actions, - show_hidden=request.args.get("_show_hidden"), - editable=True, - count_limit=db.count_limit, - allow_download=datasette.setting("allow_download") - and not db.is_mutable - and not db.is_memory, - attached_databases=attached_databases, - alternate_url_json=alternate_url_json, - select_templates=[ - f"{'*' if template_name == template.name else ''}{template_name}" - for template_name in templates - ], - top_database=make_slot_function( - "top_database", datasette, request, database=database - ), - ), - request=request, - view_name="database", - ), - headers={ - "Link": '<{}>; rel="alternate"; type="application/json+datasette"'.format( - alternate_url_json - ) - }, - ) - - -@dataclass -class DatabaseContext(Context): - database: str = field(metadata={"help": "The name of the database"}) - private: bool = field( - metadata={"help": "Boolean indicating if this is a private database"} - ) - path: str = field(metadata={"help": "The URL path to this database"}) - size: int = field(metadata={"help": "The size of the database in bytes"}) - tables: list = field(metadata={"help": "List of table objects in the database"}) - hidden_count: int = field(metadata={"help": "Count of hidden tables"}) - views: list = field(metadata={"help": "List of view objects in the database"}) - queries: list = field(metadata={"help": "List of canned query objects"}) - allow_execute_sql: bool = field( - metadata={"help": "Boolean indicating if custom SQL can be executed"} - ) - table_columns: dict = field( - metadata={"help": "Dictionary mapping table names to their column lists"} - ) - metadata: dict = field(metadata={"help": "Metadata for the database"}) - database_color: str = field(metadata={"help": "The color assigned to the database"}) - database_actions: callable 
= field( - metadata={ - "help": "Callable returning list of action links for the database menu" - } - ) - show_hidden: str = field(metadata={"help": "Value of _show_hidden query parameter"}) - editable: bool = field( - metadata={"help": "Boolean indicating if the database is editable"} - ) - count_limit: int = field(metadata={"help": "The maximum number of rows to count"}) - allow_download: bool = field( - metadata={"help": "Boolean indicating if database download is allowed"} - ) - attached_databases: list = field( - metadata={"help": "List of names of attached databases"} - ) - alternate_url_json: str = field( - metadata={"help": "URL for the alternate JSON version of this page"} - ) - select_templates: list = field( - metadata={ - "help": "List of templates that were considered for rendering this page" - } - ) - top_database: callable = field( - metadata={"help": "Callable to render the top_database slot"} - ) - - -@dataclass -class QueryContext(Context): - database: str = field(metadata={"help": "The name of the database being queried"}) - database_color: str = field(metadata={"help": "The color of the database"}) - query: dict = field( - metadata={"help": "The SQL query object containing the `sql` string"} - ) - canned_query: str = field( - metadata={"help": "The name of the canned query if this is a canned query"} - ) - private: bool = field( - metadata={"help": "Boolean indicating if this is a private database"} - ) - # urls: dict = field( - # metadata={"help": "Object containing URL helpers like `database()`"} - # ) - canned_query_write: bool = field( - metadata={ - "help": "Boolean indicating if this is a canned query that allows writes" - } - ) - metadata: dict = field( - metadata={"help": "Metadata about the database or the canned query"} - ) - db_is_immutable: bool = field( - metadata={"help": "Boolean indicating if this database is immutable"} - ) - error: str = field(metadata={"help": "Any query error message"}) - hide_sql: bool = field( - metadata={"help": "Boolean indicating if the SQL should be hidden"} - ) - show_hide_link: str = field( - metadata={"help": "The URL to toggle showing/hiding the SQL"} - ) - show_hide_text: str = field( - metadata={"help": "The text for the show/hide SQL link"} - ) - editable: bool = field( - metadata={"help": "Boolean indicating if the SQL can be edited"} - ) - allow_execute_sql: bool = field( - metadata={"help": "Boolean indicating if custom SQL can be executed"} - ) - tables: list = field(metadata={"help": "List of table objects in the database"}) - named_parameter_values: dict = field( - metadata={"help": "Dictionary of parameter names/values"} - ) - edit_sql_url: str = field( - metadata={"help": "URL to edit the SQL for a canned query"} - ) - display_rows: list = field(metadata={"help": "List of result rows to display"}) - columns: list = field(metadata={"help": "List of column names"}) - renderers: dict = field(metadata={"help": "Dictionary of renderer name to URL"}) - url_csv: str = field(metadata={"help": "URL for CSV export"}) - show_hide_hidden: str = field( - metadata={"help": "Hidden input field for the _show_sql parameter"} - ) - table_columns: dict = field( - metadata={"help": "Dictionary of table name to list of column names"} - ) - alternate_url_json: str = field( - metadata={"help": "URL for alternate JSON version of this page"} - ) - # TODO: refactor this to somewhere else, probably ds.render_template() - select_templates: list = field( - metadata={ - "help": "List of templates that were considered for rendering this page" 
- } - ) - top_query: callable = field( - metadata={"help": "Callable to render the top_query slot"} - ) - top_canned_query: callable = field( - metadata={"help": "Callable to render the top_canned_query slot"} - ) - query_actions: callable = field( - metadata={ - "help": "Callable returning a list of links for the query action menu" - } - ) - - -async def get_tables(datasette, request, db, allowed_dict): - """ - Get list of tables with metadata for the database view. - - Args: - datasette: The Datasette instance - request: The current request - db: The database - allowed_dict: Dict mapping table name -> Resource object with .private attribute - """ - tables = [] - table_counts = await db.table_counts(100) - hidden_table_names = set(await db.hidden_table_names()) - all_foreign_keys = await db.get_all_foreign_keys() - - for table in table_counts: - if table not in allowed_dict: - continue - - table_columns = await db.table_columns(table) - tables.append( + tables.sort(key=lambda t: (t["hidden"], t["name"])) + return ( { - "name": table, - "columns": table_columns, - "primary_keys": await db.primary_keys(table), - "count": table_counts[table], - "hidden": table in hidden_table_names, - "fts_table": await db.fts_table(table), - "foreign_keys": all_foreign_keys[table], - "private": allowed_dict[table].private, - } + "database": database, + "size": db.size, + "comment": db.comment, + "tables": tables, + "hidden_count": len([t for t in tables if t["hidden"]]), + "views": views, + "queries": self.ds.get_canned_queries(database), + }, + { + "show_hidden": request.args.get("_show_hidden"), + "editable": True, + "metadata": metadata, + "allow_download": self.ds.config("allow_download") + and not db.is_mutable + and database != ":memory:", + }, + ("database-{}.html".format(to_css_class(database)), "database.html"), ) - tables.sort(key=lambda t: (t["hidden"], t["name"])) - return tables -async def database_download(request, datasette): - from datasette.resources import DatabaseResource +class DatabaseDownload(DataView): + name = "database_download" - database = tilde_decode(request.url_vars["database"]) - await datasette.ensure_permission( - action="view-database-download", - resource=DatabaseResource(database=database), - actor=request.actor, - ) - try: - db = datasette.get_database(route=database) - except KeyError: - raise DatasetteError("Invalid database", status=404) - - if db.is_memory: - raise DatasetteError("Cannot download in-memory databases", status=404) - if not datasette.setting("allow_download") or db.is_mutable: - raise Forbidden("Database download is forbidden") - if not db.path: - raise DatasetteError("Cannot download database", status=404) - filepath = db.path - headers = {} - if datasette.cors: - add_cors_headers(headers) - if db.hash: - etag = '"{}"'.format(db.hash) - headers["Etag"] = etag - # Has user seen this already? 
- if_none_match = request.headers.get("if-none-match") - if if_none_match and if_none_match == etag: - return Response("", status=304) - headers["Transfer-Encoding"] = "chunked" - return AsgiFileDownload( - filepath, - filename=os.path.basename(filepath), - content_type="application/octet-stream", - headers=headers, - ) - - -class QueryView(View): - async def post(self, request, datasette): - from datasette.app import TableNotFound - - db = await datasette.resolve_database(request) - - # We must be a canned query - table_found = False - try: - await datasette.resolve_table(request) - table_found = True - except TableNotFound as table_not_found: - canned_query = await datasette.get_canned_query( - table_not_found.database_name, table_not_found.table, request.actor - ) - if canned_query is None: - raise - if table_found: - # That should not have happened - raise DatasetteError("Unexpected table found on POST", status=404) - - # If database is immutable, return an error - if not db.is_mutable: - raise Forbidden("Database is immutable") - - # Process the POST - body = await request.post_body() - body = body.decode("utf-8").strip() - if body.startswith("{") and body.endswith("}"): - params = json.loads(body) - # But we want key=value strings - for key, value in params.items(): - params[key] = str(value) - else: - params = dict(parse_qsl(body, keep_blank_values=True)) - - # Don't ever send csrftoken as a SQL parameter - params.pop("csrftoken", None) - - # Should we return JSON? - should_return_json = ( - request.headers.get("accept") == "application/json" - or request.args.get("_json") - or params.get("_json") + async def view_get(self, request, database, hash, correct_hash_present, **kwargs): + if database not in self.ds.databases: + raise DatasetteError("Invalid database", status=404) + db = self.ds.databases[database] + if db.is_memory: + raise DatasetteError("Cannot download :memory: database", status=404) + if not self.ds.config("allow_download") or db.is_mutable: + raise DatasetteError("Database download is forbidden", status=403) + if not db.path: + raise DatasetteError("Cannot download database", status=404) + filepath = db.path + return AsgiFileDownload( + filepath, + filename=os.path.basename(filepath), + content_type="application/octet-stream", ) - params_for_query = MagicParameters( - canned_query["sql"], params, request, datasette - ) - await params_for_query.execute_params() - ok = None - redirect_url = None - try: - cursor = await db.execute_write(canned_query["sql"], params_for_query) - # success message can come from on_success_message or on_success_message_sql - message = None - message_type = datasette.INFO - on_success_message_sql = canned_query.get("on_success_message_sql") - if on_success_message_sql: - try: - message_result = ( - await db.execute(on_success_message_sql, params_for_query) - ).first() - if message_result: - message = message_result[0] - except Exception as ex: - message = "Error running on_success_message_sql: {}".format(ex) - message_type = datasette.ERROR - if not message: - message = canned_query.get( - "on_success_message" - ) or "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" - ) - - redirect_url = canned_query.get("on_success_redirect") - ok = True - except Exception as ex: - message = canned_query.get("on_error_message") or str(ex) - message_type = datasette.ERROR - redirect_url = canned_query.get("on_error_redirect") - ok = False - if should_return_json: - return Response.json( - { - "ok": ok, - 
"message": message, - "redirect": redirect_url, - } - ) - else: - datasette.add_message(request, message, message_type) - return Response.redirect(redirect_url or request.path) - - async def get(self, request, datasette): - from datasette.app import TableNotFound - - await datasette.refresh_schemas() - - db = await datasette.resolve_database(request) - database = db.name - - # Get all tables/views this actor can see in bulk with private flag - allowed_tables_page = await datasette.allowed_resources( - "view-table", - request.actor, - parent=database, - include_is_private=True, - limit=1000, - ) - # Create lookup dict for quick access - allowed_dict = {r.child: r for r in allowed_tables_page.resources} - - # Are we a canned query? - canned_query = None - canned_query_write = False - if "table" in request.url_vars: - try: - await datasette.resolve_table(request) - except TableNotFound as table_not_found: - # Was this actually a canned query? - canned_query = await datasette.get_canned_query( - table_not_found.database_name, table_not_found.table, request.actor - ) - if canned_query is None: - raise - canned_query_write = bool(canned_query.get("write")) - - private = False - if canned_query: - # Respect canned query permissions - visible, private = await datasette.check_visibility( - request.actor, - action="view-query", - resource=QueryResource(database=database, query=canned_query["name"]), - ) - if not visible: - raise Forbidden("You do not have permission to view this query") - - else: - await datasette.ensure_permission( - action="execute-sql", - resource=DatabaseResource(database=database), - actor=request.actor, - ) - - # Flattened because of ?sql=&name1=value1&name2=value2 feature - params = {key: request.args.get(key) for key in request.args} - sql = None - - if canned_query: - sql = canned_query["sql"] - elif "sql" in params: - sql = params.pop("sql") - - # Extract any :named parameters - named_parameters = [] - if canned_query and canned_query.get("params"): - named_parameters = canned_query["params"] - if not named_parameters: - named_parameters = derive_named_parameters(sql) - named_parameter_values = { - named_parameter: params.get(named_parameter) or "" - for named_parameter in named_parameters - if not named_parameter.startswith("_") - } - # Set to blank string if missing from params - for named_parameter in named_parameters: - if named_parameter not in params and not named_parameter.startswith("_"): - params[named_parameter] = "" - - extra_args = {} - if params.get("_timelimit"): - extra_args["custom_time_limit"] = int(params["_timelimit"]) - - format_ = request.url_vars.get("format") or "html" - - query_error = None - results = None - rows = [] - columns = [] - - params_for_query = params - - if not canned_query_write: - try: - if not canned_query: - # For regular queries we only allow SELECT, plus other rules - validate_sql_select(sql) - else: - # Canned queries can run magic parameters - params_for_query = MagicParameters(sql, params, request, datasette) - await params_for_query.execute_params() - results = await datasette.execute( - database, sql, params_for_query, truncate=True, **extra_args - ) - columns = results.columns - rows = results.rows - except QueryInterrupted as ex: - raise DatasetteError( - textwrap.dedent( - """ -
    SQL query took too long. The time limit is controlled by the - sql_time_limit_ms - configuration option.
    - - - """.format( - markupsafe.escape(ex.sql) - ) - ).strip(), - title="SQL Interrupted", - status=400, - message_is_html=True, - ) - except sqlite3.DatabaseError as ex: - query_error = str(ex) - results = None - rows = [] - columns = [] - except (sqlite3.OperationalError, InvalidSql) as ex: - raise DatasetteError(str(ex), title="Invalid SQL", status=400) - except sqlite3.OperationalError as ex: - raise DatasetteError(str(ex)) - except DatasetteError: - raise - - # Handle formats from plugins - if format_ == "csv": - - async def fetch_data_for_csv(request, _next=None): - results = await db.execute(sql, params, truncate=True) - data = {"rows": results.rows, "columns": results.columns} - return data, None, None - - return await stream_csv(datasette, fetch_data_for_csv, request, db.name) - elif format_ in datasette.renderers.keys(): - # Dispatch request to the correct output format renderer - # (CSV is not handled here due to streaming) - result = call_with_supported_arguments( - datasette.renderers[format_][0], - datasette=datasette, - columns=columns, - rows=rows, - sql=sql, - query_name=canned_query["name"] if canned_query else None, - database=database, - table=None, - request=request, - view_name="table", - truncated=results.truncated if results else False, - error=query_error, - # These will be deprecated in Datasette 1.0: - args=request.args, - data={"ok": True, "rows": rows, "columns": columns}, - ) - if asyncio.iscoroutine(result): - result = await result - if result is None: - raise NotFound("No data") - if isinstance(result, dict): - r = Response( - body=result.get("body"), - status=result.get("status_code") or 200, - content_type=result.get("content_type", "text/plain"), - headers=result.get("headers"), - ) - elif isinstance(result, Response): - r = result - # if status_code is not None: - # # Over-ride the status code - # r.status = status_code - else: - assert False, f"{result} should be dict or Response" - elif format_ == "html": - headers = {} - templates = [f"query-{to_css_class(database)}.html", "query.html"] - if canned_query: - templates.insert( - 0, - f"query-{to_css_class(database)}-{to_css_class(canned_query['name'])}.html", - ) - - environment = datasette.get_jinja_environment(request) - template = environment.select_template(templates) - alternate_url_json = datasette.absolute_url( - request, - datasette.urls.path(path_with_format(request=request, format="json")), - ) - data = {} - headers.update( - { - "Link": '<{}>; rel="alternate"; type="application/json+datasette"'.format( - alternate_url_json - ) - } - ) - metadata = await datasette.get_database_metadata(database) - - renderers = {} - for key, (_, can_render) in datasette.renderers.items(): - it_can_render = call_with_supported_arguments( - can_render, - datasette=datasette, - columns=data.get("columns") or [], - rows=data.get("rows") or [], - sql=data.get("query", {}).get("sql", None), - query_name=data.get("query_name"), - database=database, - table=data.get("table"), - request=request, - view_name="database", - ) - it_can_render = await await_me_maybe(it_can_render) - if it_can_render: - renderers[key] = datasette.urls.path( - path_with_format(request=request, format=key) - ) - - allow_execute_sql = await datasette.allowed( - action="execute-sql", - resource=DatabaseResource(database=database), - actor=request.actor, - ) - - show_hide_hidden = "" - if canned_query and canned_query.get("hide_sql"): - if bool(params.get("_show_sql")): - show_hide_link = path_with_removed_args(request, {"_show_sql"}) - 
show_hide_text = "hide" - show_hide_hidden = ( - '' - ) - else: - show_hide_link = path_with_added_args(request, {"_show_sql": 1}) - show_hide_text = "show" - else: - if bool(params.get("_hide_sql")): - show_hide_link = path_with_removed_args(request, {"_hide_sql"}) - show_hide_text = "show" - show_hide_hidden = ( - '' - ) - else: - show_hide_link = path_with_added_args(request, {"_hide_sql": 1}) - show_hide_text = "hide" - hide_sql = show_hide_text == "show" - - # Show 'Edit SQL' button only if: - # - User is allowed to execute SQL - # - SQL is an approved SELECT statement - # - No magic parameters, so no :_ in the SQL string - edit_sql_url = None - is_validated_sql = False - try: - validate_sql_select(sql) - is_validated_sql = True - except InvalidSql: - pass - if allow_execute_sql and is_validated_sql and ":_" not in sql: - edit_sql_url = ( - datasette.urls.database(database) - + "/-/query" - + "?" - + urlencode( - { - **{ - "sql": sql, - }, - **named_parameter_values, - } - ) - ) - - async def query_actions(): - query_actions = [] - for hook in pm.hook.query_actions( - datasette=datasette, - actor=request.actor, - database=database, - query_name=canned_query["name"] if canned_query else None, - request=request, - sql=sql, - params=params, - ): - extra_links = await await_me_maybe(hook) - if extra_links: - query_actions.extend(extra_links) - return query_actions - - r = Response.html( - await datasette.render_template( - template, - QueryContext( - database=database, - database_color=db.color, - query={ - "sql": sql, - "params": params, - }, - canned_query=canned_query["name"] if canned_query else None, - private=private, - canned_query_write=canned_query_write, - db_is_immutable=not db.is_mutable, - error=query_error, - hide_sql=hide_sql, - show_hide_link=datasette.urls.path(show_hide_link), - show_hide_text=show_hide_text, - editable=not canned_query, - allow_execute_sql=allow_execute_sql, - tables=await get_tables(datasette, request, db, allowed_dict), - named_parameter_values=named_parameter_values, - edit_sql_url=edit_sql_url, - display_rows=await display_rows( - datasette, database, request, rows, columns - ), - table_columns=( - await _table_columns(datasette, database) - if allow_execute_sql - else {} - ), - columns=columns, - renderers=renderers, - url_csv=datasette.urls.path( - path_with_format( - request=request, format="csv", extra_qs={"_size": "max"} - ) - ), - show_hide_hidden=markupsafe.Markup(show_hide_hidden), - metadata=canned_query or metadata, - alternate_url_json=alternate_url_json, - select_templates=[ - f"{'*' if template_name == template.name else ''}{template_name}" - for template_name in templates - ], - top_query=make_slot_function( - "top_query", datasette, request, database=database, sql=sql - ), - top_canned_query=make_slot_function( - "top_canned_query", - datasette, - request, - database=database, - query_name=canned_query["name"] if canned_query else None, - ), - query_actions=query_actions, - ), - request=request, - view_name="database", - ), - headers=headers, - ) - else: - assert False, "Invalid format: {}".format(format_) - if datasette.cors: - add_cors_headers(r.headers) - return r - - -class MagicParameters(dict): - def __init__(self, sql, data, request, datasette): - super().__init__(data) - self._sql = sql - self._request = request - self._magics = dict( - itertools.chain.from_iterable( - pm.hook.register_magic_parameters(datasette=datasette) - ) - ) - self._prepared = {} - - async def execute_params(self): - for key in 
derive_named_parameters(self._sql): - if key.startswith("_") and key.count("_") >= 2: - prefix, suffix = key[1:].split("_", 1) - if prefix in self._magics: - result = await await_me_maybe( - self._magics[prefix](suffix, self._request) - ) - self._prepared[key] = result - - def __len__(self): - # Workaround for 'Incorrect number of bindings' error - # https://github.com/simonw/datasette/issues/967#issuecomment-692951144 - return super().__len__() or 1 - - def __getitem__(self, key): - if key.startswith("_") and key.count("_") >= 2: - if key in self._prepared: - return self._prepared[key] - # Try the other route - prefix, suffix = key[1:].split("_", 1) - if prefix in self._magics: - try: - return self._magics[prefix](suffix, self._request) - except KeyError: - return super().__getitem__(key) - else: - return super().__getitem__(key) - - -class TableCreateView(BaseView): - name = "table-create" - - _valid_keys = { - "table", - "rows", - "row", - "columns", - "pk", - "pks", - "ignore", - "replace", - "alter", - } - _supported_column_types = { - "text", - "integer", - "float", - "blob", - } - # Any string that does not contain a newline or start with sqlite_ - _table_name_re = re.compile(r"^(?!sqlite_)[^\n]+$") - - def __init__(self, datasette): - self.ds = datasette - - async def post(self, request): - db = await self.ds.resolve_database(request) - database_name = db.name - - # Must have create-table permission - if not await self.ds.allowed( - action="create-table", - resource=DatabaseResource(database=database_name), - actor=request.actor, - ): - return _error(["Permission denied"], 403) - - body = await request.post_body() - try: - data = json.loads(body) - except json.JSONDecodeError as e: - return _error(["Invalid JSON: {}".format(e)]) - - if not isinstance(data, dict): - return _error(["JSON must be an object"]) - - invalid_keys = set(data.keys()) - self._valid_keys - if invalid_keys: - return _error(["Invalid keys: {}".format(", ".join(invalid_keys))]) - - # ignore and replace are mutually exclusive - if data.get("ignore") and data.get("replace"): - return _error(["ignore and replace are mutually exclusive"]) - - # ignore and replace only allowed with row or rows - if "ignore" in data or "replace" in data: - if not data.get("row") and not data.get("rows"): - return _error(["ignore and replace require row or rows"]) - - # ignore and replace require pk or pks - if "ignore" in data or "replace" in data: - if not data.get("pk") and not data.get("pks"): - return _error(["ignore and replace require pk or pks"]) - - ignore = data.get("ignore") - replace = data.get("replace") - - if replace: - # Must have update-row permission - if not await self.ds.allowed( - action="update-row", - resource=DatabaseResource(database=database_name), - actor=request.actor, - ): - return _error(["Permission denied: need update-row"], 403) - - table_name = data.get("table") - if not table_name: - return _error(["Table is required"]) - - if not self._table_name_re.match(table_name): - return _error(["Invalid table name"]) - - table_exists = await db.table_exists(data["table"]) - columns = data.get("columns") - rows = data.get("rows") - row = data.get("row") - if not columns and not rows and not row: - return _error(["columns, rows or row is required"]) - - if rows and row: - return _error(["Cannot specify both rows and row"]) - - if rows or row: - # Must have insert-row permission - if not await self.ds.allowed( - action="insert-row", - resource=DatabaseResource(database=database_name), - actor=request.actor, - ): 
- return _error(["Permission denied: need insert-row"], 403) - - alter = False - if rows or row: - if not table_exists: - # if table is being created for the first time, alter=True - alter = True - else: - # alter=True only if they request it AND they have permission - if data.get("alter"): - if not await self.ds.allowed( - action="alter-table", - resource=DatabaseResource(database=database_name), - actor=request.actor, - ): - return _error(["Permission denied: need alter-table"], 403) - alter = True - - if columns: - if rows or row: - return _error(["Cannot specify columns with rows or row"]) - if not isinstance(columns, list): - return _error(["columns must be a list"]) - for column in columns: - if not isinstance(column, dict): - return _error(["columns must be a list of objects"]) - if not column.get("name") or not isinstance(column.get("name"), str): - return _error(["Column name is required"]) - if not column.get("type"): - column["type"] = "text" - if column["type"] not in self._supported_column_types: - return _error( - ["Unsupported column type: {}".format(column["type"])] - ) - # No duplicate column names - dupes = {c["name"] for c in columns if columns.count(c) > 1} - if dupes: - return _error(["Duplicate column name: {}".format(", ".join(dupes))]) - - if row: - rows = [row] - - if rows: - if not isinstance(rows, list): - return _error(["rows must be a list"]) - for row in rows: - if not isinstance(row, dict): - return _error(["rows must be a list of objects"]) - - pk = data.get("pk") - pks = data.get("pks") - - if pk and pks: - return _error(["Cannot specify both pk and pks"]) - if pk: - if not isinstance(pk, str): - return _error(["pk must be a string"]) - if pks: - if not isinstance(pks, list): - return _error(["pks must be a list"]) - for pk in pks: - if not isinstance(pk, str): - return _error(["pks must be a list of strings"]) - - # If table exists already, read pks from that instead - if table_exists: - actual_pks = await db.primary_keys(table_name) - # if pk passed and table already exists check it does not change - bad_pks = False - if len(actual_pks) == 1 and data.get("pk") and data["pk"] != actual_pks[0]: - bad_pks = True - elif ( - len(actual_pks) > 1 - and data.get("pks") - and set(data["pks"]) != set(actual_pks) - ): - bad_pks = True - if bad_pks: - return _error(["pk cannot be changed for existing table"]) - pks = actual_pks - - initial_schema = None - if table_exists: - initial_schema = await db.execute_fn( - lambda conn: sqlite_utils.Database(conn)[table_name].schema - ) - - def create_table(conn): - table = sqlite_utils.Database(conn)[table_name] - if rows: - table.insert_all( - rows, pk=pks or pk, ignore=ignore, replace=replace, alter=alter - ) - else: - table.create( - {c["name"]: c["type"] for c in columns}, - pk=pks or pk, - ) - return table.schema - - try: - schema = await db.execute_write_fn(create_table) - except Exception as e: - return _error([str(e)]) - - if initial_schema is not None and initial_schema != schema: - await self.ds.track_event( - AlterTableEvent( - request.actor, - database=database_name, - table=table_name, - before_schema=initial_schema, - after_schema=schema, - ) - ) - - table_url = self.ds.absolute_url( - request, self.ds.urls.table(db.name, table_name) - ) - table_api_url = self.ds.absolute_url( - request, self.ds.urls.table(db.name, table_name, format="json") - ) - details = { - "ok": True, - "database": db.name, - "table": table_name, - "table_url": table_url, - "table_api_url": table_api_url, - "schema": schema, - } - if rows: - 
details["row_count"] = len(rows) - - if not table_exists: - # Only log creation if we created a table - await self.ds.track_event( - CreateTableEvent( - request.actor, database=db.name, table=table_name, schema=schema - ) - ) - if rows: - await self.ds.track_event( - InsertRowsEvent( - request.actor, - database=db.name, - table=table_name, - num_rows=len(rows), - ignore=ignore, - replace=replace, - ) - ) - return Response.json(details, status=201) - - -async def _table_columns(datasette, database_name): - internal_db = datasette.get_internal_database() - result = await internal_db.execute( - "select table_name, name from catalog_columns where database_name = ?", - [database_name], - ) - table_columns = {} - for row in result.rows: - table_columns.setdefault(row["table_name"], []).append(row["name"]) - # Add views - db = datasette.get_database(database_name) - for view_name in await db.view_names(): - table_columns[view_name] = [] - return table_columns - - -async def display_rows(datasette, database, request, rows, columns): - display_rows = [] - truncate_cells = datasette.setting("truncate_cells_html") - for row in rows: - display_row = [] - for column, value in zip(columns, row): - display_value = value - # Let the plugins have a go - # pylint: disable=no-member - plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=None, - database=database, - datasette=datasette, - request=request, - ): - candidate = await await_me_maybe(candidate) - if candidate is not None: - plugin_display_value = candidate - break - if plugin_display_value is not None: - display_value = plugin_display_value - else: - if value in ("", None): - display_value = markupsafe.Markup(" ") - elif is_url(str(display_value).strip()): - display_value = markupsafe.Markup( - '{truncated_url}'.format( - url=markupsafe.escape(value.strip()), - truncated_url=markupsafe.escape( - truncate_url(value.strip(), truncate_cells) - ), - ) - ) - elif isinstance(display_value, bytes): - blob_url = path_with_format( - request=request, - format="blob", - extra_qs={ - "_blob_column": column, - "_blob_hash": hashlib.sha256(display_value).hexdigest(), - }, - ) - formatted = format_bytes(len(value)) - display_value = markupsafe.Markup( - '<Binary: {:,} byte{}>'.format( - blob_url, - ( - ' title="{}"'.format(formatted) - if "bytes" not in formatted - else "" - ), - len(value), - "" if len(value) == 1 else "s", - ) - ) - else: - display_value = str(value) - if truncate_cells and len(display_value) > truncate_cells: - display_value = display_value[:truncate_cells] + "\u2026" - display_row.append(display_value) - display_rows.append(display_row) - return display_rows diff --git a/datasette/views/index.py b/datasette/views/index.py index a59c687c..64877f2b 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -1,12 +1,7 @@ +import hashlib import json -from datasette.plugins import pm -from datasette.utils import ( - add_cors_headers, - await_me_maybe, - make_slot_function, - CustomJSONEncoder, -) +from datasette.utils import CustomJSONEncoder from datasette.utils.asgi import Response from datasette.version import __version__ @@ -16,73 +11,31 @@ from .base import BaseView # Truncate table list on homepage at: TRUNCATE_AT = 5 -# Only attempt counts if database less than this size in bytes: -COUNT_DB_SIZE_LIMIT = 100 * 1024 * 1024 +# Only attempt counts if less than this many tables: +COUNT_TABLE_LIMIT = 30 class IndexView(BaseView): name = "index" - async def get(self, 
request): - as_format = request.url_vars["format"] - await self.ds.ensure_permission(action="view-instance", actor=request.actor) - - # Get all allowed databases and tables in bulk - db_page = await self.ds.allowed_resources( - "view-database", request.actor, include_is_private=True - ) - allowed_databases = [r async for r in db_page.all()] - allowed_db_dict = {r.parent: r for r in allowed_databases} - - # Group tables by database - tables_by_db = {} - table_page = await self.ds.allowed_resources( - "view-table", request.actor, include_is_private=True - ) - async for t in table_page.all(): - if t.parent not in tables_by_db: - tables_by_db[t.parent] = {} - tables_by_db[t.parent][t.child] = t + def __init__(self, datasette): + self.ds = datasette + async def get(self, request, as_format): databases = [] - # Iterate over allowed databases instead of all databases - for name in allowed_db_dict.keys(): - db = self.ds.databases[name] - database_private = allowed_db_dict[name].private - - # Get allowed tables/views for this database - allowed_for_db = tables_by_db.get(name, {}) - - # Get table names from allowed set instead of db.table_names() - table_names = [child_name for child_name in allowed_for_db.keys()] - + for name, db in self.ds.databases.items(): + table_names = await db.table_names() hidden_table_names = set(await db.hidden_table_names()) - - # Determine which allowed items are views - view_names_set = set(await db.view_names()) - views = [ - {"name": child_name, "private": resource.private} - for child_name, resource in allowed_for_db.items() - if child_name in view_names_set - ] - - # Filter to just tables (not views) for table processing - table_names = [name for name in table_names if name not in view_names_set] - + views = await db.view_names() # Perform counts only for immutable or DBS with <= COUNT_TABLE_LIMIT tables table_counts = {} - if not db.is_mutable or db.size < COUNT_DB_SIZE_LIMIT: + if not db.is_mutable or len(table_names) <= COUNT_TABLE_LIMIT: table_counts = await db.table_counts(10) # If any of these are None it means at least one timed out - ignore them all if any(v is None for v in table_counts.values()): table_counts = {} - tables = {} for table in table_names: - # Check if table is in allowed set - if table not in allowed_for_db: - continue - table_columns = await db.table_columns(table) tables[table] = { "name": table, @@ -92,16 +45,14 @@ class IndexView(BaseView): "hidden": table in hidden_table_names, "fts_table": await db.fts_table(table), "num_relationships_for_sorting": 0, - "private": allowed_for_db[table].private, } if request.args.get("_sort") == "relationships" or not table_counts: # We will be sorting by number of relationships, so populate that field all_foreign_keys = await db.get_all_foreign_keys() for table, foreign_keys in all_foreign_keys.items(): - if table in tables.keys(): - count = len(foreign_keys["incoming"] + foreign_keys["outgoing"]) - tables[table]["num_relationships_for_sorting"] = count + count = len(foreign_keys["incoming"] + foreign_keys["outgoing"]) + tables[table]["num_relationships_for_sorting"] = count hidden_tables = [t for t in tables.values() if t["hidden"]] visible_tables = [t for t in tables.values() if not t["hidden"]] @@ -121,15 +72,18 @@ class IndexView(BaseView): # Only add views if this is less than TRUNCATE_AT if len(tables_and_views_truncated) < TRUNCATE_AT: num_views_to_add = TRUNCATE_AT - len(tables_and_views_truncated) - for view in views[:num_views_to_add]: - tables_and_views_truncated.append(view) + for 
view_name in views[:num_views_to_add]: + tables_and_views_truncated.append({"name": view_name}) databases.append( { "name": name, "hash": db.hash, - "color": db.color, - "path": self.ds.urls.database(name), + "comment": db.comment, + "color": db.hash[:6] + if db.hash + else hashlib.md5(name.encode("utf8")).hexdigest()[:6], + "path": self.database_url(name), "tables_and_views_truncated": tables_and_views_truncated, "tables_and_views_more": (len(visible_tables) + len(views)) > TRUNCATE_AT, @@ -141,50 +95,27 @@ class IndexView(BaseView): ), "hidden_tables_count": len(hidden_tables), "views_count": len(views), - "private": database_private, } ) + databases.sort(key=lambda database: database["name"]) + if as_format: headers = {} if self.ds.cors: - add_cors_headers(headers) + headers["Access-Control-Allow-Origin"] = "*" return Response( - json.dumps( - { - "databases": {db["name"]: db for db in databases}, - "metadata": await self.ds.get_instance_metadata(), - }, - cls=CustomJSONEncoder, - ), + json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder), content_type="application/json; charset=utf-8", headers=headers, ) else: - homepage_actions = [] - for hook in pm.hook.homepage_actions( - datasette=self.ds, - actor=request.actor, - request=request, - ): - extra_links = await await_me_maybe(hook) - if extra_links: - homepage_actions.extend(extra_links) - alternative_homepage = request.path == "/-/" return await self.render( - ["default:index.html" if alternative_homepage else "index.html"], + ["index.html"], request=request, context={ "databases": databases, - "metadata": await self.ds.get_instance_metadata(), + "metadata": self.ds.metadata(), "datasette_version": __version__, - "private": not await self.ds.allowed( - action="view-instance", actor=None - ), - "top_homepage": make_slot_function( - "top_homepage", self.ds, request - ), - "homepage_actions": homepage_actions, - "noindex": request.path == "/-/", }, ) diff --git a/datasette/views/row.py b/datasette/views/row.py deleted file mode 100644 index c9b74b12..00000000 --- a/datasette/views/row.py +++ /dev/null @@ -1,293 +0,0 @@ -from datasette.utils.asgi import NotFound, Forbidden, Response -from datasette.database import QueryInterrupted -from datasette.events import UpdateRowEvent, DeleteRowEvent -from datasette.resources import TableResource -from .base import DataView, BaseView, _error -from datasette.utils import ( - await_me_maybe, - make_slot_function, - to_css_class, - escape_sqlite, -) -from datasette.plugins import pm -import json -import sqlite_utils -from .table import display_columns_and_rows - - -class RowView(DataView): - name = "row" - - async def data(self, request, default_labels=False): - resolved = await self.ds.resolve_row(request) - db = resolved.db - database = db.name - table = resolved.table - pk_values = resolved.pk_values - - # Ensure user has permission to view this row - visible, private = await self.ds.check_visibility( - request.actor, - action="view-table", - resource=TableResource(database=database, table=table), - ) - if not visible: - raise Forbidden("You do not have permission to view this table") - - results = await resolved.db.execute( - resolved.sql, resolved.params, truncate=True - ) - columns = [r[0] for r in results.description] - rows = list(results.rows) - if not rows: - raise NotFound(f"Record not found: {pk_values}") - - async def template_data(): - display_columns, display_rows = await display_columns_and_rows( - self.ds, - database, - table, - results.description, - rows, - 
link_column=False, - truncate_cells=0, - request=request, - ) - for column in display_columns: - column["sortable"] = False - - row_actions = [] - for hook in pm.hook.row_actions( - datasette=self.ds, - actor=request.actor, - request=request, - database=database, - table=table, - row=rows[0], - ): - extra_links = await await_me_maybe(hook) - if extra_links: - row_actions.extend(extra_links) - - return { - "private": private, - "foreign_key_tables": await self.foreign_key_tables( - database, table, pk_values - ), - "database_color": db.color, - "display_columns": display_columns, - "display_rows": display_rows, - "custom_table_templates": [ - f"_table-{to_css_class(database)}-{to_css_class(table)}.html", - f"_table-row-{to_css_class(database)}-{to_css_class(table)}.html", - "_table.html", - ], - "row_actions": row_actions, - "top_row": make_slot_function( - "top_row", - self.ds, - request, - database=resolved.db.name, - table=resolved.table, - row=rows[0], - ), - "metadata": {}, - } - - data = { - "database": database, - "table": table, - "rows": rows, - "columns": columns, - "primary_keys": resolved.pks, - "primary_key_values": pk_values, - } - - if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): - data["foreign_key_tables"] = await self.foreign_key_tables( - database, table, pk_values - ) - - return ( - data, - template_data, - ( - f"row-{to_css_class(database)}-{to_css_class(table)}.html", - "row.html", - ), - ) - - async def foreign_key_tables(self, database, table, pk_values): - if len(pk_values) != 1: - return [] - db = self.ds.databases[database] - all_foreign_keys = await db.get_all_foreign_keys() - foreign_keys = all_foreign_keys[table]["incoming"] - if len(foreign_keys) == 0: - return [] - - sql = "select " + ", ".join( - [ - "(select count(*) from {table} where {column}=:id)".format( - table=escape_sqlite(fk["other_table"]), - column=escape_sqlite(fk["other_column"]), - ) - for fk in foreign_keys - ] - ) - try: - rows = list(await db.execute(sql, {"id": pk_values[0]})) - except QueryInterrupted: - # Almost certainly hit the timeout - return [] - - foreign_table_counts = dict( - zip( - [(fk["other_table"], fk["other_column"]) for fk in foreign_keys], - list(rows[0]), - ) - ) - foreign_key_tables = [] - for fk in foreign_keys: - count = ( - foreign_table_counts.get((fk["other_table"], fk["other_column"])) or 0 - ) - key = fk["other_column"] - if key.startswith("_"): - key += "__exact" - link = "{}?{}={}".format( - self.ds.urls.table(database, fk["other_table"]), - key, - ",".join(pk_values), - ) - foreign_key_tables.append({**fk, **{"count": count, "link": link}}) - return foreign_key_tables - - -class RowError(Exception): - def __init__(self, error): - self.error = error - - -async def _resolve_row_and_check_permission(datasette, request, permission): - from datasette.app import DatabaseNotFound, TableNotFound, RowNotFound - - try: - resolved = await datasette.resolve_row(request) - except DatabaseNotFound as e: - return False, _error(["Database not found: {}".format(e.database_name)], 404) - except TableNotFound as e: - return False, _error(["Table not found: {}".format(e.table)], 404) - except RowNotFound as e: - return False, _error(["Record not found: {}".format(e.pk_values)], 404) - - # Ensure user has permission to delete this row - if not await datasette.allowed( - action=permission, - resource=TableResource(database=resolved.db.name, table=resolved.table), - actor=request.actor, - ): - return False, _error(["Permission denied"], 403) - - return 
True, resolved - - -class RowDeleteView(BaseView): - name = "row-delete" - - def __init__(self, datasette): - self.ds = datasette - - async def post(self, request): - ok, resolved = await _resolve_row_and_check_permission( - self.ds, request, "delete-row" - ) - if not ok: - return resolved - - # Delete table - def delete_row(conn): - sqlite_utils.Database(conn)[resolved.table].delete(resolved.pk_values) - - try: - await resolved.db.execute_write_fn(delete_row) - except Exception as e: - return _error([str(e)], 500) - - await self.ds.track_event( - DeleteRowEvent( - actor=request.actor, - database=resolved.db.name, - table=resolved.table, - pks=resolved.pk_values, - ) - ) - - return Response.json({"ok": True}, status=200) - - -class RowUpdateView(BaseView): - name = "row-update" - - def __init__(self, datasette): - self.ds = datasette - - async def post(self, request): - ok, resolved = await _resolve_row_and_check_permission( - self.ds, request, "update-row" - ) - if not ok: - return resolved - - body = await request.post_body() - try: - data = json.loads(body) - except json.JSONDecodeError as e: - return _error(["Invalid JSON: {}".format(e)]) - - if not isinstance(data, dict): - return _error(["JSON must be a dictionary"]) - if "update" not in data or not isinstance(data["update"], dict): - return _error(["JSON must contain an update dictionary"]) - - invalid_keys = set(data.keys()) - {"update", "return", "alter"} - if invalid_keys: - return _error(["Invalid keys: {}".format(", ".join(invalid_keys))]) - - update = data["update"] - - alter = data.get("alter") - if alter and not await self.ds.allowed( - action="alter-table", - resource=TableResource(database=resolved.db.name, table=resolved.table), - actor=request.actor, - ): - return _error(["Permission denied for alter-table"], 403) - - def update_row(conn): - sqlite_utils.Database(conn)[resolved.table].update( - resolved.pk_values, update, alter=alter - ) - - try: - await resolved.db.execute_write_fn(update_row) - except Exception as e: - return _error([str(e)], 400) - - result = {"ok": True} - if data.get("return"): - results = await resolved.db.execute( - resolved.sql, resolved.params, truncate=True - ) - result["row"] = results.dicts()[0] - - await self.ds.track_event( - UpdateRowEvent( - actor=request.actor, - database=resolved.db.name, - table=resolved.table, - pks=resolved.pk_values, - ) - ) - - return Response.json(result, status=200) diff --git a/datasette/views/special.py b/datasette/views/special.py index 411363ec..45e948f6 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,1158 +1,31 @@ import json -import logging -from datasette.events import LogoutEvent, LoginEvent, CreateTokenEvent -from datasette.resources import DatabaseResource, TableResource -from datasette.utils.asgi import Response, Forbidden -from datasette.utils import ( - actor_matches_allow, - add_cors_headers, - tilde_encode, - tilde_decode, -) -from .base import BaseView, View -import secrets -import urllib - - -logger = logging.getLogger(__name__) - - -def _resource_path(parent, child): - if parent is None: - return "/" - if child is None: - return f"/{parent}" - return f"/{parent}/{child}" +from datasette.utils.asgi import Response +from .base import BaseView class JsonDataView(BaseView): name = "json_data" - template = "show_json.html" # Can be overridden in subclasses - def __init__( - self, - datasette, - filename, - data_callback, - needs_request=False, - permission="view-instance", - template=None, - ): + def __init__(self, 
datasette, filename, data_callback): self.ds = datasette self.filename = filename self.data_callback = data_callback - self.needs_request = needs_request - self.permission = permission - if template is not None: - self.template = template - async def get(self, request): - if self.permission: - await self.ds.ensure_permission(action=self.permission, actor=request.actor) - if self.needs_request: - data = self.data_callback(request) - else: - data = self.data_callback() - - # Return JSON or HTML depending on format parameter - as_format = request.url_vars.get("format") + async def get(self, request, as_format): + data = self.data_callback() if as_format: headers = {} if self.ds.cors: - add_cors_headers(headers) - return Response.json(data, headers=headers) + headers["Access-Control-Allow-Origin"] = "*" + return Response( + json.dumps(data), + content_type="application/json; charset=utf-8", + headers=headers, + ) + else: - context = { - "filename": self.filename, - "data": data, - "data_json": json.dumps(data, indent=4, default=repr), - } - # Add has_debug_permission if this view requires permissions-debug - if self.permission == "permissions-debug": - context["has_debug_permission"] = True return await self.render( - [self.template], + ["show_json.html"], request=request, - context=context, - ) - - -class PatternPortfolioView(View): - async def get(self, request, datasette): - await datasette.ensure_permission(action="view-instance", actor=request.actor) - return Response.html( - await datasette.render_template( - "patterns.html", - request=request, - view_name="patterns", - ) - ) - - -class AuthTokenView(BaseView): - name = "auth_token" - has_json_alternate = False - - async def get(self, request): - # If already signed in as root, redirect - if request.actor and request.actor.get("id") == "root": - return Response.redirect(self.ds.urls.instance()) - token = request.args.get("token") or "" - if not self.ds._root_token: - raise Forbidden("Root token has already been used") - if secrets.compare_digest(token, self.ds._root_token): - self.ds._root_token = None - response = Response.redirect(self.ds.urls.instance()) - root_actor = {"id": "root"} - self.ds.set_actor_cookie(response, root_actor) - await self.ds.track_event(LoginEvent(actor=root_actor)) - return response - else: - raise Forbidden("Invalid token") - - -class LogoutView(BaseView): - name = "logout" - has_json_alternate = False - - async def get(self, request): - if not request.actor: - return Response.redirect(self.ds.urls.instance()) - return await self.render( - ["logout.html"], - request, - {"actor": request.actor}, - ) - - async def post(self, request): - response = Response.redirect(self.ds.urls.instance()) - self.ds.delete_actor_cookie(response) - self.ds.add_message(request, "You are now logged out", self.ds.WARNING) - await self.ds.track_event(LogoutEvent(actor=request.actor)) - return response - - -class PermissionsDebugView(BaseView): - name = "permissions_debug" - has_json_alternate = False - - async def get(self, request): - await self.ds.ensure_permission(action="view-instance", actor=request.actor) - await self.ds.ensure_permission(action="permissions-debug", actor=request.actor) - filter_ = request.args.get("filter") or "all" - permission_checks = list(reversed(self.ds._permission_checks)) - if filter_ == "exclude-yours": - permission_checks = [ - check - for check in permission_checks - if (check.actor or {}).get("id") != request.actor["id"] - ] - elif filter_ == "only-yours": - permission_checks = [ - check - for 
check in permission_checks - if (check.actor or {}).get("id") == request.actor["id"] - ] - return await self.render( - ["debug_permissions_playground.html"], - request, - # list() avoids error if check is performed during template render: - { - "permission_checks": permission_checks, - "filter": filter_, - "has_debug_permission": True, - "permissions": [ - { - "name": p.name, - "abbr": p.abbr, - "description": p.description, - "takes_parent": p.takes_parent, - "takes_child": p.takes_child, - } - for p in self.ds.actions.values() - ], - }, - ) - - async def post(self, request): - await self.ds.ensure_permission(action="view-instance", actor=request.actor) - await self.ds.ensure_permission(action="permissions-debug", actor=request.actor) - vars = await request.post_vars() - actor = json.loads(vars["actor"]) - permission = vars["permission"] - parent = vars.get("resource_1") or None - child = vars.get("resource_2") or None - - response, status = await _check_permission_for_actor( - self.ds, permission, parent, child, actor - ) - return Response.json(response, status=status) - - -class AllowedResourcesView(BaseView): - name = "allowed" - has_json_alternate = False - - async def get(self, request): - await self.ds.refresh_schemas() - - # Check if user has permissions-debug (to show sensitive fields) - has_debug_permission = await self.ds.allowed( - action="permissions-debug", actor=request.actor - ) - - # Check if this is a request for JSON (has .json extension) - as_format = request.url_vars.get("format") - - if not as_format: - # Render the HTML form (even if query parameters are present) - # Put most common/interesting actions first - priority_actions = [ - "view-instance", - "view-database", - "view-table", - "view-query", - "execute-sql", - "insert-row", - "update-row", - "delete-row", - ] - actions = list(self.ds.actions.keys()) - # Priority actions first (in order), then remaining alphabetically - sorted_actions = [a for a in priority_actions if a in actions] - sorted_actions.extend( - sorted(a for a in actions if a not in priority_actions) - ) - - return await self.render( - ["debug_allowed.html"], - request, - { - "supported_actions": sorted_actions, - "has_debug_permission": has_debug_permission, - }, - ) - - payload, status = await self._allowed_payload(request, has_debug_permission) - headers = {} - if self.ds.cors: - add_cors_headers(headers) - return Response.json(payload, status=status, headers=headers) - - async def _allowed_payload(self, request, has_debug_permission): - action = request.args.get("action") - if not action: - return {"error": "action parameter is required"}, 400 - if action not in self.ds.actions: - return {"error": f"Unknown action: {action}"}, 404 - - actor = request.actor if isinstance(request.actor, dict) else None - actor_id = actor.get("id") if actor else None - parent_filter = request.args.get("parent") - child_filter = request.args.get("child") - if child_filter and not parent_filter: - return {"error": "parent must be provided when child is specified"}, 400 - - try: - page = int(request.args.get("page", "1")) - page_size = int(request.args.get("page_size", "50")) - except ValueError: - return {"error": "page and page_size must be integers"}, 400 - if page < 1: - return {"error": "page must be >= 1"}, 400 - if page_size < 1: - return {"error": "page_size must be >= 1"}, 400 - max_page_size = 200 - if page_size > max_page_size: - page_size = max_page_size - offset = (page - 1) * page_size - - # Use the simplified allowed_resources method - # Collect all 
resources with optional reasons for debugging - try: - allowed_rows = [] - result = await self.ds.allowed_resources( - action=action, - actor=actor, - parent=parent_filter, - include_reasons=has_debug_permission, - ) - async for resource in result.all(): - parent_val = resource.parent - child_val = resource.child - - # Build resource path - if parent_val is None: - resource_path = "/" - elif child_val is None: - resource_path = f"/{parent_val}" - else: - resource_path = f"/{parent_val}/{child_val}" - - row = { - "parent": parent_val, - "child": child_val, - "resource": resource_path, - } - - # Add reason if we have it (from include_reasons=True) - if has_debug_permission and hasattr(resource, "reasons"): - row["reason"] = resource.reasons - - allowed_rows.append(row) - except Exception: - # If catalog tables don't exist yet, return empty results - return ( - { - "action": action, - "actor_id": actor_id, - "page": page, - "page_size": page_size, - "total": 0, - "items": [], - }, - 200, - ) - - # Apply child filter if specified - if child_filter is not None: - allowed_rows = [row for row in allowed_rows if row["child"] == child_filter] - - # Pagination - total = len(allowed_rows) - paged_rows = allowed_rows[offset : offset + page_size] - - # Items are already in the right format - items = paged_rows - - def build_page_url(page_number): - pairs = [] - for key in request.args: - if key in {"page", "page_size"}: - continue - for value in request.args.getlist(key): - pairs.append((key, value)) - pairs.append(("page", str(page_number))) - pairs.append(("page_size", str(page_size))) - query = urllib.parse.urlencode(pairs) - return f"{request.path}?{query}" - - response = { - "action": action, - "actor_id": actor_id, - "page": page, - "page_size": page_size, - "total": total, - "items": items, - } - - if total > offset + page_size: - response["next_url"] = build_page_url(page + 1) - if page > 1: - response["previous_url"] = build_page_url(page - 1) - - return response, 200 - - -class PermissionRulesView(BaseView): - name = "permission_rules" - has_json_alternate = False - - async def get(self, request): - await self.ds.ensure_permission(action="view-instance", actor=request.actor) - await self.ds.ensure_permission(action="permissions-debug", actor=request.actor) - - # Check if this is a request for JSON (has .json extension) - as_format = request.url_vars.get("format") - - if not as_format: - # Render the HTML form (even if query parameters are present) - return await self.render( - ["debug_rules.html"], - request, - { - "sorted_actions": sorted(self.ds.actions.keys()), - "has_debug_permission": True, - }, - ) - - # JSON API - action parameter is required - action = request.args.get("action") - if not action: - return Response.json({"error": "action parameter is required"}, status=400) - if action not in self.ds.actions: - return Response.json({"error": f"Unknown action: {action}"}, status=404) - - actor = request.actor if isinstance(request.actor, dict) else None - - try: - page = int(request.args.get("page", "1")) - page_size = int(request.args.get("page_size", "50")) - except ValueError: - return Response.json( - {"error": "page and page_size must be integers"}, status=400 - ) - if page < 1: - return Response.json({"error": "page must be >= 1"}, status=400) - if page_size < 1: - return Response.json({"error": "page_size must be >= 1"}, status=400) - max_page_size = 200 - if page_size > max_page_size: - page_size = max_page_size - offset = (page - 1) * page_size - - from 
datasette.utils.actions_sql import build_permission_rules_sql - - union_sql, union_params, restriction_sqls = await build_permission_rules_sql( - self.ds, actor, action - ) - await self.ds.refresh_schemas() - db = self.ds.get_internal_database() - - count_query = f""" - WITH rules AS ( - {union_sql} - ) - SELECT COUNT(*) AS count - FROM rules - """ - count_row = (await db.execute(count_query, union_params)).first() - total = count_row["count"] if count_row else 0 - - data_query = f""" - WITH rules AS ( - {union_sql} - ) - SELECT parent, child, allow, reason, source_plugin - FROM rules - ORDER BY allow DESC, (parent IS NOT NULL), parent, child - LIMIT :limit OFFSET :offset - """ - params = {**union_params, "limit": page_size, "offset": offset} - rows = await db.execute(data_query, params) - - items = [] - for row in rows: - parent = row["parent"] - child = row["child"] - items.append( - { - "parent": parent, - "child": child, - "resource": _resource_path(parent, child), - "allow": row["allow"], - "reason": row["reason"], - "source_plugin": row["source_plugin"], - } - ) - - def build_page_url(page_number): - pairs = [] - for key in request.args: - if key in {"page", "page_size"}: - continue - for value in request.args.getlist(key): - pairs.append((key, value)) - pairs.append(("page", str(page_number))) - pairs.append(("page_size", str(page_size))) - query = urllib.parse.urlencode(pairs) - return f"{request.path}?{query}" - - response = { - "action": action, - "actor_id": (actor or {}).get("id") if actor else None, - "page": page, - "page_size": page_size, - "total": total, - "items": items, - } - - if total > offset + page_size: - response["next_url"] = build_page_url(page + 1) - if page > 1: - response["previous_url"] = build_page_url(page - 1) - - headers = {} - if self.ds.cors: - add_cors_headers(headers) - return Response.json(response, headers=headers) - - -async def _check_permission_for_actor(ds, action, parent, child, actor): - """Shared logic for checking permissions. 
Returns a dict with check results.""" - if action not in ds.actions: - return {"error": f"Unknown action: {action}"}, 404 - - if child and not parent: - return {"error": "parent is required when child is provided"}, 400 - - # Use the action's properties to create the appropriate resource object - action_obj = ds.actions.get(action) - if not action_obj: - return {"error": f"Unknown action: {action}"}, 400 - - # Global actions (no resource_class) don't have a resource - if action_obj.resource_class is None: - resource_obj = None - elif action_obj.takes_parent and action_obj.takes_child: - # Child-level resource (e.g., TableResource, QueryResource) - resource_obj = action_obj.resource_class(database=parent, table=child) - elif action_obj.takes_parent: - # Parent-level resource (e.g., DatabaseResource) - resource_obj = action_obj.resource_class(database=parent) - else: - # This shouldn't happen given validation in Action.__post_init__ - return {"error": f"Invalid action configuration: {action}"}, 500 - - allowed = await ds.allowed(action=action, resource=resource_obj, actor=actor) - - response = { - "action": action, - "allowed": bool(allowed), - "resource": { - "parent": parent, - "child": child, - "path": _resource_path(parent, child), - }, - } - - if actor and "id" in actor: - response["actor_id"] = actor["id"] - - return response, 200 - - -class PermissionCheckView(BaseView): - name = "permission_check" - has_json_alternate = False - - async def get(self, request): - await self.ds.ensure_permission(action="permissions-debug", actor=request.actor) - as_format = request.url_vars.get("format") - - if not as_format: - return await self.render( - ["debug_check.html"], - request, - { - "sorted_actions": sorted(self.ds.actions.keys()), - "has_debug_permission": True, - }, - ) - - # JSON API - action parameter is required - action = request.args.get("action") - if not action: - return Response.json({"error": "action parameter is required"}, status=400) - - parent = request.args.get("parent") - child = request.args.get("child") - - response, status = await _check_permission_for_actor( - self.ds, action, parent, child, request.actor - ) - return Response.json(response, status=status) - - -class AllowDebugView(BaseView): - name = "allow_debug" - has_json_alternate = False - - async def get(self, request): - errors = [] - actor_input = request.args.get("actor") or '{"id": "root"}' - try: - actor = json.loads(actor_input) - actor_input = json.dumps(actor, indent=4) - except json.decoder.JSONDecodeError as ex: - errors.append(f"Actor JSON error: {ex}") - allow_input = request.args.get("allow") or '{"id": "*"}' - try: - allow = json.loads(allow_input) - allow_input = json.dumps(allow, indent=4) - except json.decoder.JSONDecodeError as ex: - errors.append(f"Allow JSON error: {ex}") - - result = None - if not errors: - result = str(actor_matches_allow(actor, allow)) - - return await self.render( - ["allow_debug.html"], - request, - { - "result": result, - "error": "\n\n".join(errors) if errors else "", - "actor_input": actor_input, - "allow_input": allow_input, - "has_debug_permission": await self.ds.allowed( - action="permissions-debug", actor=request.actor - ), - }, - ) - - -class MessagesDebugView(BaseView): - name = "messages_debug" - has_json_alternate = False - - async def get(self, request): - await self.ds.ensure_permission(action="view-instance", actor=request.actor) - return await self.render(["messages_debug.html"], request) - - async def post(self, request): - await 
self.ds.ensure_permission(action="view-instance", actor=request.actor) - post = await request.post_vars() - message = post.get("message", "") - message_type = post.get("message_type") or "INFO" - assert message_type in ("INFO", "WARNING", "ERROR", "all") - datasette = self.ds - if message_type == "all": - datasette.add_message(request, message, datasette.INFO) - datasette.add_message(request, message, datasette.WARNING) - datasette.add_message(request, message, datasette.ERROR) - else: - datasette.add_message(request, message, getattr(datasette, message_type)) - return Response.redirect(self.ds.urls.instance()) - - -class CreateTokenView(BaseView): - name = "create_token" - has_json_alternate = False - - def check_permission(self, request): - if not self.ds.setting("allow_signed_tokens"): - raise Forbidden("Signed tokens are not enabled for this Datasette instance") - if not request.actor: - raise Forbidden("You must be logged in to create a token") - if not request.actor.get("id"): - raise Forbidden( - "You must be logged in as an actor with an ID to create a token" - ) - if request.actor.get("token"): - raise Forbidden( - "Token authentication cannot be used to create additional tokens" - ) - - async def shared(self, request): - self.check_permission(request) - # Build list of databases and tables the user has permission to view - db_page = await self.ds.allowed_resources("view-database", request.actor) - allowed_databases = [r async for r in db_page.all()] - - table_page = await self.ds.allowed_resources("view-table", request.actor) - allowed_tables = [r async for r in table_page.all()] - - # Build database -> tables mapping - database_with_tables = [] - for db_resource in allowed_databases: - database_name = db_resource.parent - if database_name == "_memory": - continue - - # Find tables for this database - tables = [] - for table_resource in allowed_tables: - if table_resource.parent == database_name: - tables.append( - { - "name": table_resource.child, - "encoded": tilde_encode(table_resource.child), - } - ) - - database_with_tables.append( - { - "name": database_name, - "encoded": tilde_encode(database_name), - "tables": tables, - } - ) - return { - "actor": request.actor, - "all_actions": self.ds.actions.keys(), - "database_actions": [ - key for key, value in self.ds.actions.items() if value.takes_parent - ], - "child_actions": [ - key for key, value in self.ds.actions.items() if value.takes_child - ], - "database_with_tables": database_with_tables, - } - - async def get(self, request): - self.check_permission(request) - return await self.render( - ["create_token.html"], request, await self.shared(request) - ) - - async def post(self, request): - self.check_permission(request) - post = await request.post_vars() - errors = [] - expires_after = None - if post.get("expire_type"): - duration_string = post.get("expire_duration") - if ( - not duration_string - or not duration_string.isdigit() - or not int(duration_string) > 0 - ): - errors.append("Invalid expire duration") - else: - unit = post["expire_type"] - if unit == "minutes": - expires_after = int(duration_string) * 60 - elif unit == "hours": - expires_after = int(duration_string) * 60 * 60 - elif unit == "days": - expires_after = int(duration_string) * 60 * 60 * 24 - else: - errors.append("Invalid expire duration unit") - - # Are there any restrictions? 
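- # Restriction form fields arrive as "all:<action>", "database:<db>:<action>" or "resource:<db>:<table>:<action>", with database and table names tilde-encoded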
- restrict_all = [] - restrict_database = {} - restrict_resource = {} - - for key in post: - if key.startswith("all:") and key.count(":") == 1: - restrict_all.append(key.split(":")[1]) - elif key.startswith("database:") and key.count(":") == 2: - bits = key.split(":") - database = tilde_decode(bits[1]) - action = bits[2] - restrict_database.setdefault(database, []).append(action) - elif key.startswith("resource:") and key.count(":") == 3: - bits = key.split(":") - database = tilde_decode(bits[1]) - resource = tilde_decode(bits[2]) - action = bits[3] - restrict_resource.setdefault(database, {}).setdefault( - resource, [] - ).append(action) - - token = self.ds.create_token( - request.actor["id"], - expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, - ) - token_bits = self.ds.unsign(token[len("dstok_") :], namespace="token") - await self.ds.track_event( - CreateTokenEvent( - actor=request.actor, - expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, - ) - ) - context = await self.shared(request) - context.update({"errors": errors, "token": token, "token_bits": token_bits}) - return await self.render(["create_token.html"], request, context) - - -class ApiExplorerView(BaseView): - name = "api_explorer" - has_json_alternate = False - - async def example_links(self, request): - databases = [] - for name, db in self.ds.databases.items(): - database_visible, _ = await self.ds.check_visibility( - request.actor, - action="view-database", - resource=DatabaseResource(database=name), - ) - if not database_visible: - continue - tables = [] - table_names = await db.table_names() - for table in table_names: - visible, _ = await self.ds.check_visibility( - request.actor, - action="view-table", - resource=TableResource(database=name, table=table), - ) - if not visible: - continue - table_links = [] - tables.append({"name": table, "links": table_links}) - table_links.append( - { - "label": "Get rows for {}".format(table), - "method": "GET", - "path": self.ds.urls.table(name, table, format="json"), - } - ) - # If not mutable don't show any write APIs - if not db.is_mutable: - continue - - if await self.ds.allowed( - action="insert-row", - resource=TableResource(database=name, table=table), - actor=request.actor, - ): - pks = await db.primary_keys(table) - table_links.extend( - [ - { - "path": self.ds.urls.table(name, table) + "/-/insert", - "method": "POST", - "label": "Insert rows into {}".format(table), - "json": { - "rows": [ - { - column: None - for column in await db.table_columns(table) - if column not in pks - } - ] - }, - }, - { - "path": self.ds.urls.table(name, table) + "/-/upsert", - "method": "POST", - "label": "Upsert rows into {}".format(table), - "json": { - "rows": [ - { - column: None - for column in await db.table_columns(table) - if column not in pks - } - ] - }, - }, - ] - ) - if await self.ds.allowed( - action="drop-table", - resource=TableResource(database=name, table=table), - actor=request.actor, - ): - table_links.append( - { - "path": self.ds.urls.table(name, table) + "/-/drop", - "label": "Drop table {}".format(table), - "json": {"confirm": False}, - "method": "POST", - } - ) - database_links = [] - if ( - await self.ds.allowed( - action="create-table", - resource=DatabaseResource(database=name), - actor=request.actor, - ) - and db.is_mutable - ): - database_links.append( - { - "path": self.ds.urls.database(name) + 
"/-/create", - "label": "Create table in {}".format(name), - "json": { - "table": "new_table", - "columns": [ - {"name": "id", "type": "integer"}, - {"name": "name", "type": "text"}, - ], - "pk": "id", - }, - "method": "POST", - } - ) - if database_links or tables: - databases.append( - { - "name": name, - "links": database_links, - "tables": tables, - } - ) - # Sort so that mutable databases are first - databases.sort(key=lambda d: not self.ds.databases[d["name"]].is_mutable) - return databases - - async def get(self, request): - visible, private = await self.ds.check_visibility( - request.actor, - action="view-instance", - ) - if not visible: - raise Forbidden("You do not have permission to view this instance") - - def api_path(link): - return "/-/api#{}".format( - urllib.parse.urlencode( - { - key: json.dumps(value, indent=2) if key == "json" else value - for key, value in link.items() - if key in ("path", "method", "json") - } - ) - ) - - return await self.render( - ["api_explorer.html"], - request, - { - "example_links": await self.example_links(request), - "api_path": api_path, - "private": private, - }, - ) - - -class TablesView(BaseView): - """ - Simple endpoint that uses the new allowed_resources() API. - Returns JSON list of all tables the actor can view. - - Supports ?q=foo+bar to filter tables matching .*foo.*bar.* pattern, - ordered by shortest name first. - """ - - name = "tables" - has_json_alternate = False - - async def get(self, request): - # Get search query parameter - q = request.args.get("q", "").strip() - - # Get SQL for allowed resources using the permission system - permission_sql, params = await self.ds.allowed_resources_sql( - action="view-table", actor=request.actor - ) - - # Build query based on whether we have a search query - if q: - # Build SQL LIKE pattern from search terms - # Split search terms by whitespace and build pattern: %term1%term2%term3% - terms = q.split() - pattern = "%" + "%".join(terms) + "%" - - # Build query with CTE to filter by search pattern - sql = f""" - WITH allowed_tables AS ( - {permission_sql} - ) - SELECT parent, child - FROM allowed_tables - WHERE child LIKE :pattern COLLATE NOCASE - ORDER BY length(child), child - """ - all_params = {**params, "pattern": pattern} - else: - # No search query - return all tables, ordered by name - # Fetch 101 to detect if we need to truncate - sql = f""" - WITH allowed_tables AS ( - {permission_sql} - ) - SELECT parent, child - FROM allowed_tables - ORDER BY parent, child - LIMIT 101 - """ - all_params = params - - # Execute against internal database - result = await self.ds.get_internal_database().execute(sql, all_params) - - # Build response with truncation - rows = list(result.rows) - truncated = len(rows) > 100 - if truncated: - rows = rows[:100] - - matches = [ - { - "name": f"{row['parent']}: {row['child']}", - "url": self.ds.urls.table(row["parent"], row["child"]), - } - for row in rows - ] - - return Response.json({"matches": matches, "truncated": truncated}) - - -class SchemaBaseView(BaseView): - """Base class for schema views with common response formatting.""" - - has_json_alternate = False - - async def get_database_schema(self, database_name): - """Get schema SQL for a database.""" - db = self.ds.databases[database_name] - result = await db.execute( - "select group_concat(sql, ';' || CHAR(10)) as schema from sqlite_master where sql is not null" - ) - row = result.first() - return row["schema"] if row and row["schema"] else "" - - def format_json_response(self, data): - """Format data 
as JSON response with CORS headers if needed.""" - headers = {} - if self.ds.cors: - add_cors_headers(headers) - return Response.json(data, headers=headers) - - def format_error_response(self, error_message, format_, status=404): - """Format error response based on requested format.""" - if format_ == "json": - headers = {} - if self.ds.cors: - add_cors_headers(headers) - return Response.json( - {"ok": False, "error": error_message}, status=status, headers=headers - ) - else: - return Response.text(error_message, status=status) - - def format_markdown_response(self, heading, schema): - """Format schema as Markdown response.""" - md_output = f"# {heading}\n\n```sql\n{schema}\n```\n" - return Response.text( - md_output, headers={"content-type": "text/markdown; charset=utf-8"} - ) - - async def format_html_response( - self, request, schemas, is_instance=False, table_name=None - ): - """Format schema as HTML response.""" - context = { - "schemas": schemas, - "is_instance": is_instance, - } - if table_name: - context["table_name"] = table_name - return await self.render(["schema.html"], request=request, context=context) - - -class InstanceSchemaView(SchemaBaseView): - """ - Displays schema for all databases in the instance. - Supports HTML, JSON, and Markdown formats. - """ - - name = "instance_schema" - - async def get(self, request): - format_ = request.url_vars.get("format") or "html" - - # Get all databases the actor can view - allowed_databases_page = await self.ds.allowed_resources( - "view-database", - request.actor, - ) - allowed_databases = [r.parent async for r in allowed_databases_page.all()] - - # Get schema for each database - schemas = [] - for database_name in allowed_databases: - schema = await self.get_database_schema(database_name) - schemas.append({"database": database_name, "schema": schema}) - - if format_ == "json": - return self.format_json_response({"schemas": schemas}) - elif format_ == "md": - md_parts = [ - f"# Schema for {item['database']}\n\n```sql\n{item['schema']}\n```" - for item in schemas - ] - return Response.text( - "\n\n".join(md_parts), - headers={"content-type": "text/markdown; charset=utf-8"}, - ) - else: - return await self.format_html_response(request, schemas, is_instance=True) - - -class DatabaseSchemaView(SchemaBaseView): - """ - Displays schema for a specific database. - Supports HTML, JSON, and Markdown formats. - """ - - name = "database_schema" - - async def get(self, request): - database_name = request.url_vars["database"] - format_ = request.url_vars.get("format") or "html" - - # Check if database exists - if database_name not in self.ds.databases: - return self.format_error_response("Database not found", format_) - - # Check view-database permission - await self.ds.ensure_permission( - action="view-database", - resource=DatabaseResource(database=database_name), - actor=request.actor, - ) - - schema = await self.get_database_schema(database_name) - - if format_ == "json": - return self.format_json_response( - {"database": database_name, "schema": schema} - ) - elif format_ == "md": - return self.format_markdown_response(f"Schema for {database_name}", schema) - else: - schemas = [{"database": database_name, "schema": schema}] - return await self.format_html_response(request, schemas) - - -class TableSchemaView(SchemaBaseView): - """ - Displays schema for a specific table. - Supports HTML, JSON, and Markdown formats. 
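- Returns a 404 in the requested format if the table does not exist.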
- """ - - name = "table_schema" - - async def get(self, request): - database_name = request.url_vars["database"] - table_name = request.url_vars["table"] - format_ = request.url_vars.get("format") or "html" - - # Check view-table permission - await self.ds.ensure_permission( - action="view-table", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ) - - # Get schema for the table - db = self.ds.databases[database_name] - result = await db.execute( - "select sql from sqlite_master where name = ? and sql is not null", - [table_name], - ) - row = result.first() - - # Return 404 if table doesn't exist - if not row or not row["sql"]: - return self.format_error_response("Table not found", format_) - - schema = row["sql"] - - if format_ == "json": - return self.format_json_response( - {"database": database_name, "table": table_name, "schema": schema} - ) - elif format_ == "md": - return self.format_markdown_response( - f"Schema for {database_name}.{table_name}", schema - ) - else: - schemas = [{"database": database_name, "schema": schema}] - return await self.format_html_response( - request, schemas, table_name=table_name + context={"filename": self.filename, "data": data}, ) diff --git a/datasette/views/table.py b/datasette/views/table.py index 007c0c85..8ba3abe4 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1,56 +1,36 @@ -import asyncio +import urllib import itertools import json -import urllib -from asyncinject import Registry -import markupsafe +import jinja2 from datasette.plugins import pm -from datasette.database import QueryInterrupted -from datasette.events import ( - AlterTableEvent, - DropTableEvent, - InsertRowsEvent, - UpsertRowsEvent, -) -from datasette import tracer -from datasette.resources import DatabaseResource, TableResource from datasette.utils import ( - add_cors_headers, - await_me_maybe, - call_with_supported_arguments, CustomRow, + QueryInterrupted, + RequestParameters, append_querystring, compound_keys_after_sql, - format_bytes, - make_slot_function, - tilde_encode, escape_sqlite, filters_should_redirect, is_url, path_from_row_pks, path_with_added_args, - path_with_format, path_with_removed_args, path_with_replaced_args, + sqlite3, to_css_class, - truncate_url, urlsafe_components, value_as_boolean, - InvalidSql, - sqlite3, ) -from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response +from datasette.utils.asgi import NotFound from datasette.filters import Filters -import sqlite_utils -from .base import BaseView, DatasetteError, _error, stream_csv -from .database import QueryView +from .base import DataView, DatasetteError, ureg LINK_WITH_LABEL = ( - '{label} {id}' + '{label} {id}' ) -LINK_WITH_VALUE = '{id}' +LINK_WITH_VALUE = '{id}' class Row: @@ -82,1742 +62,845 @@ class Row: return json.dumps(d, default=repr, indent=2) -async def run_sequential(*args): - # This used to be swappable for asyncio.gather() to run things in - # parallel, but this lead to hard-to-debug locking issues with - # in-memory databases: https://github.com/simonw/datasette/issues/2189 - results = [] - for fn in args: - results.append(await fn) - return results - - -def _redirect(datasette, request, path, forward_querystring=True, remove_args=None): - if request.query_string and "?" 
not in path and forward_querystring: - path = f"{path}?{request.query_string}" - if remove_args: - path = path_with_removed_args(request, remove_args, path=path) - r = Response.redirect(path) - r.headers["Link"] = f"<{path}>; rel=preload" - if datasette.cors: - add_cors_headers(r.headers) - return r - - -async def _redirect_if_needed(datasette, request, resolved): - # Handle ?_filter_column - redirect_params = filters_should_redirect(request.args) - if redirect_params: - return _redirect( - datasette, - request, - datasette.urls.path(path_with_added_args(request, redirect_params)), - forward_querystring=False, - ) - - # If ?_sort_by_desc=on (from checkbox) redirect to _sort_desc=(_sort) - if "_sort_by_desc" in request.args: - return _redirect( - datasette, - request, - datasette.urls.path( - path_with_added_args( - request, - { - "_sort_desc": request.args.get("_sort"), - "_sort_by_desc": None, - "_sort": None, - }, - ) - ), - forward_querystring=False, - ) - - -async def display_columns_and_rows( - datasette, - database_name, - table_name, - description, - rows, - link_column=False, - truncate_cells=0, - sortable_columns=None, - request=None, -): - """Returns columns, rows for specified table - including fancy foreign key treatment""" - sortable_columns = sortable_columns or set() - db = datasette.databases[database_name] - column_descriptions = dict( - await datasette.get_internal_database().execute( - """ - SELECT - column_name, - value - FROM metadata_columns - WHERE database_name = ? - AND resource_name = ? - AND key = 'description' - """, - [database_name, table_name], - ) - ) - - column_details = { - col.name: col for col in await db.table_column_details(table_name) - } - pks = await db.primary_keys(table_name) - pks_for_display = pks - if not pks_for_display: - pks_for_display = ["rowid"] - - columns = [] - for r in description: - if r[0] == "rowid" and "rowid" not in column_details: - type_ = "integer" - notnull = 0 +class RowTableShared(DataView): + async def sortable_columns_for_table(self, database, table, use_rowid): + db = self.ds.databases[database] + table_metadata = self.ds.table_metadata(database, table) + if "sortable_columns" in table_metadata: + sortable_columns = set(table_metadata["sortable_columns"]) else: - type_ = column_details[r[0]].type - notnull = column_details[r[0]].notnull - columns.append( - { - "name": r[0], - "sortable": r[0] in sortable_columns, - "is_pk": r[0] in pks_for_display, - "type": type_, - "notnull": notnull, - "description": column_descriptions.get(r[0]), - } - ) + sortable_columns = set(await db.table_columns(table)) + if use_rowid: + sortable_columns.add("rowid") + return sortable_columns - column_to_foreign_key_table = { - fk["column"]: fk["other_table"] - for fk in await db.foreign_keys_for_table(table_name) - } - - cell_rows = [] - base_url = datasette.setting("base_url") - for row in rows: - cells = [] - # Unless we are a view, the first column is a link - either to the rowid - # or to the simple or compound primary key - if link_column: - is_special_link_column = len(pks) != 1 - pk_path = path_from_row_pks(row, pks, not pks, False) - cells.append( - { - "column": pks[0] if len(pks) == 1 else "Link", - "value_type": "pk", - "is_special_link_column": is_special_link_column, - "raw": pk_path, - "value": markupsafe.Markup( - '{flat_pks}'.format( - table_path=datasette.urls.table(database_name, table_name), - flat_pks=str(markupsafe.escape(pk_path)), - flat_pks_quoted=path_from_row_pks(row, pks, not pks), - ) - ), - } - ) - - for value, 
column_dict in zip(row, columns): - column = column_dict["name"] - if link_column and len(pks) == 1 and column == pks[0]: - # If there's a simple primary key, don't repeat the value as it's - # already shown in the link column. - continue - - # First let the plugins have a go - # pylint: disable=no-member - plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=table_name, - database=database_name, - datasette=datasette, - request=request, - ): - candidate = await await_me_maybe(candidate) - if candidate is not None: - plugin_display_value = candidate - break - if plugin_display_value: - display_value = plugin_display_value - elif isinstance(value, bytes): - formatted = format_bytes(len(value)) - display_value = markupsafe.Markup( - '<Binary: {:,} byte{}>'.format( - datasette.urls.row_blob( - database_name, - table_name, - path_from_row_pks(row, pks, not pks), - column, - ), - ( - ' title="{}"'.format(formatted) - if "bytes" not in formatted - else "" - ), - len(value), - "" if len(value) == 1 else "s", - ) - ) - elif isinstance(value, dict): - # It's an expanded foreign key - display link to other row - label = value["label"] - value = value["value"] - # The table we link to depends on the column - other_table = column_to_foreign_key_table[column] - link_template = LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE - display_value = markupsafe.Markup( - link_template.format( - database=tilde_encode(database_name), - base_url=base_url, - table=tilde_encode(other_table), - link_id=tilde_encode(str(value)), - id=str(markupsafe.escape(value)), - label=str(markupsafe.escape(label)) or "-", - ) - ) - elif value in ("", None): - display_value = markupsafe.Markup(" ") - elif is_url(str(value).strip()): - display_value = markupsafe.Markup( - '{truncated_url}'.format( - url=markupsafe.escape(value.strip()), - truncated_url=markupsafe.escape( - truncate_url(value.strip(), truncate_cells) - ), - ) - ) - else: - display_value = str(value) - if truncate_cells and len(display_value) > truncate_cells: - display_value = display_value[:truncate_cells] + "\u2026" - - cells.append( - { - "column": column, - "value": display_value, - "raw": value, - "value_type": ( - "none" if value is None else str(type(value).__name__) - ), - } - ) - cell_rows.append(Row(cells)) - - if link_column: - # Add the link column header. - # If it's a simple primary key, we have to remove and re-add that column name at - # the beginning of the header row. 
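The cell-rendering loop above maps each raw SQLite value to a display value: plugin results win, binary blobs become a byte-count placeholder, expanded foreign keys become links, empty values become a non-breaking space, bare URLs become anchors, and long strings are truncated with an ellipsis. A rough sketch of that dispatch with the escaping and link markup left out (display_value is a hypothetical helper, not the code above):

def display_value(value, truncate_cells=0):
    # Order mirrors the branches above: bytes, empty, URL, then plain text
    if isinstance(value, bytes):
        return "<Binary: {:,} byte{}>".format(len(value), "" if len(value) == 1 else "s")
    if value in ("", None):
        return "\xa0"  # non-breaking space keeps the table cell from collapsing
    text = str(value).strip()
    if text.startswith(("http://", "https://")):
        return text  # the real code renders this as an <a href> with a truncated label
    out = str(value)
    if truncate_cells and len(out) > truncate_cells:
        out = out[:truncate_cells] + "\u2026"
    return out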
- first_column = None - if len(pks) == 1: - columns = [col for col in columns if col["name"] != pks[0]] - first_column = { - "name": pks[0], - "sortable": len(pks) == 1, - "is_pk": True, - "type": column_details[pks[0]].type, - "notnull": column_details[pks[0]].notnull, - } - else: - first_column = { - "name": "Link", - "sortable": False, - "is_pk": False, - "type": "", - "notnull": 0, - } - columns = [first_column] + columns - return columns, cell_rows - - -class TableInsertView(BaseView): - name = "table-insert" - - def __init__(self, datasette): - self.ds = datasette - - async def _validate_data(self, request, db, table_name, pks, upsert): - errors = [] - - pks_list = [] - if isinstance(pks, str): - pks_list = [pks] - else: - pks_list = list(pks) - - if not pks_list: - pks_list = ["rowid"] - - def _errors(errors): - return None, errors, {} - - if not request.headers.get("content-type").startswith("application/json"): - # TODO: handle form-encoded data - return _errors(["Invalid content-type, must be application/json"]) - body = await request.post_body() - try: - data = json.loads(body) - except json.JSONDecodeError as e: - return _errors(["Invalid JSON: {}".format(e)]) - if not isinstance(data, dict): - return _errors(["JSON must be a dictionary"]) - keys = data.keys() - - # keys must contain "row" or "rows" - if "row" not in keys and "rows" not in keys: - return _errors(['JSON must have one or other of "row" or "rows"']) - rows = [] - if "row" in keys: - if "rows" in keys: - return _errors(['Cannot use "row" and "rows" at the same time']) - row = data["row"] - if not isinstance(row, dict): - return _errors(['"row" must be a dictionary']) - rows = [row] - data["return"] = True - else: - rows = data["rows"] - if not isinstance(rows, list): - return _errors(['"rows" must be a list']) - for row in rows: - if not isinstance(row, dict): - return _errors(['"rows" must be a list of dictionaries']) - - # Does this exceed max_insert_rows? 
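Before any write happens, the insert endpoint validates the JSON body: it must be an object with exactly one of "row" or "rows", every row must itself be a dictionary, and the number of rows is capped. A condensed standalone sketch of those checks (max_rows stands in for the max_insert_rows setting; the single error message for the row/rows case is a simplification):

import json


def validate_insert_body(body, max_rows=100):
    # Returns (rows, errors); rows is None when validation failed
    try:
        data = json.loads(body)
    except json.JSONDecodeError as e:
        return None, ["Invalid JSON: {}".format(e)]
    if not isinstance(data, dict):
        return None, ["JSON must be a dictionary"]
    if ("row" in data) == ("rows" in data):
        # Covers both "neither provided" and "both provided"
        return None, ['JSON must have one or other of "row" or "rows"']
    rows = [data["row"]] if "row" in data else data["rows"]
    if not isinstance(rows, list):
        return None, ['"rows" must be a list']
    if not all(isinstance(row, dict) for row in rows):
        return None, ['"rows" must be a list of dictionaries']
    if len(rows) > max_rows:
        return None, ["Too many rows, maximum allowed is {}".format(max_rows)]
    return rows, []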
- max_insert_rows = self.ds.setting("max_insert_rows") - if len(rows) > max_insert_rows: - return _errors( - ["Too many rows, maximum allowed is {}".format(max_insert_rows)] - ) - - # Validate other parameters - extras = { - key: value for key, value in data.items() if key not in ("row", "rows") - } - valid_extras = {"return", "ignore", "replace", "alter"} - invalid_extras = extras.keys() - valid_extras - if invalid_extras: - return _errors( - ['Invalid parameter: "{}"'.format('", "'.join(sorted(invalid_extras)))] - ) - if extras.get("ignore") and extras.get("replace"): - return _errors(['Cannot use "ignore" and "replace" at the same time']) - - columns = set(await db.table_columns(table_name)) - columns.update(pks_list) - - for i, row in enumerate(rows): - if upsert: - # It MUST have the primary key - missing_pks = [pk for pk in pks_list if pk not in row] - if missing_pks: - errors.append( - 'Row {} is missing primary key column(s): "{}"'.format( - i, '", "'.join(missing_pks) - ) - ) - invalid_columns = set(row.keys()) - columns - if invalid_columns and not extras.get("alter"): - errors.append( - "Row {} has invalid columns: {}".format( - i, ", ".join(sorted(invalid_columns)) - ) - ) - if errors: - return _errors(errors) - return rows, errors, extras - - async def post(self, request, upsert=False): - try: - resolved = await self.ds.resolve_table(request) - except NotFound as e: - return _error([e.args[0]], 404) - db = resolved.db - database_name = db.name - table_name = resolved.table - - # Table must exist (may handle table creation in the future) - db = self.ds.get_database(database_name) - if not await db.table_exists(table_name): - return _error(["Table not found: {}".format(table_name)], 404) - - if upsert: - # Must have insert-row AND upsert-row permissions - if not ( - await self.ds.allowed( - action="insert-row", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ) - and await self.ds.allowed( - action="update-row", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ) - ): - return _error( - ["Permission denied: need both insert-row and update-row"], 403 - ) - else: - # Must have insert-row permission - if not await self.ds.allowed( - action="insert-row", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ): - return _error(["Permission denied"], 403) - - if not db.is_mutable: - return _error(["Database is immutable"], 403) - - pks = await db.primary_keys(table_name) - - rows, errors, extras = await self._validate_data( - request, db, table_name, pks, upsert - ) - if errors: - return _error(errors, 400) - - num_rows = len(rows) - - # No that we've passed pks to _validate_data it's safe to - # fix the rowids case: - if not pks: - pks = ["rowid"] - - ignore = extras.get("ignore") - replace = extras.get("replace") - alter = extras.get("alter") - - if upsert and (ignore or replace): - return _error(["Upsert does not support ignore or replace"], 400) - - if replace and not await self.ds.allowed( - action="update-row", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ): - return _error(['Permission denied: need update-row to use "replace"'], 403) - - initial_schema = None - if alter: - # Must have alter-table permission - if not await self.ds.allowed( - action="alter-table", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ): - return _error(["Permission denied for 
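The per-row loop above enforces two rules: for upserts every row must carry the full primary key, and any column not already on the table is rejected unless "alter": true was supplied. The same checks in isolation (a hypothetical helper, detached from the view):

def check_rows(rows, pks, table_columns, alter=False):
    # Returns a list of validation error strings; empty means the rows look OK
    errors = []
    known = set(table_columns) | set(pks)
    for i, row in enumerate(rows):
        missing = [pk for pk in pks if pk not in row]
        if missing:
            errors.append(
                'Row {} is missing primary key column(s): "{}"'.format(
                    i, '", "'.join(missing)
                )
            )
        unknown = set(row) - known
        if unknown and not alter:
            errors.append(
                "Row {} has invalid columns: {}".format(i, ", ".join(sorted(unknown)))
            )
    return errors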
alter-table"], 403) - # Track initial schema to check if it changed later - initial_schema = await db.execute_fn( - lambda conn: sqlite_utils.Database(conn)[table_name].schema - ) - - should_return = bool(extras.get("return", False)) - row_pk_values_for_later = [] - if should_return and upsert: - row_pk_values_for_later = [tuple(row[pk] for pk in pks) for row in rows] - - def insert_or_upsert_rows(conn): - table = sqlite_utils.Database(conn)[table_name] - kwargs = {} - if upsert: - kwargs = { - "pk": pks[0] if len(pks) == 1 else pks, - "alter": alter, - } - else: - # Insert - kwargs = {"ignore": ignore, "replace": replace, "alter": alter} - if should_return and not upsert: - rowids = [] - method = table.upsert if upsert else table.insert - for row in rows: - rowids.append(method(row, **kwargs).last_rowid) - return list( - table.rows_where( - "rowid in ({})".format(",".join("?" for _ in rowids)), - rowids, - ) - ) - else: - method_all = table.upsert_all if upsert else table.insert_all - method_all(rows, **kwargs) - - try: - rows = await db.execute_write_fn(insert_or_upsert_rows) - except Exception as e: - return _error([str(e)]) - result = {"ok": True} - if should_return: - if upsert: - # Fetch based on initial input IDs - where_clause = " OR ".join( - ["({})".format(" AND ".join("{} = ?".format(pk) for pk in pks))] - * len(row_pk_values_for_later) - ) - args = list(itertools.chain.from_iterable(row_pk_values_for_later)) - fetched_rows = await db.execute( - "select {}* from [{}] where {}".format( - "rowid, " if pks == ["rowid"] else "", table_name, where_clause - ), - args, - ) - result["rows"] = fetched_rows.dicts() - else: - result["rows"] = rows - # We track the number of rows requested, but do not attempt to show which were actually - # inserted or upserted v.s. 
ignored - if upsert: - await self.ds.track_event( - UpsertRowsEvent( - actor=request.actor, - database=database_name, - table=table_name, - num_rows=num_rows, - ) - ) - else: - await self.ds.track_event( - InsertRowsEvent( - actor=request.actor, - database=database_name, - table=table_name, - num_rows=num_rows, - ignore=bool(ignore), - replace=bool(replace), - ) - ) - - if initial_schema is not None: - after_schema = await db.execute_fn( - lambda conn: sqlite_utils.Database(conn)[table_name].schema - ) - if initial_schema != after_schema: - await self.ds.track_event( - AlterTableEvent( - request.actor, - database=database_name, - table=table_name, - before_schema=initial_schema, - after_schema=after_schema, - ) - ) - - return Response.json(result, status=200 if upsert else 201) - - -class TableUpsertView(TableInsertView): - name = "table-upsert" - - async def post(self, request): - return await super().post(request, upsert=True) - - -class TableDropView(BaseView): - name = "table-drop" - - def __init__(self, datasette): - self.ds = datasette - - async def post(self, request): - try: - resolved = await self.ds.resolve_table(request) - except NotFound as e: - return _error([e.args[0]], 404) - db = resolved.db - database_name = db.name - table_name = resolved.table - # Table must exist - db = self.ds.get_database(database_name) - if not await db.table_exists(table_name): - return _error(["Table not found: {}".format(table_name)], 404) - if not await self.ds.allowed( - action="drop-table", - resource=TableResource(database=database_name, table=table_name), - actor=request.actor, - ): - return _error(["Permission denied"], 403) - if not db.is_mutable: - return _error(["Database is immutable"], 403) - confirm = False - try: - data = json.loads(await request.post_body()) - confirm = data.get("confirm") - except json.JSONDecodeError: - pass - - if not confirm: - return Response.json( - { - "ok": True, - "database": database_name, - "table": table_name, - "row_count": ( - await db.execute("select count(*) from [{}]".format(table_name)) - ).single_value(), - "message": 'Pass "confirm": true to confirm', - }, - status=200, - ) - - # Drop table - def drop_table(conn): - sqlite_utils.Database(conn)[table_name].drop() - - await db.execute_write_fn(drop_table) - await self.ds.track_event( - DropTableEvent( - actor=request.actor, database=database_name, table=table_name - ) - ) - return Response.json({"ok": True}, status=200) - - -def _get_extras(request): - extra_bits = request.args.getlist("_extra") - extras = set() - for bit in extra_bits: - extras.update(bit.split(",")) - return extras - - -async def _columns_to_select(table_columns, pks, request): - columns = list(table_columns) - if "_col" in request.args: - columns = list(pks) - _cols = request.args.getlist("_col") - bad_columns = [column for column in _cols if column not in table_columns] - if bad_columns: - raise DatasetteError( - "_col={} - invalid columns".format(", ".join(bad_columns)), - status=400, - ) - # De-duplicate maintaining order: - columns.extend(dict.fromkeys(_cols)) - if "_nocol" in request.args: - # Return all columns EXCEPT these - bad_columns = [ - column - for column in request.args.getlist("_nocol") - if (column not in table_columns) or (column in pks) - ] - if bad_columns: - raise DatasetteError( - "_nocol={} - invalid columns".format(", ".join(bad_columns)), - status=400, - ) - tmp_columns = [ - column for column in columns if column not in request.args.getlist("_nocol") - ] - columns = tmp_columns - return columns - - 
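_columns_to_select narrows the SELECT list from ?_col= and ?_nocol=: primary keys are always kept, unknown column names are rejected, and dict.fromkeys de-duplicates while preserving order. The same logic in isolation, taking plain lists instead of the request object:

def columns_to_select(table_columns, pks, cols=None, nocols=None):
    # cols: explicit ?_col= list; nocols: ?_nocol= exclusions
    columns = list(table_columns)
    if cols:
        bad = [c for c in cols if c not in table_columns]
        if bad:
            raise ValueError("_col={} - invalid columns".format(", ".join(bad)))
        # Primary keys come first; dict.fromkeys de-duplicates but keeps order
        columns = list(dict.fromkeys(list(pks) + list(cols)))
    if nocols:
        bad = [c for c in nocols if c not in table_columns or c in pks]
        if bad:
            raise ValueError("_nocol={} - invalid columns".format(", ".join(bad)))
        columns = [c for c in columns if c not in nocols]
    return columns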
-async def _sortable_columns_for_table(datasette, database_name, table_name, use_rowid): - db = datasette.databases[database_name] - table_metadata = await datasette.table_config(database_name, table_name) - if "sortable_columns" in table_metadata: - sortable_columns = set(table_metadata["sortable_columns"]) - else: - sortable_columns = set(await db.table_columns(table_name)) - if use_rowid: - sortable_columns.add("rowid") - return sortable_columns - - -async def _sort_order(table_metadata, sortable_columns, request, order_by): - sort = request.args.get("_sort") - sort_desc = request.args.get("_sort_desc") - - if not sort and not sort_desc: - sort = table_metadata.get("sort") - sort_desc = table_metadata.get("sort_desc") - - if sort and sort_desc: - raise DatasetteError( - "Cannot use _sort and _sort_desc at the same time", status=400 - ) - - if sort: - if sort not in sortable_columns: - raise DatasetteError(f"Cannot sort table by {sort}", status=400) - - order_by = escape_sqlite(sort) - - if sort_desc: - if sort_desc not in sortable_columns: - raise DatasetteError(f"Cannot sort table by {sort_desc}", status=400) - - order_by = f"{escape_sqlite(sort_desc)} desc" - - return sort, sort_desc, order_by - - -async def table_view(datasette, request): - await datasette.refresh_schemas() - with tracer.trace_child_tasks(): - response = await table_view_traced(datasette, request) - - # CORS - if datasette.cors: - add_cors_headers(response.headers) - - # Cache TTL header - ttl = request.args.get("_ttl", None) - if ttl is None or not ttl.isdigit(): - ttl = datasette.setting("default_cache_ttl") - - if datasette.cache_headers and response.status == 200: - ttl = int(ttl) - if ttl == 0: - ttl_header = "no-cache" - else: - ttl_header = f"max-age={ttl}" - response.headers["Cache-Control"] = ttl_header - - # Referrer policy - response.headers["Referrer-Policy"] = "no-referrer" - - return response - - -async def table_view_traced(datasette, request): - from datasette.app import TableNotFound - - try: - resolved = await datasette.resolve_table(request) - except TableNotFound as not_found: - # Was this actually a canned query? 
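table_view wraps whatever table_view_traced returns with response headers: CORS if enabled, a Cache-Control value derived from ?_ttl= (falling back to the default_cache_ttl setting, with 0 meaning no-cache), and a no-referrer policy. Just the header computation, as a sketch (the default of 5 seconds is an assumption, not necessarily the shipped setting):

def cache_headers(ttl_arg, default_ttl=5):
    # ?_ttl= must be all digits; anything else falls back to the default setting
    ttl = int(ttl_arg) if ttl_arg and ttl_arg.isdigit() else int(default_ttl)
    headers = {"Referrer-Policy": "no-referrer"}
    headers["Cache-Control"] = "no-cache" if ttl == 0 else "max-age={}".format(ttl)
    return headers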
- canned_query = await datasette.get_canned_query( - not_found.database_name, not_found.table, request.actor - ) - # If this is a canned query, not a table, then dispatch to QueryView instead - if canned_query: - return await QueryView()(request, datasette) - else: - raise - - if request.method == "POST": - return Response.text("Method not allowed", status=405) - - format_ = request.url_vars.get("format") or "html" - extra_extras = None - context_for_html_hack = False - default_labels = False - if format_ == "html": - extra_extras = {"_html"} - context_for_html_hack = True - default_labels = True - - view_data = await table_view_data( - datasette, - request, - resolved, - extra_extras=extra_extras, - context_for_html_hack=context_for_html_hack, - default_labels=default_labels, - ) - if isinstance(view_data, Response): - return view_data - data, rows, columns, expanded_columns, sql, next_url = view_data - - # Handle formats from plugins - if format_ == "csv": - - async def fetch_data(request, _next=None): - ( - data, - rows, - columns, - expanded_columns, - sql, - next_url, - ) = await table_view_data( - datasette, - request, - resolved, - extra_extras=extra_extras, - context_for_html_hack=context_for_html_hack, - default_labels=default_labels, - _next=_next, - ) - data["rows"] = rows - data["table"] = resolved.table - data["columns"] = columns - data["expanded_columns"] = expanded_columns - return data, None, None - - return await stream_csv(datasette, fetch_data, request, resolved.db.name) - elif format_ in datasette.renderers.keys(): - # Dispatch request to the correct output format renderer - # (CSV is not handled here due to streaming) - result = call_with_supported_arguments( - datasette.renderers[format_][0], - datasette=datasette, - columns=columns, - rows=rows, - sql=sql, - query_name=None, - database=resolved.db.name, - table=resolved.table, - request=request, - view_name="table", - truncated=False, - error=None, - # These will be deprecated in Datasette 1.0: - args=request.args, - data=data, - ) - if asyncio.iscoroutine(result): - result = await result - if result is None: - raise NotFound("No data") - if isinstance(result, dict): - r = Response( - body=result.get("body"), - status=result.get("status_code") or 200, - content_type=result.get("content_type", "text/plain"), - headers=result.get("headers"), - ) - elif isinstance(result, Response): - r = result - # if status_code is not None: - # # Over-ride the status code - # r.status = status_code - else: - assert False, f"{result} should be dict or Response" - elif format_ == "html": - headers = {} - templates = [ - f"table-{to_css_class(resolved.db.name)}-{to_css_class(resolved.table)}.html", - "table.html", - ] - environment = datasette.get_jinja_environment(request) - template = environment.select_template(templates) - alternate_url_json = datasette.absolute_url( - request, - datasette.urls.path(path_with_format(request=request, format="json")), - ) - headers.update( - { - "Link": '<{}>; rel="alternate"; type="application/json+datasette"'.format( - alternate_url_json - ) - } - ) - r = Response.html( - await datasette.render_template( - template, - dict( - data, - append_querystring=append_querystring, - path_with_replaced_args=path_with_replaced_args, - fix_path=datasette.urls.path, - settings=datasette.settings_dict(), - # TODO: review up all of these hacks: - alternate_url_json=alternate_url_json, - datasette_allow_facet=( - "true" if datasette.setting("allow_facet") else "false" - ), - is_sortable=any(c["sortable"] for c in 
data["display_columns"]), - allow_execute_sql=await datasette.allowed( - action="execute-sql", - resource=DatabaseResource(database=resolved.db.name), - actor=request.actor, - ), - query_ms=1.2, - select_templates=[ - f"{'*' if template_name == template.name else ''}{template_name}" - for template_name in templates - ], - top_table=make_slot_function( - "top_table", - datasette, - request, - database=resolved.db.name, - table=resolved.table, - ), - count_limit=resolved.db.count_limit, - ), - request=request, - view_name="table", - ), - headers=headers, - ) - else: - assert False, "Invalid format: {}".format(format_) - if next_url: - r.headers["link"] = f'<{next_url}>; rel="next"' - return r - - -async def table_view_data( - datasette, - request, - resolved, - extra_extras=None, - context_for_html_hack=False, - default_labels=False, - _next=None, -): - extra_extras = extra_extras or set() - # We have a table or view - db = resolved.db - database_name = resolved.db.name - table_name = resolved.table - is_view = resolved.is_view - - # Can this user view it? - visible, private = await datasette.check_visibility( - request.actor, - action="view-table", - resource=TableResource(database=database_name, table=table_name), - ) - if not visible: - raise Forbidden("You do not have permission to view this table") - - # Redirect based on request.args, if necessary - redirect_response = await _redirect_if_needed(datasette, request, resolved) - if redirect_response: - return redirect_response - - # Introspect columns and primary keys for table - pks = await db.primary_keys(table_name) - table_columns = await db.table_columns(table_name) - - # Take ?_col= and ?_nocol= into account - specified_columns = await _columns_to_select(table_columns, pks, request) - select_specified_columns = ", ".join(escape_sqlite(t) for t in specified_columns) - select_all_columns = ", ".join(escape_sqlite(t) for t in table_columns) - - # rowid tables (no specified primary key) need a different SELECT - use_rowid = not pks and not is_view - order_by = "" - if use_rowid: - select_specified_columns = f"rowid, {select_specified_columns}" - select_all_columns = f"rowid, {select_all_columns}" - order_by = "rowid" - order_by_pks = "rowid" - else: - order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) - order_by = order_by_pks - - if is_view: - order_by = "" - - # TODO: This logic should turn into logic about which ?_extras get - # executed instead: - nocount = request.args.get("_nocount") - nofacet = request.args.get("_nofacet") - nosuggest = request.args.get("_nosuggest") - if request.args.get("_shape") in ("array", "object"): - nocount = True - nofacet = True - - table_metadata = await datasette.table_config(database_name, table_name) - - # Arguments that start with _ and don't contain a __ are - # special - things like ?_search= - and should not be - # treated as filters. 
- filter_args = [] - for key in request.args: - if not (key.startswith("_") and "__" not in key): - for v in request.args.getlist(key): - filter_args.append((key, v)) - - # Build where clauses from query string arguments - filters = Filters(sorted(filter_args)) - where_clauses, params = filters.build_where_clauses(table_name) - - # Execute filters_from_request plugin hooks - including the default - # ones that live in datasette/filters.py - extra_context_from_filters = {} - extra_human_descriptions = [] - - for hook in pm.hook.filters_from_request( - request=request, - table=table_name, - database=database_name, - datasette=datasette, - ): - filter_arguments = await await_me_maybe(hook) - if filter_arguments: - where_clauses.extend(filter_arguments.where_clauses) - params.update(filter_arguments.params) - extra_human_descriptions.extend(filter_arguments.human_descriptions) - extra_context_from_filters.update(filter_arguments.extra_context) - - # Deal with custom sort orders - sortable_columns = await _sortable_columns_for_table( - datasette, database_name, table_name, use_rowid - ) - - sort, sort_desc, order_by = await _sort_order( - table_metadata, sortable_columns, request, order_by - ) - - from_sql = "from {table_name} {where}".format( - table_name=escape_sqlite(table_name), - where=( - ("where {} ".format(" and ".join(where_clauses))) if where_clauses else "" - ), - ) - # Copy of params so we can mutate them later: - from_sql_params = dict(**params) - - count_sql = f"select count(*) {from_sql}" - - # Handle pagination driven by ?_next= - _next = _next or request.args.get("_next") - - offset = "" - if _next: - sort_value = None - if is_view: - # _next is an offset - offset = f" offset {int(_next)}" - else: - components = urlsafe_components(_next) - # If a sort order is applied and there are multiple components, - # the first of these is the sort value - if (sort or sort_desc) and (len(components) > 1): - sort_value = components[0] - # Special case for if non-urlencoded first token was $null - if _next.split(",")[0] == "$null": - sort_value = None - components = components[1:] - - # Figure out the SQL for next-based-on-primary-key first - next_by_pk_clauses = [] - if use_rowid: - next_by_pk_clauses.append(f"rowid > :p{len(params)}") - params[f"p{len(params)}"] = components[0] - else: - # Apply the tie-breaker based on primary keys - if len(components) == len(pks): - param_len = len(params) - next_by_pk_clauses.append(compound_keys_after_sql(pks, param_len)) - for i, pk_value in enumerate(components): - params[f"p{param_len + i}"] = pk_value - - # Now add the sort SQL, which may incorporate next_by_pk_clauses - if sort or sort_desc: - if sort_value is None: - if sort_desc: - # Just items where column is null ordered by pk - where_clauses.append( - "({column} is null and {next_clauses})".format( - column=escape_sqlite(sort_desc), - next_clauses=" and ".join(next_by_pk_clauses), - ) - ) - else: - where_clauses.append( - "({column} is not null or ({column} is null and {next_clauses}))".format( - column=escape_sqlite(sort), - next_clauses=" and ".join(next_by_pk_clauses), - ) - ) - else: - where_clauses.append( - "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format( - column=escape_sqlite(sort or sort_desc), - op=">" if sort else "<", - p=len(params), - extra_desc_only=( - "" - if sort - else " or {column2} is null".format( - column2=escape_sqlite(sort or sort_desc) - ) - ), - next_clauses=" and ".join(next_by_pk_clauses), - ) - ) - 
params[f"p{len(params)}"] = sort_value - order_by = f"{order_by}, {order_by_pks}" - else: - where_clauses.extend(next_by_pk_clauses) - - where_clause = "" - if where_clauses: - where_clause = f"where {' and '.join(where_clauses)} " - - if order_by: - order_by = f"order by {order_by}" - - extra_args = {} - # Handle ?_size=500 - # TODO: This was: - # page_size = _size or request.args.get("_size") or table_metadata.get("size") - page_size = request.args.get("_size") or table_metadata.get("size") - if page_size: - if page_size == "max": - page_size = datasette.max_returned_rows - try: - page_size = int(page_size) - if page_size < 0: - raise ValueError - - except ValueError: - raise BadRequest("_size must be a positive integer") - - if page_size > datasette.max_returned_rows: - raise BadRequest(f"_size must be <= {datasette.max_returned_rows}") - - extra_args["page_size"] = page_size - else: - page_size = datasette.page_size - - # Facets are calculated against SQL without order by or limit - sql_no_order_no_limit = ( - "select {select_all_columns} from {table_name} {where}".format( - select_all_columns=select_all_columns, - table_name=escape_sqlite(table_name), - where=where_clause, - ) - ) - - # This is the SQL that populates the main table on the page - sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format( - select_specified_columns=select_specified_columns, - table_name=escape_sqlite(table_name), - where=where_clause, - order_by=order_by, - page_size=page_size + 1, - offset=offset, - ) - - if request.args.get("_timelimit"): - extra_args["custom_time_limit"] = int(request.args.get("_timelimit")) - - # Execute the main query! - try: - results = await db.execute(sql, params, truncate=True, **extra_args) - except (sqlite3.OperationalError, InvalidSql) as e: - raise DatasetteError(str(e), title="Invalid SQL", status=400) - - except sqlite3.OperationalError as e: - raise DatasetteError(str(e)) - - columns = [r[0] for r in results.description] - rows = list(results.rows) - - # Expand labeled columns if requested - expanded_columns = [] - # List of (fk_dict, label_column-or-None) pairs for that table - expandable_columns = [] - for fk in await db.foreign_keys_for_table(table_name): - label_column = await db.label_column_for_table(fk["other_table"]) - expandable_columns.append((fk, label_column)) - - columns_to_expand = None - try: - all_labels = value_as_boolean(request.args.get("_labels", "")) - except ValueError: - all_labels = default_labels - # Check for explicit _label= - if "_label" in request.args: - columns_to_expand = request.args.getlist("_label") - if columns_to_expand is None and all_labels: - # expand all columns with foreign keys - columns_to_expand = [fk["column"] for fk, _ in expandable_columns] - - if columns_to_expand: - expanded_labels = {} - for fk, _ in expandable_columns: - column = fk["column"] - if column not in columns_to_expand: - continue - if column not in columns: - continue - expanded_columns.append(column) - # Gather the values - column_index = columns.index(column) - values = [row[column_index] for row in rows] - # Expand them - expanded_labels.update( - await datasette.expand_foreign_keys( - request.actor, database_name, table_name, column, values - ) - ) - if expanded_labels: - # Rewrite the rows - new_rows = [] - for row in rows: - new_row = CustomRow(columns) - for column in row.keys(): - value = row[column] - if (column, value) in expanded_labels and value is not None: - new_row[column] = { - "value": 
value, - "label": expanded_labels[(column, value)], - } - else: - new_row[column] = value - new_rows.append(new_row) - rows = new_rows - - _next = request.args.get("_next") - - # Pagination next link - next_value, next_url = await _next_value_and_url( - datasette, - db, - request, - table_name, - _next, - rows, - pks, - use_rowid, - sort, - sort_desc, - page_size, - is_view, - ) - rows = rows[:page_size] - - # Resolve extras - extras = _get_extras(request) - if any(k for k in request.args.keys() if k == "_facet" or k.startswith("_facet_")): - extras.add("facet_results") - if request.args.get("_shape") == "object": - extras.add("primary_keys") - if extra_extras: - extras.update(extra_extras) - - async def extra_count_sql(): - return count_sql - - async def extra_count(): - "Total count of rows matching these filters" - # Calculate the total count for this query - count = None - if ( - not db.is_mutable - and datasette.inspect_data - and count_sql == f"select count(*) from {table_name} " - ): - # We can use a previously cached table row count - try: - count = datasette.inspect_data[database_name]["tables"][table_name][ - "count" - ] - except KeyError: - pass - - # Otherwise run a select count(*) ... - if count_sql and count is None and not nocount: - count_sql_limited = ( - f"select count(*) from (select * {from_sql} limit 10001)" - ) - try: - count_rows = list(await db.execute(count_sql_limited, from_sql_params)) - count = count_rows[0][0] - except QueryInterrupted: - pass - return count - - async def facet_instances(extra_count): - facet_instances = [] - facet_classes = list( - itertools.chain.from_iterable(pm.hook.register_facet_classes()) - ) - for facet_class in facet_classes: - facet_instances.append( - facet_class( - datasette, - request, - database_name, - sql=sql_no_order_no_limit, - params=params, - table=table_name, - table_config=table_metadata, - row_count=extra_count, - ) - ) - return facet_instances - - async def extra_facet_results(facet_instances): - "Results of facets calculated against this data" - facet_results = {} - facets_timed_out = [] - - if not nofacet: - # Run them in parallel - facet_awaitables = [facet.facet_results() for facet in facet_instances] - facet_awaitable_results = await run_sequential(*facet_awaitables) - for ( - instance_facet_results, - instance_facets_timed_out, - ) in facet_awaitable_results: - for facet_info in instance_facet_results: - base_key = facet_info["name"] - key = base_key - i = 1 - while key in facet_results: - i += 1 - key = f"{base_key}_{i}" - facet_results[key] = facet_info - facets_timed_out.extend(instance_facets_timed_out) - - return { - "results": facet_results, - "timed_out": facets_timed_out, - } - - async def extra_suggested_facets(facet_instances): - "Suggestions for facets that might return interesting results" - suggested_facets = [] - # Calculate suggested facets - if ( - datasette.setting("suggest_facets") - and datasette.setting("allow_facet") - and not _next - and not nofacet - and not nosuggest - ): - # Run them in parallel - facet_suggest_awaitables = [facet.suggest() for facet in facet_instances] - for suggest_result in await run_sequential(*facet_suggest_awaitables): - suggested_facets.extend(suggest_result) - return suggested_facets - - # Faceting - if not datasette.setting("allow_facet") and any( - arg.startswith("_facet") for arg in request.args - ): - raise BadRequest("_facet= is not allowed") - - # human_description_en combines filters AND search, if provided - async def extra_human_description_en(): - 
"Human-readable description of the filters" - human_description_en = filters.human_description_en( - extra=extra_human_descriptions - ) - if sort or sort_desc: - human_description_en = " ".join( - [b for b in [human_description_en, sorted_by] if b] - ) - return human_description_en - - if sort or sort_desc: - sorted_by = "sorted by {}{}".format( - (sort or sort_desc), " descending" if sort_desc else "" - ) - - async def extra_next_url(): - "Full URL for the next page of results" - return next_url - - async def extra_columns(): - "Column names returned by this query" - return columns - - async def extra_primary_keys(): - "Primary keys for this table" - return pks - - async def extra_actions(): - async def actions(): - links = [] - kwargs = { - "datasette": datasette, - "database": database_name, - "actor": request.actor, - "request": request, - } - if is_view: - kwargs["view"] = table_name - method = pm.hook.view_actions - else: - kwargs["table"] = table_name - method = pm.hook.table_actions - for hook in method(**kwargs): - extra_links = await await_me_maybe(hook) - if extra_links: - links.extend(extra_links) - return links - - return actions - - async def extra_is_view(): - return is_view - - async def extra_debug(): - "Extra debug information" - return { - "resolved": repr(resolved), - "url_vars": request.url_vars, - "nofacet": nofacet, - "nosuggest": nosuggest, - } - - async def extra_request(): - "Full information about the request" - return { - "url": request.url, - "path": request.path, - "full_path": request.full_path, - "host": request.host, - "args": request.args._data, - } - - async def run_display_columns_and_rows(): - display_columns, display_rows = await display_columns_and_rows( - datasette, - database_name, - table_name, - results.description, - rows, - link_column=not is_view, - truncate_cells=datasette.setting("truncate_cells_html"), - sortable_columns=sortable_columns, - request=request, - ) - return { - "columns": display_columns, - "rows": display_rows, - } - - async def extra_display_columns(run_display_columns_and_rows): - return run_display_columns_and_rows["columns"] - - async def extra_display_rows(run_display_columns_and_rows): - return run_display_columns_and_rows["rows"] - - async def extra_query(): - "Details of the underlying SQL query" - return { - "sql": sql, - "params": params, - } - - async def extra_metadata(): - "Metadata about the table and database" - tablemetadata = await datasette.get_resource_metadata(database_name, table_name) - - rows = await datasette.get_internal_database().execute( - """ - SELECT - column_name, - value - FROM metadata_columns - WHERE database_name = ? - AND resource_name = ? 
- AND key = 'description' - """, - [database_name, table_name], - ) - tablemetadata["columns"] = dict(rows) - return tablemetadata - - async def extra_database(): - return database_name - - async def extra_table(): - return table_name - - async def extra_database_color(): - return db.color - - async def extra_form_hidden_args(): - form_hidden_args = [] - for key in request.args: - if ( - key.startswith("_") - and key not in ("_sort", "_sort_desc", "_search", "_next") - and "__" not in key - ): - for value in request.args.getlist(key): - form_hidden_args.append((key, value)) - return form_hidden_args - - async def extra_filters(): - return filters - - async def extra_custom_table_templates(): - return [ - f"_table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", - f"_table-table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", - "_table.html", - ] - - async def extra_sorted_facet_results(extra_facet_results): - return sorted( - extra_facet_results["results"].values(), - key=lambda f: (len(f["results"]), f["name"]), - reverse=True, - ) - - async def extra_table_definition(): - return await db.get_table_definition(table_name) - - async def extra_view_definition(): - return await db.get_view_definition(table_name) - - async def extra_renderers(extra_expandable_columns, extra_query): - renderers = {} - url_labels_extra = {} - if extra_expandable_columns: - url_labels_extra = {"_labels": "on"} - for key, (_, can_render) in datasette.renderers.items(): - it_can_render = call_with_supported_arguments( - can_render, - datasette=datasette, - columns=columns or [], - rows=rows or [], - sql=extra_query.get("sql", None), - query_name=None, - database=database_name, - table=table_name, - request=request, - view_name="table", - ) - it_can_render = await await_me_maybe(it_can_render) - if it_can_render: - renderers[key] = datasette.urls.path( - path_with_format( - request=request, format=key, extra_qs={**url_labels_extra} - ) - ) - return renderers - - async def extra_private(): - return private - - async def extra_expandable_columns(): + async def expandable_columns(self, database, table): + # Returns list of (fk_dict, label_column-or-None) pairs for that table expandables = [] - db = datasette.databases[database_name] - for fk in await db.foreign_keys_for_table(table_name): + db = self.ds.databases[database] + for fk in await db.foreign_keys_for_table(table): label_column = await db.label_column_for_table(fk["other_table"]) expandables.append((fk, label_column)) return expandables - async def extra_extras(): - "Available ?_extra= blocks" - all_extras = [ - (key[len("extra_") :], fn.__doc__) - for key, fn in registry._registry.items() - if key.startswith("extra_") + async def display_columns_and_rows( + self, database, table, description, rows, link_column=False, truncate_cells=0 + ): + "Returns columns, rows for specified table - including fancy foreign key treatment" + db = self.ds.databases[database] + table_metadata = self.ds.table_metadata(database, table) + sortable_columns = await self.sortable_columns_for_table(database, table, True) + columns = [ + {"name": r[0], "sortable": r[0] in sortable_columns} for r in description ] - return [ - { - "name": name, - "description": doc, - "toggle_url": datasette.absolute_url( - request, - datasette.urls.path( - path_with_added_args(request, {"_extra": name}) - if name not in extras - else path_with_removed_args(request, {"_extra": name}) - ), - ), - "selected": name in extras, - } - for name, doc in all_extras - ] - - async def 
extra_facets_timed_out(extra_facet_results): - return extra_facet_results["timed_out"] - - bundles = { - "html": [ - "suggested_facets", - "facet_results", - "facets_timed_out", - "count", - "count_sql", - "human_description_en", - "next_url", - "metadata", - "query", - "columns", - "display_columns", - "display_rows", - "database", - "table", - "database_color", - "actions", - "filters", - "renderers", - "custom_table_templates", - "sorted_facet_results", - "table_definition", - "view_definition", - "is_view", - "private", - "primary_keys", - "expandable_columns", - "form_hidden_args", - ] - } - - for key, values in bundles.items(): - if f"_{key}" in extras: - extras.update(values) - extras.discard(f"_{key}") - - registry = Registry( - extra_count, - extra_count_sql, - extra_facet_results, - extra_facets_timed_out, - extra_suggested_facets, - facet_instances, - extra_human_description_en, - extra_next_url, - extra_columns, - extra_primary_keys, - run_display_columns_and_rows, - extra_display_columns, - extra_display_rows, - extra_debug, - extra_request, - extra_query, - extra_metadata, - extra_extras, - extra_database, - extra_table, - extra_database_color, - extra_actions, - extra_filters, - extra_renderers, - extra_custom_table_templates, - extra_sorted_facet_results, - extra_table_definition, - extra_view_definition, - extra_is_view, - extra_private, - extra_expandable_columns, - extra_form_hidden_args, - ) - - results = await registry.resolve_multi( - ["extra_{}".format(extra) for extra in extras] - ) - data = { - "ok": True, - "next": next_value and str(next_value) or None, - } - data.update( - { - key.replace("extra_", ""): value - for key, value in results.items() - if key.startswith("extra_") and key.replace("extra_", "") in extras + pks = await db.primary_keys(table) + column_to_foreign_key_table = { + fk["column"]: fk["other_table"] + for fk in await db.foreign_keys_for_table(table) } - ) - raw_sqlite_rows = rows[:page_size] - data["rows"] = [dict(r) for r in raw_sqlite_rows] - if context_for_html_hack: - data.update(extra_context_from_filters) - # filter_columns combine the columns we know are available - # in the table with any additional columns (such as rowid) - # which are available in the query - data["filter_columns"] = list(columns) + [ - table_column - for table_column in table_columns - if table_column not in columns - ] - url_labels_extra = {} - if data.get("expandable_columns"): - url_labels_extra = {"_labels": "on"} - url_csv_args = {"_size": "max", **url_labels_extra} - url_csv = datasette.urls.path( - path_with_format(request=request, format="csv", extra_qs=url_csv_args) - ) - url_csv_path = url_csv.split("?")[0] - data.update( - { - "url_csv": url_csv, - "url_csv_path": url_csv_path, - "url_csv_hidden_args": [ - (key, value) - for key, value in urllib.parse.parse_qsl(request.query_string) - if key not in ("_labels", "_facet", "_size") - ] - + [("_size", "max")], - } - ) - # if no sort specified AND table has a single primary key, - # set sort to that so arrow is displayed - if not sort and not sort_desc: - if 1 == len(pks): - sort = pks[0] - elif use_rowid: - sort = "rowid" - data["sort"] = sort - data["sort_desc"] = sort_desc - - return data, rows[:page_size], columns, expanded_columns, sql, next_url - - -async def _next_value_and_url( - datasette, - db, - request, - table_name, - _next, - rows, - pks, - use_rowid, - sort, - sort_desc, - page_size, - is_view, -): - next_value = None - next_url = None - if 0 < page_size < len(rows): - if is_view: - next_value = 
int(_next or 0) + page_size - else: - next_value = path_from_row_pks(rows[-2], pks, use_rowid) - # If there's a sort or sort_desc, add that value as a prefix - if (sort or sort_desc) and not is_view: - try: - prefix = rows[-2][sort or sort_desc] - except IndexError: - # sort/sort_desc column missing from SELECT - look up value by PK instead - prefix_where_clause = " and ".join( - "[{}] = :pk{}".format(pk, i) for i, pk in enumerate(pks) + cell_rows = [] + for row in rows: + cells = [] + # Unless we are a view, the first column is a link - either to the rowid + # or to the simple or compound primary key + if link_column: + is_special_link_column = len(pks) != 1 + pk_path = path_from_row_pks(row, pks, not pks, False) + cells.append( + { + "column": pks[0] if len(pks) == 1 else "Link", + "is_special_link_column": is_special_link_column, + "raw": pk_path, + "value": jinja2.Markup( + '{flat_pks}'.format( + database=database, + table=urllib.parse.quote_plus(table), + flat_pks=str(jinja2.escape(pk_path)), + flat_pks_quoted=path_from_row_pks(row, pks, not pks), + ) + ), + } ) - prefix_lookup_sql = "select [{}] from [{}] where {}".format( - sort or sort_desc, table_name, prefix_where_clause + + for value, column_dict in zip(row, columns): + column = column_dict["name"] + if link_column and len(pks) == 1 and column == pks[0]: + # If there's a simple primary key, don't repeat the value as it's + # already shown in the link column. + continue + + # First let the plugins have a go + # pylint: disable=no-member + plugin_display_value = pm.hook.render_cell( + value=value, + column=column, + table=table, + database=database, + datasette=self.ds, ) - prefix = ( - await db.execute( - prefix_lookup_sql, - { - **{ - "pk{}".format(i): rows[-2][pk] - for i, pk in enumerate(pks) - } - }, + if plugin_display_value is not None: + display_value = plugin_display_value + elif isinstance(value, bytes): + display_value = jinja2.Markup( + "<Binary data: {} byte{}>".format( + len(value), "" if len(value) == 1 else "s" + ) ) - ).single_value() - if isinstance(prefix, dict) and "value" in prefix: - prefix = prefix["value"] - if prefix is None: - prefix = "$null" - else: - prefix = tilde_encode(str(prefix)) - next_value = f"{prefix},{next_value}" - added_args = {"_next": next_value} - if sort: - added_args["_sort"] = sort - else: - added_args["_sort_desc"] = sort_desc + elif isinstance(value, dict): + # It's an expanded foreign key - display link to other row + label = value["label"] + value = value["value"] + # The table we link to depends on the column + other_table = column_to_foreign_key_table[column] + link_template = ( + LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE + ) + display_value = jinja2.Markup( + link_template.format( + database=database, + table=urllib.parse.quote_plus(other_table), + link_id=urllib.parse.quote_plus(str(value)), + id=str(jinja2.escape(value)), + label=str(jinja2.escape(label)), + ) + ) + elif value in ("", None): + display_value = jinja2.Markup(" ") + elif is_url(str(value).strip()): + display_value = jinja2.Markup( + '{url}'.format( + url=jinja2.escape(value.strip()) + ) + ) + elif column in table_metadata.get("units", {}) and value != "": + # Interpret units using pint + value = value * ureg(table_metadata["units"][column]) + # Pint uses floating point which sometimes introduces errors in the compact + # representation, which we have to round off to avoid ugliness. In the vast + # majority of cases this rounding will be inconsequential. I hope. 
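The units branch here leans on pint: multiply the raw number by the configured unit, convert to a compact magnitude, round away the floating-point noise mentioned above, and pretty-print. The same steps outside the view (requires the pint package; the unit string is made up):

import pint

ureg = pint.UnitRegistry()


def format_with_unit(value, unit):
    # e.g. format_with_unit(3500, "Hz") comes out as roughly "3.5 kHz"
    quantity = value * ureg(unit)
    compact = round(quantity.to_compact(), 6)  # hide float artifacts from to_compact()
    return "{:~P}".format(compact)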
+ value = round(value.to_compact(), 6) + display_value = jinja2.Markup( + "{:~P}".format(value).replace(" ", " ") + ) + else: + display_value = str(value) + if truncate_cells and len(display_value) > truncate_cells: + display_value = display_value[:truncate_cells] + u"\u2026" + + cells.append({"column": column, "value": display_value, "raw": value}) + cell_rows.append(Row(cells)) + + if link_column: + # Add the link column header. + # If it's a simple primary key, we have to remove and re-add that column name at + # the beginning of the header row. + if len(pks) == 1: + columns = [col for col in columns if col["name"] != pks[0]] + + columns = [ + {"name": pks[0] if len(pks) == 1 else "Link", "sortable": len(pks) == 1} + ] + columns + return columns, cell_rows + + +class TableView(RowTableShared): + name = "table" + + async def data( + self, + request, + database, + hash, + table, + default_labels=False, + _next=None, + _size=None, + ): + canned_query = self.ds.get_canned_query(database, table) + if canned_query is not None: + return await self.custom_sql( + request, + database, + hash, + canned_query["sql"], + metadata=canned_query, + editable=False, + canned_query=table, + ) + db = self.ds.databases[database] + is_view = bool(await db.get_view_definition(table)) + table_exists = bool(await db.table_exists(table)) + if not is_view and not table_exists: + raise NotFound("Table not found: {}".format(table)) + + pks = await db.primary_keys(table) + use_rowid = not pks and not is_view + if use_rowid: + select = "rowid, *" + order_by = "rowid" + order_by_pks = "rowid" else: - added_args = {"_next": next_value} - next_url = datasette.absolute_url( - request, datasette.urls.path(path_with_replaced_args(request, added_args)) + select = "*" + order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) + order_by = order_by_pks + + if is_view: + order_by = "" + + # Ensure we don't drop anything with an empty value e.g. 
?name__exact= + args = RequestParameters( + urllib.parse.parse_qs(request.query_string, keep_blank_values=True) ) - return next_value, next_url + + # Special args start with _ and do not contain a __ + # That's so if there is a column that starts with _ + # it can still be queried using ?_col__exact=blah + special_args = {} + special_args_lists = {} + other_args = [] + for key, value in args.items(): + if key.startswith("_") and "__" not in key: + special_args[key] = value[0] + special_args_lists[key] = value + else: + for v in value: + other_args.append((key, v)) + + # Handle ?_filter_column and redirect, if present + redirect_params = filters_should_redirect(special_args) + if redirect_params: + return self.redirect( + request, + path_with_added_args(request, redirect_params), + forward_querystring=False, + ) + + # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort) + if "_sort_by_desc" in special_args: + return self.redirect( + request, + path_with_added_args( + request, + { + "_sort_desc": special_args.get("_sort"), + "_sort_by_desc": None, + "_sort": None, + }, + ), + forward_querystring=False, + ) + + table_metadata = self.ds.table_metadata(database, table) + units = table_metadata.get("units", {}) + filters = Filters(sorted(other_args), units, ureg) + where_clauses, params = filters.build_where_clauses(table) + + extra_wheres_for_ui = [] + # Add _where= from querystring + if "_where" in request.args: + if not self.ds.config("allow_sql"): + raise DatasetteError("_where= is not allowed", status=400) + else: + where_clauses.extend(request.args["_where"]) + extra_wheres_for_ui = [ + { + "text": text, + "remove_url": path_with_removed_args(request, {"_where": text}), + } + for text in request.args["_where"] + ] + + # Support for ?_through={table, column, value} + extra_human_descriptions = [] + if "_through" in request.args: + for through in request.args["_through"]: + through_data = json.loads(through) + through_table = through_data["table"] + other_column = through_data["column"] + value = through_data["value"] + outgoing_foreign_keys = await db.get_outbound_foreign_keys( + through_table + ) + try: + fk_to_us = [ + fk for fk in outgoing_foreign_keys if fk["other_table"] == table + ][0] + except IndexError: + raise DatasetteError( + "Invalid _through - could not find corresponding foreign key" + ) + param = "p{}".format(len(params)) + where_clauses.append( + "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( + through_table=escape_sqlite(through_table), + our_pk=escape_sqlite(fk_to_us["other_column"]), + our_column=escape_sqlite(fk_to_us["column"]), + other_column=escape_sqlite(other_column), + param=param, + ) + ) + params[param] = value + extra_human_descriptions.append( + '{}.{} = "{}"'.format(through_table, other_column, value) + ) + + # _search support: + fts_table = special_args.get("_fts_table") + fts_table = fts_table or table_metadata.get("fts_table") + fts_table = fts_table or await db.fts_table(table) + fts_pk = special_args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) + search_args = dict( + pair for pair in special_args.items() if pair[0].startswith("_search") + ) + search = "" + if fts_table and search_args: + if "_search" in search_args: + # Simple ?_search=xxx + search = search_args["_search"] + where_clauses.append( + "{fts_pk} in (select rowid from {fts_table} where {fts_table} match :search)".format( + fts_table=escape_sqlite(fts_table), fts_pk=escape_sqlite(fts_pk) + ) + ) + 
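Full-text search works by pushing the MATCH into a subquery against the FTS shadow table and keeping only rows whose rowid (or configured fts_pk) appears in it. A standalone sqlite3 illustration using FTS5, with invented table and column names (assumes the bundled SQLite was built with FTS5, as most CPython builds are):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    create table docs (id integer primary key, body text);
    create virtual table docs_fts using fts5(body, content='docs', content_rowid='id');
    insert into docs (body) values ('hello datasette'), ('goodbye world');
    insert into docs_fts (rowid, body) select id, body from docs;
    """
)
hits = conn.execute(
    "select * from docs where id in (select rowid from docs_fts where docs_fts match ?)",
    ["datasette"],
).fetchall()  # -> [(1, 'hello datasette')]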
extra_human_descriptions.append('search matches "{}"'.format(search)) + params["search"] = search + else: + # More complex: search against specific columns + for i, (key, search_text) in enumerate(search_args.items()): + search_col = key.split("_search_", 1)[1] + if search_col not in await db.table_columns(fts_table): + raise DatasetteError("Cannot search by that column", status=400) + + where_clauses.append( + "rowid in (select rowid from {fts_table} where {search_col} match :search_{i})".format( + fts_table=escape_sqlite(fts_table), + search_col=escape_sqlite(search_col), + i=i, + ) + ) + extra_human_descriptions.append( + 'search column "{}" matches "{}"'.format( + search_col, search_text + ) + ) + params["search_{}".format(i)] = search_text + + sortable_columns = set() + + sortable_columns = await self.sortable_columns_for_table( + database, table, use_rowid + ) + + # Allow for custom sort order + sort = special_args.get("_sort") + if sort: + if sort not in sortable_columns: + raise DatasetteError("Cannot sort table by {}".format(sort)) + + order_by = escape_sqlite(sort) + sort_desc = special_args.get("_sort_desc") + if sort_desc: + if sort_desc not in sortable_columns: + raise DatasetteError("Cannot sort table by {}".format(sort_desc)) + + if sort: + raise DatasetteError("Cannot use _sort and _sort_desc at the same time") + + order_by = "{} desc".format(escape_sqlite(sort_desc)) + + from_sql = "from {table_name} {where}".format( + table_name=escape_sqlite(table), + where=("where {} ".format(" and ".join(where_clauses))) + if where_clauses + else "", + ) + # Copy of params so we can mutate them later: + from_sql_params = dict(**params) + + count_sql = "select count(*) {}".format(from_sql) + + _next = _next or special_args.get("_next") + offset = "" + if _next: + if is_view: + # _next is an offset + offset = " offset {}".format(int(_next)) + else: + components = urlsafe_components(_next) + # If a sort order is applied, the first of these is the sort value + if sort or sort_desc: + sort_value = components[0] + # Special case for if non-urlencoded first token was $null + if _next.split(",")[0] == "$null": + sort_value = None + components = components[1:] + + # Figure out the SQL for next-based-on-primary-key first + next_by_pk_clauses = [] + if use_rowid: + next_by_pk_clauses.append("rowid > :p{}".format(len(params))) + params["p{}".format(len(params))] = components[0] + else: + # Apply the tie-breaker based on primary keys + if len(components) == len(pks): + param_len = len(params) + next_by_pk_clauses.append( + compound_keys_after_sql(pks, param_len) + ) + for i, pk_value in enumerate(components): + params["p{}".format(param_len + i)] = pk_value + + # Now add the sort SQL, which may incorporate next_by_pk_clauses + if sort or sort_desc: + if sort_value is None: + if sort_desc: + # Just items where column is null ordered by pk + where_clauses.append( + "({column} is null and {next_clauses})".format( + column=escape_sqlite(sort_desc), + next_clauses=" and ".join(next_by_pk_clauses), + ) + ) + else: + where_clauses.append( + "({column} is not null or ({column} is null and {next_clauses}))".format( + column=escape_sqlite(sort), + next_clauses=" and ".join(next_by_pk_clauses), + ) + ) + else: + where_clauses.append( + "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format( + column=escape_sqlite(sort or sort_desc), + op=">" if sort else "<", + p=len(params), + extra_desc_only="" + if sort + else " or {column2} is null".format( + column2=escape_sqlite(sort or 
sort_desc) + ), + next_clauses=" and ".join(next_by_pk_clauses), + ) + ) + params["p{}".format(len(params))] = sort_value + order_by = "{}, {}".format(order_by, order_by_pks) + else: + where_clauses.extend(next_by_pk_clauses) + + where_clause = "" + if where_clauses: + where_clause = "where {} ".format(" and ".join(where_clauses)) + + if order_by: + order_by = "order by {} ".format(order_by) + + # _group_count=col1&_group_count=col2 + group_count = special_args_lists.get("_group_count") or [] + if group_count: + sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format( + group_cols=", ".join( + '"{}"'.format(group_count_col) for group_count_col in group_count + ), + table_name=escape_sqlite(table), + where=where_clause, + ) + return await self.custom_sql(request, database, hash, sql, editable=True) + + extra_args = {} + # Handle ?_size=500 + page_size = _size or request.raw_args.get("_size") + if page_size: + if page_size == "max": + page_size = self.ds.max_returned_rows + try: + page_size = int(page_size) + if page_size < 0: + raise ValueError + + except ValueError: + raise DatasetteError("_size must be a positive integer", status=400) + + if page_size > self.ds.max_returned_rows: + raise DatasetteError( + "_size must be <= {}".format(self.ds.max_returned_rows), status=400 + ) + + extra_args["page_size"] = page_size + else: + page_size = self.ds.page_size + + sql_no_limit = "select {select} from {table_name} {where}{order_by}".format( + select=select, + table_name=escape_sqlite(table), + where=where_clause, + order_by=order_by, + ) + sql = "{sql_no_limit} limit {limit}{offset}".format( + sql_no_limit=sql_no_limit.rstrip(), limit=page_size + 1, offset=offset + ) + + if request.raw_args.get("_timelimit"): + extra_args["custom_time_limit"] = int(request.raw_args["_timelimit"]) + + results = await self.ds.execute( + database, sql, params, truncate=True, **extra_args + ) + + # Number of filtered rows in whole set: + filtered_table_rows_count = None + if count_sql: + try: + count_rows = list( + await self.ds.execute(database, count_sql, from_sql_params) + ) + filtered_table_rows_count = count_rows[0][0] + except QueryInterrupted: + pass + + # facets support + if not self.ds.config("allow_facet") and any( + arg.startswith("_facet") for arg in request.args + ): + raise DatasetteError("_facet= is not allowed", status=400) + + # pylint: disable=no-member + facet_classes = list( + itertools.chain.from_iterable(pm.hook.register_facet_classes()) + ) + facet_results = {} + facets_timed_out = [] + facet_instances = [] + for klass in facet_classes: + facet_instances.append( + klass( + self.ds, + request, + database, + sql=sql_no_limit, + params=params, + table=table, + metadata=table_metadata, + row_count=filtered_table_rows_count, + ) + ) + + for facet in facet_instances: + instance_facet_results, instance_facets_timed_out = ( + await facet.facet_results() + ) + facet_results.update(instance_facet_results) + facets_timed_out.extend(instance_facets_timed_out) + + # Figure out columns and rows for the query + columns = [r[0] for r in results.description] + rows = list(results.rows) + + filter_columns = columns[:] + if use_rowid and filter_columns[0] == "rowid": + filter_columns = filter_columns[1:] + + # Expand labeled columns if requested + expanded_columns = [] + expandable_columns = await self.expandable_columns(database, table) + columns_to_expand = None + try: + all_labels = value_as_boolean(special_args.get("_labels", 
"")) + except ValueError: + all_labels = default_labels + # Check for explicit _label= + if "_label" in request.args: + columns_to_expand = request.args["_label"] + if columns_to_expand is None and all_labels: + # expand all columns with foreign keys + columns_to_expand = [fk["column"] for fk, _ in expandable_columns] + + if columns_to_expand: + expanded_labels = {} + for fk, _ in expandable_columns: + column = fk["column"] + if column not in columns_to_expand: + continue + expanded_columns.append(column) + # Gather the values + column_index = columns.index(column) + values = [row[column_index] for row in rows] + # Expand them + expanded_labels.update( + await self.ds.expand_foreign_keys(database, table, column, values) + ) + if expanded_labels: + # Rewrite the rows + new_rows = [] + for row in rows: + new_row = CustomRow(columns) + for column in row.keys(): + value = row[column] + if (column, value) in expanded_labels: + new_row[column] = { + "value": value, + "label": expanded_labels[(column, value)], + } + else: + new_row[column] = value + new_rows.append(new_row) + rows = new_rows + + # Pagination next link + next_value = None + next_url = None + if len(rows) > page_size and page_size > 0: + if is_view: + next_value = int(_next or 0) + page_size + else: + next_value = path_from_row_pks(rows[-2], pks, use_rowid) + # If there's a sort or sort_desc, add that value as a prefix + if (sort or sort_desc) and not is_view: + prefix = rows[-2][sort or sort_desc] + if isinstance(prefix, dict) and "value" in prefix: + prefix = prefix["value"] + if prefix is None: + prefix = "$null" + else: + prefix = urllib.parse.quote_plus(str(prefix)) + next_value = "{},{}".format(prefix, next_value) + added_args = {"_next": next_value} + if sort: + added_args["_sort"] = sort + else: + added_args["_sort_desc"] = sort_desc + else: + added_args = {"_next": next_value} + next_url = self.ds.absolute_url( + request, path_with_replaced_args(request, added_args) + ) + rows = rows[:page_size] + + # Detect suggested facets + suggested_facets = [] + + if ( + self.ds.config("suggest_facets") + and self.ds.config("allow_facet") + and not _next + ): + for facet in facet_instances: + suggested_facets.extend(await facet.suggest()) + + # human_description_en combines filters AND search, if provided + human_description_en = filters.human_description_en( + extra=extra_human_descriptions + ) + + if sort or sort_desc: + sorted_by = "sorted by {}{}".format( + (sort or sort_desc), " descending" if sort_desc else "" + ) + human_description_en = " ".join( + [b for b in [human_description_en, sorted_by] if b] + ) + + async def extra_template(): + display_columns, display_rows = await self.display_columns_and_rows( + database, + table, + results.description, + rows, + link_column=not is_view, + truncate_cells=self.ds.config("truncate_cells_html"), + ) + metadata = ( + (self.ds.metadata("databases") or {}) + .get(database, {}) + .get("tables", {}) + .get(table, {}) + ) + self.ds.update_with_inherited_metadata(metadata) + form_hidden_args = [] + for arg in ("_fts_table", "_fts_pk"): + if arg in special_args: + form_hidden_args.append((arg, special_args[arg])) + return { + "supports_search": bool(fts_table), + "search": search or "", + "use_rowid": use_rowid, + "filters": filters, + "display_columns": display_columns, + "filter_columns": filter_columns, + "display_rows": display_rows, + "facets_timed_out": facets_timed_out, + "sorted_facet_results": sorted( + facet_results.values(), + key=lambda f: (len(f["results"]), f["name"]), + 
reverse=True, + ), + "extra_wheres_for_ui": extra_wheres_for_ui, + "form_hidden_args": form_hidden_args, + "is_sortable": any(c["sortable"] for c in display_columns), + "path_with_replaced_args": path_with_replaced_args, + "path_with_removed_args": path_with_removed_args, + "append_querystring": append_querystring, + "request": request, + "sort": sort, + "sort_desc": sort_desc, + "disable_sort": is_view, + "custom_table_templates": [ + "_table-{}-{}.html".format( + to_css_class(database), to_css_class(table) + ), + "_table-table-{}-{}.html".format( + to_css_class(database), to_css_class(table) + ), + "_table.html", + ], + "metadata": metadata, + "view_definition": await db.get_view_definition(table), + "table_definition": await db.get_table_definition(table), + } + + return ( + { + "database": database, + "table": table, + "is_view": is_view, + "human_description_en": human_description_en, + "rows": rows[:page_size], + "truncated": results.truncated, + "filtered_table_rows_count": filtered_table_rows_count, + "expanded_columns": expanded_columns, + "expandable_columns": expandable_columns, + "columns": columns, + "primary_keys": pks, + "units": units, + "query": {"sql": sql, "params": params}, + "facet_results": facet_results, + "suggested_facets": suggested_facets, + "next": next_value and str(next_value) or None, + "next_url": next_url, + }, + extra_template, + ( + "table-{}-{}.html".format(to_css_class(database), to_css_class(table)), + "table.html", + ), + ) + + +class RowView(RowTableShared): + name = "row" + + async def data(self, request, database, hash, table, pk_path, default_labels=False): + pk_values = urlsafe_components(pk_path) + db = self.ds.databases[database] + pks = await db.primary_keys(table) + use_rowid = not pks + select = "*" + if use_rowid: + select = "rowid, *" + pks = ["rowid"] + wheres = ['"{}"=:p{}'.format(pk, i) for i, pk in enumerate(pks)] + sql = "select {} from {} where {}".format( + select, escape_sqlite(table), " AND ".join(wheres) + ) + params = {} + for i, pk_value in enumerate(pk_values): + params["p{}".format(i)] = pk_value + results = await self.ds.execute(database, sql, params, truncate=True) + columns = [r[0] for r in results.description] + rows = list(results.rows) + if not rows: + raise NotFound("Record not found: {}".format(pk_values)) + + async def template_data(): + display_columns, display_rows = await self.display_columns_and_rows( + database, + table, + results.description, + rows, + link_column=False, + truncate_cells=0, + ) + for column in display_columns: + column["sortable"] = False + return { + "foreign_key_tables": await self.foreign_key_tables( + database, table, pk_values + ), + "display_columns": display_columns, + "display_rows": display_rows, + "custom_table_templates": [ + "_table-{}-{}.html".format( + to_css_class(database), to_css_class(table) + ), + "_table-row-{}-{}.html".format( + to_css_class(database), to_css_class(table) + ), + "_table.html", + ], + "metadata": (self.ds.metadata("databases") or {}) + .get(database, {}) + .get("tables", {}) + .get(table, {}), + } + + data = { + "database": database, + "table": table, + "rows": rows, + "columns": columns, + "primary_keys": pks, + "primary_key_values": pk_values, + "units": self.ds.table_metadata(database, table).get("units", {}), + } + + if "foreign_key_tables" in (request.raw_args.get("_extras") or "").split(","): + data["foreign_key_tables"] = await self.foreign_key_tables( + database, table, pk_values + ) + + return ( + data, + template_data, + ( + 
"row-{}-{}.html".format(to_css_class(database), to_css_class(table)), + "row.html", + ), + ) + + async def foreign_key_tables(self, database, table, pk_values): + if len(pk_values) != 1: + return [] + db = self.ds.databases[database] + all_foreign_keys = await db.get_all_foreign_keys() + foreign_keys = all_foreign_keys[table]["incoming"] + if len(foreign_keys) == 0: + return [] + + sql = "select " + ", ".join( + [ + "(select count(*) from {table} where {column}=:id)".format( + table=escape_sqlite(fk["other_table"]), + column=escape_sqlite(fk["other_column"]), + ) + for fk in foreign_keys + ] + ) + try: + rows = list(await self.ds.execute(database, sql, {"id": pk_values[0]})) + except sqlite3.OperationalError: + # Almost certainly hit the timeout + return [] + + foreign_table_counts = dict( + zip( + [(fk["other_table"], fk["other_column"]) for fk in foreign_keys], + list(rows[0]), + ) + ) + foreign_key_tables = [] + for fk in foreign_keys: + count = ( + foreign_table_counts.get((fk["other_table"], fk["other_column"])) or 0 + ) + foreign_key_tables.append({**fk, **{"count": count}}) + return foreign_key_tables diff --git a/demos/apache-proxy/000-default.conf b/demos/apache-proxy/000-default.conf deleted file mode 100644 index 5b6607a3..00000000 --- a/demos/apache-proxy/000-default.conf +++ /dev/null @@ -1,13 +0,0 @@ - - Options Indexes FollowSymLinks - AllowOverride None - Require all granted - - - - ServerName localhost - DocumentRoot /app/html - ProxyPreserveHost On - ProxyPass /prefix/ http://127.0.0.1:8001/ - Header add X-Proxied-By "Apache2 Debian" - diff --git a/demos/apache-proxy/Dockerfile b/demos/apache-proxy/Dockerfile deleted file mode 100644 index 9a8448da..00000000 --- a/demos/apache-proxy/Dockerfile +++ /dev/null @@ -1,56 +0,0 @@ -FROM python:3.11.0-slim-bullseye - -RUN apt-get update && \ - apt-get install -y apache2 supervisor && \ - apt clean && \ - rm -rf /var/lib/apt && \ - rm -rf /var/lib/dpkg/info/* - -# Apache environment, copied from -# https://github.com/ijklim/laravel-benfords-law-app/blob/e9bf385dcaddb62ea466a7b245ab6e4ef708c313/docker/os/Dockerfile -ENV APACHE_DOCUMENT_ROOT=/var/www/html/public -ENV APACHE_RUN_USER www-data -ENV APACHE_RUN_GROUP www-data -ENV APACHE_PID_FILE /var/run/apache2.pid -ENV APACHE_RUN_DIR /var/run/apache2 -ENV APACHE_LOCK_DIR /var/lock/apache2 -ENV APACHE_LOG_DIR /var/log -RUN ln -sf /dev/stdout /var/log/apache2-access.log -RUN ln -sf /dev/stderr /var/log/apache2-error.log -RUN mkdir -p $APACHE_RUN_DIR $APACHE_LOCK_DIR - -RUN a2enmod proxy -RUN a2enmod proxy_http -RUN a2enmod headers - -ARG DATASETTE_REF - -RUN pip install \ - https://github.com/simonw/datasette/archive/${DATASETTE_REF}.zip \ - datasette-redirect-to-https datasette-debug-asgi - -ADD 000-default.conf /etc/apache2/sites-enabled/000-default.conf - -WORKDIR /app -RUN mkdir -p /app/html -RUN echo '

Demo is at /prefix/
    ' > /app/html/index.html - -ADD https://latest.datasette.io/fixtures.db /app/fixtures.db - -EXPOSE 80 - -# Dynamically build supervisord config since it includes $DATASETTE_REF: -RUN echo "[supervisord]" >> /app/supervisord.conf -RUN echo "nodaemon=true" >> /app/supervisord.conf -RUN echo "" >> /app/supervisord.conf -RUN echo "[program:apache2]" >> /app/supervisord.conf -RUN echo "command=apache2 -D FOREGROUND" >> /app/supervisord.conf -RUN echo "stdout_logfile=/dev/stdout" >> /app/supervisord.conf -RUN echo "stdout_logfile_maxbytes=0" >> /app/supervisord.conf -RUN echo "" >> /app/supervisord.conf -RUN echo "[program:datasette]" >> /app/supervisord.conf -RUN echo "command=datasette /app/fixtures.db --setting base_url '/prefix/' --version-note '${DATASETTE_REF}' -h 0.0.0.0 -p 8001" >> /app/supervisord.conf -RUN echo "stdout_logfile=/dev/stdout" >> /app/supervisord.conf -RUN echo "stdout_logfile_maxbytes=0" >> /app/supervisord.conf - -CMD ["/usr/bin/supervisord", "-c", "/app/supervisord.conf"] diff --git a/demos/apache-proxy/README.md b/demos/apache-proxy/README.md deleted file mode 100644 index c76e440d..00000000 --- a/demos/apache-proxy/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Datasette running behind an Apache proxy - -See also [Running Datasette behind a proxy](https://docs.datasette.io/en/latest/deploying.html#running-datasette-behind-a-proxy) - -This live demo is running at https://datasette-apache-proxy-demo.fly.dev/prefix/ - -To build locally, passing in a Datasette commit hash (or `main` for the main branch): - - docker build -t datasette-apache-proxy-demo . \ - --build-arg DATASETTE_REF=c617e1769ea27e045b0f2907ef49a9a1244e577d - -Then run it like this: - - docker run -p 5000:80 datasette-apache-proxy-demo - -And visit `http://localhost:5000/` or `http://localhost:5000/prefix/` - -## Deployment to Fly - -To deploy to [Fly](https://fly.io/) first create an application there by running: - - flyctl apps create --name datasette-apache-proxy-demo - -You will need a different name, since I have already taken that one. - -Then run this command to deploy: - - flyctl deploy --build-arg DATASETTE_REF=main - -This uses `fly.toml` in this directory, which hard-codes the `datasette-apache-proxy-demo` name - so you would need to edit that file to match your application name before running this. - -## Deployment to Cloud Run - -Deployments to Cloud Run currently result in intermittent 503 errors and I'm not sure why, see [issue #1522](https://github.com/simonw/datasette/issues/1522). 
- -You can deploy like this: - - DATASETTE_REF=main ./deploy-to-cloud-run.sh diff --git a/demos/apache-proxy/deploy-to-cloud-run.sh b/demos/apache-proxy/deploy-to-cloud-run.sh deleted file mode 100755 index 2846590a..00000000 --- a/demos/apache-proxy/deploy-to-cloud-run.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# https://til.simonwillison.net/cloudrun/using-build-args-with-cloud-run - -if [[ -z "$DATASETTE_REF" ]]; then - echo "Must provide DATASETTE_REF environment variable" 1>&2 - exit 1 -fi - -NAME="datasette-apache-proxy-demo" -PROJECT=$(gcloud config get-value project) -IMAGE="gcr.io/$PROJECT/$NAME" - -# Need YAML so we can set --build-arg -echo " -steps: -- name: 'gcr.io/cloud-builders/docker' - args: ['build', '-t', '$IMAGE', '.', '--build-arg', 'DATASETTE_REF=$DATASETTE_REF'] -- name: 'gcr.io/cloud-builders/docker' - args: ['push', '$IMAGE'] -" > /tmp/cloudbuild.yml - -gcloud builds submit --config /tmp/cloudbuild.yml - -rm /tmp/cloudbuild.yml - -gcloud run deploy $NAME \ - --allow-unauthenticated \ - --platform=managed \ - --image $IMAGE \ - --port 80 diff --git a/demos/apache-proxy/fly.toml b/demos/apache-proxy/fly.toml deleted file mode 100644 index 52e6af5d..00000000 --- a/demos/apache-proxy/fly.toml +++ /dev/null @@ -1,37 +0,0 @@ -app = "datasette-apache-proxy-demo" - -kill_signal = "SIGINT" -kill_timeout = 5 -processes = [] - -[env] - -[experimental] - allowed_public_ports = [] - auto_rollback = true - -[[services]] - http_checks = [] - internal_port = 80 - processes = ["app"] - protocol = "tcp" - script_checks = [] - - [services.concurrency] - hard_limit = 25 - soft_limit = 20 - type = "connections" - - [[services.ports]] - handlers = ["http"] - port = 80 - - [[services.ports]] - handlers = ["tls", "http"] - port = 443 - - [[services.tcp_checks]] - grace_period = "1s" - interval = "15s" - restart_limit = 0 - timeout = "2s" diff --git a/demos/plugins/example_js_manager_plugins.py b/demos/plugins/example_js_manager_plugins.py deleted file mode 100644 index 2705f2c5..00000000 --- a/demos/plugins/example_js_manager_plugins.py +++ /dev/null @@ -1,21 +0,0 @@ -from datasette import hookimpl - -# Test command: -# datasette fixtures.db \ --plugins-dir=demos/plugins/ -# \ --static static:demos/plugins/static - -# Create a set with view names that qualify for this JS, since plugins won't do anything on other pages -# Same pattern as in Nteract data explorer -# https://github.com/hydrosquall/datasette-nteract-data-explorer/blob/main/datasette_nteract_data_explorer/__init__.py#L77 -PERMITTED_VIEWS = {"table", "query", "database"} - - -@hookimpl -def extra_js_urls(view_name): - print(view_name) - if view_name in PERMITTED_VIEWS: - return [ - { - "url": "/static/table-example-plugins.js", - } - ] diff --git a/demos/plugins/static/table-example-plugins.js b/demos/plugins/static/table-example-plugins.js deleted file mode 100644 index 8c19d9a6..00000000 --- a/demos/plugins/static/table-example-plugins.js +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Example usage of Datasette JS Manager API - */ - -document.addEventListener("datasette_init", function (evt) { - const { detail: manager } = evt; - // === Demo plugins: remove before merge=== - addPlugins(manager); -}); - -/** - * Examples for to test datasette JS api - */ -const addPlugins = (manager) => { - - manager.registerPlugin("column-name-plugin", { - version: 0.1, - makeColumnActions: (columnMeta) => { - const { column } = columnMeta; - - return [ - { - label: "Copy name to clipboard", - onClick: (evt) => copyToClipboard(column), - }, - 
{ - label: "Log column metadata to console", - onClick: (evt) => console.log(column), - }, - ]; - }, - }); - - manager.registerPlugin("panel-plugin-graphs", { - version: 0.1, - makeAboveTablePanelConfigs: () => { - return [ - { - id: 'first-panel', - label: "First", - render: node => { - const description = document.createElement('p'); - description.innerText = 'Hello world'; - node.appendChild(description); - } - }, - { - id: 'second-panel', - label: "Second", - render: node => { - const iframe = document.createElement('iframe'); - iframe.src = "https://observablehq.com/embed/@d3/sortable-bar-chart?cell=viewof+order&cell=chart"; - iframe.width = 800; - iframe.height = 635; - iframe.frameborder = '0'; - node.appendChild(iframe); - } - }, - ]; - }, - }); - - manager.registerPlugin("panel-plugin-maps", { - version: 0.1, - makeAboveTablePanelConfigs: () => { - return [ - { - // ID only has to be unique within a plugin, manager namespaces for you - id: 'first-map-panel', - label: "Map plugin", - // datasette-vega, leafleft can provide a "render" function - render: node => node.innerHTML = "Here sits a map", - }, - { - id: 'second-panel', - label: "Image plugin", - render: node => { - const img = document.createElement('img'); - img.src = 'https://datasette.io/static/datasette-logo.svg' - node.appendChild(img); - }, - } - ]; - }, - }); - - // Future: dispatch message to some other part of the page with CustomEvent API - // Could use to drive filter/sort query builder actions without page refresh. -} - - - -async function copyToClipboard(str) { - try { - await navigator.clipboard.writeText(str); - } catch (err) { - /** Rejected - text failed to copy to the clipboard. Browsers didn't give permission */ - console.error('Failed to copy: ', err); - } -} diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css deleted file mode 100644 index 0a6f8799..00000000 --- a/docs/_static/css/custom.css +++ /dev/null @@ -1,8 +0,0 @@ -a.external { - overflow-wrap: anywhere; -} -body[data-theme="dark"] .sidebar-logo-container { - background-color: white; - padding: 5px; - opacity: 0.6; -} diff --git a/docs/_static/datasette-favicon.png b/docs/_static/datasette-favicon.png deleted file mode 100644 index 4993163f..00000000 Binary files a/docs/_static/datasette-favicon.png and /dev/null differ diff --git a/docs/_static/js/custom.js b/docs/_static/js/custom.js deleted file mode 100644 index 91c3e306..00000000 --- a/docs/_static/js/custom.js +++ /dev/null @@ -1,23 +0,0 @@ -jQuery(function ($) { - // Show banner linking to /stable/ if this is a /latest/ page - if (!/\/latest\//.test(location.pathname)) { - return; - } - var stableUrl = location.pathname.replace("/latest/", "/stable/"); - // Check it's not a 404 - fetch(stableUrl, { method: "HEAD" }).then((response) => { - if (response.status == 200) { - var warning = $( - `
-      Note
-      This documentation covers the development version of Datasette.
-      See this page for the current stable release.
    ` - ); - warning.find("a").attr("href", stableUrl); - $("article[role=main]").prepend(warning); - } - }); -}); diff --git a/docs/_templates/base.html b/docs/_templates/base.html deleted file mode 100644 index 9dea86eb..00000000 --- a/docs/_templates/base.html +++ /dev/null @@ -1,37 +0,0 @@ -{%- extends "!base.html" %} - -{% block site_meta %} -{{ super() }} - -{% endblock %} - -{% block scripts %} -{{ super() }} - -{% endblock %} diff --git a/docs/_templates/sidebar/brand.html b/docs/_templates/sidebar/brand.html deleted file mode 100644 index 8be9e8ee..00000000 --- a/docs/_templates/sidebar/brand.html +++ /dev/null @@ -1,16 +0,0 @@ - diff --git a/docs/_templates/sidebar/navigation.html b/docs/_templates/sidebar/navigation.html deleted file mode 100644 index c460a17e..00000000 --- a/docs/_templates/sidebar/navigation.html +++ /dev/null @@ -1,11 +0,0 @@ - \ No newline at end of file diff --git a/docs/advanced_export.png b/docs/advanced_export.png new file mode 100644 index 00000000..d4349fac Binary files /dev/null and b/docs/advanced_export.png differ diff --git a/docs/authentication.rst b/docs/authentication.rst deleted file mode 100644 index 69a6f606..00000000 --- a/docs/authentication.rst +++ /dev/null @@ -1,1386 +0,0 @@ -.. _authentication: - -================================ - Authentication and permissions -================================ - -Datasette doesn't require authentication by default. Any visitor to a Datasette instance can explore the full data and execute read-only SQL queries. - -Datasette can be configured to only allow authenticated users, or to control which databases, tables, and queries can be accessed by the public or by specific users. Datasette's plugin system can be used to add many different styles of authentication, such as user accounts, single sign-on or API keys. - -.. _authentication_actor: - -Actors -====== - -Through plugins, Datasette can support both authenticated users (with cookies) and authenticated API clients (via authentication tokens). The word "actor" is used to cover both of these cases. - -Every request to Datasette has an associated actor value, available in the code as ``request.actor``. This can be ``None`` for unauthenticated requests, or a JSON compatible Python dictionary for authenticated users or API clients. - -The actor dictionary can be any shape - the design of that data structure is left up to the plugins. Actors should always include a unique ``"id"`` string, as demonstrated by the "root" actor below. - -Plugins can use the :ref:`plugin_hook_actor_from_request` hook to implement custom logic for authenticating an actor based on the incoming HTTP request. - -.. _authentication_root: - -Using the "root" actor ----------------------- - -Datasette currently leaves almost all forms of authentication to plugins - `datasette-auth-github `__ for example. - -The one exception is the "root" account, which you can sign into while using Datasette on your local machine. The root user has **all permissions** - they can perform any action regardless of other permission rules. - -The ``--root`` flag is designed for local development and testing. When you start Datasette with ``--root``, the root user automatically receives every permission, including: - -* All view permissions (``view-instance``, ``view-database``, ``view-table``, etc.) 
-* All write permissions (``insert-row``, ``update-row``, ``delete-row``, ``create-table``, ``alter-table``, ``drop-table``) -* Debug permissions (``permissions-debug``, ``debug-menu``) -* Any custom permissions defined by plugins - -If you add explicit deny rules in ``datasette.yaml`` those can still block the -root actor from specific databases or tables. - -The ``--root`` flag sets an internal ``root_enabled`` switch—without it, a signed-in user with ``{"id": "root"}`` is treated like any other actor. - -To sign in as root, start Datasette using the ``--root`` command-line option, like this:: - - datasette --root - -Datasette will output a single-use-only login URL on startup:: - - http://127.0.0.1:8001/-/auth-token?token=786fc524e0199d70dc9a581d851f466244e114ca92f33aa3b42a139e9388daa7 - INFO: Started server process [25801] - INFO: Waiting for application startup. - INFO: Application startup complete. - INFO: Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit) - -Click on that link and then visit ``http://127.0.0.1:8001/-/actor`` to confirm that you are authenticated as an actor that looks like this: - -.. code-block:: json - - { - "id": "root" - } - -.. _authentication_permissions: - -Permissions -=========== - -Datasette's permissions system is built around SQL queries. Datasette and its plugins construct SQL queries to resolve the list of resources that an actor cas access. - -The key question the permissions system answers is this: - - Is this **actor** allowed to perform this **action**, optionally against this particular **resource**? - -**Actors** are :ref:`described above `. - -An **action** is a string describing the action the actor would like to perform. A full list is :ref:`provided below ` - examples include ``view-table`` and ``execute-sql``. - -A **resource** is the item the actor wishes to interact with - for example a specific database or table. Some actions, such as ``permissions-debug``, are not associated with a particular resource. - -Datasette's built-in view actions (``view-database``, ``view-table`` etc) are allowed by Datasette's default configuration: unless you :ref:`configure additional permission rules ` unauthenticated users will be allowed to access content. - -Other actions, including those introduced by plugins, will default to *deny*. - -.. _authentication_default_deny: - -Denying all permissions by default ----------------------------------- - -By default, Datasette allows unauthenticated access to view databases, tables, and execute SQL queries. - -You may want to run Datasette in a mode where **all** access is denied by default, and you explicitly grant permissions only to authenticated users, either using the :ref:`--root mechanism ` or through :ref:`configuration file rules ` or plugins. - -Use the ``--default-deny`` command-line option to run Datasette in this mode:: - - datasette --default-deny data.db --root - -With ``--default-deny`` enabled: - -* Anonymous users are denied access to view the instance, databases, tables, and queries -* Authenticated users are also denied access unless they're explicitly granted permissions -* The root user (when using ``--root``) still has access to everything -* You can grant permissions using :ref:`configuration file rules ` or plugins - -For example, to allow only a specific user to access your instance:: - - datasette --default-deny data.db --config datasette.yaml - -Where ``datasette.yaml`` contains: - -.. 
code-block:: yaml - - allow: - id: alice - -This configuration will deny access to everyone except the user with ``id`` of ``alice``. - -.. _authentication_permissions_explained: - -How permissions are resolved ----------------------------- - -Datasette performs permission checks using the internal :ref:`datasette_allowed`, method which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. - -``resource`` should be an instance of the appropriate ``Resource`` subclass from :mod:`datasette.resources`—for example ``InstanceResource()``, ``DatabaseResource(database="...``)`` or ``TableResource(database="...", table="...")``. This defaults to ``InstanceResource()`` if not specified. - -When a check runs Datasette gathers allow/deny rules from multiple sources and -compiles them into a SQL query. The resulting query describes all of the -resources an actor may access for that action, together with the reasons those -resources were allowed or denied. The combined sources are: - -* ``allow`` blocks configured in :ref:`datasette.yaml `. -* :ref:`Actor restrictions ` encoded into the actor dictionary or API token. -* The "root" user shortcut when ``--root`` (or :attr:`Datasette.root_enabled `) is active, replying ``True`` to all permission chucks unless configuration rules deny them at a more specific level. -* Any additional SQL provided by plugins implementing :ref:`plugin_hook_permission_resources_sql`. - -Datasette evaluates the SQL to determine if the requested ``resource`` is -included. Explicit deny rules returned by configuration or plugins will block -access even if other rules allowed it. - -.. _authentication_permissions_allow: - -Defining permissions with "allow" blocks ----------------------------------------- - -One way to define permissions in Datasette is to use an ``"allow"`` block :ref:`in the datasette.yaml file `. This is a JSON document describing which actors are allowed to perform an action against a specific resource. - -Each ``allow`` block is compiled into SQL and combined with any -:ref:`plugin-provided rules ` to produce -the cascading allow/deny decisions that power :ref:`datasette_allowed`. - -The most basic form of allow block is this (`allow demo `__, `deny demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - id: root - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - id: root - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "id": "root" - } - } -.. [[[end]]] - -This will match any actors with an ``"id"`` property of ``"root"`` - for example, an actor that looks like this: - -.. code-block:: json - - { - "id": "root", - "name": "Root User" - } - -An allow block can specify "deny all" using ``false`` (`demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: false - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: false - -.. tab:: JSON - - .. code-block:: json - - { - "allow": false - } -.. [[[end]]] - -An ``"allow"`` of ``true`` allows all access (`demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: true - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: true - -.. tab:: JSON - - .. code-block:: json - - { - "allow": true - } -.. 
[[[end]]] - -Allow keys can provide a list of values. These will match any actor that has any of those values (`allow demo `__, `deny demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - id: - - simon - - cleopaws - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - id: - - simon - - cleopaws - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "id": [ - "simon", - "cleopaws" - ] - } - } -.. [[[end]]] - -This will match any actor with an ``"id"`` of either ``"simon"`` or ``"cleopaws"``. - -Actors can have properties that feature a list of values. These will be matched against the list of values in an allow block. Consider the following actor: - -.. code-block:: json - - { - "id": "simon", - "roles": ["staff", "developer"] - } - -This allow block will provide access to any actor that has ``"developer"`` as one of their roles (`allow demo `__, `deny demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - roles: - - developer - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - roles: - - developer - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "roles": [ - "developer" - ] - } - } -.. [[[end]]] - -Note that "roles" is not a concept that is baked into Datasette - it's a convention that plugins can choose to implement and act on. - -If you want to provide access to any actor with a value for a specific key, use ``"*"``. For example, to match any logged-in user specify the following (`allow demo `__, `deny demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - id: "*" - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - id: "*" - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "id": "*" - } - } -.. [[[end]]] - -You can specify that only unauthenticated actors (from anonymous HTTP requests) should be allowed access using the special ``"unauthenticated": true`` key in an allow block (`allow demo `__, `deny demo `__): - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - unauthenticated: true - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - unauthenticated: true - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "unauthenticated": true - } - } -.. [[[end]]] - -Allow keys act as an "or" mechanism. An actor will be able to execute the query if any of their JSON properties match any of the values in the corresponding lists in the ``allow`` block. The following block will allow users with either a ``role`` of ``"ops"`` OR users who have an ``id`` of ``"simon"`` or ``"cleopaws"``: - -.. [[[cog - from metadata_doc import config_example - import textwrap - config_example(cog, textwrap.dedent( - """ - allow: - id: - - simon - - cleopaws - role: ops - """).strip(), - "YAML", "JSON" - ) -.. ]]] - -.. tab:: YAML - - .. code-block:: yaml - - allow: - id: - - simon - - cleopaws - role: ops - -.. tab:: JSON - - .. code-block:: json - - { - "allow": { - "id": [ - "simon", - "cleopaws" - ], - "role": "ops" - } - } -.. [[[end]]] - -`Demo for cleopaws `__, `demo for ops role `__, `demo for an actor matching neither rule `__. - -.. 
_AllowDebugView: - -The /-/allow-debug tool ------------------------ - -The ``/-/allow-debug`` tool lets you try out different ``"action"`` blocks against different ``"actor"`` JSON objects. You can try that out here: https://latest.datasette.io/-/allow-debug - -.. _authentication_permissions_config: - -Access permissions in ``datasette.yaml`` -======================================== - -There are two ways to configure permissions using ``datasette.yaml`` (or ``datasette.json``). - -For simple visibility permissions you can use ``"allow"`` blocks in the root, database, table and query sections. - -For other permissions you can use a ``"permissions"`` block, described :ref:`in the next section `. - -You can limit who is allowed to view different parts of your Datasette instance using ``"allow"`` keys in your :ref:`configuration`. - -You can control the following: - -* Access to the entire Datasette instance -* Access to specific databases -* Access to specific tables and views -* Access to specific :ref:`canned_queries` - -If a user has permission to view a table they will be able to view that table, independent of if they have permission to view the database or instance that the table exists within. - -.. _authentication_permissions_instance: - -Access to an instance ---------------------- - -Here's how to restrict access to your entire Datasette instance to just the ``"id": "root"`` user: - -.. [[[cog - from metadata_doc import config_example - config_example(cog, """ - title: My private Datasette instance - allow: - id: root - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - title: My private Datasette instance - allow: - id: root - - -.. tab:: datasette.json - - .. code-block:: json - - { - "title": "My private Datasette instance", - "allow": { - "id": "root" - } - } -.. [[[end]]] - -To deny access to all users, you can use ``"allow": false``: - -.. [[[cog - config_example(cog, """ - title: My entirely inaccessible instance - allow: false - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - title: My entirely inaccessible instance - allow: false - - -.. tab:: datasette.json - - .. code-block:: json - - { - "title": "My entirely inaccessible instance", - "allow": false - } -.. [[[end]]] - -One reason to do this is if you are using a Datasette plugin - such as `datasette-permissions-sql `__ - to control permissions instead. - -.. _authentication_permissions_database: - -Access to specific databases ----------------------------- - -To limit access to a specific ``private.db`` database to just authenticated users, use the ``"allow"`` block like this: - -.. [[[cog - config_example(cog, """ - databases: - private: - allow: - id: "*" - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - private: - allow: - id: "*" - - -.. tab:: datasette.json - - .. code-block:: json - - { - "databases": { - "private": { - "allow": { - "id": "*" - } - } - } - } -.. [[[end]]] - -.. _authentication_permissions_table: - -Access to specific tables and views ------------------------------------ - -To limit access to the ``users`` table in your ``bakery.db`` database: - -.. [[[cog - config_example(cog, """ - databases: - bakery: - tables: - users: - allow: - id: '*' - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - bakery: - tables: - users: - allow: - id: '*' - - -.. tab:: datasette.json - - .. 
code-block:: json - - { - "databases": { - "bakery": { - "tables": { - "users": { - "allow": { - "id": "*" - } - } - } - } - } - } -.. [[[end]]] - -This works for SQL views as well - you can list their names in the ``"tables"`` block above in the same way as regular tables. - -.. warning:: - Restricting access to tables and views in this way will NOT prevent users from querying them using arbitrary SQL queries, `like this `__ for example. - - If you are restricting access to specific tables you should also use the ``"allow_sql"`` block to prevent users from bypassing the limit with their own SQL queries - see :ref:`authentication_permissions_execute_sql`. - -.. _authentication_permissions_query: - -Access to specific canned queries ---------------------------------- - -:ref:`canned_queries` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. - -To limit access to the ``add_name`` canned query in your ``dogs.db`` database to just the :ref:`root user`: - -.. [[[cog - config_example(cog, """ - databases: - dogs: - queries: - add_name: - sql: INSERT INTO names (name) VALUES (:name) - write: true - allow: - id: - - root - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - dogs: - queries: - add_name: - sql: INSERT INTO names (name) VALUES (:name) - write: true - allow: - id: - - root - - -.. tab:: datasette.json - - .. code-block:: json - - { - "databases": { - "dogs": { - "queries": { - "add_name": { - "sql": "INSERT INTO names (name) VALUES (:name)", - "write": true, - "allow": { - "id": [ - "root" - ] - } - } - } - } - } - } -.. [[[end]]] - -.. _authentication_permissions_execute_sql: - -Controlling the ability to execute arbitrary SQL ------------------------------------------------- - -Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page `__ or by appending a ``?_where=`` parameter to the table page `like this `__. - -Access to this ability is controlled by the :ref:`actions_execute_sql` permission. - -The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting ` when you first start Datasette running. - -You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries. - -To prevent any user from executing arbitrary SQL queries, use this: - -.. [[[cog - config_example(cog, """ - allow_sql: false - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - allow_sql: false - - -.. tab:: datasette.json - - .. code-block:: json - - { - "allow_sql": false - } -.. [[[end]]] - -To enable just the :ref:`root user` to execute SQL for all databases in your instance, use the following: - -.. [[[cog - config_example(cog, """ - allow_sql: - id: root - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - allow_sql: - id: root - - -.. tab:: datasette.json - - .. code-block:: json - - { - "allow_sql": { - "id": "root" - } - } -.. [[[end]]] - -To limit this ability for just one specific database, use this: - -.. [[[cog - config_example(cog, """ - databases: - mydatabase: - allow_sql: - id: root - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - mydatabase: - allow_sql: - id: root - - -.. tab:: datasette.json - - .. 
code-block:: json - - { - "databases": { - "mydatabase": { - "allow_sql": { - "id": "root" - } - } - } - } -.. [[[end]]] - -.. _authentication_permissions_other: - -Other permissions in ``datasette.yaml`` -======================================= - -For all other permissions, you can use one or more ``"permissions"`` blocks in your ``datasette.yaml`` configuration file. - -To grant access to the :ref:`permissions debug tool ` to all signed in users, you can grant ``permissions-debug`` to any actor with an ``id`` matching the wildcard ``*`` by adding this a the root of your configuration: - -.. [[[cog - config_example(cog, """ - permissions: - debug-menu: - id: '*' - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - permissions: - debug-menu: - id: '*' - - -.. tab:: datasette.json - - .. code-block:: json - - { - "permissions": { - "debug-menu": { - "id": "*" - } - } - } -.. [[[end]]] - -To grant ``create-table`` to the user with ``id`` of ``editor`` for the ``docs`` database: - -.. [[[cog - config_example(cog, """ - databases: - docs: - permissions: - create-table: - id: editor - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - docs: - permissions: - create-table: - id: editor - - -.. tab:: datasette.json - - .. code-block:: json - - { - "databases": { - "docs": { - "permissions": { - "create-table": { - "id": "editor" - } - } - } - } - } -.. [[[end]]] - -And for ``insert-row`` against the ``reports`` table in that ``docs`` database: - -.. [[[cog - config_example(cog, """ - databases: - docs: - tables: - reports: - permissions: - insert-row: - id: editor - """) -.. ]]] - -.. tab:: datasette.yaml - - .. code-block:: yaml - - - databases: - docs: - tables: - reports: - permissions: - insert-row: - id: editor - - -.. tab:: datasette.json - - .. code-block:: json - - { - "databases": { - "docs": { - "tables": { - "reports": { - "permissions": { - "insert-row": { - "id": "editor" - } - } - } - } - } - } - } -.. [[[end]]] - -The :ref:`permissions debug tool ` can be useful for helping test permissions that you have configured in this way. - -.. _CreateTokenView: - -API Tokens -========== - -Datasette includes a default mechanism for generating API tokens that can be used to authenticate requests. - -Authenticated users can create new API tokens using a form on the ``/-/create-token`` page. - -Tokens created in this way can be further restricted to only allow access to specific actions, or to limit those actions to specific databases, tables or queries. - -Created tokens can then be passed in the ``Authorization: Bearer $token`` header of HTTP requests to Datasette. - -A token created by a user will include that user's ``"id"`` in the token payload, so any permissions granted to that user based on their ID can be made available to the token as well. - -When one of these a token accompanies a request, the actor for that request will have the following shape: - -.. code-block:: json - - { - "id": "user_id", - "token": "dstok", - "token_expires": 1667717426 - } - -The ``"id"`` field duplicates the ID of the actor who first created the token. - -The ``"token"`` field identifies that this actor was authenticated using a Datasette signed token (``dstok``). - -The ``"token_expires"`` field, if present, indicates that the token will expire after that integer timestamp. - -The ``/-/create-token`` page cannot be accessed by actors that are authenticated with a ``"token": "some-value"`` property. 
This is to prevent API tokens from being used to create more tokens. - -Datasette plugins that implement their own form of API token authentication should follow this convention. - -You can disable the signed token feature entirely using the :ref:`allow_signed_tokens ` setting. - -.. _authentication_cli_create_token: - -datasette create-token ----------------------- - -You can also create tokens on the command line using the ``datasette create-token`` command. - -This command takes one required argument - the ID of the actor to be associated with the created token. - -You can specify a ``-e/--expires-after`` option in seconds. If omitted, the token will never expire. - -The command will sign the token using the ``DATASETTE_SECRET`` environment variable, if available. You can also pass the secret using the ``--secret`` option. - -This means you can run the command locally to create tokens for use with a deployed Datasette instance, provided you know that instance's secret. - -To create a token for the ``root`` actor that will expire in one hour:: - - datasette create-token root --expires-after 3600 - -To create a token that never expires using a specific secret:: - - datasette create-token root --secret my-secret-goes-here - -.. _authentication_cli_create_token_restrict: - -Restricting the actions that a token can perform -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Tokens created using ``datasette create-token ACTOR_ID`` will inherit all of the permissions of the actor that they are associated with. - -You can pass additional options to create tokens that are restricted to a subset of that actor's permissions. - -To restrict the token to just specific permissions against all available databases, use the ``--all`` option:: - - datasette create-token root --all insert-row --all update-row - -This option can be passed as many times as you like. In the above example the token will only be allowed to insert and update rows. - -You can also restrict permissions such that they can only be used within specific databases:: - - datasette create-token root --database mydatabase insert-row - -The resulting token will only be able to insert rows, and only to tables in the ``mydatabase`` database. - -Finally, you can restrict permissions to individual resources - tables, SQL views and :ref:`named queries ` - within a specific database:: - - datasette create-token root --resource mydatabase mytable insert-row - -These options have short versions: ``-a`` for ``--all``, ``-d`` for ``--database`` and ``-r`` for ``--resource``. - -You can add ``--debug`` to see a JSON representation of the token that has been created. Here's a full example:: - - datasette create-token root \ - --secret mysecret \ - --all view-instance \ - --all view-table \ - --database docs view-query \ - --resource docs documents insert-row \ - --resource docs documents update-row \ - --debug - -This example outputs the following:: - - dstok_.eJxFizEKgDAMRe_y5w4qYrFXERGxDkVsMI0uxbubdjFL8l_ez1jhwEQCA6Fjjxp90qtkuHawzdjYrh8MFobLxZ_wBH0_gtnAF-hpS5VfmF8D_lnd97lHqUJgLd6sls4H1qwlhA.nH_7RecYHj5qSzvjhMU95iy0Xlc - - Decoded: - - { - "a": "root", - "token": "dstok", - "t": 1670907246, - "_r": { - "a": [ - "vi", - "vt" - ], - "d": { - "docs": [ - "vq" - ] - }, - "r": { - "docs": { - "documents": [ - "ir", - "ur" - ] - } - } - } - } - -Restrictions act as an allowlist layered on top of the actor's existing -permissions. They can only remove access the actor would otherwise have—they -cannot grant new access. 
If the underlying actor is denied by ``allow`` rules in -``datasette.yaml`` or by a plugin, a token that lists that resource in its -``"_r"`` section will still be denied. - - -.. _permissions_plugins: - -Checking permissions in plugins -=============================== - -Datasette plugins can check if an actor has permission to perform an action using :ref:`datasette_allowed`—for example:: - - from datasette.resources import TableResource - - can_edit = await datasette.allowed( - action="update-row", - resource=TableResource(database="fixtures", table="facetable"), - actor=request.actor, - ) - -Use :ref:`datasette_ensure_permission` when you need to enforce a permission and -raise a ``Forbidden`` error automatically. - -Plugins that define new operations should return :class:`~datasette.permissions.Action` -objects from :ref:`plugin_register_actions` and can supply additional allow/deny -rules by returning :class:`~datasette.permissions.PermissionSQL` objects from the -:ref:`plugin_hook_permission_resources_sql` hook. Those rules are merged with -configuration ``allow`` blocks and actor restrictions to determine the final -result for each check. - -.. _authentication_actor_matches_allow: - -actor_matches_allow() -===================== - -Plugins that wish to implement this same ``"allow"`` block permissions scheme can take advantage of the ``datasette.utils.actor_matches_allow(actor, allow)`` function: - -.. code-block:: python - - from datasette.utils import actor_matches_allow - - actor_matches_allow({"id": "root"}, {"id": "*"}) - # returns True - -The currently authenticated actor is made available to plugins as ``request.actor``. - -.. _PermissionsDebugView: - -Permissions debug tools -======================= - -The debug tool at ``/-/permissions`` is available to any actor with the ``permissions-debug`` permission. By default this is just the :ref:`authenticated root user ` but you can open it up to all users by starting Datasette like this:: - - datasette -s permissions.permissions-debug true data.db - -The page shows the permission checks that have been carried out by the Datasette instance. - -It also provides an interface for running hypothetical permission checks against a hypothetical actor. This is a useful way of confirming that your configured permissions work in the way you expect. - -This is designed to help administrators and plugin authors understand exactly how permission checks are being carried out, in order to effectively configure Datasette's permission system. - -.. _AllowedResourcesView: - -Allowed resources view ----------------------- - -The ``/-/allowed`` endpoint displays resources that the current actor can access for a specified ``action``. - -This endpoint provides an interactive HTML form interface. Add ``.json`` to the URL path (e.g. ``/-/allowed.json``) to get the raw JSON response instead. - -Pass ``?action=view-table`` (or another action) to select the action. Optional ``parent=`` and ``child=`` query parameters can narrow the results to a specific database/table pair. - -This endpoint is publicly accessible to help users understand their own permissions. The potentially sensitive ``reason`` field is only shown to users with the ``permissions-debug`` permission - it shows the plugins and explanatory reasons that were responsible for each decision. - -.. 
_PermissionRulesView: - -Permission rules view ---------------------- - -The ``/-/rules`` endpoint displays all permission rules (both allow and deny) for each candidate resource for the requested action. - -This endpoint provides an interactive HTML form interface. Add ``.json`` to the URL path (e.g. ``/-/rules.json?action=view-table``) to get the raw JSON response instead. - -Pass ``?action=`` as a query parameter to specify which action to check. - -This endpoint requires the ``permissions-debug`` permission. - -.. _PermissionCheckView: - -Permission check view ---------------------- - -The ``/-/check`` endpoint evaluates a single action/resource pair and returns information indicating whether the access was allowed along with diagnostic information. - -This endpoint provides an interactive HTML form interface. Add ``.json`` to the URL path (e.g. ``/-/check.json?action=view-instance``) to get the raw JSON response instead. - -Pass ``?action=`` to specify the action to check, and optional ``?parent=`` and ``?child=`` parameters to specify the resource. - -.. _authentication_ds_actor: - -The ds_actor cookie -=================== - -Datasette includes a default authentication plugin which looks for a signed ``ds_actor`` cookie containing a JSON actor dictionary. This is how the :ref:`root actor ` mechanism works. - -Authentication plugins can set signed ``ds_actor`` cookies themselves like so: - -.. code-block:: python - - response = Response.redirect("/") - datasette.set_actor_cookie(response, {"id": "cleopaws"}) - -The shape of data encoded in the cookie is as follows: - -.. code-block:: json - - { - "a": { - "id": "cleopaws" - } - } - -To implement logout in a plugin, use the ``delete_actor_cookie()`` method: - -.. code-block:: python - - response = Response.redirect("/") - datasette.delete_actor_cookie(response) - -.. _authentication_ds_actor_expiry: - -Including an expiry time ------------------------- - -``ds_actor`` cookies can optionally include a signed expiry timestamp, after which the cookies will no longer be valid. Authentication plugins may chose to use this mechanism to limit the lifetime of the cookie. For example, if a plugin implements single-sign-on against another source it may decide to set short-lived cookies so that if the user is removed from the SSO system their existing Datasette cookies will stop working shortly afterwards. - -To include an expiry pass ``expire_after=`` to ``datasette.set_actor_cookie()`` with a number of seconds. For example, to expire in 24 hours: - -.. code-block:: python - - response = Response.redirect("/") - datasette.set_actor_cookie( - response, {"id": "cleopaws"}, expire_after=60 * 60 * 24 - ) - -The resulting cookie will encode data that looks something like this: - -.. code-block:: json - - { - "a": { - "id": "cleopaws" - }, - "e": "1jjSji" - } - -.. _LogoutView: - -The /-/logout page ------------------- - -The page at ``/-/logout`` provides the ability to log out of a ``ds_actor`` cookie authentication session. - -.. _actions: - -Built-in actions -================ - -This section lists all of the permission checks that are carried out by Datasette core, along with the ``resource`` if it was passed. - -.. _actions_view_instance: - -view-instance -------------- - -Top level permission - Actor is allowed to view any pages within this instance, starting at https://latest.datasette.io/ - -.. _actions_view_database: - -view-database -------------- - -Actor is allowed to view a database page, e.g. 
https://latest.datasette.io/fixtures - -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -.. _actions_view_database_download: - -view-database-download ---------------------- - -Actor is allowed to download a database, e.g. https://latest.datasette.io/fixtures.db - -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -.. _actions_view_table: - -view-table ---------- - -Actor is allowed to view a table (or view) page, e.g. https://latest.datasette.io/fixtures/complex_foreign_keys - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_view_query: - -view-query ---------- - -Actor is allowed to view (and execute) a :ref:`canned query ` page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. - -``resource`` - ``datasette.resources.QueryResource(database, query)`` - ``database`` is the name of the database (string) - - ``query`` is the name of the canned query (string) - -.. _actions_insert_row: - -insert-row ---------- - -Actor is allowed to insert rows into a table. - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_delete_row: - -delete-row ---------- - -Actor is allowed to delete rows from a table. - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_update_row: - -update-row ---------- - -Actor is allowed to update rows in a table. - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_create_table: - -create-table ------------ - -Actor is allowed to create a database table. - -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -.. _actions_alter_table: - -alter-table ----------- - -Actor is allowed to alter a database table. - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_drop_table: - -drop-table ---------- - -Actor is allowed to drop a database table. - -``resource`` - ``datasette.resources.TableResource(database, table)`` - ``database`` is the name of the database (string) - - ``table`` is the name of the table (string) - -.. _actions_execute_sql: - -execute-sql ----------- - -Actor is allowed to run arbitrary SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 - -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -See also :ref:`the default_allow_sql setting `. - -.. _actions_permissions_debug: - -permissions-debug ----------------- - -Actor is allowed to view the ``/-/permissions`` debug tools. - -.. _actions_debug_menu: - -debug-menu ---------- - -Controls if the various debug pages are displayed in the navigation menu.
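To make the list above concrete, here is a minimal sketch (the helper function name and the ``fixtures``/``facetable`` names are placeholders) showing how a plugin could check one table-scoped action and one database-scoped action with ``datasette.allowed()``:

.. code-block:: python

    from datasette.resources import DatabaseResource, TableResource


    async def check_write_and_sql(datasette, actor):
        # insert-row is scoped to a single table, so it takes a TableResource
        can_insert = await datasette.allowed(
            action="insert-row",
            resource=TableResource(database="fixtures", table="facetable"),
            actor=actor,
        )
        # execute-sql is scoped to a whole database, so it takes a DatabaseResource
        can_execute_sql = await datasette.allowed(
            action="execute-sql",
            resource=DatabaseResource(database="fixtures"),
            actor=actor,
        )
        return can_insert, can_execute_sql

Actions above that list a ``TableResource`` follow the first pattern; those that list a ``DatabaseResource`` follow the second.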
diff --git a/docs/binary_data.rst b/docs/binary_data.rst deleted file mode 100644 index 0c890fe5..00000000 --- a/docs/binary_data.rst +++ /dev/null @@ -1,68 +0,0 @@ -.. _binary: - -============= - Binary data -============= - -SQLite tables can contain binary data in ``BLOB`` columns. - -Datasette includes special handling for these binary values. The Datasette interface detects binary values and provides a link to download their content, for example on https://latest.datasette.io/fixtures/binary_data - -.. image:: https://raw.githubusercontent.com/simonw/datasette-screenshots/0.62/binary-data.png - :width: 311px - :alt: Screenshot showing download links next to binary data in the table view - -Binary data is represented in ``.json`` exports using Base64 encoding. - -https://latest.datasette.io/fixtures/binary_data.json?_shape=array - -.. code-block:: json - - [ - { - "rowid": 1, - "data": { - "$base64": true, - "encoded": "FRwCx60F/g==" - } - }, - { - "rowid": 2, - "data": { - "$base64": true, - "encoded": "FRwDx60F/g==" - } - }, - { - "rowid": 3, - "data": null - } - ] - -.. _binary_linking: - -Linking to binary downloads ---------------------------- - -The ``.blob`` output format is used to return binary data. It requires a ``_blob_column=`` query string argument specifying which BLOB column should be downloaded, for example: - -https://latest.datasette.io/fixtures/binary_data/1.blob?_blob_column=data - -This output format can also be used to return binary data from an arbitrary SQL query. Since such queries do not specify an exact row, an additional ``?_blob_hash=`` parameter can be used to specify the SHA-256 hash of the value that is being linked to. - -Consider the query ``select data from binary_data`` - `demonstrated here `__. - -That page links to the binary value downloads. Those links look like this: - -https://latest.datasette.io/fixtures.blob?sql=select+data+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d - -These ``.blob`` links are also returned in the ``.csv`` exports Datasette provides for binary tables and queries, since the CSV format does not have a mechanism for representing binary data. - -Binary plugins --------------- - -Several Datasette plugins are available that change the way Datasette treats binary data. - -- `datasette-render-binary `__ modifies Datasette's default interface to show an automatic guess at what type of binary data is being stored, along with a visual representation of the binary value that displays ASCII strings directly in the interface. -- `datasette-render-images `__ detects common image formats and renders them as images directly in the Datasette interface. -- `datasette-media `__ allows Datasette interfaces to be configured to serve binary files from configured SQL queries, and includes the ability to resize images directly before serving them. diff --git a/docs/changelog.rst b/docs/changelog.rst index feba7e86..08d3b347 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,1764 +4,13 @@ Changelog ========= -.. _v1_0_a23: - -1.0a23 (2025-12-02) -------------------- - -- Fix for bug where a stale database entry in ``internal.db`` could cause a 500 error on the homepage. (:issue:`2605`) -- Cosmetic improvement to ``/-/actions`` page. (:issue:`2599`) - -.. _v1_0_a22: - -1.0a22 (2025-11-13) -------------------- - -- ``datasette serve --default-deny`` option for running Datasette configured to :ref:`deny all permissions by default `. 
(:issue:`2592`) -- ``datasette.is_client()`` method for detecting if code is :ref:`executing inside a datasette.client request `. (:issue:`2594`) -- ``datasette.pm`` property can now be used to :ref:`register and unregister plugins in tests `. (:issue:`2595`) - -.. _v1_0_a21: - -1.0a21 (2025-11-05) -------------------- - -- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) -- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) -- New ``datasette --get /path --headers`` option for inspecting the headers returned by a path. (:issue:`2578`) -- New ``datasette.client.get(..., skip_permission_checks=True)`` parameter to bypass permission checks when making requests using the internal client. (:issue:`2583`) - -.. _v0_65_2: - -0.65.2 (2025-11-05) -------------------- - -- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) -- Upgraded for compatibility with Python 3.14. -- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) -- Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. - -.. _v1_0_a20: - -1.0a20 (2025-11-03) -------------------- - -This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning how Datasette's permission system works. - -Permission system redesign -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Previously the permission system worked using ``datasette.permission_allowed()`` checks which consulted all available plugins in turn to determine whether a given actor was allowed to perform a given action on a given resource. - -This approach could become prohibitively expensive for large lists of items - for example to determine the list of tables that a user could view in a large Datasette instance each plugin implementation of that hook would be fired for every table. - -The new design uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action. This turns an N x M problem (N resources, M plugins) into a single SQL query. - -Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will be used as part of that query. - -Plugins that use any of the following features will need to be updated to work with this and following alphas (and Datasette 1.0 stable itself): - -- Checking permissions with ``datasette.permission_allowed()`` - this method has been replaced with :ref:`datasette.allowed() `. -- Implementing the ``permission_allowed()`` plugin hook - this hook has been removed in favor of :ref:`permission_resources_sql() `. -- Using ``register_permissions()`` to register permissions - this hook has been removed in favor of :ref:`register_actions() `. - -Consult the :ref:`v1.0a20 upgrade guide ` for further details on how to upgrade affected plugins. 
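As a rough before-and-after sketch of the first of those changes (the database and table names here are placeholders, and the upgrade guide remains the authoritative reference):

.. code-block:: python

    from datasette.resources import TableResource


    async def table_is_visible(datasette, request):
        # Pre-1.0a20 code typically called the old method (shown for contrast only):
        #     await datasette.permission_allowed(
        #         request.actor, "view-table", resource=("fixtures", "facetable")
        #     )
        # From 1.0a20 onwards the equivalent check uses datasette.allowed() with a
        # typed resource object:
        return await datasette.allowed(
            action="view-table",
            resource=TableResource(database="fixtures", table="facetable"),
            actor=request.actor,
        )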
- -Plugins can now make use of two new internal methods to help resolve permission checks: - -- :ref:`datasette.allowed_resources() ` returns a ``PaginatedResources`` object with a ``.resources`` list of ``Resource`` instances that an actor is allowed to access for a given action (and a ``.next`` token for pagination). -- :ref:`datasette.allowed_resources_sql() ` returns the SQL and parameters that can be executed against the internal catalog tables to determine which resources an actor is allowed to access for a given action. This can be combined with further SQL to perform advanced custom filtering. - -Related changes: - -- The way ``datasette --root`` works has changed. Running Datasette with this flag now causes the root actor to pass *all* permission checks. (:issue:`2521`) - -- Permission debugging improvements: - - - The ``/-/allowed`` endpoint shows resources the user is allowed to interact with for different actions. - - ``/-/rules`` shows the raw allow/deny rules that apply to different permission checks. - - ``/-/actions`` lists every available action. - - ``/-/check`` can be used to try out different permission checks for the current actor. - -Other changes -~~~~~~~~~~~~~ - -- The internal ``catalog_views`` table now tracks SQLite views alongside tables in the introspection database. (:issue:`2495`) -- Hitting the ``/`` brings up a search interface for navigating to tables that the current user can view. A new ``/-/tables`` endpoint supports this functionality. (:issue:`2523`) -- Datasette attempts to detect some configuration errors on startup. -- Datasette now supports Python 3.14 and no longer tests against Python 3.9. - -.. _v1_0_a19: - -1.0a19 (2025-04-21) -------------------- - -- Tiny cosmetic bug fix for mobile display of table rows. (:issue:`2479`) - -.. _v1_0_a18: - -1.0a18 (2025-04-16) -------------------- - -- Fix for incorrect foreign key references in the internal database schema. (:issue:`2466`) -- The ``prepare_connection()`` hook no longer runs for the internal database. (:issue:`2468`) -- Fixed bug where ``link:`` HTTP headers used invalid syntax. (:issue:`2470`) -- No longer tested against Python 3.8. Now tests against Python 3.13. -- FTS tables are now hidden by default if they correspond to a content table. (:issue:`2477`) -- Fixed bug with foreign key links to rows in databases with filenames containing a special character. Thanks, `Jack Stratton `__. (`#2476 `__) - -.. _v1_0_a17: - -1.0a17 (2025-02-06) -------------------- - -- ``DATASETTE_SSL_KEYFILE`` and ``DATASETTE_SSL_CERTFILE`` environment variables as alternatives to ``--ssl-keyfile`` and ``--ssl-certfile``. Thanks, Alex Garcia. (:issue:`2422`) -- ``SQLITE_EXTENSIONS`` environment variable has been renamed to ``DATASETTE_LOAD_EXTENSION``. (:issue:`2424`) -- ``datasette serve`` environment variables are now :ref:`documented here `. -- The :ref:`plugin_hook_register_magic_parameters` plugin hook can now register async functions. (:issue:`2441`) -- Datasette is now tested against Python 3.13. -- Breadcrumbs on database and table pages now include a consistent self-link for resetting query string parameters. (:issue:`2454`) -- Fixed issue where Datasette could crash on ``metadata.json`` with nested values. (:issue:`2455`) -- New internal methods ``datasette.set_actor_cookie()`` and ``datasette.delete_actor_cookie()``, :ref:`described here `. (:issue:`1690`) -- ``/-/permissions`` page now shows a list of all permissions registered by plugins. 
(:issue:`1943`) -- If a table has a single unique text column, Datasette now detects that as the foreign key label for that table. (:issue:`2458`) -- The ``/-/permissions`` page now includes options for filtering or excluding permission checks recorded against the current user. (:issue:`2460`) -- Fixed a bug where replacing a database with a new one with the same name did not pick up the new database correctly. (:issue:`2465`) - -.. _v0_65_1: - -0.65.1 (2024-11-28) ------------------- - -- Fixed bug with upgraded HTTPX 0.28.0 dependency. (:issue:`2443`) - -.. _v0_65: - -0.65 (2024-10-07) ----------------- - -- Upgrade for compatibility with Python 3.13 (by vendoring Pint dependency). (:issue:`2434`) -- Dropped support for Python 3.8. - -.. _v1_0_a16: - -1.0a16 (2024-09-05) ------------------- - -This release focuses on performance, in particular against large tables, and introduces some minor breaking changes for CSS styling in Datasette plugins. - -- Removed the unit conversions feature and its dependency, Pint. This means Datasette is now compatible with the upcoming Python 3.13. (:issue:`2400`, :issue:`2320`) -- The ``datasette --pdb`` option now uses the `ipdb `__ debugger if it is installed. You can install it using ``datasette install ipdb``. Thanks, `Tiago Ilieve `__. (`#2342 `__) -- Fixed a confusing error that occurred if ``metadata.json`` contained nested objects. (:issue:`2403`) -- Fixed a bug with ``?_trace=1`` where it returned a blank page if the response was larger than 256KB. (:issue:`2404`) -- Tracing mechanism now also displays SQL queries that returned errors or ran out of time. `datasette-pretty-traces 0.5 `__ includes support for displaying this new type of trace. (:issue:`2405`) -- Fixed a text spacing issue with table descriptions on the homepage. (:issue:`2399`) -- Performance improvements for large tables: - - Suggested facets now only consider the first 1000 rows. (:issue:`2406`) - - Improved performance of date facet suggestion against large tables. (:issue:`2407`) - - Row counts stop at 10,000 rows when listing tables. (:issue:`2398`) - - On the table page the count stops at 10,000 rows too, with a "count all" button to execute the full count. (:issue:`2408`) -- New ``.dicts()`` internal method on :ref:`database_results` that returns a list of dictionaries representing the results from a SQL query: (:issue:`2414`) - - .. code-block:: python - - rows = (await db.execute("select * from t")).dicts() - -- Default Datasette core CSS that styles inputs and buttons now requires a class of ``"core"`` on the element or a containing element, for example ````. (:issue:`2415`) -- Similarly, default table styles now only apply to ````. (:issue:`2420`) - -.. _v1_0_a15: - -1.0a15 (2024-08-15) ------------------- - -- Datasette now defaults to hiding SQLite "shadow" tables, as seen in extensions such as SQLite FTS and `sqlite-vec `__. Virtual tables that it makes sense to display, such as FTS core tables, are no longer hidden. Thanks, `Alex Garcia `__. (:issue:`2296`) -- Fixed bug where running Datasette with one or more ``-s/--setting`` options could over-ride settings that were present in ``datasette.yml``. (:issue:`2389`) -- The Datasette homepage is now duplicated at ``/-/``, using the default ``index.html`` template. This ensures that the information on that page is still accessible even if the Datasette homepage has been customized using a custom ``index.html`` template, for example on sites like `datasette.io `__.
(:issue:`2393`) -- Failed CSRF checks now display a more user-friendly error page. (:issue:`2390`) -- Fixed a bug where the ``json1`` extension was not correctly detected on the ``/-/versions`` page. Thanks, `Seb Bacon `__. (:issue:`2326`) -- Fixed a bug where the Datasette write API did not correctly accept ``Content-Type: application/json; charset=utf-8``. (:issue:`2384`) -- Fixed a bug where Datasette would fail to start if ``metadata.yml`` contained a ``queries`` block. (`#2386 `__) - -.. _v1_0_a14: - -1.0a14 (2024-08-05) -------------------- - -This alpha introduces significant changes to Datasette's :ref:`metadata` system, some of which represent breaking changes in advance of the full 1.0 release. The new :ref:`upgrade_guide` document provides detailed coverage of those breaking changes and how they affect plugin authors and Datasette API consumers. - -- The ``/databasename?sql=`` interface and JSON API for executing arbitrary SQL queries can now be found at ``/databasename/-/query?sql=``. Requests with a ``?sql=`` parameter to the old endpoints will be redirected. Thanks, `Alex Garcia `__. (:issue:`2360`) -- Metadata about tables, databases, instances and columns is now stored in :ref:`internals_internal`. Thanks, Alex Garcia. (:issue:`2341`) -- Database write connections now execute using the ``IMMEDIATE`` isolation level for SQLite. This should help avoid a rare ``SQLITE_BUSY`` error that could occur when a transaction upgraded to a write mid-flight. (:issue:`2358`) -- Fix for a bug where canned queries with named parameters could fail against SQLite 3.46. (:issue:`2353`) -- Datasette now serves ``E-Tag`` headers for static files. Thanks, `Agustin Bacigalup `__. (`#2306 `__) -- Dropdown menus now use a ``z-index`` that should avoid them being hidden by plugins. (:issue:`2311`) -- Incorrect table and row names are no longer reflected back on the resulting 404 page. (:issue:`2359`) -- Improved documentation for async usage of the :ref:`plugin_hook_track_event` hook. (:issue:`2319`) -- Fixed some HTTPX deprecation warnings. (:issue:`2307`) -- Datasette now serves a ```` attribute. Thanks, `Charles Nepote `__. (:issue:`2348`) -- Datasette's automated tests now run against the maximum and minimum supported versions of SQLite: 3.25 (from September 2018) and 3.46 (from May 2024). Thanks, Alex Garcia. (`#2352 `__) -- Fixed an issue where clicking twice on the URL output by ``datasette --root`` produced a confusing error. (:issue:`2375`) - -.. _v0_64_8: - -0.64.8 (2024-06-21) -------------------- - -- Security improvement: 404 pages used to reflect content from the URL path, which could be used to display misleading information to Datasette users. 404 errors no longer display additional information from the URL. (:issue:`2359`) -- Backported a better fix for correctly extracting named parameters from canned query SQL against SQLite 3.46.0. (:issue:`2353`) - -.. _v0_64_7: - -0.64.7 (2024-06-12) -------------------- - -- Fixed a bug where canned queries with named parameters threw an error when run against SQLite 3.46.0. (:issue:`2353`) - -.. _v1_0_a13: - -1.0a13 (2024-03-12) -------------------- - -Each of the key concepts in Datasette now has an :ref:`actions menu `, which plugins can use to add additional functionality targeting that entity. - -- Plugin hook: :ref:`view_actions() ` for actions that can be applied to a SQL view. (:issue:`2297`) -- Plugin hook: :ref:`homepage_actions() ` for actions that apply to the instance homepage. 
(:issue:`2298`) -- Plugin hook: :ref:`row_actions() ` for actions that apply to the row page. (:issue:`2299`) -- Action menu items for all of the ``*_actions()`` plugin hooks can now return an optional ``"description"`` key, which will be displayed in the menu below the action label. (:issue:`2294`) -- :ref:`Plugin hooks ` documentation page is now organized with additional headings. (:issue:`2300`) -- Improved the display of action buttons on pages that also display metadata. (:issue:`2286`) -- The header and footer of the page now uses a subtle gradient effect, and options in the navigation menu are better visually defined. (:issue:`2302`) -- Table names that start with an underscore now default to hidden. (:issue:`2104`) -- ``pragma_table_list`` has been added to the allow-list of SQLite pragma functions supported by Datasette. ``select * from pragma_table_list()`` is no longer blocked. (`#2104 `__) - -.. _v1_0_a12: - -1.0a12 (2024-02-29) -------------------- - -- New :ref:`query_actions() ` plugin hook, similar to :ref:`table_actions() ` and :ref:`database_actions() `. Can be used to add a menu of actions to the canned query or arbitrary SQL query page. (:issue:`2283`) -- New design for the button that opens the query, table and database actions menu. (:issue:`2281`) -- "does not contain" table filter for finding rows that do not contain a string. (:issue:`2287`) -- Fixed a bug in the :ref:`javascript_plugins_makeColumnActions` JavaScript plugin mechanism where the column action menu was not fully reset in between each interaction. (:issue:`2289`) - -.. _v1_0_a11: - -1.0a11 (2024-02-19) -------------------- - -- The ``"replace": true`` argument to the ``/db/table/-/insert`` API now requires the actor to have the ``update-row`` permission. (:issue:`2279`) -- Fixed some UI bugs in the interactive permissions debugging tool. (:issue:`2278`) -- The column action menu now aligns better with the cog icon, and positions itself taking into account the width of the browser window. (:issue:`2263`) - -.. _v1_0_a10: - -1.0a10 (2024-02-17) -------------------- - -The only changes in this alpha correspond to the way Datasette handles database transactions. (:issue:`2277`) - -- The :ref:`database.execute_write_fn() ` method has a new ``transaction=True`` parameter. This defaults to ``True`` which means all functions executed using this method are now automatically wrapped in a transaction - previously the functions needed to roll transaction handling on their own, and many did not. -- Pass ``transaction=False`` to ``execute_write_fn()`` if you want to manually handle transactions in your function. -- Several internal Datasette features, including parts of the :ref:`JSON write API `, had been failing to wrap their operations in a transaction. This has been fixed by the new ``transaction=True`` default. - -.. _v1_0_a9: - -1.0a9 (2024-02-16) ------------------- - -This alpha release adds basic alter table support to the Datasette Write API and fixes a permissions bug relating to the ``/upsert`` API endpoint. - -Alter table support for create, insert, upsert and update -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :ref:`JSON write API ` can now be used to apply simple alter table schema changes, provided the acting actor has the new :ref:`actions_alter_table` permission. (:issue:`2101`) - -The only alter operation supported so far is adding new columns to an existing table. 
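As an illustrative sketch (placeholder URL, token, table and column names, not an excerpt from the API documentation), an insert request that includes a previously unseen column can pass ``"alter": true`` to have that column added automatically; the points that follow spell out exactly which endpoints accept the flag and which permissions they require:

.. code-block:: python

    import httpx

    # Hypothetical request - the instance URL, API token, database, table and
    # column names are all placeholders.
    response = httpx.post(
        "https://example.com/mydatabase/mytable/-/insert",
        json={
            "rows": [{"id": 1, "title": "Example", "brand_new_column": "value"}],
            # Ask Datasette to add brand_new_column if it is missing - this
            # requires the alter-table permission.
            "alter": True,
        },
        headers={"Authorization": "Bearer <your-api-token>"},
    )
    response.raise_for_status()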
- -* The :ref:`/db/-/create ` API now adds new columns during large operations to create a table based on incoming example ``"rows"``, in the case where one of the later rows includes columns that were not present in the earlier batches. This requires the ``create-table`` but not the ``alter-table`` permission. -* When ``/db/-/create`` is called with rows in a situation where the table may have been already created, an ``"alter": true`` key can be included to indicate that any missing columns from the new rows should be added to the table. This requires the ``alter-table`` permission. -* :ref:`/db/table/-/insert ` and :ref:`/db/table/-/upsert ` and :ref:`/db/table/row-pks/-/update ` all now also accept ``"alter": true``, depending on the ``alter-table`` permission. - -Operations that alter a table now fire the new :ref:`alter-table event `. - -Permissions fix for the upsert API -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :ref:`/database/table/-/upsert API ` had a minor permissions bug, only affecting Datasette instances that had configured the ``insert-row`` and ``update-row`` permissions to apply to a specific table rather than the database or instance as a whole. Full details in issue :issue:`2262`. - -To avoid similar mistakes in the future the ``datasette.permission_allowed()`` method now specifies ``default=`` as a keyword-only argument. - -Permission checks now consider opinions from every plugin -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the ``permission_allowed()`` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) - -Datasette now consults every plugin and checks to see if any of them returned ``False`` (the veto rule), and if none of them did, it then checks to see if any of them returned ``True``. - -This is explained at length in the new documentation covering :ref:`authentication_permissions_explained`. - -Other changes -~~~~~~~~~~~~~ - -- The new :ref:`DATASETTE_TRACE_PLUGINS=1 environment variable ` turns on detailed trace output for every executed plugin hook, useful for debugging and understanding how the plugin system works at a low level. (:issue:`2274`) -- Datasette on Python 3.9 or above marks its non-cryptographic uses of the MD5 hash function as ``usedforsecurity=False``, for compatibility with FIPS systems. (:issue:`2270`) -- SQL relating to :ref:`internals_internal` now executes inside a transaction, avoiding a potential database locked error. (:issue:`2273`) -- The ``/-/threads`` debug page now identifies the database in the name associated with each dedicated write thread. (:issue:`2265`) -- The ``/db/-/create`` API now fires a ``insert-rows`` event if rows were inserted after the table was created. (:issue:`2260`) - -.. _v1_0_a8: - -1.0a8 (2024-02-07) ------------------- - -This alpha release continues the migration of Datasette's configuration from ``metadata.yaml`` to the new ``datasette.yaml`` configuration file, introduces a new system for JavaScript plugins and adds several new plugin hooks. - -See `Datasette 1.0a8: JavaScript plugins, new plugin hooks and plugin configuration in datasette.yaml `__ for an annotated version of these release notes. - -Configuration -~~~~~~~~~~~~~ - -- Plugin configuration now lives in the :ref:`datasette.yaml configuration file `, passed to Datasette using the ``-c/--config`` option. Thanks, Alex Garcia. (:issue:`2093`) - - .. 
code-block:: bash - - datasette -c datasette.yaml - - Where ``datasette.yaml`` contains configuration that looks like this: - - .. code-block:: yaml - - plugins: - datasette-cluster-map: - latitude_column: xlat - longitude_column: xlon - - Previously plugins were configured in ``metadata.yaml``, which was confusing as plugin settings were unrelated to database and table metadata. -- The ``-s/--setting`` option can now be used to set plugin configuration as well. See :ref:`configuration_cli` for details. (:issue:`2252`) - - The above YAML configuration example using ``-s/--setting`` looks like this: - - .. code-block:: bash - - datasette mydatabase.db \ - -s plugins.datasette-cluster-map.latitude_column xlat \ - -s plugins.datasette-cluster-map.longitude_column xlon - -- The new ``/-/config`` page shows the current instance configuration, after redacting keys that could contain sensitive data such as API keys or passwords. (:issue:`2254`) - -- Existing Datasette installations may already have configuration set in ``metadata.yaml`` that should be migrated to ``datasette.yaml``. To avoid breaking these installations, Datasette will silently treat table configuration, plugin configuration and allow blocks in metadata as if they had been specified in configuration instead. (:issue:`2247`) (:issue:`2248`) (:issue:`2249`) - -Note that the ``datasette publish`` command has not yet been updated to accept a ``datasette.yaml`` configuration file. This will be addressed in :issue:`2195` but for the moment you can include those settings in ``metadata.yaml`` instead. - -JavaScript plugins -~~~~~~~~~~~~~~~~~~ - -Datasette now includes a :ref:`JavaScript plugins mechanism `, allowing JavaScript to customize Datasette in a way that can collaborate with other plugins. - -This provides two initial hooks, with more to come in the future: - -- :ref:`makeAboveTablePanelConfigs() ` can add additional panels to the top of the table page. -- :ref:`makeColumnActions() ` can add additional actions to the column menu. - -Thanks `Cameron Yick `__ for contributing this feature. (`#2052 `__) - -Plugin hooks -~~~~~~~~~~~~ - -- New :ref:`plugin_hook_jinja2_environment_from_request` plugin hook, which can be used to customize the current Jinja environment based on the incoming request. This can be used to modify the template lookup path based on the incoming request hostname, among other things. (:issue:`2225`) -- New :ref:`family of template slot plugin hooks `: ``top_homepage``, ``top_database``, ``top_table``, ``top_row``, ``top_query``, ``top_canned_query``. Plugins can use these to provide additional HTML to be injected at the top of the corresponding pages. (:issue:`1191`) -- New :ref:`track_event() mechanism ` for plugins to emit and receive events when certain events occur within Datasette. (:issue:`2240`) - - Plugins can register additional event classes using :ref:`plugin_hook_register_events`. - - They can then trigger those events with the :ref:`datasette.track_event(event) ` internal method. - - Plugins can subscribe to notifications of events using the :ref:`plugin_hook_track_event` plugin hook. - - Datasette core now emits ``login``, ``logout``, ``create-token``, ``create-table``, ``drop-table``, ``insert-rows``, ``upsert-rows``, ``update-row``, ``delete-row`` events, :ref:`documented here `. 
-- New internal function for plugin authors: :ref:`database_execute_isolated_fn`, for creating a new SQLite connection, executing code and then closing that connection, all while preventing other code from writing to that particular database. This connection will not have the :ref:`prepare_connection() ` plugin hook executed against it, allowing plugins to perform actions that might otherwise be blocked by existing connection configuration. (:issue:`2218`) - -Documentation -~~~~~~~~~~~~~ - -- Documentation describing :ref:`how to write tests that use signed actor cookies ` using ``datasette.client.actor_cookie()``. (:issue:`1830`) -- Documentation on how to :ref:`register a plugin for the duration of a test `. (:issue:`2234`) -- The :ref:`configuration documentation ` now shows examples of both YAML and JSON for each setting. - -Minor fixes -~~~~~~~~~~~ - -- Datasette no longer attempts to run SQL queries in parallel when rendering a table page, as this was leading to some rare crashing bugs. (:issue:`2189`) -- Fixed warning: ``DeprecationWarning: pkg_resources is deprecated as an API`` (:issue:`2057`) -- Fixed bug where ``?_extra=columns`` parameter returned an incorrectly shaped response. (:issue:`2230`) - -.. _v0_64_6: - -0.64.6 (2023-12-22) -------------------- - -- Fixed a bug where CSV export with expanded labels could fail if a foreign key reference did not correctly resolve. (:issue:`2214`) - -.. _v0_64_5: - -0.64.5 (2023-10-08) -------------------- - -- Dropped dependency on ``click-default-group-wheel``, which could cause a dependency conflict. (:issue:`2197`) - -.. _v1_0_a7: - -1.0a7 (2023-09-21) ------------------- - -- Fix for a crashing bug caused by viewing the table page for a named in-memory database. (:issue:`2189`) - -.. _v0_64_4: - -0.64.4 (2023-09-21) -------------------- - -- Fix for a crashing bug caused by viewing the table page for a named in-memory database. (:issue:`2189`) - -.. _v1_0_a6: - -1.0a6 (2023-09-07) ------------------- - -- New plugin hook: :ref:`plugin_hook_actors_from_ids` and an internal method to accompany it, :ref:`datasette_actors_from_ids`. This mechanism is intended to be used by plugins that may need to display the actor who was responsible for something managed by that plugin: they can now resolve the recorded IDs of actors into the full actor objects. (:issue:`2181`) -- ``DATASETTE_LOAD_PLUGINS`` environment variable for :ref:`controlling which plugins ` are loaded by Datasette. (:issue:`2164`) -- Datasette now checks if the user has permission to view a table linked to by a foreign key before turning that foreign key into a clickable link. (:issue:`2178`) -- The ``execute-sql`` permission now implies that the actor can also view the database and instance. (:issue:`2169`) -- Documentation describing a pattern for building plugins that themselves :ref:`define further hooks ` for other plugins. (:issue:`1765`) -- Datasette is now tested against the Python 3.12 preview. (`#2175 `__) - -.. _v1_0_a5: - -1.0a5 (2023-08-29) ------------------- - -- When restrictions are applied to :ref:`API tokens `, those restrictions now behave slightly differently: applying the ``view-table`` restriction will imply the ability to ``view-database`` for the database containing that table, and both ``view-table`` and ``view-database`` will imply ``view-instance``. 
Previously you needed to create a token with restrictions that explicitly listed ``view-instance`` and ``view-database`` and ``view-table`` in order to view a table without getting a permission denied error. (:issue:`2102`) -- New ``datasette.yaml`` (or ``.json``) configuration file, which can be specified using ``datasette -c path-to-file``. The goal here is to consolidate settings, plugin configuration, permissions, canned queries, and other Datasette configuration into a single file, separate from ``metadata.yaml``. The legacy ``settings.json`` config file used for :ref:`config_dir` has been removed, and ``datasette.yaml`` has a ``"settings"`` section where the same settings key/value pairs can be included. In the next alpha release, more configuration such as plugins/permissions/canned queries will be moved to the ``datasette.yaml`` file. See :issue:`2093` for more details. Thanks, Alex Garcia. -- The ``-s/--setting`` option can now take dotted paths to nested settings. These will then be used to set or over-ride the same options as are present in the new configuration file. (:issue:`2156`) -- New ``--actor '{"id": "json-goes-here"}'`` option for use with ``datasette --get`` to treat the simulated request as being made by a specific actor, see :ref:`cli_datasette_get`. (:issue:`2153`) -- The Datasette ``_internal`` database has had some changes. It no longer shows up in the ``datasette.databases`` list by default, and is now instead available to plugins using the ``datasette.get_internal_database()`` method. Plugins are invited to use this as a private database to store configuration and settings and secrets that should not be made visible through the default Datasette interface. Users can pass the new ``--internal internal.db`` option to persist that internal database to disk. Thanks, Alex Garcia. (:issue:`2157`). - -.. _v1_0_a4: - -1.0a4 (2023-08-21) ------------------ - -This alpha fixes a security issue with the ``/-/api`` API explorer. On authenticated Datasette instances (instances protected using plugins such as `datasette-auth-passwords `__) the API explorer interface could reveal the names of databases and tables within the protected instance. The data stored in those tables was not revealed. - -For more information and workarounds, read `the security advisory `__. The issue has been present in every previous alpha version of Datasette 1.0: versions 1.0a0, 1.0a1, 1.0a2 and 1.0a3. - -Also in this alpha: - -- The new ``datasette plugins --requirements`` option outputs a list of currently installed plugins in Python ``requirements.txt`` format, useful for duplicating that installation elsewhere. (:issue:`2133`) -- :ref:`canned_queries_writable` can now define an ``on_success_message_sql`` field in their configuration, containing a SQL query that should be executed upon successful completion of the write operation in order to generate a message to be shown to the user. (:issue:`2138`) -- The automatically generated border color for a database is now shown in more places around the application. (:issue:`2119`) -- Every instance of example shell script code in the documentation should now include a working copy button, free from additional syntax. (:issue:`2140`) - -.. _v1_0_a3: - -1.0a3 (2023-08-09) ------------------ - -This alpha release previews the updated design for Datasette's default JSON API.
(:issue:`782`) - -The new :ref:`default JSON representation ` for both table pages (``/dbname/table.json``) and arbitrary SQL queries (``/dbname.json?sql=...``) is now shaped like this: - -.. code-block:: json - - { - "ok": true, - "rows": [ - { - "id": 3, - "name": "Detroit" - }, - { - "id": 2, - "name": "Los Angeles" - }, - { - "id": 4, - "name": "Memnonia" - }, - { - "id": 1, - "name": "San Francisco" - } - ], - "truncated": false - } - -Tables will include an additional ``"next"`` key for pagination, which can be passed to ``?_next=`` to fetch the next page of results. - -The various ``?_shape=`` options continue to work as before - see :ref:`json_api_shapes` for details. - -A new ``?_extra=`` mechanism is available for tables, but has not yet been stabilized or documented. Details on that are available in :issue:`262`. - -Smaller changes -~~~~~~~~~~~~~~~ - -- Datasette documentation now shows YAML examples for :ref:`metadata` by default, with a tab interface for switching to JSON. (:issue:`1153`) -- :ref:`plugin_register_output_renderer` plugins now have access to ``error`` and ``truncated`` arguments, allowing them to display error messages and take into account truncated results. (:issue:`2130`) -- ``render_cell()`` plugin hook now also supports an optional ``request`` argument. (:issue:`2007`) -- New ``Justfile`` to support development workflows for Datasette using `Just `__. -- ``datasette.render_template()`` can now accepts a ``datasette.views.Context`` subclass as an alternative to a dictionary. (:issue:`2127`) -- ``datasette install -e path`` option for editable installations, useful while developing plugins. (:issue:`2106`) -- When started with the ``--cors`` option Datasette now serves an ``Access-Control-Max-Age: 3600`` header, ensuring CORS OPTIONS requests are repeated no more than once an hour. (:issue:`2079`) -- Fixed a bug where the ``_internal`` database could display ``None`` instead of ``null`` for in-memory databases. (:issue:`1970`) - -.. _v0_64_2: - -0.64.2 (2023-03-08) -------------------- - -- Fixed a bug with ``datasette publish cloudrun`` where deploys all used the same Docker image tag. This was mostly inconsequential as the service is deployed as soon as the image has been pushed to the registry, but could result in the incorrect image being deployed if two different deploys for two separate services ran at exactly the same time. (:issue:`2036`) - -.. _v0_64_1: - -0.64.1 (2023-01-11) -------------------- - -- Documentation now links to a current source of information for installing Python 3. (:issue:`1987`) -- Incorrectly calling the Datasette constructor using ``Datasette("path/to/data.db")`` instead of ``Datasette(["path/to/data.db"])`` now returns a useful error message. (:issue:`1985`) - -.. _v0_64: - -0.64 (2023-01-09) ------------------ - -- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details. -- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`) -- `Building a location to time zone API with SpatiaLite `__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API. -- New documentation about :ref:`how to debug problems loading SQLite extensions `. 
The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`) -- Fixed an accessibility issue: the ``\n' - "" - ), + "error": "Table not found: blah", + "status": 404, + "title": None, + } == app_client.get("/fixtures/blah.json").json + + +def test_jsono_redirects_to_shape_objects(app_client_with_hash): + response_1 = app_client_with_hash.get( + "/fixtures/simple_primary_key.jsono", allow_redirects=False + ) + response = app_client_with_hash.get( + response_1.headers["Location"], allow_redirects=False + ) + assert response.status == 302 + assert response.headers["Location"].endswith("?_shape=objects") + + +def test_table_shape_arrays(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=arrays") + assert [ + ["1", "hello"], + ["2", "world"], + ["3", ""], + ["4", "RENDER_CELL_DEMO"], + ] == response.json["rows"] + + +def test_table_shape_arrayfirst(app_client): + response = app_client.get( + "/fixtures.json?" + + urllib.parse.urlencode( + { + "sql": "select content from simple_primary_key order by id", + "_shape": "arrayfirst", + } + ) + ) + assert ["hello", "world", "", "RENDER_CELL_DEMO"] == response.json + + +def test_table_shape_objects(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=objects") + assert [ + {"id": "1", "content": "hello"}, + {"id": "2", "content": "world"}, + {"id": "3", "content": ""}, + {"id": "4", "content": "RENDER_CELL_DEMO"}, + ] == response.json["rows"] + + +def test_table_shape_array(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=array") + assert [ + {"id": "1", "content": "hello"}, + {"id": "2", "content": "world"}, + {"id": "3", "content": ""}, + {"id": "4", "content": "RENDER_CELL_DEMO"}, + ] == response.json + + +def test_table_shape_array_nl(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=array&_nl=on") + lines = response.text.split("\n") + results = [json.loads(line) for line in lines] + assert [ + {"id": "1", "content": "hello"}, + {"id": "2", "content": "world"}, + {"id": "3", "content": ""}, + {"id": "4", "content": "RENDER_CELL_DEMO"}, + ] == results + + +def test_table_shape_invalid(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=invalid") + assert { + "ok": False, + "error": "Invalid _shape: invalid", "status": 400, - "title": "SQL Interrupted", - } + "title": None, + } == response.json -@pytest.mark.asyncio -async def test_custom_sql_time_limit(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?sql=select+sleep(0.01)", +def test_table_shape_object(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_shape=object") + assert { + "1": {"id": "1", "content": "hello"}, + "2": {"id": "2", "content": "world"}, + "3": {"id": "3", "content": ""}, + "4": {"id": "4", "content": "RENDER_CELL_DEMO"}, + } == response.json + + +def test_table_shape_object_compound_primary_Key(app_client): + response = app_client.get("/fixtures/compound_primary_key.json?_shape=object") + assert {"a,b": {"pk1": "a", "pk2": "b", "content": "c"}} == response.json + + +def test_table_with_slashes_in_name(app_client): + response = app_client.get( + "/fixtures/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json" ) - assert response.status_code == 200 - response = await ds_client.get( - "/fixtures/-/query.json?sql=select+sleep(0.01)&_timelimit=5", + assert response.status == 200 + data = response.json + assert data["rows"] == [{"pk": "3", 
"content": "hey"}] + + +def test_table_with_reserved_word_name(app_client): + response = app_client.get("/fixtures/select.json?_shape=objects") + assert response.status == 200 + data = response.json + assert data["rows"] == [ + { + "rowid": 1, + "group": "group", + "having": "having", + "and": "and", + "json": '{"href": "http://example.com/", "label":"Example"}', + } + ] + + +@pytest.mark.parametrize( + "path,expected_rows,expected_pages", + [ + ("/fixtures/no_primary_key.json", 201, 5), + ("/fixtures/paginated_view.json", 201, 5), + ("/fixtures/no_primary_key.json?_size=25", 201, 9), + ("/fixtures/paginated_view.json?_size=25", 201, 9), + ("/fixtures/paginated_view.json?_size=max", 201, 3), + ("/fixtures/123_starts_with_digits.json", 0, 1), + # Ensure faceting doesn't break pagination: + ("/fixtures/compound_three_primary_keys.json?_facet=pk1", 1001, 21), + # Paginating while sorted by an expanded foreign key should work + ( + "/fixtures/roadside_attraction_characteristics.json?_size=2&_sort=attraction_id&_labels=on", + 5, + 3, + ), + ], +) +def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pages): + fetched = [] + count = 0 + while path: + response = app_client.get(path) + assert 200 == response.status + count += 1 + fetched.extend(response.json["rows"]) + path = response.json["next_url"] + if path: + assert urllib.parse.urlencode({"_next": response.json["next"]}) in path + path = path.replace("http://localhost", "") + assert count < 30, "Possible infinite loop detected" + + assert expected_rows == len(fetched) + assert expected_pages == count + + +@pytest.mark.parametrize( + "path,expected_error", + [ + ("/fixtures/no_primary_key.json?_size=-4", "_size must be a positive integer"), + ("/fixtures/no_primary_key.json?_size=dog", "_size must be a positive integer"), + ("/fixtures/no_primary_key.json?_size=1001", "_size must be <= 100"), + ], +) +def test_validate_page_size(app_client, path, expected_error): + response = app_client.get(path) + assert expected_error == response.json["error"] + assert 400 == response.status + + +def test_page_size_zero(app_client): + "For _size=0 we return the counts, empty rows and no continuation token" + response = app_client.get("/fixtures/no_primary_key.json?_size=0") + assert 200 == response.status + assert [] == response.json["rows"] + assert 201 == response.json["filtered_table_rows_count"] + assert None is response.json["next"] + assert None is response.json["next_url"] + + +def test_paginate_compound_keys(app_client): + fetched = [] + path = "/fixtures/compound_three_primary_keys.json?_shape=objects" + page = 0 + while path: + page += 1 + response = app_client.get(path) + fetched.extend(response.json["rows"]) + path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") + assert page < 100 + assert 1001 == len(fetched) + assert 21 == page + # Should be correctly ordered + contents = [f["content"] for f in fetched] + expected = [r[3] for r in generate_compound_rows(1001)] + assert expected == contents + + +def test_paginate_compound_keys_with_extra_filters(app_client): + fetched = [] + path = ( + "/fixtures/compound_three_primary_keys.json?content__contains=d&_shape=objects" ) - assert response.status_code == 400 - assert response.json()["title"] == "SQL Interrupted" + page = 0 + while path: + page += 1 + assert page < 100 + response = app_client.get(path) + fetched.extend(response.json["rows"]) + path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") + assert 
2 == page + expected = [r[3] for r in generate_compound_rows(1001) if "d" in r[3]] + assert expected == [f["content"] for f in fetched] -@pytest.mark.asyncio -async def test_invalid_custom_sql(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?sql=.schema", +@pytest.mark.parametrize( + "query_string,sort_key,human_description_en", + [ + ("_sort=sortable", lambda row: row["sortable"], "sorted by sortable"), + ( + "_sort_desc=sortable", + lambda row: -row["sortable"], + "sorted by sortable descending", + ), + ( + "_sort=sortable_with_nulls", + lambda row: ( + 1 if row["sortable_with_nulls"] is not None else 0, + row["sortable_with_nulls"], + ), + "sorted by sortable_with_nulls", + ), + ( + "_sort_desc=sortable_with_nulls", + lambda row: ( + 1 if row["sortable_with_nulls"] is None else 0, + -row["sortable_with_nulls"] + if row["sortable_with_nulls"] is not None + else 0, + row["content"], + ), + "sorted by sortable_with_nulls descending", + ), + # text column contains '$null' - ensure it doesn't confuse pagination: + ("_sort=text", lambda row: row["text"], "sorted by text"), + ], +) +def test_sortable(app_client, query_string, sort_key, human_description_en): + path = "/fixtures/sortable.json?_shape=objects&{}".format(query_string) + fetched = [] + page = 0 + while path: + page += 1 + assert page < 100 + response = app_client.get(path) + assert human_description_en == response.json["human_description_en"] + fetched.extend(response.json["rows"]) + path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") + assert 5 == page + expected = list(generate_sortable_rows(201)) + expected.sort(key=sort_key) + assert [r["content"] for r in expected] == [r["content"] for r in fetched] + + +def test_sortable_and_filtered(app_client): + path = ( + "/fixtures/sortable.json" + "?content__contains=d&_sort_desc=sortable&_shape=objects" ) - assert response.status_code == 400 - assert response.json()["ok"] is False - assert "Statement must be a SELECT" == response.json()["error"] - - -@pytest.mark.asyncio -async def test_row(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key/1.json?_shape=objects") - assert response.status_code == 200 - assert response.json()["rows"] == [{"id": 1, "content": "hello"}] - - -@pytest.mark.asyncio -async def test_row_strange_table_name(ds_client): - response = await ds_client.get( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3.json?_shape=objects" + response = app_client.get(path) + fetched = response.json["rows"] + assert ( + 'where content contains "d" sorted by sortable descending' + == response.json["human_description_en"] ) - assert response.status_code == 200 - assert response.json()["rows"] == [{"pk": "3", "content": "hey"}] + expected = [row for row in generate_sortable_rows(201) if "d" in row["content"]] + assert len(expected) == response.json["filtered_table_rows_count"] + expected.sort(key=lambda row: -row["sortable"]) + assert [r["content"] for r in expected] == [r["content"] for r in fetched] -@pytest.mark.asyncio -async def test_row_foreign_key_tables(ds_client): - response = await ds_client.get( +def test_sortable_argument_errors(app_client): + response = app_client.get("/fixtures/sortable.json?_sort=badcolumn") + assert "Cannot sort table by badcolumn" == response.json["error"] + response = app_client.get("/fixtures/sortable.json?_sort_desc=badcolumn2") + assert "Cannot sort table by badcolumn2" == response.json["error"] + response = app_client.get( + 
"/fixtures/sortable.json?_sort=sortable_with_nulls&_sort_desc=sortable" + ) + assert "Cannot use _sort and _sort_desc at the same time" == response.json["error"] + + +def test_sortable_columns_metadata(app_client): + response = app_client.get("/fixtures/sortable.json?_sort=content") + assert "Cannot sort table by content" == response.json["error"] + # no_primary_key has ALL sort options disabled + for column in ("content", "a", "b", "c"): + response = app_client.get("/fixtures/sortable.json?_sort={}".format(column)) + assert "Cannot sort table by {}".format(column) == response.json["error"] + + +@pytest.mark.parametrize( + "path,expected_rows", + [ + ( + "/fixtures/searchable.json?_search=dog", + [ + [1, "barry cat", "terry dog", "panther"], + [2, "terry dog", "sara weasel", "puma"], + ], + ), + ( + "/fixtures/searchable.json?_search=weasel", + [[2, "terry dog", "sara weasel", "puma"]], + ), + ( + "/fixtures/searchable.json?_search_text2=dog", + [[1, "barry cat", "terry dog", "panther"]], + ), + ( + "/fixtures/searchable.json?_search_name%20with%20.%20and%20spaces=panther", + [[1, "barry cat", "terry dog", "panther"]], + ), + ], +) +def test_searchable(app_client, path, expected_rows): + response = app_client.get(path) + assert expected_rows == response.json["rows"] + + +@pytest.mark.parametrize( + "path,expected_rows", + [ + ( + "/fixtures/searchable_view_configured_by_metadata.json?_search=weasel", + [[2, "terry dog", "sara weasel", "puma"]], + ), + # This should return all results because search is not configured: + ( + "/fixtures/searchable_view.json?_search=weasel", + [ + [1, "barry cat", "terry dog", "panther"], + [2, "terry dog", "sara weasel", "puma"], + ], + ), + ( + "/fixtures/searchable_view.json?_search=weasel&_fts_table=searchable_fts&_fts_pk=pk", + [[2, "terry dog", "sara weasel", "puma"]], + ), + ], +) +def test_searchable_views(app_client, path, expected_rows): + response = app_client.get(path) + assert expected_rows == response.json["rows"] + + +def test_searchable_invalid_column(app_client): + response = app_client.get("/fixtures/searchable.json?_search_invalid=x") + assert 400 == response.status + assert { + "ok": False, + "error": "Cannot search by that column", + "status": 400, + "title": None, + } == response.json + + +@pytest.mark.parametrize( + "path,expected_rows", + [ + ("/fixtures/simple_primary_key.json?content=hello", [["1", "hello"]]), + ( + "/fixtures/simple_primary_key.json?content__contains=o", + [["1", "hello"], ["2", "world"], ["4", "RENDER_CELL_DEMO"]], + ), + ("/fixtures/simple_primary_key.json?content__exact=", [["3", ""]]), + ( + "/fixtures/simple_primary_key.json?content__not=world", + [["1", "hello"], ["3", ""], ["4", "RENDER_CELL_DEMO"]], + ), + ], +) +def test_table_filter_queries(app_client, path, expected_rows): + response = app_client.get(path) + assert expected_rows == response.json["rows"] + + +def test_table_filter_queries_multiple_of_same_type(app_client): + response = app_client.get( + "/fixtures/simple_primary_key.json?content__not=world&content__not=hello" + ) + assert [["3", ""], ["4", "RENDER_CELL_DEMO"]] == response.json["rows"] + + +@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") +def test_table_filter_json_arraycontains(app_client): + response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1") + assert [ + [1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], + ] == 
response.json["rows"] + + +def test_table_filter_extra_where(app_client): + response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") + assert [ + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'] + ] == response.json["rows"] + + +def test_table_filter_extra_where_invalid(app_client): + response = app_client.get("/fixtures/facetable.json?_where=neighborhood=Dogpatch'") + assert 400 == response.status + assert "Invalid SQL" == response.json["title"] + + +def test_table_filter_extra_where_disabled_if_no_sql_allowed(): + for client in make_app_client(config={"allow_sql": False}): + response = client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") + assert 400 == response.status + assert "_where= is not allowed" == response.json["error"] + + +def test_table_through(app_client): + # Just the museums: + response = app_client.get( + '/fixtures/roadside_attractions.json?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' + ) + assert [ + [ + 3, + "Burlingame Museum of PEZ Memorabilia", + "214 California Drive, Burlingame, CA 94010", + 37.5793, + -122.3442, + ], + [ + 4, + "Bigfoot Discovery Museum", + "5497 Highway 9, Felton, CA 95018", + 37.0414, + -122.0725, + ], + ] == response.json["rows"] + assert ( + 'where roadside_attraction_characteristics.characteristic_id = "1"' + == response.json["human_description_en"] + ) + + +def test_max_returned_rows(app_client): + response = app_client.get("/fixtures.json?sql=select+content+from+no_primary_key") + data = response.json + assert {"sql": "select content from no_primary_key", "params": {}} == data["query"] + assert data["truncated"] + assert 100 == len(data["rows"]) + + +def test_view(app_client): + response = app_client.get("/fixtures/simple_view.json?_shape=objects") + assert response.status == 200 + data = response.json + assert data["rows"] == [ + {"upper_content": "HELLO", "content": "hello"}, + {"upper_content": "WORLD", "content": "world"}, + {"upper_content": "", "content": ""}, + {"upper_content": "RENDER_CELL_DEMO", "content": "RENDER_CELL_DEMO"}, + ] + + +def test_row(app_client): + response = app_client.get("/fixtures/simple_primary_key/1.json?_shape=objects") + assert response.status == 200 + assert [{"id": "1", "content": "hello"}] == response.json["rows"] + + +def test_row_strange_table_name(app_client): + response = app_client.get( + "/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects" + ) + assert response.status == 200 + assert [{"pk": "3", "content": "hey"}] == response.json["rows"] + + +def test_row_foreign_key_tables(app_client): + response = app_client.get( "/fixtures/simple_primary_key/1.json?_extras=foreign_key_tables" ) - assert response.status_code == 200 - assert response.json()["foreign_key_tables"] == [ + assert response.status == 200 + assert [ { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_blank_label", - "count": 0, - "link": "/fixtures/foreign_key_references?foreign_key_with_blank_label=1", - }, - { - "other_table": "foreign_key_references", "column": "id", + "count": 1, "other_column": "foreign_key_with_label", - "count": 1, - "link": "/fixtures/foreign_key_references?foreign_key_with_label=1", + "other_table": "foreign_key_references", }, { - "other_table": "complex_foreign_keys", "column": "id", + "count": 1, "other_column": "f3", - "count": 1, - "link": "/fixtures/complex_foreign_keys?f3=1", + "other_table": "complex_foreign_keys", }, { - 
"other_table": "complex_foreign_keys", "column": "id", - "other_column": "f2", "count": 0, - "link": "/fixtures/complex_foreign_keys?f2=1", + "other_column": "f2", + "other_table": "complex_foreign_keys", }, { - "other_table": "complex_foreign_keys", "column": "id", - "other_column": "f1", "count": 1, - "link": "/fixtures/complex_foreign_keys?f1=1", + "other_column": "f1", + "other_table": "complex_foreign_keys", }, - ] + ] == response.json["foreign_key_tables"] + + +def test_unit_filters(app_client): + response = app_client.get( + "/fixtures/units.json?distance__lt=75km&frequency__gt=1kHz" + ) + assert response.status == 200 + data = response.json + + assert data["units"]["distance"] == "m" + assert data["units"]["frequency"] == "Hz" + + assert len(data["rows"]) == 1 + assert data["rows"][0][0] == 2 def test_databases_json(app_client_two_attached_databases_one_immutable): response = app_client_two_attached_databases_one_immutable.get("/-/databases.json") databases = response.json - assert 2 == len(databases) - extra_database, fixtures_database = databases - assert "extra database" == extra_database["name"] + assert 3 == len(databases) + by_name = {database["name"]: database for database in databases} + extra_database = by_name["extra_database"] + fixtures_database = by_name["fixtures"] + assert "extra_database" == extra_database["name"] assert None == extra_database["hash"] assert True == extra_database["is_mutable"] assert False == extra_database["is_memory"] @@ -783,119 +1211,378 @@ def test_databases_json(app_client_two_attached_databases_one_immutable): assert False == fixtures_database["is_memory"] -@pytest.mark.asyncio -async def test_threads_json(ds_client): - response = await ds_client.get("/-/threads.json") - expected_keys = {"threads", "num_threads"} - if sys.version_info >= (3, 7, 0): - expected_keys.update({"tasks", "num_tasks"}) - data = response.json() - assert set(data.keys()) == expected_keys - # Should be at least one _execute_writes thread for __INTERNAL__ - thread_names = [thread["name"] for thread in data["threads"]] - assert "_execute_writes for database __INTERNAL__" in thread_names +def test_metadata_json(app_client): + response = app_client.get("/-/metadata.json") + assert METADATA == response.json -@pytest.mark.asyncio -async def test_plugins_json(ds_client): - response = await ds_client.get("/-/plugins.json") - # Filter out TrackEventPlugin - actual_plugins = sorted( - [p for p in response.json() if p["name"] != "TrackEventPlugin"], - key=lambda p: p["name"], - ) - assert EXPECTED_PLUGINS == actual_plugins - # Try with ?all=1 - response = await ds_client.get("/-/plugins.json?all=1") - names = {p["name"] for p in response.json()} - assert names.issuperset(p["name"] for p in EXPECTED_PLUGINS) - assert names.issuperset(DEFAULT_PLUGINS) +def test_plugins_json(app_client): + response = app_client.get("/-/plugins.json") + assert [ + {"name": "my_plugin.py", "static": False, "templates": False, "version": None}, + { + "name": "my_plugin_2.py", + "static": False, + "templates": False, + "version": None, + }, + ] == sorted(response.json, key=lambda p: p["name"]) -@pytest.mark.asyncio -async def test_versions_json(ds_client): - response = await ds_client.get("/-/versions.json") - data = response.json() - assert "python" in data - assert "3.0" == data.get("asgi") - assert "version" in data["python"] - assert "full" in data["python"] - assert "datasette" in data - assert "version" in data["datasette"] - assert data["datasette"]["version"] == __version__ - assert "sqlite" in 
data - assert "version" in data["sqlite"] - assert "fts_versions" in data["sqlite"] - assert "compile_options" in data["sqlite"] - # By default, the json1 extension is enabled in the SQLite - # provided by the `ubuntu-latest` github actions runner, and - # all versions of SQLite from 3.38.0 onwards - assert data["sqlite"]["extensions"]["json1"] +def test_versions_json(app_client): + response = app_client.get("/-/versions.json") + assert "python" in response.json + assert "3.0" == response.json.get("asgi") + assert "version" in response.json["python"] + assert "full" in response.json["python"] + assert "datasette" in response.json + assert "version" in response.json["datasette"] + assert "sqlite" in response.json + assert "version" in response.json["sqlite"] + assert "fts_versions" in response.json["sqlite"] + assert "compile_options" in response.json["sqlite"] -@pytest.mark.asyncio -async def test_actions_json(ds_client): - original_root_enabled = ds_client.ds.root_enabled - try: - ds_client.ds.root_enabled = True - cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} - response = await ds_client.get("/-/actions.json", cookies=cookies) - data = response.json() - finally: - ds_client.ds.root_enabled = original_root_enabled - assert isinstance(data, list) - assert len(data) > 0 - # Check structure of first action - action = data[0] - for key in ( - "name", - "abbr", - "description", - "takes_parent", - "takes_child", - "resource_class", - "also_requires", - ): - assert key in action - # Check that some expected actions exist - action_names = {a["name"] for a in data} - for expected_action in ( - "view-instance", - "view-database", - "view-table", - "execute-sql", - ): - assert expected_action in action_names - - -@pytest.mark.asyncio -async def test_settings_json(ds_client): - response = await ds_client.get("/-/settings.json") - assert response.json() == { +def test_config_json(app_client): + response = app_client.get("/-/config.json") + assert { "default_page_size": 50, "default_facet_size": 30, - "default_allow_sql": True, - "facet_suggest_time_limit_ms": 200, + "facet_suggest_time_limit_ms": 50, "facet_time_limit_ms": 200, "max_returned_rows": 100, - "max_insert_rows": 100, "sql_time_limit_ms": 200, "allow_download": True, - "allow_signed_tokens": True, - "max_signed_tokens_ttl": 0, "allow_facet": True, "suggest_facets": True, + "allow_sql": True, "default_cache_ttl": 5, - "num_sql_threads": 1, + "default_cache_ttl_hashed": 365 * 24 * 60 * 60, + "num_sql_threads": 3, "cache_size_kb": 0, "allow_csv_stream": True, "max_csv_mb": 100, "truncate_cells_html": 2048, "force_https_urls": False, - "template_debug": False, - "trace_debug": False, - "base_url": "/", - } + "hash_urls": False, + } == response.json + + +def test_page_size_matching_max_returned_rows( + app_client_returned_rows_matches_page_size +): + fetched = [] + path = "/fixtures/no_primary_key.json" + while path: + response = app_client_returned_rows_matches_page_size.get(path) + fetched.extend(response.json["rows"]) + assert len(response.json["rows"]) in (1, 50) + path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") + assert 201 == len(fetched) + + +@pytest.mark.parametrize( + "path,expected_facet_results", + [ + ( + "/fixtures/facetable.json?_facet=state&_facet=city_id", + { + "state": { + "name": "state", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json?_facet=city_id", + "results": [ + { + "value": "CA", + "label": "CA", + "count": 10, + "toggle_url": 
"_facet=state&_facet=city_id&state=CA", + "selected": False, + }, + { + "value": "MI", + "label": "MI", + "count": 4, + "toggle_url": "_facet=state&_facet=city_id&state=MI", + "selected": False, + }, + { + "value": "MC", + "label": "MC", + "count": 1, + "toggle_url": "_facet=state&_facet=city_id&state=MC", + "selected": False, + }, + ], + "truncated": False, + }, + "city_id": { + "name": "city_id", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json?_facet=state", + "results": [ + { + "value": 1, + "label": "San Francisco", + "count": 6, + "toggle_url": "_facet=state&_facet=city_id&city_id=1", + "selected": False, + }, + { + "value": 2, + "label": "Los Angeles", + "count": 4, + "toggle_url": "_facet=state&_facet=city_id&city_id=2", + "selected": False, + }, + { + "value": 3, + "label": "Detroit", + "count": 4, + "toggle_url": "_facet=state&_facet=city_id&city_id=3", + "selected": False, + }, + { + "value": 4, + "label": "Memnonia", + "count": 1, + "toggle_url": "_facet=state&_facet=city_id&city_id=4", + "selected": False, + }, + ], + "truncated": False, + }, + }, + ), + ( + "/fixtures/facetable.json?_facet=state&_facet=city_id&state=MI", + { + "state": { + "name": "state", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json?_facet=city_id&state=MI", + "results": [ + { + "value": "MI", + "label": "MI", + "count": 4, + "selected": True, + "toggle_url": "_facet=state&_facet=city_id", + } + ], + "truncated": False, + }, + "city_id": { + "name": "city_id", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json?_facet=state&state=MI", + "results": [ + { + "value": 3, + "label": "Detroit", + "count": 4, + "selected": False, + "toggle_url": "_facet=state&_facet=city_id&state=MI&city_id=3", + } + ], + "truncated": False, + }, + }, + ), + ( + "/fixtures/facetable.json?_facet=planet_int", + { + "planet_int": { + "name": "planet_int", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json", + "results": [ + { + "value": 1, + "label": 1, + "count": 14, + "selected": False, + "toggle_url": "_facet=planet_int&planet_int=1", + }, + { + "value": 2, + "label": 2, + "count": 1, + "selected": False, + "toggle_url": "_facet=planet_int&planet_int=2", + }, + ], + "truncated": False, + } + }, + ), + ( + # planet_int is an integer field: + "/fixtures/facetable.json?_facet=planet_int&planet_int=1", + { + "planet_int": { + "name": "planet_int", + "hideable": True, + "type": "column", + "toggle_url": "/fixtures/facetable.json?planet_int=1", + "results": [ + { + "value": 1, + "label": 1, + "count": 14, + "selected": True, + "toggle_url": "_facet=planet_int", + } + ], + "truncated": False, + } + }, + ), + ], +) +def test_facets(app_client, path, expected_facet_results): + response = app_client.get(path) + facet_results = response.json["facet_results"] + # We only compare the querystring portion of the taggle_url + for facet_name, facet_info in facet_results.items(): + assert facet_name == facet_info["name"] + assert False is facet_info["truncated"] + for facet_value in facet_info["results"]: + facet_value["toggle_url"] = facet_value["toggle_url"].split("?")[1] + assert expected_facet_results == facet_results + + +def test_suggested_facets(app_client): + suggestions = [ + { + "name": suggestion["name"], + "querystring": suggestion["toggle_url"].split("?")[-1], + } + for suggestion in app_client.get("/fixtures/facetable.json").json[ + "suggested_facets" + ] + ] + expected = [ + {"name": "created", 
"querystring": "_facet=created"}, + {"name": "planet_int", "querystring": "_facet=planet_int"}, + {"name": "on_earth", "querystring": "_facet=on_earth"}, + {"name": "state", "querystring": "_facet=state"}, + {"name": "city_id", "querystring": "_facet=city_id"}, + {"name": "neighborhood", "querystring": "_facet=neighborhood"}, + {"name": "tags", "querystring": "_facet=tags"}, + {"name": "created", "querystring": "_facet_date=created"}, + ] + if detect_json1(): + expected.append({"name": "tags", "querystring": "_facet_array=tags"}) + assert expected == suggestions + + +def test_allow_facet_off(): + for client in make_app_client(config={"allow_facet": False}): + assert 400 == client.get("/fixtures/facetable.json?_facet=planet_int").status + # Should not suggest any facets either: + assert [] == client.get("/fixtures/facetable.json").json["suggested_facets"] + + +def test_suggest_facets_off(): + for client in make_app_client(config={"suggest_facets": False}): + # Now suggested_facets should be [] + assert [] == client.get("/fixtures/facetable.json").json["suggested_facets"] + + +def test_expand_labels(app_client): + response = app_client.get( + "/fixtures/facetable.json?_shape=object&_labels=1&_size=2" + "&neighborhood__contains=c" + ) + assert { + "2": { + "pk": 2, + "created": "2019-01-14 08:00:00", + "planet_int": 1, + "on_earth": 1, + "state": "CA", + "city_id": {"value": 1, "label": "San Francisco"}, + "neighborhood": "Dogpatch", + "tags": '["tag1", "tag3"]', + }, + "13": { + "pk": 13, + "created": "2019-01-17 08:00:00", + "planet_int": 1, + "on_earth": 1, + "state": "MI", + "city_id": {"value": 3, "label": "Detroit"}, + "neighborhood": "Corktown", + "tags": "[]", + }, + } == response.json + + +def test_expand_label(app_client): + response = app_client.get( + "/fixtures/foreign_key_references.json?_shape=object" + "&_label=foreign_key_with_label" + ) + assert { + "1": { + "pk": "1", + "foreign_key_with_label": {"value": "1", "label": "hello"}, + "foreign_key_with_no_label": "1", + } + } == response.json + + +@pytest.mark.parametrize( + "path,expected_cache_control", + [ + ("/fixtures/facetable.json", "max-age=5"), + ("/fixtures/facetable.json?_ttl=invalid", "max-age=5"), + ("/fixtures/facetable.json?_ttl=10", "max-age=10"), + ("/fixtures/facetable.json?_ttl=0", "no-cache"), + ], +) +def test_ttl_parameter(app_client, path, expected_cache_control): + response = app_client.get(path) + assert expected_cache_control == response.headers["Cache-Control"] + + +@pytest.mark.parametrize( + "path,expected_redirect", + [ + ("/fixtures/facetable.json?_hash=1", "/fixtures-HASH/facetable.json"), + ( + "/fixtures/facetable.json?city_id=1&_hash=1", + "/fixtures-HASH/facetable.json?city_id=1", + ), + ], +) +def test_hash_parameter( + app_client_two_attached_databases_one_immutable, path, expected_redirect +): + # First get the current hash for the fixtures database + current_hash = app_client_two_attached_databases_one_immutable.ds.databases[ + "fixtures" + ].hash[:7] + response = app_client_two_attached_databases_one_immutable.get( + path, allow_redirects=False + ) + assert response.status == 302 + location = response.headers["Location"] + assert expected_redirect.replace("HASH", current_hash) == location + + +def test_hash_parameter_ignored_for_mutable_databases(app_client): + path = "/fixtures/facetable.json?_hash=1" + response = app_client.get(path, allow_redirects=False) + assert response.status == 200 test_json_columns_default_expected = [ @@ -903,7 +1590,6 @@ test_json_columns_default_expected = [ 
] -@pytest.mark.asyncio @pytest.mark.parametrize( "extra_args,expected", [ @@ -917,329 +1603,86 @@ test_json_columns_default_expected = [ ), ], ) -async def test_json_columns(ds_client, extra_args, expected): +def test_json_columns(app_client, extra_args, expected): sql = """ select 1 as intval, "s" as strval, 0.5 as floatval, '{"foo": "bar"}' as jsonval """ - path = "/fixtures/-/query.json?" + urllib.parse.urlencode( - {"sql": sql, "_shape": "array"} - ) + path = "/fixtures.json?" + urllib.parse.urlencode({"sql": sql, "_shape": "array"}) path += extra_args - response = await ds_client.get( - path, - ) - assert response.json() == expected + response = app_client.get(path) + assert expected == response.json def test_config_cache_size(app_client_larger_cache_size): response = app_client_larger_cache_size.get("/fixtures/pragma_cache_size.json") - assert response.json["rows"] == [{"cache_size": -2500}] + assert [[-2500]] == response.json["rows"] def test_config_force_https_urls(): - with make_app_client(settings={"force_https_urls": True}) as client: - response = client.get( - "/fixtures/facetable.json?_size=3&_facet=state&_extra=next_url,suggested_facets" - ) + for client in make_app_client(config={"force_https_urls": True}): + response = client.get("/fixtures/facetable.json?_size=3&_facet=state") assert response.json["next_url"].startswith("https://") - assert response.json["facet_results"]["results"]["state"]["results"][0][ + assert response.json["facet_results"]["state"]["results"][0][ "toggle_url" ].startswith("https://") assert response.json["suggested_facets"][0]["toggle_url"].startswith("https://") - # Also confirm that request.url and request.scheme are set correctly - response = client.get("/") - assert client.ds._last_request.url.startswith("https://") - assert client.ds._last_request.scheme == "https" + + +def test_infinity_returned_as_null(app_client): + response = app_client.get("/fixtures/infinity.json?_shape=array") + assert [ + {"rowid": 1, "value": None}, + {"rowid": 2, "value": None}, + {"rowid": 3, "value": 1.5}, + ] == response.json + + +def test_infinity_returned_as_invalid_json_if_requested(app_client): + response = app_client.get("/fixtures/infinity.json?_shape=array&_json_infinity=1") + assert [ + {"rowid": 1, "value": float("inf")}, + {"rowid": 2, "value": float("-inf")}, + {"rowid": 3, "value": 1.5}, + ] == response.json + + +def test_custom_query_with_unicode_characters(app_client): + response = app_client.get("/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json?_shape=array") + assert [{"id": 1, "name": "San Francisco"}] == response.json + + +def test_trace(app_client): + response = app_client.get("/fixtures/simple_primary_key.json?_trace=1") + data = response.json + assert "_trace" in data + trace_info = data["_trace"] + assert isinstance(trace_info["request_duration_ms"], float) + assert isinstance(trace_info["sum_trace_duration_ms"], float) + assert isinstance(trace_info["num_traces"], int) + assert isinstance(trace_info["traces"], list) + assert len(trace_info["traces"]) == trace_info["num_traces"] + for trace in trace_info["traces"]: + assert isinstance(trace["type"], str) + assert isinstance(trace["start"], float) + assert isinstance(trace["end"], float) + assert trace["duration_ms"] == (trace["end"] - trace["start"]) * 1000 + assert isinstance(trace["traceback"], list) + assert isinstance(trace["database"], str) + assert isinstance(trace["sql"], str) + assert isinstance(trace["params"], (list, dict)) @pytest.mark.parametrize( "path,status_code", [ - ("/fixtures.db", 200), 
("/fixtures.json", 200), ("/fixtures/no_primary_key.json", 200), # A 400 invalid SQL query should still have the header: - ("/fixtures/-/query.json?sql=select+blah", 400), - # Write APIs - ("/fixtures/-/create", 405), - ("/fixtures/facetable/-/insert", 405), - ("/fixtures/facetable/-/drop", 405), + ("/fixtures.json?sql=select+blah", 400), ], ) -def test_cors( - app_client_with_cors, - app_client_two_attached_databases_one_immutable, - path, - status_code, -): - response = app_client_with_cors.get( - path, - ) +def test_cors(app_client_with_cors, path, status_code): + response = app_client_with_cors.get(path) assert response.status == status_code - assert response.headers["Access-Control-Allow-Origin"] == "*" - assert ( - response.headers["Access-Control-Allow-Headers"] - == "Authorization, Content-Type" - ) - assert response.headers["Access-Control-Expose-Headers"] == "Link" - assert ( - response.headers["Access-Control-Allow-Methods"] == "GET, POST, HEAD, OPTIONS" - ) - assert response.headers["Access-Control-Max-Age"] == "3600" - # Same request to app_client_two_attached_databases_one_immutable - # should not have those headers - I'm using that fixture because - # regular app_client doesn't have immutable fixtures.db which means - # the test for /fixtures.db returns a 403 error - response = app_client_two_attached_databases_one_immutable.get( - path, - ) - assert response.status == status_code - assert "Access-Control-Allow-Origin" not in response.headers - assert "Access-Control-Allow-Headers" not in response.headers - assert "Access-Control-Expose-Headers" not in response.headers - assert "Access-Control-Allow-Methods" not in response.headers - assert "Access-Control-Max-Age" not in response.headers - - -@pytest.mark.parametrize( - "path", - ( - "/", - ".json", - "/searchable", - "/searchable.json", - "/searchable_view", - "/searchable_view.json", - ), -) -def test_database_with_space_in_name(app_client_two_attached_databases, path): - response = app_client_two_attached_databases.get( - "/extra~20database" + path, follow_redirects=True - ) - assert response.status == 200 - - -def test_common_prefix_database_names(app_client_conflicting_database_names): - # https://github.com/simonw/datasette/issues/597 - assert ["foo-bar", "foo", "fixtures"] == [ - d["name"] - for d in app_client_conflicting_database_names.get("/-/databases.json").json - ] - for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-bar.json")): - data = app_client_conflicting_database_names.get(path).json - assert db_name == data["database"] - - -def test_inspect_file_used_for_count(app_client_immutable_and_inspect_file): - response = app_client_immutable_and_inspect_file.get( - "/fixtures/sortable.json?_extra=count" - ) - assert response.json["count"] == 100 - - -@pytest.mark.asyncio -async def test_http_options_request(ds_client): - response = await ds_client.options("/fixtures") - assert response.status_code == 200 - assert response.text == "ok" - - -@pytest.mark.asyncio -async def test_db_path(app_client): - # Needs app_client because needs file based database - db = app_client.ds.get_database() - path = pathlib.Path(db.path) - - assert path.exists() - - datasette = Datasette([path]) - - # Previously this broke if path was a pathlib.Path: - await datasette.refresh_schemas() - - -@pytest.mark.asyncio -async def test_hidden_sqlite_stat1_table(): - ds = Datasette() - db = ds.add_memory_database("db") - await db.execute_write("create table normal (id integer primary key, name text)") - await 
db.execute_write("create index idx on normal (name)") - await db.execute_write("analyze") - data = (await ds.client.get("/db.json?_show_hidden=1")).json() - tables = [(t["name"], t["hidden"]) for t in data["tables"]] - assert tables in ( - [("normal", False), ("sqlite_stat1", True)], - [("normal", False), ("sqlite_stat1", True), ("sqlite_stat4", True)], - ) - - -@pytest.mark.asyncio -async def test_hide_tables_starting_with_underscore(): - ds = Datasette() - db = ds.add_memory_database("test_hide_tables_starting_with_underscore") - await db.execute_write("create table normal (id integer primary key, name text)") - await db.execute_write("create table _hidden (id integer primary key, name text)") - data = ( - await ds.client.get( - "/test_hide_tables_starting_with_underscore.json?_show_hidden=1" - ) - ).json() - tables = [(t["name"], t["hidden"]) for t in data["tables"]] - assert tables == [("normal", False), ("_hidden", True)] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d")) -async def test_tilde_encoded_database_names(db_name): - ds = Datasette() - ds.add_memory_database(db_name) - response = await ds.client.get("/.json") - assert db_name in response.json()["databases"].keys() - path = response.json()["databases"][db_name]["path"] - # And the JSON for that database - response2 = await ds.client.get(path + ".json") - assert response2.status_code == 200 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "config,expected", - ( - ({}, {}), - ({"plugins": {"datasette-foo": "bar"}}, {"plugins": {"datasette-foo": "bar"}}), - # Test redaction - ( - { - "plugins": { - "datasette-auth": {"secret_key": "key"}, - "datasette-foo": "bar", - "datasette-auth2": {"password": "password"}, - "datasette-sentry": { - "dsn": "sentry:///foo", - }, - } - }, - { - "plugins": { - "datasette-auth": {"secret_key": "***"}, - "datasette-foo": "bar", - "datasette-auth2": {"password": "***"}, - "datasette-sentry": {"dsn": "***"}, - } - }, - ), - ), -) -async def test_config_json(config, expected): - "/-/config.json should return redacted configuration" - ds = Datasette(config=config) - response = await ds.client.get("/-/config.json") - assert response.json() == expected - - -@pytest.mark.asyncio -@pytest.mark.skip(reason="rm?") -@pytest.mark.parametrize( - "metadata,expected_config,expected_metadata", - ( - ({}, {}, {}), - ( - # Metadata input - { - "title": "Datasette Fixtures", - "databases": { - "fixtures": { - "tables": { - "sortable": { - "sortable_columns": [ - "sortable", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], - }, - "no_primary_key": {"sortable_columns": [], "hidden": True}, - "primary_key_multiple_columns_explicit_label": { - "label_column": "content2" - }, - "simple_view": {"sortable_columns": ["content"]}, - "searchable_view_configured_by_metadata": { - "fts_table": "searchable_fts", - "fts_pk": "pk", - }, - "roadside_attractions": { - "columns": { - "name": "The name of the attraction", - "address": "The street address for the attraction", - } - }, - "attraction_characteristic": {"sort_desc": "pk"}, - "facet_cities": {"sort": "name"}, - "paginated_view": {"size": 25}, - }, - } - }, - }, - # Should produce a config with just the table configuration keys - { - "databases": { - "fixtures": { - "tables": { - "sortable": { - "sortable_columns": [ - "sortable", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ] - }, - # These one get redacted: - "no_primary_key": "***", - "primary_key_multiple_columns_explicit_label": "***", - 
"simple_view": {"sortable_columns": ["content"]}, - "searchable_view_configured_by_metadata": { - "fts_table": "searchable_fts", - "fts_pk": "pk", - }, - "attraction_characteristic": {"sort_desc": "pk"}, - "facet_cities": {"sort": "name"}, - "paginated_view": {"size": 25}, - } - } - } - }, - # And metadata with everything else - { - "title": "Datasette Fixtures", - "databases": { - "fixtures": { - "tables": { - "roadside_attractions": { - "columns": { - "name": "The name of the attraction", - "address": "The street address for the attraction", - } - }, - } - } - }, - }, - ), - ), -) -async def test_upgrade_metadata(metadata, expected_config, expected_metadata): - ds = Datasette(metadata=metadata) - response = await ds.client.get("/-/config.json") - assert response.json() == expected_config - response2 = await ds.client.get("/-/metadata.json") - assert response2.json() == expected_metadata - - -class Either: - def __init__(self, a, b): - self.a = a - self.b = b - - def __eq__(self, other): - return other == self.a or other == self.b + assert "*" == response.headers["Access-Control-Allow-Origin"] diff --git a/tests/test_api_write.py b/tests/test_api_write.py deleted file mode 100644 index 3a76e655..00000000 --- a/tests/test_api_write.py +++ /dev/null @@ -1,1607 +0,0 @@ -from datasette.app import Datasette -from datasette.utils import sqlite3 -from .utils import last_event -import pytest -import time - - -@pytest.fixture -def ds_write(tmp_path_factory): - db_directory = tmp_path_factory.mktemp("dbs") - db_path = str(db_directory / "data.db") - db_path_immutable = str(db_directory / "immutable.db") - db1 = sqlite3.connect(str(db_path)) - db2 = sqlite3.connect(str(db_path_immutable)) - for db in (db1, db2): - db.execute("vacuum") - db.execute( - "create table docs (id integer primary key, title text, score float, age integer)" - ) - ds = Datasette([db_path], immutables=[db_path_immutable]) - ds.root_enabled = True - yield ds - db.close() - - -def write_token(ds, actor_id="root", permissions=None): - to_sign = {"a": actor_id, "token": "dstok", "t": int(time.time())} - if permissions: - to_sign["_r"] = {"a": permissions} - return "dstok_{}".format(ds.sign(to_sign, namespace="token")) - - -def _headers(token): - return { - "Authorization": "Bearer {}".format(token), - "Content-Type": "application/json", - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "content_type", - ( - "application/json", - "application/json; charset=utf-8", - ), -) -async def test_insert_row(ds_write, content_type): - token = write_token(ds_write) - response = await ds_write.client.post( - "/data/docs/-/insert", - json={"row": {"title": "Test", "score": 1.2, "age": 5}}, - headers={ - "Authorization": "Bearer {}".format(token), - "Content-Type": content_type, - }, - ) - expected_row = {"id": 1, "title": "Test", "score": 1.2, "age": 5} - assert response.status_code == 201 - assert response.json()["ok"] is True - assert response.json()["rows"] == [expected_row] - rows = (await ds_write.get_database("data").execute("select * from docs")).dicts() - assert rows[0] == expected_row - # Analytics event - event = last_event(ds_write) - assert event.name == "insert-rows" - assert event.num_rows == 1 - assert event.database == "data" - assert event.table == "docs" - assert not event.ignore - assert not event.replace - - -@pytest.mark.asyncio -async def test_insert_row_alter(ds_write): - token = write_token(ds_write) - response = await ds_write.client.post( - "/data/docs/-/insert", - json={ - "row": {"title": "Test", "score": 
1.2, "age": 5, "extra": "extra"}, - "alter": True, - }, - headers=_headers(token), - ) - assert response.status_code == 201 - assert response.json()["ok"] is True - assert response.json()["rows"][0]["extra"] == "extra" - # Analytics event - event = last_event(ds_write) - assert event.name == "alter-table" - assert "extra" not in event.before_schema - assert "extra" in event.after_schema - - -@pytest.mark.asyncio -@pytest.mark.parametrize("return_rows", (True, False)) -async def test_insert_rows(ds_write, return_rows): - token = write_token(ds_write) - data = { - "rows": [ - {"title": "Test {}".format(i), "score": 1.0, "age": 5} for i in range(20) - ] - } - if return_rows: - data["return"] = True - response = await ds_write.client.post( - "/data/docs/-/insert", - json=data, - headers=_headers(token), - ) - assert response.status_code == 201 - - # Analytics event - event = last_event(ds_write) - assert event.name == "insert-rows" - assert event.num_rows == 20 - assert event.database == "data" - assert event.table == "docs" - assert not event.ignore - assert not event.replace - - actual_rows = ( - await ds_write.get_database("data").execute("select * from docs") - ).dicts() - assert len(actual_rows) == 20 - assert actual_rows == [ - {"id": i + 1, "title": "Test {}".format(i), "score": 1.0, "age": 5} - for i in range(20) - ] - assert response.json()["ok"] is True - if return_rows: - assert response.json()["rows"] == actual_rows - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,input,special_case,expected_status,expected_errors", - ( - ( - "/data2/docs/-/insert", - {}, - None, - 404, - ["Database not found"], - ), - ( - "/data/docs2/-/insert", - {}, - None, - 404, - ["Table not found"], - ), - ( - "/data/docs/-/insert", - {"rows": [{"title": "Test"} for i in range(10)]}, - "bad_token", - 403, - ["Permission denied"], - ), - ( - "/data/docs/-/insert", - {}, - "invalid_json", - 400, - [ - "Invalid JSON: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)" - ], - ), - ( - "/data/docs/-/insert", - {}, - "invalid_content_type", - 400, - ["Invalid content-type, must be application/json"], - ), - ( - "/data/docs/-/insert", - [], - None, - 400, - ["JSON must be a dictionary"], - ), - ( - "/data/docs/-/insert", - {"row": "blah"}, - None, - 400, - ['"row" must be a dictionary'], - ), - ( - "/data/docs/-/insert", - {"blah": "blah"}, - None, - 400, - ['JSON must have one or other of "row" or "rows"'], - ), - ( - "/data/docs/-/insert", - {"rows": "blah"}, - None, - 400, - ['"rows" must be a list'], - ), - ( - "/data/docs/-/insert", - {"rows": ["blah"]}, - None, - 400, - ['"rows" must be a list of dictionaries'], - ), - ( - "/data/docs/-/insert", - {"rows": [{"title": "Test"} for i in range(101)]}, - None, - 400, - ["Too many rows, maximum allowed is 100"], - ), - ( - "/data/docs/-/insert", - {"rows": [{"id": 1, "title": "Test"}, {"id": 2, "title": "Test"}]}, - "duplicate_id", - 400, - ["UNIQUE constraint failed: docs.id"], - ), - ( - "/data/docs/-/insert", - {"rows": [{"title": "Test"}], "ignore": True, "replace": True}, - None, - 400, - ['Cannot use "ignore" and "replace" at the same time'], - ), - ( - # Replace is not allowed if you don't have update-row - "/data/docs/-/insert", - {"rows": [{"title": "Test"}], "replace": True}, - "insert-but-not-update", - 403, - ['Permission denied: need update-row to use "replace"'], - ), - ( - "/data/docs/-/insert", - {"rows": [{"title": "Test"}], "invalid_param": True}, - None, - 400, - ['Invalid parameter: "invalid_param"'], - ), - 
( - "/data/docs/-/insert", - {"rows": [{"title": "Test"}], "one": True, "two": True}, - None, - 400, - ['Invalid parameter: "one", "two"'], - ), - ( - "/immutable/docs/-/insert", - {"rows": [{"title": "Test"}]}, - None, - 403, - ["Database is immutable"], - ), - # Validate columns of each row - ( - "/data/docs/-/insert", - {"rows": [{"title": "Test", "bad": 1, "worse": 2} for i in range(2)]}, - None, - 400, - [ - "Row 0 has invalid columns: bad, worse", - "Row 1 has invalid columns: bad, worse", - ], - ), - ## UPSERT ERRORS: - ( - "/immutable/docs/-/upsert", - {"rows": [{"title": "Test"}]}, - None, - 403, - ["Database is immutable"], - ), - ( - "/data/badtable/-/upsert", - {"rows": [{"title": "Test"}]}, - None, - 404, - ["Table not found"], - ), - # missing primary key - ( - "/data/docs/-/upsert", - {"rows": [{"title": "Missing PK"}]}, - None, - 400, - ['Row 0 is missing primary key column(s): "id"'], - ), - # Upsert does not support ignore or replace - ( - "/data/docs/-/upsert", - {"rows": [{"id": 1, "title": "Bad"}], "ignore": True}, - None, - 400, - ["Upsert does not support ignore or replace"], - ), - # Upsert permissions - ( - "/data/docs/-/upsert", - {"rows": [{"id": 1, "title": "Disallowed"}]}, - "insert-but-not-update", - 403, - ["Permission denied: need both insert-row and update-row"], - ), - ( - "/data/docs/-/upsert", - {"rows": [{"id": 1, "title": "Disallowed"}]}, - "update-but-not-insert", - 403, - ["Permission denied: need both insert-row and update-row"], - ), - # Alter table forbidden without alter permission - ( - "/data/docs/-/upsert", - {"rows": [{"id": 1, "title": "One", "extra": "extra"}], "alter": True}, - "update-and-insert-but-no-alter", - 403, - ["Permission denied for alter-table"], - ), - ), -) -async def test_insert_or_upsert_row_errors( - ds_write, path, input, special_case, expected_status, expected_errors -): - token_permissions = [] - if special_case == "insert-but-not-update": - token_permissions = ["ir", "vi"] - if special_case == "update-but-not-insert": - token_permissions = ["ur", "vi"] - if special_case == "update-and-insert-but-no-alter": - token_permissions = ["ur", "ir"] - token = write_token(ds_write, permissions=token_permissions) - if special_case == "duplicate_id": - await ds_write.get_database("data").execute_write( - "insert into docs (id) values (1)" - ) - if special_case == "bad_token": - token += "bad" - kwargs = dict( - json=input, - headers={ - "Authorization": "Bearer {}".format(token), - "Content-Type": ( - "text/plain" - if special_case == "invalid_content_type" - else "application/json" - ), - }, - ) - - actor_response = ( - await ds_write.client.get("/-/actor.json", headers=kwargs["headers"]) - ).json() - assert set((actor_response["actor"] or {}).get("_r", {}).get("a") or []) == set( - token_permissions - ) - - if special_case == "invalid_json": - del kwargs["json"] - kwargs["content"] = "{bad json" - before_count = ( - await ds_write.get_database("data").execute("select count(*) from docs") - ).rows[0][0] == 0 - response = await ds_write.client.post( - path, - **kwargs, - ) - assert response.status_code == expected_status - assert response.json()["ok"] is False - assert response.json()["errors"] == expected_errors - # Check that no rows were inserted - after_count = ( - await ds_write.get_database("data").execute("select count(*) from docs") - ).rows[0][0] == 0 - assert before_count == after_count - - -@pytest.mark.asyncio -@pytest.mark.parametrize("allowed", (True, False)) -async def test_upsert_permissions_per_table(ds_write, 
allowed): - # https://github.com/simonw/datasette/issues/2262 - token = "dstok_{}".format( - ds_write.sign( - { - "a": "root", - "token": "dstok", - "t": int(time.time()), - "_r": { - "r": { - "data": { - "docs" if allowed else "other": ["ir", "ur"], - } - } - }, - }, - namespace="token", - ) - ) - response = await ds_write.client.post( - "/data/docs/-/upsert", - json={"rows": [{"id": 1, "title": "One"}]}, - headers={ - "Authorization": "Bearer {}".format(token), - }, - ) - if allowed: - assert response.status_code == 200 - assert response.json()["ok"] is True - else: - assert response.status_code == 403 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "ignore,replace,expected_rows", - ( - ( - True, - False, - [ - {"id": 1, "title": "Exists", "score": None, "age": None}, - ], - ), - ( - False, - True, - [ - {"id": 1, "title": "One", "score": None, "age": None}, - ], - ), - ), -) -@pytest.mark.parametrize("should_return", (True, False)) -async def test_insert_ignore_replace( - ds_write, ignore, replace, expected_rows, should_return -): - await ds_write.get_database("data").execute_write( - "insert into docs (id, title) values (1, 'Exists')" - ) - token = write_token(ds_write) - data = {"rows": [{"id": 1, "title": "One"}]} - if ignore: - data["ignore"] = True - if replace: - data["replace"] = True - if should_return: - data["return"] = True - response = await ds_write.client.post( - "/data/docs/-/insert", - json=data, - headers=_headers(token), - ) - assert response.status_code == 201 - - # Analytics event - event = last_event(ds_write) - assert event.name == "insert-rows" - assert event.num_rows == 1 - assert event.database == "data" - assert event.table == "docs" - assert event.ignore == ignore - assert event.replace == replace - - actual_rows = ( - await ds_write.get_database("data").execute("select * from docs") - ).dicts() - - assert actual_rows == expected_rows - assert response.json()["ok"] is True - if should_return: - assert response.json()["rows"] == expected_rows - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "initial,input,expected_rows", - ( - ( - # Simple primary key update - {"rows": [{"id": 1, "title": "One"}], "pk": "id"}, - {"rows": [{"id": 1, "title": "Two"}]}, - [ - {"id": 1, "title": "Two"}, - ], - ), - ( - # Multiple rows update one of them - { - "rows": [{"id": 1, "title": "One"}, {"id": 2, "title": "Two"}], - "pk": "id", - }, - {"rows": [{"id": 1, "title": "Three"}]}, - [ - {"id": 1, "title": "Three"}, - {"id": 2, "title": "Two"}, - ], - ), - ( - # rowid update - {"rows": [{"title": "One"}]}, - {"rows": [{"rowid": 1, "title": "Two"}]}, - [ - {"rowid": 1, "title": "Two"}, - ], - ), - ( - # Compound primary key update - {"rows": [{"id": 1, "title": "One", "score": 1}], "pks": ["id", "score"]}, - {"rows": [{"id": 1, "title": "Two", "score": 1}]}, - [ - {"id": 1, "title": "Two", "score": 1}, - ], - ), - ( - # Upsert with an alter - {"rows": [{"id": 1, "title": "One"}], "pk": "id"}, - {"rows": [{"id": 1, "title": "Two", "extra": "extra"}], "alter": True}, - [{"id": 1, "title": "Two", "extra": "extra"}], - ), - ), -) -@pytest.mark.parametrize("should_return", (False, True)) -async def test_upsert(ds_write, initial, input, expected_rows, should_return): - token = write_token(ds_write) - # Insert initial data - initial["table"] = "upsert_test" - create_response = await ds_write.client.post( - "/data/-/create", - json=initial, - headers=_headers(token), - ) - assert create_response.status_code == 201 - if should_return: - input["return"] = True - response = await 
ds_write.client.post( - "/data/upsert_test/-/upsert", - json=input, - headers=_headers(token), - ) - assert response.status_code == 200, response.text - assert response.json()["ok"] is True - - # Analytics event - event = last_event(ds_write) - assert event.database == "data" - assert event.table == "upsert_test" - if input.get("alter"): - assert event.name == "alter-table" - assert "extra" in event.after_schema - else: - assert event.name == "upsert-rows" - assert event.num_rows == 1 - - if should_return: - # We only expect it to return rows corresponding to those we sent - expected_returned_rows = expected_rows[: len(input["rows"])] - assert response.json()["rows"] == expected_returned_rows - # Check the database too - actual_rows = ( - await ds_write.client.get("/data/upsert_test.json?_shape=array") - ).json() - assert actual_rows == expected_rows - # Drop the upsert_test table - await ds_write.get_database("data").execute_write("drop table upsert_test") - - -async def _insert_row(ds): - insert_response = await ds.client.post( - "/data/docs/-/insert", - json={"row": {"title": "Row one", "score": 1.2, "age": 5}, "return": True}, - headers=_headers(write_token(ds)), - ) - assert insert_response.status_code == 201 - return insert_response.json()["rows"][0]["id"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("scenario", ("no_token", "no_perm", "bad_table")) -async def test_delete_row_errors(ds_write, scenario): - if scenario == "no_token": - token = "bad_token" - elif scenario == "no_perm": - token = write_token(ds_write, actor_id="not-root") - else: - token = write_token(ds_write) - - pk = await _insert_row(ds_write) - - path = "/data/{}/{}/-/delete".format( - "docs" if scenario != "bad_table" else "bad_table", pk - ) - response = await ds_write.client.post( - path, - headers=_headers(token), - ) - assert response.status_code == 403 if scenario in ("no_token", "bad_token") else 404 - assert response.json()["ok"] is False - assert ( - response.json()["errors"] == ["Permission denied"] - if scenario == "no_token" - else ["Table not found"] - ) - assert len((await ds_write.client.get("/data/docs.json?_shape=array")).json()) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "table,row_for_create,pks,delete_path", - ( - ("rowid_table", {"name": "rowid row"}, None, None), - ("pk_table", {"id": 1, "name": "ID table"}, "id", "1"), - ( - "compound_pk_table", - {"type": "article", "key": "k"}, - ["type", "key"], - "article,k", - ), - ), -) -async def test_delete_row(ds_write, table, row_for_create, pks, delete_path): - # First create the table with that example row - create_data = { - "table": table, - "row": row_for_create, - } - if pks: - if isinstance(pks, str): - create_data["pk"] = pks - else: - create_data["pks"] = pks - create_response = await ds_write.client.post( - "/data/-/create", - json=create_data, - headers=_headers(write_token(ds_write)), - ) - assert create_response.status_code == 201, create_response.json() - # Should be a single row - assert ( - await ds_write.client.get( - "/data/-/query.json?_shape=arrayfirst&sql=select+count(*)+from+{}".format( - table - ) - ) - ).json() == [1] - # Now delete the row - if delete_path is None: - # Special case for that rowid table - delete_path = ( - await ds_write.client.get( - "/data/-/query.json?_shape=arrayfirst&sql=select+rowid+from+{}".format( - table - ) - ) - ).json()[0] - - delete_response = await ds_write.client.post( - "/data/{}/{}/-/delete".format(table, delete_path), - headers=_headers(write_token(ds_write)), - ) - 
assert delete_response.status_code == 200 - - # Analytics event - event = last_event(ds_write) - assert event.name == "delete-row" - assert event.database == "data" - assert event.table == table - assert event.pks == str(delete_path).split(",") - assert ( - await ds_write.client.get( - "/data/-/query.json?_shape=arrayfirst&sql=select+count(*)+from+{}".format( - table - ) - ) - ).json() == [0] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "scenario", ("no_token", "no_perm", "bad_table", "cannot_alter") -) -async def test_update_row_check_permission(ds_write, scenario): - if scenario == "no_token": - token = "bad_token" - elif scenario == "no_perm": - token = write_token(ds_write, actor_id="not-root") - elif scenario == "cannot_alter": - # update-row but no alter-table: - token = write_token(ds_write, permissions=["ur"]) - else: - token = write_token(ds_write) - - pk = await _insert_row(ds_write) - - path = "/data/{}/{}/-/update".format( - "docs" if scenario != "bad_table" else "bad_table", pk - ) - - json_body = {"update": {"title": "New title"}} - if scenario == "cannot_alter": - json_body["alter"] = True - - response = await ds_write.client.post( - path, - json=json_body, - headers=_headers(token), - ) - assert response.status_code == 403 if scenario in ("no_token", "bad_token") else 404 - assert response.json()["ok"] is False - assert ( - response.json()["errors"] == ["Permission denied"] - if scenario == "no_token" - else ["Table not found"] - ) - - -@pytest.mark.asyncio -async def test_update_row_invalid_key(ds_write): - token = write_token(ds_write) - - pk = await _insert_row(ds_write) - - path = "/data/docs/{}/-/update".format(pk) - response = await ds_write.client.post( - path, - json={"update": {"title": "New title"}, "bad_key": 1}, - headers=_headers(token), - ) - assert response.status_code == 400 - assert response.json() == {"ok": False, "errors": ["Invalid keys: bad_key"]} - - -@pytest.mark.asyncio -async def test_update_row_alter(ds_write): - token = write_token(ds_write, permissions=["ur", "at"]) - pk = await _insert_row(ds_write) - path = "/data/docs/{}/-/update".format(pk) - response = await ds_write.client.post( - path, - json={"update": {"title": "New title", "extra": "extra"}, "alter": True}, - headers=_headers(token), - ) - assert response.status_code == 200 - assert response.json() == {"ok": True} - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input,expected_errors", - ( - ({"title": "New title"}, None), - ({"title": None}, None), - ({"score": 1.6}, None), - ({"age": 10}, None), - ({"title": "New title", "score": 1.6}, None), - ({"title2": "New title"}, ["no such column: title2"]), - ), -) -@pytest.mark.parametrize("use_return", (True, False)) -async def test_update_row(ds_write, input, expected_errors, use_return): - token = write_token(ds_write) - pk = await _insert_row(ds_write) - - path = "/data/docs/{}/-/update".format(pk) - - data = {"update": input} - if use_return: - data["return"] = True - - response = await ds_write.client.post( - path, - json=data, - headers=_headers(token), - ) - if expected_errors: - assert response.status_code == 400 - assert response.json()["ok"] is False - assert response.json()["errors"] == expected_errors - return - - assert response.json()["ok"] is True - if not use_return: - assert "row" not in response.json() - else: - returned_row = response.json()["row"] - assert returned_row["id"] == pk - for k, v in input.items(): - assert returned_row[k] == v - - # Analytics event - event = last_event(ds_write) - assert 
event.actor == {"id": "root", "token": "dstok"} - assert event.database == "data" - assert event.table == "docs" - assert event.pks == [str(pk)] - - # And fetch the row to check it's updated - response = await ds_write.client.get( - "/data/docs/{}.json?_shape=array".format(pk), - ) - assert response.status_code == 200 - row = response.json()[0] - assert row["id"] == pk - for k, v in input.items(): - assert row[k] == v - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "scenario", ("no_token", "no_perm", "bad_table", "has_perm", "immutable") -) -async def test_drop_table(ds_write, scenario): - if scenario == "no_token": - token = "bad_token" - elif scenario == "no_perm": - token = write_token(ds_write, actor_id="not-root") - else: - token = write_token(ds_write) - should_work = scenario == "has_perm" - await ds_write.get_database("data").execute_write( - "insert into docs (id, title) values (1, 'Row 1')" - ) - path = "/{database}/{table}/-/drop".format( - database="immutable" if scenario == "immutable" else "data", - table="docs" if scenario != "bad_table" else "bad_table", - ) - response = await ds_write.client.post( - path, - headers=_headers(token), - ) - if not should_work: - assert ( - response.status_code == 403 - if scenario in ("no_token", "bad_token") - else 404 - ) - assert response.json()["ok"] is False - expected_error = "Permission denied" - if scenario == "bad_table": - expected_error = "Table not found" - elif scenario == "immutable": - expected_error = "Database is immutable" - assert response.json()["errors"] == [expected_error] - assert (await ds_write.client.get("/data/docs")).status_code == 200 - else: - # It should show a confirmation page - assert response.status_code == 200 - assert response.json() == { - "ok": True, - "database": "data", - "table": "docs", - "row_count": 1, - "message": 'Pass "confirm": true to confirm', - } - assert (await ds_write.client.get("/data/docs")).status_code == 200 - # Now send confirm: true - response2 = await ds_write.client.post( - path, - json={"confirm": True}, - headers=_headers(token), - ) - assert response2.json() == {"ok": True} - # Check event - event = last_event(ds_write) - assert event.name == "drop-table" - assert event.actor == {"id": "root", "token": "dstok"} - assert event.table == "docs" - assert event.database == "data" - # Table should 404 - assert (await ds_write.client.get("/data/docs")).status_code == 404 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input,expected_status,expected_response,expected_events", - ( - # Permission error with a bad token - ( - {"table": "bad", "row": {"id": 1}}, - 403, - {"ok": False, "errors": ["Permission denied"]}, - [], - ), - # Successful creation with columns: - ( - { - "table": "one", - "columns": [ - { - "name": "id", - "type": "integer", - }, - { - "name": "title", - "type": "text", - }, - { - "name": "score", - "type": "integer", - }, - { - "name": "weight", - "type": "float", - }, - { - "name": "thumbnail", - "type": "blob", - }, - ], - "pk": "id", - }, - 201, - { - "ok": True, - "database": "data", - "table": "one", - "table_url": "http://localhost/data/one", - "table_api_url": "http://localhost/data/one.json", - "schema": ( - "CREATE TABLE [one] (\n" - " [id] INTEGER PRIMARY KEY,\n" - " [title] TEXT,\n" - " [score] INTEGER,\n" - " [weight] FLOAT,\n" - " [thumbnail] BLOB\n" - ")" - ), - }, - ["create-table"], - ), - # Successful creation with rows: - ( - { - "table": "two", - "rows": [ - { - "id": 1, - "title": "Row 1", - "score": 1.5, - }, - { - "id": 2, - 
"title": "Row 2", - "score": 1.5, - }, - ], - "pk": "id", - }, - 201, - { - "ok": True, - "database": "data", - "table": "two", - "table_url": "http://localhost/data/two", - "table_api_url": "http://localhost/data/two.json", - "schema": ( - "CREATE TABLE [two] (\n" - " [id] INTEGER PRIMARY KEY,\n" - " [title] TEXT,\n" - " [score] FLOAT\n" - ")" - ), - "row_count": 2, - }, - ["create-table", "insert-rows"], - ), - # Successful creation with row: - ( - { - "table": "three", - "row": { - "id": 1, - "title": "Row 1", - "score": 1.5, - }, - "pk": "id", - }, - 201, - { - "ok": True, - "database": "data", - "table": "three", - "table_url": "http://localhost/data/three", - "table_api_url": "http://localhost/data/three.json", - "schema": ( - "CREATE TABLE [three] (\n" - " [id] INTEGER PRIMARY KEY,\n" - " [title] TEXT,\n" - " [score] FLOAT\n" - ")" - ), - "row_count": 1, - }, - ["create-table", "insert-rows"], - ), - # Create with row and no primary key - ( - { - "table": "four", - "row": { - "name": "Row 1", - }, - }, - 201, - { - "ok": True, - "database": "data", - "table": "four", - "table_url": "http://localhost/data/four", - "table_api_url": "http://localhost/data/four.json", - "schema": ("CREATE TABLE [four] (\n" " [name] TEXT\n" ")"), - "row_count": 1, - }, - ["create-table", "insert-rows"], - ), - # Create table with compound primary key - ( - { - "table": "five", - "row": {"type": "article", "key": 123, "title": "Article 1"}, - "pks": ["type", "key"], - }, - 201, - { - "ok": True, - "database": "data", - "table": "five", - "table_url": "http://localhost/data/five", - "table_api_url": "http://localhost/data/five.json", - "schema": ( - "CREATE TABLE [five] (\n [type] TEXT,\n [key] INTEGER,\n" - " [title] TEXT,\n PRIMARY KEY ([type], [key])\n)" - ), - "row_count": 1, - }, - ["create-table", "insert-rows"], - ), - # Error: Table is required - ( - { - "row": {"id": 1}, - }, - 400, - { - "ok": False, - "errors": ["Table is required"], - }, - [], - ), - # Error: Invalid table name - ( - { - "table": "sqlite_bad_name", - "row": {"id": 1}, - }, - 400, - { - "ok": False, - "errors": ["Invalid table name"], - }, - [], - ), - # Error: JSON must be an object - ( - [], - 400, - { - "ok": False, - "errors": ["JSON must be an object"], - }, - [], - ), - # Error: Cannot specify columns with rows or row - ( - { - "table": "bad", - "columns": [{"name": "id", "type": "integer"}], - "rows": [{"id": 1}], - }, - 400, - { - "ok": False, - "errors": ["Cannot specify columns with rows or row"], - }, - [], - ), - # Error: columns, rows or row is required - ( - { - "table": "bad", - }, - 400, - { - "ok": False, - "errors": ["columns, rows or row is required"], - }, - [], - ), - # Error: columns must be a list - ( - { - "table": "bad", - "columns": {"name": "id", "type": "integer"}, - }, - 400, - { - "ok": False, - "errors": ["columns must be a list"], - }, - [], - ), - # Error: columns must be a list of objects - ( - { - "table": "bad", - "columns": ["id"], - }, - 400, - { - "ok": False, - "errors": ["columns must be a list of objects"], - }, - [], - ), - # Error: Column name is required - ( - { - "table": "bad", - "columns": [{"type": "integer"}], - }, - 400, - { - "ok": False, - "errors": ["Column name is required"], - }, - [], - ), - # Error: Unsupported column type - ( - { - "table": "bad", - "columns": [{"name": "id", "type": "bad"}], - }, - 400, - { - "ok": False, - "errors": ["Unsupported column type: bad"], - }, - [], - ), - # Error: Duplicate column name - ( - { - "table": "bad", - "columns": [ - {"name": 
"id", "type": "integer"}, - {"name": "id", "type": "integer"}, - ], - }, - 400, - { - "ok": False, - "errors": ["Duplicate column name: id"], - }, - [], - ), - # Error: rows must be a list - ( - { - "table": "bad", - "rows": {"id": 1}, - }, - 400, - { - "ok": False, - "errors": ["rows must be a list"], - }, - [], - ), - # Error: rows must be a list of objects - ( - { - "table": "bad", - "rows": ["id"], - }, - 400, - { - "ok": False, - "errors": ["rows must be a list of objects"], - }, - [], - ), - # Error: pk must be a string - ( - { - "table": "bad", - "row": {"id": 1}, - "pk": 1, - }, - 400, - { - "ok": False, - "errors": ["pk must be a string"], - }, - [], - ), - # Error: Cannot specify both pk and pks - ( - { - "table": "bad", - "row": {"id": 1, "name": "Row 1"}, - "pk": "id", - "pks": ["id", "name"], - }, - 400, - { - "ok": False, - "errors": ["Cannot specify both pk and pks"], - }, - [], - ), - # Error: pks must be a list - ( - { - "table": "bad", - "row": {"id": 1, "name": "Row 1"}, - "pks": "id", - }, - 400, - { - "ok": False, - "errors": ["pks must be a list"], - }, - [], - ), - # Error: pks must be a list of strings - ( - {"table": "bad", "row": {"id": 1, "name": "Row 1"}, "pks": [1, 2]}, - 400, - {"ok": False, "errors": ["pks must be a list of strings"]}, - [], - ), - # Error: ignore and replace are mutually exclusive - ( - { - "table": "bad", - "row": {"id": 1, "name": "Row 1"}, - "pk": "id", - "ignore": True, - "replace": True, - }, - 400, - { - "ok": False, - "errors": ["ignore and replace are mutually exclusive"], - }, - [], - ), - # ignore and replace require row or rows - ( - { - "table": "bad", - "columns": [{"name": "id", "type": "integer"}], - "ignore": True, - }, - 400, - { - "ok": False, - "errors": ["ignore and replace require row or rows"], - }, - [], - ), - # ignore and replace require pk or pks - ( - { - "table": "bad", - "row": {"id": 1}, - "ignore": True, - }, - 400, - { - "ok": False, - "errors": ["ignore and replace require pk or pks"], - }, - [], - ), - ( - { - "table": "bad", - "row": {"id": 1}, - "replace": True, - }, - 400, - { - "ok": False, - "errors": ["ignore and replace require pk or pks"], - }, - [], - ), - ), -) -async def test_create_table( - ds_write, input, expected_status, expected_response, expected_events -): - ds_write._tracked_events = [] - # Special case for expected status of 403 - if expected_status == 403: - token = "bad_token" - else: - token = write_token(ds_write) - response = await ds_write.client.post( - "/data/-/create", - json=input, - headers=_headers(token), - ) - assert response.status_code == expected_status - data = response.json() - assert data == expected_response - # Should have tracked the expected events - events = ds_write._tracked_events - assert [e.name for e in events] == expected_events - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "permissions,body,expected_status,expected_errors", - ( - (["create-table"], {"table": "t", "columns": [{"name": "c"}]}, 201, None), - # Need insert-row too if you use "rows": - ( - ["create-table"], - {"table": "t", "rows": [{"name": "c"}]}, - 403, - ["Permission denied: need insert-row"], - ), - # This should work: - ( - ["create-table", "insert-row"], - {"table": "t", "rows": [{"name": "c"}]}, - 201, - None, - ), - # If you use replace: true you need update-row too: - ( - ["create-table", "insert-row"], - {"table": "t", "rows": [{"id": 1}], "pk": "id", "replace": True}, - 403, - ["Permission denied: need update-row"], - ), - ), -) -async def test_create_table_permissions( - 
ds_write, permissions, body, expected_status, expected_errors -): - token = ds_write.create_token("root", restrict_all=["view-instance"] + permissions) - response = await ds_write.client.post( - "/data/-/create", - json=body, - headers=_headers(token), - ) - assert response.status_code == expected_status - if expected_errors: - data = response.json() - assert data["ok"] is False - assert data["errors"] == expected_errors - - -@pytest.mark.asyncio -@pytest.mark.xfail(reason="Flaky, see https://github.com/simonw/datasette/issues/2356") -@pytest.mark.parametrize( - "input,expected_rows_after", - ( - ( - { - "table": "test_insert_replace", - "rows": [ - {"id": 1, "name": "Row 1 new"}, - {"id": 3, "name": "Row 3 new"}, - ], - "pk": "id", - "ignore": True, - }, - [ - {"id": 1, "name": "Row 1"}, - {"id": 2, "name": "Row 2"}, - {"id": 3, "name": "Row 3 new"}, - ], - ), - ( - { - "table": "test_insert_replace", - "rows": [ - {"id": 1, "name": "Row 1 new"}, - {"id": 3, "name": "Row 3 new"}, - ], - "pk": "id", - "replace": True, - }, - [ - {"id": 1, "name": "Row 1 new"}, - {"id": 2, "name": "Row 2"}, - {"id": 3, "name": "Row 3 new"}, - ], - ), - ), -) -async def test_create_table_ignore_replace(ds_write, input, expected_rows_after): - # Create table with two rows - token = write_token(ds_write) - first_response = await ds_write.client.post( - "/data/-/create", - json={ - "rows": [{"id": 1, "name": "Row 1"}, {"id": 2, "name": "Row 2"}], - "table": "test_insert_replace", - "pk": "id", - }, - headers=_headers(token), - ) - assert first_response.status_code == 201 - - ds_write._tracked_events = [] - - # Try a second time - second_response = await ds_write.client.post( - "/data/-/create", - json=input, - headers=_headers(token), - ) - assert second_response.status_code == 201 - # Check that the rows are as expected - rows = await ds_write.client.get("/data/test_insert_replace.json?_shape=array") - assert rows.json() == expected_rows_after - - # Check it fired the right events - event_names = [e.name for e in ds_write._tracked_events] - assert event_names == ["insert-rows"] - - -@pytest.mark.asyncio -async def test_create_table_error_if_pk_changed(ds_write): - token = write_token(ds_write) - first_response = await ds_write.client.post( - "/data/-/create", - json={ - "rows": [{"id": 1, "name": "Row 1"}, {"id": 2, "name": "Row 2"}], - "table": "test_insert_replace", - "pk": "id", - }, - headers=_headers(token), - ) - assert first_response.status_code == 201 - # Try a second time with a different pk - second_response = await ds_write.client.post( - "/data/-/create", - json={ - "rows": [{"id": 1, "name": "Row 1"}, {"id": 2, "name": "Row 2"}], - "table": "test_insert_replace", - "pk": "name", - "replace": True, - }, - headers=_headers(token), - ) - assert second_response.status_code == 400 - assert second_response.json() == { - "ok": False, - "errors": ["pk cannot be changed for existing table"], - } - - -@pytest.mark.asyncio -async def test_create_table_error_rows_twice_with_duplicates(ds_write): - # Error if you don't send ignore: True or replace: True - token = write_token(ds_write) - input = { - "rows": [{"id": 1, "name": "Row 1"}, {"id": 2, "name": "Row 2"}], - "table": "test_create_twice", - "pk": "id", - } - first_response = await ds_write.client.post( - "/data/-/create", - json=input, - headers=_headers(token), - ) - assert first_response.status_code == 201 - second_response = await ds_write.client.post( - "/data/-/create", - json=input, - headers=_headers(token), - ) - assert 
second_response.status_code == 400 - assert second_response.json() == { - "ok": False, - "errors": ["UNIQUE constraint failed: test_create_twice.id"], - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path", - ( - "/data/-/create", - "/data/docs/-/drop", - "/data/docs/-/insert", - ), -) -async def test_method_not_allowed(ds_write, path): - response = await ds_write.client.get( - path, - headers={ - "Content-Type": "application/json", - }, - ) - assert response.status_code == 405 - assert response.json() == { - "ok": False, - "error": "Method not allowed", - } - - -@pytest.mark.asyncio -async def test_create_uses_alter_by_default_for_new_table(ds_write): - ds_write._tracked_events = [] - token = write_token(ds_write) - response = await ds_write.client.post( - "/data/-/create", - json={ - "table": "new_table", - "rows": [ - { - "name": "Row 1", - } - ] - * 100 - + [ - {"name": "Row 2", "extra": "Extra"}, - ], - "pk": "id", - }, - headers=_headers(token), - ) - assert response.status_code == 201 - event_names = [e.name for e in ds_write._tracked_events] - assert event_names == ["create-table", "insert-rows"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("has_alter_permission", (True, False)) -async def test_create_using_alter_against_existing_table( - ds_write, has_alter_permission -): - token = write_token( - ds_write, permissions=["ir", "ct"] + (["at"] if has_alter_permission else []) - ) - # First create the table - response = await ds_write.client.post( - "/data/-/create", - json={ - "table": "new_table", - "rows": [ - { - "name": "Row 1", - } - ], - "pk": "id", - }, - headers=_headers(token), - ) - assert response.status_code == 201 - - ds_write._tracked_events = [] - # Now try to insert more rows using /-/create with alter=True - response2 = await ds_write.client.post( - "/data/-/create", - json={ - "table": "new_table", - "rows": [{"name": "Row 2", "extra": "extra"}], - "pk": "id", - "alter": True, - }, - headers=_headers(token), - ) - if not has_alter_permission: - assert response2.status_code == 403 - assert response2.json() == { - "ok": False, - "errors": ["Permission denied: need alter-table"], - } - else: - assert response2.status_code == 201 - - event_names = [e.name for e in ds_write._tracked_events] - assert event_names == ["alter-table", "insert-rows"] - - # It should have altered the table - alter_event = ds_write._tracked_events[0] - assert alter_event.name == "alter-table" - assert "extra" not in alter_event.before_schema - assert "extra" in alter_event.after_schema - - insert_rows_event = ds_write._tracked_events[1] - assert insert_rows_event.name == "insert-rows" - assert insert_rows_event.num_rows == 1 diff --git a/tests/test_auth.py b/tests/test_auth.py deleted file mode 100644 index 1e1cd622..00000000 --- a/tests/test_auth.py +++ /dev/null @@ -1,493 +0,0 @@ -from bs4 import BeautifulSoup as Soup -from .utils import cookie_was_deleted, last_event -from click.testing import CliRunner -from datasette.utils import baseconv -from datasette.cli import cli -from datasette.resources import ( - DatabaseResource, - TableResource, -) -import pytest -import time - - -@pytest.mark.asyncio -async def test_auth_token(ds_client): - """The /-/auth-token endpoint sets the correct cookie""" - assert ds_client.ds._root_token is not None - path = f"/-/auth-token?token={ds_client.ds._root_token}" - response = await ds_client.get(path) - assert response.status_code == 302 - assert "/" == response.headers["Location"] - assert {"a": {"id": "root"}} == ds_client.ds.unsign( - 
response.cookies["ds_actor"], "actor" - ) - # Should have recorded a login event - event = last_event(ds_client.ds) - assert event.name == "login" - assert event.actor == {"id": "root"} - # Check that a second with same token fails - assert ds_client.ds._root_token is None - assert (await ds_client.get(path)).status_code == 403 - # But attempting with same token while logged in as root should redirect to / - response = await ds_client.get( - path, cookies={"ds_actor": ds_client.actor_cookie({"id": "root"})} - ) - assert response.status_code == 302 - assert response.headers["Location"] == "/" - - -@pytest.mark.asyncio -async def test_actor_cookie(ds_client): - """A valid actor cookie sets request.scope['actor']""" - cookie = ds_client.actor_cookie({"id": "test"}) - await ds_client.get("/", cookies={"ds_actor": cookie}) - assert ds_client.ds._last_request.scope["actor"] == {"id": "test"} - - -@pytest.mark.asyncio -async def test_actor_cookie_invalid(ds_client): - cookie = ds_client.actor_cookie({"id": "test"}) - # Break the signature - await ds_client.get("/", cookies={"ds_actor": cookie[:-1] + "."}) - assert ds_client.ds._last_request.scope["actor"] is None - # Break the cookie format - cookie = ds_client.ds.sign({"b": {"id": "test"}}, "actor") - await ds_client.get("/", cookies={"ds_actor": cookie}) - assert ds_client.ds._last_request.scope["actor"] is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "offset,expected", - [ - ((24 * 60 * 60), {"id": "test"}), - (-(24 * 60 * 60), None), - ], -) -async def test_actor_cookie_that_expires(ds_client, offset, expected): - expires_at = int(time.time()) + offset - cookie = ds_client.ds.sign( - {"a": {"id": "test"}, "e": baseconv.base62.encode(expires_at)}, "actor" - ) - await ds_client.get("/", cookies={"ds_actor": cookie}) - assert ds_client.ds._last_request.scope["actor"] == expected - - -def test_logout(app_client): - # Keeping app_client for the moment because of csrftoken_from - response = app_client.get( - "/-/logout", cookies={"ds_actor": app_client.actor_cookie({"id": "test"})} - ) - assert 200 == response.status - assert "

You are logged in as test
    " in response.text - # Actors without an id get full serialization - response2 = app_client.get( - "/-/logout", cookies={"ds_actor": app_client.actor_cookie({"name2": "bob"})} - ) - assert 200 == response2.status - assert ( - "

You are logged in as {'name2': 'bob'}
    " - in response2.text - ) - # If logged out you get a redirect to / - response3 = app_client.get("/-/logout") - assert 302 == response3.status - # A POST to that page should log the user out - response4 = app_client.post( - "/-/logout", - csrftoken_from=True, - cookies={"ds_actor": app_client.actor_cookie({"id": "test"})}, - ) - # Should have recorded a logout event - event = last_event(app_client.ds) - assert event.name == "logout" - assert event.actor == {"id": "test"} - # The ds_actor cookie should have been unset - assert cookie_was_deleted(response4, "ds_actor") - # Should also have set a message - messages = app_client.ds.unsign(response4.cookies["ds_messages"], "messages") - assert [["You are now logged out", 2]] == messages - - -@pytest.mark.asyncio -@pytest.mark.parametrize("path", ["/", "/fixtures", "/fixtures/facetable"]) -async def test_logout_button_in_navigation(ds_client, path): - response = await ds_client.get( - path, cookies={"ds_actor": ds_client.actor_cookie({"id": "test"})} - ) - anon_response = await ds_client.get(path) - for fragment in ( - "test", - '', - ): - assert fragment in response.text - assert fragment not in anon_response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize("path", ["/", "/fixtures", "/fixtures/facetable"]) -async def test_no_logout_button_in_navigation_if_no_ds_actor_cookie(ds_client, path): - response = await ds_client.get(path + "?_bot=1") - assert "bot" in response.text - assert ( - '' - not in response.text - ) - - -@pytest.mark.parametrize( - "post_data,errors,expected_duration,expected_r", - ( - ({"expire_type": ""}, [], None, None), - ({"expire_type": "x"}, ["Invalid expire duration"], None, None), - ({"expire_type": "minutes"}, ["Invalid expire duration"], None, None), - ( - {"expire_type": "minutes", "expire_duration": "x"}, - ["Invalid expire duration"], - None, - None, - ), - ( - {"expire_type": "minutes", "expire_duration": "-1"}, - ["Invalid expire duration"], - None, - None, - ), - ( - {"expire_type": "minutes", "expire_duration": "0"}, - ["Invalid expire duration"], - None, - None, - ), - ({"expire_type": "minutes", "expire_duration": "10"}, [], 600, None), - ({"expire_type": "hours", "expire_duration": "10"}, [], 10 * 60 * 60, None), - ({"expire_type": "days", "expire_duration": "3"}, [], 60 * 60 * 24 * 3, None), - # Token restrictions - ({"all:view-instance": "on"}, [], None, {"a": ["vi"]}), - ({"database:fixtures:view-query": "on"}, [], None, {"d": {"fixtures": ["vq"]}}), - ( - {"resource:fixtures:facetable:insert-row": "on"}, - [], - None, - {"r": {"fixtures": {"facetable": ["ir"]}}}, - ), - ), -) -def test_auth_create_token( - app_client, post_data, errors, expected_duration, expected_r -): - assert app_client.get("/-/create-token").status == 403 - ds_actor = app_client.actor_cookie({"id": "test"}) - response = app_client.get("/-/create-token", cookies={"ds_actor": ds_actor}) - assert response.status == 200 - assert ">Create an API token<" in response.text - # Confirm some aspects of expected set of checkboxes - soup = Soup(response.text, "html.parser") - checkbox_names = {el["name"] for el in soup.select('input[type="checkbox"]')} - assert checkbox_names.issuperset( - { - "all:view-instance", - "all:view-query", - "database:fixtures:drop-table", - "resource:fixtures:foreign_key_references:insert-row", - } - ) - # Now try actually creating one - response2 = app_client.post( - "/-/create-token", - post_data, - csrftoken_from=True, - cookies={"ds_actor": ds_actor}, - ) - assert response2.status == 200 - if errors: 
- for error in errors: - assert '

{}
    '.format(error) in response2.text - else: - # Check create-token event - event = last_event(app_client.ds) - assert event.name == "create-token" - assert event.expires_after == expected_duration - assert isinstance(event.restrict_all, list) - assert isinstance(event.restrict_database, dict) - assert isinstance(event.restrict_resource, dict) - # Extract token from page - token = response2.text.split('value="dstok_')[1].split('"')[0] - details = app_client.ds.unsign(token, "token") - if expected_r: - r = details.pop("_r") - assert r == expected_r - assert details.keys() == {"a", "t", "d"} or details.keys() == {"a", "t"} - assert details["a"] == "test" - if expected_duration is None: - assert "d" not in details - else: - assert details["d"] == expected_duration - # And test that token - response3 = app_client.get( - "/-/actor.json", - headers={"Authorization": "Bearer {}".format("dstok_{}".format(token))}, - ) - assert response3.status == 200 - assert response3.json["actor"]["id"] == "test" - - -@pytest.mark.asyncio -async def test_auth_create_token_not_allowed_for_tokens(ds_client): - ds_tok = ds_client.ds.sign({"a": "test", "token": "dstok"}, "token") - response = await ds_client.get( - "/-/create-token", - headers={"Authorization": "Bearer dstok_{}".format(ds_tok)}, - ) - assert response.status_code == 403 - - -@pytest.mark.asyncio -async def test_auth_create_token_not_allowed_if_allow_signed_tokens_off(ds_client): - ds_client.ds._settings["allow_signed_tokens"] = False - try: - ds_actor = ds_client.actor_cookie({"id": "test"}) - response = await ds_client.get( - "/-/create-token", cookies={"ds_actor": ds_actor} - ) - assert response.status_code == 403 - finally: - ds_client.ds._settings["allow_signed_tokens"] = True - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "scenario,should_work", - ( - ("allow_signed_tokens_off", False), - ("no_token", False), - ("no_timestamp", False), - ("invalid_token", False), - ("expired_token", False), - ("valid_unlimited_token", True), - ("valid_expiring_token", True), - ), -) -async def test_auth_with_dstok_token(ds_client, scenario, should_work): - token = None - _time = int(time.time()) - if scenario in ("valid_unlimited_token", "allow_signed_tokens_off"): - token = ds_client.ds.sign({"a": "test", "t": _time}, "token") - elif scenario == "valid_expiring_token": - token = ds_client.ds.sign({"a": "test", "t": _time - 50, "d": 1000}, "token") - elif scenario == "expired_token": - token = ds_client.ds.sign({"a": "test", "t": _time - 2000, "d": 1000}, "token") - elif scenario == "no_timestamp": - token = ds_client.ds.sign({"a": "test"}, "token") - elif scenario == "invalid_token": - token = "invalid" - if token: - token = "dstok_{}".format(token) - if scenario == "allow_signed_tokens_off": - ds_client.ds._settings["allow_signed_tokens"] = False - headers = {} - if token: - headers["Authorization"] = "Bearer {}".format(token) - response = await ds_client.get("/-/actor.json", headers=headers) - try: - if should_work: - data = response.json() - assert data.keys() == {"actor"} - actor = data["actor"] - expected_keys = {"id", "token"} - if scenario != "valid_unlimited_token": - expected_keys.add("token_expires") - assert actor.keys() == expected_keys - assert actor["id"] == "test" - assert actor["token"] == "dstok" - if scenario != "valid_unlimited_token": - assert isinstance(actor["token_expires"], int) - else: - assert response.json() == {"actor": None} - finally: - ds_client.ds._settings["allow_signed_tokens"] = True - - 
-@pytest.mark.parametrize("expires", (None, 1000, -1000)) -def test_cli_create_token(app_client, expires): - secret = app_client.ds._secret - runner = CliRunner() - args = ["create-token", "--secret", secret, "test"] - if expires: - args += ["--expires-after", str(expires)] - result = runner.invoke(cli, args) - assert result.exit_code == 0 - token = result.output.strip() - assert token.startswith("dstok_") - details = app_client.ds.unsign(token[len("dstok_") :], "token") - expected_keys = {"a", "t"} - if expires: - expected_keys.add("d") - assert details.keys() == expected_keys - assert details["a"] == "test" - response = app_client.get( - "/-/actor.json", headers={"Authorization": "Bearer {}".format(token)} - ) - if expires is None or expires > 0: - expected_actor = { - "id": "test", - "token": "dstok", - } - if expires and expires > 0: - expected_actor["token_expires"] = details["t"] + expires - assert response.json == {"actor": expected_actor} - else: - expected_actor = None - assert response.json == {"actor": expected_actor} - - -@pytest.mark.asyncio -async def test_root_with_root_enabled_gets_all_permissions(ds_client): - """Root user with root_enabled=True gets all permissions""" - # Ensure catalog tables are populated - await ds_client.ds.invoke_startup() - await ds_client.ds._refresh_schemas() - - # Set root_enabled to simulate --root flag - ds_client.ds.root_enabled = True - - root_actor = {"id": "root"} - - # Test instance-level permissions (no resource) - assert ( - await ds_client.ds.allowed(action="permissions-debug", actor=root_actor) is True - ) - assert await ds_client.ds.allowed(action="debug-menu", actor=root_actor) is True - - # Test view permissions using the new ds.allowed() method - assert await ds_client.ds.allowed(action="view-instance", actor=root_actor) is True - - assert ( - await ds_client.ds.allowed( - action="view-database", - resource=DatabaseResource("fixtures"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="view-table", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - # Test write permissions using ds.allowed() - assert ( - await ds_client.ds.allowed( - action="insert-row", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="delete-row", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="update-row", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="create-table", - resource=DatabaseResource("fixtures"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="alter-table", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - assert ( - await ds_client.ds.allowed( - action="drop-table", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is True - ) - - -@pytest.mark.asyncio -async def test_root_without_root_enabled_no_special_permissions(ds_client): - """Root user without root_enabled doesn't get automatic permissions""" - # Ensure catalog tables are populated - await ds_client.ds.invoke_startup() - await ds_client.ds._refresh_schemas() - - # Ensure root_enabled is NOT set (or is False) - ds_client.ds.root_enabled = False - - root_actor = {"id": "root"} - - # Test permissions that normally require 
special access - # Without root_enabled, root should follow normal permission rules - - # View permissions should still work (default=True) - assert ( - await ds_client.ds.allowed(action="view-instance", actor=root_actor) is True - ) # Default permission - - assert ( - await ds_client.ds.allowed( - action="view-database", - resource=DatabaseResource("fixtures"), - actor=root_actor, - ) - is True - ) # Default permission - - # But restricted permissions should NOT automatically be granted - # Test with instance-level permission (no resource class) - result = await ds_client.ds.allowed(action="permissions-debug", actor=root_actor) - assert ( - result is not True - ), "Root without root_enabled should not automatically get permissions-debug" - - # Test with resource-based permissions using ds.allowed() - assert ( - await ds_client.ds.allowed( - action="create-table", - resource=DatabaseResource("fixtures"), - actor=root_actor, - ) - is not True - ), "Root without root_enabled should not automatically get create-table" - - assert ( - await ds_client.ds.allowed( - action="drop-table", - resource=TableResource("fixtures", "facetable"), - actor=root_actor, - ) - is not True - ), "Root without root_enabled should not automatically get drop-table" diff --git a/tests/test_base_view.py b/tests/test_base_view.py deleted file mode 100644 index 2cd4d601..00000000 --- a/tests/test_base_view.py +++ /dev/null @@ -1,84 +0,0 @@ -from datasette.views.base import View -from datasette import Request, Response -from datasette.app import Datasette -import json -import pytest - - -class GetView(View): - async def get(self, request, datasette): - return Response.json( - { - "absolute_url": datasette.absolute_url(request, "/"), - "request_path": request.path, - } - ) - - -class GetAndPostView(GetView): - async def post(self, request, datasette): - return Response.json( - { - "method": request.method, - "absolute_url": datasette.absolute_url(request, "/"), - "request_path": request.path, - } - ) - - -@pytest.mark.asyncio -async def test_get_view(): - v = GetView() - datasette = Datasette() - response = await v(Request.fake("/foo"), datasette) - assert json.loads(response.body) == { - "absolute_url": "http://localhost/", - "request_path": "/foo", - } - # Try a HEAD request - head_response = await v(Request.fake("/foo", method="HEAD"), datasette) - assert head_response.body == "" - assert head_response.status == 200 - # And OPTIONS - options_response = await v(Request.fake("/foo", method="OPTIONS"), datasette) - assert options_response.body == "ok" - assert options_response.status == 200 - assert options_response.headers["allow"] == "HEAD, GET" - # And POST - post_response = await v(Request.fake("/foo", method="POST"), datasette) - assert post_response.body == "Method not allowed" - assert post_response.status == 405 - # And POST with .json extension - post_json_response = await v(Request.fake("/foo.json", method="POST"), datasette) - assert json.loads(post_json_response.body) == { - "ok": False, - "error": "Method not allowed", - } - assert post_json_response.status == 405 - - -@pytest.mark.asyncio -async def test_post_view(): - v = GetAndPostView() - datasette = Datasette() - response = await v(Request.fake("/foo"), datasette) - assert json.loads(response.body) == { - "absolute_url": "http://localhost/", - "request_path": "/foo", - } - # Try a HEAD request - head_response = await v(Request.fake("/foo", method="HEAD"), datasette) - assert head_response.body == "" - assert head_response.status == 200 - # And OPTIONS - 
options_response = await v(Request.fake("/foo", method="OPTIONS"), datasette) - assert options_response.body == "ok" - assert options_response.status == 200 - assert options_response.headers["allow"] == "HEAD, GET, POST" - # And POST - post_response = await v(Request.fake("/foo", method="POST"), datasette) - assert json.loads(post_response.body) == { - "method": "POST", - "absolute_url": "http://localhost/", - "request_path": "/foo", - } diff --git a/tests/test_black.py b/tests/test_black.py new file mode 100644 index 00000000..68e2dcc0 --- /dev/null +++ b/tests/test_black.py @@ -0,0 +1,20 @@ +from click.testing import CliRunner +from pathlib import Path +import pytest +import sys + +code_root = Path(__file__).parent.parent + + +@pytest.mark.skipif( + sys.version_info[:2] < (3, 6), reason="Black requires Python 3.6 or later" +) +def test_black(): + # Do not import at top of module because Python 3.5 will not have it installed + import black + + runner = CliRunner() + result = runner.invoke( + black.main, [str(code_root / "tests"), str(code_root / "datasette"), "--check"] + ) + assert result.exit_code == 0, result.output diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py deleted file mode 100644 index ed6202a4..00000000 --- a/tests/test_canned_queries.py +++ /dev/null @@ -1,457 +0,0 @@ -from bs4 import BeautifulSoup as Soup -import json -import pytest -import re -from .fixtures import make_app_client - - -@pytest.fixture -def canned_write_client(tmpdir): - template_dir = tmpdir / "canned_write_templates" - template_dir.mkdir() - (template_dir / "query-data-update_name.html").write_text( - """ - {% extends "query.html" %} - {% block content %}!!!CUSTOM_UPDATE_NAME_TEMPLATE!!!{{ super() }}{% endblock %} - """, - "utf-8", - ) - with make_app_client( - extra_databases={"data.db": "create table names (name text)"}, - template_dir=str(template_dir), - config={ - "databases": { - "data": { - "queries": { - "canned_read": {"sql": "select * from names"}, - "add_name": { - "sql": "insert into names (name) values (:name)", - "write": True, - "on_success_redirect": "/data/add_name?success", - }, - "add_name_specify_id": { - "sql": "insert into names (rowid, name) values (:rowid, :name)", - "on_success_message_sql": "select 'Name added: ' || :name || ' with rowid ' || :rowid", - "write": True, - "on_error_redirect": "/data/add_name_specify_id?error", - }, - "add_name_specify_id_with_error_in_on_success_message_sql": { - "sql": "insert into names (rowid, name) values (:rowid, :name)", - "on_success_message_sql": "select this is bad SQL", - "write": True, - }, - "delete_name": { - "sql": "delete from names where rowid = :rowid", - "write": True, - "on_success_message": "Name deleted", - "allow": {"id": "root"}, - }, - "update_name": { - "sql": "update names set name = :name where rowid = :rowid", - "params": ["rowid", "name", "extra"], - "write": True, - }, - } - } - } - }, - ) as client: - yield client - - -@pytest.fixture -def canned_write_immutable_client(): - with make_app_client( - is_immutable=True, - config={ - "databases": { - "fixtures": { - "queries": { - "add": { - "sql": "insert into sortable (text) values (:text)", - "write": True, - }, - } - } - } - }, - ) as client: - yield client - - -@pytest.mark.asyncio -async def test_canned_query_with_named_parameter(ds_client): - response = await ds_client.get( - "/fixtures/neighborhood_search.json?text=town&_shape=arrays" - ) - assert response.json()["rows"] == [ - ["Corktown", "Detroit", "MI"], - ["Downtown", "Los Angeles", 
"CA"], - ["Downtown", "Detroit", "MI"], - ["Greektown", "Detroit", "MI"], - ["Koreatown", "Los Angeles", "CA"], - ["Mexicantown", "Detroit", "MI"], - ] - - -def test_insert(canned_write_client): - response = canned_write_client.post( - "/data/add_name", - {"name": "Hello"}, - csrftoken_from=True, - cookies={"foo": "bar"}, - ) - messages = canned_write_client.ds.unsign( - response.cookies["ds_messages"], "messages" - ) - assert messages == [["Query executed, 1 row affected", 1]] - assert response.status == 302 - assert response.headers["Location"] == "/data/add_name?success" - - -@pytest.mark.parametrize( - "query_name,expect_csrf_hidden_field", - [ - ("canned_read", False), - ("add_name_specify_id", True), - ("add_name", True), - ], -) -def test_canned_query_form_csrf_hidden_field( - canned_write_client, query_name, expect_csrf_hidden_field -): - response = canned_write_client.get(f"/data/{query_name}") - html = response.text - fragment = '' in response.text - - -def test_vary_header(canned_write_client): - # These forms embed a csrftoken so they should be served with Vary: Cookie - assert "vary" not in canned_write_client.get("/data").headers - assert "Cookie" == canned_write_client.get("/data/update_name").headers["vary"] - - -def test_json_post_body(canned_write_client): - response = canned_write_client.post( - "/data/add_name", - body=json.dumps({"name": ["Hello", "there"]}), - ) - assert 302 == response.status - assert "/data/add_name?success" == response.headers["Location"] - rows = canned_write_client.get("/data/names.json?_shape=array").json - assert rows == [{"rowid": 1, "name": "['Hello', 'there']"}] - - -@pytest.mark.parametrize( - "headers,body,querystring", - ( - (None, "name=NameGoesHere", "?_json=1"), - ({"Accept": "application/json"}, "name=NameGoesHere", None), - (None, "name=NameGoesHere&_json=1", None), - (None, '{"name": "NameGoesHere", "_json": 1}', None), - ), -) -def test_json_response(canned_write_client, headers, body, querystring): - response = canned_write_client.post( - "/data/add_name" + (querystring or ""), - body=body, - headers=headers, - ) - assert 200 == response.status - assert response.headers["content-type"] == "application/json; charset=utf-8" - assert response.json == { - "ok": True, - "message": "Query executed, 1 row affected", - "redirect": "/data/add_name?success", - } - rows = canned_write_client.get("/data/names.json?_shape=array").json - assert rows == [{"rowid": 1, "name": "NameGoesHere"}] - - -def test_canned_query_permissions_on_database_page(canned_write_client): - # Without auth only shows three queries - query_names = { - q["name"] for q in canned_write_client.get("/data.json").json["queries"] - } - assert query_names == { - "add_name_specify_id_with_error_in_on_success_message_sql", - "from_hook", - "update_name", - "add_name_specify_id", - "from_async_hook", - "canned_read", - "add_name", - } - - # With auth shows four - response = canned_write_client.get( - "/data.json", - cookies={"ds_actor": canned_write_client.actor_cookie({"id": "root"})}, - ) - assert response.status == 200 - query_names_and_private = sorted( - [ - {"name": q["name"], "private": q["private"]} - for q in response.json["queries"] - ], - key=lambda q: q["name"], - ) - assert query_names_and_private == [ - {"name": "add_name", "private": False}, - {"name": "add_name_specify_id", "private": False}, - { - "name": "add_name_specify_id_with_error_in_on_success_message_sql", - "private": False, - }, - {"name": "canned_read", "private": False}, - {"name": "delete_name", 
"private": True}, - {"name": "from_async_hook", "private": False}, - {"name": "from_hook", "private": False}, - {"name": "update_name", "private": False}, - ] - - -def test_canned_query_permissions(canned_write_client): - assert 403 == canned_write_client.get("/data/delete_name").status - assert 200 == canned_write_client.get("/data/update_name").status - cookies = {"ds_actor": canned_write_client.actor_cookie({"id": "root"})} - assert 200 == canned_write_client.get("/data/delete_name", cookies=cookies).status - assert 200 == canned_write_client.get("/data/update_name", cookies=cookies).status - - -@pytest.fixture(scope="session") -def magic_parameters_client(): - with make_app_client( - extra_databases={"data.db": "create table logs (line text)"}, - config={ - "databases": { - "data": { - "queries": { - "runme_post": {"sql": "", "write": True}, - "runme_get": {"sql": ""}, - } - } - } - }, - ) as client: - yield client - - -@pytest.mark.parametrize( - "magic_parameter,expected_re", - [ - ("_actor_id", "root"), - ("_header_host", "localhost"), - ("_header_not_a_thing", ""), - ("_cookie_foo", "bar"), - ("_now_epoch", r"^\d+$"), - ("_now_date_utc", r"^\d{4}-\d{2}-\d{2}$"), - ("_now_datetime_utc", r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$"), - ("_random_chars_1", r"^\w$"), - ("_random_chars_10", r"^\w{10}$"), - ], -) -def test_magic_parameters(magic_parameters_client, magic_parameter, expected_re): - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_post"][ - "sql" - ] = f"insert into logs (line) values (:{magic_parameter})" - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_get"][ - "sql" - ] = f"select :{magic_parameter} as result" - cookies = { - "ds_actor": magic_parameters_client.actor_cookie({"id": "root"}), - "foo": "bar", - } - # Test the GET version - get_response = magic_parameters_client.get( - "/data/runme_get.json?_shape=array", cookies=cookies - ) - get_actual = get_response.json[0]["result"] - assert re.match(expected_re, str(get_actual)) - # Test the form - form_response = magic_parameters_client.get("/data/runme_post") - soup = Soup(form_response.body, "html.parser") - # The magic parameter should not be represented as a form field - assert None is soup.find("input", {"name": magic_parameter}) - # Submit the form to create a log line - response = magic_parameters_client.post( - "/data/runme_post?_json=1", {}, csrftoken_from=True, cookies=cookies - ) - assert response.json == { - "ok": True, - "message": "Query executed, 1 row affected", - "redirect": None, - } - post_actual = magic_parameters_client.get( - "/data/logs.json?_sort_desc=rowid&_shape=array" - ).json[0]["line"] - assert re.match(expected_re, post_actual) - - -@pytest.mark.parametrize("use_csrf", [True, False]) -@pytest.mark.parametrize("return_json", [True, False]) -def test_magic_parameters_csrf_json(magic_parameters_client, use_csrf, return_json): - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_post"][ - "sql" - ] = "insert into logs (line) values (:_header_host)" - qs = "" - if return_json: - qs = "?_json=1" - response = magic_parameters_client.post( - f"/data/runme_post{qs}", - {}, - csrftoken_from=use_csrf or None, - ) - if return_json: - assert response.status == 200 - assert response.json["ok"], response.json - else: - assert response.status == 302 - messages = magic_parameters_client.ds.unsign( - response.cookies["ds_messages"], "messages" - ) - assert [["Query executed, 1 row affected", 1]] == messages - post_actual = 
magic_parameters_client.get( - "/data/logs.json?_sort_desc=rowid&_shape=array" - ).json[0]["line"] - assert post_actual == "localhost" - - -def test_magic_parameters_cannot_be_used_in_arbitrary_queries(magic_parameters_client): - response = magic_parameters_client.get( - "/data/-/query.json?sql=select+:_header_host&_shape=array" - ) - assert 400 == response.status - assert response.json["error"].startswith("You did not supply a value for binding") - - -def test_canned_write_custom_template(canned_write_client): - response = canned_write_client.get("/data/update_name") - assert response.status == 200 - assert "!!!CUSTOM_UPDATE_NAME_TEMPLATE!!!" in response.text - assert ( - "" - in response.text - ) - # And test for link rel=alternate while we're here: - assert ( - '' - in response.text - ) - assert ( - response.headers["link"] - == '; rel="alternate"; type="application/json+datasette"' - ) - - -def test_canned_write_query_disabled_for_immutable_database( - canned_write_immutable_client, -): - response = canned_write_immutable_client.get("/fixtures/add") - assert response.status == 200 - assert ( - "This query cannot be executed because the database is immutable." - in response.text - ) - assert '' in response.text - # Submitting form should get a forbidden error - response = canned_write_immutable_client.post( - "/fixtures/add", - {"text": "text"}, - csrftoken_from=True, - ) - assert response.status == 403 - assert "Database is immutable" in response.text diff --git a/tests/test_cli.py b/tests/test_cli.py index 21b86569..1dab4d1f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,29 +1,14 @@ -from .fixtures import ( - make_app_client, - TestClient as _TestClient, - EXPECTED_PLUGINS, -) -from datasette.app import SETTINGS -from datasette.plugins import DEFAULT_PLUGINS -from datasette.cli import cli, serve -from datasette.version import __version__ -from datasette.utils import tilde_encode -from datasette.utils.sqlite import sqlite3 +from .fixtures import app_client, make_app_client +from datasette.cli import cli from click.testing import CliRunner -import io -import json import pathlib -import pytest -import sys -import textwrap -from unittest import mock +import json def test_inspect_cli(app_client): runner = CliRunner() result = runner.invoke(cli, ["inspect", "fixtures.db"]) data = json.loads(result.output) - assert ["fixtures"] == list(data.keys()) database = data["fixtures"] assert "fixtures.db" == database["file"] assert isinstance(database["hash"], str) @@ -41,503 +26,22 @@ def test_inspect_cli_writes_to_file(app_client): cli, ["inspect", "fixtures.db", "--inspect-file", "foo.json"] ) assert 0 == result.exit_code, result.output - with open("foo.json") as fp: - data = json.load(fp) - assert ["fixtures"] == list(data.keys()) + data = json.load(open("foo.json")) + assert {"fixtures", "special"} == set(data.keys()) def test_serve_with_inspect_file_prepopulates_table_counts_cache(): inspect_data = {"fixtures": {"tables": {"hithere": {"count": 44}}}} - with make_app_client(inspect_data=inspect_data, is_immutable=True) as client: + for client in make_app_client(inspect_data=inspect_data, is_immutable=True): assert inspect_data == client.ds.inspect_data db = client.ds.databases["fixtures"] assert {"hithere": 44} == db.cached_table_counts -@pytest.mark.parametrize( - "spatialite_paths,should_suggest_load_extension", - ( - ([], False), - (["/tmp"], True), - ), -) -def test_spatialite_error_if_attempt_to_open_spatialite( - spatialite_paths, should_suggest_load_extension -): - with 
mock.patch("datasette.utils.SPATIALITE_PATHS", spatialite_paths): - runner = CliRunner() - result = runner.invoke( - cli, ["serve", str(pathlib.Path(__file__).parent / "spatialite.db")] - ) - assert result.exit_code != 0 - assert "It looks like you're trying to load a SpatiaLite" in result.output - suggestion = "--load-extension=spatialite" - if should_suggest_load_extension: - assert suggestion in result.output - else: - assert suggestion not in result.output - - -@mock.patch("datasette.utils.SPATIALITE_PATHS", ["/does/not/exist"]) -def test_spatialite_error_if_cannot_find_load_extension_spatialite(): +def test_spatialite_error_if_attempt_to_open_spatialite(): runner = CliRunner() result = runner.invoke( - cli, - [ - "serve", - str(pathlib.Path(__file__).parent / "spatialite.db"), - "--load-extension", - "spatialite", - ], + cli, ["serve", str(pathlib.Path(__file__).parent / "spatialite.db")] ) assert result.exit_code != 0 - assert "Could not find SpatiaLite extension" in result.output - - -def test_plugins_cli(app_client): - runner = CliRunner() - result1 = runner.invoke(cli, ["plugins"]) - actual_plugins = sorted( - [p for p in json.loads(result1.output) if p["name"] != "TrackEventPlugin"], - key=lambda p: p["name"], - ) - assert actual_plugins == EXPECTED_PLUGINS - # Try with --all - result2 = runner.invoke(cli, ["plugins", "--all"]) - names = [p["name"] for p in json.loads(result2.output)] - # Should have all the EXPECTED_PLUGINS - assert set(names).issuperset({p["name"] for p in EXPECTED_PLUGINS}) - # And the following too: - assert set(names).issuperset(DEFAULT_PLUGINS) - # --requirements should be empty because there are no installed non-plugins-dir plugins - result3 = runner.invoke(cli, ["plugins", "--requirements"]) - assert result3.output == "" - - -def test_metadata_yaml(): - yaml_file = io.StringIO( - textwrap.dedent( - """ - title: Hello from YAML - """ - ) - ) - # Annoyingly we have to provide all default arguments here: - ds = serve.callback( - [], - metadata=yaml_file, - immutable=[], - host="127.0.0.1", - port=8001, - uds=None, - reload=False, - cors=False, - sqlite_extensions=[], - inspect_file=None, - template_dir=None, - plugins_dir=None, - static=[], - memory=False, - config=[], - settings=[], - secret=None, - root=False, - default_deny=False, - token=None, - actor=None, - version_note=None, - get=None, - headers=False, - help_settings=False, - pdb=False, - crossdb=False, - nolock=False, - open_browser=False, - create=False, - ssl_keyfile=None, - ssl_certfile=None, - return_instance=True, - internal=None, - ) - client = _TestClient(ds) - response = client.get("/.json") - assert {"title": "Hello from YAML"} == response.json["metadata"] - - -@mock.patch("datasette.cli.run_module") -def test_install(run_module): - runner = CliRunner() - runner.invoke(cli, ["install", "datasette-mock-plugin", "datasette-mock-plugin2"]) - run_module.assert_called_once_with("pip", run_name="__main__") - assert sys.argv == [ - "pip", - "install", - "datasette-mock-plugin", - "datasette-mock-plugin2", - ] - - -@pytest.mark.parametrize("flag", ["-U", "--upgrade"]) -@mock.patch("datasette.cli.run_module") -def test_install_upgrade(run_module, flag): - runner = CliRunner() - runner.invoke(cli, ["install", flag, "datasette"]) - run_module.assert_called_once_with("pip", run_name="__main__") - assert sys.argv == ["pip", "install", "--upgrade", "datasette"] - - -@mock.patch("datasette.cli.run_module") -def test_install_requirements(run_module, tmpdir): - path = tmpdir.join("requirements.txt") - 
path.write("datasette-mock-plugin\ndatasette-plugin-2") - runner = CliRunner() - runner.invoke(cli, ["install", "-r", str(path)]) - run_module.assert_called_once_with("pip", run_name="__main__") - assert sys.argv == ["pip", "install", "-r", str(path)] - - -def test_install_error_if_no_packages(): - runner = CliRunner() - result = runner.invoke(cli, ["install"]) - assert result.exit_code == 2 - assert "Error: Please specify at least one package to install" in result.output - - -@mock.patch("datasette.cli.run_module") -def test_uninstall(run_module): - runner = CliRunner() - runner.invoke(cli, ["uninstall", "datasette-mock-plugin", "-y"]) - run_module.assert_called_once_with("pip", run_name="__main__") - assert sys.argv == ["pip", "uninstall", "datasette-mock-plugin", "-y"] - - -def test_version(): - runner = CliRunner() - result = runner.invoke(cli, ["--version"]) - assert result.output == f"cli, version {__version__}\n" - - -@pytest.mark.parametrize("invalid_port", ["-1", "0.5", "dog", "65536"]) -def test_serve_invalid_ports(invalid_port): - runner = CliRunner() - result = runner.invoke(cli, ["--port", invalid_port]) - assert result.exit_code == 2 - assert "Invalid value for '-p'" in result.stderr - - -@pytest.mark.parametrize( - "args", - ( - ["--setting", "default_page_size", "5"], - ["--setting", "settings.default_page_size", "5"], - ["-s", "settings.default_page_size", "5"], - ), -) -def test_setting(args): - runner = CliRunner() - result = runner.invoke(cli, ["--get", "/-/settings.json"] + args) - assert result.exit_code == 0, result.output - settings = json.loads(result.output) - assert settings["default_page_size"] == 5 - - -def test_setting_compatible_with_config(tmp_path): - # https://github.com/simonw/datasette/issues/2389 - runner = CliRunner() - config_path = tmp_path / "config.json" - config_path.write_text( - '{"settings": {"default_page_size": 5, "sql_time_limit_ms": 50}}', "utf-8" - ) - result = runner.invoke( - cli, - [ - "--get", - "/-/settings.json", - "--config", - str(config_path), - "--setting", - "default_page_size", - "10", - ], - ) - assert result.exit_code == 0, result.output - settings = json.loads(result.output) - assert settings["default_page_size"] == 10 - assert settings["sql_time_limit_ms"] == 50 - - -def test_plugin_s_overwrite(): - runner = CliRunner() - plugins_dir = str(pathlib.Path(__file__).parent / "plugins") - - result = runner.invoke( - cli, - [ - "--plugins-dir", - plugins_dir, - "--get", - "/_memory/-/query.json?sql=select+prepare_connection_args()", - ], - ) - assert result.exit_code == 0, result.output - assert ( - json.loads(result.output).get("rows")[0].get("prepare_connection_args()") - == 'database=_memory, datasette.plugin_config("name-of-plugin")=None' - ) - - result = runner.invoke( - cli, - [ - "--plugins-dir", - plugins_dir, - "--get", - "/_memory/-/query.json?sql=select+prepare_connection_args()", - "-s", - "plugins.name-of-plugin", - "OVERRIDE", - ], - ) - assert result.exit_code == 0, result.output - assert ( - json.loads(result.output).get("rows")[0].get("prepare_connection_args()") - == 'database=_memory, datasette.plugin_config("name-of-plugin")=OVERRIDE' - ) - - -def test_setting_type_validation(): - runner = CliRunner() - result = runner.invoke(cli, ["--setting", "default_page_size", "dog"]) - assert result.exit_code == 2 - assert '"settings.default_page_size" should be an integer' in result.output - - -def test_setting_boolean_validation_invalid(): - """Test that invalid boolean values are rejected""" - runner = CliRunner() - 
result = runner.invoke( - cli, ["--setting", "default_allow_sql", "invalid", "--get", "/-/settings.json"] - ) - assert result.exit_code == 2 - assert ( - '"settings.default_allow_sql" should be on/off/true/false/1/0' in result.output - ) - - -@pytest.mark.parametrize("value", ("off", "false", "0")) -def test_setting_boolean_validation_false_values(value): - """Test that 'off', 'false', '0' work for boolean settings""" - runner = CliRunner() - result = runner.invoke( - cli, - [ - "--setting", - "default_allow_sql", - value, - "--get", - "/_memory/-/query.json?sql=select+1", - ], - ) - # Should be forbidden (setting is false) - assert result.exit_code == 1, result.output - assert "Forbidden" in result.output - - -@pytest.mark.parametrize("value", ("on", "true", "1")) -def test_setting_boolean_validation_true_values(value): - """Test that 'on', 'true', '1' work for boolean settings""" - runner = CliRunner() - result = runner.invoke( - cli, - [ - "--setting", - "default_allow_sql", - value, - "--get", - "/_memory/-/query.json?sql=select+1&_shape=objects", - ], - ) - # Should succeed (setting is true) - assert result.exit_code == 0, result.output - assert json.loads(result.output)["rows"][0] == {"1": 1} - - -@pytest.mark.parametrize("default_allow_sql", (True, False)) -def test_setting_default_allow_sql(default_allow_sql): - runner = CliRunner() - result = runner.invoke( - cli, - [ - "--setting", - "default_allow_sql", - "on" if default_allow_sql else "off", - "--get", - "/_memory/-/query.json?sql=select+21&_shape=objects", - ], - ) - if default_allow_sql: - assert result.exit_code == 0, result.output - assert json.loads(result.output)["rows"][0] == {"21": 21} - else: - assert result.exit_code == 1, result.output - # This isn't JSON at the moment, maybe it should be though - assert "Forbidden" in result.output - - -def test_sql_errors_logged_to_stderr(): - runner = CliRunner() - result = runner.invoke(cli, ["--get", "/_memory/-/query.json?sql=select+blah"]) - assert result.exit_code == 1 - assert "sql = 'select blah', params = {}: no such column: blah\n" in result.stderr - - -def test_serve_create(tmpdir): - runner = CliRunner() - db_path = tmpdir / "does_not_exist_yet.db" - assert not db_path.exists() - result = runner.invoke( - cli, [str(db_path), "--create", "--get", "/-/databases.json"] - ) - assert result.exit_code == 0, result.output - databases = json.loads(result.output) - assert { - "name": "does_not_exist_yet", - "is_mutable": True, - "is_memory": False, - "hash": None, - }.items() <= databases[0].items() - assert db_path.exists() - - -@pytest.mark.parametrize("argument", ("-c", "--config")) -@pytest.mark.parametrize("format_", ("json", "yaml")) -def test_serve_config(tmpdir, argument, format_): - config_path = tmpdir / "datasette.{}".format(format_) - config_path.write_text( - ( - "settings:\n default_page_size: 5\n" - if format_ == "yaml" - else '{"settings": {"default_page_size": 5}}' - ), - "utf-8", - ) - runner = CliRunner() - result = runner.invoke( - cli, - [ - argument, - str(config_path), - "--get", - "/-/settings.json", - ], - ) - assert result.exit_code == 0, result.output - assert json.loads(result.output)["default_page_size"] == 5 - - -def test_serve_duplicate_database_names(tmpdir): - "'datasette db.db nested/db.db' should attach two databases, /db and /db_2" - runner = CliRunner() - db_1_path = str(tmpdir / "db.db") - nested = tmpdir / "nested" - nested.mkdir() - db_2_path = str(tmpdir / "nested" / "db.db") - for path in (db_1_path, db_2_path): - 
sqlite3.connect(path).execute("vacuum") - result = runner.invoke(cli, [db_1_path, db_2_path, "--get", "/-/databases.json"]) - assert result.exit_code == 0, result.output - databases = json.loads(result.output) - assert {db["name"] for db in databases} == {"db", "db_2"} - - -@pytest.mark.parametrize( - "filename", ["test-database (1).sqlite", "database (1).sqlite"] -) -def test_weird_database_names(tmpdir, filename): - # https://github.com/simonw/datasette/issues/1181 - runner = CliRunner() - db_path = str(tmpdir / filename) - sqlite3.connect(db_path).execute("vacuum") - result1 = runner.invoke(cli, [db_path, "--get", "/"]) - assert result1.exit_code == 0, result1.output - filename_no_stem = filename.rsplit(".", 1)[0] - expected_link = '{}'.format( - tilde_encode(filename_no_stem), filename_no_stem - ) - assert expected_link in result1.output - # Now try hitting that database page - result2 = runner.invoke( - cli, [db_path, "--get", "/{}".format(tilde_encode(filename_no_stem))] - ) - assert result2.exit_code == 0, result2.output - - -def test_help_settings(): - runner = CliRunner() - result = runner.invoke(cli, ["--help-settings"]) - for setting in SETTINGS: - assert setting.name in result.output - - -def test_internal_db(tmpdir): - runner = CliRunner() - internal_path = tmpdir / "internal.db" - assert not internal_path.exists() - result = runner.invoke( - cli, ["--memory", "--internal", str(internal_path), "--get", "/"] - ) - assert result.exit_code == 0 - assert internal_path.exists() - - -def test_duplicate_database_files_error(tmpdir): - """Test that passing the same database file multiple times raises an error""" - runner = CliRunner() - db_path = str(tmpdir / "test.db") - sqlite3.connect(db_path).execute("vacuum") - - # Test with exact duplicate - result = runner.invoke(cli, ["serve", db_path, db_path, "--get", "/"]) - assert result.exit_code == 1 - assert "Duplicate database file" in result.output - assert "both refer to" in result.output - - # Test with different paths to same file (relative vs absolute) - result2 = runner.invoke( - cli, ["serve", db_path, str(pathlib.Path(db_path).resolve()), "--get", "/"] - ) - assert result2.exit_code == 1 - assert "Duplicate database file" in result2.output - - # Test that a file in the config_dir can't also be passed explicitly - config_dir = tmpdir / "config" - config_dir.mkdir() - config_db_path = str(config_dir / "data.db") - sqlite3.connect(config_db_path).execute("vacuum") - - result3 = runner.invoke( - cli, ["serve", config_db_path, str(config_dir), "--get", "/"] - ) - assert result3.exit_code == 1 - assert "Duplicate database file" in result3.output - assert "both refer to" in result3.output - - # Test that mixing a file NOT in the directory with a directory works fine - other_db_path = str(tmpdir / "other.db") - sqlite3.connect(other_db_path).execute("vacuum") - - result4 = runner.invoke( - cli, ["serve", other_db_path, str(config_dir), "--get", "/-/databases.json"] - ) - assert result4.exit_code == 0 - databases = json.loads(result4.output) - assert {db["name"] for db in databases} == {"other", "data"} - - # Test that multiple directories raise an error - config_dir2 = tmpdir / "config2" - config_dir2.mkdir() - - result5 = runner.invoke( - cli, ["serve", str(config_dir), str(config_dir2), "--get", "/"] - ) - assert result5.exit_code == 1 - assert "Cannot pass multiple directories" in result5.output + assert "trying to load a SpatiaLite database" in result.output diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py 
deleted file mode 100644 index 5ad01bfa..00000000 --- a/tests/test_cli_serve_get.py +++ /dev/null @@ -1,139 +0,0 @@ -from datasette.cli import cli -from datasette.plugins import pm -from click.testing import CliRunner -import textwrap -import json - - -def test_serve_with_get(tmp_path_factory): - plugins_dir = tmp_path_factory.mktemp("plugins_for_serve_with_get") - (plugins_dir / "init_for_serve_with_get.py").write_text( - textwrap.dedent( - """ - from datasette import hookimpl - - @hookimpl - def startup(datasette): - with open("{}", "w") as fp: - fp.write("hello") - """.format( - str(plugins_dir / "hello.txt") - ), - ), - "utf-8", - ) - runner = CliRunner() - result = runner.invoke( - cli, - [ - "serve", - "--memory", - "--plugins-dir", - str(plugins_dir), - "--get", - "/_memory/-/query.json?sql=select+sqlite_version()", - ], - ) - assert result.exit_code == 0, result.output - data = json.loads(result.output) - # Should have a single row with a single column - assert len(data["rows"]) == 1 - assert list(data["rows"][0].keys()) == ["sqlite_version()"] - assert set(data.keys()) == {"rows", "ok", "truncated"} - - # The plugin should have created hello.txt - assert (plugins_dir / "hello.txt").read_text() == "hello" - - # Annoyingly that new test plugin stays resident - we need - # to manually unregister it to avoid conflict with other tests - to_unregister = [ - p for p in pm.get_plugins() if p.__name__ == "init_for_serve_with_get.py" - ][0] - pm.unregister(to_unregister) - - -def test_serve_with_get_headers(): - runner = CliRunner() - result = runner.invoke( - cli, - [ - "serve", - "--memory", - "--get", - "/_memory/", - "--headers", - ], - ) - # exit_code is 1 because it wasn't a 200 response - assert result.exit_code == 1, result.output - lines = result.output.splitlines() - assert lines and lines[0] == "HTTP/1.1 302" - assert "location: /_memory" in lines - assert "content-type: text/html; charset=utf-8" in lines - - -def test_serve_with_get_and_token(): - runner = CliRunner() - result1 = runner.invoke( - cli, - [ - "create-token", - "--secret", - "sekrit", - "root", - ], - ) - token = result1.output.strip() - result2 = runner.invoke( - cli, - [ - "serve", - "--secret", - "sekrit", - "--get", - "/-/actor.json", - "--token", - token, - ], - ) - assert 0 == result2.exit_code, result2.output - assert json.loads(result2.output) == {"actor": {"id": "root", "token": "dstok"}} - - -def test_serve_with_get_exit_code_for_error(): - runner = CliRunner() - result = runner.invoke( - cli, - [ - "serve", - "--memory", - "--get", - "/this-is-404", - ], - catch_exceptions=False, - ) - assert result.exit_code == 1 - assert "404" in result.output - - -def test_serve_get_actor(): - runner = CliRunner() - result = runner.invoke( - cli, - [ - "serve", - "--memory", - "--get", - "/-/actor.json", - "--actor", - '{"id": "root", "extra": "x"}', - ], - catch_exceptions=False, - ) - assert result.exit_code == 0 - assert json.loads(result.output) == { - "actor": { - "id": "root", - "extra": "x", - } - } diff --git a/tests/test_cli_serve_server.py b/tests/test_cli_serve_server.py deleted file mode 100644 index 47f23c08..00000000 --- a/tests/test_cli_serve_server.py +++ /dev/null @@ -1,29 +0,0 @@ -import httpx -import pytest -import socket - - -@pytest.mark.serial -def test_serve_localhost_http(ds_localhost_http_server): - response = httpx.get("http://localhost:8041/_memory.json") - assert { - "database": "_memory", - "path": "/_memory", - "tables": [], - }.items() <= response.json().items() - - -@pytest.mark.serial 
-@pytest.mark.skipif( - not hasattr(socket, "AF_UNIX"), reason="Requires socket.AF_UNIX support" -) -def test_serve_unix_domain_socket(ds_unix_domain_socket_server): - _, uds = ds_unix_domain_socket_server - transport = httpx.HTTPTransport(uds=uds) - client = httpx.Client(transport=transport) - response = client.get("http://localhost/_memory.json") - assert { - "database": "_memory", - "path": "/_memory", - "tables": [], - }.items() <= response.json().items() diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py deleted file mode 100644 index 0598a4a6..00000000 --- a/tests/test_config_dir.py +++ /dev/null @@ -1,151 +0,0 @@ -import json -import pathlib -import pytest - -from datasette.app import Datasette -from datasette.utils.sqlite import sqlite3 -from datasette.utils import StartupError -from .fixtures import TestClient as _TestClient - -PLUGIN = """ -from datasette import hookimpl - -@hookimpl -def extra_template_vars(): - return { - "from_plugin": "hooray" - } -""" -METADATA = {"title": "This is from metadata"} -CONFIG = { - "settings": { - "default_cache_ttl": 60, - } -} -CSS = """ -body { margin-top: 3em} -""" - - -@pytest.fixture(scope="session") -def config_dir(tmp_path_factory): - config_dir = tmp_path_factory.mktemp("config-dir") - plugins_dir = config_dir / "plugins" - plugins_dir.mkdir() - (plugins_dir / "hooray.py").write_text(PLUGIN, "utf-8") - (plugins_dir / "non_py_file.txt").write_text(PLUGIN, "utf-8") - (plugins_dir / ".mypy_cache").mkdir() - - templates_dir = config_dir / "templates" - templates_dir.mkdir() - (templates_dir / "row.html").write_text( - "Show row here. Plugin says {{ from_plugin }}", "utf-8" - ) - - static_dir = config_dir / "static" - static_dir.mkdir() - (static_dir / "hello.css").write_text(CSS, "utf-8") - - (config_dir / "metadata.json").write_text(json.dumps(METADATA), "utf-8") - (config_dir / "datasette.json").write_text(json.dumps(CONFIG), "utf-8") - - for dbname in ("demo.db", "immutable.db", "j.sqlite3", "k.sqlite"): - db = sqlite3.connect(str(config_dir / dbname)) - db.executescript( - """ - CREATE TABLE cities ( - id integer primary key, - name text - ); - INSERT INTO cities (id, name) VALUES - (1, 'San Francisco') - ; - """ - ) - - # Mark "immutable.db" as immutable - (config_dir / "inspect-data.json").write_text( - json.dumps( - { - "immutable": { - "hash": "hash", - "size": 8192, - "file": "immutable.db", - "tables": {"cities": {"count": 1}}, - } - } - ), - "utf-8", - ) - return config_dir - - -def test_invalid_settings(config_dir): - previous = (config_dir / "datasette.json").read_text("utf-8") - (config_dir / "datasette.json").write_text( - json.dumps({"settings": {"invalid": "invalid-setting"}}), "utf-8" - ) - try: - with pytest.raises(StartupError) as ex: - ds = Datasette([], config_dir=config_dir) - assert ex.value.args[0] == "Invalid setting 'invalid' in config file" - finally: - (config_dir / "datasette.json").write_text(previous, "utf-8") - - -@pytest.fixture(scope="session") -def config_dir_client(config_dir): - ds = Datasette([], config_dir=config_dir) - yield _TestClient(ds) - - -def test_settings(config_dir_client): - response = config_dir_client.get("/-/settings.json") - assert 200 == response.status - assert 60 == response.json["default_cache_ttl"] - - -def test_plugins(config_dir_client): - response = config_dir_client.get("/-/plugins.json") - assert 200 == response.status - assert "hooray.py" in {p["name"] for p in response.json} - assert "non_py_file.txt" not in {p["name"] for p in response.json} - assert 
"mypy_cache" not in {p["name"] for p in response.json} - - -def test_templates_and_plugin(config_dir_client): - response = config_dir_client.get("/demo/cities/1") - assert 200 == response.status - assert "Show row here. Plugin says hooray" == response.text - - -def test_static(config_dir_client): - response = config_dir_client.get("/static/hello.css") - assert 200 == response.status - assert CSS == response.text - assert "text/css" == response.headers["content-type"] - - -def test_static_directory_browsing_not_allowed(config_dir_client): - response = config_dir_client.get("/static/") - assert 403 == response.status - assert "403: Directory listing is not allowed" == response.text - - -def test_databases(config_dir_client): - response = config_dir_client.get("/-/databases.json") - assert 200 == response.status - databases = response.json - assert 4 == len(databases) - databases.sort(key=lambda d: d["name"]) - for db, expected_name in zip(databases, ("demo", "immutable", "j", "k")): - assert expected_name == db["name"] - assert db["is_mutable"] == (expected_name != "immutable") - - -def test_store_config_dir(config_dir_client): - ds = config_dir_client.ds - - assert hasattr(ds, "config_dir") - assert ds.config_dir is not None - assert isinstance(ds.config_dir, pathlib.Path) diff --git a/tests/test_config_permission_rules.py b/tests/test_config_permission_rules.py deleted file mode 100644 index 8327ecbf..00000000 --- a/tests/test_config_permission_rules.py +++ /dev/null @@ -1,163 +0,0 @@ -import pytest - -from datasette.app import Datasette -from datasette.database import Database -from datasette.resources import DatabaseResource, TableResource - - -async def setup_datasette(config=None, databases=None): - ds = Datasette(memory=True, config=config) - for name in databases or []: - ds.add_database(Database(ds, memory_name=f"{name}_memory"), name=name) - await ds.invoke_startup() - await ds.refresh_schemas() - return ds - - -@pytest.mark.asyncio -async def test_root_permissions_allow(): - config = {"permissions": {"execute-sql": {"id": "alice"}}} - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "bob"}, - ) - - -@pytest.mark.asyncio -async def test_database_permission(): - config = { - "databases": { - "content": { - "permissions": { - "insert-row": {"id": "alice"}, - } - } - } - } - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="insert-row", - resource=TableResource(database="content", table="repos"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action="insert-row", - resource=TableResource(database="content", table="repos"), - actor={"id": "bob"}, - ) - - -@pytest.mark.asyncio -async def test_table_permission(): - config = { - "databases": { - "content": { - "tables": {"repos": {"permissions": {"delete-row": {"id": "alice"}}}} - } - } - } - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="delete-row", - resource=TableResource(database="content", table="repos"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action="delete-row", - resource=TableResource(database="content", table="repos"), - actor={"id": "bob"}, - ) - - -@pytest.mark.asyncio -async def test_view_table_allow_block(): - config 
= { - "databases": {"content": {"tables": {"repos": {"allow": {"id": "alice"}}}}} - } - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="view-table", - resource=TableResource(database="content", table="repos"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action="view-table", - resource=TableResource(database="content", table="repos"), - actor={"id": "bob"}, - ) - assert await ds.allowed( - action="view-table", - resource=TableResource(database="content", table="other"), - actor={"id": "bob"}, - ) - - -@pytest.mark.asyncio -async def test_view_table_allow_false_blocks(): - config = {"databases": {"content": {"tables": {"repos": {"allow": False}}}}} - ds = await setup_datasette(config=config, databases=["content"]) - - assert not await ds.allowed( - action="view-table", - resource=TableResource(database="content", table="repos"), - actor={"id": "alice"}, - ) - - -@pytest.mark.asyncio -async def test_allow_sql_blocks(): - config = {"allow_sql": {"id": "alice"}} - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "bob"}, - ) - - config = {"databases": {"content": {"allow_sql": {"id": "bob"}}}} - ds = await setup_datasette(config=config, databases=["content"]) - - assert await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "bob"}, - ) - assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "alice"}, - ) - - config = {"allow_sql": False} - ds = await setup_datasette(config=config, databases=["content"]) - assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource(database="content"), - actor={"id": "alice"}, - ) - - -@pytest.mark.asyncio -async def test_view_instance_allow_block(): - config = {"allow": {"id": "alice"}} - ds = await setup_datasette(config=config) - - assert await ds.allowed(action="view-instance", actor={"id": "alice"}) - assert not await ds.allowed(action="view-instance", actor={"id": "bob"}) diff --git a/tests/test_crossdb.py b/tests/test_crossdb.py deleted file mode 100644 index 1ec1a05c..00000000 --- a/tests/test_crossdb.py +++ /dev/null @@ -1,76 +0,0 @@ -from datasette.cli import cli -from click.testing import CliRunner -import urllib -import sqlite3 - - -def test_crossdb_join(app_client_two_attached_databases_crossdb_enabled): - app_client = app_client_two_attached_databases_crossdb_enabled - sql = """ - select - 'extra database' as db, - pk, - text1, - text2 - from - [extra database].searchable - union all - select - 'fixtures' as db, - pk, - text1, - text2 - from - fixtures.searchable - """ - response = app_client.get( - "/_memory/-/query.json?" 
- + urllib.parse.urlencode({"sql": sql, "_shape": "array"}) - ) - assert response.status == 200 - assert response.json == [ - {"db": "extra database", "pk": 1, "text1": "barry cat", "text2": "terry dog"}, - {"db": "extra database", "pk": 2, "text1": "terry dog", "text2": "sara weasel"}, - {"db": "fixtures", "pk": 1, "text1": "barry cat", "text2": "terry dog"}, - {"db": "fixtures", "pk": 2, "text1": "terry dog", "text2": "sara weasel"}, - ] - - -def test_crossdb_warning_if_too_many_databases(tmp_path_factory): - db_dir = tmp_path_factory.mktemp("dbs") - dbs = [] - for i in range(11): - path = str(db_dir / "db_{}.db".format(i)) - conn = sqlite3.connect(path) - conn.execute("vacuum") - dbs.append(path) - runner = CliRunner() - result = runner.invoke( - cli, - [ - "serve", - "--crossdb", - "--get", - "/", - ] - + dbs, - catch_exceptions=False, - ) - assert ( - "Warning: --crossdb only works with the first 10 attached databases" - in result.stderr - ) - - -def test_crossdb_attached_database_list_display( - app_client_two_attached_databases_crossdb_enabled, -): - app_client = app_client_two_attached_databases_crossdb_enabled - response = app_client.get("/_memory") - response2 = app_client.get("/") - for fragment in ( - "databases are attached to this connection", - "
  • fixtures - ", - '
  • extra database - " - - -def test_redirect(custom_pages_client): - response = custom_pages_client.get("/redirect") - assert response.status == 302 - assert response.headers["Location"] == "/example" - - -def test_redirect2(custom_pages_client): - response = custom_pages_client.get("/redirect2") - assert response.status == 301 - assert response.headers["Location"] == "/example" - - -@pytest.mark.parametrize( - "path,expected", - [ - ("/route_Sally", "
Hello from Sally
    "), - ("/topic_python", "Topic page for python"), - ("/topic_python/info", "Slug: info, Topic: python"), - ], -) -def test_custom_route_pattern(custom_pages_client, path, expected): - response = custom_pages_client.get(path) - assert response.status == 200 - assert response.text.strip() == expected - - -def test_custom_route_pattern_404(custom_pages_client): - response = custom_pages_client.get("/route_OhNo") - assert response.status == 404 - assert "
Error 404
    " in response.text - assert ">Oh no /dev/null ); do - if [ $waiting -eq 4 ]; then - echo "$server_pid does still exist, server failed to stop" - cleanup - exit 1 - fi - let waiting=waiting+1 - sleep 1 -done - -# Clean up the certificates -cleanup - -echo $curl_exit_code -exit $curl_exit_code diff --git a/tests/test_default_deny.py b/tests/test_default_deny.py deleted file mode 100644 index 81e95b84..00000000 --- a/tests/test_default_deny.py +++ /dev/null @@ -1,129 +0,0 @@ -import pytest -from datasette.app import Datasette -from datasette.resources import DatabaseResource, TableResource - - -@pytest.mark.asyncio -async def test_default_deny_denies_default_permissions(): - """Test that default_deny=True denies default permissions""" - # Without default_deny, anonymous users can view instance/database/tables - ds_normal = Datasette() - await ds_normal.invoke_startup() - - # Add a test database - db = ds_normal.add_memory_database("test_db_normal") - await db.execute_write("create table test_table (id integer primary key)") - await ds_normal._refresh_schemas() # Trigger catalog refresh - - # Test default behavior - anonymous user should be able to view - response = await ds_normal.client.get("/") - assert response.status_code == 200 - - response = await ds_normal.client.get("/test_db_normal") - assert response.status_code == 200 - - response = await ds_normal.client.get("/test_db_normal/test_table") - assert response.status_code == 200 - - # With default_deny=True, anonymous users should be denied - ds_deny = Datasette(default_deny=True) - await ds_deny.invoke_startup() - - # Add the same test database - db = ds_deny.add_memory_database("test_db_deny") - await db.execute_write("create table test_table (id integer primary key)") - await ds_deny._refresh_schemas() # Trigger catalog refresh - - # Anonymous user should be denied - response = await ds_deny.client.get("/") - assert response.status_code == 403 - - response = await ds_deny.client.get("/test_db_deny") - assert response.status_code == 403 - - response = await ds_deny.client.get("/test_db_deny/test_table") - assert response.status_code == 403 - - -@pytest.mark.asyncio -async def test_default_deny_with_root_user(): - """Test that root user still has access when default_deny=True""" - ds = Datasette(default_deny=True) - ds.root_enabled = True - await ds.invoke_startup() - - root_actor = {"id": "root"} - - # Root user should have all permissions even with default_deny - assert await ds.allowed(action="view-instance", actor=root_actor) is True - assert ( - await ds.allowed( - action="view-database", - actor=root_actor, - resource=DatabaseResource("test_db"), - ) - is True - ) - assert ( - await ds.allowed( - action="view-table", - actor=root_actor, - resource=TableResource("test_db", "test_table"), - ) - is True - ) - assert ( - await ds.allowed( - action="execute-sql", actor=root_actor, resource=DatabaseResource("test_db") - ) - is True - ) - - -@pytest.mark.asyncio -async def test_default_deny_with_config_allow(): - """Test that config allow rules still work with default_deny=True""" - ds = Datasette(default_deny=True, config={"allow": {"id": "user1"}}) - await ds.invoke_startup() - - # Anonymous user should be denied - assert await ds.allowed(action="view-instance", actor=None) is False - - # Authenticated user with explicit permission should have access - assert await ds.allowed(action="view-instance", actor={"id": "user1"}) is True - - # Different user should be denied - assert await ds.allowed(action="view-instance", actor={"id": 
"user2"}) is False - - -@pytest.mark.asyncio -async def test_default_deny_basic_permissions(): - """Test that default_deny=True denies basic permissions""" - ds = Datasette(default_deny=True) - await ds.invoke_startup() - - # Anonymous user should be denied all default permissions - assert await ds.allowed(action="view-instance", actor=None) is False - assert ( - await ds.allowed( - action="view-database", actor=None, resource=DatabaseResource("test_db") - ) - is False - ) - assert ( - await ds.allowed( - action="view-table", - actor=None, - resource=TableResource("test_db", "test_table"), - ) - is False - ) - assert ( - await ds.allowed( - action="execute-sql", actor=None, resource=DatabaseResource("test_db") - ) - is False - ) - - # Authenticated user without explicit permission should also be denied - assert await ds.allowed(action="view-instance", actor={"id": "user"}) is False diff --git a/tests/test_docs.py b/tests/test_docs.py index b94a6f23..d481a633 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,9 +1,9 @@ """ Tests to ensure certain things are documented. """ - -from datasette import app, utils -from datasette.app import Datasette +from click.testing import CliRunner +from datasette import app +from datasette.cli import cli from datasette.filters import Filters from pathlib import Path import pytest @@ -13,47 +13,49 @@ docs_path = Path(__file__).parent.parent / "docs" label_re = re.compile(r"\.\. _([^\s:]+):") -def get_headings(content, underline="-"): +def get_headings(filename, underline="-"): + content = (docs_path / filename).open().read() heading_re = re.compile(r"(\w+)(\([^)]*\))?\n\{}+\n".format(underline)) - return {h[0] for h in heading_re.findall(content)} + return set(h[0] for h in heading_re.findall(content)) def get_labels(filename): - content = (docs_path / filename).read_text() + content = (docs_path / filename).open().read() return set(label_re.findall(content)) -@pytest.fixture(scope="session") -def settings_headings(): - return get_headings((docs_path / "settings.rst").read_text(), "~") +@pytest.mark.parametrize("config", app.CONFIG_OPTIONS) +def test_config_options_are_documented(config): + assert config.name in get_headings("config.rst") -def test_settings_are_documented(settings_headings, subtests): - for setting in app.SETTINGS: - with subtests.test(setting=setting.name): - assert setting.name in settings_headings +@pytest.mark.parametrize( + "name,filename", + ( + ("serve", "datasette-serve-help.txt"), + ("package", "datasette-package-help.txt"), + ("publish nowv1", "datasette-publish-nowv1-help.txt"), + ("publish heroku", "datasette-publish-heroku-help.txt"), + ("publish cloudrun", "datasette-publish-cloudrun-help.txt"), + ), +) +def test_help_includes(name, filename): + expected = open(str(docs_path / filename)).read() + runner = CliRunner() + result = runner.invoke(cli, name.split() + ["--help"], terminal_width=88) + actual = "$ datasette {} --help\n\n{}".format(name, result.output) + # actual has "Usage: cli package [OPTIONS] FILES" + # because it doesn't know that cli will be aliased to datasette + expected = expected.replace("Usage: datasette", "Usage: cli") + assert expected == actual -@pytest.fixture(scope="session") -def plugin_hooks_content(): - return (docs_path / "plugin_hooks.rst").read_text() - - -def test_plugin_hooks_are_documented(plugin_hooks_content, subtests): - headings = set() - headings.update(get_headings(plugin_hooks_content, "-")) - headings.update(get_headings(plugin_hooks_content, "~")) - plugins = [name for name 
in dir(app.pm.hook) if not name.startswith("_")] - for plugin in plugins: - with subtests.test(plugin=plugin): - assert plugin in headings - hook_caller = getattr(app.pm.hook, plugin) - arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] - # Check for plugin_name(arg1, arg2, arg3) - expected = f"{plugin}({', '.join(arg_names)})" - assert ( - expected in plugin_hooks_content - ), f"Missing from plugin hook documentation: {expected}" +@pytest.mark.parametrize( + "plugin", [name for name in dir(app.pm.hook) if not name.startswith("_")] +) +def test_plugin_hooks_are_documented(plugin): + headings = [s.split("(")[0] for s in get_headings("plugins.rst", "~")] + assert plugin in headings @pytest.fixture(scope="session") @@ -64,16 +66,12 @@ def documented_views(): first_word = label.split("_")[0] if first_word.endswith("View"): view_labels.add(first_word) - # We deliberately don't document these: - view_labels.update(("PatternPortfolioView", "AuthTokenView", "ApiExplorerView")) return view_labels -def test_view_classes_are_documented(documented_views, subtests): - view_classes = [v for v in dir(app) if v.endswith("View")] - for view_class in view_classes: - with subtests.test(view_class=view_class): - assert view_class in documented_views +@pytest.mark.parametrize("view_class", [v for v in dir(app) if v.endswith("View")]) +def test_view_classes_are_documented(documented_views, view_class): + assert view_class in documented_views @pytest.fixture(scope="session") @@ -81,122 +79,13 @@ def documented_table_filters(): json_api_rst = (docs_path / "json_api.rst").read_text() section = json_api_rst.split(".. _table_arguments:")[-1] # Lines starting with ``?column__exact= are docs for filters - return { + return set( line.split("__")[1].split("=")[0] for line in section.split("\n") if line.startswith("``?column__") - } + ) -def test_table_filters_are_documented(documented_table_filters, subtests): - for f in Filters._filters: - with subtests.test(filter=f.key): - assert f.key in documented_table_filters - - -@pytest.fixture(scope="session") -def documented_fns(): - internals_rst = (docs_path / "internals.rst").read_text() - # Any line that starts .. _internals_utils_X - lines = internals_rst.split("\n") - prefix = ".. 
_internals_utils_" - return { - line.split(prefix)[1].split(":")[0] for line in lines if line.startswith(prefix) - } - - -def test_functions_marked_with_documented_are_documented(documented_fns, subtests): - for fn in utils.functions_marked_as_documented: - with subtests.test(fn=fn.__name__): - assert fn.__name__ in documented_fns - - -def test_rst_heading_underlines_match_title_length(): - """Test that RST heading underlines are the same length as their titles.""" - # Common RST underline characters - underline_chars = ["-", "=", "~", "^", "+", "*", "#"] - - errors = [] - - for rst_file in docs_path.glob("*.rst"): - content = rst_file.read_text() - lines = content.split("\n") - - for i in range(len(lines) - 1): - current_line = lines[i] - next_line = lines[i + 1] - - # Check if next line is entirely made of a single underline character - # and is at least 5 characters long (to avoid false positives) - if ( - next_line - and len(next_line) >= 5 - and len(set(next_line)) == 1 - and next_line[0] in underline_chars - ): - # Skip if the previous line is empty (blank line before underline) - if not current_line: - continue - - # Check if this is an overline+underline style heading - # Look at the line before current_line to see if it's also an underline - if i > 0: - prev_line = lines[i - 1] - if ( - prev_line - and len(prev_line) >= 5 - and len(set(prev_line)) == 1 - and prev_line[0] in underline_chars - and len(prev_line) == len(next_line) - ): - # This is overline+underline style, skip it - continue - - # This is a heading underline - title_length = len(current_line) - underline_length = len(next_line) - - if title_length != underline_length: - errors.append( - f"{rst_file.name}:{i+1}: Title length {title_length} != underline length {underline_length}\n" - f" Title: {current_line!r}\n" - f" Underline: {next_line!r}" - ) - - if errors: - raise AssertionError( - f"Found {len(errors)} RST heading(s) with mismatched underline length:\n\n" - + "\n\n".join(errors) - ) - - -# Tests for testing_plugins.rst documentation - -# fmt: off -# -- start test_homepage -- -@pytest.mark.asyncio -async def test_homepage(): - ds = Datasette(memory=True) - response = await ds.client.get("/") - html = response.text - assert "
<h1>
    " in html -# -- end test_homepage -- - - -# -- start test_actor_is_null -- -@pytest.mark.asyncio -async def test_actor_is_null(): - ds = Datasette(memory=True) - response = await ds.client.get("/-/actor.json") - assert response.json() == {"actor": None} -# -- end test_actor_is_null -- - - -# -- start test_signed_cookie_actor -- -@pytest.mark.asyncio -async def test_signed_cookie_actor(): - ds = Datasette(memory=True) - cookies = {"ds_actor": ds.client.actor_cookie({"id": "root"})} - response = await ds.client.get("/-/actor.json", cookies=cookies) - assert response.json() == {"actor": {"id": "root"}} -# -- end test_signed_cookie_actor -- +@pytest.mark.parametrize("filter", [f.key for f in Filters._filters]) +def test_table_filters_are_documented(documented_table_filters, filter): + assert filter in documented_table_filters diff --git a/tests/test_docs_plugins.py b/tests/test_docs_plugins.py deleted file mode 100644 index c51858d3..00000000 --- a/tests/test_docs_plugins.py +++ /dev/null @@ -1,34 +0,0 @@ -# fmt: off -# -- start datasette_with_plugin_fixture -- -from datasette import hookimpl -from datasette.app import Datasette -import pytest -import pytest_asyncio - - -@pytest_asyncio.fixture -async def datasette_with_plugin(): - class TestPlugin: - __name__ = "TestPlugin" - - @hookimpl - def register_routes(self): - return [ - (r"^/error$", lambda: 1 / 0), - ] - - datasette = Datasette() - datasette.pm.register(TestPlugin(), name="undo") - try: - yield datasette - finally: - datasette.pm.unregister(name="undo") -# -- end datasette_with_plugin_fixture -- - - -# -- start datasette_with_plugin_test -- -@pytest.mark.asyncio -async def test_error(datasette_with_plugin): - response = await datasette_with_plugin.client.get("/error") - assert response.status_code == 500 -# -- end datasette_with_plugin_test -- diff --git a/tests/test_facets.py b/tests/test_facets.py index a2b505ec..9169f666 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -1,18 +1,15 @@ -from datasette.app import Datasette -from datasette.database import Database -from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet -from datasette.utils.asgi import Request +from datasette.facets import ColumnFacet, ArrayFacet, DateFacet from datasette.utils import detect_json1 -from .fixtures import make_app_client -import json +from .fixtures import app_client # noqa +from .utils import MockRequest import pytest @pytest.mark.asyncio -async def test_column_facet_suggest(ds_client): +async def test_column_facet_suggest(app_client): facet = ColumnFacet( - ds_client.ds, - Request.fake("/"), + app_client.ds, + MockRequest("http://localhost/"), database="fixtures", sql="select * from facetable", table="facetable", @@ -23,24 +20,17 @@ async def test_column_facet_suggest(ds_client): {"name": "planet_int", "toggle_url": "http://localhost/?_facet=planet_int"}, {"name": "on_earth", "toggle_url": "http://localhost/?_facet=on_earth"}, {"name": "state", "toggle_url": "http://localhost/?_facet=state"}, - {"name": "_city_id", "toggle_url": "http://localhost/?_facet=_city_id"}, - { - "name": "_neighborhood", - "toggle_url": "http://localhost/?_facet=_neighborhood", - }, + {"name": "city_id", "toggle_url": "http://localhost/?_facet=city_id"}, + {"name": "neighborhood", "toggle_url": "http://localhost/?_facet=neighborhood"}, {"name": "tags", "toggle_url": "http://localhost/?_facet=tags"}, - { - "name": "complex_array", - "toggle_url": "http://localhost/?_facet=complex_array", - }, ] == suggestions @pytest.mark.asyncio -async def 
test_column_facet_suggest_skip_if_already_selected(ds_client): +async def test_column_facet_suggest_skip_if_already_selected(app_client): facet = ColumnFacet( - ds_client.ds, - Request.fake("/?_facet=planet_int&_facet=on_earth"), + app_client.ds, + MockRequest("http://localhost/?_facet=planet_int&_facet=on_earth"), database="fixtures", sql="select * from facetable", table="facetable", @@ -56,33 +46,29 @@ async def test_column_facet_suggest_skip_if_already_selected(ds_client): "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=state", }, { - "name": "_city_id", - "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=_city_id", + "name": "city_id", + "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=city_id", }, { - "name": "_neighborhood", - "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=_neighborhood", + "name": "neighborhood", + "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=neighborhood", }, { "name": "tags", "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=tags", }, - { - "name": "complex_array", - "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=complex_array", - }, ] == suggestions @pytest.mark.asyncio -async def test_column_facet_suggest_skip_if_enabled_by_metadata(ds_client): +async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client): facet = ColumnFacet( - ds_client.ds, - Request.fake("/"), + app_client.ds, + MockRequest("http://localhost/"), database="fixtures", sql="select * from facetable", table="facetable", - table_config={"facets": ["_city_id"]}, + metadata={"facets": ["city_id"]}, ) suggestions = [s["name"] for s in await facet.suggest()] assert [ @@ -90,26 +76,25 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(ds_client): "planet_int", "on_earth", "state", - "_neighborhood", + "neighborhood", "tags", - "complex_array", ] == suggestions @pytest.mark.asyncio -async def test_column_facet_results(ds_client): +async def test_column_facet_results(app_client): facet = ColumnFacet( - ds_client.ds, - Request.fake("/?_facet=_city_id"), + app_client.ds, + MockRequest("http://localhost/?_facet=city_id"), database="fixtures", sql="select * from facetable", table="facetable", ) buckets, timed_out = await facet.facet_results() assert [] == timed_out - assert [ - { - "name": "_city_id", + assert { + "city_id": { + "name": "city_id", "type": "column", "hideable": True, "toggle_url": "/", @@ -118,173 +103,51 @@ async def test_column_facet_results(ds_client): "value": 1, "label": "San Francisco", "count": 6, - "toggle_url": "http://localhost/?_facet=_city_id&_city_id__exact=1", + "toggle_url": "http://localhost/?_facet=city_id&city_id=1", "selected": False, }, { "value": 2, "label": "Los Angeles", "count": 4, - "toggle_url": "http://localhost/?_facet=_city_id&_city_id__exact=2", + "toggle_url": "http://localhost/?_facet=city_id&city_id=2", "selected": False, }, { "value": 3, "label": "Detroit", "count": 4, - "toggle_url": "http://localhost/?_facet=_city_id&_city_id__exact=3", + "toggle_url": "http://localhost/?_facet=city_id&city_id=3", "selected": False, }, { "value": 4, "label": "Memnonia", "count": 1, - "toggle_url": "http://localhost/?_facet=_city_id&_city_id__exact=4", + "toggle_url": "http://localhost/?_facet=city_id&city_id=4", "selected": False, }, ], "truncated": False, } - ] == buckets + } == buckets @pytest.mark.asyncio -async def 
test_column_facet_results_column_starts_with_underscore(ds_client): +async def test_column_facet_from_metadata_cannot_be_hidden(app_client): facet = ColumnFacet( - ds_client.ds, - Request.fake("/?_facet=_neighborhood"), + app_client.ds, + MockRequest("http://localhost/"), database="fixtures", sql="select * from facetable", table="facetable", + metadata={"facets": ["city_id"]}, ) buckets, timed_out = await facet.facet_results() assert [] == timed_out - assert buckets == [ - { - "name": "_neighborhood", - "type": "column", - "hideable": True, - "toggle_url": "/", - "results": [ - { - "value": "Downtown", - "label": "Downtown", - "count": 2, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Downtown", - "selected": False, - }, - { - "value": "Arcadia Planitia", - "label": "Arcadia Planitia", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Arcadia+Planitia", - "selected": False, - }, - { - "value": "Bernal Heights", - "label": "Bernal Heights", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Bernal+Heights", - "selected": False, - }, - { - "value": "Corktown", - "label": "Corktown", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Corktown", - "selected": False, - }, - { - "value": "Dogpatch", - "label": "Dogpatch", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Dogpatch", - "selected": False, - }, - { - "value": "Greektown", - "label": "Greektown", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Greektown", - "selected": False, - }, - { - "value": "Hayes Valley", - "label": "Hayes Valley", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Hayes+Valley", - "selected": False, - }, - { - "value": "Hollywood", - "label": "Hollywood", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Hollywood", - "selected": False, - }, - { - "value": "Koreatown", - "label": "Koreatown", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Koreatown", - "selected": False, - }, - { - "value": "Los Feliz", - "label": "Los Feliz", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Los+Feliz", - "selected": False, - }, - { - "value": "Mexicantown", - "label": "Mexicantown", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Mexicantown", - "selected": False, - }, - { - "value": "Mission", - "label": "Mission", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Mission", - "selected": False, - }, - { - "value": "SOMA", - "label": "SOMA", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=SOMA", - "selected": False, - }, - { - "value": "Tenderloin", - "label": "Tenderloin", - "count": 1, - "toggle_url": "http://localhost/?_facet=_neighborhood&_neighborhood__exact=Tenderloin", - "selected": False, - }, - ], - "truncated": False, - } - ] - - -@pytest.mark.asyncio -async def test_column_facet_from_metadata_cannot_be_hidden(ds_client): - facet = ColumnFacet( - ds_client.ds, - Request.fake("/"), - database="fixtures", - sql="select * from facetable", - table="facetable", - table_config={"facets": ["_city_id"]}, - ) - buckets, timed_out = await facet.facet_results() - assert [] == timed_out - assert [ - { - 
"name": "_city_id", + assert { + "city_id": { + "name": "city_id", "type": "column", "hideable": False, "toggle_url": "/", @@ -293,42 +156,42 @@ async def test_column_facet_from_metadata_cannot_be_hidden(ds_client): "value": 1, "label": "San Francisco", "count": 6, - "toggle_url": "http://localhost/?_city_id__exact=1", + "toggle_url": "http://localhost/?city_id=1", "selected": False, }, { "value": 2, "label": "Los Angeles", "count": 4, - "toggle_url": "http://localhost/?_city_id__exact=2", + "toggle_url": "http://localhost/?city_id=2", "selected": False, }, { "value": 3, "label": "Detroit", "count": 4, - "toggle_url": "http://localhost/?_city_id__exact=3", + "toggle_url": "http://localhost/?city_id=3", "selected": False, }, { "value": 4, "label": "Memnonia", "count": 1, - "toggle_url": "http://localhost/?_city_id__exact=4", + "toggle_url": "http://localhost/?city_id=4", "selected": False, }, ], "truncated": False, } - ] == buckets + } == buckets @pytest.mark.asyncio @pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -async def test_array_facet_suggest(ds_client): +async def test_array_facet_suggest(app_client): facet = ArrayFacet( - ds_client.ds, - Request.fake("/"), + app_client.ds, + MockRequest("http://localhost/"), database="fixtures", sql="select * from facetable", table="facetable", @@ -345,32 +208,18 @@ async def test_array_facet_suggest(ds_client): @pytest.mark.asyncio @pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -async def test_array_facet_suggest_not_if_all_empty_arrays(ds_client): +async def test_array_facet_results(app_client): facet = ArrayFacet( - ds_client.ds, - Request.fake("/"), - database="fixtures", - sql="select * from facetable where tags = '[]'", - table="facetable", - ) - suggestions = await facet.suggest() - assert [] == suggestions - - -@pytest.mark.asyncio -@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -async def test_array_facet_results(ds_client): - facet = ArrayFacet( - ds_client.ds, - Request.fake("/?_facet_array=tags"), + app_client.ds, + MockRequest("http://localhost/?_facet_array=tags"), database="fixtures", sql="select * from facetable", table="facetable", ) buckets, timed_out = await facet.facet_results() assert [] == timed_out - assert [ - { + assert { + "tags": { "name": "tags", "type": "array", "results": [ @@ -400,77 +249,22 @@ async def test_array_facet_results(ds_client): "toggle_url": "/", "truncated": False, } - ] == buckets + } == buckets @pytest.mark.asyncio -@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -async def test_array_facet_handle_duplicate_tags(): - ds = Datasette([], memory=True) - db = ds.add_database(Database(ds, memory_name="test_array_facet")) - await db.execute_write("create table otters(name text, tags text)") - for name, tags in ( - ("Charles", ["friendly", "cunning", "friendly"]), - ("Shaun", ["cunning", "empathetic", "friendly"]), - ("Tracy", ["empathetic", "eager"]), - ): - await db.execute_write( - "insert into otters (name, tags) values (?, ?)", [name, json.dumps(tags)] - ) - - response = await ds.client.get("/test_array_facet/otters.json?_facet_array=tags") - assert response.json()["facet_results"]["results"]["tags"] == { - "name": "tags", - "type": "array", - "results": [ - { - "value": "cunning", - "label": "cunning", - "count": 2, - "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=cunning", - "selected": False, - }, - { - 
"value": "empathetic", - "label": "empathetic", - "count": 2, - "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=empathetic", - "selected": False, - }, - { - "value": "friendly", - "label": "friendly", - "count": 2, - "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=friendly", - "selected": False, - }, - { - "value": "eager", - "label": "eager", - "count": 1, - "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=eager", - "selected": False, - }, - ], - "hideable": True, - "toggle_url": "/test_array_facet/otters.json", - "truncated": False, - } - - -@pytest.mark.asyncio -async def test_date_facet_results(ds_client): +async def test_date_facet_results(app_client): facet = DateFacet( - ds_client.ds, - Request.fake("/?_facet_date=created"), + app_client.ds, + MockRequest("http://localhost/?_facet_date=created"), database="fixtures", sql="select * from facetable", table="facetable", ) buckets, timed_out = await facet.facet_results() assert [] == timed_out - assert [ - { + assert { + "created": { "name": "created", "type": "date", "results": [ @@ -507,194 +301,4 @@ async def test_date_facet_results(ds_client): "toggle_url": "/", "truncated": False, } - ] == buckets - - -@pytest.mark.asyncio -async def test_json_array_with_blanks_and_nulls(): - ds = Datasette([], memory=True) - db = ds.add_database(Database(ds, memory_name="test_json_array")) - await db.execute_write("create table foo(json_column text)") - for value in ('["a", "b", "c"]', '["a", "b"]', "", None): - await db.execute_write("insert into foo (json_column) values (?)", [value]) - response = await ds.client.get("/test_json_array/foo.json?_extra=suggested_facets") - data = response.json() - assert data["suggested_facets"] == [ - { - "name": "json_column", - "type": "array", - "toggle_url": "http://localhost/test_json_array/foo.json?_extra=suggested_facets&_facet_array=json_column", - } - ] - - -@pytest.mark.asyncio -async def test_facet_size(): - ds = Datasette([], memory=True, settings={"max_returned_rows": 50}) - db = ds.add_database(Database(ds, memory_name="test_facet_size")) - await db.execute_write("create table neighbourhoods(city text, neighbourhood text)") - for i in range(1, 51): - for j in range(1, 4): - await db.execute_write( - "insert into neighbourhoods (city, neighbourhood) values (?, ?)", - ["City {}".format(i), "Neighbourhood {}".format(j)], - ) - response = await ds.client.get( - "/test_facet_size/neighbourhoods.json?_extra=suggested_facets" - ) - data = response.json() - assert data["suggested_facets"] == [ - { - "name": "neighbourhood", - "toggle_url": "http://localhost/test_facet_size/neighbourhoods.json?_extra=suggested_facets&_facet=neighbourhood", - } - ] - # Bump up _facet_size= to suggest city too - response2 = await ds.client.get( - "/test_facet_size/neighbourhoods.json?_facet_size=50&_extra=suggested_facets" - ) - data2 = response2.json() - assert sorted(data2["suggested_facets"], key=lambda f: f["name"]) == [ - { - "name": "city", - "toggle_url": "http://localhost/test_facet_size/neighbourhoods.json?_facet_size=50&_extra=suggested_facets&_facet=city", - }, - { - "name": "neighbourhood", - "toggle_url": "http://localhost/test_facet_size/neighbourhoods.json?_facet_size=50&_extra=suggested_facets&_facet=neighbourhood", - }, - ] - # Facet by city should return expected number of results - response3 = await ds.client.get( - 
"/test_facet_size/neighbourhoods.json?_facet_size=50&_facet=city" - ) - data3 = response3.json() - assert len(data3["facet_results"]["results"]["city"]["results"]) == 50 - # Reduce max_returned_rows and check that it's respected - ds._settings["max_returned_rows"] = 20 - response4 = await ds.client.get( - "/test_facet_size/neighbourhoods.json?_facet_size=50&_facet=city" - ) - data4 = response4.json() - assert len(data4["facet_results"]["results"]["city"]["results"]) == 20 - # Test _facet_size=max - response5 = await ds.client.get( - "/test_facet_size/neighbourhoods.json?_facet_size=max&_facet=city" - ) - data5 = response5.json() - assert len(data5["facet_results"]["results"]["city"]["results"]) == 20 - # Now try messing with facet_size in the table metadata - orig_config = ds.config - try: - ds.config = { - "databases": { - "test_facet_size": {"tables": {"neighbourhoods": {"facet_size": 6}}} - } - } - response6 = await ds.client.get( - "/test_facet_size/neighbourhoods.json?_facet=city" - ) - data6 = response6.json() - assert len(data6["facet_results"]["results"]["city"]["results"]) == 6 - # Setting it to max bumps it up to 50 again - ds.config["databases"]["test_facet_size"]["tables"]["neighbourhoods"][ - "facet_size" - ] = "max" - data7 = ( - await ds.client.get("/test_facet_size/neighbourhoods.json?_facet=city") - ).json() - assert len(data7["facet_results"]["results"]["city"]["results"]) == 20 - finally: - ds.config = orig_config - - -def test_other_types_of_facet_in_metadata(): - with make_app_client( - metadata={ - "databases": { - "fixtures": { - "tables": { - "facetable": { - "facets": ["state", {"array": "tags"}, {"date": "created"}] - } - } - } - } - } - ) as client: - response = client.get("/fixtures/facetable") - for fragment in ( - "created (date)\n", - "tags (array)\n", - "state\n", - ): - assert fragment in response.text - - -@pytest.mark.asyncio -async def test_conflicting_facet_names_json(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_facet=created&_facet_date=created" - "&_facet=tags&_facet_array=tags" - ) - assert set(response.json()["facet_results"]["results"].keys()) == { - "created", - "tags", - "created_2", - "tags_2", - } - - -@pytest.mark.asyncio -async def test_facet_against_in_memory_database(): - ds = Datasette() - db = ds.add_memory_database("mem") - await db.execute_write( - "create table t (id integer primary key, name text, name2 text)" - ) - to_insert = [{"name": "one", "name2": "1"} for _ in range(800)] + [ - {"name": "two", "name2": "2"} for _ in range(300) - ] - await db.execute_write_many( - "insert into t (name, name2) values (:name, :name2)", to_insert - ) - response1 = await ds.client.get("/mem/t") - assert response1.status_code == 200 - response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2") - assert response2.status_code == 200 - - -@pytest.mark.asyncio -async def test_facet_only_considers_first_x_rows(): - # This test works by manually fiddling with Facet.suggest_consider - ds = Datasette() - original_suggest_consider = Facet.suggest_consider - try: - Facet.suggest_consider = 40 - db = ds.add_memory_database("test_facet_only_x_rows") - await db.execute_write("create table t (id integer primary key, col text)") - # First 50 rows make it look like col and col_json should be faceted - to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)] - await db.execute_write_many("insert into t (col) values (:col)", to_insert) - # Next 50 break that assumption - to_insert2 = [{"col": f"x{i}"} for i in range(50)] 
- await db.execute_write_many("insert into t (col) values (:col)", to_insert2) - response = await ds.client.get( - "/test_facet_only_x_rows/t.json?_extra=suggested_facets" - ) - data = response.json() - assert data["suggested_facets"] == [ - { - "name": "col", - "toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col", - } - ] - # But if we set suggest_consider to 100 they are not suggested - Facet.suggest_consider = 100 - response2 = await ds.client.get( - "/test_facet_only_x_rows/t.json?_extra=suggested_facets" - ) - data2 = response2.json() - assert data2["suggested_facets"] == [] - finally: - Facet.suggest_consider = original_suggest_consider + } == buckets diff --git a/tests/test_filters.py b/tests/test_filters.py index a3fada98..fd682cd9 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -1,5 +1,4 @@ -from datasette.filters import Filters, through_filters, where_filters, search_filters -from datasette.utils.asgi import Request +from datasette.filters import Filters import pytest @@ -7,11 +6,6 @@ import pytest "args,expected_where,expected_params", [ ((("name_english__contains", "foo"),), ['"name_english" like :p0'], ["%foo%"]), - ( - (("name_english__notcontains", "foo"),), - ['"name_english" not like :p0'], - ["%foo%"], - ), ( (("foo", "bar"), ("bar__contains", "baz")), ['"bar" like :p0', '"foo" = :p1'], @@ -38,12 +32,6 @@ import pytest ['"foo" like :p0', '"foo" like :p1'], ["2%2", "3%3"], ), - # notlike: - ( - (("foo__notlike", "2%2"),), - ['"foo" not like :p0'], - ["2%2"], - ), ( (("foo__isnull", "1"), ("baz__isnull", "1"), ("bar__gt", "10")), ['"bar" > :p0', '"baz" is null', '"foo" is null'], @@ -59,100 +47,12 @@ import pytest ["foo in (:p0, :p1)"], ["dog,cat", "cat[dog]"], ), - # Not in, and JSON array not in - ((("foo__notin", "1,2,3"),), ["foo not in (:p0, :p1, :p2)"], ["1", "2", "3"]), - ((("foo__notin", "[1,2,3]"),), ["foo not in (:p0, :p1, :p2)"], [1, 2, 3]), - # JSON arraycontains, arraynotcontains - ( - (("Availability+Info__arraycontains", "yes"),), - [":p0 in (select value from json_each([table].[Availability+Info]))"], - ["yes"], - ), - ( - (("Availability+Info__arraynotcontains", "yes"),), - [":p0 not in (select value from json_each([table].[Availability+Info]))"], - ["yes"], - ), ], ) def test_build_where(args, expected_where, expected_params): f = Filters(sorted(args)) sql_bits, actual_params = f.build_where_clauses("table") assert expected_where == sql_bits - assert {f"p{i}": param for i, param in enumerate(expected_params)} == actual_params - - -@pytest.mark.asyncio -async def test_through_filters_from_request(ds_client): - request = Request.fake( - '/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' - ) - filter_args = await through_filters( - request=request, - datasette=ds_client.ds, - table="roadside_attractions", - database="fixtures", - )() - assert filter_args.where_clauses == [ - "pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)" - ] - assert filter_args.params == {"p0": "1"} - assert filter_args.human_descriptions == [ - 'roadside_attraction_characteristics.characteristic_id = "1"' - ] - assert filter_args.extra_context == {} - - -@pytest.mark.asyncio -async def test_through_filters_from_request(ds_client): - request = Request.fake( - '/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' - ) - filter_args = await through_filters( - request=request, - 
datasette=ds_client.ds, - table="roadside_attractions", - database="fixtures", - )() - assert filter_args.where_clauses == [ - "pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)" - ] - assert filter_args.params == {"p0": "1"} - assert filter_args.human_descriptions == [ - 'roadside_attraction_characteristics.characteristic_id = "1"' - ] - assert filter_args.extra_context == {} - - -@pytest.mark.asyncio -async def test_where_filters_from_request(ds_client): - await ds_client.ds.invoke_startup() - request = Request.fake("/?_where=pk+>+3") - filter_args = await where_filters( - request=request, - datasette=ds_client.ds, - database="fixtures", - )() - assert filter_args.where_clauses == ["pk > 3"] - assert filter_args.params == {} - assert filter_args.human_descriptions == [] - assert filter_args.extra_context == { - "extra_wheres_for_ui": [{"text": "pk > 3", "remove_url": "/"}] - } - - -@pytest.mark.asyncio -async def test_search_filters_from_request(ds_client): - request = Request.fake("/?_search=bobcat") - filter_args = await search_filters( - request=request, - datasette=ds_client.ds, - database="fixtures", - table="searchable", - )() - assert filter_args.where_clauses == [ - "rowid in (select rowid from searchable_fts where searchable_fts match escape_fts(:search))" - ] - assert filter_args.params == {"search": "bobcat"} - assert filter_args.human_descriptions == ['search matches "bobcat"'] - assert filter_args.extra_context == {"supports_search": True, "search": "bobcat"} + assert { + "p{}".format(i): param for i, param in enumerate(expected_params) + } == actual_params diff --git a/tests/test_html.py b/tests/test_html.py index 7b667301..4a49551f 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,16 +1,12 @@ from bs4 import BeautifulSoup as Soup -from datasette.app import Datasette -from datasette.utils import allowed_pragmas from .fixtures import ( # noqa app_client, - app_client_base_url_prefix, app_client_shorter_time_limit, app_client_two_attached_databases, + app_client_with_hash, make_app_client, METADATA, ) -from .utils import assert_footer_links, inner_html -import copy import json import pathlib import pytest @@ -20,251 +16,399 @@ import urllib.parse def test_homepage(app_client_two_attached_databases): response = app_client_two_attached_databases.get("/") - assert response.status_code == 200 + assert response.status == 200 assert "text/html; charset=utf-8" == response.headers["content-type"] - # Should have a html lang="en" attribute - assert '' in response.text - soup = Soup(response.content, "html.parser") + soup = Soup(response.body, "html.parser") assert "Datasette Fixtures" == soup.find("h1").text assert ( - "An example SQLite database demonstrating Datasette. 
Sign in as root user" + "An example SQLite database demonstrating Datasette" == soup.select(".metadata-description")[0].text.strip() ) # Should be two attached databases assert [ - {"href": "/extra+database", "text": "extra database"}, + {"href": "/extra_database", "text": "extra_database"}, {"href": "/fixtures", "text": "fixtures"}, + {"href": "/special", "text": "special"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] - # Database should show count text and attached tables + # The first attached database should show count text and attached tables h2 = soup.select("h2")[0] - assert "extra database" == h2.text.strip() + assert "extra_database" == h2.text.strip() counts_p, links_p = h2.find_all_next("p")[:2] assert ( "2 rows in 1 table, 5 rows in 4 hidden tables, 1 view" == counts_p.text.strip() ) # We should only show visible, not hidden tables here: table_links = [ - {"href": a["href"], "text": a.text.strip()} for a in links_p.find_all("a") + {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ - {"href": r"/extra+database/searchable", "text": "searchable"}, - {"href": r"/extra+database/searchable_view", "text": "searchable_view"}, + {"href": "/extra_database/searchable", "text": "searchable"}, + {"href": "/extra_database/searchable_view", "text": "searchable_view"}, ] == table_links -@pytest.mark.asyncio -@pytest.mark.parametrize("path", ("/", "/-/")) -async def test_homepage_alternative_location(path, tmp_path_factory): - template_dir = tmp_path_factory.mktemp("templates") - (template_dir / "index.html").write_text("Custom homepage", "utf-8") - datasette = Datasette(template_dir=str(template_dir)) - response = await datasette.client.get(path) - assert response.status_code == 200 - html = response.text - if path == "/": - assert html == "Custom homepage" - else: - assert '' in html +def test_http_head(app_client): + response = app_client.get("/", method="HEAD") + assert response.status == 200 -@pytest.mark.asyncio -async def test_homepage_alternative_redirect(ds_client): - response = await ds_client.get("/-") - assert response.status_code == 301 - - -@pytest.mark.asyncio -async def test_http_head(ds_client): - response = await ds_client.head("/") - assert response.status_code == 200 - - -@pytest.mark.asyncio -async def test_homepage_options(ds_client): - response = await ds_client.options("/") - assert response.status_code == 200 - assert response.text == "ok" - - -@pytest.mark.asyncio -async def test_favicon(ds_client): - response = await ds_client.get("/favicon.ico") - assert response.status_code == 200 - assert response.headers["cache-control"] == "max-age=3600, immutable, public" - assert int(response.headers["content-length"]) > 100 - assert response.headers["content-type"] == "image/png" - - -@pytest.mark.asyncio -async def test_static(ds_client): - response = await ds_client.get("/-/static/app2.css") - assert response.status_code == 404 - response = await ds_client.get("/-/static/app.css") - assert response.status_code == 200 +def test_static(app_client): + response = app_client.get("/-/static/app2.css") + assert response.status == 404 + response = app_client.get("/-/static/app.css") + assert response.status == 200 assert "text/css" == response.headers["content-type"] - assert "etag" in response.headers - etag = response.headers.get("etag") - response = await ds_client.get("/-/static/app.css", headers={"if-none-match": etag}) - assert response.status_code == 304 def test_static_mounts(): - with make_app_client( + 
for client in make_app_client( static_mounts=[("custom-static", str(pathlib.Path(__file__).parent))] - ) as client: + ): response = client.get("/custom-static/test_html.py") - assert response.status_code == 200 + assert response.status == 200 response = client.get("/custom-static/not_exists.py") - assert response.status_code == 404 + assert response.status == 404 response = client.get("/custom-static/../LICENSE") - assert response.status_code == 404 + assert response.status == 404 def test_memory_database_page(): - with make_app_client(memory=True) as client: - response = client.get("/_memory") - assert response.status_code == 200 + for client in make_app_client(memory=True): + response = client.get("/:memory:") + assert response.status == 200 -def test_not_allowed_methods(): - with make_app_client(memory=True) as client: - for method in ("post", "put", "patch", "delete"): - response = client.request(path="/_memory", method=method.upper()) - assert response.status_code == 405 +def test_database_page_redirects_with_url_hash(app_client_with_hash): + response = app_client_with_hash.get("/fixtures", allow_redirects=False) + assert response.status == 302 + response = app_client_with_hash.get("/fixtures") + assert "fixtures" in response.text -@pytest.mark.asyncio -async def test_database_page(ds_client): - response = await ds_client.get("/fixtures") - soup = Soup(response.text, "html.parser") - # Should have a ', - ] - for expected_html_fragment in expected_html_fragments: - assert expected_html_fragment in response.text + expected_html_fragment = """ + sql_time_limit_ms + """.strip() + assert expected_html_fragment in response.text + + +def test_row_redirects_with_url_hash(app_client_with_hash): + response = app_client_with_hash.get( + "/fixtures/simple_primary_key/1", allow_redirects=False + ) + assert response.status == 302 + assert response.headers["Location"].endswith("/1") + response = app_client_with_hash.get("/fixtures/simple_primary_key/1") + assert response.status == 200 + + +def test_row_strange_table_name_with_url_hash(app_client_with_hash): + response = app_client_with_hash.get( + "/fixtures/table%2Fwith%2Fslashes.csv/3", allow_redirects=False + ) + assert response.status == 302 + assert response.headers["Location"].endswith("/table%2Fwith%2Fslashes.csv/3") + response = app_client_with_hash.get("/fixtures/table%2Fwith%2Fslashes.csv/3") + assert response.status == 200 + + +def test_table_cell_truncation(): + for client in make_app_client(config={"truncate_cells_html": 5}): + response = client.get("/fixtures/facetable") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + assert table["class"] == ["rows-and-columns"] + assert [ + "Missi…", + "Dogpa…", + "SOMA", + "Tende…", + "Berna…", + "Hayes…", + "Holly…", + "Downt…", + "Los F…", + "Korea…", + "Downt…", + "Greek…", + "Corkt…", + "Mexic…", + "Arcad…", + ] == [td.string for td in table.findAll("td", {"class": "col-neighborhood"})] def test_row_page_does_not_truncate(): - with make_app_client(settings={"truncate_cells_html": 5}) as client: + for client in make_app_client(config={"truncate_cells_html": 5}): response = client.get("/fixtures/facetable/1") - assert response.status_code == 200 - table = Soup(response.content, "html.parser").find("table") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") assert table["class"] == ["rows-and-columns"] assert ["Mission"] == [ - td.string - for td in table.find_all("td", {"class": "col-neighborhood-b352a7"}) + 
td.string for td in table.findAll("td", {"class": "col-neighborhood"}) ] -def test_query_page_truncates(): - with make_app_client(settings={"truncate_cells_html": 5}) as client: - response = client.get( - "/fixtures/-/query?" - + urllib.parse.urlencode( - { - "sql": "select 'this is longer than 5' as a, 'https://example.com/' as b" - } - ) +def test_add_filter_redirects(app_client): + filter_args = urllib.parse.urlencode( + {"_filter_column": "content", "_filter_op": "startswith", "_filter_value": "x"} + ) + path_base = "/fixtures/simple_primary_key" + path = path_base + "?" + filter_args + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert response.headers["Location"].endswith("?content__startswith=x") + + # Adding a redirect to an existing querystring: + path = path_base + "?foo=bar&" + filter_args + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert response.headers["Location"].endswith("?foo=bar&content__startswith=x") + + # Test that op with a __x suffix overrides the filter value + path = ( + path_base + + "?" + + urllib.parse.urlencode( + { + "_filter_column": "content", + "_filter_op": "isnull__5", + "_filter_value": "x", + } ) - assert response.status_code == 200 - table = Soup(response.content, "html.parser").find("table") - tds = table.find_all("td") - assert [str(td) for td in tds] == [ - '
  • ', - '', - ] + ) + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert response.headers["Location"].endswith("?content__isnull=5") + + +def test_existing_filter_redirects(app_client): + filter_args = { + "_filter_column_1": "name", + "_filter_op_1": "contains", + "_filter_value_1": "hello", + "_filter_column_2": "age", + "_filter_op_2": "gte", + "_filter_value_2": "22", + "_filter_column_3": "age", + "_filter_op_3": "lt", + "_filter_value_3": "30", + "_filter_column_4": "name", + "_filter_op_4": "contains", + "_filter_value_4": "world", + } + path_base = "/fixtures/simple_primary_key" + path = path_base + "?" + urllib.parse.urlencode(filter_args) + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert_querystring_equal( + "name__contains=hello&age__gte=22&age__lt=30&name__contains=world", + response.headers["Location"].split("?")[1], + ) + + # Setting _filter_column_3 to empty string should remove *_3 entirely + filter_args["_filter_column_3"] = "" + path = path_base + "?" + urllib.parse.urlencode(filter_args) + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert_querystring_equal( + "name__contains=hello&age__gte=22&name__contains=world", + response.headers["Location"].split("?")[1], + ) + + # ?_filter_op=exact should be removed if unaccompanied by _fiter_column + response = app_client.get(path_base + "?_filter_op=exact", allow_redirects=False) + assert response.status == 302 + assert "?" not in response.headers["Location"] + + +def test_empty_search_parameter_gets_removed(app_client): + path_base = "/fixtures/simple_primary_key" + path = ( + path_base + + "?" + + urllib.parse.urlencode( + { + "_search": "", + "_filter_column": "name", + "_filter_op": "exact", + "_filter_value": "chidi", + } + ) + ) + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert response.headers["Location"].endswith("?name__exact=chidi") + + +def test_searchable_view_persists_fts_table(app_client): + # The search form should persist ?_fts_table as a hidden field + response = app_client.get( + "/fixtures/searchable_view?_fts_table=searchable_fts&_fts_pk=pk" + ) + inputs = Soup(response.body, "html.parser").find("form").findAll("input") + hiddens = [i for i in inputs if i["type"] == "hidden"] + assert [("_fts_table", "searchable_fts"), ("_fts_pk", "pk")] == [ + (hidden["name"], hidden["value"]) for hidden in hiddens + ] + + +def test_sort_by_desc_redirects(app_client): + path_base = "/fixtures/sortable" + path = ( + path_base + + "?" 
+ + urllib.parse.urlencode({"_sort": "sortable", "_sort_by_desc": "1"}) + ) + response = app_client.get(path, allow_redirects=False) + assert response.status == 302 + assert response.headers["Location"].endswith("?_sort_desc=sortable") + + +def test_sort_links(app_client): + response = app_client.get("/fixtures/sortable?_sort=sortable") + assert response.status == 200 + ths = Soup(response.body, "html.parser").findAll("th") + attrs_and_link_attrs = [ + { + "attrs": th.attrs, + "a_href": (th.find("a")["href"].split("/")[-1] if th.find("a") else None), + } + for th in ths + ] + assert [ + {"attrs": {"class": ["col-Link"], "scope": "col"}, "a_href": None}, + {"attrs": {"class": ["col-pk1"], "scope": "col"}, "a_href": None}, + {"attrs": {"class": ["col-pk2"], "scope": "col"}, "a_href": None}, + {"attrs": {"class": ["col-content"], "scope": "col"}, "a_href": None}, + { + "attrs": {"class": ["col-sortable"], "scope": "col"}, + "a_href": "sortable?_sort_desc=sortable", + }, + { + "attrs": {"class": ["col-sortable_with_nulls"], "scope": "col"}, + "a_href": "sortable?_sort=sortable_with_nulls", + }, + { + "attrs": {"class": ["col-sortable_with_nulls_2"], "scope": "col"}, + "a_href": "sortable?_sort=sortable_with_nulls_2", + }, + { + "attrs": {"class": ["col-text"], "scope": "col"}, + "a_href": "sortable?_sort=text", + }, + ] == attrs_and_link_attrs + + +def test_facet_display(app_client): + response = app_client.get( + "/fixtures/facetable?_facet=planet_int&_facet=city_id&_facet=on_earth" + ) + assert response.status == 200 + soup = Soup(response.body, "html.parser") + divs = soup.find("div", {"class": "facet-results"}).findAll("div") + actual = [] + for div in divs: + actual.append( + { + "name": div.find("strong").text, + "items": [ + { + "name": a.text, + "qs": a["href"].split("?")[-1], + "count": int(str(a.parent).split("")[1].split("<")[0]), + } + for a in div.find("ul").findAll("a") + ], + } + ) + assert [ + { + "name": "city_id", + "items": [ + { + "name": "San Francisco", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=1", + "count": 6, + }, + { + "name": "Los Angeles", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=2", + "count": 4, + }, + { + "name": "Detroit", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=3", + "count": 4, + }, + { + "name": "Memnonia", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=4", + "count": 1, + }, + ], + }, + { + "name": "planet_int", + "items": [ + { + "name": "1", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&planet_int=1", + "count": 14, + }, + { + "name": "2", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&planet_int=2", + "count": 1, + }, + ], + }, + { + "name": "on_earth", + "items": [ + { + "name": "1", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&on_earth=1", + "count": 14, + }, + { + "name": "0", + "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&on_earth=0", + "count": 1, + }, + ], + }, + ] == actual + + +def test_facets_persist_through_filter_form(app_client): + response = app_client.get("/fixtures/facetable?_facet=planet_int&_facet=city_id") + assert response.status == 200 + inputs = Soup(response.body, "html.parser").find("form").findAll("input") + hiddens = [i for i in inputs if i["type"] == "hidden"] + assert [("_facet", "city_id"), ("_facet", "planet_int")] == [ + (hidden["name"], hidden["value"]) for hidden in hiddens + ] -@pytest.mark.asyncio @pytest.mark.parametrize( "path,expected_classes", [ ("/", ["index"]), ("/fixtures", 
["db", "db-fixtures"]), - ("/fixtures/-/query?sql=select+1", ["query", "db-fixtures"]), ( "/fixtures/simple_primary_key", ["table", "db-fixtures", "table-simple_primary_key"], ), ( - "/fixtures/neighborhood_search", - ["query", "db-fixtures", "query-neighborhood_search"], - ), - ( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv", + "/fixtures/table%2Fwith%2Fslashes.csv", ["table", "db-fixtures", "table-tablewithslashescsv-fa7563"], ), ( @@ -273,108 +417,153 @@ def test_query_page_truncates(): ), ], ) -async def test_css_classes_on_body(ds_client, path, expected_classes): - response = await ds_client.get(path) - assert response.status_code == 200 +def test_css_classes_on_body(app_client, path, expected_classes): + response = app_client.get(path) + assert response.status == 200 classes = re.search(r'', response.text).group(1).split() assert classes == expected_classes -templates_considered_re = re.compile(r"") - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_considered", - [ - ("/", "*index.html"), - ("/fixtures", "database-fixtures.html, *database.html"), - ( - "/fixtures/simple_primary_key", - "table-fixtures-simple_primary_key.html, *table.html", - ), - ( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv", - "table-fixtures-tablewithslashescsv-fa7563.html, *table.html", - ), - ( - "/fixtures/simple_primary_key/1", - "row-fixtures-simple_primary_key.html, *row.html", - ), - ], -) -async def test_templates_considered(ds_client, path, expected_considered): - response = await ds_client.get(path) - assert response.status_code == 200 - match = templates_considered_re.search(response.text) - assert match, "No templates considered comment found" - actual_considered = match.group(1) - assert actual_considered == expected_considered - - -@pytest.mark.asyncio -async def test_row_json_export_link(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key/1") - assert response.status_code == 200 - assert 'json' in response.text - - -@pytest.mark.asyncio -async def test_query_json_csv_export_links(ds_client): - response = await ds_client.get("/fixtures/-/query?sql=select+1") - assert response.status_code == 200 - assert 'json' in response.text - assert ( - 'CSV' - in response.text - ) - - -@pytest.mark.asyncio -async def test_query_parameter_form_fields(ds_client): - response = await ds_client.get("/fixtures/-/query?sql=select+:name") - assert response.status_code == 200 - assert ( - ' ' - in response.text - ) - response2 = await ds_client.get("/fixtures/-/query?sql=select+:name&name=hello") - assert response2.status_code == 200 - assert ( - ' ' - in response2.text - ) - - -@pytest.mark.asyncio -async def test_row_html_simple_primary_key(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key/1") - assert response.status_code == 200 - table = Soup(response.content, "html.parser").find("table") - assert ["id", "content"] == [th.string.strip() for th in table.select("thead th")] +def test_table_html_simple_primary_key(app_client): + response = app_client.get("/fixtures/simple_primary_key?_size=3") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + assert table["class"] == ["rows-and-columns"] + ths = table.findAll("th") + assert "id" == ths[0].find("a").string.strip() + for expected_col, th in zip(("content",), ths[1:]): + a = th.find("a") + assert expected_col == a.string + assert a["href"].endswith( + "/simple_primary_key?_size=3&_sort={}".format(expected_col) + ) + assert ["nofollow"] == a["rel"] assert [ [ - '', - '', 
- ] + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], ] == [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] -@pytest.mark.asyncio -async def test_row_html_no_primary_key(ds_client): - response = await ds_client.get("/fixtures/no_primary_key/1") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") +def test_table_csv_json_export_interface(app_client): + response = app_client.get("/fixtures/simple_primary_key?id__gt=2") + assert response.status == 200 + # The links at the top of the page + links = ( + Soup(response.body, "html.parser") + .find("p", {"class": "export-links"}) + .findAll("a") + ) + actual = [l["href"].split("/")[-1] for l in links] + expected = [ + "simple_primary_key.json?id__gt=2", + "simple_primary_key.csv?id__gt=2&_size=max", + "#export", + ] + assert expected == actual + # And the advaced export box at the bottom: + div = Soup(response.body, "html.parser").find("div", {"class": "advanced-export"}) + json_links = [a["href"].split("/")[-1] for a in div.find("p").findAll("a")] + assert [ + "simple_primary_key.json?id__gt=2", + "simple_primary_key.json?id__gt=2&_shape=array", + "simple_primary_key.json?id__gt=2&_shape=array&_nl=on", + "simple_primary_key.json?id__gt=2&_shape=object", + ] == json_links + # And the CSV form + form = div.find("form") + assert form["action"].endswith("/simple_primary_key.csv") + inputs = [str(input) for input in form.findAll("input")] + assert [ + '', + '', + '', + '', + ] == inputs + + +def test_csv_json_export_links_include_labels_if_foreign_keys(app_client): + response = app_client.get("/fixtures/facetable") + assert response.status == 200 + links = ( + Soup(response.body, "html.parser") + .find("p", {"class": "export-links"}) + .findAll("a") + ) + actual = [l["href"].split("/")[-1] for l in links] + expected = [ + "facetable.json?_labels=on", + "facetable.csv?_labels=on&_size=max", + "#export", + ] + assert expected == actual + + +def test_row_html_simple_primary_key(app_client): + response = app_client.get("/fixtures/simple_primary_key/1") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + assert ["id", "content"] == [th.string.strip() for th in table.select("thead th")] + assert [['', '']] == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_table_not_exists(app_client): + assert "Table not found: blah" in app_client.get("/fixtures/blah").body.decode( + "utf8" + ) + + +def test_table_html_no_primary_key(app_client): + response = app_client.get("/fixtures/no_primary_key") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + # We have disabled sorting for this table using metadata.json + assert ["content", "a", "b", "c"] == [ + th.string.strip() for th in table.select("thead th")[2:] + ] + expected = [ + [ + ''.format( + i, i + ), + ''.format(i), + ''.format(i), + ''.format(i), + ''.format(i), + ''.format(i), + ] + for i in range(1, 51) + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_row_html_no_primary_key(app_client): + response = app_client.get("/fixtures/no_primary_key/1") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") assert ["rowid", "content", "a", "b", "c"] == [ th.string.strip() for th in table.select("thead th") ] expected = [ [ - '', - '', - '', - '', - '', + '', + '', + '', + '', + '', ] ] assert expected == [ @@ -382,93 
+571,163 @@ async def test_row_html_no_primary_key(ds_client): ] -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_text,expected_link", - ( - ( - "/fixtures/facet_cities/1", - "6 rows from _city_id in facetable", - "/fixtures/facetable?_city_id__exact=1", - ), - ( - "/fixtures/attraction_characteristic/2", - "3 rows from characteristic_id in roadside_attraction_characteristics", - "/fixtures/roadside_attraction_characteristics?characteristic_id=2", - ), - ), -) -async def test_row_links_from_other_tables( - ds_client, path, expected_text, expected_link -): - response = await ds_client.get(path) - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") - h2 = soup.find("h2") - assert h2.text == "Links from other tables" - li = h2.find_next("ul").find("li") - text = re.sub(r"\s+", " ", li.text.strip()) - assert text == expected_text - link = li.find("a")["href"] - assert link == expected_link - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected", - ( - ( - "/fixtures/compound_primary_key/a,b", - [ - [ - '', - '', - '', - ] - ], - ), - ( - "/fixtures/compound_primary_key/a~2Fb,~2Ec~2Dd", - [ - [ - '', - '', - '', - ] - ], - ), - ), -) -async def test_row_html_compound_primary_key(ds_client, path, expected): - response = await ds_client.get(path) - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - assert ["pk1", "pk2", "content"] == [ - th.string.strip() for th in table.select("thead th") +def test_table_html_compound_primary_key(app_client): + response = app_client.get("/fixtures/compound_primary_key") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + ths = table.findAll("th") + assert "Link" == ths[0].string.strip() + for expected_col, th in zip(("pk1", "pk2", "content"), ths[1:]): + a = th.find("a") + assert expected_col == a.string + assert th["class"] == ["col-{}".format(expected_col)] + assert a["href"].endswith("/compound_primary_key?_sort={}".format(expected_col)) + expected = [ + [ + '', + '', + '', + '', + ] ] assert expected == [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") ] -@pytest.mark.asyncio -async def test_index_metadata(ds_client): - response = await ds_client.get("/") - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") +def test_table_html_foreign_key_links(app_client): + response = app_client.get("/fixtures/foreign_key_references") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + expected = [ + [ + '', + '', + '', + ] + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_table_html_disable_foreign_key_links_with_labels(app_client): + response = app_client.get("/fixtures/foreign_key_references?_labels=off") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + expected = [ + [ + '', + '', + '', + ] + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_table_html_foreign_key_custom_label_column(app_client): + response = app_client.get("/fixtures/custom_foreign_key_label") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + expected = [ + [ + '', + '', + ] + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_row_html_compound_primary_key(app_client): + response = 
app_client.get("/fixtures/compound_primary_key/a,b") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + assert ["pk1", "pk2", "content"] == [ + th.string.strip() for th in table.select("thead th") + ] + expected = [ + [ + '', + '', + '', + ] + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_compound_primary_key_with_foreign_key_references(app_client): + # e.g. a many-to-many table with a compound primary key on the two columns + response = app_client.get("/fixtures/searchable_tags") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + expected = [ + [ + '', + '', + '', + ], + [ + '', + '', + '', + ], + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_view_html(app_client): + response = app_client.get("/fixtures/simple_view?_size=3") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + ths = table.select("thead th") + assert 2 == len(ths) + assert ths[0].find("a") is not None + assert ths[0].find("a")["href"].endswith("/simple_view?_size=3&_sort=content") + assert ths[0].find("a").string.strip() == "content" + assert ths[1].find("a") is None + assert ths[1].string.strip() == "upper_content" + expected = [ + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + ] + assert expected == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + +def test_index_metadata(app_client): + response = app_client.get("/") + assert response.status == 200 + soup = Soup(response.body, "html.parser") assert "Datasette Fixtures" == soup.find("h1").text - assert ( - 'An example SQLite database demonstrating Datasette. 
Sign in as root user' - == inner_html(soup.find("div", {"class": "metadata-description"})) + assert "An example SQLite database demonstrating Datasette" == inner_html( + soup.find("div", {"class": "metadata-description"}) ) assert_footer_links(soup) -@pytest.mark.asyncio -async def test_database_metadata(ds_client): - response = await ds_client.get("/fixtures") - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") +def test_database_metadata(app_client): + response = app_client.get("/fixtures") + assert response.status == 200 + soup = Soup(response.body, "html.parser") # Page title should be the default assert "fixtures" == soup.find("h1").text # Description should be custom @@ -476,136 +735,123 @@ async def test_database_metadata(ds_client): soup.find("div", {"class": "metadata-description"}) ) # The source/license should be inherited - # assert_footer_links(soup) TODO(alex) ensure + assert_footer_links(soup) -@pytest.mark.asyncio -async def test_database_metadata_with_custom_sql(ds_client): - response = await ds_client.get( - "/fixtures/-/query?sql=select+*+from+simple_primary_key" +def test_table_metadata(app_client): + response = app_client.get("/fixtures/simple_primary_key") + assert response.status == 200 + soup = Soup(response.body, "html.parser") + # Page title should be custom and should be HTML escaped + assert "This <em>HTML</em> is escaped" == inner_html(soup.find("h1")) + # Description should be custom and NOT escaped (we used description_html) + assert "Simple primary key" == inner_html( + soup.find("div", {"class": "metadata-description"}) ) - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") - # Page title should be the default - assert "fixtures" == soup.find("h1").text - # Description should be custom - assert "Custom SQL query returning" in soup.find("h3").text # The source/license should be inherited - # assert_footer_links(soup)TODO(alex) ensure + assert_footer_links(soup) -def test_database_download_for_immutable(): - with make_app_client(is_immutable=True) as client: +def test_database_download_allowed_for_immutable(): + for client in make_app_client(is_immutable=True): assert not client.ds.databases["fixtures"].is_mutable # Regular page should have a download link response = client.get("/fixtures") - soup = Soup(response.content, "html.parser") - assert len(soup.find_all("a", {"href": re.compile(r"\.db$")})) + soup = Soup(response.body, "html.parser") + assert len(soup.findAll("a", {"href": re.compile(r"\.db$")})) # Check we can actually download it - download_response = client.get("/fixtures.db") - assert download_response.status_code == 200 - # Check the content-length header exists - assert "content-length" in download_response.headers - content_length = download_response.headers["content-length"] - assert content_length.isdigit() - assert int(content_length) > 100 - assert "content-disposition" in download_response.headers - assert ( - download_response.headers["content-disposition"] - == 'attachment; filename="fixtures.db"' - ) - assert download_response.headers["transfer-encoding"] == "chunked" - # ETag header should be present and match db.hash - assert "etag" in download_response.headers - etag = download_response.headers["etag"] - assert etag == '"{}"'.format(client.ds.databases["fixtures"].hash) - # Try a second download with If-None-Match: current-etag - download_response2 = client.get("/fixtures.db", if_none_match=etag) - assert download_response2.body == b"" - assert download_response2.status == 
304 + assert 200 == client.get("/fixtures.db").status def test_database_download_disallowed_for_mutable(app_client): - # Use app_client because we need a file database, not in-memory response = app_client.get("/fixtures") - soup = Soup(response.content, "html.parser") - assert len(soup.find_all("a", {"href": re.compile(r"\.db$")})) == 0 - assert app_client.get("/fixtures.db").status_code == 403 + soup = Soup(response.body, "html.parser") + assert 0 == len(soup.findAll("a", {"href": re.compile(r"\.db$")})) + assert 403 == app_client.get("/fixtures.db").status def test_database_download_disallowed_for_memory(): - with make_app_client(memory=True) as client: + for client in make_app_client(memory=True): # Memory page should NOT have a download link - response = client.get("/_memory") - soup = Soup(response.content, "html.parser") - assert 0 == len(soup.find_all("a", {"href": re.compile(r"\.db$")})) - assert 404 == client.get("/_memory.db").status + response = client.get("/:memory:") + soup = Soup(response.body, "html.parser") + assert 0 == len(soup.findAll("a", {"href": re.compile(r"\.db$")})) + assert 404 == client.get("/:memory:.db").status def test_allow_download_off(): - with make_app_client( - is_immutable=True, settings={"allow_download": False} - ) as client: + for client in make_app_client(is_immutable=True, config={"allow_download": False}): response = client.get("/fixtures") - soup = Soup(response.content, "html.parser") - assert not len(soup.find_all("a", {"href": re.compile(r"\.db$")})) + soup = Soup(response.body, "html.parser") + assert not len(soup.findAll("a", {"href": re.compile(r"\.db$")})) # Accessing URL directly should 403 response = client.get("/fixtures.db") assert 403 == response.status +def test_allow_sql_on(app_client): + response = app_client.get("/fixtures") + soup = Soup(response.body, "html.parser") + assert len(soup.findAll("textarea", {"name": "sql"})) + response = app_client.get("/fixtures/sortable") + assert b"View and edit SQL" in response.body + + def test_allow_sql_off(): - with make_app_client(config={"allow_sql": {}}) as client: + for client in make_app_client(config={"allow_sql": False}): response = client.get("/fixtures") - soup = Soup(response.content, "html.parser") - assert not len(soup.find_all("textarea", {"name": "sql"})) + soup = Soup(response.body, "html.parser") + assert not len(soup.findAll("textarea", {"name": "sql"})) # The table page should no longer show "View and edit SQL" response = client.get("/fixtures/sortable") - assert b"View and edit SQL" not in response.content + assert b"View and edit SQL" not in response.body -@pytest.mark.asyncio -@pytest.mark.parametrize("path", ["/404", "/fixtures/404"]) -async def test_404(ds_client, path): - response = await ds_client.get(path) - assert response.status_code == 404 +def assert_querystring_equal(expected, actual): + assert sorted(expected.split("&")) == sorted(actual.split("&")) + + +def assert_footer_links(soup): + footer_links = soup.find("div", {"class": "ft"}).findAll("a") + assert 4 == len(footer_links) + datasette_link, license_link, source_link, about_link = footer_links + assert "Datasette" == datasette_link.text.strip() + assert "tests/fixtures.py" == source_link.text.strip() + assert "Apache License 2.0" == license_link.text.strip() + assert "About Datasette" == about_link.text.strip() + assert "https://github.com/simonw/datasette" == datasette_link["href"] assert ( - f'", 1)[1].rsplit("<", 1)[0] + return inner_html.strip() -@pytest.mark.asyncio @pytest.mark.parametrize( 
"path,expected_redirect", [("/fixtures/", "/fixtures"), ("/fixtures/simple_view/", "/fixtures/simple_view")], ) -async def test_404_trailing_slash_redirect(ds_client, path, expected_redirect): - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"] == expected_redirect +def test_404_trailing_slash_redirect(app_client, path, expected_redirect): + response = app_client.get(path, allow_redirects=False) + assert 302 == response.status + assert expected_redirect == response.headers["Location"] -@pytest.mark.asyncio -async def test_404_content_type(ds_client): - response = await ds_client.get("/404") - assert response.status_code == 404 - assert "text/html; charset=utf-8" == response.headers["content-type"] - - -@pytest.mark.asyncio -async def test_canned_query_default_title(ds_client): - response = await ds_client.get("/fixtures/magic_parameters") - assert response.status_code == 200 - soup = Soup(response.content, "html.parser") - assert "fixtures: magic_parameters" == soup.find("h1").text - - -@pytest.mark.asyncio -async def test_canned_query_with_custom_metadata(ds_client): - response = await ds_client.get("/fixtures/neighborhood_search?text=town") - assert response.status_code == 200 - soup = Soup(response.content, "html.parser") +def test_canned_query_with_custom_metadata(app_client): + response = app_client.get("/fixtures/neighborhood_search?text=town") + assert response.status == 200 + soup = Soup(response.body, "html.parser") assert "Search neighborhoods" == soup.find("h1").text assert ( """ @@ -619,14 +865,38 @@ async def test_canned_query_with_custom_metadata(ds_client): ) -@pytest.mark.asyncio -async def test_urlify_custom_queries(ds_client): - path = "/fixtures/-/query?" + urllib.parse.urlencode( +@pytest.mark.parametrize( + "path,has_object,has_stream,has_expand", + [ + ("/fixtures/no_primary_key", False, True, False), + ("/fixtures/complex_foreign_keys", True, False, True), + ], +) +def test_advanced_export_box(app_client, path, has_object, has_stream, has_expand): + response = app_client.get(path) + assert response.status == 200 + soup = Soup(response.body, "html.parser") + # JSON shape options + expected_json_shapes = ["default", "array", "newline-delimited"] + if has_object: + expected_json_shapes.append("object") + div = soup.find("div", {"class": "advanced-export"}) + assert expected_json_shapes == [a.text for a in div.find("p").findAll("a")] + # "stream all rows" option + if has_stream: + assert "stream all rows" in str(div) + # "expand labels" option + if has_expand: + assert "expand labels" in str(div) + + +def test_urlify_custom_queries(app_client): + path = "/fixtures?" 
+ urllib.parse.urlencode( {"sql": "select ('https://twitter.com/' || 'simonw') as user_url;"} ) - response = await ds_client.get(path) - assert response.status_code == 200 - soup = Soup(response.content, "html.parser") + response = app_client.get(path) + assert response.status == 200 + soup = Soup(response.body, "html.parser") assert ( """' - ], - [ - '' - ], - [''], + '', + '', + '', + ] ] assert expected_tds == [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") ] -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_filename", - [ - ("/fixtures/binary_data/1.blob?_blob_column=data", "binary_data-1-data.blob"), - ( - "/fixtures/-/query.blob?sql=select+*+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d", - "data-f30889.blob", - ), - ], -) -async def test_blob_download(ds_client, path, expected_filename): - response = await ds_client.get(path) - assert response.status_code == 200 - assert response.content == b"\x15\x1c\x02\xc7\xad\x05\xfe" - assert response.headers["x-content-type-options"] == "nosniff" - assert ( - response.headers["content-disposition"] - == f'attachment; filename="{expected_filename}"' - ) - assert response.headers["content-type"] == "application/binary" +def test_metadata_json_html(app_client): + response = app_client.get("/-/metadata") + assert response.status == 200 + pre = Soup(response.body, "html.parser").find("pre") + assert METADATA == json.loads(pre.text) -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_message", - [ - ("/fixtures/binary_data/1.blob", "?_blob_column= is required"), - ("/fixtures/binary_data/1.blob?_blob_column=foo", "foo is not a valid column"), - ( - "/fixtures/binary_data/1.blob?_blob_column=data&_blob_hash=x", - "Link has expired - the requested binary content has changed or could not be found.", - ), - ], -) -async def test_blob_download_invalid_messages(ds_client, path, expected_message): - response = await ds_client.get(path) - assert response.status_code == 400 - assert expected_message in response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path", - [ - "/fixtures/-/query?sql=select+*+from+[123_starts_with_digits]", - "/fixtures/123_starts_with_digits", - ], -) -async def test_zero_results(ds_client, path): - response = await ds_client.get(path) - soup = Soup(response.text, "html.parser") - assert 0 == len(soup.select("table")) - assert 1 == len(soup.select("p.zero-results")) - - -@pytest.mark.asyncio -async def test_query_error(ds_client): - response = await ds_client.get("/fixtures/-/query?sql=select+*+from+notatable") - html = response.text - assert '

    no such table: notatable

    ' in html
-    assert '" in html
-    assert "0 results" not in html
-
-
-def test_config_template_debug_on():
-    with make_app_client(settings={"template_debug": True}) as client:
-        response = client.get("/fixtures/facetable?_context=1")
-        assert response.status_code == 200
-        assert response.text.startswith("{")
    -
    -
    -@pytest.mark.asyncio
    -async def test_config_template_debug_off(ds_client):
    -    response = await ds_client.get("/fixtures/facetable?_context=1")
    -    assert response.status_code == 200
-    assert not response.text.startswith("{")
    -
    -
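# Illustrative sketch of what the two template_debug tests above exercise, assuming
# the documented Datasette(settings=...) constructor, invoke_startup() and the
# datasette.client test client (all of which also appear elsewhere in this patch).
import asyncio

from datasette.app import Datasette


async def show_template_context():
    ds = Datasette(memory=True, settings={"template_debug": True})
    await ds.invoke_startup()
    # With template_debug enabled, ?_context=1 renders the template context for a
    # page instead of its normal HTML output.
    response = await ds.client.get("/?_context=1")
    print(response.text[:200])


asyncio.run(show_template_context())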
    -def test_debug_context_includes_extra_template_vars():
    -    # https://github.com/simonw/datasette/issues/693
    -    with make_app_client(settings={"template_debug": True}) as client:
    -        response = client.get("/fixtures/facetable?_context=1")
    -        # scope_path is added by PLUGIN1
    -        assert "scope_path" in response.text
    -
    -
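# The "scope_path" variable checked above is injected by one of the test plugins; a
# minimal plugin with the same effect might use the extra_template_vars hook, roughly
# like this (illustrative sketch, not the exact test plugin).
from datasette import hookimpl


@hookimpl
def extra_template_vars(request):
    # Make the ASGI scope path available to every template as {{ scope_path }}
    return {"scope_path": request.scope["path"]}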
    -@pytest.mark.parametrize(
    -    "path",
    -    [
    -        "/",
    -        "/fixtures",
    -        "/fixtures/compound_three_primary_keys",
    -        "/fixtures/compound_three_primary_keys/a,a,a",
    -        "/fixtures/paginated_view",
    -        "/fixtures/facetable",
    -        "/fixtures/facetable?_facet=state",
    -        "/fixtures/-/query?sql=select+1",
    -    ],
    -)
    -@pytest.mark.parametrize("use_prefix", (True, False))
    -def test_base_url_config(app_client_base_url_prefix, path, use_prefix):
    -    client = app_client_base_url_prefix
    -    path_to_get = path
    -    if use_prefix:
    -        path_to_get = "/prefix/" + path.lstrip("/")
    -    response = client.get(path_to_get)
    -    soup = Soup(response.content, "html.parser")
    -    for form in soup.select("form"):
    -        assert form["action"].startswith("/prefix")
    -    for el in soup.find_all(["a", "link", "script"]):
    -        if "href" in el.attrs:
    -            href = el["href"]
    -        elif "src" in el.attrs:
    -            href = el["src"]
    -        else:
-            continue  # Could be an element with neither href nor src, e.g. an inline <script>
    -        if (
    -            not href.startswith("#")
    -            and href
    -            not in {
    -                "https://datasette.io/",
    -                "https://github.com/simonw/datasette",
    -                "https://github.com/simonw/datasette/blob/main/LICENSE",
    -                "https://github.com/simonw/datasette/blob/main/tests/fixtures.py",
    -                "/login-as-root",  # Only used for the latest.datasette.io demo
    -            }
    -            and not href.startswith("https://plugin-example.datasette.io/")
    -        ):
    -            # If this has been made absolute it may start http://localhost/
    -            if href.startswith("http://localhost/"):
-                href = href[len("http://localhost/") :]
    -            assert href.startswith("/prefix/"), json.dumps(
    -                {
    -                    "path": path,
    -                    "path_to_get": path_to_get,
    -                    "href_or_src": href,
    -                    "element_parent": str(el.parent),
    -                },
    -                indent=4,
    -                default=repr,
    -            )
    -
    -
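# The app_client_base_url_prefix fixture used above presumably boils down to setting
# base_url; a minimal sketch of that configuration, assuming Datasette(settings=...):
from datasette.app import Datasette

ds = Datasette(memory=True, settings={"base_url": "/prefix/"})
# With base_url set, every generated link, form action and asset URL is expected to
# start with /prefix/, which is what the test above walks the rendered HTML to verify.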
    -def test_base_url_affects_filter_redirects(app_client_base_url_prefix):
    -    path = "/fixtures/binary_data?_filter_column=rowid&_filter_op=exact&_filter_value=1&_sort=rowid"
    -    response = app_client_base_url_prefix.get(path)
    -    assert response.status_code == 302
    -    assert (
    -        response.headers["location"]
    -        == "/prefix/fixtures/binary_data?_sort=rowid&rowid__exact=1"
    -    )
    -
    -
    -def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix):
    -    html = app_client_base_url_prefix.get("/").text
    -    assert '' in html
    -
    -
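# extra_css_urls is configured through metadata; a minimal sketch of the setup the
# test above relies on, assuming Datasette(metadata=...) and an illustrative
# stylesheet path:
from datasette.app import Datasette

ds = Datasette(
    memory=True,
    settings={"base_url": "/prefix/"},
    metadata={"extra_css_urls": ["/static/styles.css"]},
)
# Rendered pages should then include a stylesheet <link> whose href carries the
# /prefix/ base_url prefix, which is what the assertion above checks for.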
    -@pytest.mark.asyncio
    -@pytest.mark.parametrize(
    -    "path,expected",
    -    [
    -        (
    -            "/fixtures/neighborhood_search",
    -            "/fixtures/-/query?sql=%0Aselect+_neighborhood%2C+facet_cities.name%2C+state%0Afrom+facetable%0A++++join+facet_cities%0A++++++++on+facetable._city_id+%3D+facet_cities.id%0Awhere+_neighborhood+like+%27%25%27+%7C%7C+%3Atext+%7C%7C+%27%25%27%0Aorder+by+_neighborhood%3B%0A&text=",
    -        ),
    -        (
    -            "/fixtures/neighborhood_search?text=ber",
    -            "/fixtures/-/query?sql=%0Aselect+_neighborhood%2C+facet_cities.name%2C+state%0Afrom+facetable%0A++++join+facet_cities%0A++++++++on+facetable._city_id+%3D+facet_cities.id%0Awhere+_neighborhood+like+%27%25%27+%7C%7C+%3Atext+%7C%7C+%27%25%27%0Aorder+by+_neighborhood%3B%0A&text=ber",
    -        ),
    -        ("/fixtures/pragma_cache_size", None),
    -        (
    -            # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬
    -            "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC",
    -            "/fixtures/-/query?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B",
    -        ),
    -        ("/fixtures/magic_parameters", None),
    -    ],
    -)
    -async def test_edit_sql_link_on_canned_queries(ds_client, path, expected):
    -    response = await ds_client.get(path)
    -    assert response.status_code == 200
    -    expected_link = f'Edit SQL'
    -    if expected:
    -        assert expected_link in response.text
    -    else:
    -        assert "Edit SQL" not in response.text
    -
    -
    -@pytest.mark.parametrize(
    -    "has_permission",
    -    [
    -        pytest.param(
    -            True,
    -        ),
    -        False,
    -    ],
    -)
    -def test_edit_sql_link_not_shown_if_user_lacks_permission(has_permission):
    -    with make_app_client(
    -        config={
    -            "allow_sql": None if has_permission else {"id": "not-you"},
    -            "databases": {"fixtures": {"queries": {"simple": "select 1 + 1"}}},
    -        }
    -    ) as client:
    -        response = client.get("/fixtures/simple")
    -        if has_permission:
    -            assert "Edit SQL" in response.text
    -        else:
    -            assert "Edit SQL" not in response.text
    -
    -
    -@pytest.mark.asyncio
    -@pytest.mark.parametrize(
    -    "actor_id,should_have_links,should_not_have_links",
    -    [
    -        (None, None, None),
    -        ("test", None, ["/-/permissions"]),
    -        ("root", ["/-/permissions", "/-/allow-debug"], None),
    -    ],
    -)
    -async def test_navigation_menu_links(
    -    ds_client, actor_id, should_have_links, should_not_have_links
    -):
    -    # Enable root user if testing with root actor
    -    if actor_id == "root":
    -        ds_client.ds.root_enabled = True
    -    cookies = {}
    -    if actor_id:
    -        cookies = {"ds_actor": ds_client.actor_cookie({"id": actor_id})}
    -    html = (await ds_client.get("/", cookies=cookies)).text
    -    soup = Soup(html, "html.parser")
    -    details = soup.find("nav").find("details")
    -    if not actor_id:
    -        # Should not show a menu
    -        assert details is None
    -        return
    -    # They are logged in: should show a menu
    -    assert details is not None
    -    # And a logout form
    -    assert details.find("form") is not None
    -    if should_have_links:
    -        for link in should_have_links:
    -            assert (
    -                details.find("a", {"href": link}) is not None
    -            ), f"{link} expected but missing from nav menu"
    -
    -    if should_not_have_links:
    -        for link in should_not_have_links:
    -            assert (
    -                details.find("a", {"href": link}) is None
    -            ), f"{link} found but should not have been in nav menu"
    -
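# actor_cookie() on the test client is assumed to wrap datasette.sign(); a minimal
# sketch of how the ds_actor cookie used above could be constructed by hand:
from datasette.app import Datasette

ds = Datasette(memory=True)
# The ds_actor cookie is a signed {"a": actor} dictionary, signed in the "actor" namespace.
cookies = {"ds_actor": ds.sign({"a": {"id": "root"}}, "actor")}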
    -
    -@pytest.mark.asyncio
    -async def test_trace_correctly_escaped(ds_client):
    -    response = await ds_client.get("/fixtures/-/query?sql=select+'

    Hello'&_trace=1") - assert "select '

    Hello" not in response.text - assert "select '<h1>Hello" in response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected", - ( - # Instance index page - ("/", "http://localhost/.json"), - # Table page - ("/fixtures/facetable", "http://localhost/fixtures/facetable.json"), - ( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv", - "http://localhost/fixtures/table~2Fwith~2Fslashes~2Ecsv.json", - ), - # Row page - ( - "/fixtures/no_primary_key/1", - "http://localhost/fixtures/no_primary_key/1.json", - ), - # Database index page - ( - "/fixtures", - "http://localhost/fixtures.json", - ), - # Custom query page - ( - "/fixtures/-/query?sql=select+*+from+facetable", - "http://localhost/fixtures/-/query.json?sql=select+*+from+facetable", - ), - # Canned query page - ( - "/fixtures/neighborhood_search?text=town", - "http://localhost/fixtures/neighborhood_search.json?text=town", - ), - # /-/ pages - ( - "/-/plugins", - "http://localhost/-/plugins.json", - ), - ), -) -async def test_alternate_url_json(ds_client, path, expected): - response = await ds_client.get(path) - assert response.status_code == 200 - link = response.headers["link"] - assert link == '<{}>; rel="alternate"; type="application/json+datasette"'.format( - expected - ) - assert ( - ''.format( - expected - ) - in response.text - ) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path", - ("/-/patterns", "/-/messages", "/-/allow-debug", "/fixtures.db"), -) -async def test_no_alternate_url_json(ds_client, path): - response = await ds_client.get(path) - assert "link" not in response.headers - assert ( - 'Name" in response.text - assert "view-instance" in response.text - assert "view-database" in response.text - finally: - ds_client.ds.root_enabled = original_root_enabled - - -@pytest.mark.asyncio -async def test_actions_page_does_not_display_none_string(ds_client): - """Ensure the Resource column doesn't display the string 'None' for null values.""" - # https://github.com/simonw/datasette/issues/2599 - original_root_enabled = ds_client.ds.root_enabled - try: - ds_client.ds.root_enabled = True - cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} - response = await ds_client.get("/-/actions", cookies=cookies) - assert response.status_code == 200 - assert "None" not in response.text - finally: - ds_client.ds.root_enabled = original_root_enabled - - -@pytest.mark.asyncio -async def test_permission_debug_tabs_with_query_string(ds_client): - """Test that navigation tabs persist query strings across Check, Allowed, and Rules pages""" - original_root_enabled = ds_client.ds.root_enabled - try: - ds_client.ds.root_enabled = True - cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} - - # Test /-/allowed with query string - response = await ds_client.get( - "/-/allowed?action=view-table&page_size=50", cookies=cookies - ) - assert response.status_code == 200 - # Check that Rules and Check tabs have the query string - assert 'href="/-/rules?action=view-table&page_size=50"' in response.text - assert 'href="/-/check?action=view-table&page_size=50"' in response.text - # Playground and Actions should not have query string - assert 'href="/-/permissions"' in response.text - assert 'href="/-/actions"' in response.text - - # Test /-/rules with query string - response = await ds_client.get( - "/-/rules?action=view-database&parent=test", cookies=cookies - ) - assert response.status_code == 200 - # Check that Allowed and Check tabs have the query string - assert 'href="/-/allowed?action=view-database&parent=test"' 
in response.text - assert 'href="/-/check?action=view-database&parent=test"' in response.text - - # Test /-/check with query string - response = await ds_client.get("/-/check?action=execute-sql", cookies=cookies) - assert response.status_code == 200 - # Check that Allowed and Rules tabs have the query string - assert 'href="/-/allowed?action=execute-sql"' in response.text - assert 'href="/-/rules?action=execute-sql"' in response.text - finally: - ds_client.ds.root_enabled = original_root_enabled + response = client.get("/fixtures/complex_foreign_keys") + assert response.status == 200 + assert ( + '
    ' + '1 - 2 - hello 1' + "
    " + ) == str(Soup(response.text, "html.parser").select_one("div.custom-table-row")) diff --git a/tests/test_internal_db.py b/tests/test_internal_db.py deleted file mode 100644 index 7a0d1630..00000000 --- a/tests/test_internal_db.py +++ /dev/null @@ -1,141 +0,0 @@ -import pytest -import sqlite_utils - - -# ensure refresh_schemas() gets called before interacting with internal_db -async def ensure_internal(ds_client): - await ds_client.get("/fixtures.json?sql=select+1") - return ds_client.ds.get_internal_database() - - -@pytest.mark.asyncio -async def test_internal_databases(ds_client): - internal_db = await ensure_internal(ds_client) - databases = await internal_db.execute("select * from catalog_databases") - assert len(databases) == 1 - assert databases.rows[0]["database_name"] == "fixtures" - - -@pytest.mark.asyncio -async def test_internal_tables(ds_client): - internal_db = await ensure_internal(ds_client) - tables = await internal_db.execute("select * from catalog_tables") - assert len(tables) > 5 - table = tables.rows[0] - assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"} - - -@pytest.mark.asyncio -async def test_internal_views(ds_client): - internal_db = await ensure_internal(ds_client) - views = await internal_db.execute("select * from catalog_views") - assert len(views) >= 4 - view = views.rows[0] - assert set(view.keys()) == {"rootpage", "view_name", "database_name", "sql"} - - -@pytest.mark.asyncio -async def test_internal_indexes(ds_client): - internal_db = await ensure_internal(ds_client) - indexes = await internal_db.execute("select * from catalog_indexes") - assert len(indexes) > 5 - index = indexes.rows[0] - assert set(index.keys()) == { - "partial", - "name", - "table_name", - "unique", - "seq", - "database_name", - "origin", - } - - -@pytest.mark.asyncio -async def test_internal_foreign_keys(ds_client): - internal_db = await ensure_internal(ds_client) - foreign_keys = await internal_db.execute("select * from catalog_foreign_keys") - assert len(foreign_keys) > 5 - foreign_key = foreign_keys.rows[0] - assert set(foreign_key.keys()) == { - "table", - "seq", - "on_update", - "on_delete", - "to", - "id", - "match", - "database_name", - "table_name", - "from", - } - - -@pytest.mark.asyncio -async def test_internal_foreign_key_references(ds_client): - internal_db = await ensure_internal(ds_client) - - def inner(conn): - db = sqlite_utils.Database(conn) - table_names = db.table_names() - for table in db.tables: - for fk in table.foreign_keys: - other_table = fk.other_table - other_column = fk.other_column - message = 'Column "{}.{}" references other column "{}.{}" which does not exist'.format( - table.name, fk.column, other_table, other_column - ) - assert other_table in table_names, message + " (bad table)" - assert other_column in db[other_table].columns_dict, ( - message + " (bad column)" - ) - - await internal_db.execute_fn(inner) - - -@pytest.mark.asyncio -async def test_stale_catalog_entry_database_fix(tmp_path): - """ - Test for https://github.com/simonw/datasette/issues/2605 - - When the internal database persists across restarts and has entries in - catalog_databases for databases that no longer exist, accessing the - index page should not cause a 500 error (KeyError). 
- """ - from datasette.app import Datasette - - internal_db_path = str(tmp_path / "internal.db") - data_db_path = str(tmp_path / "data.db") - - # Create a data database file - import sqlite3 - - conn = sqlite3.connect(data_db_path) - conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY)") - conn.close() - - # First Datasette instance: with the data database and persistent internal db - ds1 = Datasette(files=[data_db_path], internal=internal_db_path) - await ds1.invoke_startup() - - # Access the index page to populate the internal catalog - response = await ds1.client.get("/") - assert "data" in ds1.databases - assert response.status_code == 200 - - # Second Datasette instance: reusing internal.db but WITHOUT the data database - # This simulates restarting Datasette after removing a database - ds2 = Datasette(internal=internal_db_path) - await ds2.invoke_startup() - - # The database is not in ds2.databases - assert "data" not in ds2.databases - - # Accessing the index page should NOT cause a 500 error - # This is the bug: it currently raises KeyError when trying to - # access ds.databases["data"] for the stale catalog entry - response = await ds2.client.get("/") - assert response.status_code == 200, ( - f"Index page should return 200, not {response.status_code}. " - "This fails due to stale catalog entries causing KeyError." - ) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py deleted file mode 100644 index 4a078f75..00000000 --- a/tests/test_internals_database.py +++ /dev/null @@ -1,772 +0,0 @@ -""" -Tests for the datasette.database.Database class -""" - -from datasette.app import Datasette -from datasette.database import Database, Results, MultipleValues -from datasette.utils.sqlite import sqlite3, sqlite_version -from datasette.utils import Column -import pytest -import time -import uuid - - -@pytest.fixture -def db(app_client): - return app_client.ds.get_database("fixtures") - - -@pytest.mark.asyncio -async def test_execute(db): - results = await db.execute("select * from facetable") - assert isinstance(results, Results) - assert 15 == len(results) - - -@pytest.mark.asyncio -async def test_results_first(db): - assert None is (await db.execute("select * from facetable where pk > 100")).first() - results = await db.execute("select * from facetable") - row = results.first() - assert isinstance(row, sqlite3.Row) - - -@pytest.mark.asyncio -@pytest.mark.parametrize("expected", (True, False)) -async def test_results_bool(db, expected): - where = "" if expected else "where pk = 0" - results = await db.execute("select * from facetable {}".format(where)) - assert bool(results) is expected - - -@pytest.mark.asyncio -async def test_results_dicts(db): - results = await db.execute("select pk, name from roadside_attractions") - assert results.dicts() == [ - {"pk": 1, "name": "The Mystery Spot"}, - {"pk": 2, "name": "Winchester Mystery House"}, - {"pk": 3, "name": "Burlingame Museum of PEZ Memorabilia"}, - {"pk": 4, "name": "Bigfoot Discovery Museum"}, - ] - - -@pytest.mark.parametrize( - "query,expected", - [ - ("select 1", 1), - ("select 1, 2", None), - ("select 1 as num union select 2 as num", None), - ], -) -@pytest.mark.asyncio -async def test_results_single_value(db, query, expected): - results = await db.execute(query) - if expected: - assert expected == results.single_value() - else: - with pytest.raises(MultipleValues): - results.single_value() - - -@pytest.mark.asyncio -async def test_execute_fn(db): - def get_1_plus_1(conn): - return 
conn.execute("select 1 + 1").fetchall()[0][0] - - assert 2 == await db.execute_fn(get_1_plus_1) - - -@pytest.mark.asyncio -async def test_execute_fn_transaction_false(): - datasette = Datasette(memory=True) - db = datasette.add_memory_database("test_execute_fn_transaction_false") - - def run(conn): - try: - with conn: - conn.execute("create table foo (id integer primary key)") - conn.execute("insert into foo (id) values (44)") - # Table should exist - assert ( - conn.execute( - 'select count(*) from sqlite_master where name = "foo"' - ).fetchone()[0] - == 1 - ) - assert conn.execute("select id from foo").fetchall()[0][0] == 44 - raise ValueError("Cancel commit") - except ValueError: - pass - # Row should NOT exist - assert conn.execute("select count(*) from foo").fetchone()[0] == 0 - - await db.execute_write_fn(run, transaction=False) - - -@pytest.mark.parametrize( - "tables,exists", - ( - (["facetable", "searchable", "tags", "searchable_tags"], True), - (["foo", "bar", "baz"], False), - ), -) -@pytest.mark.asyncio -async def test_table_exists(db, tables, exists): - for table in tables: - actual = await db.table_exists(table) - assert exists == actual - - -@pytest.mark.parametrize( - "view,expected", - ( - ("not_a_view", False), - ("paginated_view", True), - ), -) -@pytest.mark.asyncio -async def test_view_exists(db, view, expected): - actual = await db.view_exists(view) - assert actual == expected - - -@pytest.mark.parametrize( - "table,expected", - ( - ( - "facetable", - [ - "pk", - "created", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - ), - ( - "sortable", - [ - "pk1", - "pk2", - "content", - "sortable", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], - ), - ), -) -@pytest.mark.asyncio -async def test_table_columns(db, table, expected): - columns = await db.table_columns(table) - assert columns == expected - - -@pytest.mark.parametrize( - "table,expected", - ( - ( - "facetable", - [ - Column( - cid=0, - name="pk", - type="integer", - notnull=0, - default_value=None, - is_pk=1, - hidden=0, - ), - Column( - cid=1, - name="created", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=2, - name="planet_int", - type="integer", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=3, - name="on_earth", - type="integer", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=4, - name="state", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=5, - name="_city_id", - type="integer", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=6, - name="_neighborhood", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=7, - name="tags", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=8, - name="complex_array", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=9, - name="distinct_some_null", - type="", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=10, - name="n", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - ], - ), - ( - "sortable", - [ - Column( - cid=0, - name="pk1", - type="varchar(30)", - notnull=0, - default_value=None, - is_pk=1, - hidden=0, - ), - Column( - cid=1, - name="pk2", - type="varchar(30)", - notnull=0, - 
default_value=None, - is_pk=2, - hidden=0, - ), - Column( - cid=2, - name="content", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=3, - name="sortable", - type="integer", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=4, - name="sortable_with_nulls", - type="real", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=5, - name="sortable_with_nulls_2", - type="real", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - Column( - cid=6, - name="text", - type="text", - notnull=0, - default_value=None, - is_pk=0, - hidden=0, - ), - ], - ), - ), -) -@pytest.mark.asyncio -async def test_table_column_details(db, table, expected): - columns = await db.table_column_details(table) - # Convert "type" to lowercase before comparison - # https://github.com/simonw/datasette/issues/1647 - compare_columns = [ - Column( - c.cid, c.name, c.type.lower(), c.notnull, c.default_value, c.is_pk, c.hidden - ) - for c in columns - ] - assert compare_columns == expected - - -@pytest.mark.asyncio -async def test_get_all_foreign_keys(db): - all_foreign_keys = await db.get_all_foreign_keys() - assert all_foreign_keys["roadside_attraction_characteristics"] == { - "incoming": [], - "outgoing": [ - { - "other_table": "attraction_characteristic", - "column": "characteristic_id", - "other_column": "pk", - }, - { - "other_table": "roadside_attractions", - "column": "attraction_id", - "other_column": "pk", - }, - ], - } - assert all_foreign_keys["attraction_characteristic"] == { - "incoming": [ - { - "other_table": "roadside_attraction_characteristics", - "column": "pk", - "other_column": "characteristic_id", - } - ], - "outgoing": [], - } - assert all_foreign_keys["compound_primary_key"] == { - # No incoming because these are compound foreign keys, which we currently ignore - "incoming": [], - "outgoing": [], - } - assert all_foreign_keys["foreign_key_references"] == { - "incoming": [], - "outgoing": [ - { - "other_table": "primary_key_multiple_columns", - "column": "foreign_key_with_no_label", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_blank_label", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_label", - "other_column": "id", - }, - ], - } - - -@pytest.mark.asyncio -async def test_table_names(db): - table_names = await db.table_names() - assert table_names == [ - "simple_primary_key", - "primary_key_multiple_columns", - "primary_key_multiple_columns_explicit_label", - "compound_primary_key", - "compound_three_primary_keys", - "foreign_key_references", - "sortable", - "no_primary_key", - "123_starts_with_digits", - "Table With Space In Name", - "table/with/slashes.csv", - "complex_foreign_keys", - "custom_foreign_key_label", - "tags", - "searchable", - "searchable_tags", - "searchable_fts", - "searchable_fts_segments", - "searchable_fts_segdir", - "searchable_fts_docsize", - "searchable_fts_stat", - "select", - "infinity", - "facet_cities", - "facetable", - "binary_data", - "roadside_attractions", - "attraction_characteristic", - "roadside_attraction_characteristics", - ] - - -@pytest.mark.asyncio -async def test_view_names(db): - view_names = await db.view_names() - assert view_names == [ - "paginated_view", - "simple_view", - "searchable_view", - "searchable_view_configured_by_metadata", - ] - - -@pytest.mark.asyncio -async def test_execute_write_block_true(db): - await db.execute_write( - 
"update roadside_attractions set name = ? where pk = ?", ["Mystery!", 1] - ) - rows = await db.execute("select name from roadside_attractions where pk = 1") - assert "Mystery!" == rows.rows[0][0] - - -@pytest.mark.asyncio -async def test_execute_write_block_false(db): - await db.execute_write( - "update roadside_attractions set name = ? where pk = ?", - ["Mystery!", 1], - ) - time.sleep(0.1) - rows = await db.execute("select name from roadside_attractions where pk = 1") - assert "Mystery!" == rows.rows[0][0] - - -@pytest.mark.asyncio -async def test_execute_write_script(db): - await db.execute_write_script( - "create table foo (id integer primary key); create table bar (id integer primary key);" - ) - table_names = await db.table_names() - assert {"foo", "bar"}.issubset(table_names) - - -@pytest.mark.asyncio -async def test_execute_write_many(db): - await db.execute_write_script("create table foomany (id integer primary key)") - await db.execute_write_many( - "insert into foomany (id) values (?)", [(1,), (10,), (100,)] - ) - result = await db.execute("select * from foomany") - assert [r[0] for r in result.rows] == [1, 10, 100] - - -@pytest.mark.asyncio -async def test_execute_write_has_correctly_prepared_connection(db): - # The sleep() function is only available if ds._prepare_connection() was called - await db.execute_write("select sleep(0.01)") - - -@pytest.mark.asyncio -async def test_execute_write_fn_block_false(db): - def write_fn(conn): - conn.execute("delete from roadside_attractions where pk = 1;") - row = conn.execute("select count(*) from roadside_attractions").fetchone() - return row[0] - - task_id = await db.execute_write_fn(write_fn, block=False) - assert isinstance(task_id, uuid.UUID) - - -@pytest.mark.asyncio -async def test_execute_write_fn_block_true(db): - def write_fn(conn): - conn.execute("delete from roadside_attractions where pk = 1;") - row = conn.execute("select count(*) from roadside_attractions").fetchone() - return row[0] - - new_count = await db.execute_write_fn(write_fn) - assert 3 == new_count - - -@pytest.mark.asyncio -async def test_execute_write_fn_exception(db): - def write_fn(conn): - assert False - - with pytest.raises(AssertionError): - await db.execute_write_fn(write_fn) - - -@pytest.mark.asyncio -@pytest.mark.timeout(1) -async def test_execute_write_fn_connection_exception(tmpdir, app_client): - path = str(tmpdir / "immutable.db") - sqlite3.connect(path).execute("vacuum") - db = Database(app_client.ds, path=path, is_mutable=False) - app_client.ds.add_database(db, name="immutable-db") - - def write_fn(conn): - assert False - - with pytest.raises(AssertionError): - await db.execute_write_fn(write_fn) - - app_client.ds.remove_database("immutable-db") - - -def table_exists(conn, name): - return bool( - conn.execute( - """ - with all_tables as ( - select name from sqlite_master where type = 'table' - union all - select name from temp.sqlite_master where type = 'table' - ) - select 1 from all_tables where name = ? 
- """, - (name,), - ).fetchall(), - ) - - -def table_exists_checker(name): - def inner(conn): - return table_exists(conn, name) - - return inner - - -@pytest.mark.asyncio -@pytest.mark.parametrize("disable_threads", (False, True)) -async def test_execute_isolated(db, disable_threads): - if disable_threads: - ds = Datasette(memory=True, settings={"num_sql_threads": 0}) - db = ds.add_database(Database(ds, memory_name="test_num_sql_threads_zero")) - - # Create temporary table in write - await db.execute_write( - "create temporary table created_by_write (id integer primary key)" - ) - # Should stay visible to write connection - assert await db.execute_write_fn(table_exists_checker("created_by_write")) - - def create_shared_table(conn): - conn.execute("create table shared (id integer primary key)") - # And a temporary table that should not continue to exist - conn.execute( - "create temporary table created_by_isolated (id integer primary key)" - ) - assert table_exists(conn, "created_by_isolated") - # Also confirm that created_by_write does not exist - return table_exists(conn, "created_by_write") - - # shared should not exist - assert not await db.execute_fn(table_exists_checker("shared")) - - # Create it using isolated - created_by_write_exists = await db.execute_isolated_fn(create_shared_table) - assert not created_by_write_exists - - # shared SHOULD exist now - assert await db.execute_fn(table_exists_checker("shared")) - - # created_by_isolated should not exist, even in write connection - assert not await db.execute_write_fn(table_exists_checker("created_by_isolated")) - - # ... and a second call to isolated should not see that connection either - assert not await db.execute_isolated_fn(table_exists_checker("created_by_isolated")) - - -@pytest.mark.asyncio -async def test_mtime_ns(db): - assert isinstance(db.mtime_ns, int) - - -def test_mtime_ns_is_none_for_memory(app_client): - memory_db = Database(app_client.ds, is_memory=True) - assert memory_db.is_memory is True - assert None is memory_db.mtime_ns - - -def test_is_mutable(app_client): - assert Database(app_client.ds, is_memory=True).is_mutable is True - assert Database(app_client.ds, is_memory=True, is_mutable=True).is_mutable is True - assert Database(app_client.ds, is_memory=True, is_mutable=False).is_mutable is False - - -@pytest.mark.asyncio -async def test_attached_databases(app_client_two_attached_databases_crossdb_enabled): - database = app_client_two_attached_databases_crossdb_enabled.ds.get_database( - "_memory" - ) - attached = await database.attached_databases() - assert {a.name for a in attached} == {"extra database", "fixtures"} - - -@pytest.mark.asyncio -async def test_database_memory_name(app_client): - ds = app_client.ds - foo1 = ds.add_database(Database(ds, memory_name="foo")) - foo2 = ds.add_memory_database("foo") - bar1 = ds.add_database(Database(ds, memory_name="bar")) - bar2 = ds.add_memory_database("bar") - for db in (foo1, foo2, bar1, bar2): - table_names = await db.table_names() - assert table_names == [] - # Now create a table in foo - await foo1.execute_write("create table foo (t text)") - assert await foo1.table_names() == ["foo"] - assert await foo2.table_names() == ["foo"] - assert await bar1.table_names() == [] - assert await bar2.table_names() == [] - - -@pytest.mark.asyncio -async def test_in_memory_databases_forbid_writes(app_client): - ds = app_client.ds - db = ds.add_database(Database(ds, memory_name="test")) - with pytest.raises(sqlite3.OperationalError): - await db.execute("create table foo (t 
text)") - assert await db.table_names() == [] - # Using db.execute_write() should work: - await db.execute_write("create table foo (t text)") - assert await db.table_names() == ["foo"] - - -def pragma_table_list_supported(): - return sqlite_version()[1] >= 37 - - -@pytest.mark.asyncio -@pytest.mark.skipif( - not pragma_table_list_supported(), reason="Requires PRAGMA table_list support" -) -async def test_hidden_tables(app_client): - ds = app_client.ds - db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) - assert await db.hidden_table_names() == [] - await db.execute("create virtual table f using fts5(a)") - assert await db.hidden_table_names() == [ - "f_config", - "f_content", - "f_data", - "f_docsize", - "f_idx", - ] - - await db.execute("create virtual table r using rtree(id, amin, amax)") - assert await db.hidden_table_names() == [ - "f_config", - "f_content", - "f_data", - "f_docsize", - "f_idx", - "r_node", - "r_parent", - "r_rowid", - ] - - await db.execute("create table _hideme(_)") - assert await db.hidden_table_names() == [ - "_hideme", - "f_config", - "f_content", - "f_data", - "f_docsize", - "f_idx", - "r_node", - "r_parent", - "r_rowid", - ] - - # A fts virtual table with a content table should be hidden too - await db.execute("create virtual table f2_fts using fts5(a, content='f')") - assert await db.hidden_table_names() == [ - "_hideme", - "f2_fts_config", - "f2_fts_data", - "f2_fts_docsize", - "f2_fts_idx", - "f_config", - "f_content", - "f_data", - "f_docsize", - "f_idx", - "r_node", - "r_parent", - "r_rowid", - "f2_fts", - ] - - -@pytest.mark.asyncio -async def test_replace_database(tmpdir): - path1 = str(tmpdir / "data1.db") - (tmpdir / "two").mkdir() - path2 = str(tmpdir / "two" / "data1.db") - sqlite3.connect(path1).executescript( - """ - create table t (id integer primary key); - insert into t (id) values (1); - insert into t (id) values (2); - """ - ) - sqlite3.connect(path2).executescript( - """ - create table t (id integer primary key); - insert into t (id) values (1); - """ - ) - datasette = Datasette([path1]) - db = datasette.get_database("data1") - count = (await db.execute("select count(*) from t")).first()[0] - assert count == 2 - # Now replace that database - datasette.get_database("data1").close() - datasette.remove_database("data1") - datasette.add_database(Database(datasette, path2), "data1") - db2 = datasette.get_database("data1") - count = (await db2.execute("select count(*) from t")).first()[0] - assert count == 1 diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py deleted file mode 100644 index c64620a6..00000000 --- a/tests/test_internals_datasette.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -Tests for the datasette.app.Datasette class -""" - -import dataclasses -from datasette import Context -from datasette.app import Datasette, Database, ResourcesSQL -from datasette.resources import DatabaseResource -from itsdangerous import BadSignature -import pytest - - -@pytest.fixture -def datasette(ds_client): - return ds_client.ds - - -def test_get_database(datasette): - db = datasette.get_database("fixtures") - assert "fixtures" == db.name - with pytest.raises(KeyError): - datasette.get_database("missing") - - -def test_get_database_no_argument(datasette): - # Returns the first available database: - db = datasette.get_database() - assert "fixtures" == db.name - - -@pytest.mark.parametrize("value", ["hello", 123, {"key": "value"}]) -@pytest.mark.parametrize("namespace", [None, "two"]) -def test_sign_unsign(datasette, 
value, namespace): - extra_args = [namespace] if namespace else [] - signed = datasette.sign(value, *extra_args) - assert value != signed - assert value == datasette.unsign(signed, *extra_args) - with pytest.raises(BadSignature): - datasette.unsign(signed[:-1] + ("!" if signed[-1] != "!" else ":")) - - -@pytest.mark.parametrize( - "setting,expected", - ( - ("base_url", "/"), - ("max_csv_mb", 100), - ("allow_csv_stream", True), - ), -) -def test_datasette_setting(datasette, setting, expected): - assert datasette.setting(setting) == expected - - -@pytest.mark.asyncio -async def test_datasette_constructor(): - ds = Datasette() - databases = (await ds.client.get("/-/databases.json")).json() - assert databases == [ - { - "name": "_memory", - "route": "_memory", - "path": None, - "size": 0, - "is_mutable": False, - "is_memory": True, - "hash": None, - } - ] - - -@pytest.mark.asyncio -async def test_num_sql_threads_zero(): - ds = Datasette([], memory=True, settings={"num_sql_threads": 0}) - db = ds.add_database(Database(ds, memory_name="test_num_sql_threads_zero")) - await db.execute_write("create table t(id integer primary key)") - await db.execute_write("insert into t (id) values (1)") - response = await ds.client.get("/-/threads.json") - assert response.json() == {"num_threads": 0, "threads": []} - response2 = await ds.client.get("/test_num_sql_threads_zero/t.json?_shape=array") - assert response2.json() == [{"id": 1}] - - -ROOT = {"id": "root"} -ALLOW_ROOT = {"allow": {"id": "root"}} - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "actor,config,action,resource,should_allow,expected_private", - ( - (None, ALLOW_ROOT, "view-instance", None, False, False), - (ROOT, ALLOW_ROOT, "view-instance", None, True, True), - ( - None, - {"databases": {"_memory": ALLOW_ROOT}}, - "view-database", - DatabaseResource(database="_memory"), - False, - False, - ), - ( - ROOT, - {"databases": {"_memory": ALLOW_ROOT}}, - "view-database", - DatabaseResource(database="_memory"), - True, - True, - ), - # Check private is false for non-protected instance check - ( - ROOT, - {"allow": True}, - "view-instance", - None, - True, - False, - ), - ), -) -async def test_datasette_check_visibility( - actor, config, action, resource, should_allow, expected_private -): - ds = Datasette([], memory=True, config=config) - await ds.invoke_startup() - visible, private = await ds.check_visibility( - actor, action=action, resource=resource - ) - assert visible == should_allow - assert private == expected_private - - -@pytest.mark.asyncio -async def test_datasette_render_template_no_request(): - # https://github.com/simonw/datasette/issues/1849 - ds = Datasette(memory=True) - await ds.invoke_startup() - rendered = await ds.render_template("error.html") - assert "Error " in rendered - - -@pytest.mark.asyncio -async def test_datasette_render_template_with_dataclass(): - @dataclasses.dataclass - class ExampleContext(Context): - title: str - status: int - error: str - - context = ExampleContext(title="Hello", status=200, error="Error message") - ds = Datasette(memory=True) - await ds.invoke_startup() - rendered = await ds.render_template("error.html", context) - assert "
    Hello
    " in rendered - assert "Error message" in rendered - - -def test_datasette_error_if_string_not_list(tmpdir): - # https://github.com/simonw/datasette/issues/1985 - db_path = str(tmpdir / "data.db") - with pytest.raises(ValueError): - ds = Datasette(db_path) - - -@pytest.mark.asyncio -async def test_get_action(ds_client): - ds = ds_client.ds - for name_or_abbr in ("vi", "view-instance", "vt", "view-table"): - action = ds.get_action(name_or_abbr) - if "-" in name_or_abbr: - assert action.name == name_or_abbr - else: - assert action.abbr == name_or_abbr - # And test None return for missing action - assert ds.get_action("missing-permission") is None - - -@pytest.mark.asyncio -async def test_apply_metadata_json(): - ds = Datasette( - metadata={ - "databases": { - "legislators": { - "tables": {"offices": {"summary": "office address or sumtin"}}, - "queries": { - "millennial_representatives": { - "summary": "Social media accounts for current legislators" - } - }, - } - }, - "weird_instance_value": {"nested": [1, 2, 3]}, - }, - ) - await ds.invoke_startup() - assert (await ds.client.get("/")).status_code == 200 - value = (await ds.get_instance_metadata()).get("weird_instance_value") - assert value == '{"nested": [1, 2, 3]}' - - -@pytest.mark.asyncio -async def test_allowed_resources_sql(datasette): - result = await datasette.allowed_resources_sql( - action="view-table", - actor=None, - ) - assert isinstance(result, ResourcesSQL) - assert "all_rules AS" in result.sql - assert result.params["action"] == "view-table" diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py deleted file mode 100644 index 326fcdc0..00000000 --- a/tests/test_internals_datasette_client.py +++ /dev/null @@ -1,313 +0,0 @@ -import httpx -import pytest -import pytest_asyncio -from datasette.app import Datasette - - -@pytest_asyncio.fixture -async def datasette(ds_client): - await ds_client.ds.invoke_startup() - return ds_client.ds - - -@pytest_asyncio.fixture -async def datasette_with_permissions(): - """A datasette instance with permission restrictions for testing""" - ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) - await ds.invoke_startup() - db = ds.add_memory_database("test_datasette_with_permissions", name="test_db") - await db.execute_write( - "create table if not exists test_table (id integer primary key, name text)" - ) - await db.execute_write( - "insert or ignore into test_table (id, name) values (1, 'Alice')" - ) - # Trigger catalog refresh - await ds.client.get("/") - return ds - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "method,path,expected_status", - [ - ("get", "/", 200), - ("options", "/", 200), - ("head", "/", 200), - ("put", "/", 405), - ("patch", "/", 405), - ("delete", "/", 405), - ], -) -async def test_client_methods(datasette, method, path, expected_status): - client_method = getattr(datasette.client, method) - response = await client_method(path) - assert isinstance(response, httpx.Response) - assert response.status_code == expected_status - # Try that again using datasette.client.request - response2 = await datasette.client.request(method, path) - assert response2.status_code == expected_status - - -@pytest.mark.asyncio -@pytest.mark.parametrize("prefix", [None, "/prefix/"]) -async def test_client_post(datasette, prefix): - original_base_url = datasette._settings["base_url"] - try: - if prefix is not None: - datasette._settings["base_url"] = prefix - response = await datasette.client.post( - "/-/messages", - data={ 
- "message": "A message", - }, - ) - assert isinstance(response, httpx.Response) - assert response.status_code == 302 - assert "ds_messages" in response.cookies - finally: - datasette._settings["base_url"] = original_base_url - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "prefix,expected_path", [(None, "/asgi-scope"), ("/prefix/", "/prefix/asgi-scope")] -) -async def test_client_path(datasette, prefix, expected_path): - original_base_url = datasette._settings["base_url"] - try: - if prefix is not None: - datasette._settings["base_url"] = prefix - response = await datasette.client.get("/asgi-scope") - path = response.json()["path"] - assert path == expected_path - finally: - datasette._settings["base_url"] = original_base_url - - -@pytest.mark.asyncio -async def test_skip_permission_checks_allows_forbidden_access( - datasette_with_permissions, -): - """Test that skip_permission_checks=True bypasses permission checks""" - ds = datasette_with_permissions - - # Without skip_permission_checks, anonymous user should get 403 for protected database - response = await ds.client.get("/test_db.json") - assert response.status_code == 403 - - # With skip_permission_checks=True, should get 200 - response = await ds.client.get("/test_db.json", skip_permission_checks=True) - assert response.status_code == 200 - data = response.json() - assert data["database"] == "test_db" - - -@pytest.mark.asyncio -async def test_skip_permission_checks_on_table(datasette_with_permissions): - """Test skip_permission_checks works for table access""" - ds = datasette_with_permissions - - # Without skip_permission_checks, should get 403 - response = await ds.client.get("/test_db/test_table.json") - assert response.status_code == 403 - - # With skip_permission_checks=True, should get table data - response = await ds.client.get( - "/test_db/test_table.json", skip_permission_checks=True - ) - assert response.status_code == 200 - data = response.json() - assert data["rows"] == [{"id": 1, "name": "Alice"}] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "method", ["get", "post", "put", "patch", "delete", "options", "head"] -) -async def test_skip_permission_checks_all_methods(datasette_with_permissions, method): - """Test that skip_permission_checks works with all HTTP methods""" - ds = datasette_with_permissions - - # All methods should work with skip_permission_checks=True - client_method = getattr(ds.client, method) - response = await client_method("/test_db.json", skip_permission_checks=True) - # We don't check status code since some methods might not be allowed, - # but we verify the request doesn't fail due to permissions - assert isinstance(response, httpx.Response) - - -@pytest.mark.asyncio -async def test_skip_permission_checks_request_method(datasette_with_permissions): - """Test that skip_permission_checks works with client.request()""" - ds = datasette_with_permissions - - # Without skip_permission_checks - response = await ds.client.request("GET", "/test_db.json") - assert response.status_code == 403 - - # With skip_permission_checks=True - response = await ds.client.request( - "GET", "/test_db.json", skip_permission_checks=True - ) - assert response.status_code == 200 - - -@pytest.mark.asyncio -async def test_skip_permission_checks_isolated_to_request(datasette_with_permissions): - """Test that skip_permission_checks doesn't affect other concurrent requests""" - ds = datasette_with_permissions - - # First request with skip_permission_checks=True should succeed - response1 = await 
ds.client.get("/test_db.json", skip_permission_checks=True) - assert response1.status_code == 200 - - # Subsequent request without it should still get 403 - response2 = await ds.client.get("/test_db.json") - assert response2.status_code == 403 - - # And another with skip should succeed again - response3 = await ds.client.get("/test_db.json", skip_permission_checks=True) - assert response3.status_code == 200 - - -@pytest.mark.asyncio -async def test_skip_permission_checks_with_admin_actor(datasette_with_permissions): - """Test that skip_permission_checks works even when actor is provided""" - ds = datasette_with_permissions - - # Admin actor should normally have access - admin_cookies = {"ds_actor": ds.client.actor_cookie({"id": "admin"})} - response = await ds.client.get("/test_db.json", cookies=admin_cookies) - assert response.status_code == 200 - - # Non-admin actor should get 403 - user_cookies = {"ds_actor": ds.client.actor_cookie({"id": "user"})} - response = await ds.client.get("/test_db.json", cookies=user_cookies) - assert response.status_code == 403 - - # Non-admin actor with skip_permission_checks=True should get 200 - response = await ds.client.get( - "/test_db.json", cookies=user_cookies, skip_permission_checks=True - ) - assert response.status_code == 200 - - -@pytest.mark.asyncio -async def test_skip_permission_checks_shows_denied_tables(): - """Test that skip_permission_checks=True shows tables from denied databases in /-/tables.json""" - ds = Datasette( - config={ - "databases": { - "fixtures": {"allow": False} # Deny all access to this database - } - } - ) - await ds.invoke_startup() - db = ds.add_memory_database("fixtures") - await db.execute_write( - "CREATE TABLE test_table (id INTEGER PRIMARY KEY, name TEXT)" - ) - await db.execute_write("INSERT INTO test_table (id, name) VALUES (1, 'Alice')") - await ds._refresh_schemas() - - # Without skip_permission_checks, tables from denied database should not appear in /-/tables.json - response = await ds.client.get("/-/tables.json") - assert response.status_code == 200 - data = response.json() - table_names = [match["name"] for match in data["matches"]] - # Should not see any fixtures tables since access is denied - fixtures_tables = [name for name in table_names if name.startswith("fixtures:")] - assert len(fixtures_tables) == 0 - - # With skip_permission_checks=True, tables from denied database SHOULD appear - response = await ds.client.get("/-/tables.json", skip_permission_checks=True) - assert response.status_code == 200 - data = response.json() - table_names = [match["name"] for match in data["matches"]] - # Should see fixtures tables when permission checks are skipped - assert "fixtures: test_table" in table_names - - -@pytest.mark.asyncio -async def test_in_client_returns_false_outside_request(datasette): - """Test that datasette.in_client() returns False outside of a client request""" - assert datasette.in_client() is False - - -@pytest.mark.asyncio -async def test_in_client_returns_true_inside_request(): - """Test that datasette.in_client() returns True inside a client request""" - from datasette import hookimpl, Response - - class TestPlugin: - __name__ = "test_in_client_plugin" - - @hookimpl - def register_routes(self): - async def test_view(datasette): - # Assert in_client() returns True within the view - assert datasette.in_client() is True - return Response.json({"in_client": datasette.in_client()}) - - return [ - (r"^/-/test-in-client$", test_view), - ] - - ds = Datasette() - await ds.invoke_startup() - 
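(Editorial sketch: the deleted tests above exercise Datasette's internal client and its skip_permission_checks option. The following is a minimal standalone illustration of that pattern, assuming only the public API already used in these tests; the demo() function and the "test_db"/"test_table" names are illustrative, not part of the original diff.)

import asyncio
from datasette.app import Datasette


async def demo():
    # Restrict the "test_db" database to the "admin" actor, mirroring the
    # datasette_with_permissions fixture in the deleted tests.
    ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}})
    await ds.invoke_startup()
    db = ds.add_memory_database("test_db")
    await db.execute_write(
        "create table if not exists test_table (id integer primary key, name text)"
    )
    # An anonymous internal request is blocked by the allow rule...
    denied = await ds.client.get("/test_db.json")
    # ...but skip_permission_checks=True bypasses the check for this one request only.
    allowed = await ds.client.get("/test_db.json", skip_permission_checks=True)
    return denied.status_code, allowed.status_code


print(asyncio.run(demo()))  # per the tests above this should print (403, 200)
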
ds.pm.register(TestPlugin(), name="test_in_client_plugin") - try: - - # Outside of a client request, should be False - assert ds.in_client() is False - - # Make a request via datasette.client - response = await ds.client.get("/-/test-in-client") - assert response.status_code == 200 - assert response.json()["in_client"] is True - - # After the request, should be False again - assert ds.in_client() is False - finally: - ds.pm.unregister(name="test_in_client_plugin") - - -@pytest.mark.asyncio -async def test_in_client_with_skip_permission_checks(): - """Test that in_client() works regardless of skip_permission_checks value""" - from datasette import hookimpl - from datasette.utils.asgi import Response - - in_client_values = [] - - class TestPlugin: - __name__ = "test_in_client_skip_plugin" - - @hookimpl - def register_routes(self): - async def test_view(datasette): - in_client_values.append(datasette.in_client()) - return Response.json({"in_client": datasette.in_client()}) - - return [ - (r"^/-/test-in-client$", test_view), - ] - - ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) - await ds.invoke_startup() - ds.pm.register(TestPlugin(), name="test_in_client_skip_plugin") - try: - - # Request without skip_permission_checks - await ds.client.get("/-/test-in-client") - # Request with skip_permission_checks=True - await ds.client.get("/-/test-in-client", skip_permission_checks=True) - - # Both should have detected in_client as True - assert ( - len(in_client_values) == 2 - ), f"Expected 2 values, got {len(in_client_values)}" - assert all(in_client_values), f"Expected all True, got {in_client_values}" - finally: - ds.pm.unregister(name="test_in_client_skip_plugin") diff --git a/tests/test_internals_request.py b/tests/test_internals_request.py deleted file mode 100644 index d1ca1f46..00000000 --- a/tests/test_internals_request.py +++ /dev/null @@ -1,148 +0,0 @@ -from datasette.utils.asgi import Request -import json -import pytest - - -@pytest.mark.asyncio -async def test_request_post_vars(): - scope = { - "http_version": "1.1", - "method": "POST", - "path": "/", - "raw_path": b"/", - "query_string": b"", - "scheme": "http", - "type": "http", - "headers": [[b"content-type", b"application/x-www-form-urlencoded"]], - } - - async def receive(): - return { - "type": "http.request", - "body": b"foo=bar&baz=1&empty=", - "more_body": False, - } - - request = Request(scope, receive) - assert {"foo": "bar", "baz": "1", "empty": ""} == await request.post_vars() - - -@pytest.mark.asyncio -async def test_request_post_body(): - scope = { - "http_version": "1.1", - "method": "POST", - "path": "/", - "raw_path": b"/", - "query_string": b"", - "scheme": "http", - "type": "http", - "headers": [[b"content-type", b"application/json"]], - } - - data = {"hello": "world"} - - async def receive(): - return { - "type": "http.request", - "body": json.dumps(data, indent=4).encode("utf-8"), - "more_body": False, - } - - request = Request(scope, receive) - body = await request.post_body() - assert isinstance(body, bytes) - assert data == json.loads(body) - - -def test_request_args(): - request = Request.fake("/foo?multi=1&multi=2&single=3") - assert "1" == request.args.get("multi") - assert "3" == request.args.get("single") - assert "1" == request.args["multi"] - assert "3" == request.args["single"] - assert ["1", "2"] == request.args.getlist("multi") - assert [] == request.args.getlist("missing") - assert "multi" in request.args - assert "single" in request.args - assert "missing" not in 
request.args - expected = ["multi", "single"] - assert expected == list(request.args.keys()) - for i, key in enumerate(request.args): - assert expected[i] == key - assert 2 == len(request.args) - with pytest.raises(KeyError): - request.args["missing"] - - -def test_request_fake_url_vars(): - request = Request.fake("/") - assert request.url_vars == {} - request = Request.fake("/", url_vars={"database": "fixtures"}) - assert request.url_vars == {"database": "fixtures"} - - -def test_request_repr(): - request = Request.fake("/foo?multi=1&multi=2&single=3") - assert ( - repr(request) - == '' - ) - - -def test_request_url_vars(): - scope = { - "http_version": "1.1", - "method": "POST", - "path": "/", - "raw_path": b"/", - "query_string": b"", - "scheme": "http", - "type": "http", - "headers": [[b"content-type", b"application/x-www-form-urlencoded"]], - } - assert {} == Request(scope, None).url_vars - assert {"name": "cleo"} == Request( - dict(scope, url_route={"kwargs": {"name": "cleo"}}), None - ).url_vars - - -@pytest.mark.parametrize( - "path,query_string,expected_full_path", - [("/", "", "/"), ("/", "foo=bar", "/?foo=bar"), ("/foo", "bar", "/foo?bar")], -) -def test_request_properties(path, query_string, expected_full_path): - path_with_query_string = path - if query_string: - path_with_query_string += "?" + query_string - scope = { - "http_version": "1.1", - "method": "POST", - "path": path, - "raw_path": path_with_query_string.encode("latin-1"), - "query_string": query_string.encode("latin-1"), - "scheme": "http", - "type": "http", - } - request = Request(scope, None) - assert request.path == path - assert request.query_string == query_string - assert request.full_path == expected_full_path - - -def test_request_blank_values(): - request = Request.fake("/?a=b&foo=bar&foo=bar2&baz=") - assert request.args._data == {"a": ["b"], "foo": ["bar", "bar2"], "baz": [""]} - - -def test_json_in_query_string_name(): - query_string = ( - '?_through.["roadside_attraction_characteristics"%2C"characteristic_id"]=1' - ) - request = Request.fake("/" + query_string) - assert ( - request.args[ - '_through.["roadside_attraction_characteristics","characteristic_id"]' - ] - == "1" - ) diff --git a/tests/test_internals_response.py b/tests/test_internals_response.py deleted file mode 100644 index 820b20b2..00000000 --- a/tests/test_internals_response.py +++ /dev/null @@ -1,54 +0,0 @@ -from datasette.utils.asgi import Response -import pytest - - -def test_response_html(): - response = Response.html("Hello from HTML") - assert 200 == response.status - assert "Hello from HTML" == response.body - assert "text/html; charset=utf-8" == response.content_type - - -def test_response_text(): - response = Response.text("Hello from text") - assert 200 == response.status - assert "Hello from text" == response.body - assert "text/plain; charset=utf-8" == response.content_type - - -def test_response_json(): - response = Response.json({"this_is": "json"}) - assert 200 == response.status - assert '{"this_is": "json"}' == response.body - assert "application/json; charset=utf-8" == response.content_type - - -def test_response_redirect(): - response = Response.redirect("/foo") - assert 302 == response.status - assert "/foo" == response.headers["Location"] - - -@pytest.mark.asyncio -async def test_response_set_cookie(): - events = [] - - async def send(event): - events.append(event) - - response = Response.redirect("/foo") - response.set_cookie("foo", "bar", max_age=10, httponly=True) - await response.asgi_send(send) - - assert [ - { 
- "type": "http.response.start", - "status": 302, - "headers": [ - [b"Location", b"/foo"], - [b"content-type", b"text/plain"], - [b"set-cookie", b"foo=bar; HttpOnly; Max-Age=10; Path=/; SameSite=lax"], - ], - }, - {"type": "http.response.body", "body": b""}, - ] == events diff --git a/tests/test_internals_urls.py b/tests/test_internals_urls.py deleted file mode 100644 index d60aafcf..00000000 --- a/tests/test_internals_urls.py +++ /dev/null @@ -1,148 +0,0 @@ -from datasette.app import Datasette -from datasette.utils import PrefixedUrlString -import pytest - - -@pytest.fixture(scope="module") -def ds(): - return Datasette([], memory=True) - - -@pytest.mark.parametrize( - "base_url,path,expected", - [ - ("/", "/", "/"), - ("/", "/foo", "/foo"), - ("/prefix/", "/", "/prefix/"), - ("/prefix/", "/foo", "/prefix/foo"), - ("/prefix/", "foo", "/prefix/foo"), - ], -) -def test_path(ds, base_url, path, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.path(path) - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -def test_path_applied_twice_does_not_double_prefix(ds): - ds._settings["base_url"] = "/prefix/" - path = ds.urls.path("/") - assert path == "/prefix/" - path = ds.urls.path(path) - assert path == "/prefix/" - - -@pytest.mark.parametrize( - "base_url,expected", - [ - ("/", "/"), - ("/prefix/", "/prefix/"), - ], -) -def test_instance(ds, base_url, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.instance() - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,file,expected", - [ - ("/", "foo.js", "/-/static/foo.js"), - ("/prefix/", "foo.js", "/prefix/-/static/foo.js"), - ], -) -def test_static(ds, base_url, file, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.static(file) - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,plugin,file,expected", - [ - ( - "/", - "datasette_cluster_map", - "datasette-cluster-map.js", - "/-/static-plugins/datasette_cluster_map/datasette-cluster-map.js", - ), - ( - "/prefix/", - "datasette_cluster_map", - "datasette-cluster-map.js", - "/prefix/-/static-plugins/datasette_cluster_map/datasette-cluster-map.js", - ), - ], -) -def test_static_plugins(ds, base_url, plugin, file, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.static_plugins(plugin, file) - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,expected", - [ - ("/", "/-/logout"), - ("/prefix/", "/prefix/-/logout"), - ], -) -def test_logout(ds, base_url, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.logout() - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,format,expected", - [ - ("/", None, "/_memory"), - ("/prefix/", None, "/prefix/_memory"), - ("/", "json", "/_memory.json"), - ], -) -def test_database(ds, base_url, format, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.database("_memory", format=format) - assert actual == expected - assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,name,format,expected", - [ - ("/", "name", None, "/_memory/name"), - ("/prefix/", "name", None, "/prefix/_memory/name"), - ("/", "name", "json", "/_memory/name.json"), - ("/", "name.json", "json", "/_memory/name~2Ejson.json"), - ], -) -def test_table_and_query(ds, base_url, name, 
format, expected): - ds._settings["base_url"] = base_url - actual1 = ds.urls.table("_memory", name, format=format) - assert actual1 == expected - assert isinstance(actual1, PrefixedUrlString) - actual2 = ds.urls.query("_memory", name, format=format) - assert actual2 == expected - assert isinstance(actual2, PrefixedUrlString) - - -@pytest.mark.parametrize( - "base_url,format,expected", - [ - ("/", None, "/_memory/facetable/1"), - ("/prefix/", None, "/prefix/_memory/facetable/1"), - ("/", "json", "/_memory/facetable/1.json"), - ], -) -def test_row(ds, base_url, format, expected): - ds._settings["base_url"] = base_url - actual = ds.urls.row("_memory", "facetable", "1", format=format) - assert actual == expected - assert isinstance(actual, PrefixedUrlString) diff --git a/tests/test_label_column_for_table.py b/tests/test_label_column_for_table.py deleted file mode 100644 index 7667b595..00000000 --- a/tests/test_label_column_for_table.py +++ /dev/null @@ -1,97 +0,0 @@ -import pytest -from datasette.database import Database -from datasette.app import Datasette - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "create_sql,table_name,config,expected_label_column", - [ - # Explicit label_column - ( - "create table t1 (id integer primary key, name text, title text);", - "t1", - {"t1": {"label_column": "title"}}, - "title", - ), - # Single unique text column - ( - "create table t2 (id integer primary key, name2 text unique, title text);", - "t2", - {}, - "name2", - ), - ( - "create table t3 (id integer primary key, title2 text unique, name text);", - "t3", - {}, - "title2", - ), - # Two unique text columns means it cannot decide on one - ( - "create table t3x (id integer primary key, name2 text unique, title2 text unique);", - "t3x", - {}, - None, - ), - # Name or title column - ( - "create table t4 (id integer primary key, name text);", - "t4", - {}, - "name", - ), - ( - "create table t5 (id integer primary key, title text);", - "t5", - {}, - "title", - ), - # But not if there are multiple non-unique text that are not called title - ( - "create table t5x (id integer primary key, other1 text, other2 text);", - "t5x", - {}, - None, - ), - ( - "create table t6 (id integer primary key, Name text);", - "t6", - {}, - "Name", - ), - ( - "create table t7 (id integer primary key, Title text);", - "t7", - {}, - "Title", - ), - # Two columns, one of which is id - ( - "create table t8 (id integer primary key, content text);", - "t8", - {}, - "content", - ), - ( - "create table t9 (pk integer primary key, content text);", - "t9", - {}, - "content", - ), - ], -) -async def test_label_column_for_table( - create_sql, table_name, config, expected_label_column -): - """Test cases for label_column_for_table method""" - ds = Datasette() - db = ds.add_database(Database(ds, memory_name="test_label_column_for_table")) - await db.execute_write_script(create_sql) - if config: - ds.config = {"databases": {"test_label_column_for_table": {"tables": config}}} - actual_label_column = await db.label_column_for_table(table_name) - if expected_label_column is None: - assert actual_label_column is None - else: - assert actual_label_column == expected_label_column diff --git a/tests/test_load_extensions.py b/tests/test_load_extensions.py deleted file mode 100644 index cdadb091..00000000 --- a/tests/test_load_extensions.py +++ /dev/null @@ -1,64 +0,0 @@ -from datasette.app import Datasette -import pytest -from pathlib import Path - -# not necessarily a full path - the full compiled path looks like "ext.dylib" -# or another 
suffix, but sqlite will, under the hood, decide which file -# extension to use based on the operating system (apple=dylib, windows=dll etc) -# this resolves to "./ext", which is enough for SQLite to calculate the rest -COMPILED_EXTENSION_PATH = str(Path(__file__).parent / "ext") - - -# See if ext.c has been compiled, based off the different possible suffixes. -def has_compiled_ext(): - for ext in ["dylib", "so", "dll"]: - path = Path(__file__).parent / f"ext.{ext}" - if path.is_file(): - return True - return False - - -@pytest.mark.asyncio -@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c") -async def test_load_extension_default_entrypoint(): - # The default entrypoint only loads a() and NOT b() or c(), so those - # should fail. - ds = Datasette(sqlite_extensions=[COMPILED_EXTENSION_PATH]) - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+a()") - assert response.status_code == 200 - assert response.json()["rows"][0][0] == "a" - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+b()") - assert response.status_code == 400 - assert response.json()["error"] == "no such function: b" - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+c()") - assert response.status_code == 400 - assert response.json()["error"] == "no such function: c" - - -@pytest.mark.asyncio -@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c") -async def test_load_extension_multiple_entrypoints(): - # Load in the default entrypoint and the other 2 custom entrypoints, now - # all a(), b(), and c() should run successfully. - ds = Datasette( - sqlite_extensions=[ - COMPILED_EXTENSION_PATH, - (COMPILED_EXTENSION_PATH, "sqlite3_ext_b_init"), - (COMPILED_EXTENSION_PATH, "sqlite3_ext_c_init"), - ] - ) - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+a()") - assert response.status_code == 200 - assert response.json()["rows"][0][0] == "a" - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+b()") - assert response.status_code == 200 - assert response.json()["rows"][0][0] == "b" - - response = await ds.client.get("/_memory/-/query.json?_shape=arrays&sql=select+c()") - assert response.status_code == 200 - assert response.json()["rows"][0][0] == "c" diff --git a/tests/test_messages.py b/tests/test_messages.py deleted file mode 100644 index 62d9f647..00000000 --- a/tests/test_messages.py +++ /dev/null @@ -1,32 +0,0 @@ -from .utils import cookie_was_deleted -import pytest - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "qs,expected", - [ - ("add_msg=added-message", [["added-message", 1]]), - ("add_msg=added-warning&type=WARNING", [["added-warning", 2]]), - ("add_msg=added-error&type=ERROR", [["added-error", 3]]), - ], -) -async def test_add_message_sets_cookie(ds_client, qs, expected): - response = await ds_client.get(f"/fixtures/-/query.message?sql=select+1&{qs}") - signed = response.cookies["ds_messages"] - decoded = ds_client.ds.unsign(signed, "messages") - assert expected == decoded - - -@pytest.mark.asyncio -async def test_messages_are_displayed_and_cleared(ds_client): - # First set the message cookie - set_msg_response = await ds_client.get( - "/fixtures/-/query.message?sql=select+1&add_msg=xmessagex" - ) - # Now access a page that displays messages - response = await ds_client.get("/", cookies=set_msg_response.cookies) - # Messages should be in that HTML - assert "xmessagex" in response.text - # Cookie should have been 
set that clears messages - assert cookie_was_deleted(response, "ds_messages") diff --git a/tests/test_package.py b/tests/test_package.py deleted file mode 100644 index f05f3ece..00000000 --- a/tests/test_package.py +++ /dev/null @@ -1,59 +0,0 @@ -from click.testing import CliRunner -from datasette import cli -from unittest import mock -import os -import pathlib -import pytest - - -class CaptureDockerfile: - def __call__(self, _): - self.captured = (pathlib.Path() / "Dockerfile").read_text() - - -EXPECTED_DOCKERFILE = """ -FROM python:3.11.0-slim-bullseye -COPY . /app -WORKDIR /app - -ENV DATASETTE_SECRET 'sekrit' -RUN pip install -U datasette -RUN datasette inspect test.db --inspect-file inspect-data.json -ENV PORT {port} -EXPOSE {port} -CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --port $PORT -""".strip() - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.cli.call") -def test_package(mock_call, mock_which, tmp_path_factory): - mock_which.return_value = True - runner = CliRunner() - capture = CaptureDockerfile() - mock_call.side_effect = capture - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke(cli.cli, ["package", "test.db", "--secret", "sekrit"]) - assert 0 == result.exit_code - mock_call.assert_has_calls([mock.call(["docker", "build", "."])]) - assert EXPECTED_DOCKERFILE.format(port=8001) == capture.captured - - -@mock.patch("shutil.which") -@mock.patch("datasette.cli.call") -def test_package_with_port(mock_call, mock_which, tmp_path_factory): - mock_which.return_value = True - capture = CaptureDockerfile() - mock_call.side_effect = capture - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, ["package", "test.db", "-p", "8080", "--secret", "sekrit"] - ) - assert 0 == result.exit_code - assert EXPECTED_DOCKERFILE.format(port=8080) == capture.captured diff --git a/tests/test_permission_endpoints.py b/tests/test_permission_endpoints.py deleted file mode 100644 index 84f3370f..00000000 --- a/tests/test_permission_endpoints.py +++ /dev/null @@ -1,501 +0,0 @@ -""" -Tests for permission endpoints: -- /-/allowed.json -- /-/rules.json -""" - -import pytest -import pytest_asyncio -from datasette.app import Datasette - - -@pytest_asyncio.fixture -async def ds_with_permissions(): - """Create a Datasette instance with test data and permissions.""" - ds = Datasette() - ds.root_enabled = True - await ds.invoke_startup() - - # Add some test databases and tables - db = ds.add_memory_database("analytics") - await db.execute_write( - "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)" - ) - await db.execute_write( - "CREATE TABLE IF NOT EXISTS events (id INTEGER PRIMARY KEY, event_type TEXT, user_id INTEGER)" - ) - - db2 = ds.add_memory_database("production") - await db2.execute_write( - "CREATE TABLE IF NOT EXISTS orders (id INTEGER PRIMARY KEY, total REAL)" - ) - await db2.execute_write( - "CREATE TABLE IF NOT EXISTS customers (id INTEGER PRIMARY KEY, name TEXT)" - ) - - await ds.refresh_schemas() - - return ds - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_status,expected_keys", - [ - # Instance level permission - ( - "/-/allowed.json?action=view-instance", - 200, - {"action", "items", "total", "page"}, - ), - # Database level permission - ( - "/-/allowed.json?action=view-database", - 200, - {"action", 
"items", "total", "page"}, - ), - # Table level permission - ( - "/-/allowed.json?action=view-table", - 200, - {"action", "items", "total", "page"}, - ), - ( - "/-/allowed.json?action=execute-sql", - 200, - {"action", "items", "total", "page"}, - ), - # Missing action parameter - ("/-/allowed.json", 400, {"error"}), - # Invalid action - ("/-/allowed.json?action=nonexistent", 404, {"error"}), - # Any valid action works, even if no permission rules exist for it - ( - "/-/allowed.json?action=insert-row", - 200, - {"action", "items", "total", "page"}, - ), - ], -) -async def test_allowed_json_basic( - ds_with_permissions, path, expected_status, expected_keys -): - response = await ds_with_permissions.client.get(path) - assert response.status_code == expected_status - data = response.json() - assert expected_keys.issubset(data.keys()) - - -@pytest.mark.asyncio -async def test_allowed_json_response_structure(ds_with_permissions): - """Test that /-/allowed.json returns the expected structure.""" - response = await ds_with_permissions.client.get( - "/-/allowed.json?action=view-instance" - ) - assert response.status_code == 200 - data = response.json() - - # Check required fields - assert "action" in data - assert "actor_id" in data - assert "page" in data - assert "page_size" in data - assert "total" in data - assert "items" in data - - # Check items structure - assert isinstance(data["items"], list) - if data["items"]: - item = data["items"][0] - assert "parent" in item - assert "child" in item - assert "resource" in item - - -@pytest.mark.asyncio -async def test_allowed_json_with_actor(ds_with_permissions): - """Test /-/allowed.json includes actor information.""" - response = await ds_with_permissions.client.get( - "/-/allowed.json?action=view-table", - cookies={ - "ds_actor": ds_with_permissions.client.actor_cookie({"id": "test_user"}) - }, - ) - assert response.status_code == 200 - data = response.json() - assert data["actor_id"] == "test_user" - - -@pytest.mark.asyncio -async def test_allowed_json_pagination(): - """Test that /-/allowed.json pagination works.""" - ds = Datasette() - await ds.invoke_startup() - - # Create many tables to test pagination - db = ds.add_memory_database("test") - for i in range(30): - await db.execute_write(f"CREATE TABLE table{i:02d} (id INTEGER PRIMARY KEY)") - await ds.refresh_schemas() - - # Test page 1 - response = await ds.client.get( - "/-/allowed.json?action=view-table&page_size=10&page=1" - ) - assert response.status_code == 200 - data = response.json() - assert data["page"] == 1 - assert data["page_size"] == 10 - assert len(data["items"]) == 10 - - # Test page 2 - response = await ds.client.get( - "/-/allowed.json?action=view-table&page_size=10&page=2" - ) - assert response.status_code == 200 - data = response.json() - assert data["page"] == 2 - assert len(data["items"]) == 10 - - # Verify items are different between pages - response1 = await ds.client.get( - "/-/allowed.json?action=view-table&page_size=10&page=1" - ) - response2 = await ds.client.get( - "/-/allowed.json?action=view-table&page_size=10&page=2" - ) - items1 = {(item["parent"], item["child"]) for item in response1.json()["items"]} - items2 = {(item["parent"], item["child"]) for item in response2.json()["items"]} - assert items1 != items2 - - -@pytest.mark.asyncio -async def test_allowed_json_total_count(tmp_path_factory): - """Test that /-/allowed.json returns correct total count.""" - from datasette.database import Database - - # Use temporary file databases to avoid leakage from other 
tests - tmp_dir = tmp_path_factory.mktemp("test_allowed_json_total_count") - - ds = Datasette() - await ds.invoke_startup() - - # Create test databases with tables - analytics_db = ds.add_database( - Database(ds, path=str(tmp_dir / "analytics.db")), name="analytics" - ) - await analytics_db.execute_write( - "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)" - ) - await analytics_db.execute_write( - "CREATE TABLE IF NOT EXISTS events (id INTEGER PRIMARY KEY, event_type TEXT, user_id INTEGER)" - ) - - production_db = ds.add_database( - Database(ds, path=str(tmp_dir / "production.db")), name="production" - ) - await production_db.execute_write( - "CREATE TABLE IF NOT EXISTS orders (id INTEGER PRIMARY KEY, total REAL)" - ) - await production_db.execute_write( - "CREATE TABLE IF NOT EXISTS customers (id INTEGER PRIMARY KEY, name TEXT)" - ) - - await ds.refresh_schemas() - - response = await ds.client.get("/-/allowed.json?action=view-table") - assert response.status_code == 200 - data = response.json() - - # We created 4 tables total (2 in analytics, 2 in production) - import json - - assert ( - data["total"] == 4 - ), f"Expected total=4, got: {json.dumps(data, separators=(',', ':'))}" - - -# /-/rules.json tests - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_status,expected_keys", - [ - # Instance level rules - ( - "/-/rules.json?action=view-instance", - 200, - {"action", "items", "total", "page"}, - ), - # Database level rules - ( - "/-/rules.json?action=view-database", - 200, - {"action", "items", "total", "page"}, - ), - # Table level rules - ( - "/-/rules.json?action=view-table", - 200, - {"action", "items", "total", "page"}, - ), - # Missing action parameter - ("/-/rules.json", 400, {"error"}), - # Invalid action - ("/-/rules.json?action=nonexistent", 404, {"error"}), - ], -) -async def test_rules_json_basic( - ds_with_permissions, path, expected_status, expected_keys -): - # Use root actor for rules endpoint (requires permissions-debug) - response = await ds_with_permissions.client.get( - path, - cookies={"ds_actor": ds_with_permissions.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == expected_status - data = response.json() - assert expected_keys.issubset(data.keys()) - - -@pytest.mark.asyncio -async def test_rules_json_response_structure(ds_with_permissions): - """Test that /-/rules.json returns the expected structure.""" - response = await ds_with_permissions.client.get( - "/-/rules.json?action=view-instance", - cookies={"ds_actor": ds_with_permissions.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Check required fields - assert "action" in data - assert "actor_id" in data - assert "page" in data - assert "page_size" in data - assert "total" in data - assert "items" in data - - # Check items structure - assert isinstance(data["items"], list) - if data["items"]: - item = data["items"][0] - assert "parent" in item - assert "child" in item - assert "resource" in item - assert "allow" in item - assert "reason" in item - - -@pytest.mark.asyncio -async def test_rules_json_includes_all_rules(ds_with_permissions): - """Test that /-/rules.json includes both allowed and denied resources.""" - # Root user should see rules for everything - response = await ds_with_permissions.client.get( - "/-/rules.json?action=view-table", - cookies={"ds_actor": ds_with_permissions.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = 
response.json() - - # Should have items (root has global allow) - assert len(data["items"]) > 0 - - # Each item should have allow field (0 or 1) - for item in data["items"]: - assert "allow" in item - assert item["allow"] in [0, 1] - - -@pytest.mark.asyncio -async def test_rules_json_pagination(): - """Test that /-/rules.json pagination works.""" - ds = Datasette() - ds.root_enabled = True - await ds.invoke_startup() - - # Create some tables - db = ds.add_memory_database("test") - for i in range(5): - await db.execute_write( - f"CREATE TABLE IF NOT EXISTS table{i:02d} (id INTEGER PRIMARY KEY)" - ) - await ds.refresh_schemas() - - # Test basic pagination structure - just verify it returns paginated results - response = await ds.client.get( - "/-/rules.json?action=view-table&page_size=2&page=1", - cookies={"ds_actor": ds.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - assert data["page"] == 1 - assert data["page_size"] == 2 - # Verify items is a list (may have fewer items than page_size if there aren't many rules) - assert isinstance(data["items"], list) - assert "total" in data - - -@pytest.mark.asyncio -async def test_rules_json_with_actor(ds_with_permissions): - """Test /-/rules.json includes actor information.""" - # Use root actor (rules endpoint requires permissions-debug) - response = await ds_with_permissions.client.get( - "/-/rules.json?action=view-table", - cookies={"ds_actor": ds_with_permissions.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - assert data["actor_id"] == "root" - - -@pytest.mark.asyncio -async def test_root_user_respects_settings_deny(): - """ - Test for issue #2509: Settings-based deny rules should override root user privileges. - - When a database has `allow: false` in settings, the root user should NOT see - that database in /-/allowed.json?action=view-database. - """ - ds = Datasette( - config={ - "databases": { - "content": { - "allow": False, # Deny everyone, including root - } - } - } - ) - ds.root_enabled = True - await ds.invoke_startup() - ds.add_memory_database("content") - - # Root user should NOT see the denied database - response = await ds.client.get( - "/-/allowed.json?action=view-database", - cookies={"ds_actor": ds.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Check that content database is NOT in the allowed list - allowed_databases = [item["parent"] for item in data["items"]] - assert "content" not in allowed_databases, ( - f"Root user should not see 'content' database when settings deny it, " - f"but found it in: {allowed_databases}" - ) - - -@pytest.mark.asyncio -async def test_root_user_respects_settings_deny_tables(): - """ - Test for issue #2509: Settings-based deny rules should override root for tables too. - - When a database has `allow: false` in settings, the root user should NOT see - tables from that database in /-/allowed.json?action=view-table. 
- """ - ds = Datasette( - config={ - "databases": { - "content": { - "allow": False, # Deny everyone, including root - } - } - } - ) - ds.root_enabled = True - await ds.invoke_startup() - - # Add a database with a table - db = ds.add_memory_database("content") - await db.execute_write("CREATE TABLE repos (id INTEGER PRIMARY KEY, name TEXT)") - await ds.refresh_schemas() - - # Root user should NOT see tables from the content database - response = await ds.client.get( - "/-/allowed.json?action=view-table", - cookies={"ds_actor": ds.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Check that content.repos table is NOT in the allowed list - content_tables = [ - item["child"] for item in data["items"] if item["parent"] == "content" - ] - assert "repos" not in content_tables, ( - f"Root user should not see tables from 'content' database when settings deny it, " - f"but found: {content_tables}" - ) - - -@pytest.mark.asyncio -async def test_execute_sql_requires_view_database(): - """ - Test for issue #2527: execute-sql permission should require view-database permission. - - A user who has execute-sql permission but not view-database permission should not - be able to execute SQL on that database. - """ - from datasette.permissions import PermissionSQL - from datasette import hookimpl - - class TestPermissionPlugin: - __name__ = "TestPermissionPlugin" - - @hookimpl - def permission_resources_sql(self, datasette, actor, action): - if actor is None or actor.get("id") != "test_user": - return [] - - if action == "execute-sql": - # Grant execute-sql on the "secret" database - return PermissionSQL( - sql="SELECT 'secret' AS parent, NULL AS child, 1 AS allow, 'can execute sql' AS reason", - ) - elif action == "view-database": - # Deny view-database on the "secret" database - return PermissionSQL( - sql="SELECT 'secret' AS parent, NULL AS child, 0 AS allow, 'cannot view db' AS reason", - ) - - return [] - - plugin = TestPermissionPlugin() - - ds = Datasette() - await ds.invoke_startup() - ds.pm.register(plugin, name="test_plugin") - - try: - ds.add_memory_database("secret") - await ds.refresh_schemas() - - # User should NOT have execute-sql permission because view-database is denied - response = await ds.client.get( - "/-/allowed.json?action=execute-sql", - cookies={"ds_actor": ds.client.actor_cookie({"id": "test_user"})}, - ) - assert response.status_code == 200 - data = response.json() - - # The "secret" database should NOT be in the allowed list for execute-sql - allowed_databases = [item["parent"] for item in data["items"]] - assert "secret" not in allowed_databases, ( - f"User should not have execute-sql permission without view-database, " - f"but found 'secret' in: {allowed_databases}" - ) - - # Also verify that attempting to execute SQL on the database is denied - # (may be 403 or 302 redirect to login/error page depending on middleware) - response = await ds.client.get( - "/secret?sql=SELECT+1", - cookies={"ds_actor": ds.client.actor_cookie({"id": "test_user"})}, - ) - assert response.status_code in (302, 403), ( - f"Expected 302 or 403 when trying to execute SQL without view-database permission, " - f"but got {response.status_code}" - ) - finally: - ds.pm.unregister(plugin) diff --git a/tests/test_permissions.py b/tests/test_permissions.py deleted file mode 100644 index e2dd92b8..00000000 --- a/tests/test_permissions.py +++ /dev/null @@ -1,1714 +0,0 @@ -import collections -from datasette.app import Datasette -from datasette.cli import 
cli -from datasette.default_permissions import restrictions_allow_action -from .fixtures import app_client, assert_permissions_checked, make_app_client -from click.testing import CliRunner -from bs4 import BeautifulSoup as Soup -import copy -import json -from pprint import pprint -import pytest_asyncio -import pytest -import re -import time -import urllib - - -@pytest.fixture(scope="module") -def padlock_client(): - with make_app_client( - config={ - "databases": { - "fixtures": { - "queries": {"two": {"sql": "select 1 + 1"}}, - } - } - } - ) as client: - yield client - - -@pytest_asyncio.fixture -async def perms_ds(): - ds = Datasette() - await ds.invoke_startup() - one = ds.add_memory_database("perms_ds_one") - two = ds.add_memory_database("perms_ds_two") - await one.execute_write("create table if not exists t1 (id integer primary key)") - await one.execute_write("insert or ignore into t1 (id) values (1)") - await one.execute_write("create view if not exists v1 as select * from t1") - await one.execute_write("create table if not exists t2 (id integer primary key)") - await two.execute_write("create table if not exists t1 (id integer primary key)") - # Trigger catalog refresh so allowed_resources() can be called - await ds.client.get("/") - return ds - - -@pytest.mark.parametrize( - "allow,expected_anon,expected_auth", - [ - (None, 200, 200), - ({}, 403, 403), - ({"id": "root"}, 403, 200), - ], -) -@pytest.mark.parametrize( - "path", - ( - "/", - "/fixtures", - "/-/api", - "/fixtures/compound_three_primary_keys", - "/fixtures/compound_three_primary_keys/a,a,a", - pytest.param( - "/fixtures/two", - marks=pytest.mark.xfail( - reason="view-query not yet migrated to new permission system" - ), - ), # Query - ), -) -def test_view_padlock(allow, expected_anon, expected_auth, path, padlock_client): - padlock_client.ds.config["allow"] = allow - fragment = "🔒" - anon_response = padlock_client.get(path) - assert expected_anon == anon_response.status - if allow and anon_response.status == 200: - # Should be no padlock - assert fragment not in anon_response.text - auth_response = padlock_client.get( - path, - cookies={"ds_actor": padlock_client.actor_cookie({"id": "root"})}, - ) - assert expected_auth == auth_response.status - # Check for the padlock - if allow and expected_anon == 403 and expected_auth == 200: - assert fragment in auth_response.text - del padlock_client.ds.config["allow"] - - -@pytest.mark.parametrize( - "allow,expected_anon,expected_auth", - [ - (None, 200, 200), - ({}, 403, 403), - ({"id": "root"}, 403, 200), - ], -) -@pytest.mark.parametrize("use_metadata", (True, False)) -def test_view_database(allow, expected_anon, expected_auth, use_metadata): - key = "metadata" if use_metadata else "config" - kwargs = {key: {"databases": {"fixtures": {"allow": allow}}}} - with make_app_client(**kwargs) as client: - for path in ( - "/fixtures", - "/fixtures/compound_three_primary_keys", - "/fixtures/compound_three_primary_keys/a,a,a", - ): - anon_response = client.get(path) - assert expected_anon == anon_response.status, path - if allow and path == "/fixtures" and anon_response.status == 200: - # Should be no padlock - assert ">fixtures 🔒" not in anon_response.text - auth_response = client.get( - path, - cookies={"ds_actor": client.actor_cookie({"id": "root"})}, - ) - assert expected_auth == auth_response.status - if ( - allow - and path == "/fixtures" - and expected_anon == 403 - and expected_auth == 200 - ): - assert ">fixtures 🔒" in auth_response.text - - -def 
test_database_list_respects_view_database(): - with make_app_client( - config={"databases": {"fixtures": {"allow": {"id": "root"}}}}, - extra_databases={"data.db": "create table names (name text)"}, - ) as client: - anon_response = client.get("/") - assert 'data' in anon_response.text - assert 'fixtures' not in anon_response.text - auth_response = client.get( - "/", - cookies={"ds_actor": client.actor_cookie({"id": "root"})}, - ) - assert 'data' in auth_response.text - assert 'fixtures 🔒' in auth_response.text - - -def test_database_list_respects_view_table(): - with make_app_client( - config={ - "databases": { - "data": { - "tables": { - "names": {"allow": {"id": "root"}}, - "v": {"allow": {"id": "root"}}, - } - } - } - }, - extra_databases={ - "data.db": "create table names (name text); create view v as select * from names" - }, - ) as client: - html_fragments = [ - ">names 🔒", - ">v 🔒", - ] - anon_response_text = client.get("/").text - assert "0 rows in 0 tables" in anon_response_text - for html_fragment in html_fragments: - assert html_fragment not in anon_response_text - auth_response_text = client.get( - "/", - cookies={"ds_actor": client.actor_cookie({"id": "root"})}, - ).text - for html_fragment in html_fragments: - assert html_fragment in auth_response_text - - -@pytest.mark.parametrize( - "allow,expected_anon,expected_auth", - [ - (None, 200, 200), - ({}, 403, 403), - ({"id": "root"}, 403, 200), - ], -) -@pytest.mark.parametrize("use_metadata", (True, False)) -def test_view_table(allow, expected_anon, expected_auth, use_metadata): - key = "metadata" if use_metadata else "config" - kwargs = { - key: { - "databases": { - "fixtures": { - "tables": {"compound_three_primary_keys": {"allow": allow}} - } - } - } - } - with make_app_client(**kwargs) as client: - anon_response = client.get("/fixtures/compound_three_primary_keys") - assert expected_anon == anon_response.status - if allow and anon_response.status == 200: - # Should be no padlock - assert ">compound_three_primary_keys 🔒" not in anon_response.text - auth_response = client.get( - "/fixtures/compound_three_primary_keys", - cookies={"ds_actor": client.actor_cookie({"id": "root"})}, - ) - assert expected_auth == auth_response.status - if allow and expected_anon == 403 and expected_auth == 200: - assert ">compound_three_primary_keys 🔒" in auth_response.text - - -def test_table_list_respects_view_table(): - with make_app_client( - config={ - "databases": { - "fixtures": { - "tables": { - "compound_three_primary_keys": {"allow": {"id": "root"}}, - # And a SQL view too: - "paginated_view": {"allow": {"id": "root"}}, - } - } - } - } - ) as client: - html_fragments = [ - ">compound_three_primary_keys 🔒", - ">paginated_view 🔒", - ] - anon_response = client.get("/fixtures") - for html_fragment in html_fragments: - assert html_fragment not in anon_response.text - auth_response = client.get( - "/fixtures", cookies={"ds_actor": client.actor_cookie({"id": "root"})} - ) - for html_fragment in html_fragments: - assert html_fragment in auth_response.text - - -@pytest.mark.parametrize( - "allow,expected_anon,expected_auth", - [ - (None, 200, 200), - ({}, 403, 403), - ({"id": "root"}, 403, 200), - ], -) -def test_view_query(allow, expected_anon, expected_auth): - with make_app_client( - config={ - "databases": { - "fixtures": {"queries": {"q": {"sql": "select 1 + 1", "allow": allow}}} - } - } - ) as client: - anon_response = client.get("/fixtures/q") - assert expected_anon == anon_response.status - if allow and anon_response.status == 200: - # 
Should be no padlock - assert "🔒" not in anon_response.text - auth_response = client.get( - "/fixtures/q", cookies={"ds_actor": client.actor_cookie({"id": "root"})} - ) - assert expected_auth == auth_response.status - if allow and expected_anon == 403 and expected_auth == 200: - assert ">fixtures: q 🔒" in auth_response.text - - -@pytest.mark.parametrize( - "config", - [ - {"allow_sql": {"id": "root"}}, - {"databases": {"fixtures": {"allow_sql": {"id": "root"}}}}, - ], -) -def test_execute_sql(config): - schema_re = re.compile("const schema = ({.*?});", re.DOTALL) - with make_app_client(config=config) as client: - form_fragment = '', - '', - '', - ): - assert fragment in response.text - # Should show one failure and one success - soup = Soup(response.text, "html.parser") - table = soup.find("table", {"id": "permission-checks-table"}) - rows = table.find("tbody").find_all("tr") - checks = [] - for row in rows: - cells = row.find_all("td") - result_cell = cells[5] - if result_cell.select_one(".check-result-true"): - result = True - elif result_cell.select_one(".check-result-false"): - result = False - else: - result = None - actor_code = cells[4].find("code") - actor = json.loads(actor_code.text) if actor_code else None - checks.append( - { - "action": cells[1].text.strip(), - "result": result, - "actor": actor, - } - ) - expected_checks = [ - { - "action": "permissions-debug", - "result": True, - "actor": {"id": "root"}, - }, - { - "action": "view-instance", - "result": True, - "actor": {"id": "root"}, - }, - {"action": "debug-menu", "result": False, "actor": None}, - { - "action": "view-instance", - "result": True, - "actor": None, - }, - { - "action": "permissions-debug", - "result": False, - "actor": None, - }, - { - "action": "view-instance", - "result": True, - "actor": None, - }, - ] - if filter_ == "only-yours": - expected_checks = [ - check for check in expected_checks if check["actor"] is not None - ] - elif filter_ == "exclude-yours": - expected_checks = [check for check in expected_checks if check["actor"] is None] - assert checks == expected_checks - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "actor,allow,expected_fragment", - [ - ('{"id":"root"}', "{}", "Result: deny"), - ('{"id":"root"}', '{"id": "*"}', "Result: allow"), - ('{"', '{"id": "*"}', "Actor JSON error"), - ('{"id":"root"}', '"*"}', "Allow JSON error"), - ], -) -async def test_allow_debug(ds_client, actor, allow, expected_fragment): - response = await ds_client.get( - "/-/allow-debug?" 
+ urllib.parse.urlencode({"actor": actor, "allow": allow}) - ) - assert response.status_code == 200 - assert expected_fragment in response.text - - -@pytest.mark.parametrize( - "allow,expected", - [ - ({"id": "root"}, 403), - ({"id": "root", "unauthenticated": True}, 200), - ], -) -def test_allow_unauthenticated(allow, expected): - with make_app_client(config={"allow": allow}) as client: - assert expected == client.get("/").status - - -@pytest.fixture(scope="session") -def view_instance_client(): - with make_app_client(config={"allow": {}}) as client: - yield client - - -@pytest.mark.parametrize( - "path", - [ - "/", - "/fixtures", - "/fixtures/facetable", - "/-/versions", - "/-/plugins", - "/-/settings", - "/-/threads", - "/-/databases", - "/-/permissions", - "/-/messages", - "/-/patterns", - ], -) -def test_view_instance(path, view_instance_client): - assert 403 == view_instance_client.get(path).status - if path not in ("/-/permissions", "/-/messages", "/-/patterns"): - assert 403 == view_instance_client.get(path + ".json").status - - -@pytest.fixture(scope="session") -def cascade_app_client(): - with make_app_client(is_immutable=True) as client: - yield client - - -@pytest.mark.parametrize( - "path,permissions,expected_status", - [ - ("/", [], 403), - ("/", ["instance"], 200), - # Can view table even if not allowed database or instance - ("/fixtures/binary_data", [], 403), - ("/fixtures/binary_data", ["database"], 403), - ("/fixtures/binary_data", ["instance"], 403), - ("/fixtures/binary_data", ["table"], 200), - ("/fixtures/binary_data", ["table", "database"], 200), - ("/fixtures/binary_data", ["table", "database", "instance"], 200), - # ... same for row - ("/fixtures/binary_data/1", [], 403), - ("/fixtures/binary_data/1", ["database"], 403), - ("/fixtures/binary_data/1", ["instance"], 403), - ("/fixtures/binary_data/1", ["table"], 200), - ("/fixtures/binary_data/1", ["table", "database"], 200), - ("/fixtures/binary_data/1", ["table", "database", "instance"], 200), - # Can view query even if not allowed database or instance - ("/fixtures/magic_parameters", [], 403), - ("/fixtures/magic_parameters", ["database"], 403), - ("/fixtures/magic_parameters", ["instance"], 403), - ("/fixtures/magic_parameters", ["query"], 200), - ("/fixtures/magic_parameters", ["query", "database"], 200), - ("/fixtures/magic_parameters", ["query", "database", "instance"], 200), - # Can view database even if not allowed instance - ("/fixtures", [], 403), - ("/fixtures", ["instance"], 403), - ("/fixtures", ["database"], 200), - # Downloading the fixtures.db file - ("/fixtures.db", [], 403), - ("/fixtures.db", ["instance"], 403), - ("/fixtures.db", ["database"], 200), - ("/fixtures.db", ["download"], 200), - ], -) -def test_permissions_cascade(cascade_app_client, path, permissions, expected_status): - """Test that e.g. 
having view-table but NOT view-database lets you view table page, etc""" - allow = {"id": "*"} - deny = {} - previous_config = cascade_app_client.ds.config - updated_config = copy.deepcopy(previous_config) - actor = {"id": "test"} - if "download" in permissions: - actor["can_download"] = 1 - try: - # Set up the different allow blocks - updated_config["allow"] = allow if "instance" in permissions else deny - # Note: download permission also needs database access (via plugin granting both) - # so we don't set a deny rule when download is in permissions - updated_config["databases"]["fixtures"]["allow"] = ( - allow if ("database" in permissions or "download" in permissions) else deny - ) - updated_config["databases"]["fixtures"]["tables"]["binary_data"] = { - "allow": (allow if "table" in permissions else deny) - } - updated_config["databases"]["fixtures"]["queries"]["magic_parameters"][ - "allow" - ] = (allow if "query" in permissions else deny) - cascade_app_client.ds.config = updated_config - response = cascade_app_client.get( - path, - cookies={"ds_actor": cascade_app_client.actor_cookie(actor)}, - ) - assert ( - response.status == expected_status - ), "path: {}, permissions: {}, expected_status: {}, status: {}".format( - path, permissions, expected_status, response.status - ) - finally: - cascade_app_client.ds.config = previous_config - - -def test_padlocks_on_database_page(cascade_app_client): - config = { - "databases": { - "fixtures": { - "allow": {"id": "test"}, - "tables": { - "123_starts_with_digits": {"allow": True}, - "simple_view": {"allow": True}, - }, - "queries": {"query_two": {"allow": True, "sql": "select 2"}}, - } - } - } - previous_config = cascade_app_client.ds.config - try: - cascade_app_client.ds.config = config - response = cascade_app_client.get( - "/fixtures", - cookies={"ds_actor": cascade_app_client.actor_cookie({"id": "test"})}, - ) - # Tables - assert ">123_starts_with_digits" in response.text - assert ">Table With Space In Name 🔒" in response.text - # Queries - assert ">from_async_hook 🔒" in response.text - assert ">query_two" in response.text - # Views - assert ">paginated_view 🔒" in response.text - assert ">simple_view" in response.text - finally: - cascade_app_client.ds.config = previous_config - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "actor,permission,resource_1,resource_2,expected_result", - ( - # Without restrictions the defaults apply - ({"id": "t"}, "view-instance", None, None, True), - ({"id": "t"}, "view-database", "one", None, True), - ({"id": "t"}, "view-table", "one", "t1", True), - # If there is an _r block, everything gets denied unless explicitly allowed - ({"id": "t", "_r": {}}, "view-instance", None, None, False), - ({"id": "t", "_r": {}}, "view-database", "one", None, False), - ({"id": "t", "_r": {}}, "view-table", "one", "t1", False), - # Explicit allowing works at the "a" for all level: - ({"id": "t", "_r": {"a": ["vi"]}}, "view-instance", None, None, True), - ({"id": "t", "_r": {"a": ["vd"]}}, "view-database", "one", None, True), - ({"id": "t", "_r": {"a": ["vt"]}}, "view-table", "one", "t1", True), - # But not if it's the wrong permission - ({"id": "t", "_r": {"a": ["vi"]}}, "view-database", "one", None, False), - ({"id": "t", "_r": {"a": ["vd"]}}, "view-table", "one", "t1", False), - # Works at the "d" for database level: - ({"id": "t", "_r": {"d": {"one": ["vd"]}}}, "view-database", "one", None, True), - ( - # view-database-download requires view-database too (also_requires) - {"id": "t", "_r": {"d": {"one": ["vdd", 
"vd"]}}}, - "view-database-download", - "one", - None, - True, - ), - ( - # execute-sql requires view-database too (also_requires) - {"id": "t", "_r": {"d": {"one": ["es", "vd"]}}}, - "execute-sql", - "one", - None, - True, - ), - # Works at the "r" for table level: - ( - {"id": "t", "_r": {"r": {"one": {"t1": ["vt"]}}}}, - "view-table", - "one", - "t1", - True, - ), - ( - {"id": "t", "_r": {"r": {"one": {"t1": ["vt"]}}}}, - "view-table", - "one", - "t2", - False, - ), - # non-abbreviations should work too - ( - {"id": "t", "_r": {"a": ["view-instance"]}}, - "view-instance", - None, - None, - True, - ), - ( - {"id": "t", "_r": {"d": {"one": ["view-database"]}}}, - "view-database", - "one", - None, - True, - ), - ( - {"id": "t", "_r": {"r": {"one": {"t1": ["view-table"]}}}}, - "view-table", - "one", - "t1", - True, - ), - # view-database does NOT grant view-instance (no upward cascading) - ({"id": "t", "_r": {"a": ["vd"]}}, "view-instance", None, None, False), - ), -) -async def test_actor_restricted_permissions( - perms_ds, actor, permission, resource_1, resource_2, expected_result -): - perms_ds.pdb = True - perms_ds.root_enabled = True # Allow root actor to access /-/permissions - cookies = {"ds_actor": perms_ds.sign({"a": {"id": "root"}}, "actor")} - csrftoken = (await perms_ds.client.get("/-/permissions", cookies=cookies)).cookies[ - "ds_csrftoken" - ] - cookies["ds_csrftoken"] = csrftoken - response = await perms_ds.client.post( - "/-/permissions", - data={ - "actor": json.dumps(actor), - "permission": permission, - "resource_1": resource_1, - "resource_2": resource_2, - "csrftoken": csrftoken, - }, - cookies=cookies, - ) - # Response mirrors /-/check JSON structure - if resource_1 is None: - expected_path = "/" - elif resource_2 is None: - expected_path = f"/{resource_1}" - else: - expected_path = f"/{resource_1}/{resource_2}" - - expected_resource = { - "parent": resource_1, - "child": resource_2, - "path": expected_path, - } - expected = { - "action": permission, - "allowed": expected_result, - "resource": expected_resource, - } - if actor.get("id"): - expected["actor_id"] = actor["id"] - assert response.json() == expected - - -PermConfigTestCase = collections.namedtuple( - "PermConfigTestCase", - "config,actor,action,resource,expected_result", -) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "config,actor,action,resource,expected_result", - ( - # Simple view-instance default=True example - PermConfigTestCase( - config={}, - actor=None, - action="view-instance", - resource=None, - expected_result=True, - ), - # debug-menu on root - PermConfigTestCase( - config={"permissions": {"debug-menu": {"id": "user"}}}, - actor={"id": "user"}, - action="debug-menu", - resource=None, - expected_result=True, - ), - # debug-menu on root, wrong actor - PermConfigTestCase( - config={"permissions": {"debug-menu": {"id": "user"}}}, - actor={"id": "user2"}, - action="debug-menu", - resource=None, - expected_result=False, - ), - # create-table on root - PermConfigTestCase( - config={"permissions": {"create-table": {"id": "user"}}}, - actor={"id": "user"}, - action="create-table", - resource=None, - expected_result=True, - ), - # create-table on database - no resource specified - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": {"permissions": {"create-table": {"id": "user"}}} - } - }, - actor={"id": "user"}, - action="create-table", - resource=None, - expected_result=False, - ), - # create-table on database - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": 
{"permissions": {"create-table": {"id": "user"}}} - } - }, - actor={"id": "user"}, - action="create-table", - resource="perms_ds_one", - expected_result=True, - ), - # insert-row on root, wrong actor - PermConfigTestCase( - config={"permissions": {"insert-row": {"id": "user"}}}, - actor={"id": "user2"}, - action="insert-row", - resource=("perms_ds_one", "t1"), - expected_result=False, - ), - # insert-row on root, right actor - PermConfigTestCase( - config={"permissions": {"insert-row": {"id": "user"}}}, - actor={"id": "user"}, - action="insert-row", - resource=("perms_ds_one", "t1"), - expected_result=True, - ), - # insert-row on database - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": {"permissions": {"insert-row": {"id": "user"}}} - } - }, - actor={"id": "user"}, - action="insert-row", - resource="perms_ds_one", - expected_result=True, - ), - # insert-row on table, wrong table - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": { - "tables": { - "t1": {"permissions": {"insert-row": {"id": "user"}}} - } - } - } - }, - actor={"id": "user"}, - action="insert-row", - resource=("perms_ds_one", "t2"), - expected_result=False, - ), - # insert-row on table, right table - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": { - "tables": { - "t1": {"permissions": {"insert-row": {"id": "user"}}} - } - } - } - }, - actor={"id": "user"}, - action="insert-row", - resource=("perms_ds_one", "t1"), - expected_result=True, - ), - # view-query on canned query, wrong actor - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": { - "queries": { - "q1": { - "sql": "select 1 + 1", - "permissions": {"view-query": {"id": "user"}}, - } - } - } - } - }, - actor={"id": "user2"}, - action="view-query", - resource=("perms_ds_one", "q1"), - expected_result=False, - ), - # view-query on canned query, right actor - PermConfigTestCase( - config={ - "databases": { - "perms_ds_one": { - "queries": { - "q1": { - "sql": "select 1 + 1", - "permissions": {"view-query": {"id": "user"}}, - } - } - } - } - }, - actor={"id": "user"}, - action="view-query", - resource=("perms_ds_one", "q1"), - expected_result=True, - ), - ), -) -async def test_permissions_in_config( - perms_ds, config, actor, action, resource, expected_result -): - previous_config = perms_ds.config - updated_config = copy.deepcopy(previous_config) - updated_config.update(config) - perms_ds.config = updated_config - try: - # Convert old-style resource to Resource object - from datasette.resources import DatabaseResource, TableResource - - resource_obj = None - if resource: - if isinstance(resource, str): - resource_obj = DatabaseResource(database=resource) - elif isinstance(resource, tuple) and len(resource) == 2: - resource_obj = TableResource(database=resource[0], table=resource[1]) - - result = await perms_ds.allowed( - action=action, resource=resource_obj, actor=actor - ) - if result != expected_result: - pprint(perms_ds._permission_checks) - assert result == expected_result - finally: - perms_ds.config = previous_config - - -@pytest.mark.asyncio -async def test_actor_endpoint_allows_any_token(): - ds = Datasette() - token = ds.sign( - { - "a": "root", - "token": "dstok", - "t": int(time.time()), - "_r": {"a": ["debug-menu"]}, - }, - namespace="token", - ) - response = await ds.client.get( - "/-/actor.json", headers={"Authorization": f"Bearer dstok_{token}"} - ) - assert response.status_code == 200 - assert response.json()["actor"] == { - "id": "root", - "token": "dstok", - "_r": {"a": 
["debug-menu"]}, - } - - -@pytest.mark.serial -@pytest.mark.parametrize( - "options,expected", - ( - ([], {"id": "root", "token": "dstok"}), - ( - ["--all", "debug-menu"], - {"_r": {"a": ["dm"]}, "id": "root", "token": "dstok"}, - ), - ( - ["-a", "debug-menu", "--all", "create-table"], - {"_r": {"a": ["dm", "ct"]}, "id": "root", "token": "dstok"}, - ), - ( - ["-r", "db1", "t1", "insert-row"], - {"_r": {"r": {"db1": {"t1": ["ir"]}}}, "id": "root", "token": "dstok"}, - ), - ( - ["-d", "db1", "create-table"], - {"_r": {"d": {"db1": ["ct"]}}, "id": "root", "token": "dstok"}, - ), - # And one with all of them multiple times using all the names - ( - [ - "-a", - "debug-menu", - "--all", - "create-table", - "-r", - "db1", - "t1", - "insert-row", - "--resource", - "db1", - "t2", - "update-row", - "-d", - "db1", - "create-table", - "--database", - "db2", - "drop-table", - ], - { - "_r": { - "a": ["dm", "ct"], - "d": {"db1": ["ct"], "db2": ["dt"]}, - "r": {"db1": {"t1": ["ir"], "t2": ["ur"]}}, - }, - "id": "root", - "token": "dstok", - }, - ), - ), -) -def test_cli_create_token(options, expected): - runner = CliRunner() - result1 = runner.invoke( - cli, - [ - "create-token", - "--secret", - "sekrit", - "root", - ] - + options, - ) - token = result1.output.strip() - result2 = runner.invoke( - cli, - [ - "serve", - "--secret", - "sekrit", - "--get", - "/-/actor.json", - "--token", - token, - ], - ) - assert 0 == result2.exit_code, result2.output - assert json.loads(result2.output) == {"actor": expected} - - -_visible_tables_re = re.compile(r">\/((\w+)\/(\w+))\.json<\/a> - Get rows for") - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "is_logged_in,config,expected_visible_tables", - ( - # Unprotected instance logged out user sees everything: - ( - False, - None, - ["perms_ds_one/t1", "perms_ds_one/t2", "perms_ds_two/t1"], - ), - # Fully protected instance logged out user sees nothing - (False, {"allow": {"id": "user"}}, None), - # User with visibility of just perms_ds_one sees both tables there - ( - True, - { - "databases": { - "perms_ds_one": {"allow": {"id": "user"}}, - "perms_ds_two": {"allow": False}, - } - }, - ["perms_ds_one/t1", "perms_ds_one/t2"], - ), - # User with visibility of only table perms_ds_one/t1 sees just that one - ( - True, - { - "databases": { - "perms_ds_one": { - "allow": {"id": "user"}, - "tables": {"t2": {"allow": False}}, - }, - "perms_ds_two": {"allow": False}, - } - }, - ["perms_ds_one/t1"], - ), - ), -) -async def test_api_explorer_visibility( - perms_ds, is_logged_in, config, expected_visible_tables -): - try: - prev_config = perms_ds.config - perms_ds.config = config or {} - cookies = {} - if is_logged_in: - cookies = {"ds_actor": perms_ds.client.actor_cookie({"id": "user"})} - response = await perms_ds.client.get("/-/api", cookies=cookies) - if expected_visible_tables: - assert response.status_code == 200 - # Search HTML for stuff matching: - # '>/perms_ds_one/t2.json - Get rows for' - visible_tables = [ - match[0] for match in _visible_tables_re.findall(response.text) - ] - assert visible_tables == expected_visible_tables - else: - assert response.status_code == 403 - finally: - perms_ds.config = prev_config - - -@pytest.mark.asyncio -async def test_view_table_token_cannot_gain_access_without_base_permission(perms_ds): - # Only allow a different actor to view this table - previous_config = perms_ds.config - perms_ds.config = { - "databases": { - "perms_ds_two": { - # Only someone-else can see anything in this database - "allow": {"id": "someone-else"}, - } - } 
- } - try: - actor = { - "id": "restricted-token", - "token": "dstok", - # Restricted token claims access to perms_ds_two/t1 only - "_r": {"r": {"perms_ds_two": {"t1": ["vt"]}}}, - } - cookies = {"ds_actor": perms_ds.client.actor_cookie(actor)} - response = await perms_ds.client.get("/perms_ds_two/t1.json", cookies=cookies) - assert response.status_code == 403 - finally: - perms_ds.config = previous_config - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "restrictions,verb,path,body,expected_status", - ( - # No restrictions - (None, "get", "/.json", None, 200), - (None, "get", "/perms_ds_one.json", None, 200), - (None, "get", "/perms_ds_one/t1.json", None, 200), - (None, "get", "/perms_ds_one/t1/1.json", None, 200), - (None, "get", "/perms_ds_one/v1.json", None, 200), - # Restricted to just view-instance - ({"a": ["vi"]}, "get", "/.json", None, 200), - ({"a": ["vi"]}, "get", "/perms_ds_one.json", None, 403), - ({"a": ["vi"]}, "get", "/perms_ds_one/t1.json", None, 403), - ({"a": ["vi"]}, "get", "/perms_ds_one/t1/1.json", None, 403), - ({"a": ["vi"]}, "get", "/perms_ds_one/v1.json", None, 403), - # Restricted to just view-database - ( - {"a": ["vd"]}, - "get", - "/.json", - None, - 403, - ), # Cannot see instance (no upward cascading) - ({"a": ["vd"]}, "get", "/perms_ds_one.json", None, 200), - ({"a": ["vd"]}, "get", "/perms_ds_one/t1.json", None, 403), - ({"a": ["vd"]}, "get", "/perms_ds_one/t1/1.json", None, 403), - ({"a": ["vd"]}, "get", "/perms_ds_one/v1.json", None, 403), - # Restricted to just view-table for specific database - ( - {"d": {"perms_ds_one": ["vt"]}}, - "get", - "/.json", - None, - 403, - ), # Cannot see instance (no upward cascading) - ( - {"d": {"perms_ds_one": ["vt"]}}, - "get", - "/perms_ds_one.json", - None, - 403, - ), # Cannot see database page (no upward cascading) - ( - {"d": {"perms_ds_one": ["vt"]}}, - "get", - "/perms_ds_two.json", - None, - 403, - ), # But not this one - ( - # Can see the table - {"d": {"perms_ds_one": ["vt"]}}, - "get", - "/perms_ds_one/t1.json", - None, - 200, - ), - ( - # And the view - {"d": {"perms_ds_one": ["vt"]}}, - "get", - "/perms_ds_one/v1.json", - None, - 200, - ), - # view-table access to a specific table - ( - {"r": {"perms_ds_one": {"t1": ["vt"]}}}, - "get", - "/.json", - None, - 403, - ), # Cannot see instance (no upward cascading) - ( - {"r": {"perms_ds_one": {"t1": ["vt"]}}}, - "get", - "/perms_ds_one.json", - None, - 403, - ), # Cannot see database page (no upward cascading) - ( - {"r": {"perms_ds_one": {"t1": ["vt"]}}}, - "get", - "/perms_ds_one/t1.json", - None, - 200, - ), - # But cannot see the other table - ( - {"r": {"perms_ds_one": {"t1": ["vt"]}}}, - "get", - "/perms_ds_one/t2.json", - None, - 403, - ), - # Or the view - ( - {"r": {"perms_ds_one": {"t1": ["vt"]}}}, - "get", - "/perms_ds_one/v1.json", - None, - 403, - ), - ), -) -async def test_actor_restrictions( - perms_ds, restrictions, verb, path, body, expected_status -): - actor = {"id": "user"} - if restrictions: - actor["_r"] = restrictions - method = getattr(perms_ds.client, verb) - kwargs = {"cookies": {"ds_actor": perms_ds.client.actor_cookie(actor)}} - if body: - kwargs["json"] = body - perms_ds._permission_checks.clear() - response = await method(path, **kwargs) - assert response.status_code == expected_status, json.dumps( - { - "verb": verb, - "path": path, - "body": body, - "restrictions": restrictions, - "expected_status": expected_status, - "response_status": response.status_code, - "checks": [ - { - "action": check.action, - "parent": 
check.parent, - "child": check.child, - "result": check.result, - } - for check in perms_ds._permission_checks - ], - }, - indent=2, - ) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "restrictions,action,resource,expected", - ( - # Exact match: view-instance restriction allows view-instance action - ({"a": ["view-instance"]}, "view-instance", None, True), - # No implication: view-table does NOT imply view-instance - ({"a": ["view-table"]}, "view-instance", None, False), - ({"a": ["view-database"]}, "view-instance", None, False), - # update-row does not imply view-instance - ({"a": ["update-row"]}, "view-instance", None, False), - # view-table on a resource does NOT imply view-instance - ({"r": {"db1": {"t1": ["view-table"]}}}, "view-instance", None, False), - # execute-sql on a database does NOT imply view-instance or view-database - ({"d": {"db1": ["es"]}}, "view-instance", None, False), - ({"d": {"db1": ["es"]}}, "view-database", "db1", False), - ({"d": {"db1": ["es"]}}, "view-database", "db2", False), - # But execute-sql abbreviation DOES allow execute-sql action on that database - ({"d": {"db1": ["es"]}}, "execute-sql", "db1", True), - # update-row on a resource does not imply view-instance - ({"r": {"db1": {"t1": ["update-row"]}}}, "view-instance", None, False), - # view-database on a database does NOT imply view-instance - ({"d": {"db1": ["view-database"]}}, "view-instance", None, False), - # But it DOES allow view-database on that specific database - ({"d": {"db1": ["view-database"]}}, "view-database", "db1", True), - # Having view-table on "a" allows access to any specific table - ({"a": ["view-table"]}, "view-table", ("dbname", "tablename"), True), - # Having view-table on a database allows access to tables in that database - ( - {"d": {"dbname": ["view-table"]}}, - "view-table", - ("dbname", "tablename"), - True, - ), - # But not if it's allowed on a different database - ( - {"d": {"dbname": ["view-table"]}}, - "view-table", - ("dbname2", "tablename"), - False, - ), - # Table-level restriction allows access to that specific table - ( - {"r": {"dbname": {"tablename": ["view-table"]}}}, - "view-table", - ("dbname", "tablename"), - True, - ), - # But not to a different table in the same database - ( - {"r": {"dbname": {"tablename": ["view-table"]}}}, - "view-table", - ("dbname", "other_table"), - False, - ), - ), -) -async def test_restrictions_allow_action(restrictions, action, resource, expected): - ds = Datasette() - await ds.invoke_startup() - actual = restrictions_allow_action(ds, restrictions, action, resource) - assert actual == expected - - -@pytest.mark.asyncio -async def test_actor_restrictions_filters_allowed_resources(perms_ds): - """Test that allowed_resources() respects actor restrictions - issue #2534""" - - # Actor restricted to just perms_ds_one/t1 - actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} - - # Should only return t1 - page = await perms_ds.allowed_resources("view-table", actor) - assert len(page.resources) == 1 - assert page.resources[0].parent == "perms_ds_one" - assert page.resources[0].child == "t1" - - # Database listing should be empty (no view-database permission) - db_page = await perms_ds.allowed_resources("view-database", actor) - assert len(db_page.resources) == 0 - - -@pytest.mark.asyncio -async def test_actor_restrictions_do_not_expand_allowed_resources(perms_ds): - """Restrictions cannot grant access not already allowed to the actor.""" - - previous_config = perms_ds.config - perms_ds.config = { - "databases": { - 
"perms_ds_one": { - "allow": {"id": "someone-else"}, - } - } - } - try: - actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} - - # Base actor is not allowed to see t1, so restrictions should not change that - page = await perms_ds.allowed_resources("view-table", actor) - assert len(page.resources) == 0 - - # And explicit permission checks should still deny - response = await perms_ds.client.get( - "/perms_ds_one/t1.json", - cookies={"ds_actor": perms_ds.client.actor_cookie(actor)}, - ) - assert response.status_code == 403 - finally: - perms_ds.config = previous_config - - -@pytest.mark.asyncio -async def test_actor_restrictions_database_level(perms_ds): - """Test database-level restrictions allow all tables in database - issue #2534""" - - actor = {"id": "user", "_r": {"d": {"perms_ds_one": ["vt"]}}} - - page = await perms_ds.allowed_resources("view-table", actor, parent="perms_ds_one") - - # Should return all tables in perms_ds_one - table_names = {r.child for r in page.resources} - assert "t1" in table_names - assert "t2" in table_names - assert "v1" in table_names # views too - - -@pytest.mark.asyncio -async def test_actor_restrictions_global_level(perms_ds): - """Test global-level restrictions allow all resources - issue #2534""" - - actor = {"id": "user", "_r": {"a": ["vt"]}} - - page = await perms_ds.allowed_resources("view-table", actor) - - # Should return all tables in all databases - assert len(page.resources) > 0 - dbs = {r.parent for r in page.resources} - assert "perms_ds_one" in dbs - assert "perms_ds_two" in dbs - - -@pytest.mark.asyncio -async def test_restrictions_gate_before_config(perms_ds): - """Test that restrictions act as gating filter before config permissions - issue #2534""" - from datasette.resources import TableResource - - # Actor restricted to just t1 (not t2) - actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} - - # Config doesn't matter - restrictions gate what's checked - # t2 is not in restriction allowlist, so should be DENIED - result = await perms_ds.allowed( - action="view-table", - resource=TableResource("perms_ds_one", "t2"), - actor=actor, - ) - assert result is False - - # t1 is in restrictions AND passes normal permission check - should be ALLOWED - result = await perms_ds.allowed( - action="view-table", - resource=TableResource("perms_ds_one", "t1"), - actor=actor, - ) - assert result is True - - -@pytest.mark.asyncio -async def test_actor_restrictions_json_endpoints_show_filtered_listings(perms_ds): - """Test that /.json and /db.json show correct filtered listings - issue #2534""" - - actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} - cookies = {"ds_actor": perms_ds.client.actor_cookie(actor)} - - # /.json should be 403 (no view-instance permission) - response = await perms_ds.client.get("/.json", cookies=cookies) - assert response.status_code == 403 - - # /perms_ds_one.json should be 403 (no view-database permission) - response = await perms_ds.client.get("/perms_ds_one.json", cookies=cookies) - assert response.status_code == 403 - - # /perms_ds_one/t1.json should be 200 - response = await perms_ds.client.get("/perms_ds_one/t1.json", cookies=cookies) - assert response.status_code == 200 - - -@pytest.mark.asyncio -async def test_actor_restrictions_view_instance_only(perms_ds): - """Test actor restricted to view-instance only - issue #2534""" - - actor = {"id": "user", "_r": {"a": ["vi"]}} - cookies = {"ds_actor": perms_ds.client.actor_cookie(actor)} - - # /.json should be 200 (has 
view-instance permission) - response = await perms_ds.client.get("/.json", cookies=cookies) - assert response.status_code == 200 - - # But no databases should be visible (no view-database permission) - data = response.json() - # The instance is visible but databases list should be empty or minimal - # Actually, let's check via allowed_resources - page = await perms_ds.allowed_resources("view-database", actor) - assert len(page.resources) == 0 - - -@pytest.mark.asyncio -async def test_actor_restrictions_empty_allowlist(perms_ds): - """Test actor with empty restrictions allowlist denies everything - issue #2534""" - - actor = {"id": "user", "_r": {}} - - # No actions in allowlist, so everything should be denied - page1 = await perms_ds.allowed_resources("view-table", actor) - assert len(page1.resources) == 0 - - page2 = await perms_ds.allowed_resources("view-database", actor) - assert len(page2.resources) == 0 - - result = await perms_ds.allowed(action="view-instance", actor=actor) - assert result is False - - -@pytest.mark.asyncio -async def test_actor_restrictions_cannot_be_overridden_by_config(): - """Test that config permissions cannot override actor restrictions - issue #2534""" - from datasette.app import Datasette - from datasette.resources import TableResource - - # Create datasette with config that allows user to access both t1 AND t2 - config = { - "databases": { - "test_db": { - "tables": { - "t1": {"allow": {"id": "user"}}, - "t2": {"allow": {"id": "user"}}, - } - } - } - } - - ds = Datasette(config=config) - await ds.invoke_startup() - db = ds.add_memory_database("test_db") - await db.execute_write("create table t1 (id integer primary key)") - await db.execute_write("create table t2 (id integer primary key)") - - # Actor restricted to ONLY t1 (not t2) - # Even though config allows t2, restrictions should deny it - actor = {"id": "user", "_r": {"r": {"test_db": {"t1": ["vt"]}}}} - - # t1 should be allowed (in restrictions AND config allows) - result = await ds.allowed( - action="view-table", resource=TableResource("test_db", "t1"), actor=actor - ) - assert result is True, "t1 should be allowed - in restriction allowlist" - - # t2 should be DENIED (not in restrictions, even though config allows) - result = await ds.allowed( - action="view-table", resource=TableResource("test_db", "t2"), actor=actor - ) - assert ( - result is False - ), "t2 should be denied - NOT in restriction allowlist, config cannot override" - - -@pytest.mark.asyncio -async def test_actor_restrictions_with_database_level_config(perms_ds): - """Test database-level restrictions with table-level config - issue #2534""" - from datasette.resources import TableResource - - # Config allows specific tables only - perms_ds._config = { - "databases": { - "perms_ds_one": { - "tables": { - "t1": {"allow": {"id": "user"}}, - "t2": {"allow": {"id": "user"}}, - } - } - } - } - - # Actor has database-level restriction (all tables in perms_ds_one) - # Should only access tables that pass BOTH restrictions AND config - actor = {"id": "user", "_r": {"d": {"perms_ds_one": ["vt"]}}} - - # t1 - in restrictions (all tables) AND config allows - result = await perms_ds.allowed( - action="view-table", resource=TableResource("perms_ds_one", "t1"), actor=actor - ) - assert result is True - - # t2 - in restrictions (all tables) AND config allows - result = await perms_ds.allowed( - action="view-table", resource=TableResource("perms_ds_one", "t2"), actor=actor - ) - assert result is True - - # v1 (view) - in restrictions (all tables) AND 
config doesn't mention it - # Since actor has database-level restriction allowing all tables, v1 is allowed - # Config is additive, not restrictive - it doesn't create implicit denies - result = await perms_ds.allowed( - action="view-table", resource=TableResource("perms_ds_one", "v1"), actor=actor - ) - assert result is True, "v1 should be allowed - actor has db-level restriction" - - # Clean up - perms_ds._config = None - - -@pytest.mark.asyncio -async def test_actor_restrictions_parent_deny_blocks_config_child_allow(perms_ds): - """ - Test that table-level restrictions add parent-level deny to block - other tables in the same database, even if config allows them - """ - from datasette.resources import TableResource - - # Config allows both t1 and t2 - perms_ds._config = { - "databases": { - "perms_ds_one": { - "tables": { - "t1": {"allow": {"id": "user"}}, - "t2": {"allow": {"id": "user"}}, - } - } - } - } - - # Restriction allows ONLY t1 in perms_ds_one - # This should add: - # - parent-level DENY for perms_ds_one (to block other tables) - # - child-level ALLOW for t1 - actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} - - # t1 should work (child-level allow beats parent-level deny) - result = await perms_ds.allowed( - action="view-table", resource=TableResource("perms_ds_one", "t1"), actor=actor - ) - assert result is True - - # t2 should be DENIED by parent-level deny from restrictions - # even though config has child-level allow - # Because restrictions should run first - result = await perms_ds.allowed( - action="view-table", resource=TableResource("perms_ds_one", "t2"), actor=actor - ) - assert ( - result is False - ), "t2 should be denied - restriction parent deny should beat config child allow" - - # Clean up - perms_ds._config = None - - -@pytest.mark.asyncio -async def test_permission_check_view_requires_debug_permission(): - """Test that /-/check requires permissions-debug permission""" - # Anonymous user should be denied - ds = Datasette() - response = await ds.client.get("/-/check.json?action=view-instance") - assert response.status_code == 403 - assert "permissions-debug" in response.text - - # User without permissions-debug should be denied - response = await ds.client.get( - "/-/check.json?action=view-instance", - cookies={"ds_actor": ds.sign({"id": "user"}, "actor")}, - ) - assert response.status_code == 403 - - # Root user should have access (root has all permissions) - ds_with_root = Datasette() - ds_with_root.root_enabled = True - root_token = ds_with_root.create_token("root") - response = await ds_with_root.client.get( - "/-/check.json?action=view-instance", - headers={"Authorization": f"Bearer {root_token}"}, - ) - assert response.status_code == 200 - data = response.json() - assert data["action"] == "view-instance" - assert data["allowed"] is True - - -@pytest.mark.asyncio -async def test_root_allow_block_with_table_restricted_actor(): - """ - Test that root-level allow: blocks are processed for actors with - table-level restrictions. - - This covers the case in config.py is_in_restriction_allowlist() where - parent=None, child=None and actor has table restrictions but not global. 
- """ - from datasette.resources import TableResource - - # Config with root-level allow block that denies non-admin users - ds = Datasette( - config={ - "allow": {"id": "admin"}, # Root-level allow block - } - ) - await ds.invoke_startup() - db = ds.add_memory_database("mydb") - await db.execute_write("create table t1 (id integer primary key)") - await ds.client.get("/") # Trigger catalog refresh - - # Actor with table-level restrictions only (not global) - actor = {"id": "user", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} - - # The root-level allow: {id: admin} should be processed and deny this user - # because they're not "admin", even though they have table restrictions - result = await ds.allowed( - action="view-table", - resource=TableResource("mydb", "t1"), - actor=actor, - ) - # Should be False because root allow: {id: admin} denies non-admin users - assert result is False - - # But admin with same restrictions should be allowed - admin_actor = {"id": "admin", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} - result = await ds.allowed( - action="view-table", - resource=TableResource("mydb", "t1"), - actor=admin_actor, - ) - assert result is True diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 42995c0d..c89a466d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1,190 +1,96 @@ from bs4 import BeautifulSoup as Soup -from .fixtures import ( - make_app_client, - TABLES, - TEMP_PLUGIN_SECRET_FILE, - PLUGINS_DIR, - TestClient as _TestClient, -) # noqa -from click.testing import CliRunner -from datasette.app import Datasette -from datasette import cli, hookimpl -from datasette.filters import FilterArguments -from datasette.plugins import get_plugins, DEFAULT_PLUGINS, pm -from datasette.permissions import PermissionSQL, Action -from datasette.resources import DatabaseResource -from datasette.utils.sqlite import sqlite3 -from datasette.utils import StartupError, await_me_maybe -from jinja2 import ChoiceLoader, FileSystemLoader +from .fixtures import app_client, make_app_client, TEMP_PLUGIN_SECRET_FILE # noqa import base64 -import datetime -import importlib import json import os import pathlib import re -import textwrap import pytest import urllib -at_memory_re = re.compile(r" at 0x\w+") - -@pytest.mark.parametrize( - "plugin_hook", [name for name in dir(pm.hook) if not name.startswith("_")] -) -def test_plugin_hooks_have_tests(plugin_hook): - """Every plugin hook should be referenced in this test module""" - tests_in_this_module = [t for t in globals().keys() if t.startswith("test_hook_")] - ok = False - for test in tests_in_this_module: - if plugin_hook in test: - ok = True - assert ok, f"Plugin hook is missing tests: {plugin_hook}" - - -@pytest.mark.asyncio -async def test_hook_plugins_dir_plugin_prepare_connection(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?_shape=arrayfirst&sql=select+convert_units(100%2C+'m'%2C+'ft')" +def test_plugins_dir_plugin(app_client): + response = app_client.get( + "/fixtures.json?sql=select+convert_units(100%2C+'m'%2C+'ft')" ) - assert response.json()[0] == pytest.approx(328.0839) + assert pytest.approx(328.0839) == response.json["rows"][0][0] -@pytest.mark.asyncio -async def test_hook_plugin_prepare_connection_arguments(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?sql=select+prepare_connection_args()&_shape=arrayfirst" - ) - assert [ - "database=fixtures, datasette.plugin_config(\"name-of-plugin\")={'depth': 'root'}" - ] == response.json() - - # Function should not be 
available on the internal database - db = ds_client.ds.get_internal_database() - with pytest.raises(sqlite3.OperationalError): - await db.execute("select prepare_connection_args()") - - -@pytest.mark.asyncio @pytest.mark.parametrize( "path,expected_decoded_object", [ + ("/", {"template": "index.html", "database": None, "table": None}), ( - "/", - { - "template": "index.html", - "database": None, - "table": None, - "view_name": "index", - "request_path": "/", - "added": 15, - "columns": None, - }, - ), - ( - "/fixtures", - { - "template": "database.html", - "database": "fixtures", - "table": None, - "view_name": "database", - "request_path": "/fixtures", - "added": 15, - "columns": None, - }, + "/fixtures/", + {"template": "database.html", "database": "fixtures", "table": None}, ), ( "/fixtures/sortable", - { - "template": "table.html", - "database": "fixtures", - "table": "sortable", - "view_name": "table", - "request_path": "/fixtures/sortable", - "added": 15, - "columns": [ - "pk1", - "pk2", - "content", - "sortable", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], - }, + {"template": "table.html", "database": "fixtures", "table": "sortable"}, ), ], ) -async def test_hook_extra_css_urls(ds_client, path, expected_decoded_object): - response = await ds_client.get(path) - assert response.status_code == 200 - links = Soup(response.text, "html.parser").find_all("link") +def test_plugin_extra_css_urls(app_client, path, expected_decoded_object): + response = app_client.get(path) + links = Soup(response.body, "html.parser").findAll("link") special_href = [ - link - for link in links - if link.attrs["href"].endswith("/extra-css-urls-demo.css") + l for l in links if l.attrs["href"].endswith("/extra-css-urls-demo.css") ][0]["href"] # This link has a base64-encoded JSON blob in it encoded = special_href.split("/")[3] - actual_decoded_object = json.loads(base64.b64decode(encoded).decode("utf8")) - assert expected_decoded_object == actual_decoded_object + assert expected_decoded_object == json.loads( + base64.b64decode(encoded).decode("utf8") + ) -@pytest.mark.asyncio -async def test_hook_extra_js_urls(ds_client): - response = await ds_client.get("/") - scripts = Soup(response.text, "html.parser").find_all("script") - script_attrs = [s.attrs for s in scripts] - for attrs in [ - { +def test_plugin_extra_js_urls(app_client): + response = app_client.get("/") + scripts = Soup(response.body, "html.parser").findAll("script") + assert [ + s + for s in scripts + if s.attrs + == { "integrity": "SRIHASH", "crossorigin": "anonymous", - "src": "https://plugin-example.datasette.io/jquery.js", - }, - { - "src": "https://plugin-example.datasette.io/plugin.module.js", - "type": "module", - }, - ]: - assert any(s == attrs for s in script_attrs), "Expected: {}".format(attrs) + "src": "https://example.com/jquery.js", + } + ] -@pytest.mark.asyncio -async def test_plugins_with_duplicate_js_urls(ds_client): +def test_plugins_with_duplicate_js_urls(app_client): # If two plugins both require jQuery, jQuery should be loaded only once - response = await ds_client.get("/fixtures") + response = app_client.get("/fixtures") # This test is a little tricky, as if the user has any other plugins in # their current virtual environment those may affect what comes back too. 
- # What matters is that https://plugin-example.datasette.io/jquery.js is only there once + # What matters is that https://example.com/jquery.js is only there once # and it comes before plugin1.js and plugin2.js which could be in either # order - scripts = Soup(response.text, "html.parser").find_all("script") + scripts = Soup(response.body, "html.parser").findAll("script") srcs = [s["src"] for s in scripts if s.get("src")] # No duplicates allowed: assert len(srcs) == len(set(srcs)) # jquery.js loaded once: - assert 1 == srcs.count("https://plugin-example.datasette.io/jquery.js") + assert 1 == srcs.count("https://example.com/jquery.js") # plugin1.js and plugin2.js are both there: - assert 1 == srcs.count("https://plugin-example.datasette.io/plugin1.js") - assert 1 == srcs.count("https://plugin-example.datasette.io/plugin2.js") + assert 1 == srcs.count("https://example.com/plugin1.js") + assert 1 == srcs.count("https://example.com/plugin2.js") # jquery comes before them both - assert srcs.index("https://plugin-example.datasette.io/jquery.js") < srcs.index( - "https://plugin-example.datasette.io/plugin1.js" + assert srcs.index("https://example.com/jquery.js") < srcs.index( + "https://example.com/plugin1.js" ) - assert srcs.index("https://plugin-example.datasette.io/jquery.js") < srcs.index( - "https://plugin-example.datasette.io/plugin2.js" + assert srcs.index("https://example.com/jquery.js") < srcs.index( + "https://example.com/plugin2.js" ) -@pytest.mark.asyncio -async def test_hook_render_cell_link_from_json(ds_client): +def test_plugins_render_cell_link_from_json(app_client): sql = """ select '{"href": "http://example.com/", "label":"Example"}' """.strip() - path = "/fixtures/-/query?" + urllib.parse.urlencode({"sql": sql}) - response = await ds_client.get(path) - td = Soup(response.text, "html.parser").find("table").find("tbody").find("td") + path = "/fixtures?" 
+ urllib.parse.urlencode({"sql": sql}) + response = app_client.get(path) + td = Soup(response.body, "html.parser").find("table").find("tbody").find("td") a = td.find("a") assert a is not None, str(a) assert a.attrs["href"] == "http://example.com/" @@ -192,83 +98,52 @@ async def test_hook_render_cell_link_from_json(ds_client): assert a.text == "Example" -@pytest.mark.asyncio -async def test_hook_render_cell_demo(ds_client): - response = await ds_client.get( - "/fixtures/simple_primary_key?id=4&_render_cell_extra=1" - ) - soup = Soup(response.text, "html.parser") +def test_plugins_render_cell_demo(app_client): + response = app_client.get("/fixtures/simple_primary_key?id=4") + soup = Soup(response.body, "html.parser") td = soup.find("td", {"class": "col-content"}) - assert json.loads(td.string) == { - "row": {"id": 4, "content": "RENDER_CELL_DEMO"}, + assert { "column": "content", "table": "simple_primary_key", "database": "fixtures", "config": {"depth": "table", "special": "this-is-simple_primary_key"}, - "render_cell_extra": 1, - } + } == json.loads(td.string) -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path", - ( - "/fixtures/-/query?sql=select+'RENDER_CELL_ASYNC'", - "/fixtures/simple_primary_key", - ), -) -async def test_hook_render_cell_async(ds_client, path): - response = await ds_client.get(path) - assert b"RENDER_CELL_ASYNC_RESULT" in response.content - - -@pytest.mark.asyncio -async def test_plugin_config(ds_client): - assert {"depth": "table"} == ds_client.ds.plugin_config( +def test_plugin_config(app_client): + assert {"depth": "table"} == app_client.ds.plugin_config( "name-of-plugin", database="fixtures", table="sortable" ) - assert {"depth": "database"} == ds_client.ds.plugin_config( + assert {"depth": "database"} == app_client.ds.plugin_config( "name-of-plugin", database="fixtures", table="unknown_table" ) - assert {"depth": "database"} == ds_client.ds.plugin_config( + assert {"depth": "database"} == app_client.ds.plugin_config( "name-of-plugin", database="fixtures" ) - assert {"depth": "root"} == ds_client.ds.plugin_config( + assert {"depth": "root"} == app_client.ds.plugin_config( "name-of-plugin", database="unknown_database" ) - assert {"depth": "root"} == ds_client.ds.plugin_config("name-of-plugin") - assert None is ds_client.ds.plugin_config("unknown-plugin") + assert {"depth": "root"} == app_client.ds.plugin_config("name-of-plugin") + assert None is app_client.ds.plugin_config("unknown-plugin") -@pytest.mark.asyncio -async def test_plugin_config_env(ds_client, monkeypatch): - monkeypatch.setenv("FOO_ENV", "FROM_ENVIRONMENT") - assert ds_client.ds.plugin_config("env-plugin") == {"foo": "FROM_ENVIRONMENT"} - - -@pytest.mark.asyncio -async def test_plugin_config_env_from_config(monkeypatch): - monkeypatch.setenv("FOO_ENV", "FROM_ENVIRONMENT_2") - datasette = Datasette( - config={"plugins": {"env-plugin": {"setting": {"$env": "FOO_ENV"}}}} - ) - assert datasette.plugin_config("env-plugin") == {"setting": "FROM_ENVIRONMENT_2"} - - -@pytest.mark.asyncio -async def test_plugin_config_env_from_list(ds_client): +def test_plugin_config_env(app_client): os.environ["FOO_ENV"] = "FROM_ENVIRONMENT" - assert [{"in_a_list": "FROM_ENVIRONMENT"}] == ds_client.ds.plugin_config( - "env-plugin-list" - ) + assert {"foo": "FROM_ENVIRONMENT"} == app_client.ds.plugin_config("env-plugin") + # Ensure secrets aren't visible in /-/metadata.json + metadata = app_client.get("/-/metadata.json") + assert {"foo": {"$env": "FOO_ENV"}} == metadata.json["plugins"]["env-plugin"] del 
os.environ["FOO_ENV"] -@pytest.mark.asyncio -async def test_plugin_config_file(ds_client): - with open(TEMP_PLUGIN_SECRET_FILE, "w") as fp: - fp.write("FROM_FILE") - assert {"foo": "FROM_FILE"} == ds_client.ds.plugin_config("file-plugin") +def test_plugin_config_file(app_client): + open(TEMP_PLUGIN_SECRET_FILE, "w").write("FROM_FILE") + assert {"foo": "FROM_FILE"} == app_client.ds.plugin_config("file-plugin") + # Ensure secrets aren't visible in /-/metadata.json + metadata = app_client.get("/-/metadata.json") + assert {"foo": {"$file": TEMP_PLUGIN_SECRET_FILE}} == metadata.json["plugins"][ + "file-plugin" + ] os.remove(TEMP_PLUGIN_SECRET_FILE) @@ -282,23 +157,15 @@ async def test_plugin_config_file(ds_client): "database": None, "table": None, "config": {"depth": "root"}, - "view_name": "index", - "request_path": "/", - "added": 15, - "columns": None, }, ), ( - "/fixtures", + "/fixtures/", { "template": "database.html", "database": "fixtures", "table": None, "config": {"depth": "database"}, - "view_name": "database", - "request_path": "/fixtures", - "added": 15, - "columns": None, }, ), ( @@ -308,1529 +175,72 @@ async def test_plugin_config_file(ds_client): "database": "fixtures", "table": "sortable", "config": {"depth": "table"}, - "view_name": "table", - "request_path": "/fixtures/sortable", - "added": 15, - "columns": [ - "pk1", - "pk2", - "content", - "sortable", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], }, ), ], ) -def test_hook_extra_body_script(app_client, path, expected_extra_body_script): - r = re.compile(r"") - response = app_client.get(path) - assert response.status_code == 200, response.text - match = r.search(response.text) - assert match is not None, "No extra_body_script found in HTML" - json_data = match.group(1) +def test_plugins_extra_body_script(app_client, path, expected_extra_body_script): + r = re.compile(r"") + json_data = r.search(app_client.get(path).body.decode("utf8")).group(1) actual_data = json.loads(json_data) assert expected_extra_body_script == actual_data -@pytest.mark.asyncio -async def test_hook_asgi_wrapper(ds_client): - response = await ds_client.get("/fixtures") - assert "fixtures" == response.headers["x-databases"] +def test_plugins_asgi_wrapper(app_client): + response = app_client.get("/fixtures") + assert "fixtures, special" == response.headers["x-databases"] -def test_hook_extra_template_vars(restore_working_directory): - with make_app_client( +def test_plugins_extra_template_vars(restore_working_directory): + for client in make_app_client( template_dir=str(pathlib.Path(__file__).parent / "test_templates") - ) as client: - response = client.get("/-/versions") - assert response.status_code == 200 + ): + response = client.get("/-/metadata") + assert response.status == 200 extra_template_vars = json.loads( - Soup(response.text, "html.parser").select("pre.extra_template_vars")[0].text + Soup(response.body, "html.parser").select("pre.extra_template_vars")[0].text ) assert { "template": "show_json.html", - "scope_path": "/-/versions", - "columns": None, + "scope_path": "/-/metadata", + "extra_serve_options": {}, } == extra_template_vars extra_template_vars_from_awaitable = json.loads( - Soup(response.text, "html.parser") + Soup(response.body, "html.parser") .select("pre.extra_template_vars_from_awaitable")[0] .text ) assert { "template": "show_json.html", "awaitable": True, - "scope_path": "/-/versions", + "scope_path": "/-/metadata", } == extra_template_vars_from_awaitable -def 
test_plugins_async_template_function(restore_working_directory): - with make_app_client( - template_dir=str(pathlib.Path(__file__).parent / "test_templates") - ) as client: - response = client.get("/-/versions") - assert response.status_code == 200 - extra_from_awaitable_function = ( - Soup(response.text, "html.parser") - .select("pre.extra_from_awaitable_function")[0] - .text - ) - expected = ( - sqlite3.connect(":memory:").execute("select sqlite_version()").fetchone()[0] - ) - assert expected == extra_from_awaitable_function - - -def test_default_plugins_have_no_templates_path_or_static_path(): - # The default plugins that ship with Datasette should have their static_path and - # templates_path all set to None - plugins = get_plugins() - for plugin in plugins: - if plugin["name"] in DEFAULT_PLUGINS: - assert None is plugin["static_path"] - assert None is plugin["templates_path"] - - -@pytest.fixture(scope="session") -def view_names_client(tmp_path_factory): - tmpdir = tmp_path_factory.mktemp("test-view-names") - templates = tmpdir / "templates" - templates.mkdir() - plugins = tmpdir / "plugins" - plugins.mkdir() - for template in ( - "index.html", - "database.html", - "table.html", - "row.html", - "show_json.html", - "query.html", +def test_extra_serve_options_available_on_datasette(restore_working_directory): + for client in make_app_client( + template_dir=str(pathlib.Path(__file__).parent / "test_templates"), + extra_serve_options={"foo": "bar"}, ): - (templates / template).write_text("view_name:{{ view_name }}", "utf-8") - (plugins / "extra_vars.py").write_text( - textwrap.dedent( - """ - from datasette import hookimpl - @hookimpl - def extra_template_vars(view_name): - return {"view_name": view_name} - """ - ), - "utf-8", - ) - db_path = str(tmpdir / "fixtures.db") - conn = sqlite3.connect(db_path) - conn.executescript(TABLES) - return _TestClient( - Datasette([db_path], template_dir=str(templates), plugins_dir=str(plugins)) - ) + response = client.get("/-/metadata") + assert response.status == 200 + extra_template_vars = json.loads( + Soup(response.body, "html.parser").select("pre.extra_template_vars")[0].text + ) + assert {"foo": "bar"} == extra_template_vars["extra_serve_options"] -@pytest.mark.parametrize( - "path,view_name", - ( - ("/", "index"), - ("/fixtures", "database"), - ("/fixtures/facetable", "table"), - ("/fixtures/facetable/1", "row"), - ("/-/versions", "json_data"), - ("/fixtures/-/query?sql=select+1", "database"), - ), -) -def test_view_names(view_names_client, path, view_name): - response = view_names_client.get(path) - assert response.status_code == 200 - assert f"view_name:{view_name}" == response.text - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_no_parameters(ds_client): - response = await ds_client.get("/fixtures/facetable.testnone") - assert response.status_code == 200 - assert b"Hello" == response.content - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_all_parameters(ds_client): - response = await ds_client.get("/fixtures/facetable.testall") - assert response.status_code == 200 - # Lots of 'at 0x103a4a690' in here - replace those so we can do - # an easy comparison - body = at_memory_re.sub(" at 0xXXX", response.text) - assert json.loads(body) == { - "datasette": "", - "columns": [ - "pk", - "created", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - "rows": [ - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - ], - "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null, n from facetable order by pk limit 51", - "query_name": None, - "database": "fixtures", - "table": "facetable", - "request": '', - "view_name": "table", - "1+1": 2, - } - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_custom_status_code(ds_client): - response = await ds_client.get( - "/fixtures/pragma_cache_size.testall?status_code=202" - ) - assert response.status_code == 202 - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_custom_content_type(ds_client): - response = await ds_client.get( - "/fixtures/pragma_cache_size.testall?content_type=text/blah" - ) - assert "text/blah" == response.headers["content-type"] - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_custom_headers(ds_client): - response = await ds_client.get( - "/fixtures/pragma_cache_size.testall?header=x-wow:1&header=x-gosh:2" - ) - assert "1" == response.headers["x-wow"] - assert "2" == response.headers["x-gosh"] - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_returning_response(ds_client): - response = await ds_client.get("/fixtures/facetable.testresponse") - assert response.status_code == 200 - assert response.json() == {"this_is": "json"} - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_returning_broken_value(ds_client): - response = await ds_client.get("/fixtures/facetable.testresponse?_broken=1") - assert response.status_code == 500 - assert "this should break should be dict or Response" in response.text - - -@pytest.mark.asyncio -async def test_hook_register_output_renderer_can_render(ds_client): - response = await ds_client.get("/fixtures/facetable?_no_can_render=1") - assert response.status_code == 200 - links = ( - Soup(response.text, "html.parser") - .find("p", {"class": "export-links"}) - .find_all("a") - ) - actual = [link["href"] for link in links] - # Should not be present because we sent ?_no_can_render=1 - assert "/fixtures/facetable.testall?_labels=on" not in actual - # Check that it was passed the values we expected - assert hasattr(ds_client.ds, "_can_render_saw") +def test_plugins_available_databases(app_client): + response = app_client.get("/-/databases.json") + assert 200 == response.status assert { - "datasette": ds_client.ds, - "columns": [ - "pk", - "created", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null, n from facetable order by pk limit 51", - "query_name": None, - "database": "fixtures", - "table": "facetable", - "view_name": "table", - }.items() <= ds_client.ds._can_render_saw.items() - - -@pytest.mark.asyncio -async def test_hook_prepare_jinja2_environment(ds_client): - ds_client.ds._HELLO = "HI" - await ds_client.ds.invoke_startup() - environment = ds_client.ds.get_jinja_environment(None) - template = environment.from_string( - "Hello there, {{ a|format_numeric }}, {{ a|to_hello }}, {{ b|select_times_three }}", - {"a": 3412341, "b": 5}, - ) - rendered = await ds_client.ds.render_template(template) - assert "Hello there, 3,412,341, HI, 15" == rendered - - -def test_hook_publish_subcommand(): - # This is hard to test properly, because publish subcommand plugins - # cannot be loaded using the --plugins-dir 
mechanism - they need - # to be installed using "pip install". So I'm cheating and taking - # advantage of the fact that cloudrun/heroku use the plugin hook - # to register themselves as default plugins. - assert ["cloudrun", "heroku"] == cli.publish.list_commands({}) - - -@pytest.mark.asyncio -async def test_hook_register_facet_classes(ds_client): - response = await ds_client.get( - "/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets" - ) - assert response.json()["suggested_facets"] == [ - { - "name": "pk1", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet_dummy=pk1", - "type": "dummy", - }, - { - "name": "pk2", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet_dummy=pk2", - "type": "dummy", - }, - { - "name": "pk3", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet_dummy=pk3", - "type": "dummy", - }, - { - "name": "content", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet_dummy=content", - "type": "dummy", - }, - { - "name": "pk1", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet=pk1", - }, - { - "name": "pk2", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet=pk2", - }, - { - "name": "pk3", - "toggle_url": "http://localhost/fixtures/compound_three_primary_keys.json?_dummy_facet=1&_extra=suggested_facets&_facet=pk3", - }, - ] - - -@pytest.mark.asyncio -async def test_hook_actor_from_request(ds_client): - await ds_client.get("/") - # Should have no actor - assert ds_client.ds._last_request.scope["actor"] is None - await ds_client.get("/?_bot=1") - # Should have bot actor - assert ds_client.ds._last_request.scope["actor"] == {"id": "bot"} - - -@pytest.mark.asyncio -async def test_hook_actor_from_request_async(ds_client): - await ds_client.get("/") - # Should have no actor - assert ds_client.ds._last_request.scope["actor"] is None - await ds_client.get("/?_bot2=1") - # Should have bot2 actor - assert ds_client.ds._last_request.scope["actor"] == {"id": "bot2", "1+1": 2} - - -@pytest.mark.asyncio -async def test_existing_scope_actor_respected(ds_client): - await ds_client.get("/?_actor_in_scope=1") - assert ds_client.ds._last_request.scope["actor"] == {"id": "from-scope"} - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "action,expected", - [ - ("this_is_allowed", True), - ("this_is_denied", False), - ("this_is_allowed_async", True), - ("this_is_denied_async", False), - ], -) -async def test_hook_custom_allowed(action, expected): - # Test actions and permission logic are defined in tests/plugins/my_plugin.py - ds = Datasette(plugins_dir=PLUGINS_DIR) - await ds.invoke_startup() - actual = await ds.allowed(action=action, actor={"id": "actor"}) - assert expected == actual - - -@pytest.mark.asyncio -async def test_hook_permission_resources_sql(): - ds = Datasette() - await ds.invoke_startup() - - collected = [] - for block in ds.pm.hook.permission_resources_sql( - datasette=ds, - actor={"id": "alice"}, - action="view-table", - ): - block = await await_me_maybe(block) - if block is None: - continue - if isinstance(block, (list, tuple)): - collected.extend(block) - else: - collected.append(block) - - assert collected - assert 
all(isinstance(item, PermissionSQL) for item in collected) - - -@pytest.mark.asyncio -async def test_actor_json(ds_client): - assert (await ds_client.get("/-/actor.json")).json() == {"actor": None} - assert (await ds_client.get("/-/actor.json?_bot2=1")).json() == { - "actor": {"id": "bot2", "1+1": 2} - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,body", - [ - ("/one/", "2"), - ("/two/Ray?greeting=Hail", "Hail Ray"), - ("/not-async/", "This was not async"), - ], -) -async def test_hook_register_routes(ds_client, path, body): - response = await ds_client.get(path) - assert response.status_code == 200 - assert response.text == body - - -@pytest.mark.parametrize("configured_path", ("path1", "path2")) -def test_hook_register_routes_with_datasette(configured_path): - with make_app_client( - config={ - "plugins": { - "register-route-demo": { - "path": configured_path, - } - } - } - ) as client: - response = client.get(f"/{configured_path}/") - assert response.status_code == 200 - assert configured_path.upper() == response.text - # Other one should 404 - other_path = [p for p in ("path1", "path2") if configured_path != p][0] - assert client.get(f"/{other_path}/", follow_redirects=True).status_code == 404 - - -def test_hook_register_routes_override(): - "Plugins can over-ride default paths such as /db/table" - with make_app_client( - config={ - "plugins": { - "register-route-demo": { - "path": "blah", - } - } - } - ) as client: - response = client.get("/db/table") - assert response.status_code == 200 - assert ( - response.text - == "/db/table: [('db_name', 'db'), ('table_and_format', 'table')]" - ) - - -def test_hook_register_routes_post(app_client): - response = app_client.post("/post/", {"this is": "post data"}, csrftoken_from=True) - assert response.status_code == 200 - assert "csrftoken" in response.json - assert response.json["this is"] == "post data" - - -def test_hook_register_routes_csrftoken(restore_working_directory, tmpdir_factory): - templates = tmpdir_factory.mktemp("templates") - (templates / "csrftoken_form.html").write_text( - "CSRFTOKEN: {{ csrftoken() }}", "utf-8" - ) - with make_app_client(template_dir=templates) as client: - response = client.get("/csrftoken-form/") - expected_token = client.ds._last_request.scope["csrftoken"]() - assert f"CSRFTOKEN: {expected_token}" == response.text - - -@pytest.mark.asyncio -async def test_hook_register_routes_asgi(ds_client): - response = await ds_client.get("/three/") - assert {"hello": "world"} == response.json() - assert "1" == response.headers["x-three"] - - -@pytest.mark.asyncio -async def test_hook_register_routes_add_message(ds_client): - response = await ds_client.get("/add-message/") - assert response.status_code == 200 - assert response.text == "Added message" - decoded = ds_client.ds.unsign(response.cookies["ds_messages"], "messages") - assert decoded == [["Hello from messages", 1]] - - -def test_hook_register_routes_render_message(restore_working_directory, tmpdir_factory): - templates = tmpdir_factory.mktemp("templates") - (templates / "render_message.html").write_text('{% extends "base.html" %}', "utf-8") - with make_app_client(template_dir=templates) as client: - response1 = client.get("/add-message/") - response2 = client.get("/render-message/", cookies=response1.cookies) - assert 200 == response2.status - assert "Hello from messages" in response2.text - - -@pytest.mark.asyncio -async def test_hook_startup(ds_client): - await ds_client.ds.invoke_startup() - assert ds_client.ds._startup_hook_fired - assert 2 
== ds_client.ds._startup_hook_calculation - - -@pytest.mark.asyncio -async def test_hook_canned_queries(ds_client): - queries = (await ds_client.get("/fixtures.json")).json()["queries"] - queries_by_name = {q["name"]: q for q in queries} - assert { - "sql": "select 2", - "name": "from_async_hook", - "private": False, - } == queries_by_name["from_async_hook"] - assert { - "sql": "select 1, 'null' as actor_id", - "name": "from_hook", - "private": False, - } == queries_by_name["from_hook"] - - -@pytest.mark.asyncio -async def test_hook_canned_queries_non_async(ds_client): - response = await ds_client.get("/fixtures/from_hook.json?_shape=array") - assert [{"1": 1, "actor_id": "null"}] == response.json() - - -@pytest.mark.asyncio -async def test_hook_canned_queries_async(ds_client): - response = await ds_client.get("/fixtures/from_async_hook.json?_shape=array") - assert [{"2": 2}] == response.json() - - -@pytest.mark.asyncio -async def test_hook_canned_queries_actor(ds_client): - assert ( - await ds_client.get("/fixtures/from_hook.json?_bot=1&_shape=array") - ).json() == [{"1": 1, "actor_id": "bot"}] - - -def test_hook_register_magic_parameters(restore_working_directory): - with make_app_client( - extra_databases={"data.db": "create table logs (line text)"}, - config={ - "databases": { - "data": { - "queries": { - "runme": { - "sql": "insert into logs (line) values (:_request_http_version)", - "write": True, - }, - "get_uuid": { - "sql": "select :_uuid_new", - }, - "asyncrequest": { - "sql": "select :_asyncrequest_key", - }, - } - } - } - }, - ) as client: - response = client.post("/data/runme", {}, csrftoken_from=True) - assert response.status_code == 302 - actual = client.get("/data/logs.json?_sort_desc=rowid&_shape=array").json - assert [{"rowid": 1, "line": "1.1"}] == actual - # Now try the GET request against get_uuid - response_get = client.get("/data/get_uuid.json?_shape=array") - assert 200 == response_get.status - new_uuid = response_get.json[0][":_uuid_new"] - assert 4 == new_uuid.count("-") - # And test the async one - response_async = client.get("/data/asyncrequest.json?_shape=array") - assert 200 == response_async.status - assert response_async.json[0][":_asyncrequest_key"] == "key" - - -def test_hook_forbidden(restore_working_directory): - with make_app_client( - extra_databases={"data2.db": "create table logs (line text)"}, - config={"allow": {}}, - ) as client: - response = client.get("/") - assert response.status_code == 403 - response2 = client.get("/data2") - assert 302 == response2.status - assert ( - response2.headers["Location"] - == "/login?message=You do not have permission to view this database" - ) - assert ( - client.ds._last_forbidden_message - == "You do not have permission to view this database" - ) - - -@pytest.mark.asyncio -async def test_hook_handle_exception(ds_client): - await ds_client.get("/trigger-error?x=123") - assert hasattr(ds_client.ds, "_exception_hook_fired") - request, exception = ds_client.ds._exception_hook_fired - assert request.url == "http://localhost/trigger-error?x=123" - assert isinstance(exception, ZeroDivisionError) - - -@pytest.mark.asyncio -@pytest.mark.parametrize("param", ("_custom_error", "_custom_error_async")) -async def test_hook_handle_exception_custom_response(ds_client, param): - response = await ds_client.get("/trigger-error?{}=1".format(param)) - assert response.text == param - - -@pytest.mark.asyncio -async def test_hook_menu_links(ds_client): - def get_menu_links(html): - soup = Soup(html, "html.parser") - return [ - 
{"label": a.text, "href": a["href"]} for a in soup.select(".nav-menu a") - ] - - response = await ds_client.get("/") - assert get_menu_links(response.text) == [] - - response_2 = await ds_client.get("/?_bot=1&_hello=BOB") - assert get_menu_links(response_2.text) == [ - {"label": "Hello, BOB", "href": "/"}, - {"label": "Hello 2", "href": "/"}, - ] - - -@pytest.mark.asyncio -async def test_hook_table_actions(ds_client): - response = await ds_client.get("/fixtures/facetable") - assert get_actions_links(response.text) == [] - response_2 = await ds_client.get("/fixtures/facetable?_bot=1&_hello=BOB") - assert ">Table actions<" in response_2.text - assert sorted( - get_actions_links(response_2.text), key=lambda link: link["label"] - ) == [ - {"label": "Database: fixtures", "href": "/", "description": None}, - {"label": "From async BOB", "href": "/", "description": None}, - {"label": "Table: facetable", "href": "/", "description": None}, - ] - - -@pytest.mark.asyncio -async def test_hook_view_actions(ds_client): - response = await ds_client.get("/fixtures/simple_view") - assert get_actions_links(response.text) == [] - response_2 = await ds_client.get( - "/fixtures/simple_view", - cookies={"ds_actor": ds_client.actor_cookie({"id": "bob"})}, - ) - assert ">View actions<" in response_2.text - assert sorted( - get_actions_links(response_2.text), key=lambda link: link["label"] - ) == [ - {"label": "Database: fixtures", "href": "/", "description": None}, - {"label": "View: simple_view", "href": "/", "description": None}, - ] - - -def get_actions_links(html): - soup = Soup(html, "html.parser") - details = soup.find("details", {"class": "actions-menu-links"}) - if details is None: - return [] - links = [] - for a_el in details.select("a"): - description = None - if a_el.find("p") is not None: - description = a_el.find("p").text.strip() - a_el.find("p").extract() - label = a_el.text.strip() - href = a_el["href"] - links.append({"label": label, "href": href, "description": description}) - return links - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_url", - ( - ("/fixtures/-/query?sql=select+1", "/fixtures/-/query?sql=explain+select+1"), - pytest.param( - "/fixtures/pragma_cache_size", - "/fixtures/-/query?sql=explain+PRAGMA+cache_size%3B", - ), - # Don't attempt to explain an explain - ("/fixtures/-/query?sql=explain+select+1", None), - ), -) -async def test_hook_query_actions(ds_client, path, expected_url): - response = await ds_client.get(path) - assert response.status_code == 200 - links = get_actions_links(response.text) - if expected_url is None: - assert links == [] - else: - assert links == [ - { - "label": "Explain this query", - "href": expected_url, - "description": "Runs a SQLite explain", - } - ] - - -@pytest.mark.asyncio -async def test_hook_row_actions(ds_client): - response = await ds_client.get("/fixtures/facet_cities/1") - assert get_actions_links(response.text) == [] - - response_2 = await ds_client.get( - "/fixtures/facet_cities/1", - cookies={"ds_actor": ds_client.actor_cookie({"id": "sam"})}, - ) - assert get_actions_links(response_2.text) == [ - { - "label": "Row details for sam", - "href": "/", - "description": '{"id": 1, "name": "San Francisco"}', - } - ] - - -@pytest.mark.asyncio -async def test_hook_database_actions(ds_client): - response = await ds_client.get("/fixtures") - assert get_actions_links(response.text) == [] - - response_2 = await ds_client.get("/fixtures?_bot=1&_hello=BOB") - assert get_actions_links(response_2.text) == [ - {"label": "Database: 
fixtures - BOB", "href": "/", "description": None}, - ] - - -@pytest.mark.asyncio -async def test_hook_homepage_actions(ds_client): - response = await ds_client.get("/") - # No button for anonymous users - assert "Homepage actions" not in response.text - # Signed in user gets an action - response2 = await ds_client.get( - "/", cookies={"ds_actor": ds_client.actor_cookie({"id": "troy"})} - ) - assert "Homepage actions" in response2.text - assert get_actions_links(response2.text) == [ - { - "label": "Custom homepage for: troy", - "href": "/-/custom-homepage", - "description": None, - }, - ] - - -def test_hook_skip_csrf(app_client): - cookie = app_client.actor_cookie({"id": "test"}) - csrf_response = app_client.post( - "/post/", - post_data={"this is": "post data"}, - csrftoken_from=True, - cookies={"ds_actor": cookie}, - ) - assert csrf_response.status_code == 200 - missing_csrf_response = app_client.post( - "/post/", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert missing_csrf_response.status_code == 403 - # But "/skip-csrf" should allow - allow_csrf_response = app_client.post( - "/skip-csrf", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert allow_csrf_response.status_code == 405 # Method not allowed - # /skip-csrf-2 should not - second_missing_csrf_response = app_client.post( - "/skip-csrf-2", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert second_missing_csrf_response.status_code == 403 - - -def _extract_commands(output): - lines = output.split("Commands:\n", 1)[1].split("\n") - return {line.split()[0].replace("*", "") for line in lines if line.strip()} - - -def test_hook_register_commands(): - # Without the plugin should have seven commands - runner = CliRunner() - result = runner.invoke(cli.cli, "--help") - commands = _extract_commands(result.output) - assert commands == { - "serve", - "inspect", - "install", - "package", - "plugins", - "publish", - "uninstall", - "create-token", - } - - # Now install a plugin - class VerifyPlugin: - __name__ = "VerifyPlugin" - - @hookimpl - def register_commands(self, cli): - @cli.command() - def verify(): - pass - - @cli.command() - def unverify(): - pass - - pm.register(VerifyPlugin(), name="verify") - importlib.reload(cli) - result2 = runner.invoke(cli.cli, "--help") - commands2 = _extract_commands(result2.output) - assert commands2 == { - "serve", - "inspect", - "install", - "package", - "plugins", - "publish", - "uninstall", - "verify", - "unverify", - "create-token", - } - pm.unregister(name="verify") - importlib.reload(cli) - - -@pytest.mark.asyncio -async def test_hook_filters_from_request(ds_client): - class ReturnNothingPlugin: - __name__ = "ReturnNothingPlugin" - - @hookimpl - def filters_from_request(self, request): - if request.args.get("_nothing"): - return FilterArguments(["1 = 0"], human_descriptions=["NOTHING"]) - - ds_client.ds.pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") - response = await ds_client.get("/fixtures/facetable?_nothing=1") - assert "0 rows\n where NOTHING" in response.text - json_response = await ds_client.get("/fixtures/facetable.json?_nothing=1") - assert json_response.json()["rows"] == [] - ds_client.ds.pm.unregister(name="ReturnNothingPlugin") - - -@pytest.mark.asyncio -@pytest.mark.parametrize("extra_metadata", (False, True)) -async def test_hook_register_actions(extra_metadata): - from datasette.permissions import Action - from datasette.resources import DatabaseResource, InstanceResource - - ds = Datasette( 
- config=( - { - "plugins": { - "datasette-register-actions": { - "actions": [ - { - "name": "extra-from-metadata", - "abbr": "efm", - "description": "Extra from metadata", - } - ] - } - } - } - if extra_metadata - else None - ), - plugins_dir=PLUGINS_DIR, - ) - await ds.invoke_startup() - assert ds.actions["action-from-plugin"] == Action( - name="action-from-plugin", - abbr="ap", - description="New action added by a plugin", - resource_class=DatabaseResource, - ) - if extra_metadata: - assert ds.actions["extra-from-metadata"] == Action( - name="extra-from-metadata", - abbr="efm", - description="Extra from metadata", - ) - else: - assert "extra-from-metadata" not in ds.actions - - -@pytest.mark.asyncio -@pytest.mark.parametrize("duplicate", ("name", "abbr")) -async def test_hook_register_actions_no_duplicates(duplicate): - name1, name2 = "name1", "name2" - abbr1, abbr2 = "abbr1", "abbr2" - if duplicate == "name": - name2 = "name1" - if duplicate == "abbr": - abbr2 = "abbr1" - ds = Datasette( - config={ - "plugins": { - "datasette-register-actions": { - "actions": [ - { - "name": name1, - "abbr": abbr1, - "description": None, - }, - { - "name": name2, - "abbr": abbr2, - "description": None, - }, - ] - } - } - }, - plugins_dir=PLUGINS_DIR, - ) - # This should error: - with pytest.raises(StartupError) as ex: - await ds.invoke_startup() - assert "Duplicate action {}".format(duplicate) in str(ex.value) - - -@pytest.mark.asyncio -async def test_hook_register_actions_allows_identical_duplicates(): - ds = Datasette( - config={ - "plugins": { - "datasette-register-actions": { - "actions": [ - { - "name": "name1", - "abbr": "abbr1", - "description": None, - }, - { - "name": "name1", - "abbr": "abbr1", - "description": None, - }, - ] - } - } - }, - plugins_dir=PLUGINS_DIR, - ) - await ds.invoke_startup() - # Check that ds.actions has only one of each - assert len([p for p in ds.actions.values() if p.abbr == "abbr1"]) == 1 - - -@pytest.mark.asyncio -async def test_hook_actors_from_ids(): - # Without the hook should return default {"id": id} list - ds = Datasette() - await ds.invoke_startup() - db = ds.add_memory_database("actors_from_ids") - await db.execute_write( - "create table actors (id text primary key, name text, age int)" - ) - await db.execute_write( - "insert into actors (id, name, age) values ('3', 'Cate Blanchett', 52)" - ) - await db.execute_write( - "insert into actors (id, name, age) values ('5', 'Rooney Mara', 36)" - ) - await db.execute_write( - "insert into actors (id, name, age) values ('7', 'Sarah Paulson', 46)" - ) - await db.execute_write( - "insert into actors (id, name, age) values ('9', 'Helena Bonham Carter', 55)" - ) - table_names = await db.table_names() - assert table_names == ["actors"] - actors1 = await ds.actors_from_ids(["3", "5", "7"]) - assert actors1 == { - "3": {"id": "3"}, - "5": {"id": "5"}, - "7": {"id": "7"}, - } - - class ActorsFromIdsPlugin: - __name__ = "ActorsFromIdsPlugin" - - @hookimpl - def actors_from_ids(self, datasette, actor_ids): - db = datasette.get_database("actors_from_ids") - - async def inner(): - sql = "select id, name from actors where id in ({})".format( - ", ".join("?" 
for _ in actor_ids) - ) - actors = {} - result = await db.execute(sql, actor_ids) - for row in result.rows: - actor = dict(row) - actors[actor["id"]] = actor - return actors - - return inner - - try: - ds.pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") - actors2 = await ds.actors_from_ids(["3", "5", "7"]) - assert actors2 == { - "3": {"id": "3", "name": "Cate Blanchett"}, - "5": {"id": "5", "name": "Rooney Mara"}, - "7": {"id": "7", "name": "Sarah Paulson"}, - } - finally: - ds.pm.unregister(name="ReturnNothingPlugin") - - -@pytest.mark.asyncio -async def test_plugin_is_installed(): - datasette = Datasette(memory=True) - - class DummyPlugin: - __name__ = "DummyPlugin" - - @hookimpl - def actors_from_ids(self, datasette, actor_ids): - return {} - - try: - datasette.pm.register(DummyPlugin(), name="DummyPlugin") - response = await datasette.client.get("/-/plugins.json") - assert response.status_code == 200 - installed_plugins = {p["name"] for p in response.json()} - assert "DummyPlugin" in installed_plugins - - finally: - datasette.pm.unregister(name="DummyPlugin") - - -@pytest.mark.asyncio -async def test_hook_jinja2_environment_from_request(tmpdir): - templates = pathlib.Path(tmpdir / "templates") - templates.mkdir() - (templates / "index.html").write_text("Hello museums!", "utf-8") - - class EnvironmentPlugin: - @hookimpl - def jinja2_environment_from_request(self, request, env): - if request and request.host == "www.niche-museums.com": - return env.overlay( - loader=ChoiceLoader( - [ - FileSystemLoader(str(templates)), - env.loader, - ] - ), - enable_async=True, - ) - return env - - datasette = Datasette(memory=True) - - try: - datasette.pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") - response = await datasette.client.get("/") - assert response.status_code == 200 - assert "Hello museums!" not in response.text - # Try again with the hostname - response2 = await datasette.client.get( - "/", headers={"host": "www.niche-museums.com"} - ) - assert response2.status_code == 200 - assert "Hello museums!" 
in response2.text - finally: - datasette.pm.unregister(name="EnvironmentPlugin") - - -class SlotPlugin: - __name__ = "SlotPlugin" - - @hookimpl - def top_homepage(self, request): - return "Xtop_homepage:" + request.args["z"] - - @hookimpl - def top_database(self, request, database): - async def inner(): - return "Xtop_database:{}:{}".format(database, request.args["z"]) - - return inner - - @hookimpl - def top_table(self, request, database, table): - return "Xtop_table:{}:{}:{}".format(database, table, request.args["z"]) - - @hookimpl - def top_row(self, request, database, table, row): - return "Xtop_row:{}:{}:{}:{}".format( - database, table, row["name"], request.args["z"] - ) - - @hookimpl - def top_query(self, request, database, sql): - return "Xtop_query:{}:{}:{}".format(database, sql, request.args["z"]) - - @hookimpl - def top_canned_query(self, request, database, query_name): - return "Xtop_query:{}:{}:{}".format(database, query_name, request.args["z"]) - - -@pytest.mark.asyncio -async def test_hook_top_homepage(): - datasette = Datasette(memory=True) - try: - datasette.pm.register(SlotPlugin(), name="SlotPlugin") - response = await datasette.client.get("/?z=foo") - assert response.status_code == 200 - assert "Xtop_homepage:foo" in response.text - finally: - datasette.pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_top_database(): - datasette = Datasette(memory=True) - try: - datasette.pm.register(SlotPlugin(), name="SlotPlugin") - response = await datasette.client.get("/_memory?z=bar") - assert response.status_code == 200 - assert "Xtop_database:_memory:bar" in response.text - finally: - datasette.pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_top_table(ds_client): - try: - ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") - response = await ds_client.get("/fixtures/facetable?z=baz") - assert response.status_code == 200 - assert "Xtop_table:fixtures:facetable:baz" in response.text - finally: - ds_client.ds.pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_top_row(ds_client): - try: - ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") - response = await ds_client.get("/fixtures/facet_cities/1?z=bax") - assert response.status_code == 200 - assert "Xtop_row:fixtures:facet_cities:San Francisco:bax" in response.text - finally: - ds_client.ds.pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_top_query(ds_client): - try: - pm.register(SlotPlugin(), name="SlotPlugin") - response = await ds_client.get("/fixtures/-/query?sql=select+1&z=x") - assert response.status_code == 200 - assert "Xtop_query:fixtures:select 1:x" in response.text - finally: - pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_top_canned_query(ds_client): - try: - pm.register(SlotPlugin(), name="SlotPlugin") - response = await ds_client.get("/fixtures/from_hook?z=xyz") - assert response.status_code == 200 - assert "Xtop_query:fixtures:from_hook:xyz" in response.text - finally: - pm.unregister(name="SlotPlugin") - - -@pytest.mark.asyncio -async def test_hook_track_event(): - datasette = Datasette(memory=True) - from .conftest import TrackEventPlugin - - await datasette.invoke_startup() - await datasette.track_event( - TrackEventPlugin.OneEvent(actor=None, extra="extra extra") - ) - assert len(datasette._tracked_events) == 1 - assert isinstance(datasette._tracked_events[0], TrackEventPlugin.OneEvent) - event = datasette._tracked_events[0] - assert event.name 
== "one" - assert event.properties() == {"extra": "extra extra"} - # Should have a recent created as well - created = event.created - assert isinstance(created, datetime.datetime) - assert created.tzinfo == datetime.timezone.utc - - -@pytest.mark.asyncio -async def test_hook_register_events(): - datasette = Datasette(memory=True) - await datasette.invoke_startup() - assert any(k.__name__ == "OneEvent" for k in datasette.event_classes) - - -@pytest.mark.asyncio -async def test_hook_register_actions(): - datasette = Datasette(memory=True, plugins_dir=PLUGINS_DIR) - await datasette.invoke_startup() - # Check that the custom action from my_plugin.py is registered - assert "view-collection" in datasette.actions - action = datasette.actions["view-collection"] - assert action.abbr == "vc" - assert action.description == "View a collection" - - -@pytest.mark.asyncio -async def test_hook_register_actions_with_custom_resources(): - """ - Test registering actions with custom Resource classes: - - A global action (no resource) - - A parent-level action (DocumentCollectionResource) - - A child-level action (DocumentResource) - """ - from datasette.permissions import Resource, Action - - # Define custom Resource classes - class DocumentCollectionResource(Resource): - """A collection of documents.""" - - name = "document_collection" - parent_class = None # Top-level resource - - def __init__(self, collection: str): - super().__init__(parent=collection, child=None) - - @classmethod - async def resources_sql(cls, datasette) -> str: - return """ - SELECT 'collection1' AS parent, NULL AS child - UNION ALL - SELECT 'collection2' AS parent, NULL AS child - """ - - class DocumentResource(Resource): - """A document in a collection.""" - - name = "document" - parent_class = DocumentCollectionResource # Child of DocumentCollectionResource - - def __init__(self, collection: str, document: str): - super().__init__(parent=collection, child=document) - - @classmethod - async def resources_sql(cls, datasette) -> str: - return """ - SELECT 'collection1' AS parent, 'doc1' AS child - UNION ALL - SELECT 'collection1' AS parent, 'doc2' AS child - UNION ALL - SELECT 'collection2' AS parent, 'doc3' AS child - """ - - # Define a test plugin that registers these actions - class TestPlugin: - __name__ = "test_custom_resources_plugin" - - @hookimpl - def register_actions(self, datasette): - return [ - # Global action - no resource_class - Action( - name="manage-documents", - abbr="md", - description="Manage the document system", - ), - # Parent-level action - collection only - Action( - name="view-document-collection", - description="View a document collection", - resource_class=DocumentCollectionResource, - ), - # Child-level action - collection + document - Action( - name="view-document", - abbr="vdoc", - description="View a document", - resource_class=DocumentResource, - ), - ] - - @hookimpl - def permission_resources_sql(self, datasette, actor, action): - from datasette.permissions import PermissionSQL - - # Grant user2 access to manage-documents globally - if actor and actor.get("id") == "user2" and action == "manage-documents": - return PermissionSQL.allow(reason="user2 granted manage-documents") - - # Grant user2 access to view-document-collection globally - if ( - actor - and actor.get("id") == "user2" - and action == "view-document-collection" - ): - return PermissionSQL.allow( - reason="user2 granted view-document-collection" - ) - - # Default allow for view-document-collection (like other view-* actions) - if action == 
"view-document-collection": - return PermissionSQL.allow( - reason="default allow for view-document-collection" - ) - - # Default allow for view-document (like other view-* actions) - if action == "view-document": - return PermissionSQL.allow(reason="default allow for view-document") - - # Register the plugin temporarily - plugin = TestPlugin() - pm.register(plugin, name="test_custom_resources_plugin") - - try: - # Create datasette instance and invoke startup - datasette = Datasette(memory=True) - await datasette.invoke_startup() - - # Test global action - manage_docs = datasette.actions["manage-documents"] - assert manage_docs.name == "manage-documents" - assert manage_docs.abbr == "md" - assert manage_docs.resource_class is None - assert manage_docs.takes_parent is False - assert manage_docs.takes_child is False - - # Test parent-level action - view_collection = datasette.actions["view-document-collection"] - assert view_collection.name == "view-document-collection" - assert view_collection.abbr is None - assert view_collection.resource_class is DocumentCollectionResource - assert view_collection.takes_parent is True - assert view_collection.takes_child is False - - # Test child-level action - view_doc = datasette.actions["view-document"] - assert view_doc.name == "view-document" - assert view_doc.abbr == "vdoc" - assert view_doc.resource_class is DocumentResource - assert view_doc.takes_parent is True - assert view_doc.takes_child is True - - # Verify the resource classes have correct hierarchy - assert DocumentCollectionResource.parent_class is None - assert DocumentResource.parent_class is DocumentCollectionResource - - # Test that resources can be instantiated correctly - collection_resource = DocumentCollectionResource(collection="collection1") - assert collection_resource.parent == "collection1" - assert collection_resource.child is None - - doc_resource = DocumentResource(collection="collection1", document="doc1") - assert doc_resource.parent == "collection1" - assert doc_resource.child == "doc1" - - # Test permission checks with restricted actors - - # Test 1: Global action - no restrictions (custom actions default to deny) - unrestricted_actor = {"id": "user1"} - allowed = await datasette.allowed( - action="manage-documents", - actor=unrestricted_actor, - ) - assert allowed is False # Custom actions have no default allow - - # Test 2: Global action - user2 has explicit permission via plugin hook - restricted_global = {"id": "user2", "_r": {"a": ["md"]}} - allowed = await datasette.allowed( - action="manage-documents", - actor=restricted_global, - ) - assert allowed is True # Granted by plugin hook for user2 - - # Test 3: Global action - restricted but not in allowlist - restricted_no_access = {"id": "user3", "_r": {"a": ["vdc"]}} - allowed = await datasette.allowed( - action="manage-documents", - actor=restricted_no_access, - ) - assert allowed is False # Not in allowlist - - # Test 4: Collection-level action - allowed for specific collection - collection_resource = DocumentCollectionResource(collection="collection1") - # This one does not have an abbreviation: - restricted_collection = { - "id": "user4", - "_r": {"d": {"collection1": ["view-document-collection"]}}, - } - allowed = await datasette.allowed( - action="view-document-collection", - resource=collection_resource, - actor=restricted_collection, - ) - assert allowed is True # Allowed for collection1 - - # Test 5: Collection-level action - denied for different collection - collection2_resource = 
DocumentCollectionResource(collection="collection2") - allowed = await datasette.allowed( - action="view-document-collection", - resource=collection2_resource, - actor=restricted_collection, - ) - assert allowed is False # Not allowed for collection2 - - # Test 6: Document-level action - allowed for specific document - doc1_resource = DocumentResource(collection="collection1", document="doc1") - restricted_document = { - "id": "user5", - "_r": {"r": {"collection1": {"doc1": ["vdoc"]}}}, - } - allowed = await datasette.allowed( - action="view-document", - resource=doc1_resource, - actor=restricted_document, - ) - assert allowed is True # Allowed for collection1/doc1 - - # Test 7: Document-level action - denied for different document - doc2_resource = DocumentResource(collection="collection1", document="doc2") - allowed = await datasette.allowed( - action="view-document", - resource=doc2_resource, - actor=restricted_document, - ) - assert allowed is False # Not allowed for collection1/doc2 - - # Test 8: Document-level action - globally allowed - doc_resource = DocumentResource(collection="collection2", document="doc3") - restricted_all_docs = {"id": "user6", "_r": {"a": ["vdoc"]}} - allowed = await datasette.allowed( - action="view-document", - resource=doc_resource, - actor=restricted_all_docs, - ) - assert allowed is True # Globally allowed for all documents - - # Test 9: Verify hierarchy - collection access doesn't grant document access - collection_only_actor = {"id": "user7", "_r": {"d": {"collection1": ["vdc"]}}} - doc_resource = DocumentResource(collection="collection1", document="doc1") - allowed = await datasette.allowed( - action="view-document", - resource=doc_resource, - actor=collection_only_actor, - ) - assert ( - allowed is False - ) # Collection permission doesn't grant document permission - - finally: - # Unregister the plugin - pm.unregister(plugin) - - -@pytest.mark.skip(reason="TODO") -@pytest.mark.parametrize( - "metadata,config,expected_metadata,expected_config", - ( - ( - # Instance level - {"plugins": {"datasette-foo": "bar"}}, - {}, - {}, - {"plugins": {"datasette-foo": "bar"}}, - ), - ( - # Database level - {"databases": {"foo": {"plugins": {"datasette-foo": "bar"}}}}, - {}, - {}, - {"databases": {"foo": {"plugins": {"datasette-foo": "bar"}}}}, - ), - ( - # Table level - { - "databases": { - "foo": {"tables": {"bar": {"plugins": {"datasette-foo": "bar"}}}} - } - }, - {}, - {}, - { - "databases": { - "foo": {"tables": {"bar": {"plugins": {"datasette-foo": "bar"}}}} - } - }, - ), - ( - # Keep other keys - {"plugins": {"datasette-foo": "bar"}, "other": "key"}, - {"original_config": "original"}, - {"other": "key"}, - {"original_config": "original", "plugins": {"datasette-foo": "bar"}}, - ), - ), -) -def test_metadata_plugin_config_treated_as_config( - metadata, config, expected_metadata, expected_config -): - ds = Datasette(metadata=metadata, config=config) - actual_metadata = ds.metadata() - assert "plugins" not in actual_metadata - assert actual_metadata == expected_metadata - assert ds.config == expected_config + "name": "special", + "path": None, + "size": 0, + "is_mutable": False, + "is_memory": False, + "hash": None, + } in response.json + assert [{"id": 1, "bar": "hello"}] == app_client.get( + "/special/foo.json?_shape=array" + ).json diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index f53e5059..1e9bb830 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -2,22 +2,17 @@ from click.testing import 
CliRunner from datasette import cli from unittest import mock import json -import os -import pytest -import textwrap -@pytest.mark.serial @mock.patch("shutil.which") -def test_publish_cloudrun_requires_gcloud(mock_which, tmp_path_factory): +def test_publish_cloudrun_requires_gcloud(mock_which): mock_which.return_value = False runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke(cli.cli, ["publish", "cloudrun", "test.db"]) - assert result.exit_code == 1 - assert "Publishing to Google Cloud requires gcloud" in result.output + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "cloudrun", "test.db"]) + assert result.exit_code == 1 + assert "Publishing to Google Cloud requires gcloud" in result.output @mock.patch("shutil.which") @@ -26,383 +21,82 @@ def test_publish_cloudrun_invalid_database(mock_which): runner = CliRunner() result = runner.invoke(cli.cli, ["publish", "cloudrun", "woop.db"]) assert result.exit_code == 2 - assert "Path 'woop.db' does not exist" in result.output + assert 'Path "woop.db" does not exist' in result.output -@pytest.mark.serial @mock.patch("shutil.which") @mock.patch("datasette.publish.cloudrun.check_output") @mock.patch("datasette.publish.cloudrun.check_call") -@mock.patch("datasette.publish.cloudrun.get_existing_services") -def test_publish_cloudrun_prompts_for_service( - mock_get_existing_services, mock_call, mock_output, mock_which, tmp_path_factory -): - mock_get_existing_services.return_value = [ - {"name": "existing", "created": "2019-01-01", "url": "http://www.example.com/"} - ] +def test_publish_cloudrun(mock_call, mock_output, mock_which): mock_output.return_value = "myproject" mock_which.return_value = True runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, ["publish", "cloudrun", "test.db"], input="input-service" - ) - assert ( - "Please provide a service name for this deployment\n\n" - "Using an existing service name will over-write it\n\n" - "Your existing services:\n\n" - " existing - created 2019-01-01 - http://www.example.com/\n\n" - "Service name: input-service" - ) == result.output.strip() - assert 0 == result.exit_code - tag = "us-docker.pkg.dev/myproject/datasette/datasette-input-service" - mock_call.assert_has_calls( - [ - mock.call( - "gcloud services enable artifactregistry.googleapis.com --project myproject --quiet", - shell=True, - ), - mock.call( - "gcloud artifacts repositories describe datasette --project myproject --location us --quiet", - shell=True, - ), - mock.call(f"gcloud builds submit --tag {tag}", shell=True), - mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service --max-instances 1".format( - tag + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "cloudrun", "test.db"]) + assert 0 == result.exit_code + tag = "gcr.io/{}/datasette".format(mock_output.return_value) + mock_call.assert_has_calls( + [ + mock.call("gcloud builds submit --tag {}".format(tag), shell=True), + mock.call( + "gcloud beta run deploy --allow-unauthenticated --image {}".format( + tag + ), + shell=True, ), - shell=True, - ), - ] - ) - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.publish.cloudrun.check_output") 
-@mock.patch("datasette.publish.cloudrun.check_call") -def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory): - mock_output.return_value = "myproject" - mock_which.return_value = True - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"] - ) - assert 0 == result.exit_code - tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" - mock_call.assert_has_calls( - [ - mock.call( - f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", - shell=True, - ), - mock.call( - f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet", - shell=True, - ), - mock.call(f"gcloud builds submit --tag {tag}", shell=True), - mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} test --max-instances 1".format( - tag - ), - shell=True, - ), - ] - ) - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.publish.cloudrun.check_output") -@mock.patch("datasette.publish.cloudrun.check_call") -@pytest.mark.parametrize( - "memory,cpu,timeout,min_instances,max_instances,expected_gcloud_args", - [ - ["1Gi", None, None, None, None, "--memory 1Gi"], - ["2G", None, None, None, None, "--memory 2G"], - ["256Mi", None, None, None, None, "--memory 256Mi"], - [ - "4", - None, - None, - None, - None, - None, - ], - [ - "GB", - None, - None, - None, - None, - None, - ], - [None, 1, None, None, None, "--cpu 1"], - [None, 2, None, None, None, "--cpu 2"], - [None, 3, None, None, None, None], - [None, 4, None, None, None, "--cpu 4"], - ["2G", 4, None, None, None, "--memory 2G --cpu 4"], - [None, None, 1800, None, None, "--timeout 1800"], - [None, None, None, 2, None, "--min-instances 2"], - [None, None, None, 2, 4, "--min-instances 2 --max-instances 4"], - [None, 2, None, None, 4, "--cpu 2 --max-instances 4"], - ], -) -def test_publish_cloudrun_memory_cpu( - mock_call, - mock_output, - mock_which, - memory, - cpu, - timeout, - min_instances, - max_instances, - expected_gcloud_args, - tmp_path_factory, -): - mock_output.return_value = "myproject" - mock_which.return_value = True - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - args = ["publish", "cloudrun", "test.db", "--service", "test"] - if memory: - args.extend(["--memory", memory]) - if cpu: - args.extend(["--cpu", str(cpu)]) - if timeout: - args.extend(["--timeout", str(timeout)]) - result = runner.invoke(cli.cli, args) - if expected_gcloud_args is None: - assert 2 == result.exit_code - return - assert 0 == result.exit_code - tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" - expected_call = ( - "gcloud run deploy --allow-unauthenticated --platform=managed" - " --image {} test".format(tag) - ) - expected_build_call = f"gcloud builds submit --tag {tag}" - if memory: - expected_call += " --memory {}".format(memory) - if cpu: - expected_call += " --cpu {}".format(cpu) - if timeout: - expected_build_call += f" --timeout {timeout}" - # max_instances defaults to 1 - expected_call += " --max-instances 1" - mock_call.assert_has_calls( - [ - mock.call( - f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", - shell=True, - ), - mock.call( - f"gcloud artifacts repositories 
describe datasette --project {mock_output.return_value} --location us --quiet", - shell=True, - ), - mock.call(expected_build_call, shell=True), - mock.call( - expected_call, - shell=True, - ), - ] - ) - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.publish.cloudrun.check_output") -@mock.patch("datasette.publish.cloudrun.check_call") -def test_publish_cloudrun_plugin_secrets( - mock_call, mock_output, mock_which, tmp_path_factory -): - mock_which.return_value = True - mock_output.return_value = "myproject" - - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - with open("metadata.yml", "w") as fp: - fp.write( - textwrap.dedent( - """ - title: Hello from metadata YAML - plugins: - datasette-auth-github: - foo: bar - """ - ).strip() + ] ) - result = runner.invoke( - cli.cli, - [ - "publish", - "cloudrun", - "test.db", - "--metadata", - "metadata.yml", - "--service", - "datasette", - "--plugin-secret", - "datasette-auth-github", - "client_id", - "x-client-id", - "--show-files", - "--secret", - "x-secret", - ], - ) - assert result.exit_code == 0 - dockerfile = ( - result.output.split("==== Dockerfile ====\n")[1] - .split("\n====================\n")[0] - .strip() - ) - expected = textwrap.dedent( - r""" - FROM python:3.11.0-slim-bullseye - COPY . /app - WORKDIR /app - - ENV DATASETTE_AUTH_GITHUB_CLIENT_ID 'x-client-id' - ENV DATASETTE_SECRET 'x-secret' - RUN pip install -U datasette - RUN datasette inspect test.db --inspect-file inspect-data.json - ENV PORT 8001 - EXPOSE 8001 - CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --metadata metadata.json --setting force_https_urls on --port $PORT""" - ).strip() - assert expected == dockerfile - metadata = ( - result.output.split("=== metadata.json ===\n")[1] - .split("\n==== Dockerfile ====\n")[0] - .strip() - ) - assert { - "title": "Hello from metadata YAML", - "plugins": { - "datasette-auth-github": { - "client_id": {"$env": "DATASETTE_AUTH_GITHUB_CLIENT_ID"}, - "foo": "bar", - }, - }, - } == json.loads(metadata) -@pytest.mark.serial @mock.patch("shutil.which") @mock.patch("datasette.publish.cloudrun.check_output") @mock.patch("datasette.publish.cloudrun.check_call") -def test_publish_cloudrun_apt_get_install( - mock_call, mock_output, mock_which, tmp_path_factory -): +def test_publish_cloudrun_plugin_secrets(mock_call, mock_output, mock_which): mock_which.return_value = True mock_output.return_value = "myproject" runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, - [ - "publish", - "cloudrun", - "test.db", - "--service", - "datasette", - "--show-files", - "--secret", - "x-secret", - "--apt-get-install", - "ripgrep", - "--spatialite", - ], - ) - assert result.exit_code == 0 - dockerfile = ( - result.output.split("==== Dockerfile ====\n")[1] - .split("\n====================\n")[0] - .strip() - ) - expected = textwrap.dedent( - r""" - FROM python:3.11.0-slim-bullseye - COPY . 
/app - WORKDIR /app + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke( + cli.cli, + [ + "publish", + "cloudrun", + "test.db", + "--plugin-secret", + "datasette-auth-github", + "client_id", + "x-client-id", + "--show-files", + ], + ) + dockerfile = ( + result.output.split("==== Dockerfile ====\n")[1] + .split("\n====================\n")[0] + .strip() + ) + expected = """FROM python:3.6 +COPY . /app +WORKDIR /app - RUN apt-get update && \ - apt-get install -y ripgrep python3-dev gcc libsqlite3-mod-spatialite && \ - rm -rf /var/lib/apt/lists/* - - ENV DATASETTE_SECRET 'x-secret' - ENV SQLITE_EXTENSIONS '/usr/lib/x86_64-linux-gnu/mod_spatialite.so' - RUN pip install -U datasette - RUN datasette inspect test.db --inspect-file inspect-data.json - ENV PORT 8001 - EXPOSE 8001 - CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --setting force_https_urls on --port $PORT - """ - ).strip() - assert expected == dockerfile - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.publish.cloudrun.check_output") -@mock.patch("datasette.publish.cloudrun.check_call") -@pytest.mark.parametrize( - "extra_options,expected", - [ - ("", "--setting force_https_urls on"), - ( - "--setting base_url /foo", - "--setting base_url /foo --setting force_https_urls on", - ), - ("--setting force_https_urls off", "--setting force_https_urls off"), - ], -) -def test_publish_cloudrun_extra_options( - mock_call, mock_output, mock_which, extra_options, expected, tmp_path_factory -): - mock_which.return_value = True - mock_output.return_value = "myproject" - - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, - [ - "publish", - "cloudrun", - "test.db", - "--service", - "datasette", - "--show-files", - "--extra-options", - extra_options, - ], - ) - assert result.exit_code == 0 - dockerfile = ( - result.output.split("==== Dockerfile ====\n")[1] - .split("\n====================\n")[0] - .strip() - ) - last_line = dockerfile.split("\n")[-1] - extra_options = ( - last_line.split("--inspect-file inspect-data.json")[1] - .split("--port")[0] - .strip() - ) - assert extra_options == expected +ENV DATASETTE_AUTH_GITHUB_CLIENT_ID 'x-client-id' +RUN pip install -U datasette +RUN datasette inspect test.db --inspect-file inspect-data.json +ENV PORT 8001 +EXPOSE 8001 +CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --metadata metadata.json --port $PORT""".strip() + assert expected == dockerfile + metadata = ( + result.output.split("=== metadata.json ===\n")[1] + .split("\n==== Dockerfile ====\n")[0] + .strip() + ) + assert { + "plugins": { + "datasette-auth-github": { + "client_id": {"$env": "DATASETTE_AUTH_GITHUB_CLIENT_ID"} + } + } + } == json.loads(metadata) diff --git a/tests/test_publish_heroku.py b/tests/test_publish_heroku.py index cab83654..4cd66219 100644 --- a/tests/test_publish_heroku.py +++ b/tests/test_publish_heroku.py @@ -1,39 +1,30 @@ from click.testing import CliRunner from datasette import cli from unittest import mock -import os -import pathlib -import pytest -@pytest.mark.serial @mock.patch("shutil.which") -def test_publish_heroku_requires_heroku(mock_which, tmp_path_factory): +def test_publish_heroku_requires_heroku(mock_which): mock_which.return_value = False runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - 
fp.write("data") - result = runner.invoke(cli.cli, ["publish", "heroku", "test.db"]) - assert result.exit_code == 1 - assert "Publishing to Heroku requires heroku" in result.output + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "heroku", "test.db"]) + assert result.exit_code == 1 + assert "Publishing to Heroku requires heroku" in result.output -@pytest.mark.serial @mock.patch("shutil.which") @mock.patch("datasette.publish.heroku.check_output") @mock.patch("datasette.publish.heroku.call") -def test_publish_heroku_installs_plugin( - mock_call, mock_check_output, mock_which, tmp_path_factory -): +def test_publish_heroku_installs_plugin(mock_call, mock_check_output, mock_which): mock_which.return_value = True mock_check_output.side_effect = lambda s: {"['heroku', 'plugins']": b""}[repr(s)] runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("t.db", "w") as fp: - fp.write("data") - result = runner.invoke(cli.cli, ["publish", "heroku", "t.db"], input="y\n") - assert 0 != result.exit_code + with runner.isolated_filesystem(): + open("t.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "heroku", "t.db"], input="y\n") + assert 0 != result.exit_code mock_check_output.assert_has_calls( [mock.call(["heroku", "plugins"]), mock.call(["heroku", "apps:list", "--json"])] ) @@ -48,14 +39,13 @@ def test_publish_heroku_invalid_database(mock_which): runner = CliRunner() result = runner.invoke(cli.cli, ["publish", "heroku", "woop.db"]) assert result.exit_code == 2 - assert "Path 'woop.db' does not exist" in result.output + assert 'Path "woop.db" does not exist' in result.output -@pytest.mark.serial @mock.patch("shutil.which") @mock.patch("datasette.publish.heroku.check_output") @mock.patch("datasette.publish.heroku.call") -def test_publish_heroku(mock_call, mock_check_output, mock_which, tmp_path_factory): +def test_publish_heroku(mock_call, mock_check_output, mock_which): mock_which.return_value = True mock_check_output.side_effect = lambda s: { "['heroku', 'plugins']": b"heroku-builds", @@ -63,35 +53,19 @@ def test_publish_heroku(mock_call, mock_check_output, mock_which, tmp_path_facto "['heroku', 'apps:create', 'datasette', '--json']": b'{"name": "f"}', }[repr(s)] runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke(cli.cli, ["publish", "heroku", "test.db", "--tar", "gtar"]) - assert 0 == result.exit_code, result.output - mock_call.assert_has_calls( - [ - mock.call( - [ - "heroku", - "builds:create", - "-a", - "f", - "--include-vcs-ignore", - "--tar", - "gtar", - ] - ), - ] - ) + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "heroku", "test.db"]) + assert 0 == result.exit_code, result.output + mock_call.assert_called_once_with( + ["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"] + ) -@pytest.mark.serial @mock.patch("shutil.which") @mock.patch("datasette.publish.heroku.check_output") @mock.patch("datasette.publish.heroku.call") -def test_publish_heroku_plugin_secrets( - mock_call, mock_check_output, mock_which, tmp_path_factory -): +def test_publish_heroku_plugin_secrets(mock_call, mock_check_output, mock_which): mock_which.return_value = True mock_check_output.side_effect = lambda s: { "['heroku', 'plugins']": b"heroku-builds", @@ -99,85 +73,34 @@ def test_publish_heroku_plugin_secrets( "['heroku', 'apps:create', 
'datasette', '--json']": b'{"name": "f"}', }[repr(s)] runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - result = runner.invoke( - cli.cli, - [ - "publish", - "heroku", - "test.db", - "--plugin-secret", - "datasette-auth-github", - "client_id", - "x-client-id", - ], - ) - assert 0 == result.exit_code, result.output - mock_call.assert_has_calls( - [ - mock.call( - [ - "heroku", - "config:set", - "-a", - "f", - "DATASETTE_AUTH_GITHUB_CLIENT_ID=x-client-id", - ] - ), - mock.call(["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"]), - ] - ) - - -@pytest.mark.serial -@mock.patch("shutil.which") -@mock.patch("datasette.publish.heroku.check_output") -@mock.patch("datasette.publish.heroku.call") -def test_publish_heroku_generate_dir( - mock_call, mock_check_output, mock_which, tmp_path_factory -): - mock_which.return_value = True - mock_check_output.side_effect = lambda s: { - "['heroku', 'plugins']": b"heroku-builds", - }[repr(s)] - runner = CliRunner() - os.chdir(tmp_path_factory.mktemp("runner")) - with open("test.db", "w") as fp: - fp.write("data") - output = str(tmp_path_factory.mktemp("generate_dir") / "output") - result = runner.invoke( - cli.cli, - [ - "publish", - "heroku", - "test.db", - "--generate-dir", - output, - ], - ) - assert result.exit_code == 0 - path = pathlib.Path(output) - assert path.exists() - file_names = {str(r.relative_to(path)) for r in path.glob("*")} - assert file_names == { - "requirements.txt", - "bin", - "runtime.txt", - "Procfile", - "test.db", - } - for name, expected in ( - ("requirements.txt", "datasette"), - ("runtime.txt", "python-3.11.0"), - ( - "Procfile", - ( - "web: datasette serve --host 0.0.0.0 -i test.db " - "--cors --port $PORT --inspect-file inspect-data.json" - ), - ), - ): - with open(path / name) as fp: - assert fp.read().strip() == expected + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke( + cli.cli, + [ + "publish", + "heroku", + "test.db", + "--plugin-secret", + "datasette-auth-github", + "client_id", + "x-client-id", + ], + ) + assert 0 == result.exit_code, result.output + mock_call.assert_has_calls( + [ + mock.call( + [ + "heroku", + "config:set", + "-a", + "f", + "DATASETTE_AUTH_GITHUB_CLIENT_ID=x-client-id", + ] + ), + mock.call( + ["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"] + ), + ] + ) diff --git a/tests/test_publish_now.py b/tests/test_publish_now.py new file mode 100644 index 00000000..72aa71db --- /dev/null +++ b/tests/test_publish_now.py @@ -0,0 +1,163 @@ +from click.testing import CliRunner +from datasette import cli +from unittest import mock +import json +import subprocess + + +@mock.patch("shutil.which") +def test_publish_now_requires_now(mock_which): + mock_which.return_value = False + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "nowv1", "test.db"]) + assert result.exit_code == 1 + assert "Publishing to Zeit Now requires now" in result.output + + +@mock.patch("shutil.which") +def test_publish_now_invalid_database(mock_which): + mock_which.return_value = True + runner = CliRunner() + result = runner.invoke(cli.cli, ["publish", "nowv1", "woop.db"]) + assert result.exit_code == 2 + assert 'Path "woop.db" does not exist' in result.output + + +@mock.patch("shutil.which") +def test_publish_now_using_now_alias(mock_which): + mock_which.return_value = True + result = 
CliRunner().invoke(cli.cli, ["publish", "now", "woop.db"]) + assert result.exit_code == 2 + + +@mock.patch("shutil.which") +@mock.patch("datasette.publish.now.run") +def test_publish_now(mock_run, mock_which): + mock_which.return_value = True + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke(cli.cli, ["publish", "nowv1", "test.db"]) + assert 0 == result.exit_code + mock_run.assert_called_once_with("now", stdout=subprocess.PIPE) + + +@mock.patch("shutil.which") +@mock.patch("datasette.publish.now.run") +def test_publish_now_force_token(mock_run, mock_which): + mock_which.return_value = True + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke( + cli.cli, ["publish", "nowv1", "test.db", "--force", "--token=X"] + ) + assert 0 == result.exit_code + mock_run.assert_called_once_with( + ["now", "--force", "--token=X"], stdout=subprocess.PIPE + ) + + +@mock.patch("shutil.which") +@mock.patch("datasette.publish.now.run") +def test_publish_now_multiple_aliases(mock_run, mock_which): + mock_which.return_value = True + mock_run.return_value = mock.Mock(0) + mock_run.return_value.stdout = b"https://demo.example.com/" + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + runner.invoke( + cli.cli, + [ + "publish", + "now", + "test.db", + "--token", + "XXX", + "--alias", + "alias1", + "--alias", + "alias2", + ], + ) + mock_run.assert_has_calls( + [ + mock.call(["now", "--token=XXX"], stdout=subprocess.PIPE), + mock.call( + [ + "now", + "alias", + b"https://demo.example.com/", + "alias1", + "--token=XXX", + ] + ), + mock.call( + [ + "now", + "alias", + b"https://demo.example.com/", + "alias2", + "--token=XXX", + ] + ), + ] + ) + + +@mock.patch("shutil.which") +@mock.patch("datasette.publish.now.run") +def test_publish_now_plugin_secrets(mock_run, mock_which): + mock_which.return_value = True + mock_run.return_value = mock.Mock(0) + mock_run.return_value.stdout = b"https://demo.example.com/" + + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke( + cli.cli, + [ + "publish", + "now", + "test.db", + "--token", + "XXX", + "--plugin-secret", + "datasette-auth-github", + "client_id", + "x-client-id", + "--show-files", + ], + ) + dockerfile = ( + result.output.split("==== Dockerfile ====\n")[1] + .split("\n====================\n")[0] + .strip() + ) + expected = """FROM python:3.6 +COPY . 
/app +WORKDIR /app + +ENV DATASETTE_AUTH_GITHUB_CLIENT_ID 'x-client-id' +RUN pip install -U datasette +RUN datasette inspect test.db --inspect-file inspect-data.json +ENV PORT 8001 +EXPOSE 8001 +CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --metadata metadata.json --config force_https_urls:on --port $PORT""".strip() + assert expected == dockerfile + metadata = ( + result.output.split("=== metadata.json ===\n")[1] + .split("\n==== Dockerfile ====\n")[0] + .strip() + ) + assert { + "plugins": { + "datasette-auth-github": { + "client_id": {"$env": "DATASETTE_AUTH_GITHUB_CLIENT_ID"} + } + } + } == json.loads(metadata) diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py deleted file mode 100644 index f23eb839..00000000 --- a/tests/test_restriction_sql.py +++ /dev/null @@ -1,315 +0,0 @@ -import pytest -from datasette.app import Datasette -from datasette.permissions import PermissionSQL -from datasette.resources import TableResource - - -@pytest.mark.asyncio -async def test_multiple_restriction_sources_intersect(): - """ - Test that when multiple plugins return restriction_sql, they are INTERSECTed. - - This tests the case where both actor _r restrictions AND a plugin - provide restriction_sql - both must pass for access to be granted. - """ - from datasette import hookimpl - - class RestrictivePlugin: - __name__ = "RestrictivePlugin" - - @hookimpl - def permission_resources_sql(self, datasette, actor, action): - # Plugin adds additional restriction: only db1_multi_intersect allowed - if action == "view-table": - return PermissionSQL( - restriction_sql="SELECT 'db1_multi_intersect' AS parent, NULL AS child", - params={}, - ) - return None - - plugin = RestrictivePlugin() - - ds = Datasette() - await ds.invoke_startup() - ds.pm.register(plugin, name="restrictive_plugin") - - try: - db1 = ds.add_memory_database("db1_multi_intersect") - db2 = ds.add_memory_database("db2_multi_intersect") - await db1.execute_write("CREATE TABLE t1 (id INTEGER)") - await db2.execute_write("CREATE TABLE t1 (id INTEGER)") - await ds._refresh_schemas() # Populate catalog tables - - # Actor has restrictions allowing both databases - # But plugin only allows db1_multi_intersect - # INTERSECT means only db1_multi_intersect/t1 should pass - actor = { - "id": "user", - "_r": {"d": {"db1_multi_intersect": ["vt"], "db2_multi_intersect": ["vt"]}}, - } - - page = await ds.allowed_resources("view-table", actor) - resources = {(r.parent, r.child) for r in page.resources} - - # Should only see db1_multi_intersect/t1 (intersection of actor restrictions and plugin restrictions) - assert ("db1_multi_intersect", "t1") in resources - assert ("db2_multi_intersect", "t1") not in resources - finally: - ds.pm.unregister(name="restrictive_plugin") - - -@pytest.mark.asyncio -async def test_restriction_sql_with_overlapping_databases_and_tables(): - """ - Test actor with both database-level and table-level restrictions for same database. - - When actor has: - - Database-level: db1_overlapping allowed (all tables) - - Table-level: db1_overlapping/t1 allowed - - Both entries are UNION'd (OR'ed) within the actor's restrictions. - Database-level restriction allows ALL tables, so table-level is redundant. 
- """ - ds = Datasette() - await ds.invoke_startup() - db = ds.add_memory_database("db1_overlapping") - await db.execute_write("CREATE TABLE t1 (id INTEGER)") - await db.execute_write("CREATE TABLE t2 (id INTEGER)") - await ds._refresh_schemas() - - # Actor has BOTH database-level (db1_overlapping all tables) AND table-level (db1_overlapping/t1 only) - actor = { - "id": "user", - "_r": { - "d": { - "db1_overlapping": ["vt"] - }, # Database-level: all tables in db1_overlapping - "r": { - "db1_overlapping": {"t1": ["vt"]} - }, # Table-level: only t1 in db1_overlapping - }, - } - - # Within actor restrictions, entries are UNION'd (OR'ed): - # - Database level allows: (db1_overlapping, NULL) → matches all tables via hierarchical matching - # - Table level allows: (db1_overlapping, t1) → redundant, already covered by database level - # Result: Both tables are allowed - page = await ds.allowed_resources("view-table", actor) - resources = {(r.parent, r.child) for r in page.resources} - - assert ("db1_overlapping", "t1") in resources - # Database-level restriction allows all tables, so t2 is also allowed - assert ("db1_overlapping", "t2") in resources - - -@pytest.mark.asyncio -async def test_restriction_sql_empty_allowlist_query(): - """ - Test the specific SQL query generated when action is not in allowlist. - - actor_restrictions_sql() returns "SELECT NULL AS parent, NULL AS child WHERE 0" - Verify this produces an empty result set. - """ - ds = Datasette() - await ds.invoke_startup() - db = ds.add_memory_database("db1_empty_allowlist") - await db.execute_write("CREATE TABLE t1 (id INTEGER)") - await ds._refresh_schemas() - - # Actor has restrictions but action not in allowlist - actor = {"id": "user", "_r": {"r": {"db1_empty_allowlist": {"t1": ["vt"]}}}} - - # Try to view-database (only view-table is in allowlist) - page = await ds.allowed_resources("view-database", actor) - - # Should be empty - assert len(page.resources) == 0 - - -@pytest.mark.asyncio -async def test_restriction_sql_with_pagination(): - """ - Test that restrictions work correctly with keyset pagination. - """ - ds = Datasette() - await ds.invoke_startup() - db = ds.add_memory_database("db1_pagination") - - # Create many tables - for i in range(10): - await db.execute_write(f"CREATE TABLE t{i:02d} (id INTEGER)") - await ds._refresh_schemas() - - # Actor restricted to only odd-numbered tables - restrictions = {"r": {"db1_pagination": {}}} - for i in range(10): - if i % 2 == 1: # Only odd tables - restrictions["r"]["db1_pagination"][f"t{i:02d}"] = ["vt"] - - actor = {"id": "user", "_r": restrictions} - - # Get first page with small limit - page1 = await ds.allowed_resources( - "view-table", actor, parent="db1_pagination", limit=2 - ) - assert len(page1.resources) == 2 - assert page1.next is not None - - # Get second page using next token - page2 = await ds.allowed_resources( - "view-table", actor, parent="db1_pagination", limit=2, next=page1.next - ) - assert len(page2.resources) == 2 - - # Should have no overlap - page1_ids = {r.child for r in page1.resources} - page2_ids = {r.child for r in page2.resources} - assert page1_ids.isdisjoint(page2_ids) - - # All should be odd-numbered tables - all_ids = page1_ids | page2_ids - for table_id in all_ids: - table_num = int(table_id[1:]) # Extract number from "t01", "t03", etc. 
- assert table_num % 2 == 1, f"Table {table_id} should be odd-numbered" - - -@pytest.mark.asyncio -async def test_also_requires_with_restrictions(): - """ - Test that also_requires actions properly respect restrictions. - - execute-sql requires view-database. With restrictions, both must pass. - """ - ds = Datasette() - await ds.invoke_startup() - db1 = ds.add_memory_database("db1_also_requires") - db2 = ds.add_memory_database("db2_also_requires") - await ds._refresh_schemas() - - # Actor restricted to only db1_also_requires for view-database - # execute-sql requires view-database, so should only work on db1_also_requires - actor = { - "id": "user", - "_r": { - "d": { - "db1_also_requires": ["vd", "es"], - "db2_also_requires": [ - "es" - ], # They have execute-sql but not view-database - } - }, - } - - # db1_also_requires should allow execute-sql - result = await ds.allowed( - action="execute-sql", - resource=TableResource("db1_also_requires", None), - actor=actor, - ) - assert result is True - - # db2_also_requires should not (they have execute-sql but not view-database) - result = await ds.allowed( - action="execute-sql", - resource=TableResource("db2_also_requires", None), - actor=actor, - ) - assert result is False - - -@pytest.mark.asyncio -async def test_restriction_abbreviations_and_full_names(): - """ - Test that both abbreviations and full action names work in restrictions. - """ - ds = Datasette() - await ds.invoke_startup() - db = ds.add_memory_database("db1_abbrev") - await db.execute_write("CREATE TABLE t1 (id INTEGER)") - await ds._refresh_schemas() - - # Test with abbreviation - actor_abbr = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["vt"]}}}} - result = await ds.allowed( - action="view-table", - resource=TableResource("db1_abbrev", "t1"), - actor=actor_abbr, - ) - assert result is True - - # Test with full name - actor_full = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["view-table"]}}}} - result = await ds.allowed( - action="view-table", - resource=TableResource("db1_abbrev", "t1"), - actor=actor_full, - ) - assert result is True - - # Test with mixed - actor_mixed = {"id": "user", "_r": {"d": {"db1_abbrev": ["view-database", "vt"]}}} - result = await ds.allowed( - action="view-table", - resource=TableResource("db1_abbrev", "t1"), - actor=actor_mixed, - ) - assert result is True - - -@pytest.mark.asyncio -async def test_permission_resources_sql_multiple_restriction_sources_intersect(): - """ - Test that when multiple plugins return restriction_sql, they are INTERSECTed. - - This tests the case where both actor _r restrictions AND a plugin - provide restriction_sql - both must pass for access to be granted. 
- """ - from datasette import hookimpl - - class RestrictivePlugin: - __name__ = "RestrictivePlugin" - - @hookimpl - def permission_resources_sql(self, datasette, actor, action): - # Plugin adds additional restriction: only db1_multi_restrictions allowed - if action == "view-table": - return PermissionSQL( - restriction_sql="SELECT 'db1_multi_restrictions' AS parent, NULL AS child", - params={}, - ) - return None - - plugin = RestrictivePlugin() - - ds = Datasette() - await ds.invoke_startup() - ds.pm.register(plugin, name="restrictive_plugin") - - try: - db1 = ds.add_memory_database("db1_multi_restrictions") - db2 = ds.add_memory_database("db2_multi_restrictions") - await db1.execute_write("CREATE TABLE t1 (id INTEGER)") - await db2.execute_write("CREATE TABLE t1 (id INTEGER)") - await ds._refresh_schemas() # Populate catalog tables - - # Actor has restrictions allowing both databases - # But plugin only allows db1 - # INTERSECT means only db1/t1 should pass - actor = { - "id": "user", - "_r": { - "d": { - "db1_multi_restrictions": ["vt"], - "db2_multi_restrictions": ["vt"], - } - }, - } - - page = await ds.allowed_resources("view-table", actor) - resources = {(r.parent, r.child) for r in page.resources} - - # Should only see db1/t1 (intersection of actor restrictions and plugin restrictions) - assert ("db1_multi_restrictions", "t1") in resources - assert ("db2_multi_restrictions", "t1") not in resources - finally: - ds.pm.unregister(name="restrictive_plugin") diff --git a/tests/test_routes.py b/tests/test_routes.py deleted file mode 100644 index 9866cc76..00000000 --- a/tests/test_routes.py +++ /dev/null @@ -1,111 +0,0 @@ -from datasette.app import Datasette, Database -from datasette.utils import resolve_routes -import pytest -import pytest_asyncio - - -@pytest.fixture(scope="session") -def routes(): - ds = Datasette() - return ds._routes() - - -@pytest.mark.parametrize( - "path,expected_name,expected_matches", - ( - ("/", "IndexView", {"format": None}), - ("/foo", "DatabaseView", {"format": None, "database": "foo"}), - ("/foo.csv", "DatabaseView", {"format": "csv", "database": "foo"}), - ("/foo.json", "DatabaseView", {"format": "json", "database": "foo"}), - ("/foo.humbug", "DatabaseView", {"format": "humbug", "database": "foo"}), - ( - "/foo/humbug", - "table_view", - {"database": "foo", "table": "humbug", "format": None}, - ), - ( - "/foo/humbug.json", - "table_view", - {"database": "foo", "table": "humbug", "format": "json"}, - ), - ( - "/foo/humbug.blah", - "table_view", - {"database": "foo", "table": "humbug", "format": "blah"}, - ), - ( - "/foo/humbug/1", - "RowView", - {"format": None, "database": "foo", "pks": "1", "table": "humbug"}, - ), - ( - "/foo/humbug/1.json", - "RowView", - {"format": "json", "database": "foo", "pks": "1", "table": "humbug"}, - ), - ), -) -def test_routes(routes, path, expected_name, expected_matches): - match, view = resolve_routes(routes, path) - if expected_name is None: - assert match is None - else: - assert ( - view.__name__ == expected_name or view.view_class.__name__ == expected_name - ) - assert match.groupdict() == expected_matches - - -@pytest_asyncio.fixture -async def ds_with_route(): - ds = Datasette() - await ds.invoke_startup() - ds.remove_database("_memory") - db = Database(ds, is_memory=True, memory_name="route-name-db") - ds.add_database(db, name="original-name", route="custom-route-name") - await db.execute_write_script( - """ - create table if not exists t (id integer primary key); - insert or replace into t (id) values (1); - """ - 
) - return ds - - -@pytest.mark.asyncio -async def test_db_with_route_databases(ds_with_route): - response = await ds_with_route.client.get("/-/databases.json") - assert response.json()[0] == { - "name": "original-name", - "route": "custom-route-name", - "path": None, - "size": 0, - "is_mutable": True, - "is_memory": True, - "hash": None, - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_status", - ( - ("/", 200), - ("/original-name", 404), - ("/original-name/t", 404), - ("/original-name/t/1", 404), - ("/custom-route-name", 200), - ("/custom-route-name/-/query?sql=select+id+from+t", 200), - ("/custom-route-name/t", 200), - ("/custom-route-name/t/1", 200), - ), -) -async def test_db_with_route_that_does_not_match_name( - ds_with_route, path, expected_status -): - response = await ds_with_route.client.get(path) - assert response.status_code == expected_status - # There should be links to custom-route-name but none to original-name - if response.status_code == 200: - assert "/custom-route-name" in response.text - assert "/original-name" not in response.text diff --git a/tests/test_schema_endpoints.py b/tests/test_schema_endpoints.py deleted file mode 100644 index 5500a7b0..00000000 --- a/tests/test_schema_endpoints.py +++ /dev/null @@ -1,248 +0,0 @@ -import asyncio -import pytest -import pytest_asyncio -from datasette.app import Datasette - - -@pytest_asyncio.fixture(scope="module") -async def schema_ds(): - """Create a Datasette instance with test databases and permission config.""" - ds = Datasette( - config={ - "databases": { - "schema_private_db": {"allow": {"id": "root"}}, - } - } - ) - - # Create public database with multiple tables - public_db = ds.add_memory_database("schema_public_db") - await public_db.execute_write( - "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT)" - ) - await public_db.execute_write( - "CREATE TABLE IF NOT EXISTS posts (id INTEGER PRIMARY KEY, title TEXT)" - ) - await public_db.execute_write( - "CREATE VIEW IF NOT EXISTS recent_posts AS SELECT * FROM posts ORDER BY id DESC" - ) - - # Create a database with restricted access (requires root permission) - private_db = ds.add_memory_database("schema_private_db") - await private_db.execute_write( - "CREATE TABLE IF NOT EXISTS secret_data (id INTEGER PRIMARY KEY, value TEXT)" - ) - - # Create an empty database - ds.add_memory_database("schema_empty_db") - - return ds - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "format_ext,expected_in_content", - [ - ("json", None), - ("md", ["# Schema for", "```sql"]), - ("", ["Schema for", "CREATE TABLE"]), - ], -) -async def test_database_schema_formats(schema_ds, format_ext, expected_in_content): - """Test /database/-/schema endpoint in different formats.""" - url = "/schema_public_db/-/schema" - if format_ext: - url += f".{format_ext}" - response = await schema_ds.client.get(url) - assert response.status_code == 200 - - if format_ext == "json": - data = response.json() - assert "database" in data - assert data["database"] == "schema_public_db" - assert "schema" in data - assert "CREATE TABLE users" in data["schema"] - else: - content = response.text - for expected in expected_in_content: - assert expected in content - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "format_ext,expected_in_content", - [ - ("json", None), - ("md", ["# Schema for", "```sql"]), - ("", ["Schema for all databases"]), - ], -) -async def test_instance_schema_formats(schema_ds, format_ext, expected_in_content): - """Test /-/schema endpoint in 
different formats.""" - url = "/-/schema" - if format_ext: - url += f".{format_ext}" - response = await schema_ds.client.get(url) - assert response.status_code == 200 - - if format_ext == "json": - data = response.json() - assert "schemas" in data - assert isinstance(data["schemas"], list) - db_names = [item["database"] for item in data["schemas"]] - # Should see schema_public_db and schema_empty_db, but not schema_private_db (anonymous user) - assert "schema_public_db" in db_names - assert "schema_empty_db" in db_names - assert "schema_private_db" not in db_names - # Check schemas are present - for item in data["schemas"]: - if item["database"] == "schema_public_db": - assert "CREATE TABLE users" in item["schema"] - else: - content = response.text - for expected in expected_in_content: - assert expected in content - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "format_ext,expected_in_content", - [ - ("json", None), - ("md", ["# Schema for", "```sql"]), - ("", ["Schema for users"]), - ], -) -async def test_table_schema_formats(schema_ds, format_ext, expected_in_content): - """Test /database/table/-/schema endpoint in different formats.""" - url = "/schema_public_db/users/-/schema" - if format_ext: - url += f".{format_ext}" - response = await schema_ds.client.get(url) - assert response.status_code == 200 - - if format_ext == "json": - data = response.json() - assert "database" in data - assert data["database"] == "schema_public_db" - assert "table" in data - assert data["table"] == "users" - assert "schema" in data - assert "CREATE TABLE users" in data["schema"] - else: - content = response.text - for expected in expected_in_content: - assert expected in content - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "url", - [ - "/schema_private_db/-/schema.json", - "/schema_private_db/secret_data/-/schema.json", - ], -) -async def test_schema_permission_enforcement(schema_ds, url): - """Test that permissions are enforced for schema endpoints.""" - # Anonymous user should get 403 - response = await schema_ds.client.get(url) - assert response.status_code == 403 - - # Authenticated user with permission should succeed - response = await schema_ds.client.get( - url, - cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - - -@pytest.mark.asyncio -async def test_instance_schema_respects_database_permissions(schema_ds): - """Test that /-/schema only shows databases the user can view.""" - # Anonymous user should only see public databases - response = await schema_ds.client.get("/-/schema.json") - assert response.status_code == 200 - data = response.json() - db_names = [item["database"] for item in data["schemas"]] - assert "schema_public_db" in db_names - assert "schema_empty_db" in db_names - assert "schema_private_db" not in db_names - - # Authenticated user should see all databases - response = await schema_ds.client.get( - "/-/schema.json", - cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, - ) - assert response.status_code == 200 - data = response.json() - db_names = [item["database"] for item in data["schemas"]] - assert "schema_public_db" in db_names - assert "schema_empty_db" in db_names - assert "schema_private_db" in db_names - - -@pytest.mark.asyncio -async def test_database_schema_with_multiple_tables(schema_ds): - """Test schema with multiple tables in a database.""" - response = await schema_ds.client.get("/schema_public_db/-/schema.json") - assert response.status_code == 200 - data = 
response.json() - schema = data["schema"] - - # All objects should be in the schema - assert "CREATE TABLE users" in schema - assert "CREATE TABLE posts" in schema - assert "CREATE VIEW recent_posts" in schema - - -@pytest.mark.asyncio -async def test_empty_database_schema(schema_ds): - """Test schema for an empty database.""" - response = await schema_ds.client.get("/schema_empty_db/-/schema.json") - assert response.status_code == 200 - data = response.json() - assert data["database"] == "schema_empty_db" - assert data["schema"] == "" - - -@pytest.mark.asyncio -async def test_database_not_exists(schema_ds): - """Test schema for a non-existent database returns 404.""" - # Test JSON format - response = await schema_ds.client.get("/nonexistent_db/-/schema.json") - assert response.status_code == 404 - data = response.json() - assert data["ok"] is False - assert "not found" in data["error"].lower() - - # Test HTML format (returns text) - response = await schema_ds.client.get("/nonexistent_db/-/schema") - assert response.status_code == 404 - assert "not found" in response.text.lower() - - # Test Markdown format (returns text) - response = await schema_ds.client.get("/nonexistent_db/-/schema.md") - assert response.status_code == 404 - assert "not found" in response.text.lower() - - -@pytest.mark.asyncio -async def test_table_not_exists(schema_ds): - """Test schema for a non-existent table returns 404.""" - # Test JSON format - response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.json") - assert response.status_code == 404 - data = response.json() - assert data["ok"] is False - assert "not found" in data["error"].lower() - - # Test HTML format (returns text) - response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema") - assert response.status_code == 404 - assert "not found" in response.text.lower() - - # Test Markdown format (returns text) - response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.md") - assert response.status_code == 404 - assert "not found" in response.text.lower() diff --git a/tests/test_search_tables.py b/tests/test_search_tables.py deleted file mode 100644 index 34b37706..00000000 --- a/tests/test_search_tables.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Tests for special endpoints in datasette/views/special.py -""" - -import pytest -import pytest_asyncio -from datasette.app import Datasette - - -@pytest_asyncio.fixture -async def ds_with_tables(): - """Create a Datasette instance with some tables for searching.""" - ds = Datasette( - config={ - "databases": { - "content": { - "allow": {"id": "*"}, # Allow all authenticated users - "tables": { - "articles": { - "allow": {"id": "editor"}, # Only editor can view - }, - "comments": { - "allow": True, # Everyone can view - }, - }, - }, - "private": { - "allow": False, # Deny everyone - }, - } - } - ) - await ds.invoke_startup() - - # Add content database with some tables - content_db = ds.add_memory_database("content") - await content_db.execute_write( - "CREATE TABLE IF NOT EXISTS articles (id INTEGER PRIMARY KEY, title TEXT)" - ) - await content_db.execute_write( - "CREATE TABLE IF NOT EXISTS comments (id INTEGER PRIMARY KEY, body TEXT)" - ) - await content_db.execute_write( - "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT)" - ) - - # Add private database with a table - private_db = ds.add_memory_database("private") - await private_db.execute_write( - "CREATE TABLE IF NOT EXISTS secrets (id INTEGER PRIMARY KEY, data TEXT)" - ) - - # Add 
another public database - public_db = ds.add_memory_database("public") - await public_db.execute_write( - "CREATE TABLE IF NOT EXISTS articles (id INTEGER PRIMARY KEY, content TEXT)" - ) - - return ds - - -# /-/tables.json tests -@pytest.mark.asyncio -async def test_tables_basic_search(ds_with_tables): - """Test basic table search functionality.""" - # Search for "articles" - should find it in both content and public databases - # but only return public.articles for anonymous user (content.articles requires auth) - response = await ds_with_tables.client.get("/-/tables.json?q=articles") - assert response.status_code == 200 - data = response.json() - - # Should only see public.articles (content.articles restricted to authenticated users) - assert "matches" in data - assert len(data["matches"]) == 1 - - match = data["matches"][0] - assert "url" in match - assert "name" in match - assert match["name"] == "public: articles" - assert "/public/articles" in match["url"] - - -@pytest.mark.asyncio -async def test_tables_search_with_auth(ds_with_tables): - """Test that authenticated users see more tables.""" - # Editor user should see content.articles - response = await ds_with_tables.client.get( - "/-/tables.json?q=articles", - cookies={"ds_actor": ds_with_tables.client.actor_cookie({"id": "editor"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Should see both content.articles and public.articles - assert len(data["matches"]) == 2 - - names = {match["name"] for match in data["matches"]} - assert names == {"content: articles", "public: articles"} - - -@pytest.mark.asyncio -async def test_tables_search_partial_match(ds_with_tables): - """Test that search matches partial table names.""" - # Search for "com" should match "comments" - response = await ds_with_tables.client.get( - "/-/tables.json?q=com", - cookies={"ds_actor": ds_with_tables.client.actor_cookie({"id": "user"})}, - ) - assert response.status_code == 200 - data = response.json() - - assert len(data["matches"]) == 1 - assert data["matches"][0]["name"] == "content: comments" - - -@pytest.mark.asyncio -async def test_tables_search_respects_database_permissions(ds_with_tables): - """Test that tables from denied databases are not shown.""" - # Search for "secrets" which is in the private database - # Even authenticated users shouldn't see it because database is denied - response = await ds_with_tables.client.get( - "/-/tables.json?q=secrets", - cookies={"ds_actor": ds_with_tables.client.actor_cookie({"id": "user"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Should not see secrets table from private database - assert len(data["matches"]) == 0 - - -@pytest.mark.asyncio -async def test_tables_search_respects_table_permissions(ds_with_tables): - """Test that tables with specific permissions are filtered correctly.""" - # Regular authenticated user searching for "users" - response = await ds_with_tables.client.get( - "/-/tables.json?q=users", - cookies={"ds_actor": ds_with_tables.client.actor_cookie({"id": "regular"})}, - ) - assert response.status_code == 200 - data = response.json() - - # Should see content.users (authenticated users can view content database) - assert len(data["matches"]) == 1 - assert data["matches"][0]["name"] == "content: users" - - -@pytest.mark.asyncio -async def test_tables_search_response_structure(ds_with_tables): - """Test that response has correct structure.""" - response = await ds_with_tables.client.get( - "/-/tables.json?q=users", - cookies={"ds_actor": 
ds_with_tables.client.actor_cookie({"id": "user"})}, - ) - assert response.status_code == 200 - data = response.json() - - assert "matches" in data - assert isinstance(data["matches"], list) - - if data["matches"]: - match = data["matches"][0] - assert "url" in match - assert "name" in match - assert isinstance(match["url"], str) - assert isinstance(match["name"], str) - # Name should be in format "database: table" - assert ": " in match["name"] diff --git a/tests/test_spatialite.py b/tests/test_spatialite.py deleted file mode 100644 index c07a30e8..00000000 --- a/tests/test_spatialite.py +++ /dev/null @@ -1,23 +0,0 @@ -from datasette.app import Datasette -from datasette.utils import find_spatialite, SpatialiteNotFound, SPATIALITE_FUNCTIONS -from .utils import has_load_extension -import pytest - - -def has_spatialite(): - try: - find_spatialite() - return True - except SpatialiteNotFound: - return False - - -@pytest.mark.asyncio -@pytest.mark.skipif(not has_spatialite(), reason="Requires SpatiaLite") -@pytest.mark.skipif(not has_load_extension(), reason="Requires enable_load_extension") -async def test_spatialite_version_info(): - ds = Datasette(sqlite_extensions=["spatialite"]) - response = await ds.client.get("/-/versions.json") - assert response.status_code == 200 - spatialite = response.json()["sqlite"]["extensions"]["spatialite"] - assert set(SPATIALITE_FUNCTIONS) == set(spatialite) diff --git a/tests/test_table_api.py b/tests/test_table_api.py deleted file mode 100644 index 653679e4..00000000 --- a/tests/test_table_api.py +++ /dev/null @@ -1,1385 +0,0 @@ -from datasette.utils import detect_json1 -from datasette.utils.sqlite import sqlite_version -from .fixtures import ( # noqa - app_client, - app_client_with_trace, - app_client_returned_rows_matches_page_size, - generate_compound_rows, - generate_sortable_rows, - make_app_client, -) -import json -import pytest -import urllib - - -@pytest.mark.asyncio -async def test_table_json(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_extra=query") - assert response.status_code == 200 - data = response.json() - assert ( - data["query"]["sql"] - == "select id, content from simple_primary_key order by id limit 51" - ) - assert data["query"]["params"] == {} - assert data["rows"] == [ - {"id": 1, "content": "hello"}, - {"id": 2, "content": "world"}, - {"id": 3, "content": ""}, - {"id": 4, "content": "RENDER_CELL_DEMO"}, - {"id": 5, "content": "RENDER_CELL_ASYNC"}, - ] - - -@pytest.mark.asyncio -async def test_table_not_exists_json(ds_client): - assert (await ds_client.get("/fixtures/blah.json")).json() == { - "ok": False, - "error": "Table not found", - "status": 404, - "title": None, - } - - -@pytest.mark.asyncio -async def test_table_shape_arrays(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_shape=arrays") - assert response.json()["rows"] == [ - [1, "hello"], - [2, "world"], - [3, ""], - [4, "RENDER_CELL_DEMO"], - [5, "RENDER_CELL_ASYNC"], - ] - - -@pytest.mark.asyncio -async def test_table_shape_arrayfirst(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?" 
- + urllib.parse.urlencode( - { - "sql": "select content from simple_primary_key order by id", - "_shape": "arrayfirst", - } - ) - ) - assert response.json() == [ - "hello", - "world", - "", - "RENDER_CELL_DEMO", - "RENDER_CELL_ASYNC", - ] - - -@pytest.mark.asyncio -async def test_table_shape_objects(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_shape=objects") - assert response.json()["rows"] == [ - {"id": 1, "content": "hello"}, - {"id": 2, "content": "world"}, - {"id": 3, "content": ""}, - {"id": 4, "content": "RENDER_CELL_DEMO"}, - {"id": 5, "content": "RENDER_CELL_ASYNC"}, - ] - - -@pytest.mark.asyncio -async def test_table_shape_array(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_shape=array") - assert response.json() == [ - {"id": 1, "content": "hello"}, - {"id": 2, "content": "world"}, - {"id": 3, "content": ""}, - {"id": 4, "content": "RENDER_CELL_DEMO"}, - {"id": 5, "content": "RENDER_CELL_ASYNC"}, - ] - - -@pytest.mark.asyncio -async def test_table_shape_array_nl(ds_client): - response = await ds_client.get( - "/fixtures/simple_primary_key.json?_shape=array&_nl=on" - ) - lines = response.text.split("\n") - results = [json.loads(line) for line in lines] - assert [ - {"id": 1, "content": "hello"}, - {"id": 2, "content": "world"}, - {"id": 3, "content": ""}, - {"id": 4, "content": "RENDER_CELL_DEMO"}, - {"id": 5, "content": "RENDER_CELL_ASYNC"}, - ] == results - - -@pytest.mark.asyncio -async def test_table_shape_invalid(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_shape=invalid") - assert response.json() == { - "ok": False, - "error": "Invalid _shape: invalid", - "status": 400, - "title": None, - } - - -@pytest.mark.asyncio -async def test_table_shape_object(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key.json?_shape=object") - assert response.json() == { - "1": {"id": 1, "content": "hello"}, - "2": {"id": 2, "content": "world"}, - "3": {"id": 3, "content": ""}, - "4": {"id": 4, "content": "RENDER_CELL_DEMO"}, - "5": {"id": 5, "content": "RENDER_CELL_ASYNC"}, - } - - -@pytest.mark.asyncio -async def test_table_shape_object_compound_primary_key(ds_client): - response = await ds_client.get("/fixtures/compound_primary_key.json?_shape=object") - assert response.json() == { - "a,b": {"pk1": "a", "pk2": "b", "content": "c"}, - "a~2Fb,~2Ec-d": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, - } - - -@pytest.mark.asyncio -async def test_table_with_slashes_in_name(ds_client): - response = await ds_client.get( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv.json?_shape=objects" - ) - assert response.status_code == 200 - data = response.json() - assert data["rows"] == [{"pk": "3", "content": "hey"}] - - -@pytest.mark.asyncio -async def test_table_with_reserved_word_name(ds_client): - response = await ds_client.get("/fixtures/select.json?_shape=objects") - assert response.status_code == 200 - data = response.json() - assert data["rows"] == [ - { - "rowid": 1, - "group": "group", - "having": "having", - "and": "and", - "json": '{"href": "http://example.com/", "label":"Example"}', - } - ] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_rows,expected_pages", - [ - ("/fixtures/no_primary_key.json", 201, 5), - ("/fixtures/paginated_view.json", 201, 9), - ("/fixtures/no_primary_key.json?_size=25", 201, 9), - ("/fixtures/paginated_view.json?_size=50", 201, 5), - ("/fixtures/paginated_view.json?_size=max", 201, 3), - 
("/fixtures/123_starts_with_digits.json", 0, 1), - # Ensure faceting doesn't break pagination: - ("/fixtures/compound_three_primary_keys.json?_facet=pk1", 1001, 21), - # Paginating while sorted by an expanded foreign key should work - ( - "/fixtures/roadside_attraction_characteristics.json?_size=2&_sort=attraction_id&_labels=on", - 5, - 3, - ), - ], -) -async def test_paginate_tables_and_views( - ds_client, path, expected_rows, expected_pages -): - fetched = [] - count = 0 - while path: - if "?" in path: - path += "&_extra=next_url" - else: - path += "?_extra=next_url" - response = await ds_client.get(path) - assert response.status_code == 200 - count += 1 - fetched.extend(response.json()["rows"]) - path = response.json()["next_url"] - if path: - assert urllib.parse.urlencode({"_next": response.json()["next"]}) in path - path = path.replace("http://localhost", "") - assert count < 30, "Possible infinite loop detected" - - assert expected_rows == len(fetched) - assert expected_pages == count - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_error", - [ - ("/fixtures/no_primary_key.json?_size=-4", "_size must be a positive integer"), - ("/fixtures/no_primary_key.json?_size=dog", "_size must be a positive integer"), - ("/fixtures/no_primary_key.json?_size=1001", "_size must be <= 100"), - ], -) -async def test_validate_page_size(ds_client, path, expected_error): - response = await ds_client.get(path) - assert expected_error == response.json()["error"] - assert response.status_code == 400 - - -@pytest.mark.asyncio -async def test_page_size_zero(ds_client): - """For _size=0 we return the counts, empty rows and no continuation token""" - response = await ds_client.get( - "/fixtures/no_primary_key.json?_size=0&_extra=count,next_url" - ) - assert response.status_code == 200 - assert [] == response.json()["rows"] - assert 201 == response.json()["count"] - assert None is response.json()["next"] - assert None is response.json()["next_url"] - - -@pytest.mark.asyncio -async def test_paginate_compound_keys(ds_client): - fetched = [] - path = "/fixtures/compound_three_primary_keys.json?_shape=objects&_extra=next_url" - page = 0 - while path: - page += 1 - response = await ds_client.get(path) - fetched.extend(response.json()["rows"]) - path = response.json()["next_url"] - if path: - path = path.replace("http://localhost", "") - assert page < 100 - assert 1001 == len(fetched) - assert 21 == page - # Should be correctly ordered - contents = [f["content"] for f in fetched] - expected = [r[3] for r in generate_compound_rows(1001)] - assert expected == contents - - -@pytest.mark.asyncio -async def test_paginate_compound_keys_with_extra_filters(ds_client): - fetched = [] - path = "/fixtures/compound_three_primary_keys.json?content__contains=d&_shape=objects&_extra=next_url" - page = 0 - while path: - page += 1 - assert page < 100 - response = await ds_client.get(path) - fetched.extend(response.json()["rows"]) - path = response.json()["next_url"] - if path: - path = path.replace("http://localhost", "") - assert 2 == page - expected = [r[3] for r in generate_compound_rows(1001) if "d" in r[3]] - assert expected == [f["content"] for f in fetched] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "query_string,sort_key,human_description_en", - [ - ("_sort=sortable", lambda row: row["sortable"], "sorted by sortable"), - ( - "_sort_desc=sortable", - lambda row: -row["sortable"], - "sorted by sortable descending", - ), - ( - "_sort=sortable_with_nulls", - lambda row: ( - 1 if 
row["sortable_with_nulls"] is not None else 0, - row["sortable_with_nulls"], - ), - "sorted by sortable_with_nulls", - ), - ( - "_sort_desc=sortable_with_nulls", - lambda row: ( - 1 if row["sortable_with_nulls"] is None else 0, - ( - -row["sortable_with_nulls"] - if row["sortable_with_nulls"] is not None - else 0 - ), - row["content"], - ), - "sorted by sortable_with_nulls descending", - ), - # text column contains '$null' - ensure it doesn't confuse pagination: - ("_sort=text", lambda row: row["text"], "sorted by text"), - # Still works if sort column removed using _col= - ("_sort=text&_col=content", lambda row: row["text"], "sorted by text"), - ], -) -async def test_sortable(ds_client, query_string, sort_key, human_description_en): - path = f"/fixtures/sortable.json?_shape=objects&_extra=human_description_en,next_url&{query_string}" - fetched = [] - page = 0 - while path: - page += 1 - assert page < 100 - response = await ds_client.get(path) - assert human_description_en == response.json()["human_description_en"] - fetched.extend(response.json()["rows"]) - path = response.json()["next_url"] - if path: - path = path.replace("http://localhost", "") - assert page == 5 - expected = list(generate_sortable_rows(201)) - expected.sort(key=sort_key) - assert [r["content"] for r in expected] == [r["content"] for r in fetched] - - -@pytest.mark.asyncio -async def test_sortable_and_filtered(ds_client): - path = ( - "/fixtures/sortable.json" - "?content__contains=d&_sort_desc=sortable&_shape=objects" - "&_extra=human_description_en,count" - ) - response = await ds_client.get(path) - fetched = response.json()["rows"] - assert ( - 'where content contains "d" sorted by sortable descending' - == response.json()["human_description_en"] - ) - expected = [row for row in generate_sortable_rows(201) if "d" in row["content"]] - assert len(expected) == response.json()["count"] - expected.sort(key=lambda row: -row["sortable"]) - assert [r["content"] for r in expected] == [r["content"] for r in fetched] - - -@pytest.mark.asyncio -async def test_sortable_argument_errors(ds_client): - response = await ds_client.get("/fixtures/sortable.json?_sort=badcolumn") - assert "Cannot sort table by badcolumn" == response.json()["error"] - response = await ds_client.get("/fixtures/sortable.json?_sort_desc=badcolumn2") - assert "Cannot sort table by badcolumn2" == response.json()["error"] - response = await ds_client.get( - "/fixtures/sortable.json?_sort=sortable_with_nulls&_sort_desc=sortable" - ) - assert ( - "Cannot use _sort and _sort_desc at the same time" == response.json()["error"] - ) - - -@pytest.mark.asyncio -async def test_sortable_columns_metadata(ds_client): - response = await ds_client.get("/fixtures/sortable.json?_sort=content") - assert "Cannot sort table by content" == response.json()["error"] - # no_primary_key has ALL sort options disabled - for column in ("content", "a", "b", "c"): - response = await ds_client.get(f"/fixtures/sortable.json?_sort={column}") - assert f"Cannot sort table by {column}" == response.json()["error"] - - -@pytest.mark.asyncio -@pytest.mark.xfail -@pytest.mark.parametrize( - "path,expected_rows", - [ - ( - "/fixtures/searchable.json?_shape=arrays&_search=dog", - [ - [1, "barry cat", "terry dog", "panther"], - [2, "terry dog", "sara weasel", "puma"], - ], - ), - ( - # Special keyword shouldn't break FTS query - "/fixtures/searchable.json?_shape=arrays&_search=AND", - [], - ), - ( - # Without _searchmode=raw this should return no results - 
"/fixtures/searchable.json?_shape=arrays&_search=te*+AND+do*", - [], - ), - ( - # _searchmode=raw - "/fixtures/searchable.json?_shape=arrays&_search=te*+AND+do*&_searchmode=raw", - [ - [1, "barry cat", "terry dog", "panther"], - [2, "terry dog", "sara weasel", "puma"], - ], - ), - ( - # _searchmode=raw combined with _search_COLUMN - "/fixtures/searchable.json?_shape=arrays&_search_text2=te*&_searchmode=raw", - [ - [1, "barry cat", "terry dog", "panther"], - ], - ), - ( - "/fixtures/searchable.json?_shape=arrays&_search=weasel", - [[2, "terry dog", "sara weasel", "puma"]], - ), - ( - "/fixtures/searchable.json?_shape=arrays&_search_text2=dog", - [[1, "barry cat", "terry dog", "panther"]], - ), - ( - "/fixtures/searchable.json?_shape=arrays&_search_name%20with%20.%20and%20spaces=panther", - [[1, "barry cat", "terry dog", "panther"]], - ), - ], -) -async def test_searchable(ds_client, path, expected_rows): - response = await ds_client.get(path) - assert expected_rows == response.json()["rows"] - - -_SEARCHMODE_RAW_RESULTS = [ - [1, "barry cat", "terry dog", "panther"], - [2, "terry dog", "sara weasel", "puma"], -] - - -@pytest.mark.parametrize( - "table_metadata,querystring,expected_rows", - [ - ( - {}, - "_search=te*+AND+do*", - [], - ), - ( - {"searchmode": "raw"}, - "_search=te*+AND+do*", - _SEARCHMODE_RAW_RESULTS, - ), - ( - {}, - "_search=te*+AND+do*&_searchmode=raw", - _SEARCHMODE_RAW_RESULTS, - ), - # Can be over-ridden with _searchmode=escaped - ( - {"searchmode": "raw"}, - "_search=te*+AND+do*&_searchmode=escaped", - [], - ), - ], -) -def test_searchmode(table_metadata, querystring, expected_rows): - with make_app_client( - metadata={"databases": {"fixtures": {"tables": {"searchable": table_metadata}}}} - ) as client: - response = client.get("/fixtures/searchable.json?_shape=arrays&" + querystring) - assert expected_rows == response.json["rows"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_rows", - [ - ( - "/fixtures/searchable_view_configured_by_metadata.json?_shape=arrays&_search=weasel", - [[2, "terry dog", "sara weasel", "puma"]], - ), - # This should return all results because search is not configured: - ( - "/fixtures/searchable_view.json?_shape=arrays&_search=weasel", - [ - [1, "barry cat", "terry dog", "panther"], - [2, "terry dog", "sara weasel", "puma"], - ], - ), - ( - "/fixtures/searchable_view.json?_shape=arrays&_search=weasel&_fts_table=searchable_fts&_fts_pk=pk", - [[2, "terry dog", "sara weasel", "puma"]], - ), - ], -) -async def test_searchable_views(ds_client, path, expected_rows): - response = await ds_client.get(path) - assert response.json()["rows"] == expected_rows - - -@pytest.mark.asyncio -async def test_searchable_invalid_column(ds_client): - response = await ds_client.get("/fixtures/searchable.json?_search_invalid=x") - assert response.status_code == 400 - assert response.json() == { - "ok": False, - "error": "Cannot search by that column", - "status": 400, - "title": None, - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_rows", - [ - ( - "/fixtures/simple_primary_key.json?_shape=arrays&content=hello", - [[1, "hello"]], - ), - ( - "/fixtures/simple_primary_key.json?_shape=arrays&content__contains=o", - [ - [1, "hello"], - [2, "world"], - [4, "RENDER_CELL_DEMO"], - ], - ), - ( - "/fixtures/simple_primary_key.json?_shape=arrays&content__exact=", - [[3, ""]], - ), - ( - "/fixtures/simple_primary_key.json?_shape=arrays&content__not=world", - [ - [1, "hello"], - [3, ""], - [4, "RENDER_CELL_DEMO"], - [5, 
"RENDER_CELL_ASYNC"], - ], - ), - ], -) -async def test_table_filter_queries(ds_client, path, expected_rows): - response = await ds_client.get(path) - assert response.json()["rows"] == expected_rows - - -@pytest.mark.asyncio -async def test_table_filter_queries_multiple_of_same_type(ds_client): - response = await ds_client.get( - "/fixtures/simple_primary_key.json?_shape=arrays&content__not=world&content__not=hello" - ) - assert [ - [3, ""], - [4, "RENDER_CELL_DEMO"], - [5, "RENDER_CELL_ASYNC"], - ] == response.json()["rows"] - - -@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -@pytest.mark.asyncio -async def test_table_filter_json_arraycontains(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_shape=arrays&tags__arraycontains=tag1" - ) - assert response.json()["rows"] == [ - [ - 1, - "2019-01-14 08:00:00", - 1, - 1, - "CA", - 1, - "Mission", - '["tag1", "tag2"]', - '[{"foo": "bar"}]', - "one", - "n1", - ], - [ - 2, - "2019-01-14 08:00:00", - 1, - 1, - "CA", - 1, - "Dogpatch", - '["tag1", "tag3"]', - "[]", - "two", - "n2", - ], - ] - - -@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") -@pytest.mark.asyncio -async def test_table_filter_json_arraynotcontains(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_shape=arrays&tags__arraynotcontains=tag3&tags__not=[]" - ) - assert response.json()["rows"] == [ - [ - 1, - "2019-01-14 08:00:00", - 1, - 1, - "CA", - 1, - "Mission", - '["tag1", "tag2"]', - '[{"foo": "bar"}]', - "one", - "n1", - ] - ] - - -@pytest.mark.asyncio -async def test_table_filter_extra_where(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_shape=arrays&_where=_neighborhood='Dogpatch'" - ) - assert [ - [ - 2, - "2019-01-14 08:00:00", - 1, - 1, - "CA", - 1, - "Dogpatch", - '["tag1", "tag3"]', - "[]", - "two", - "n2", - ] - ] == response.json()["rows"] - - -@pytest.mark.asyncio -async def test_table_filter_extra_where_invalid(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_where=_neighborhood=Dogpatch'" - ) - assert response.status_code == 400 - assert "Invalid SQL" == response.json()["title"] - - -def test_table_filter_extra_where_disabled_if_no_sql_allowed(): - with make_app_client(config={"allow_sql": {}}) as client: - response = client.get( - "/fixtures/facetable.json?_where=_neighborhood='Dogpatch'" - ) - assert response.status_code == 403 - assert "_where= is not allowed" == response.json["error"] - - -@pytest.mark.asyncio -async def test_table_through(ds_client): - # Just the museums: - response = await ds_client.get( - "/fixtures/roadside_attractions.json?_shape=arrays" - '&_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' - "&_extra=human_description_en" - ) - assert response.json()["rows"] == [ - [ - 3, - "Burlingame Museum of PEZ Memorabilia", - "214 California Drive, Burlingame, CA 94010", - None, - 37.5793, - -122.3442, - ], - [ - 4, - "Bigfoot Discovery Museum", - "5497 Highway 9, Felton, CA 95018", - "https://www.bigfootdiscoveryproject.com/", - 37.0414, - -122.0725, - ], - ] - - assert ( - response.json()["human_description_en"] - == 'where roadside_attraction_characteristics.characteristic_id = "1"' - ) - - -@pytest.mark.asyncio -async def test_max_returned_rows(ds_client): - response = await ds_client.get( - "/fixtures/-/query.json?sql=select+content+from+no_primary_key" - ) - data = response.json() - assert data["truncated"] - assert 100 == 
len(data["rows"]) - - -@pytest.mark.asyncio -async def test_view(ds_client): - response = await ds_client.get("/fixtures/simple_view.json?_shape=objects") - assert response.status_code == 200 - data = response.json() - assert data["rows"] == [ - {"upper_content": "HELLO", "content": "hello"}, - {"upper_content": "WORLD", "content": "world"}, - {"upper_content": "", "content": ""}, - {"upper_content": "RENDER_CELL_DEMO", "content": "RENDER_CELL_DEMO"}, - {"upper_content": "RENDER_CELL_ASYNC", "content": "RENDER_CELL_ASYNC"}, - ] - - -def test_page_size_matching_max_returned_rows( - app_client_returned_rows_matches_page_size, -): - fetched = [] - path = "/fixtures/no_primary_key.json?_extra=next_url" - while path: - response = app_client_returned_rows_matches_page_size.get(path) - fetched.extend(response.json["rows"]) - assert len(response.json["rows"]) in (1, 50) - path = response.json["next_url"] - if path: - path = path.replace("http://localhost", "") - assert len(fetched) == 201 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_facet_results", - [ - ( - "/fixtures/facetable.json?_facet=state&_facet=_city_id", - { - "state": { - "name": "state", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json?_facet=_city_id", - "results": [ - { - "value": "CA", - "label": "CA", - "count": 10, - "toggle_url": "_facet=state&_facet=_city_id&state=CA", - "selected": False, - }, - { - "value": "MI", - "label": "MI", - "count": 4, - "toggle_url": "_facet=state&_facet=_city_id&state=MI", - "selected": False, - }, - { - "value": "MC", - "label": "MC", - "count": 1, - "toggle_url": "_facet=state&_facet=_city_id&state=MC", - "selected": False, - }, - ], - "truncated": False, - }, - "_city_id": { - "name": "_city_id", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json?_facet=state", - "results": [ - { - "value": 1, - "label": "San Francisco", - "count": 6, - "toggle_url": "_facet=state&_facet=_city_id&_city_id__exact=1", - "selected": False, - }, - { - "value": 2, - "label": "Los Angeles", - "count": 4, - "toggle_url": "_facet=state&_facet=_city_id&_city_id__exact=2", - "selected": False, - }, - { - "value": 3, - "label": "Detroit", - "count": 4, - "toggle_url": "_facet=state&_facet=_city_id&_city_id__exact=3", - "selected": False, - }, - { - "value": 4, - "label": "Memnonia", - "count": 1, - "toggle_url": "_facet=state&_facet=_city_id&_city_id__exact=4", - "selected": False, - }, - ], - "truncated": False, - }, - }, - ), - ( - "/fixtures/facetable.json?_facet=state&_facet=_city_id&state=MI", - { - "state": { - "name": "state", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json?_facet=_city_id&state=MI", - "results": [ - { - "value": "MI", - "label": "MI", - "count": 4, - "selected": True, - "toggle_url": "_facet=state&_facet=_city_id", - } - ], - "truncated": False, - }, - "_city_id": { - "name": "_city_id", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json?_facet=state&state=MI", - "results": [ - { - "value": 3, - "label": "Detroit", - "count": 4, - "selected": False, - "toggle_url": "_facet=state&_facet=_city_id&state=MI&_city_id__exact=3", - } - ], - "truncated": False, - }, - }, - ), - ( - "/fixtures/facetable.json?_facet=planet_int", - { - "planet_int": { - "name": "planet_int", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json", - "results": [ - { - "value": 1, - "label": 1, - "count": 14, - "selected": False, - "toggle_url": 
"_facet=planet_int&planet_int=1", - }, - { - "value": 2, - "label": 2, - "count": 1, - "selected": False, - "toggle_url": "_facet=planet_int&planet_int=2", - }, - ], - "truncated": False, - } - }, - ), - ( - # planet_int is an integer field: - "/fixtures/facetable.json?_facet=planet_int&planet_int=1", - { - "planet_int": { - "name": "planet_int", - "hideable": True, - "type": "column", - "toggle_url": "/fixtures/facetable.json?planet_int=1", - "results": [ - { - "value": 1, - "label": 1, - "count": 14, - "selected": True, - "toggle_url": "_facet=planet_int", - } - ], - "truncated": False, - } - }, - ), - ], -) -async def test_facets(ds_client, path, expected_facet_results): - response = await ds_client.get(path) - facet_results = response.json()["facet_results"] - # We only compare the querystring portion of the taggle_url - for facet_name, facet_info in facet_results["results"].items(): - assert facet_name == facet_info["name"] - assert False is facet_info["truncated"] - for facet_value in facet_info["results"]: - facet_value["toggle_url"] = facet_value["toggle_url"].split("?")[1] - assert expected_facet_results == facet_results["results"] - - -@pytest.mark.asyncio -@pytest.mark.skipif(not detect_json1(), reason="requires JSON1 extension") -async def test_facets_array(ds_client): - response = await ds_client.get("/fixtures/facetable.json?_facet_array=tags") - facet_results = response.json()["facet_results"] - assert facet_results["results"]["tags"]["results"] == [ - { - "value": "tag1", - "label": "tag1", - "count": 2, - "toggle_url": "http://localhost/fixtures/facetable.json?_facet_array=tags&tags__arraycontains=tag1", - "selected": False, - }, - { - "value": "tag2", - "label": "tag2", - "count": 1, - "toggle_url": "http://localhost/fixtures/facetable.json?_facet_array=tags&tags__arraycontains=tag2", - "selected": False, - }, - { - "value": "tag3", - "label": "tag3", - "count": 1, - "toggle_url": "http://localhost/fixtures/facetable.json?_facet_array=tags&tags__arraycontains=tag3", - "selected": False, - }, - ] - - -@pytest.mark.asyncio -async def test_suggested_facets(ds_client): - suggestions = [ - { - "name": suggestion["name"], - "querystring": suggestion["toggle_url"].split("?")[-1], - } - for suggestion in ( - await ds_client.get("/fixtures/facetable.json?_extra=suggested_facets") - ).json()["suggested_facets"] - ] - expected = [ - {"name": "created", "querystring": "_extra=suggested_facets&_facet=created"}, - { - "name": "planet_int", - "querystring": "_extra=suggested_facets&_facet=planet_int", - }, - {"name": "on_earth", "querystring": "_extra=suggested_facets&_facet=on_earth"}, - {"name": "state", "querystring": "_extra=suggested_facets&_facet=state"}, - {"name": "_city_id", "querystring": "_extra=suggested_facets&_facet=_city_id"}, - { - "name": "_neighborhood", - "querystring": "_extra=suggested_facets&_facet=_neighborhood", - }, - {"name": "tags", "querystring": "_extra=suggested_facets&_facet=tags"}, - { - "name": "complex_array", - "querystring": "_extra=suggested_facets&_facet=complex_array", - }, - { - "name": "created", - "querystring": "_extra=suggested_facets&_facet_date=created", - }, - ] - if detect_json1(): - expected.append( - {"name": "tags", "querystring": "_extra=suggested_facets&_facet_array=tags"} - ) - assert expected == suggestions - - -def test_allow_facet_off(): - with make_app_client(settings={"allow_facet": False}) as client: - assert ( - client.get( - "/fixtures/facetable.json?_facet=planet_int&_extra=suggested_facets" - ).status - == 400 - ) - data = 
client.get("/fixtures/facetable.json?_extra=suggested_facets").json - # Should not suggest any facets either: - assert [] == data["suggested_facets"] - - -def test_suggest_facets_off(): - with make_app_client(settings={"suggest_facets": False}) as client: - # Now suggested_facets should be [] - assert ( - [] - == client.get("/fixtures/facetable.json?_extra=suggested_facets").json[ - "suggested_facets" - ] - ) - - -@pytest.mark.asyncio -@pytest.mark.parametrize("nofacet", (True, False)) -async def test_nofacet(ds_client, nofacet): - path = "/fixtures/facetable.json?_facet=state&_extra=suggested_facets" - if nofacet: - path += "&_nofacet=1" - response = await ds_client.get(path) - if nofacet: - assert response.json()["suggested_facets"] == [] - assert response.json()["facet_results"]["results"] == {} - else: - assert response.json()["suggested_facets"] != [] - assert response.json()["facet_results"]["results"] != {} - - -@pytest.mark.asyncio -@pytest.mark.parametrize("nosuggest", (True, False)) -async def test_nosuggest(ds_client, nosuggest): - path = "/fixtures/facetable.json?_facet=state&_extra=suggested_facets" - if nosuggest: - path += "&_nosuggest=1" - response = await ds_client.get(path) - if nosuggest: - assert response.json()["suggested_facets"] == [] - # But facets should still be returned: - assert response.json()["facet_results"] != {} - else: - assert response.json()["suggested_facets"] != [] - assert response.json()["facet_results"] != {} - - -@pytest.mark.asyncio -@pytest.mark.parametrize("nocount,expected_count", ((True, None), (False, 15))) -async def test_nocount(ds_client, nocount, expected_count): - path = "/fixtures/facetable.json?_extra=count" - if nocount: - path += "&_nocount=1" - response = await ds_client.get(path) - assert response.json()["count"] == expected_count - - -def test_nocount_nofacet_if_shape_is_object(app_client_with_trace): - response = app_client_with_trace.get( - "/fixtures/facetable.json?_trace=1&_shape=object" - ) - assert "count(*)" not in response.text - - -@pytest.mark.asyncio -async def test_expand_labels(ds_client): - response = await ds_client.get( - "/fixtures/facetable.json?_shape=object&_labels=1&_size=2" - "&_neighborhood__contains=c" - ) - assert response.json() == { - "2": { - "pk": 2, - "created": "2019-01-14 08:00:00", - "planet_int": 1, - "on_earth": 1, - "state": "CA", - "_city_id": {"value": 1, "label": "San Francisco"}, - "_neighborhood": "Dogpatch", - "tags": '["tag1", "tag3"]', - "complex_array": "[]", - "distinct_some_null": "two", - "n": "n2", - }, - "13": { - "pk": 13, - "created": "2019-01-17 08:00:00", - "planet_int": 1, - "on_earth": 1, - "state": "MI", - "_city_id": {"value": 3, "label": "Detroit"}, - "_neighborhood": "Corktown", - "tags": "[]", - "complex_array": "[]", - "distinct_some_null": None, - "n": None, - }, - } - - -@pytest.mark.asyncio -async def test_expand_label(ds_client): - response = await ds_client.get( - "/fixtures/foreign_key_references.json?_shape=object" - "&_label=foreign_key_with_label&_size=1" - ) - assert response.json() == { - "1": { - "pk": "1", - "foreign_key_with_label": {"value": 1, "label": "hello"}, - "foreign_key_with_blank_label": 3, - "foreign_key_with_no_label": "1", - "foreign_key_compound_pk1": "a", - "foreign_key_compound_pk2": "b", - } - } - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_cache_control", - [ - ("/fixtures/facetable.json", "max-age=5"), - ("/fixtures/facetable.json?_ttl=invalid", "max-age=5"), - ("/fixtures/facetable.json?_ttl=10", "max-age=10"), - 
("/fixtures/facetable.json?_ttl=0", "no-cache"), - ], -) -async def test_ttl_parameter(ds_client, path, expected_cache_control): - response = await ds_client.get(path) - assert response.headers["Cache-Control"] == expected_cache_control - - -@pytest.mark.asyncio -async def test_infinity_returned_as_null(ds_client): - response = await ds_client.get("/fixtures/infinity.json?_shape=array") - assert response.json() == [ - {"rowid": 1, "value": None}, - {"rowid": 2, "value": None}, - {"rowid": 3, "value": 1.5}, - ] - - -@pytest.mark.asyncio -async def test_infinity_returned_as_invalid_json_if_requested(ds_client): - response = await ds_client.get( - "/fixtures/infinity.json?_shape=array&_json_infinity=1" - ) - assert response.json() == [ - {"rowid": 1, "value": float("inf")}, - {"rowid": 2, "value": float("-inf")}, - {"rowid": 3, "value": 1.5}, - ] - - -@pytest.mark.asyncio -async def test_custom_query_with_unicode_characters(ds_client): - # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json - response = await ds_client.get( - "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC.json?_shape=array" - ) - assert response.json() == [{"id": 1, "name": "San Francisco"}] - - -@pytest.mark.asyncio -async def test_null_and_compound_foreign_keys_are_not_expanded(ds_client): - response = await ds_client.get( - "/fixtures/foreign_key_references.json?_shape=array&_labels=on" - ) - assert response.json() == [ - { - "pk": "1", - "foreign_key_with_label": {"value": 1, "label": "hello"}, - "foreign_key_with_blank_label": {"value": 3, "label": ""}, - "foreign_key_with_no_label": {"value": "1", "label": "1"}, - "foreign_key_compound_pk1": "a", - "foreign_key_compound_pk2": "b", - }, - { - "pk": "2", - "foreign_key_with_label": None, - "foreign_key_with_blank_label": None, - "foreign_key_with_no_label": None, - "foreign_key_compound_pk1": None, - "foreign_key_compound_pk2": None, - }, - ] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_json,expected_text", - [ - ( - "/fixtures/binary_data.json?_shape=array", - [ - {"rowid": 1, "data": {"$base64": True, "encoded": "FRwCx60F/g=="}}, - {"rowid": 2, "data": {"$base64": True, "encoded": "FRwDx60F/g=="}}, - {"rowid": 3, "data": None}, - ], - None, - ), - ( - "/fixtures/binary_data.json?_shape=array&_nl=on", - None, - ( - '{"rowid": 1, "data": {"$base64": true, "encoded": "FRwCx60F/g=="}}\n' - '{"rowid": 2, "data": {"$base64": true, "encoded": "FRwDx60F/g=="}}\n' - '{"rowid": 3, "data": null}' - ), - ), - ], -) -async def test_binary_data_in_json(ds_client, path, expected_json, expected_text): - response = await ds_client.get(path) - if expected_json: - assert response.json() == expected_json - else: - assert response.text == expected_text - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "qs", - [ - "", - "?_shape=arrays", - "?_shape=arrayfirst", - "?_shape=object", - "?_shape=objects", - "?_shape=array", - "?_shape=array&_nl=on", - ], -) -async def test_paginate_using_link_header(ds_client, qs): - path = f"/fixtures/compound_three_primary_keys.json{qs}" - num_pages = 0 - while path: - response = await ds_client.get(path) - assert response.status_code == 200 - num_pages += 1 - link = response.headers.get("link") - if link: - assert link.startswith("<") - assert link.endswith('>; rel="next"') - path = link[1:].split(">")[0] - path = path.replace("http://localhost", "") - else: - path = None - assert num_pages == 21 - - -@pytest.mark.skipif( - sqlite_version() < (3, 31, 0), - reason="generated columns were added in SQLite 3.31.0", -) -def 
test_generated_columns_are_visible_in_datasette(): - with make_app_client( - extra_databases={ - "generated.db": """ - CREATE TABLE generated_columns ( - body TEXT, - id INT GENERATED ALWAYS AS (json_extract(body, '$.number')) STORED, - consideration INT GENERATED ALWAYS AS (json_extract(body, '$.string')) STORED - ); - INSERT INTO generated_columns (body) VALUES ( - '{"number": 1, "string": "This is a string"}' - );""" - } - ) as client: - response = client.get("/generated/generated_columns.json?_shape=array") - assert response.json == [ - { - "rowid": 1, - "body": '{"number": 1, "string": "This is a string"}', - "id": 1, - "consideration": "This is a string", - } - ] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_columns", - ( - ("/fixtures/facetable.json?_col=created", ["pk", "created"]), - ( - "/fixtures/facetable.json?_nocol=created", - [ - "pk", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - ), - ( - "/fixtures/facetable.json?_col=state&_col=created", - ["pk", "state", "created"], - ), - ( - "/fixtures/facetable.json?_col=state&_col=state", - ["pk", "state"], - ), - ( - "/fixtures/facetable.json?_col=state&_col=created&_nocol=created", - ["pk", "state"], - ), - ( - # Ensure faceting doesn't break, https://github.com/simonw/datasette/issues/1345 - "/fixtures/facetable.json?_nocol=state&_facet=state", - [ - "pk", - "created", - "planet_int", - "on_earth", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - ), - ( - "/fixtures/simple_view.json?_nocol=content", - ["upper_content"], - ), - ("/fixtures/simple_view.json?_col=content", ["content"]), - ), -) -async def test_col_nocol(ds_client, path, expected_columns): - response = await ds_client.get(path + "&_extra=columns") - assert response.status_code == 200 - columns = response.json()["columns"] - assert columns == expected_columns - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_error", - ( - ("/fixtures/facetable.json?_col=bad", "_col=bad - invalid columns"), - ("/fixtures/facetable.json?_nocol=bad", "_nocol=bad - invalid columns"), - ("/fixtures/facetable.json?_nocol=pk", "_nocol=pk - invalid columns"), - ("/fixtures/simple_view.json?_col=bad", "_col=bad - invalid columns"), - ), -) -async def test_col_nocol_errors(ds_client, path, expected_error): - response = await ds_client.get(path) - assert response.status_code == 400 - assert response.json()["error"] == expected_error - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "extra,expected_json", - ( - ( - "columns", - { - "ok": True, - "next": None, - "columns": ["id", "content", "content2"], - "rows": [{"id": "1", "content": "hey", "content2": "world"}], - "truncated": False, - }, - ), - ( - "count", - { - "ok": True, - "next": None, - "rows": [{"id": "1", "content": "hey", "content2": "world"}], - "truncated": False, - "count": 1, - }, - ), - ), -) -async def test_table_extras(ds_client, extra, expected_json): - response = await ds_client.get( - "/fixtures/primary_key_multiple_columns.json?_extra=" + extra - ) - assert response.status_code == 200 - assert response.json() == expected_json diff --git a/tests/test_table_html.py b/tests/test_table_html.py deleted file mode 100644 index e3ddb4b0..00000000 --- a/tests/test_table_html.py +++ /dev/null @@ -1,1300 +0,0 @@ -from datasette.app import Datasette -from bs4 import BeautifulSoup as Soup -from .fixtures import ( # noqa - app_client, - 
make_app_client, - app_client_with_dot, -) -import pathlib -import pytest -import urllib.parse -from .utils import inner_html - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_definition_sql", - [ - ( - "/fixtures/facet_cities", - """ -CREATE TABLE facet_cities ( - id integer primary key, - name text -); - """.strip(), - ), - ( - "/fixtures/compound_three_primary_keys", - """ -CREATE TABLE compound_three_primary_keys ( - pk1 varchar(30), - pk2 varchar(30), - pk3 varchar(30), - content text, - PRIMARY KEY (pk1, pk2, pk3) -); -CREATE INDEX idx_compound_three_primary_keys_content ON compound_three_primary_keys(content); - """.strip(), - ), - ], -) -async def test_table_definition_sql(path, expected_definition_sql, ds_client): - response = await ds_client.get(path) - pre = Soup(response.text, "html.parser").select_one("pre.wrapped-sql") - assert expected_definition_sql == pre.string - - -def test_table_cell_truncation(): - with make_app_client(settings={"truncate_cells_html": 5}) as client: - response = client.get("/fixtures/facetable") - assert response.status == 200 - table = Soup(response.body, "html.parser").find("table") - assert table["class"] == ["rows-and-columns"] - assert [ - "Missi…", - "Dogpa…", - "SOMA", - "Tende…", - "Berna…", - "Hayes…", - "Holly…", - "Downt…", - "Los F…", - "Korea…", - "Downt…", - "Greek…", - "Corkt…", - "Mexic…", - "Arcad…", - ] == [ - td.string - for td in table.find_all("td", {"class": "col-neighborhood-b352a7"}) - ] - # URLs should be truncated too - response2 = client.get("/fixtures/roadside_attractions") - assert response2.status == 200 - table = Soup(response2.body, "html.parser").find("table") - tds = table.find_all("td", {"class": "col-url"}) - assert [str(td) for td in tds] == [ - '
    ', - '', - '', - '', - ] - - -@pytest.mark.asyncio -async def test_add_filter_redirects(ds_client): - filter_args = urllib.parse.urlencode( - {"_filter_column": "content", "_filter_op": "startswith", "_filter_value": "x"} - ) - path_base = "/fixtures/simple_primary_key" - path = path_base + "?" + filter_args - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"].endswith("?content__startswith=x") - - # Adding a redirect to an existing query string: - path = path_base + "?foo=bar&" + filter_args - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"].endswith("?foo=bar&content__startswith=x") - - # Test that op with a __x suffix overrides the filter value - path = ( - path_base - + "?" - + urllib.parse.urlencode( - { - "_filter_column": "content", - "_filter_op": "isnull__5", - "_filter_value": "x", - } - ) - ) - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"].endswith("?content__isnull=5") - - -@pytest.mark.asyncio -async def test_existing_filter_redirects(ds_client): - filter_args = { - "_filter_column_1": "name", - "_filter_op_1": "contains", - "_filter_value_1": "hello", - "_filter_column_2": "age", - "_filter_op_2": "gte", - "_filter_value_2": "22", - "_filter_column_3": "age", - "_filter_op_3": "lt", - "_filter_value_3": "30", - "_filter_column_4": "name", - "_filter_op_4": "contains", - "_filter_value_4": "world", - } - path_base = "/fixtures/simple_primary_key" - path = path_base + "?" + urllib.parse.urlencode(filter_args) - response = await ds_client.get(path) - assert response.status_code == 302 - assert_querystring_equal( - "name__contains=hello&age__gte=22&age__lt=30&name__contains=world", - response.headers["Location"].split("?")[1], - ) - - # Setting _filter_column_3 to empty string should remove *_3 entirely - filter_args["_filter_column_3"] = "" - path = path_base + "?" + urllib.parse.urlencode(filter_args) - response = await ds_client.get(path) - assert response.status_code == 302 - assert_querystring_equal( - "name__contains=hello&age__gte=22&name__contains=world", - response.headers["Location"].split("?")[1], - ) - - # ?_filter_op=exact should be removed if unaccompanied by _filter_column - response = await ds_client.get(path_base + "?_filter_op=exact") - assert response.status_code == 302 - assert "?" 
not in response.headers["Location"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "qs,expected_hidden", - ( - # Things that should be reflected in hidden form fields: - ("_facet=_neighborhood", {"_facet": "_neighborhood"}), - ("_where=1+=+1&_col=_city_id", {"_where": "1 = 1", "_col": "_city_id"}), - # Things that should NOT be reflected in hidden form fields: - ( - "_facet=_neighborhood&_neighborhood__exact=Downtown", - {"_facet": "_neighborhood"}, - ), - ("_facet=_neighborhood&_city_id__gt=1", {"_facet": "_neighborhood"}), - ), -) -async def test_reflected_hidden_form_fields(ds_client, qs, expected_hidden): - # https://github.com/simonw/datasette/issues/1527 - response = await ds_client.get("/fixtures/facetable?{}".format(qs)) - # In this case we should NOT have a hidden _neighborhood__exact=Downtown field - form = Soup(response.text, "html.parser").find("form") - hidden_inputs = { - input["name"]: input["value"] for input in form.select("input[type=hidden]") - } - assert hidden_inputs == expected_hidden - - -@pytest.mark.asyncio -async def test_empty_search_parameter_gets_removed(ds_client): - path_base = "/fixtures/simple_primary_key" - path = ( - path_base - + "?" - + urllib.parse.urlencode( - { - "_search": "", - "_filter_column": "name", - "_filter_op": "exact", - "_filter_value": "chidi", - } - ) - ) - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"].endswith("?name__exact=chidi") - - -@pytest.mark.asyncio -async def test_searchable_view_persists_fts_table(ds_client): - # The search form should persist ?_fts_table as a hidden field - response = await ds_client.get( - "/fixtures/searchable_view?_fts_table=searchable_fts&_fts_pk=pk" - ) - inputs = Soup(response.text, "html.parser").find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [("_fts_table", "searchable_fts"), ("_fts_pk", "pk")] == [ - (hidden["name"], hidden["value"]) for hidden in hiddens - ] - - -@pytest.mark.asyncio -async def test_sort_by_desc_redirects(ds_client): - path_base = "/fixtures/sortable" - path = ( - path_base - + "?" 
- + urllib.parse.urlencode({"_sort": "sortable", "_sort_by_desc": "1"}) - ) - response = await ds_client.get(path) - assert response.status_code == 302 - assert response.headers["Location"].endswith("?_sort_desc=sortable") - - -@pytest.mark.asyncio -async def test_sort_links(ds_client): - response = await ds_client.get("/fixtures/sortable?_sort=sortable") - assert response.status_code == 200 - ths = Soup(response.text, "html.parser").find_all("th") - attrs_and_link_attrs = [ - { - "attrs": th.attrs, - "a_href": (th.find("a")["href"] if th.find("a") else None), - } - for th in ths - ] - assert attrs_and_link_attrs == [ - { - "attrs": { - "class": ["col-Link"], - "scope": "col", - "data-column": "Link", - "data-column-type": "", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": None, - }, - { - "attrs": { - "class": ["col-pk1"], - "scope": "col", - "data-column": "pk1", - "data-column-type": "varchar(30)", - "data-column-not-null": "0", - "data-is-pk": "1", - }, - "a_href": None, - }, - { - "attrs": { - "class": ["col-pk2"], - "scope": "col", - "data-column": "pk2", - "data-column-type": "varchar(30)", - "data-column-not-null": "0", - "data-is-pk": "1", - }, - "a_href": None, - }, - { - "attrs": { - "class": ["col-content"], - "scope": "col", - "data-column": "content", - "data-column-type": "text", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": None, - }, - { - "attrs": { - "class": ["col-sortable"], - "scope": "col", - "data-column": "sortable", - "data-column-type": "integer", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": "/fixtures/sortable?_sort_desc=sortable", - }, - { - "attrs": { - "class": ["col-sortable_with_nulls"], - "scope": "col", - "data-column": "sortable_with_nulls", - "data-column-type": "real", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": "/fixtures/sortable?_sort=sortable_with_nulls", - }, - { - "attrs": { - "class": ["col-sortable_with_nulls_2"], - "scope": "col", - "data-column": "sortable_with_nulls_2", - "data-column-type": "real", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": "/fixtures/sortable?_sort=sortable_with_nulls_2", - }, - { - "attrs": { - "class": ["col-text"], - "scope": "col", - "data-column": "text", - "data-column-type": "text", - "data-column-not-null": "0", - "data-is-pk": "0", - }, - "a_href": "/fixtures/sortable?_sort=text", - }, - ] - - -@pytest.mark.asyncio -async def test_facet_display(ds_client): - response = await ds_client.get( - "/fixtures/facetable?_facet=planet_int&_facet=_city_id&_facet=on_earth" - ) - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") - divs = soup.find("div", {"class": "facet-results"}).find_all("div") - actual = [] - for div in divs: - actual.append( - { - "name": div.find("strong").text.split()[0], - "items": [ - { - "name": a.text, - "qs": a["href"].split("?")[-1], - "count": int(str(a.parent).split("")[1].split("<")[0]), - } - for a in div.find("ul").find_all("a") - ], - } - ) - assert actual == [ - { - "name": "_city_id", - "items": [ - { - "name": "San Francisco", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&_city_id__exact=1", - "count": 6, - }, - { - "name": "Los Angeles", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&_city_id__exact=2", - "count": 4, - }, - { - "name": "Detroit", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&_city_id__exact=3", - "count": 4, - }, - { - "name": "Memnonia", - "qs": 
"_facet=planet_int&_facet=_city_id&_facet=on_earth&_city_id__exact=4", - "count": 1, - }, - ], - }, - { - "name": "planet_int", - "items": [ - { - "name": "1", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&planet_int=1", - "count": 14, - }, - { - "name": "2", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&planet_int=2", - "count": 1, - }, - ], - }, - { - "name": "on_earth", - "items": [ - { - "name": "1", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&on_earth=1", - "count": 14, - }, - { - "name": "0", - "qs": "_facet=planet_int&_facet=_city_id&_facet=on_earth&on_earth=0", - "count": 1, - }, - ], - }, - ] - - -@pytest.mark.asyncio -async def test_facets_persist_through_filter_form(ds_client): - response = await ds_client.get( - "/fixtures/facetable?_facet=planet_int&_facet=_city_id&_facet_array=tags" - ) - assert response.status_code == 200 - inputs = Soup(response.text, "html.parser").find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == [ - ("_facet", "planet_int"), - ("_facet", "_city_id"), - ("_facet_array", "tags"), - ] - - -@pytest.mark.asyncio -async def test_next_does_not_persist_in_hidden_field(ds_client): - response = await ds_client.get("/fixtures/searchable?_size=1&_next=1") - assert response.status_code == 200 - inputs = Soup(response.text, "html.parser").find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == [ - ("_size", "1"), - ] - - -@pytest.mark.asyncio -async def test_table_html_simple_primary_key(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key?_size=3") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - assert table["class"] == ["rows-and-columns"] - ths = table.find_all("th") - assert "id\xa0▼" == ths[0].find("a").string.strip() - for expected_col, th in zip(("content",), ths[1:]): - a = th.find("a") - assert expected_col == a.string - assert a["href"].endswith(f"/simple_primary_key?_size=3&_sort={expected_col}") - assert ["nofollow"] == a["rel"] - assert [ - [ - '', - '', - ], - [ - '', - '', - ], - [ - '', - '', - ], - ] == [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - - -@pytest.mark.asyncio -async def test_table_csv_json_export_interface(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key?id__gt=2") - assert response.status_code == 200 - # The links at the top of the page - links = ( - Soup(response.text, "html.parser") - .find("p", {"class": "export-links"}) - .find_all("a") - ) - actual = [link["href"] for link in links] - expected = [ - "/fixtures/simple_primary_key.json?id__gt=2", - "/fixtures/simple_primary_key.testall?id__gt=2", - "/fixtures/simple_primary_key.testnone?id__gt=2", - "/fixtures/simple_primary_key.testresponse?id__gt=2", - "/fixtures/simple_primary_key.csv?id__gt=2&_size=max", - "#export", - ] - assert expected == actual - # And the advanced export box at the bottom: - div = Soup(response.text, "html.parser").find("div", {"class": "advanced-export"}) - json_links = [a["href"] for a in div.find("p").find_all("a")] - assert [ - "/fixtures/simple_primary_key.json?id__gt=2", - "/fixtures/simple_primary_key.json?id__gt=2&_shape=array", - "/fixtures/simple_primary_key.json?id__gt=2&_shape=array&_nl=on", - "/fixtures/simple_primary_key.json?id__gt=2&_shape=object", - ] == json_links - # 
And the CSV form - form = div.find("form") - assert form["action"].endswith("/simple_primary_key.csv") - inputs = [str(input) for input in form.find_all("input")] - assert [ - '', - '', - '', - '', - ] == inputs - - -@pytest.mark.asyncio -async def test_csv_json_export_links_include_labels_if_foreign_keys(ds_client): - response = await ds_client.get("/fixtures/facetable") - assert response.status_code == 200 - links = ( - Soup(response.text, "html.parser") - .find("p", {"class": "export-links"}) - .find_all("a") - ) - actual = [link["href"] for link in links] - expected = [ - "/fixtures/facetable.json?_labels=on", - "/fixtures/facetable.testall?_labels=on", - "/fixtures/facetable.testnone?_labels=on", - "/fixtures/facetable.testresponse?_labels=on", - "/fixtures/facetable.csv?_labels=on&_size=max", - "#export", - ] - assert expected == actual - - -@pytest.mark.asyncio -async def test_table_not_exists(ds_client): - assert "Table not found" in (await ds_client.get("/fixtures/blah")).text - - -@pytest.mark.asyncio -async def test_table_html_no_primary_key(ds_client): - response = await ds_client.get("/fixtures/no_primary_key") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - # We have disabled sorting for this table using metadata.json - assert ["content", "a", "b", "c"] == [ - th.string.strip() for th in table.select("thead th")[2:] - ] - expected = [ - [ - ''.format( - i, i - ), - f'', - f'', - f'', - f'', - f'', - ] - for i in range(1, 51) - ] - assert expected == [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] - - -@pytest.mark.asyncio -async def test_rowid_sortable_no_primary_key(ds_client): - response = await ds_client.get("/fixtures/no_primary_key") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - assert table["class"] == ["rows-and-columns"] - ths = table.find_all("th") - assert "rowid\xa0▼" == ths[1].find("a").string.strip() - - -@pytest.mark.asyncio -async def test_table_html_compound_primary_key(ds_client): - response = await ds_client.get("/fixtures/compound_primary_key") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - ths = table.find_all("th") - assert "Link" == ths[0].string.strip() - for expected_col, th in zip(("pk1", "pk2", "content"), ths[1:]): - a = th.find("a") - assert expected_col == a.string - assert th["class"] == [f"col-{expected_col}"] - assert a["href"].endswith(f"/compound_primary_key?_sort={expected_col}") - expected = [ - [ - '', - '', - '', - '', - ], - [ - '', - '', - '', - '', - ], - ] - assert [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] == expected - - -@pytest.mark.asyncio -async def test_table_html_foreign_key_links(ds_client): - response = await ds_client.get("/fixtures/foreign_key_references") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - actual = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - assert actual == [ - [ - '', - '', - '', - '', - '', - '', - ], - [ - '', - '', - '', - '', - '', - '', - ], - ] - - -@pytest.mark.asyncio -async def test_table_html_foreign_key_facets(ds_client): - response = await ds_client.get( - "/fixtures/foreign_key_references?_facet=foreign_key_with_blank_label" - ) - assert response.status_code == 200 - assert ( - '
  • - 1
  • ' - ) in response.text - - -@pytest.mark.asyncio -async def test_table_html_disable_foreign_key_links_with_labels(ds_client): - response = await ds_client.get( - "/fixtures/foreign_key_references?_labels=off&_size=1" - ) - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - actual = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - assert actual == [ - [ - '
    ', - '', - '', - '', - '', - '', - ] - ] - - -@pytest.mark.asyncio -async def test_table_html_foreign_key_custom_label_column(ds_client): - response = await ds_client.get("/fixtures/custom_foreign_key_label") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - expected = [ - [ - '', - '', - ] - ] - assert expected == [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_column_options", - [ - ("/fixtures/infinity", ["- column -", "rowid", "value"]), - ( - "/fixtures/primary_key_multiple_columns", - ["- column -", "id", "content", "content2"], - ), - ("/fixtures/compound_primary_key", ["- column -", "pk1", "pk2", "content"]), - ], -) -async def test_table_html_filter_form_column_options( - path, expected_column_options, ds_client -): - response = await ds_client.get(path) - assert response.status_code == 200 - form = Soup(response.text, "html.parser").find("form") - column_options = [ - o.attrs.get("value") or o.string - for o in form.select("select[name=_filter_column] option") - ] - assert expected_column_options == column_options - - -@pytest.mark.asyncio -async def test_table_html_filter_form_still_shows_nocol_columns(ds_client): - # https://github.com/simonw/datasette/issues/1503 - response = await ds_client.get("/fixtures/sortable?_nocol=sortable") - assert response.status_code == 200 - form = Soup(response.text, "html.parser").find("form") - assert [ - o.string - for o in form.select("select[name='_filter_column']")[0].select("option") - ] == [ - "- column -", - "pk1", - "pk2", - "content", - "sortable_with_nulls", - "sortable_with_nulls_2", - "text", - # Moved to the end because it is no longer returned by the query: - "sortable", - ] - - -@pytest.mark.asyncio -async def test_compound_primary_key_with_foreign_key_references(ds_client): - # e.g. 
a many-to-many table with a compound primary key on the two columns - response = await ds_client.get("/fixtures/searchable_tags") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - expected = [ - [ - '', - '', - '', - ], - [ - '', - '', - '', - ], - ] - assert expected == [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] - - -@pytest.mark.asyncio -async def test_view_html(ds_client): - response = await ds_client.get("/fixtures/simple_view?_size=3") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - ths = table.select("thead th") - assert 2 == len(ths) - assert ths[0].find("a") is not None - assert ths[0].find("a")["href"].endswith("/simple_view?_size=3&_sort=content") - assert ths[0].find("a").string.strip() == "content" - assert ths[1].find("a") is None - assert ths[1].string.strip() == "upper_content" - expected = [ - [ - '', - '', - ], - [ - '', - '', - ], - [ - '', - '', - ], - ] - assert expected == [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] - - -@pytest.mark.asyncio -async def test_table_metadata(ds_client): - response = await ds_client.get("/fixtures/simple_primary_key") - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") - # Page title should be custom and should be HTML escaped - assert "This <em>HTML</em> is escaped" == inner_html(soup.find("h1")) - # Description should be custom and NOT escaped (we used description_html) - assert "Simple primary key" == inner_html( - soup.find("div", {"class": "metadata-description"}) - ) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,has_object,has_stream,has_expand", - [ - ("/fixtures/no_primary_key", False, True, False), - ("/fixtures/complex_foreign_keys", True, False, True), - ], -) -async def test_advanced_export_box(ds_client, path, has_object, has_stream, has_expand): - response = await ds_client.get(path) - assert response.status_code == 200 - soup = Soup(response.text, "html.parser") - # JSON shape options - expected_json_shapes = ["default", "array", "newline-delimited"] - if has_object: - expected_json_shapes.append("object") - div = soup.find("div", {"class": "advanced-export"}) - assert expected_json_shapes == [a.text for a in div.find("p").find_all("a")] - # "stream all rows" option - if has_stream: - assert "stream all rows" in str(div) - # "expand labels" option - if has_expand: - assert "expand labels" in str(div) - - -@pytest.mark.asyncio -async def test_extra_where_clauses(ds_client): - response = await ds_client.get( - "/fixtures/facetable?_where=_neighborhood='Dogpatch'&_where=_city_id=1" - ) - soup = Soup(response.text, "html.parser") - div = soup.select(".extra-wheres")[0] - assert "2 extra where clauses" == div.find("h3").text - hrefs = [a["href"] for a in div.find_all("a")] - assert [ - "/fixtures/facetable?_where=_city_id%3D1", - "/fixtures/facetable?_where=_neighborhood%3D%27Dogpatch%27", - ] == hrefs - # These should also be persisted as hidden fields - inputs = soup.find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [("_where", "_neighborhood='Dogpatch'"), ("_where", "_city_id=1")] == [ - (hidden["name"], hidden["value"]) for hidden in hiddens - ] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_hidden", - [ - ("/fixtures/facetable?_size=10", [("_size", "10")]), - ( - "/fixtures/facetable?_size=10&_ignore=1&_ignore=2", - [ - ("_size", "10"), - 
("_ignore", "1"), - ("_ignore", "2"), - ], - ), - ], -) -async def test_other_hidden_form_fields(ds_client, path, expected_hidden): - response = await ds_client.get(path) - soup = Soup(response.text, "html.parser") - inputs = soup.find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == expected_hidden - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected_hidden", - [ - ("/fixtures/searchable?_search=terry", []), - ("/fixtures/searchable?_sort=text2", []), - ("/fixtures/searchable?_sort_desc=text2", []), - ("/fixtures/searchable?_sort=text2&_where=1", [("_where", "1")]), - ], -) -async def test_search_and_sort_fields_not_duplicated(ds_client, path, expected_hidden): - # https://github.com/simonw/datasette/issues/1214 - response = await ds_client.get(path) - soup = Soup(response.text, "html.parser") - inputs = soup.find("form").find_all("input") - hiddens = [i for i in inputs if i["type"] == "hidden"] - assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == expected_hidden - - -@pytest.mark.asyncio -async def test_binary_data_display_in_table(ds_client): - response = await ds_client.get("/fixtures/binary_data") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - expected_tds = [ - [ - '', - '', - '', - ], - [ - '', - '', - '', - ], - [ - '', - '', - '', - ], - ] - assert expected_tds == [ - [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] - - -def test_custom_table_include(): - with make_app_client( - template_dir=str(pathlib.Path(__file__).parent / "test_templates") - ) as client: - response = client.get("/fixtures/complex_foreign_keys") - assert response.status == 200 - assert ( - '
    ' - '1 - 2 - hello 1' - "
    " - ) == str(Soup(response.text, "html.parser").select_one("div.custom-table-row")) - - -@pytest.mark.asyncio -@pytest.mark.parametrize("json", (True, False)) -@pytest.mark.parametrize( - "params,error", - ( - ("?_sort=bad", "Cannot sort table by bad"), - ("?_sort_desc=bad", "Cannot sort table by bad"), - ( - "?_sort=state&_sort_desc=state", - "Cannot use _sort and _sort_desc at the same time", - ), - ), -) -async def test_sort_errors(ds_client, json, params, error): - path = "/fixtures/facetable{}{}".format( - ".json" if json else "", - params, - ) - response = await ds_client.get(path) - assert response.status_code == 400 - if json: - assert response.json() == { - "ok": False, - "error": error, - "status": 400, - "title": None, - } - else: - assert error in response.text - - -@pytest.mark.asyncio -async def test_metadata_sort(ds_client): - response = await ds_client.get("/fixtures/facet_cities") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - assert table["class"] == ["rows-and-columns"] - ths = table.find_all("th") - assert ["id", "name\xa0▼"] == [th.find("a").string.strip() for th in ths] - rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - expected = [ - [ - '
    ', - '', - ], - [ - '', - '', - ], - [ - '', - '', - ], - [ - '', - '', - ], - ] - assert expected == rows - # Make sure you can reverse that sort order - response = await ds_client.get("/fixtures/facet_cities?_sort_desc=name") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - assert list(reversed(expected)) == rows - - -@pytest.mark.asyncio -async def test_metadata_sort_desc(ds_client): - response = await ds_client.get("/fixtures/attraction_characteristic") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - assert table["class"] == ["rows-and-columns"] - ths = table.find_all("th") - assert ["pk\xa0▲", "name"] == [th.find("a").string.strip() for th in ths] - rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - expected = [ - [ - '', - '', - ], - [ - '', - '', - ], - ] - assert expected == rows - # Make sure you can reverse that sort order - response = await ds_client.get("/fixtures/attraction_characteristic?_sort=pk") - assert response.status_code == 200 - table = Soup(response.text, "html.parser").find("table") - rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] - assert list(reversed(expected)) == rows - - -@pytest.mark.parametrize( - "max_returned_rows,path,expected_num_facets,expected_ellipses,expected_ellipses_url", - ( - ( - 5, - # Default should show 2 facets - "/fixtures/facetable?_facet=_neighborhood", - 2, - True, - "/fixtures/facetable?_facet=_neighborhood&_facet_size=max", - ), - # _facet_size above max_returned_rows should show max_returned_rows (5) - ( - 5, - "/fixtures/facetable?_facet=_neighborhood&_facet_size=50", - 5, - True, - "/fixtures/facetable?_facet=_neighborhood&_facet_size=max", - ), - # If max_returned_rows is high enough, should return all - ( - 20, - "/fixtures/facetable?_facet=_neighborhood&_facet_size=max", - 14, - False, - None, - ), - # If num facets > max_returned_rows, show ... 
without a link - # _facet_size above max_returned_rows should show max_returned_rows (5) - ( - 5, - "/fixtures/facetable?_facet=_neighborhood&_facet_size=max", - 5, - True, - None, - ), - ), -) -def test_facet_more_links( - max_returned_rows, - path, - expected_num_facets, - expected_ellipses, - expected_ellipses_url, -): - with make_app_client( - settings={"max_returned_rows": max_returned_rows, "default_facet_size": 2} - ) as client: - response = client.get(path) - soup = Soup(response.body, "html.parser") - lis = soup.select("#facet-neighborhood-b352a7 ul li:not(.facet-truncated)") - facet_truncated = soup.select_one(".facet-truncated") - assert len(lis) == expected_num_facets - if not expected_ellipses: - assert facet_truncated is None - else: - if expected_ellipses_url: - assert facet_truncated.find("a")["href"] == expected_ellipses_url - else: - assert facet_truncated.find("a") is None - - -def test_unavailable_table_does_not_break_sort_relationships(): - # https://github.com/simonw/datasette/issues/1305 - with make_app_client( - config={ - "databases": { - "fixtures": {"tables": {"foreign_key_references": {"allow": False}}} - } - } - ) as client: - response = client.get("/?_sort=relationships") - assert response.status == 200 - - -@pytest.mark.asyncio -async def test_column_metadata(ds_client): - response = await ds_client.get("/fixtures/roadside_attractions") - soup = Soup(response.text, "html.parser") - dl = soup.find("dl") - assert [(dt.text, dt.next_sibling.text) for dt in dl.find_all("dt")] == [ - ("address", "The street address for the attraction"), - ("name", "The name of the attraction"), - ] - assert ( - soup.select("th[data-column=name]")[0]["data-column-description"] - == "The name of the attraction" - ) - assert ( - soup.select("th[data-column=address]")[0]["data-column-description"] - == "The street address for the attraction" - ) - - -def test_facet_total(): - # https://github.com/simonw/datasette/issues/1423 - # https://github.com/simonw/datasette/issues/1556 - with make_app_client(settings={"max_returned_rows": 100}) as client: - path = "/fixtures/sortable?_facet=content&_facet=pk1" - response = client.get(path) - assert response.status == 200 - fragments = ( - '>30', - '8', - ) - for fragment in fragments: - assert fragment in response.text - - -@pytest.mark.asyncio -async def test_sort_rowid_with_next(ds_client): - # https://github.com/simonw/datasette/issues/1470 - response = await ds_client.get("/fixtures/binary_data?_size=1&_next=1&_sort=rowid") - assert response.status_code == 200 - - -def assert_querystring_equal(expected, actual): - assert sorted(expected.split("&")) == sorted(actual.split("&")) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "path,expected", - ( - ( - "/fixtures/facetable", - "fixtures: facetable: 15 rows", - ), - ( - "/fixtures/facetable?on_earth__exact=1", - "fixtures: facetable: 14 rows where on_earth = 1", - ), - ), -) -async def test_table_page_title(ds_client, path, expected): - response = await ds_client.get(path) - title = Soup(response.text, "html.parser").find("title").text - assert title == expected - - -@pytest.mark.asyncio -async def test_table_post_method_not_allowed(ds_client): - response = await ds_client.post("/fixtures/facetable") - assert response.status_code == 405 - assert "Method not allowed" in response.text - - -@pytest.mark.parametrize("allow_facet", (True, False)) -def test_allow_facet_off(allow_facet): - with make_app_client(settings={"allow_facet": allow_facet}) as client: - response = 
client.get("/fixtures/facetable") - expected = "DATASETTE_ALLOW_FACET = {};".format( - "true" if allow_facet else "false" - ) - assert expected in response.text - if allow_facet: - assert "Suggested facets" in response.text - else: - assert "Suggested facets" not in response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "size,title,length_bytes", - ( - (2000, ' title="2.0 KB"', "2,000"), - (20000, ' title="19.5 KB"', "20,000"), - (20, "", "20"), - ), -) -async def test_format_of_binary_links(size, title, length_bytes): - ds = Datasette() - db_name = "binary-links-{}".format(size) - db = ds.add_memory_database(db_name) - sql = "select zeroblob({}) as blob".format(size) - await db.execute_write("create table blobs as {}".format(sql)) - response = await ds.client.get("/{}/blobs".format(db_name)) - assert response.status_code == 200 - expected = "{}><Binary: {} bytes>".format(title, length_bytes) - assert expected in response.text - # And test with arbitrary SQL query too - sql_response = await ds.client.get( - "{}/-/query".format(db_name), params={"sql": sql} - ) - assert sql_response.status_code == 200 - assert expected in sql_response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "config", - ( - # Blocked at table level - { - "databases": { - "foreign_key_labels": { - "tables": { - # Table a is only visible to root - "a": {"allow": {"id": "root"}}, - } - } - } - }, - # Blocked at database level - { - "databases": { - "foreign_key_labels": { - # Only root can view this database - "allow": {"id": "root"}, - "tables": { - # But table b is visible to everyone - "b": {"allow": True}, - }, - } - } - }, - # Blocked at the instance level - { - "allow": {"id": "root"}, - "databases": { - "foreign_key_labels": { - "tables": { - # Table b is visible to everyone - "b": {"allow": True}, - } - } - }, - }, - ), -) -async def test_foreign_key_labels_obey_permissions(config): - ds = Datasette(config=config) - db = ds.add_memory_database("foreign_key_labels") - await db.execute_write( - "create table if not exists a(id integer primary key, name text)" - ) - await db.execute_write("insert or replace into a (id, name) values (1, 'hello')") - await db.execute_write( - "create table if not exists b(id integer primary key, name text, a_id integer references a(id))" - ) - await db.execute_write( - "insert or replace into b (id, name, a_id) values (1, 'world', 1)" - ) - # Anonymous user can see table b but not table a - blah = await ds.client.get("/foreign_key_labels.json") - anon_a = await ds.client.get("/foreign_key_labels/a.json?_labels=on") - assert anon_a.status_code == 403 - anon_b = await ds.client.get("/foreign_key_labels/b.json?_labels=on") - assert anon_b.status_code == 200 - # root user can see both - cookies = {"ds_actor": ds.sign({"a": {"id": "root"}}, "actor")} - root_a = await ds.client.get( - "/foreign_key_labels/a.json?_labels=on", cookies=cookies - ) - assert root_a.status_code == 200 - root_b = await ds.client.get( - "/foreign_key_labels/b.json?_labels=on", cookies=cookies - ) - assert root_b.status_code == 200 - # Labels should have been expanded for root - assert root_b.json() == { - "ok": True, - "next": None, - "rows": [{"id": 1, "name": "world", "a_id": {"value": 1, "label": "hello"}}], - "truncated": False, - } - # But not for anon - assert anon_b.json() == { - "ok": True, - "next": None, - "rows": [{"id": 1, "name": "world", "a_id": 1}], - "truncated": False, - } - - -def test_foreign_keys_special_character_in_database_name(app_client_with_dot): - # 
https://github.com/simonw/datasette/pull/2476 - response = app_client_with_dot.get("/fixtures~2Edot/complex_foreign_keys") - assert 'world' in response.text diff --git a/tests/test_templates/pages/202.html b/tests/test_templates/pages/202.html deleted file mode 100644 index 43a313b2..00000000 --- a/tests/test_templates/pages/202.html +++ /dev/null @@ -1 +0,0 @@ -{{ custom_status(202) }}202! \ No newline at end of file diff --git a/tests/test_templates/pages/about.html b/tests/test_templates/pages/about.html deleted file mode 100644 index 11d78862..00000000 --- a/tests/test_templates/pages/about.html +++ /dev/null @@ -1 +0,0 @@ -ABOUT! view_name:{{ view_name }} \ No newline at end of file diff --git a/tests/test_templates/pages/atom.html b/tests/test_templates/pages/atom.html deleted file mode 100644 index 1c7faafd..00000000 --- a/tests/test_templates/pages/atom.html +++ /dev/null @@ -1 +0,0 @@ -{{ custom_header("content-type", "application/xml") }} \ No newline at end of file diff --git a/tests/test_templates/pages/headers.html b/tests/test_templates/pages/headers.html deleted file mode 100644 index 8a59d4aa..00000000 --- a/tests/test_templates/pages/headers.html +++ /dev/null @@ -1 +0,0 @@ -{{ custom_header("x-this-is-foo", "foo") }}FOO{{ custom_header("x-this-is-bar", "bar") }}BAR \ No newline at end of file diff --git a/tests/test_templates/pages/nested/nest.html b/tests/test_templates/pages/nested/nest.html deleted file mode 100644 index 5510f99e..00000000 --- a/tests/test_templates/pages/nested/nest.html +++ /dev/null @@ -1 +0,0 @@ -Nest! \ No newline at end of file diff --git a/tests/test_templates/pages/redirect.html b/tests/test_templates/pages/redirect.html deleted file mode 100644 index 36a71554..00000000 --- a/tests/test_templates/pages/redirect.html +++ /dev/null @@ -1 +0,0 @@ -{{ custom_redirect("/example") }} \ No newline at end of file diff --git a/tests/test_templates/pages/redirect2.html b/tests/test_templates/pages/redirect2.html deleted file mode 100644 index b7ae092a..00000000 --- a/tests/test_templates/pages/redirect2.html +++ /dev/null @@ -1 +0,0 @@ -{{ custom_redirect("/example", 301) }} \ No newline at end of file diff --git a/tests/test_templates/pages/request.html b/tests/test_templates/pages/request.html deleted file mode 100644 index aa8e0b62..00000000 --- a/tests/test_templates/pages/request.html +++ /dev/null @@ -1 +0,0 @@ -path:{{ request.path }} \ No newline at end of file diff --git a/tests/test_templates/pages/route_{name}.html b/tests/test_templates/pages/route_{name}.html deleted file mode 100644 index 42bd1e04..00000000 --- a/tests/test_templates/pages/route_{name}.html +++ /dev/null @@ -1,2 +0,0 @@ -{% if name == "OhNo" %}{{ raise_404("Oh no") }}{% endif %} -

    Hello from {{ name }}

    \ No newline at end of file diff --git a/tests/test_templates/pages/topic_{topic}.html b/tests/test_templates/pages/topic_{topic}.html deleted file mode 100644 index f07b6b07..00000000 --- a/tests/test_templates/pages/topic_{topic}.html +++ /dev/null @@ -1 +0,0 @@ -Topic page for {{ topic }} \ No newline at end of file diff --git a/tests/test_templates/pages/topic_{topic}/{slug}.html b/tests/test_templates/pages/topic_{topic}/{slug}.html deleted file mode 100644 index cbe5344f..00000000 --- a/tests/test_templates/pages/topic_{topic}/{slug}.html +++ /dev/null @@ -1 +0,0 @@ -Slug: {{ slug }}, Topic: {{ topic }} \ No newline at end of file diff --git a/tests/test_templates/show_json.html b/tests/test_templates/show_json.html index cff04fb4..bbf1bc06 100644 --- a/tests/test_templates/show_json.html +++ b/tests/test_templates/show_json.html @@ -5,5 +5,4 @@ Test data for extra_template_vars:
    {{ extra_template_vars|safe }}
    {{ extra_template_vars_from_awaitable|safe }}
    -{{ query_database("select sqlite_version();") }}
    {% endblock %} diff --git a/tests/test_tracer.py b/tests/test_tracer.py deleted file mode 100644 index 1e0d7001..00000000 --- a/tests/test_tracer.py +++ /dev/null @@ -1,99 +0,0 @@ -import pytest -from .fixtures import make_app_client - - -@pytest.mark.parametrize("trace_debug", (True, False)) -def test_trace(trace_debug): - with make_app_client(settings={"trace_debug": trace_debug}) as client: - response = client.get("/fixtures/simple_primary_key.json?_trace=1") - assert response.status == 200 - - data = response.json - if not trace_debug: - assert "_trace" not in data - return - - assert "_trace" in data - trace_info = data["_trace"] - assert isinstance(trace_info["request_duration_ms"], float) - assert isinstance(trace_info["sum_trace_duration_ms"], float) - assert isinstance(trace_info["num_traces"], int) - assert isinstance(trace_info["traces"], list) - traces = trace_info["traces"] - assert len(traces) == trace_info["num_traces"] - for trace in traces: - assert isinstance(trace["type"], str) - assert isinstance(trace["start"], float) - assert isinstance(trace["end"], float) - assert trace["duration_ms"] == (trace["end"] - trace["start"]) * 1000 - assert isinstance(trace["traceback"], list) - assert isinstance(trace["database"], str) - assert isinstance(trace["sql"], str) - assert isinstance(trace.get("params"), (list, dict, None.__class__)) - - sqls = [trace["sql"] for trace in traces if "sql" in trace] - # There should be a mix of different types of SQL statement - expected = ( - "CREATE TABLE ", - "PRAGMA ", - "INSERT OR REPLACE INTO ", - "INSERT INTO", - "select ", - ) - for prefix in expected: - assert any( - sql.startswith(prefix) for sql in sqls - ), "No trace beginning with: {}".format(prefix) - - # Should be at least one executescript - assert any(trace for trace in traces if trace.get("executescript")) - # And at least one executemany - execute_manys = [trace for trace in traces if trace.get("executemany")] - assert execute_manys - assert all(isinstance(trace["count"], int) for trace in execute_manys) - - -def test_trace_silently_fails_for_large_page(): - # Max HTML size is 256KB - with make_app_client(settings={"trace_debug": True}) as client: - # Small response should have trace - small_response = client.get("/fixtures/simple_primary_key.json?_trace=1") - assert small_response.status == 200 - assert "_trace" in small_response.json - - # Big response should not - big_response = client.get( - "/fixtures/-/query.json", - params={"_trace": 1, "sql": "select zeroblob(1024 * 256)"}, - ) - assert big_response.status == 200 - assert "_trace" not in big_response.json - - -def test_trace_query_errors(): - with make_app_client(settings={"trace_debug": True}) as client: - response = client.get( - "/fixtures/-/query.json", - params={"_trace": 1, "sql": "select * from non_existent_table"}, - ) - assert response.status == 400 - - data = response.json - assert "_trace" in data - trace_info = data["_trace"] - assert trace_info["traces"][-1]["error"] == "no such table: non_existent_table" - - -def test_trace_parallel_queries(): - with make_app_client(settings={"trace_debug": True}) as client: - response = client.get("/parallel-queries?_trace=1") - assert response.status == 200 - - data = response.json - assert data["one"] == 1 - assert data["two"] == 2 - trace_info = data["_trace"] - traces = [trace for trace in trace_info["traces"] if "sql" in trace] - one, two = traces - # "two" should have started before "one" ended - assert two["start"] < one["end"] diff --git a/tests/test_utils.py 
b/tests/test_utils.py index b8d047e9..e9e722b8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,14 +2,13 @@ Tests for various datasette helper functions. """ -from datasette.app import Datasette from datasette import utils from datasette.utils.asgi import Request -from datasette.utils.sqlite import sqlite3 +from datasette.filters import Filters import json import os -import pathlib import pytest +import sqlite3 import tempfile from unittest.mock import patch @@ -20,8 +19,8 @@ from unittest.mock import patch ("foo", ["foo"]), ("foo,bar", ["foo", "bar"]), ("123,433,112", ["123", "433", "112"]), - ("123~2C433,112", ["123,433", "112"]), - ("123~2F433~2F112", ["123/433/112"]), + ("123%2C433,112", ["123,433", "112"]), + ("123%2F433%2F112", ["123/433/112"]), ], ) def test_urlsafe_components(path, expected): @@ -94,7 +93,7 @@ def test_path_with_replaced_args(path, args, expected): "row,pks,expected_path", [ ({"A": "foo", "B": "bar"}, ["A", "B"], "foo,bar"), - ({"A": "f,o", "B": "bar"}, ["A", "B"], "f~2Co,bar"), + ({"A": "f,o", "B": "bar"}, ["A", "B"], "f%2Co,bar"), ({"A": 123}, ["A"], "123"), ( utils.CustomRow( @@ -138,12 +137,7 @@ def test_custom_json_encoder(obj, expected): "bad_sql", [ "update blah;", - "-- sql comment to skip\nupdate blah;", - "update blah set some_column='# Hello there\n\n* This is a list\n* of items\n--\n[And a link](https://github.com/simonw/datasette-render-markdown).'\nas demo_markdown", - "PRAGMA case_sensitive_like = true", - "SELECT * FROM pragma_not_on_allow_list('idx52')", - "/* This comment is not valid. select 1", - "/**/\nupdate foo set bar = 1\n/* test */ select 1", + "PRAGMA case_sensitive_like = true" "SELECT * FROM pragma_index_info('idx52')", ], ) def test_validate_sql_select_bad(bad_sql): @@ -156,34 +150,16 @@ def test_validate_sql_select_bad(bad_sql): [ "select count(*) from airports", "select foo from bar", - "--sql comment to skip\nselect foo from bar", - "select '# Hello there\n\n* This is a list\n* of items\n--\n[And a link](https://github.com/simonw/datasette-render-markdown).'\nas demo_markdown", "select 1 + 1", - "explain select 1 + 1", - "explain\nselect 1 + 1", - "explain query plan select 1 + 1", - "explain query plan\nselect 1 + 1", "SELECT\nblah FROM foo", "WITH RECURSIVE cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 10) SELECT x FROM cnt;", - "explain WITH RECURSIVE cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 10) SELECT x FROM cnt;", - "explain query plan WITH RECURSIVE cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 10) SELECT x FROM cnt;", - "SELECT * FROM pragma_index_info('idx52')", - "select * from pragma_table_xinfo('table')", - # Various types of comment - "-- comment\nselect 1", - "-- one line\n -- two line\nselect 1", - " /* comment */\nselect 1", - " /* comment */select 1", - "/* comment */\n -- another\n /* one more */ select 1", - "/* This comment \n has multiple lines */\nselect 1", ], ) def test_validate_sql_select_good(good_sql): utils.validate_sql_select(good_sql) -@pytest.mark.parametrize("open_quote,close_quote", [('"', '"'), ("[", "]")]) -def test_detect_fts(open_quote, close_quote): +def test_detect_fts(): sql = """ CREATE TABLE "Dumb_Table" ( "TreeID" INTEGER, @@ -199,11 +175,9 @@ def test_detect_fts(open_quote, close_quote): "qCaretaker" TEXT ); CREATE VIEW Test_View AS SELECT * FROM Dumb_Table; - CREATE VIRTUAL TABLE {open}Street_Tree_List_fts{close} USING FTS4 ("qAddress", "qCaretaker", "qSpecies", content={open}Street_Tree_List{close}); + CREATE VIRTUAL TABLE 
"Street_Tree_List_fts" USING FTS4 ("qAddress", "qCaretaker", "qSpecies", content="Street_Tree_List"); CREATE VIRTUAL TABLE r USING rtree(a, b, c); - """.format( - open=open_quote, close=close_quote - ) + """ conn = utils.sqlite3.connect(":memory:") conn.executescript(sql) assert None is utils.detect_fts(conn, "Dumb_Table") @@ -212,22 +186,6 @@ def test_detect_fts(open_quote, close_quote): assert "Street_Tree_List_fts" == utils.detect_fts(conn, "Street_Tree_List") -@pytest.mark.parametrize("table", ("regular", "has'single quote")) -def test_detect_fts_different_table_names(table): - sql = """ - CREATE TABLE [{table}] ( - "TreeID" INTEGER, - "qSpecies" TEXT - ); - CREATE VIRTUAL TABLE [{table}_fts] USING FTS4 ("qSpecies", content="{table}"); - """.format( - table=table - ) - conn = utils.sqlite3.connect(":memory:") - conn.executescript(sql) - assert "{table}_fts".format(table=table) == utils.detect_fts(conn, table) - - @pytest.mark.parametrize( "url,expected", [ @@ -260,8 +218,7 @@ def test_to_css_class(s, expected): def test_temporary_docker_directory_uses_hard_link(): with tempfile.TemporaryDirectory() as td: os.chdir(td) - with open("hello", "w") as fp: - fp.write("world") + open("hello", "w").write("world") # Default usage of this should use symlink with utils.temporary_docker_directory( files=["hello"], @@ -275,11 +232,9 @@ def test_temporary_docker_directory_uses_hard_link(): install=[], spatialite=False, version_note=None, - secret="secret", ) as temp_docker: hello = os.path.join(temp_docker, "hello") - with open(hello) as fp: - assert "world" == fp.read() + assert "world" == open(hello).read() # It should be a hard link assert 2 == os.stat(hello).st_nlink @@ -290,8 +245,7 @@ def test_temporary_docker_directory_uses_copy_if_hard_link_fails(mock_link): mock_link.side_effect = OSError with tempfile.TemporaryDirectory() as td: os.chdir(td) - with open("hello", "w") as fp: - fp.write("world") + open("hello", "w").write("world") # Default usage of this should use symlink with utils.temporary_docker_directory( files=["hello"], @@ -305,11 +259,9 @@ def test_temporary_docker_directory_uses_copy_if_hard_link_fails(mock_link): install=[], spatialite=False, version_note=None, - secret=None, ) as temp_docker: hello = os.path.join(temp_docker, "hello") - with open(hello) as fp: - assert "world" == fp.read() + assert "world" == open(hello).read() # It should be a copy, not a hard link assert 1 == os.stat(hello).st_nlink @@ -317,8 +269,7 @@ def test_temporary_docker_directory_uses_copy_if_hard_link_fails(mock_link): def test_temporary_docker_directory_quotes_args(): with tempfile.TemporaryDirectory() as td: os.chdir(td) - with open("hello", "w") as fp: - fp.write("world") + open("hello", "w").write("world") with utils.temporary_docker_directory( files=["hello"], name="t", @@ -331,14 +282,11 @@ def test_temporary_docker_directory_quotes_args(): install=[], spatialite=False, version_note="$PWD", - secret="secret", ) as temp_docker: df = os.path.join(temp_docker, "Dockerfile") - with open(df) as fp: - df_contents = fp.read() + df_contents = open(df).read() assert "'$PWD'" in df_contents assert "'--$HOME'" in df_contents - assert "ENV DATASETTE_SECRET 'secret'" in df_contents def test_compound_keys_after_sql(): @@ -361,6 +309,31 @@ def test_compound_keys_after_sql(): ) +async def table_exists(table): + return table == "exists.csv" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "table_and_format,expected_table,expected_format", + [ + ("blah", "blah", None), + ("blah.csv", "blah", "csv"), + 
("blah.json", "blah", "json"), + ("blah.baz", "blah.baz", None), + ("exists.csv", "exists.csv", None), + ], +) +async def test_resolve_table_and_format( + table_and_format, expected_table, expected_format +): + actual_table, actual_format = await utils.resolve_table_and_format( + table_and_format, table_exists, ["json"] + ) + assert expected_table == actual_table + assert expected_format == actual_format + + def test_table_columns(): conn = sqlite3.connect(":memory:") conn.executescript( @@ -378,7 +351,9 @@ def test_table_columns(): ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"), ("/foo/bar", "json", {}, "/foo/bar.json"), ("/foo/bar", "csv", {}, "/foo/bar.csv"), + ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"), ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"), + ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"), ( "/sf-trees/Street_Tree_List?_search=cherry&_size=1000", "csv", @@ -389,7 +364,7 @@ def test_table_columns(): ) def test_path_with_format(path, format, extra_qs, expected): request = Request.fake(path) - actual = utils.path_with_format(request=request, format=format, extra_qs=extra_qs) + actual = utils.path_with_format(request, format, extra_qs) assert expected == actual @@ -406,351 +381,3 @@ def test_path_with_format(path, format, extra_qs, expected): ) def test_format_bytes(bytes, expected): assert expected == utils.format_bytes(bytes) - - -@pytest.mark.parametrize( - "query,expected", - [ - ("dog", '"dog"'), - ("cat,", '"cat,"'), - ("cat dog", '"cat" "dog"'), - # If a phrase is already double quoted, leave it so - ('"cat dog"', '"cat dog"'), - ('"cat dog" fish', '"cat dog" "fish"'), - # Sensibly handle unbalanced double quotes - ('cat"', '"cat"'), - ('"cat dog" "fish', '"cat dog" "fish"'), - ], -) -def test_escape_fts(query, expected): - assert expected == utils.escape_fts(query) - - -@pytest.mark.parametrize( - "input,expected", - [ - ("dog", "dog"), - ('dateutil_parse("1/2/2020")', r"dateutil_parse(\0000221/2/2020\000022)"), - ("this\r\nand\r\nthat", r"this\00000Aand\00000Athat"), - ], -) -def test_escape_css_string(input, expected): - assert expected == utils.escape_css_string(input) - - -def test_check_connection_spatialite_raises(): - path = str(pathlib.Path(__file__).parent / "spatialite.db") - conn = sqlite3.connect(path) - with pytest.raises(utils.SpatialiteConnectionProblem): - utils.check_connection(conn) - - -def test_check_connection_passes(): - conn = sqlite3.connect(":memory:") - utils.check_connection(conn) - - -def test_call_with_supported_arguments(): - def foo(a, b): - return f"{a}+{b}" - - assert "1+2" == utils.call_with_supported_arguments(foo, a=1, b=2) - assert "1+2" == utils.call_with_supported_arguments(foo, a=1, b=2, c=3) - - with pytest.raises(TypeError): - utils.call_with_supported_arguments(foo, a=1) - - -@pytest.mark.parametrize( - "data,should_raise", - [ - ([["foo", "bar"], ["foo", "baz"]], False), - ([("foo", "bar"), ("foo", "baz")], False), - ((["foo", "bar"], ["foo", "baz"]), False), - ([["foo", "bar"], ["foo", "baz", "bax"]], True), - ({"foo": ["bar", "baz"]}, False), - ({"foo": ("bar", "baz")}, False), - ({"foo": "bar"}, True), - ], -) -def test_multi_params(data, should_raise): - if should_raise: - with pytest.raises(AssertionError): - utils.MultiParams(data) - return - p1 = utils.MultiParams(data) - assert "bar" == p1["foo"] - assert ["bar", "baz"] == list(p1.getlist("foo")) - - -@pytest.mark.parametrize( - "actor,allow,expected", - [ - # Default is to allow: - (None, None, True), - # {} means 
deny-all: - (None, {}, False), - ({"id": "root"}, {}, False), - # true means allow-all - ({"id": "root"}, True, True), - (None, True, True), - # false means deny-all - ({"id": "root"}, False, False), - (None, False, False), - # Special case for "unauthenticated": true - (None, {"unauthenticated": True}, True), - (None, {"unauthenticated": False}, False), - # Match on just one property: - (None, {"id": "root"}, False), - ({"id": "root"}, None, True), - ({"id": "simon", "staff": True}, {"staff": True}, True), - ({"id": "simon", "staff": False}, {"staff": True}, False), - # Special "*" value for any key: - ({"id": "root"}, {"id": "*"}, True), - ({}, {"id": "*"}, False), - ({"name": "root"}, {"id": "*"}, False), - # Supports single strings or list of values: - ({"id": "root"}, {"id": "bob"}, False), - ({"id": "root"}, {"id": ["bob"]}, False), - ({"id": "root"}, {"id": "root"}, True), - ({"id": "root"}, {"id": ["root"]}, True), - # Any matching role will work: - ({"id": "garry", "roles": ["staff", "dev"]}, {"roles": ["staff"]}, True), - ({"id": "garry", "roles": ["staff", "dev"]}, {"roles": ["dev"]}, True), - ({"id": "garry", "roles": ["staff", "dev"]}, {"roles": ["otter"]}, False), - ({"id": "garry", "roles": ["staff", "dev"]}, {"roles": ["dev", "otter"]}, True), - ({"id": "garry", "roles": []}, {"roles": ["staff"]}, False), - ({"id": "garry"}, {"roles": ["staff"]}, False), - # Any single matching key works: - ({"id": "root"}, {"bot_id": "my-bot", "id": ["root"]}, True), - ], -) -def test_actor_matches_allow(actor, allow, expected): - assert expected == utils.actor_matches_allow(actor, allow) - - -@pytest.mark.parametrize( - "config,expected", - [ - ({"foo": "bar"}, {"foo": "bar"}), - ({"$env": "FOO"}, "x"), - ({"k": {"$env": "FOO"}}, {"k": "x"}), - ([{"k": {"$env": "FOO"}}, {"z": {"$env": "FOO"}}], [{"k": "x"}, {"z": "x"}]), - ({"k": [{"in_a_list": {"$env": "FOO"}}]}, {"k": [{"in_a_list": "x"}]}), - ], -) -def test_resolve_env_secrets(config, expected): - assert expected == utils.resolve_env_secrets(config, {"FOO": "x"}) - - -@pytest.mark.parametrize( - "actor,expected", - [ - ({"id": "blah"}, "blah"), - ({"id": "blah", "login": "l"}, "l"), - ({"id": "blah", "login": "l"}, "l"), - ({"id": "blah", "login": "l", "username": "u"}, "u"), - ({"login": "l", "name": "n"}, "n"), - ( - {"id": "blah", "login": "l", "username": "u", "name": "n", "display": "d"}, - "d", - ), - ({"weird": "shape"}, "{'weird': 'shape'}"), - ], -) -def test_display_actor(actor, expected): - assert expected == utils.display_actor(actor) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "dbs,expected_path", - [ - (["one_table"], "/one/one"), - (["two_tables"], "/two"), - (["one_table", "two_tables"], "/"), - ], -) -async def test_initial_path_for_datasette(tmp_path_factory, dbs, expected_path): - db_dir = tmp_path_factory.mktemp("dbs") - one_table = str(db_dir / "one.db") - sqlite3.connect(one_table).execute("create table one (id integer primary key)") - two_tables = str(db_dir / "two.db") - sqlite3.connect(two_tables).execute("create table two (id integer primary key)") - sqlite3.connect(two_tables).execute("create table three (id integer primary key)") - datasette = Datasette( - [{"one_table": one_table, "two_tables": two_tables}[db] for db in dbs] - ) - path = await utils.initial_path_for_datasette(datasette) - assert path == expected_path - - -@pytest.mark.parametrize( - "content,expected", - ( - ("title: Hello", {"title": "Hello"}), - ('{"title": "Hello"}', {"title": "Hello"}), - ("{{ this }} is {{ bad }}", None), - 
), -) -def test_parse_metadata(content, expected): - if expected is None: - with pytest.raises(utils.BadMetadataError): - utils.parse_metadata(content) - else: - assert utils.parse_metadata(content) == expected - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "sql,expected", - ( - ("select 1", []), - ("select 1 + :one", ["one"]), - ("select 1 + :one + :two", ["one", "two"]), - ("select 'bob' || '0:00' || :cat", ["cat"]), - ("select this is invalid :one, :two, :three", ["one", "two", "three"]), - ), -) -@pytest.mark.parametrize("use_async_version", (False, True)) -async def test_named_parameters(sql, expected, use_async_version): - ds = Datasette([], memory=True) - db = ds.get_database("_memory") - if use_async_version: - params = await utils.derive_named_parameters(db, sql) - else: - params = utils.named_parameters(sql) - assert params == expected - - -@pytest.mark.parametrize( - "original,expected", - ( - ("abc", "abc"), - ("/foo/bar", "~2Ffoo~2Fbar"), - ("/-/bar", "~2F-~2Fbar"), - ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"), - (r"%~-/", "~25~7E-~2F"), - ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"), - ("with space", "with+space"), - ), -) -def test_tilde_encoding(original, expected): - actual = utils.tilde_encode(original) - assert actual == expected - # And test round-trip - assert original == utils.tilde_decode(actual) - - -@pytest.mark.parametrize( - "url,length,expected", - ( - ("https://example.com/", 5, "http…"), - ("https://example.com/foo/bar", 15, "https://exampl…"), - ("https://example.com/foo/bar/baz.jpg", 30, "https://example.com/foo/ba….jpg"), - # Extensions longer than 4 characters are not treated specially: - ("https://example.com/foo/bar/baz.jpeg2", 30, "https://example.com/foo/bar/b…"), - ( - "https://example.com/foo/bar/baz.jpeg2", - None, - "https://example.com/foo/bar/baz.jpeg2", - ), - ), -) -def test_truncate_url(url, length, expected): - actual = utils.truncate_url(url, length) - assert actual == expected - - -@pytest.mark.parametrize( - "pairs,expected", - ( - # Simple nested objects - ([("a", "b")], {"a": "b"}), - ([("a.b", "c")], {"a": {"b": "c"}}), - # JSON literals - ([("a.b", "true")], {"a": {"b": True}}), - ([("a.b", "false")], {"a": {"b": False}}), - ([("a.b", "null")], {"a": {"b": None}}), - ([("a.b", "1")], {"a": {"b": 1}}), - ([("a.b", "1.1")], {"a": {"b": 1.1}}), - # Nested JSON literals - ([("a.b", '{"foo": "bar"}')], {"a": {"b": {"foo": "bar"}}}), - ([("a.b", "[1, 2, 3]")], {"a": {"b": [1, 2, 3]}}), - # JSON strings are preserved - ([("a.b", '"true"')], {"a": {"b": "true"}}), - ([("a.b", '"[1, 2, 3]"')], {"a": {"b": "[1, 2, 3]"}}), - # Later keys over-ride the previous - ( - [ - ("a", "b"), - ("a.b", "c"), - ], - {"a": {"b": "c"}}, - ), - ( - [ - ("settings.trace_debug", "true"), - ("plugins.datasette-ripgrep.path", "/etc"), - ("settings.trace_debug", "false"), - ], - { - "settings": { - "trace_debug": False, - }, - "plugins": { - "datasette-ripgrep": { - "path": "/etc", - } - }, - }, - ), - ), -) -def test_pairs_to_nested_config(pairs, expected): - actual = utils.pairs_to_nested_config(pairs) - assert actual == expected - - -@pytest.mark.asyncio -async def test_calculate_etag(tmp_path): - path = tmp_path / "test.txt" - path.write_text("hello") - etag = '"5d41402abc4b2a76b9719d911017c592"' - assert etag == await utils.calculate_etag(path) - assert utils._etag_cache[path] == etag - utils._etag_cache[path] = "hash" - assert "hash" == await utils.calculate_etag(path) - utils._etag_cache.clear() - - -@pytest.mark.parametrize( - "dict1,dict2,expected", - 
[ - # Basic update - ({"a": 1, "b": 2}, {"b": 3, "c": 4}, {"a": 1, "b": 3, "c": 4}), - # Nested dictionary update - ( - {"a": 1, "b": {"x": 10, "y": 20}}, - {"b": {"y": 30, "z": 40}}, - {"a": 1, "b": {"x": 10, "y": 30, "z": 40}}, - ), - # Deep nested update - ( - {"a": {"b": {"c": 1}}}, - {"a": {"b": {"d": 2}}}, - {"a": {"b": {"c": 1, "d": 2}}}, - ), - # Update with mixed types - ( - {"a": 1, "b": {"x": 10}}, - {"b": {"y": 20}, "c": [1, 2, 3]}, - {"a": 1, "b": {"x": 10, "y": 20}, "c": [1, 2, 3]}, - ), - ], -) -def test_deep_dict_update(dict1, dict2, expected): - result = utils.deep_dict_update(dict1, dict2) - assert result == expected - # Check that the original dict1 was modified - assert dict1 == expected diff --git a/tests/test_utils_check_callable.py b/tests/test_utils_check_callable.py deleted file mode 100644 index 4f72f9ff..00000000 --- a/tests/test_utils_check_callable.py +++ /dev/null @@ -1,46 +0,0 @@ -from datasette.utils.check_callable import check_callable -import pytest - - -class AsyncClass: - async def __call__(self): - pass - - -class NotAsyncClass: - def __call__(self): - pass - - -class ClassNoCall: - pass - - -async def async_func(): - pass - - -def non_async_func(): - pass - - -@pytest.mark.parametrize( - "obj,expected_is_callable,expected_is_async_callable", - ( - (async_func, True, True), - (non_async_func, True, False), - (AsyncClass(), True, True), - (NotAsyncClass(), True, False), - (ClassNoCall(), False, False), - (AsyncClass, True, False), - (NotAsyncClass, True, False), - (ClassNoCall, True, False), - ("", False, False), - (1, False, False), - (str, True, False), - ), -) -def test_check_callable(obj, expected_is_callable, expected_is_async_callable): - status = check_callable(obj) - assert status.is_callable == expected_is_callable - assert status.is_async_callable == expected_is_async_callable diff --git a/tests/test_utils_permissions.py b/tests/test_utils_permissions.py deleted file mode 100644 index b412de0f..00000000 --- a/tests/test_utils_permissions.py +++ /dev/null @@ -1,612 +0,0 @@ -import pytest -from datasette.app import Datasette -from datasette.permissions import PermissionSQL -from datasette.utils.permissions import resolve_permissions_from_catalog -from typing import Callable, List - - -@pytest.fixture -def db(): - ds = Datasette() - import tempfile - from datasette.database import Database - - path = tempfile.mktemp(suffix="demo.db") - db = ds.add_database(Database(ds, path=path)) - return db - - -NO_RULES_SQL = ( - "SELECT NULL AS parent, NULL AS child, NULL AS allow, NULL AS reason WHERE 0" -) - - -def plugin_allow_all_for_user(user: str) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT NULL AS parent, NULL AS child, 1 AS allow, - 'global allow for ' || :allow_all_user || ' on ' || :allow_all_action AS reason - WHERE :actor_id = :allow_all_user - """, - {"allow_all_user": user, "allow_all_action": action}, - ) - - return provider - - -def plugin_deny_specific_table( - user: str, parent: str, child: str -) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :deny_specific_table_parent AS parent, :deny_specific_table_child AS child, 0 AS allow, - 'deny ' || :deny_specific_table_parent || '/' || :deny_specific_table_child || ' for ' || :deny_specific_table_user || ' on ' || :deny_specific_table_action AS reason - WHERE :actor_id = :deny_specific_table_user - """, - { - "deny_specific_table_parent": parent, - 
"deny_specific_table_child": child, - "deny_specific_table_user": user, - "deny_specific_table_action": action, - }, - ) - - return provider - - -def plugin_org_policy_deny_parent(parent: str) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :org_policy_parent_deny_parent AS parent, NULL AS child, 0 AS allow, - 'org policy: parent ' || :org_policy_parent_deny_parent || ' denied on ' || :org_policy_parent_deny_action AS reason - """, - { - "org_policy_parent_deny_parent": parent, - "org_policy_parent_deny_action": action, - }, - ) - - return provider - - -def plugin_allow_parent_for_user( - user: str, parent: str -) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :allow_parent_parent AS parent, NULL AS child, 1 AS allow, - 'allow full parent for ' || :allow_parent_user || ' on ' || :allow_parent_action AS reason - WHERE :actor_id = :allow_parent_user - """, - { - "allow_parent_parent": parent, - "allow_parent_user": user, - "allow_parent_action": action, - }, - ) - - return provider - - -def plugin_child_allow_for_user( - user: str, parent: str, child: str -) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :allow_child_parent AS parent, :allow_child_child AS child, 1 AS allow, - 'allow child for ' || :allow_child_user || ' on ' || :allow_child_action AS reason - WHERE :actor_id = :allow_child_user - """, - { - "allow_child_parent": parent, - "allow_child_child": child, - "allow_child_user": user, - "allow_child_action": action, - }, - ) - - return provider - - -def plugin_root_deny_for_all() -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT NULL AS parent, NULL AS child, 0 AS allow, 'root deny for all on ' || :root_deny_action AS reason - """, - {"root_deny_action": action}, - ) - - return provider - - -def plugin_conflicting_same_child_rules( - user: str, parent: str, child: str -) -> List[Callable[[str], PermissionSQL]]: - def allow_provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :conflict_child_allow_parent AS parent, :conflict_child_allow_child AS child, 1 AS allow, - 'team grant at child for ' || :conflict_child_allow_user || ' on ' || :conflict_child_allow_action AS reason - WHERE :actor_id = :conflict_child_allow_user - """, - { - "conflict_child_allow_parent": parent, - "conflict_child_allow_child": child, - "conflict_child_allow_user": user, - "conflict_child_allow_action": action, - }, - ) - - def deny_provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT :conflict_child_deny_parent AS parent, :conflict_child_deny_child AS child, 0 AS allow, - 'exception deny at child for ' || :conflict_child_deny_user || ' on ' || :conflict_child_deny_action AS reason - WHERE :actor_id = :conflict_child_deny_user - """, - { - "conflict_child_deny_parent": parent, - "conflict_child_deny_child": child, - "conflict_child_deny_user": user, - "conflict_child_deny_action": action, - }, - ) - - return [allow_provider, deny_provider] - - -def plugin_allow_all_for_action( - user: str, allowed_action: str -) -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - if action != allowed_action: - return PermissionSQL(NO_RULES_SQL) - # Sanitize parameter names by replacing hyphens with underscores - param_prefix = action.replace("-", "_") - return 
PermissionSQL( - f""" - SELECT NULL AS parent, NULL AS child, 1 AS allow, - 'global allow for ' || :{param_prefix}_user || ' on ' || :{param_prefix}_action AS reason - WHERE :actor_id = :{param_prefix}_user - """, - {f"{param_prefix}_user": user, f"{param_prefix}_action": action}, - ) - - return provider - - -VIEW_TABLE = "view-table" - - -# ---------- Catalog DDL (from your schema) ---------- -CATALOG_DDL = """ -CREATE TABLE IF NOT EXISTS catalog_databases ( - database_name TEXT PRIMARY KEY, - path TEXT, - is_memory INTEGER, - schema_version INTEGER -); -CREATE TABLE IF NOT EXISTS catalog_tables ( - database_name TEXT, - table_name TEXT, - rootpage INTEGER, - sql TEXT, - PRIMARY KEY (database_name, table_name), - FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name) -); -""" - -PARENTS = ["accounting", "hr", "analytics"] -SPECIALS = {"accounting": ["sales"], "analytics": ["secret"], "hr": []} - -TABLE_CANDIDATES_SQL = ( - "SELECT database_name AS parent, table_name AS child FROM catalog_tables" -) -PARENT_CANDIDATES_SQL = ( - "SELECT database_name AS parent, NULL AS child FROM catalog_databases" -) - - -# ---------- Helpers ---------- -async def seed_catalog(db, per_parent: int = 10) -> None: - await db.execute_write_script(CATALOG_DDL) - # databases - db_rows = [(p, f"/{p}.db", 0, 1) for p in PARENTS] - await db.execute_write_many( - "INSERT OR REPLACE INTO catalog_databases(database_name, path, is_memory, schema_version) VALUES (?,?,?,?)", - db_rows, - ) - - # tables - def tables_for(parent: str, n: int): - base = [f"table{i:02d}" for i in range(1, n + 1)] - for s in SPECIALS.get(parent, []): - if s not in base: - base[0] = s - return base - - table_rows = [] - for p in PARENTS: - for t in tables_for(p, per_parent): - table_rows.append((p, t, 0, f"CREATE TABLE {t} (id INTEGER PRIMARY KEY)")) - await db.execute_write_many( - "INSERT OR REPLACE INTO catalog_tables(database_name, table_name, rootpage, sql) VALUES (?,?,?,?)", - table_rows, - ) - - -def res_allowed(rows, parent=None): - return sorted( - r["resource"] - for r in rows - if r["allow"] == 1 and (parent is None or r["parent"] == parent) - ) - - -def res_denied(rows, parent=None): - return sorted( - r["resource"] - for r in rows - if r["allow"] == 0 and (parent is None or r["parent"] == parent) - ) - - -# ---------- Tests ---------- -@pytest.mark.asyncio -async def test_alice_global_allow_with_specific_denies_catalog(db): - await seed_catalog(db) - plugins = [ - plugin_allow_all_for_user("alice"), - plugin_deny_specific_table("alice", "accounting", "sales"), - plugin_org_policy_deny_parent("hr"), - ] - rows = await resolve_permissions_from_catalog( - db, - {"id": "alice"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - # Alice can see everything except accounting/sales and hr/* - assert "/accounting/sales" in res_denied(rows) - for r in rows: - if r["parent"] == "hr": - assert r["allow"] == 0 - elif r["resource"] == "/accounting/sales": - assert r["allow"] == 0 - else: - assert r["allow"] == 1 - - -@pytest.mark.asyncio -async def test_carol_parent_allow_but_child_conflict_deny_wins_catalog(db): - await seed_catalog(db) - plugins = [ - plugin_org_policy_deny_parent("hr"), - plugin_allow_parent_for_user("carol", "analytics"), - *plugin_conflicting_same_child_rules("carol", "analytics", "secret"), - ] - rows = await resolve_permissions_from_catalog( - db, - {"id": "carol"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - allowed_analytics = res_allowed(rows, 
parent="analytics") - denied_analytics = res_denied(rows, parent="analytics") - - assert "/analytics/secret" in denied_analytics - # 10 analytics children total, 1 denied - assert len(allowed_analytics) == 9 - - -@pytest.mark.asyncio -async def test_specificity_child_allow_overrides_parent_deny_catalog(db): - await seed_catalog(db) - plugins = [ - plugin_allow_all_for_user("alice"), - plugin_org_policy_deny_parent("analytics"), # parent-level deny - plugin_child_allow_for_user( - "alice", "analytics", "table02" - ), # child allow beats parent deny - ] - rows = await resolve_permissions_from_catalog( - db, - {"id": "alice"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - - # table02 allowed, other analytics tables denied - assert any(r["resource"] == "/analytics/table02" and r["allow"] == 1 for r in rows) - assert all( - (r["parent"] != "analytics" or r["child"] == "table02" or r["allow"] == 0) - for r in rows - ) - - -@pytest.mark.asyncio -async def test_root_deny_all_but_parent_allow_rescues_specific_parent_catalog(db): - await seed_catalog(db) - plugins = [ - plugin_root_deny_for_all(), # root deny - plugin_allow_parent_for_user( - "bob", "accounting" - ), # parent allow (more specific) - ] - rows = await resolve_permissions_from_catalog( - db, {"id": "bob"}, plugins, VIEW_TABLE, TABLE_CANDIDATES_SQL, implicit_deny=True - ) - for r in rows: - if r["parent"] == "accounting": - assert r["allow"] == 1 - else: - assert r["allow"] == 0 - - -@pytest.mark.asyncio -async def test_parent_scoped_candidates(db): - await seed_catalog(db) - plugins = [ - plugin_org_policy_deny_parent("hr"), - plugin_allow_parent_for_user("carol", "analytics"), - ] - rows = await resolve_permissions_from_catalog( - db, - {"id": "carol"}, - plugins, - VIEW_TABLE, - PARENT_CANDIDATES_SQL, - implicit_deny=True, - ) - d = {r["resource"]: r["allow"] for r in rows} - assert d["/analytics"] == 1 - assert d["/hr"] == 0 - - -@pytest.mark.asyncio -async def test_implicit_deny_behavior(db): - await seed_catalog(db) - plugins = [] # no rules at all - - # implicit_deny=True -> everything denied with reason 'implicit deny' - rows = await resolve_permissions_from_catalog( - db, - {"id": "erin"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - assert all(r["allow"] == 0 and r["reason"] == "implicit deny" for r in rows) - - # implicit_deny=False -> no winner => allow is None, reason is None - rows2 = await resolve_permissions_from_catalog( - db, - {"id": "erin"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=False, - ) - assert all(r["allow"] is None and r["reason"] is None for r in rows2) - - -@pytest.mark.asyncio -async def test_candidate_filters_via_params(db): - await seed_catalog(db) - # Add some metadata to test filtering - # Mark 'hr' as is_memory=1 and increment analytics schema_version - await db.execute_write( - "UPDATE catalog_databases SET is_memory=1 WHERE database_name='hr'" - ) - await db.execute_write( - "UPDATE catalog_databases SET schema_version=2 WHERE database_name='analytics'" - ) - - # Candidate SQL that filters by db metadata via params - candidate_sql = """ - SELECT t.database_name AS parent, t.table_name AS child - FROM catalog_tables t - JOIN catalog_databases d ON d.database_name = t.database_name - WHERE (:exclude_memory = 1 AND d.is_memory = 1) IS NOT 1 - AND (:min_schema_version IS NULL OR d.schema_version >= :min_schema_version) - """ - - plugins = [ - plugin_root_deny_for_all(), - plugin_allow_parent_for_user( - "dev", 
"analytics" - ), # analytics rescued if included by candidates - ] - - # Case 1: exclude memory dbs, require schema_version >= 2 -> only analytics appear, and thus are allowed - rows = await resolve_permissions_from_catalog( - db, - {"id": "dev"}, - plugins, - VIEW_TABLE, - candidate_sql, - candidate_params={"exclude_memory": 1, "min_schema_version": 2}, - implicit_deny=True, - ) - assert rows and all(r["parent"] == "analytics" for r in rows) - assert all(r["allow"] == 1 for r in rows) - - # Case 2: include memory dbs, min_schema_version = None -> accounting/hr/analytics appear, - # but root deny wins except where specifically allowed (none except analytics parent allow doesn’t apply to table depth if candidate includes children; still fine—policy is explicit). - rows2 = await resolve_permissions_from_catalog( - db, - {"id": "dev"}, - plugins, - VIEW_TABLE, - candidate_sql, - candidate_params={"exclude_memory": 0, "min_schema_version": None}, - implicit_deny=True, - ) - assert any(r["parent"] == "accounting" for r in rows2) - assert any(r["parent"] == "hr" for r in rows2) - # For table-scoped candidates, the parent-level allow does not override root deny unless you have child-level rules - assert all(r["allow"] in (0, 1) for r in rows2) - - -@pytest.mark.asyncio -async def test_action_specific_rules(db): - await seed_catalog(db) - plugins = [plugin_allow_all_for_action("dana", VIEW_TABLE)] - - view_rows = await resolve_permissions_from_catalog( - db, - {"id": "dana"}, - plugins, - VIEW_TABLE, - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - assert view_rows and all(r["allow"] == 1 for r in view_rows) - assert all(r["action"] == VIEW_TABLE for r in view_rows) - - insert_rows = await resolve_permissions_from_catalog( - db, - {"id": "dana"}, - plugins, - "insert-row", - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - assert insert_rows and all(r["allow"] == 0 for r in insert_rows) - assert all(r["reason"] == "implicit deny" for r in insert_rows) - assert all(r["action"] == "insert-row" for r in insert_rows) - - -@pytest.mark.asyncio -async def test_actor_actor_id_action_parameters_available(db): - """Test that :actor (JSON), :actor_id, and :action are all available in SQL""" - await seed_catalog(db) - - def plugin_using_all_parameters() -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ - SELECT NULL AS parent, NULL AS child, 1 AS allow, - 'Actor ID: ' || COALESCE(:actor_id, 'null') || - ', Actor JSON: ' || COALESCE(:actor, 'null') || - ', Action: ' || :action AS reason - WHERE :actor_id = 'test_user' AND :action = 'view-table' - AND json_extract(:actor, '$.role') = 'admin' - """ - ) - - return provider - - plugins = [plugin_using_all_parameters()] - - # Test with full actor dict - rows = await resolve_permissions_from_catalog( - db, - {"id": "test_user", "role": "admin"}, - plugins, - "view-table", - TABLE_CANDIDATES_SQL, - implicit_deny=True, - ) - - # Should have allowed rows with reason containing all the info - allowed = [r for r in rows if r["allow"] == 1] - assert len(allowed) > 0 - - # Check that the reason string contains evidence of all parameters - reason = allowed[0]["reason"] - assert "test_user" in reason - assert "view-table" in reason - # The :actor parameter should be the JSON string - assert "Actor JSON:" in reason - - -@pytest.mark.asyncio -async def test_multiple_plugins_with_own_parameters(db): - """ - Test that multiple plugins can use their own parameter names without conflict. 
- - This verifies that the parameter naming convention works: plugins prefix their - parameters (e.g., :plugin1_pattern, :plugin2_message) and both sets of parameters - are successfully bound in the SQL queries. - """ - await seed_catalog(db) - - def plugin_one() -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - if action != "view-table": - return PermissionSQL("plugin_one", "SELECT NULL WHERE 0", {}) - return PermissionSQL( - """ - SELECT database_name AS parent, table_name AS child, - 1 AS allow, 'Plugin one used param: ' || :plugin1_param AS reason - FROM catalog_tables - WHERE database_name = 'accounting' - """, - { - "plugin1_param": "value1", - }, - ) - - return provider - - def plugin_two() -> Callable[[str], PermissionSQL]: - def provider(action: str) -> PermissionSQL: - if action != "view-table": - return PermissionSQL("plugin_two", "SELECT NULL WHERE 0", {}) - return PermissionSQL( - """ - SELECT database_name AS parent, table_name AS child, - 1 AS allow, 'Plugin two used param: ' || :plugin2_param AS reason - FROM catalog_tables - WHERE database_name = 'hr' - """, - { - "plugin2_param": "value2", - }, - ) - - return provider - - plugins = [plugin_one(), plugin_two()] - - rows = await resolve_permissions_from_catalog( - db, - {"id": "test_user"}, - plugins, - "view-table", - TABLE_CANDIDATES_SQL, - implicit_deny=False, - ) - - # Both plugins should contribute results with their parameters successfully bound - plugin_one_rows = [ - r for r in rows if r.get("reason") and "Plugin one" in r["reason"] - ] - plugin_two_rows = [ - r for r in rows if r.get("reason") and "Plugin two" in r["reason"] - ] - - assert len(plugin_one_rows) > 0, "Plugin one should contribute rules" - assert len(plugin_two_rows) > 0, "Plugin two should contribute rules" - - # Verify each plugin's parameters were successfully bound in the SQL - assert any( - "value1" in r.get("reason", "") for r in plugin_one_rows - ), "Plugin one's :plugin1_param should be bound" - assert any( - "value2" in r.get("reason", "") for r in plugin_two_rows - ), "Plugin two's :plugin2_param should be bound" diff --git a/tests/utils.py b/tests/utils.py index e2d9339a..8947956b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,45 +1,8 @@ -from datasette.utils.sqlite import sqlite3 - - -def last_event(datasette): - events = getattr(datasette, "_tracked_events", []) - return events[-1] if events else None - - -def assert_footer_links(soup): - footer_links = soup.find("footer").find_all("a") - assert 4 == len(footer_links) - datasette_link, license_link, source_link, about_link = footer_links - assert "Datasette" == datasette_link.text.strip() - assert "tests/fixtures.py" == source_link.text.strip() - assert "Apache License 2.0" == license_link.text.strip() - assert "About Datasette" == about_link.text.strip() - assert "https://datasette.io/" == datasette_link["href"] - assert ( - "https://github.com/simonw/datasette/blob/main/tests/fixtures.py" - == source_link["href"] - ) - assert ( - "https://github.com/simonw/datasette/blob/main/LICENSE" == license_link["href"] - ) - assert "https://github.com/simonw/datasette" == about_link["href"] - - -def inner_html(soup): - html = str(soup) - # This includes the parent tag - so remove that - inner_html = html.split(">", 1)[1].rsplit("<", 1)[0] - return inner_html.strip() - - -def has_load_extension(): - conn = sqlite3.connect(":memory:") - return hasattr(conn, "enable_load_extension") - - -def cookie_was_deleted(response, cookie): - return any( - h - for h in 
response.headers.get_list("set-cookie") - if h.startswith(f'{cookie}="";') - ) +class MockRequest: + def __init__(self, url): + self.url = url + self.path = "/" + url.split("://")[1].split("/", 1)[1] + self.query_string = "" + if "?" in url: + self.query_string = url.split("?", 1)[1] + self.path = self.path.split("?")[0] diff --git a/update-docs-help.py b/update-docs-help.py new file mode 100644 index 00000000..3a1eb860 --- /dev/null +++ b/update-docs-help.py @@ -0,0 +1,30 @@ +from click.testing import CliRunner +from datasette.cli import cli +from pathlib import Path + +docs_path = Path(__file__).parent / "docs" + +includes = ( + ("serve", "datasette-serve-help.txt"), + ("package", "datasette-package-help.txt"), + ("publish nowv1", "datasette-publish-nowv1-help.txt"), + ("publish heroku", "datasette-publish-heroku-help.txt"), + ("publish cloudrun", "datasette-publish-cloudrun-help.txt"), +) + + +def update_help_includes(): + for name, filename in includes: + runner = CliRunner() + result = runner.invoke( + cli, name.split() + ["--help"], terminal_width=88 + ) + actual = "$ datasette {} --help\n\n{}".format( + name, result.output + ) + actual = actual.replace('Usage: cli ', 'Usage: datasette ') + open(docs_path / filename, "w").write(actual) + + +if __name__ == "__main__": + update_help_includes() diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 00000000..64fea1c8 --- /dev/null +++ b/versioneer.py @@ -0,0 +1,1822 @@ + +# Version: 0.18 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/warner/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy +* [![Latest Version] +(https://pypip.in/version/versioneer/badge.svg?style=flat) +](https://pypi.python.org/pypi/versioneer/) +* [![Build Status] +(https://travis-ci.org/warner/python-versioneer.png?branch=master) +](https://travis-ci.org/warner/python-versioneer) + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere to your $PATH +* add a `[versioneer]` section to your setup.cfg (see below) +* run `versioneer install` in your source tree, commit the results + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. 
Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes. + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. 
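A rough sketch of consuming these flavors (not prescribed by versioneer itself; the error check follows the suggestion above about avoiding tarballs with an "unknown" version):

    import versioneer

    info = versioneer.get_versions()
    if info["error"]:
        # fail loudly rather than shipping a meaningless version string
        raise SystemExit("could not determine version: %s" % info["error"])
    version = info["version"]           # e.g. "0.11+2.g1076c97.dirty"
    revision = info["full-revisionid"]  # full SHA1 commit id, or None
    is_dirty = info["dirty"]            # True, False or None
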
Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/warner/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are +two common reasons why `setup.py` might not be in the root: + +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other langauges) in subdirectories. + +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +this issue. 
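Returning briefly to the "pep440" style described above, the TAG[+DISTANCE.gHEX[.dirty]] rule can be restated as a simplified sketch (it ignores the "+" vs "." separator handling of the real `render_pep440()` further down this file):

    def render_pep440_sketch(closest_tag, distance, short, dirty):
        if closest_tag:
            rendered = closest_tag
            if distance or dirty:
                rendered += "+%d.g%s" % (distance, short)
                if dirty:
                    rendered += ".dirty"
        else:
            rendered = "0+untagged.%d.g%s" % (distance, short)
            if dirty:
                rendered += ".dirty"
        return rendered

    # render_pep440_sketch("0.11", 2, "1076c97", True)  -> "0.11+2.g1076c97.dirty"
    # render_pep440_sketch("0.11", 0, "1076c97", False) -> "0.11"
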
The discussion in +[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. + +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + +### Unicode version strings + +While Versioneer works (and is continually tested) with both Python 2 and +Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. +Newer releases probably generate unicode version strings on py2. It's not +clear that this is wrong, but it may be surprising for applications when then +write these strings to a network connection or include them in bytes-oriented +APIs like cryptographic checksums. + +[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates +this question. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. 
+Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . + +""" + +from __future__ import print_function +try: + import configparser +except ImportError: + import ConfigParser as configparser +import errno +import json +import os +import re +import subprocess +import sys + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(me)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
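    # For illustration only (placeholder values; nothing beyond the option names
    # is required by this file), a minimal [versioneer] section looks like:
    #
    #     [versioneer]
    #     VCS = git
    #     style = pep440
    #     versionfile_source = yourpackage/_version.py
    #     versionfile_build = yourpackage/_version.py
    #     tag_prefix =
    #     parentdir_prefix = yourpackage-
    #
    # Only VCS is mandatory; the remaining options are read with a None fallback
    # by the get() helper below.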
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.SafeConfigParser() + with open(setup_cfg, "r") as f: + parser.readfp(f) + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +LONG_VERSION_PY['git'] = ''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
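    # Illustrative values only (examples, not produced by this file): in a plain
    # checkout the three assignments below are still unexpanded placeholders, e.g.
    #     git_refnames = "$Format:%d$"
    # while in a "git archive" tarball git has substituted real values, e.g.
    #     git_refnames = " (HEAD -> main, tag: 0.11)"
    #     git_full = "1076c978a8d3cfc70f408fe5974aa6c092c949ac"
    # git_versions_from_keywords() treats a leading "$Format" as unexpanded and
    # raises NotThisMethod so the other version-lookup strategies are tried instead.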
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
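    # As a concrete illustration (example lines only), the scan below looks for
    # assignments of this shape inside the given _version.py:
    #     git_refnames = "$Format:%d$"
    #     git_full = "$Format:%H$"
    #     git_date = "$Format:%ci$"
    # (or their git-archive-expanded equivalents); re.search(r'=\s*"(.*)"', line)
    # then pulls out whatever sits between the double quotes.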
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
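    For illustration (example values only): a describe output such as
    "0.7-1-g574ab98-dirty" is split into pieces roughly like
        {"closest-tag": "0.7", "distance": 1, "short": "574ab98",
         "dirty": True, "long": <full SHA1>, "error": None}
    while a bare "574ab98" (no reachable tag) leaves "closest-tag" as None
    and sets "distance" to the total commit count.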
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], + cwd=root)[0].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes", "r") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except EnvironmentError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.18) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
+ + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(): + """Get the custom setuptools/distutils subclasses used by Versioneer.""" + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. + # Also see https://github.com/warner/python-versioneer/issues/52 + + cmds = {} + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? 
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+ try: + from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + except ImportError: + from py2exe.build_exe import py2exe as _py2exe # py2 + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. 
+ +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +INIT_PY_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + """Main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (EnvironmentError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1)
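For context on how this file is consumed: the CONFIG_ERROR message above spells out the expected wiring, with all configuration living in the [versioneer] section of setup.cfg and setup.py only calling get_version() and get_cmdclass(). A minimal sketch of that wiring (the project name is hypothetical, and a setuptools-based setup.py is assumed):

    import versioneer
    from setuptools import setup

    setup(
        name="myproject",  # hypothetical name, for illustration only
        # Resolved at build time from expanded git-archive keywords, a previously
        # written _version.py, 'git describe', or the parent directory name
        version=versioneer.get_version(),
        # Injects the version-aware build_py/sdist (and optional build_exe/py2exe)
        # command subclasses defined in get_cmdclass() above
        cmdclass=versioneer.get_cmdclass(),
    )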
@@ -637,20 +907,19 @@ async def test_urlify_custom_queries(ds_client):
     )
 
 
-@pytest.mark.asyncio
-async def test_show_hide_sql_query(ds_client):
-    path = "/fixtures/-/query?" + urllib.parse.urlencode(
+def test_show_hide_sql_query(app_client):
+    path = "/fixtures?" + urllib.parse.urlencode(
         {"sql": "select ('https://twitter.com/' || 'simonw') as user_url;"}
     )
-    response = await ds_client.get(path)
-    soup = Soup(response.content, "html.parser")
+    response = app_client.get(path)
+    soup = Soup(response.body, "html.parser")
     span = soup.select(".show-hide-sql")[0]
     assert span.find("a")["href"].endswith("&_hide_sql=1")
     assert "(hide)" == span.getText()
     assert soup.find("textarea") is not None
     # Now follow the link to hide it
-    response = await ds_client.get(span.find("a")["href"])
-    soup = Soup(response.content, "html.parser")
+    response = app_client.get(span.find("a")["href"])
+    soup = Soup(response.body, "html.parser")
     span = soup.select(".show-hide-sql")[0]
     assert not span.find("a")["href"].endswith("&_hide_sql=1")
     assert "(show)" == span.getText()
@@ -663,586 +932,51 @@ async def test_show_hide_sql_query(ds_client):
     ] == [(hidden["name"], hidden["value"]) for hidden in hiddens]
 
 
-@pytest.mark.asyncio
-async def test_canned_query_with_hide_has_no_hidden_sql(ds_client):
-    # For a canned query the show/hide should NOT have a hidden SQL field
-    # https://github.com/simonw/datasette/issues/1411
-    response = await ds_client.get("/fixtures/pragma_cache_size?_hide_sql=1")
-    soup = Soup(response.content, "html.parser")
-    hiddens = soup.find("form").select("input[type=hidden]")
+def test_extra_where_clauses(app_client):
+    response = app_client.get(
+        "/fixtures/facetable?_where=neighborhood='Dogpatch'&_where=city_id=1"
+    )
+    soup = Soup(response.body, "html.parser")
+    div = soup.select(".extra-wheres")[0]
+    assert "2 extra where clauses" == div.find("h3").text
+    hrefs = [a["href"] for a in div.findAll("a")]
     assert [
-        ("_hide_sql", "1"),
-    ] == [(hidden["name"], hidden["value"]) for hidden in hiddens]
+        "/fixtures/facetable?_where=city_id%3D1",
+        "/fixtures/facetable?_where=neighborhood%3D%27Dogpatch%27",
+    ] == hrefs
 
 
-@pytest.mark.parametrize(
-    "hide_sql,querystring,expected_hidden,expected_show_hide_link,expected_show_hide_text",
-    (
-        (False, "", None, "/_memory/one?_hide_sql=1", "hide"),
-        (False, "?_hide_sql=1", "_hide_sql", "/_memory/one", "show"),
-        (True, "", None, "/_memory/one?_show_sql=1", "show"),
-        (True, "?_show_sql=1", "_show_sql", "/_memory/one", "hide"),
-    ),
-)
-def test_canned_query_show_hide_metadata_option(
-    hide_sql,
-    querystring,
-    expected_hidden,
-    expected_show_hide_link,
-    expected_show_hide_text,
-):
-    with make_app_client(
-        config={
-            "databases": {
-                "_memory": {
-                    "queries": {
-                        "one": {
-                            "sql": "select 1 + 1",
-                            "hide_sql": hide_sql,
-                        }
-                    }
-                }
-            }
-        },
-        memory=True,
-    ) as client:
-        expected_show_hide_fragment = '<a href="{}">({})</a>'.format(
-            expected_show_hide_link, expected_show_hide_text
-        )
-        response = client.get("/_memory/one" + querystring)
-        html = response.text
-        show_hide_fragment = html.split('<span class="show-hide-sql">')[1].split(
-            "</span>"
-        )[0]
-        assert show_hide_fragment == expected_show_hide_fragment
-        if expected_hidden:
-            assert (
-                '<input type="hidden" name="{}" value="1">'.format(expected_hidden)
-                in html
-            )
-        else:
-            assert
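The rewritten tests above all follow the same synchronous pattern used throughout this test module: fetch a page with the app_client fixture, parse response.body with BeautifulSoup, then assert against selected elements. A stripped-down sketch of that pattern, assuming the app_client fixture and the fixtures database these tests rely on (the test name and SQL here are illustrative, not part of the diff):

    import urllib.parse
    from bs4 import BeautifulSoup as Soup

    def test_show_hide_sql_link_sketch(app_client):
        # Render an arbitrary SQL query page against the fixtures database
        path = "/fixtures?" + urllib.parse.urlencode({"sql": "select 1 + 1"})
        response = app_client.get(path)
        soup = Soup(response.body, "html.parser")
        # The show/hide control should offer a "(hide)" link carrying _hide_sql=1
        span = soup.select(".show-hide-sql")[0]
        assert span.find("a")["href"].endswith("&_hide_sql=1")
        assert span.getText() == "(hide)"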