diff --git a/.dockerignore b/.dockerignore index 490f509e..5078bf47 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,5 @@ build dist scratchpad venv +*.db +*.sqlite diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..84e574fd --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# Applying Black +35d6ee2790e41e96f243c1ff58be0c9c0519a8ce +368638555160fb9ac78f462d0f79b1394163fa30 +2b344f6a34d2adaa305996a1a580ece06397f6e4 diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..f0bcdbe0 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [simonw] diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..88bb03b1 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + time: "13:00" + groups: + python-packages: + patterns: + - "*" diff --git a/.github/workflows/deploy-branch-preview.yml b/.github/workflows/deploy-branch-preview.yml new file mode 100644 index 00000000..e56d9c27 --- /dev/null +++ b/.github/workflows/deploy-branch-preview.yml @@ -0,0 +1,35 @@ +name: Deploy a Datasette branch preview to Vercel + +on: + workflow_dispatch: + inputs: + branch: + description: "Branch to deploy" + required: true + type: string + +jobs: + deploy-branch-preview: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v6 + with: + python-version: "3.11" + - name: Install dependencies + run: | + pip install datasette-publish-vercel + - name: Deploy the preview + env: + VERCEL_TOKEN: ${{ secrets.BRANCH_PREVIEW_VERCEL_TOKEN }} + run: | + export BRANCH="${{ github.event.inputs.branch }}" + wget https://latest.datasette.io/fixtures.db + datasette publish vercel fixtures.db \ + --branch $BRANCH \ + --project "datasette-preview-$BRANCH" \ + --token $VERCEL_TOKEN \ + --scope 
datasette \ + --about "Preview of $BRANCH" \ + --about_url "https://github.com/simonw/datasette/tree/$BRANCH" diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 2de0a8b6..9f53b01e 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -1,27 +1,26 @@ name: Deploy latest.datasette.io on: + workflow_dispatch: push: branches: - - main + - main + # - 1.0-dev + +permissions: + contents: read jobs: deploy: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v2 + uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: - python-version: 3.9 - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + python-version: "3.13" + cache: pip - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -29,33 +28,102 @@ jobs: python -m pip install -e .[docs] python -m pip install sphinx-to-sqlite==0.1a1 - name: Run tests - run: pytest - - name: Build fixtures.db - run: python tests/fixtures.py fixtures.db fixtures.json plugins + if: ${{ github.ref == 'refs/heads/main' }} + run: | + pytest -n auto -m "not serial" + pytest -m "serial" + - name: Build fixtures.db and other files needed to deploy the demo + run: |- + python tests/fixtures.py \ + fixtures.db \ + fixtures-config.json \ + fixtures-metadata.json \ + plugins \ + --extra-db-filename extra_database.db - name: Build docs.db + if: ${{ github.ref == 'refs/heads/main' }} run: |- cd docs - sphinx-build -b xml . _build + DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build sphinx-to-sqlite ../docs.db _build cd .. 
- - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@master + - name: Set up the alternate-route demo + run: | + echo ' + from datasette import hookimpl + + @hookimpl + def startup(datasette): + db = datasette.get_database("fixtures2") + db.route = "alternative-route" + ' > plugins/alternative_route.py + cp fixtures.db fixtures2.db + - name: And the counters writable canned query demo + run: | + cat > plugins/counters.py < metadata.json + # cat metadata.json + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v3 with: - version: '275.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy to Cloud Run + env: + LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }} run: |- gcloud config set run/region us-central1 gcloud config set project datasette-222320 - datasette publish cloudrun fixtures.db \ - -m fixtures.json \ + export SUFFIX="-${GITHUB_REF#refs/heads/}" + export SUFFIX=${SUFFIX#-main} + # Replace 1.0 with one-dot-zero in SUFFIX + export SUFFIX=${SUFFIX//1.0/one-dot-zero} + datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \ + -m fixtures-metadata.json \ --plugins-dir=plugins \ --branch=$GITHUB_SHA \ --version-note=$GITHUB_SHA \ - --extra-options="--setting template_debug 1" \ - --install=pysqlite3-binary \ - --service=datasette-latest + --extra-options="--setting template_debug 1 --setting trace_debug 1 --crossdb" \ + --install 'datasette-ephemeral-tables>=0.2.2' \ + --service "datasette-latest$SUFFIX" \ + --secret $LATEST_DATASETTE_SECRET + - name: Deploy to docs as well (only for main) + if: ${{ github.ref == 'refs/heads/main' }} + run: |- # Deploy docs.db to a different service datasette publish cloudrun docs.db \ --branch=$GITHUB_SHA \ diff --git a/.github/workflows/documentation-links.yml 
b/.github/workflows/documentation-links.yml new file mode 100644 index 00000000..a54bd83a --- /dev/null +++ b/.github/workflows/documentation-links.yml @@ -0,0 +1,16 @@ +name: Read the Docs Pull Request Preview +on: + pull_request_target: + types: + - opened + +permissions: + pull-requests: write + +jobs: + documentation-links: + runs-on: ubuntu-latest + steps: + - uses: readthedocs/actions/preview@v1 + with: + project-slug: "datasette" diff --git a/.github/workflows/mirror-master-and-main.yml b/.github/workflows/mirror-master-and-main.yml deleted file mode 100644 index 8418df40..00000000 --- a/.github/workflows/mirror-master-and-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Mirror "master" and "main" branches -on: - push: - branches: - - master - - main - -jobs: - mirror: - runs-on: ubuntu-latest - steps: - - name: Mirror to "master" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: master - force: false - - name: Mirror to "main" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: main - force: false diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml new file mode 100644 index 00000000..77cce7d1 --- /dev/null +++ b/.github/workflows/prettier.yml @@ -0,0 +1,25 @@ +name: Check JavaScript for conformance with Prettier + +on: [push] + +permissions: + contents: read + +jobs: + prettier: + runs-on: ubuntu-latest + steps: + - name: Check out repo + uses: actions/checkout@v4 + - uses: actions/cache@v4 + name: Configure npm caching + with: + path: ~/.npm + key: ${{ runner.OS }}-npm-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.OS }}-npm- + - name: Install dependencies + run: npm ci + - name: Run prettier + run: |- + npm run prettier -- --check diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c1909bbe..e94d0bdd 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,70 
+4,106 @@ on: release: types: [created] +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: pyproject.toml - name: Install dependencies run: | pip install -e '.[test]' - name: Run tests run: | pytest + deploy: runs-on: ubuntu-latest needs: [test] + environment: release + permissions: + id-token: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: - python-version: '3.9' - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-publish-pip- + python-version: '3.13' + cache: pip + cache-dependency-path: pyproject.toml - name: Install dependencies run: | - pip install setuptools wheel twine - - name: Publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + pip install setuptools wheel build + - name: Build run: | - python setup.py sdist bdist_wheel - twine upload dist/* + python -m build + - name: Publish + uses: pypa/gh-action-pypi-publish@release/v1 + + deploy_static_docs: + runs-on: ubuntu-latest + needs: [deploy] + if: "!github.event.release.prerelease" + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + cache: pip + 
cache-dependency-path: pyproject.toml + - name: Install dependencies + run: | + python -m pip install -e .[docs] + python -m pip install sphinx-to-sqlite==0.1a1 + - name: Build docs.db + run: |- + cd docs + DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build + sphinx-to-sqlite ../docs.db _build + cd .. + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 + - name: Deploy stable-docs.datasette.io to Cloud Run + run: |- + gcloud config set run/region us-central1 + gcloud config set project datasette-222320 + datasette publish cloudrun docs.db \ + --service=datasette-docs-stable + deploy_docker: runs-on: ubuntu-latest needs: [deploy] if: "!github.event.release.prerelease" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Build and push to Docker Hub env: DOCKER_USER: ${{ secrets.DOCKER_USER }} DOCKER_PASS: ${{ secrets.DOCKER_PASS }} run: |- - docker login -u $DOCKER_USER -p $DOCKER_PASS - export REPO=datasetteproject/datasette - docker build -f Dockerfile -t $REPO:${GITHUB_REF#refs/tags/} . - docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest - docker push $REPO + sleep 60 # Give PyPI time to make the new release available + docker login -u $DOCKER_USER -p $DOCKER_PASS + export REPO=datasetteproject/datasette + docker build -f Dockerfile \ + -t $REPO:${GITHUB_REF#refs/tags/} \ + --build-arg VERSION=${GITHUB_REF#refs/tags/} . 
+ docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest + docker push $REPO:${GITHUB_REF#refs/tags/} + docker push $REPO:latest diff --git a/.github/workflows/push_docker_tag.yml b/.github/workflows/push_docker_tag.yml new file mode 100644 index 00000000..afe8d6b2 --- /dev/null +++ b/.github/workflows/push_docker_tag.yml @@ -0,0 +1,28 @@ +name: Push specific Docker tag + +on: + workflow_dispatch: + inputs: + version_tag: + description: Tag to build and push + +permissions: + contents: read + +jobs: + deploy_docker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build and push to Docker Hub + env: + DOCKER_USER: ${{ secrets.DOCKER_USER }} + DOCKER_PASS: ${{ secrets.DOCKER_PASS }} + VERSION_TAG: ${{ github.event.inputs.version_tag }} + run: |- + docker login -u $DOCKER_USER -p $DOCKER_PASS + export REPO=datasetteproject/datasette + docker build -f Dockerfile \ + -t $REPO:${VERSION_TAG} \ + --build-arg VERSION=${VERSION_TAG} . + docker push $REPO:${VERSION_TAG} diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml new file mode 100644 index 00000000..7c5370ce --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,27 @@ +name: Check spelling in documentation + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + spellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + - name: Install dependencies + run: | + pip install -e '.[docs]' + - name: Check spelling + run: | + codespell README.md --ignore-words docs/codespell-ignore-words.txt + codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt + codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt + codespell tests --ignore-words docs/codespell-ignore-words.txt diff --git a/.github/workflows/stable-docs.yml 
b/.github/workflows/stable-docs.yml new file mode 100644 index 00000000..3119d617 --- /dev/null +++ b/.github/workflows/stable-docs.yml @@ -0,0 +1,76 @@ +name: Update Stable Docs + +on: + release: + types: [published] + push: + branches: + - main + +permissions: + contents: write + +jobs: + update_stable_docs: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + fetch-depth: 0 # We need all commits to find docs/ changes + - name: Set up Git user + run: | + git config user.name "Automated" + git config user.email "actions@users.noreply.github.com" + - name: Create stable branch if it does not yet exist + run: | + if ! git ls-remote --heads origin stable | grep -qE '\bstable\b'; then + # Make sure we have all tags locally + git fetch --tags --quiet + + # Latest tag that is just numbers and dots (optionally prefixed with 'v') + # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc. + LATEST_RELEASE=$( + git tag -l --sort=-v:refname \ + | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \ + | head -n1 + ) + + git checkout -b stable + + # If there are any stable releases, copy docs/ from the most recent + if [ -n "$LATEST_RELEASE" ]; then + rm -rf docs/ + git checkout "$LATEST_RELEASE" -- docs/ || true + fi + + git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes" + git push -u origin stable + fi + - name: Handle Release + if: github.event_name == 'release' && !github.event.release.prerelease + run: | + git fetch --all + git checkout stable + git reset --hard ${GITHUB_REF#refs/tags/} + git push origin stable --force + - name: Handle Commit to Main + if: contains(github.event.head_commit.message, '!stable-docs') + run: | + git fetch origin + git checkout -b stable origin/stable + # Get the list of modified files in docs/ from the current commit + FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/) + # Check if the list of files is non-empty + if [[ -n "$FILES" ]]; then + # Checkout 
those files to the stable branch to over-write with their contents + for FILE in $FILES; do + git checkout ${{ github.sha }} -- $FILE + done + git add docs/ + git commit -m "Doc changes from ${{ github.sha }}" + git push origin stable + else + echo "No changes to docs/ in this commit." + exit 0 + fi diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 1d1cf332..8d73b64d 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -7,23 +7,21 @@ on: pull_request: branches: - main +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: - python-version: 3.9 - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + python-version: '3.12' + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -33,7 +31,7 @@ jobs: run: |- ls -lah cat .coveragerc - pytest --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term + pytest -m "not serial" --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term -x ls -lah - name: Upload coverage report uses: codecov/codecov-action@v1 diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml new file mode 100644 index 00000000..b490a9bf --- /dev/null +++ b/.github/workflows/test-pyodide.yml @@ -0,0 +1,33 @@ +name: Test in Pyodide with shot-scraper + +on: + push: + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v6 + 
with: + python-version: "3.10" + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + - name: Cache Playwright browsers + uses: actions/cache@v4 + with: + path: ~/.cache/ms-playwright/ + key: ${{ runner.os }}-browsers + - name: Install Playwright dependencies + run: | + pip install shot-scraper build + shot-scraper install + - name: Run test + run: | + ./test-in-pyodide-with-shot-scraper.sh diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml new file mode 100644 index 00000000..76ea138a --- /dev/null +++ b/.github/workflows/test-sqlite-support.yml @@ -0,0 +1,53 @@ +name: Test SQLite versions + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + test: + runs-on: ${{ matrix.platform }} + continue-on-error: true + strategy: + matrix: + platform: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + sqlite-version: [ + #"3", # latest version + "3.46", + #"3.45", + #"3.27", + #"3.26", + "3.25", + #"3.25.3", # 2018-09-25, window functions breaks test_upsert for some reason on 3.10, skip for now + #"3.24", # 2018-06-04, added UPSERT support + #"3.23.1" # 2018-04-10, before UPSERT + ] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + cache: pip + cache-dependency-path: pyproject.toml + - name: Set up SQLite ${{ matrix.sqlite-version }} + uses: asg017/sqlite-versions@71ea0de37ae739c33e447af91ba71dda8fcf22e6 + with: + version: ${{ matrix.sqlite-version }} + cflags: "-DSQLITE_ENABLE_DESERIALIZE -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1" + - run: python3 -c "import sqlite3; print(sqlite3.sqlite_version)" + - run: echo $LD_LIBRARY_PATH + - name: Build extension for --load-extension test + run: |- + (cd tests && gcc ext.c -fPIC -shared -o ext.so) + - name: 
Install dependencies + run: | + pip install -e '.[test]' + pip freeze + - name: Run tests + run: | + pytest -n auto -m "not serial" + pytest -m "serial" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a1774213..1e5e03d2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,29 +1,51 @@ name: Test -on: [push] +on: [push, pull_request] + +permissions: + contents: read jobs: test: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + allow-prereleases: true + cache: pip + cache-dependency-path: pyproject.toml + - name: Build extension for --load-extension test + run: |- + (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | pip install -e '.[test]' + pip freeze - name: Run tests run: | - pytest + pytest -n auto -m "not serial" + pytest -m "serial" + # And the test that exceeds a localhost HTTPS server + tests/test_datasette_https_server.sh + - name: Install docs dependencies + run: | + pip install -e '.[docs]' + - name: Black + run: black --check . 
+ - name: Check if cog needs to be run + run: | + cog --check docs/*.rst + - name: Check if blacken-docs needs to be run + run: | + # This fails on syntax errors, or a diff was applied + blacken-docs -l 60 docs/*.rst + - name: Test DATASETTE_LOAD_PLUGINS + run: | + pip install datasette-init datasette-json-html + tests/test-datasette-load-plugins.sh diff --git a/.github/workflows/tmate-mac.yml b/.github/workflows/tmate-mac.yml new file mode 100644 index 00000000..fcee0f21 --- /dev/null +++ b/.github/workflows/tmate-mac.yml @@ -0,0 +1,15 @@ +name: tmate session mac + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + build: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 diff --git a/.github/workflows/tmate.yml b/.github/workflows/tmate.yml index 02e7bd33..123f6c71 100644 --- a/.github/workflows/tmate.yml +++ b/.github/workflows/tmate.yml @@ -3,6 +3,10 @@ name: tmate session on: workflow_dispatch: +permissions: + contents: read + models: read + jobs: build: runs-on: ubuntu-latest @@ -10,3 +14,5 @@ jobs: - uses: actions/checkout@v2 - name: Setup tmate session uses: mxschmitt/action-tmate@v3 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 47418755..70e6bbeb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ scratchpad .vscode +uv.lock +data.db + # We don't use Pipfile, so ignore them Pipfile Pipfile.lock @@ -116,3 +119,11 @@ ENV/ # macOS files .DS_Store +node_modules +.*.swp + +# In case someone compiled tests/ext.c for test_load_extensions, don't +# include it in source control. 
+tests/*.dylib +tests/*.so +tests/*.dll diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 00000000..222861c3 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,4 @@ +{ + "tabWidth": 2, + "useTabs": false +} diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..5b30e75a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,16 @@ +version: 2 + +build: + os: ubuntu-20.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py + +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..14d4c567 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`swillison+datasette-code-of-conduct@gmail.com`. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/Dockerfile b/Dockerfile index f008ff69..9a8f06cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,42 +1,18 @@ -FROM python:3.7.2-slim-stretch as build +FROM python:3.11.0-slim-bullseye as build -# Setup build dependencies -RUN apt update \ -&& apt install -y python3-dev build-essential wget libxml2-dev libproj-dev libgeos-dev libsqlite3-dev zlib1g-dev pkg-config git \ - && apt clean +# Version of Datasette to install, e.g. 0.55 +# docker build . 
-t datasette --build-arg VERSION=0.55 +ARG VERSION +RUN apt-get update && \ + apt-get install -y --no-install-recommends libsqlite3-mod-spatialite && \ + apt clean && \ + rm -rf /var/lib/apt && \ + rm -rf /var/lib/dpkg/info/* -RUN wget "https://www.sqlite.org/2020/sqlite-autoconf-3310100.tar.gz" && tar xzf sqlite-autoconf-3310100.tar.gz \ - && cd sqlite-autoconf-3310100 && ./configure --disable-static --enable-fts5 --enable-json1 CFLAGS="-g -O2 -DSQLITE_ENABLE_FTS3=1 -DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_RTREE=1 -DSQLITE_ENABLE_JSON1" \ - && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/freexl-sources/freexl-1.0.5.tar.gz" && tar zxf freexl-1.0.5.tar.gz \ - && cd freexl-1.0.5 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/libspatialite-sources/libspatialite-4.4.0-RC0.tar.gz" && tar zxf libspatialite-4.4.0-RC0.tar.gz \ - && cd libspatialite-4.4.0-RC0 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/readosm-sources/readosm-1.1.0.tar.gz" && tar zxf readosm-1.1.0.tar.gz && cd readosm-1.1.0 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/spatialite-tools-sources/spatialite-tools-4.4.0-RC0.tar.gz" && tar zxf spatialite-tools-4.4.0-RC0.tar.gz \ - && cd spatialite-tools-4.4.0-RC0 && ./configure && make && make install - - -# Add local code to the image instead of fetching from pypi. -COPY . 
/datasette - -RUN pip install /datasette - -FROM python:3.7.2-slim-stretch - -# Copy python dependencies and spatialite libraries -COPY --from=build /usr/local/lib/ /usr/local/lib/ -# Copy executables -COPY --from=build /usr/local/bin /usr/local/bin -# Copy spatial extensions -COPY --from=build /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu - -ENV LD_LIBRARY_PATH=/usr/local/lib +RUN pip install https://github.com/simonw/datasette/archive/refs/tags/${VERSION}.zip && \ + find /usr/local/lib -name '__pycache__' | xargs rm -r && \ + rm -rf /root/.cache/pip EXPOSE 8001 CMD ["datasette"] diff --git a/Justfile b/Justfile new file mode 100644 index 00000000..a47662c3 --- /dev/null +++ b/Justfile @@ -0,0 +1,56 @@ +export DATASETTE_SECRET := "not_a_secret" + +# Run tests and linters +@default: test lint + +# Setup project +@init: + uv sync --extra test --extra docs + +# Run pytest with supplied options +@test *options: init + uv run pytest -n auto {{options}} + +@codespell: + uv run codespell README.md --ignore-words docs/codespell-ignore-words.txt + uv run codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt + uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt + uv run codespell tests --ignore-words docs/codespell-ignore-words.txt + +# Run linters: black, flake8, mypy, cog +@lint: codespell + uv run black . --check + uv run flake8 + uv run --extra test cog --check README.md docs/*.rst + +# Rebuild docs with cog +@cog: + uv run --extra test cog -r README.md docs/*.rst + +# Serve live docs on localhost:8000 +@docs: cog blacken-docs + uv run --extra docs make -C docs livehtml + +# Build docs as static HTML +@docs-build: cog blacken-docs + rm -rf docs/_build && cd docs && uv run make html + +# Apply Black +@black: + uv run black . 
+ +# Apply blacken-docs +@blacken-docs: + uv run blacken-docs -l 60 docs/*.rst + +# Apply prettier +@prettier: + npm run fix + +# Format code with both black and prettier +@format: black prettier blacken-docs + +@serve *options: + uv run sqlite-utils create-database data.db + uv run sqlite-utils create-table data.db docs id integer title text --pk id --ignore + uv run python -m datasette data.db --root --reload {{options}} diff --git a/README.md b/README.md index 16fc8f0e..393e8e5c 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,30 @@ -# Datasette +Datasette [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) -[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/stable/changelog.html) +[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/latest/changelog.html) [![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) [![Tests](https://github.com/simonw/datasette/workflows/Test/badge.svg)](https://github.com/simonw/datasette/actions?query=workflow%3ATest) [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) [![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) +[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord) *An open source multi-tool for exploring and publishing data* Datasette is a tool for exploring and publishing data. It helps people take data of any shape or size and publish that as an interactive, explorable website and accompanying API. 
-Datasette is aimed at data journalists, museum curators, archivists, local governments and anyone else who has data that they wish to share with the world. +Datasette is aimed at data journalists, museum curators, archivists, local governments, scientists, researchers and anyone else who has data that they wish to share with the world. -[Explore a demo](https://fivethirtyeight.datasettes.com/fivethirtyeight), watch [a video about the project](https://www.youtube.com/watch?v=pTr1uLQTJNE) or try it out by [uploading and publishing your own CSV data](https://simonwillison.net/2019/Apr/23/datasette-glitch/). +[Explore a demo](https://datasette.io/global-power-plants/global-power-plants), watch [a video about the project](https://simonwillison.net/2021/Feb/7/video/) or try it out [on GitHub Codespaces](https://github.com/datasette/datasette-studio). +* [datasette.io](https://datasette.io/) is the official project website * Latest [Datasette News](https://datasette.io/news) * Comprehensive documentation: https://docs.datasette.io/ * Examples: https://datasette.io/examples -* Live demo of current main: https://latest.datasette.io/ -* Support questions, feedback? Join our [GitHub Discussions forum](https://github.com/simonw/datasette/discussions) +* Live demo of current `main` branch: https://latest.datasette.io/ +* Questions, feedback or want to talk about the project? Join our [Discord](https://datasette.io/discord) -Want to stay up-to-date with the project? Subscribe to the [Datasette Weekly newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. +Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. ## Installation @@ -34,7 +36,7 @@ You can also install it using `pip` or `pipx`: pip install datasette -Datasette requires Python 3.6 or higher. 
We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. +Datasette requires Python 3.8 or higher. We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. ## Basic usage @@ -46,45 +48,12 @@ This will start a web server on port 8001 - visit http://localhost:8001/ to acce Use Chrome on OS X? You can run datasette against your browser history like so: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History + datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: ![Downloads table rendered by datasette](https://static.simonwillison.net/static/2017/datasette-downloads.png) -## datasette serve options - - Usage: datasette serve [OPTIONS] [FILES]... - - Serve up specified SQLite database files with a web UI - - Options: - -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means - only connections from the local machine will be - allowed. Use 0.0.0.0 to listen to all IPs and - allow access from other machines. 
- -p, --port INTEGER Port for server, defaults to 8001 - --reload Automatically reload if database or code change - detected - useful for development - --cors Enable CORS by serving Access-Control-Allow- - Origin: * - --load-extension PATH Path to a SQLite extension to load - --inspect-file TEXT Path to JSON file created using "datasette - inspect" - -m, --metadata FILENAME Path to JSON file containing license/source - metadata - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static - files - --memory Make :memory: database available - --config CONFIG Set config option using configname:value - docs.datasette.io/en/stable/config.html - --version-note TEXT Additional note to show on /-/versions - --help-config Show available config options - --help Show this message and exit. - ## metadata.json If you want to include licensing and source information in the generated datasette website you can do so using a JSON file that looks something like this: @@ -116,3 +85,7 @@ Or: This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Heroku or Cloud Run and give you a URL to access the resulting website and API. See [Publishing data](https://docs.datasette.io/en/stable/publish.html) in the documentation for more details. + +## Datasette Lite + +[Datasette Lite](https://lite.datasette.io/) is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. Read more about that in the [Datasette Lite documentation](https://github.com/simonw/datasette-lite/blob/main/README.md). 
diff --git a/datasette/__init__.py b/datasette/__init__.py index 0e59760a..47d2b4f6 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,3 +1,8 @@ +from datasette.permissions import Permission # noqa from datasette.version import __version_info__, __version__ # noqa +from datasette.events import Event # noqa +from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa +from datasette.utils import actor_matches_allow # noqa +from datasette.views import Context # noqa from .hookspecs import hookimpl # noqa from .hookspecs import hookspec # noqa diff --git a/datasette/actor_auth_cookie.py b/datasette/actor_auth_cookie.py index 15ecd331..368213af 100644 --- a/datasette/actor_auth_cookie.py +++ b/datasette/actor_auth_cookie.py @@ -1,6 +1,6 @@ from datasette import hookimpl from itsdangerous import BadSignature -import baseconv +from datasette.utils import baseconv import time diff --git a/datasette/app.py b/datasette/app.py index 9bc84df0..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,79 +1,123 @@ +from __future__ import annotations + +from asgi_csrf import Errors import asyncio +import contextvars +from typing import TYPE_CHECKING, Any, Dict, Iterable, List + +if TYPE_CHECKING: + from datasette.permissions import AllowedResource, Resource import asgi_csrf import collections +import dataclasses import datetime +import functools import glob import hashlib import httpx +import importlib.metadata import inspect from itsdangerous import BadSignature import json import os -import pkg_resources import re import secrets import sys import threading -import traceback +import time +import types import urllib.parse from concurrent import futures from pathlib import Path -from markupsafe import Markup +from markupsafe import Markup, escape from itsdangerous import URLSafeSerializer -import jinja2 -from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader, escape +from jinja2 import ( + ChoiceLoader, + 
Environment, + FileSystemLoader, + PrefixLoader, +) from jinja2.environment import Template from jinja2.exceptions import TemplateNotFound -import uvicorn -from .views.base import DatasetteError, ureg -from .views.database import DatabaseDownload, DatabaseView +from .events import Event +from .views import Context +from .views.database import database_download, DatabaseView, TableCreateView, QueryView from .views.index import IndexView from .views.special import ( JsonDataView, PatternPortfolioView, AuthTokenView, + ApiExplorerView, + CreateTokenView, LogoutView, AllowDebugView, PermissionsDebugView, MessagesDebugView, + AllowedResourcesView, + PermissionRulesView, + PermissionCheckView, + TablesView, + InstanceSchemaView, + DatabaseSchemaView, + TableSchemaView, ) -from .views.table import RowView, TableView +from .views.table import ( + TableInsertView, + TableUpsertView, + TableDropView, + table_view, +) +from .views.row import RowView, RowDeleteView, RowUpdateView from .renderer import json_renderer from .url_builder import Urls from .database import Database, QueryInterrupted from .utils import ( + PaginatedResources, PrefixedUrlString, + SPATIALITE_FUNCTIONS, StartupError, async_call_with_supported_arguments, await_me_maybe, + baseconv, call_with_supported_arguments, + detect_json1, display_actor, escape_css_string, escape_sqlite, find_spatialite, format_bytes, module_from_path, + move_plugins_and_allow, + move_table_config, parse_metadata, resolve_env_secrets, + resolve_routes, + tilde_decode, + tilde_encode, to_css_class, - HASH_LENGTH, + urlsafe_components, + redact_keys, + row_sql_params_pks, ) from .utils.asgi import ( AsgiLifespan, - Base400, Forbidden, NotFound, + DatabaseNotFound, + TableNotFound, + RowNotFound, Request, Response, + AsgiRunOnFirstRequest, asgi_static, asgi_send, - asgi_send_html, - asgi_send_json, + asgi_send_file, asgi_send_redirect, ) +from .utils.internal_db import init_internal_db, populate_schema_tables from .utils.sqlite import 
( sqlite3, using_pysqlite3, @@ -82,9 +126,43 @@ from .tracer import AsgiTracer from .plugins import pm, DEFAULT_PLUGINS, get_plugins from .version import __version__ +from .resources import DatabaseResource, TableResource + app_root = Path(__file__).parent.parent -MEMORY = object() + +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) + + +class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" + + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False + + +@dataclasses.dataclass +class PermissionCheck: + """Represents a logged permission check for debugging purposes.""" + + when: str + actor: Dict[str, Any] | None + action: str + parent: str | None + child: str | None + result: bool + + +# https://github.com/simonw/datasette/issues/283#issuecomment-781591015 +SQLITE_LIMIT_ATTACHED = 10 + +INTERNAL_DB_NAME = "__INTERNAL__" Setting = collections.namedtuple("Setting", ("name", "default", "help")) SETTINGS = ( @@ -94,6 +172,11 @@ SETTINGS = ( 1000, "Maximum rows that can be returned from a table or custom query", ), + Setting( + "max_insert_rows", + 100, + "Maximum rows that can be inserted at a time using the bulk insert API", + ), Setting( "num_sql_threads", 3, @@ -109,11 +192,6 @@ SETTINGS = ( 50, "Time limit for calculating a suggested facet", ), - Setting( - "hash_urls", - False, - "Include DB file contents hash in URLs, for far-future caching", - ), Setting( "allow_facet", True, @@ -124,17 +202,27 @@ SETTINGS = ( True, "Allow users to download the original SQLite database files", ), + Setting( + "allow_signed_tokens", + True, + "Allow users to create and use signed API tokens", + ), + Setting( + "default_allow_sql", + True, + "Allow anyone to run arbitrary 
SQL queries", + ), + Setting( + "max_signed_tokens_ttl", + 0, + "Maximum allowed expiry time for signed API tokens", + ), Setting("suggest_facets", True, "Calculate and display suggested facets"), Setting( "default_cache_ttl", 5, "Default HTTP cache TTL (used in Cache-Control: max-age= header)", ), - Setting( - "default_cache_ttl_hashed", - 365 * 24 * 60 * 60, - "Default HTTP cache TTL for hashed URL pages", - ), Setting("cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)"), Setting( "allow_csv_stream", @@ -161,14 +249,48 @@ SETTINGS = ( False, "Allow display of template debug information with ?_context=1", ), + Setting( + "trace_debug", + False, + "Allow display of SQL trace debug information with ?_trace=1", + ), Setting("base_url", "/", "Datasette URLs should use this base path"), ) - +_HASH_URLS_REMOVED = "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead" +OBSOLETE_SETTINGS = { + "hash_urls": _HASH_URLS_REMOVED, + "default_cache_ttl_hashed": _HASH_URLS_REMOVED, +} DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} +FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" + +DEFAULT_NOT_SET = object() + + +ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) + async def favicon(request, send): - await asgi_send(send, "", 200) + await asgi_send_file( + send, + str(FAVICON_PATH), + content_type="image/png", + headers={"Cache-Control": "max-age=3600, immutable, public"}, + ) + + +ResolvedTable = collections.namedtuple("ResolvedTable", ("db", "table", "is_view")) +ResolvedRow = collections.namedtuple( + "ResolvedRow", ("db", "table", "sql", "params", "pks", "pk_values", "row") +) + + +def _to_string(value): + if isinstance(value, str): + return value + else: + return json.dumps(value, default=str) class Datasette: @@ -179,62 +301,90 @@ class Datasette: def __init__( self, - files, + files=None, immutables=None, cache_headers=True, cors=False, inspect_data=None, + 
config=None, metadata=None, sqlite_extensions=None, template_dir=None, plugins_dir=None, static_mounts=None, memory=False, - config=None, + settings=None, secret=None, version_note=None, config_dir=None, pdb=False, + crossdb=False, + nolock=False, + internal=None, + default_deny=False, ): + self._startup_invoked = False assert config_dir is None or isinstance( config_dir, Path ), "config_dir= should be a pathlib.Path" + self.config_dir = config_dir self.pdb = pdb self._secret = secret or secrets.token_hex(32) - self.files = tuple(files) + tuple(immutables or []) + if files is not None and isinstance(files, str): + raise ValueError("files= must be a list of paths, not a string") + self.files = tuple(files or []) + tuple(immutables or []) if config_dir: - self.files += tuple([str(p) for p in config_dir.glob("*.db")]) + db_files = [] + for ext in ("db", "sqlite", "sqlite3"): + db_files.extend(config_dir.glob("*.{}".format(ext))) + self.files += tuple(str(f) for f in db_files) if ( config_dir and (config_dir / "inspect-data.json").exists() and not inspect_data ): - inspect_data = json.load((config_dir / "inspect-data.json").open()) - if immutables is None: + inspect_data = json.loads((config_dir / "inspect-data.json").read_text()) + if not immutables: immutable_filenames = [i["file"] for i in inspect_data.values()] immutables = [ f for f in self.files if Path(f).name in immutable_filenames ] self.inspect_data = inspect_data self.immutables = set(immutables or []) - if not self.files: - self.files = [MEMORY] - elif memory: - self.files = (MEMORY,) + self.files self.databases = collections.OrderedDict() + self.actions = {} # .invoke_startup() will populate this + try: + self._refresh_schemas_lock = asyncio.Lock() + except RuntimeError as rex: + # Workaround for intermittent test failure, see: + # https://github.com/simonw/datasette/issues/1802 + if "There is no current event loop in thread" in str(rex): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + 
self._refresh_schemas_lock = asyncio.Lock() + else: + raise + self.crossdb = crossdb + self.nolock = nolock + if memory or crossdb or not self.files: + self.add_database( + Database(self, is_mutable=False, is_memory=True), name="_memory" + ) for file in self.files: - path = file - is_memory = False - if file is MEMORY: - path = None - is_memory = True - is_mutable = path not in self.immutables - db = Database(self, path, is_mutable=is_mutable, is_memory=is_memory) - if db.name in self.databases: - raise Exception(f"Multiple files with same stem: {db.name}") - self.add_database(db.name, db) + self.add_database( + Database(self, file, is_mutable=file not in self.immutables) + ) + + self.internal_db_created = False + if internal is None: + self._internal_database = Database(self, memory_name=secrets.token_hex()) + else: + self._internal_database = Database(self, path=internal, mode="rwc") + self._internal_database.name = INTERNAL_DB_NAME + self.cache_headers = cache_headers self.cors = cors + config_files = [] metadata_files = [] if config_dir: metadata_files = [ @@ -242,11 +392,27 @@ class Datasette: for filename in ("metadata.json", "metadata.yaml", "metadata.yml") if (config_dir / filename).exists() ] + config_files = [ + config_dir / filename + for filename in ("datasette.json", "datasette.yaml", "datasette.yml") + if (config_dir / filename).exists() + ] if config_dir and metadata_files and not metadata: with metadata_files[0].open() as fp: metadata = parse_metadata(fp.read()) - self._metadata = metadata or {} - self.sqlite_functions = [] + + if config_dir and config_files and not config: + with config_files[0].open() as fp: + config = parse_metadata(fp.read()) + + # Move any "plugins" and "allow" settings from metadata to config - updates them in place + metadata = metadata or {} + config = config or {} + metadata, config = move_plugins_and_allow(metadata, config) + # Now migrate any known table configuration settings over as well + metadata, config = 
move_table_config(metadata, config) + + self._metadata_local = metadata or {} self.sqlite_extensions = [] for extension in sqlite_extensions or []: # Resolve spatialite, if requested @@ -264,16 +430,54 @@ class Datasette: if config_dir and (config_dir / "static").is_dir() and not static_mounts: static_mounts = [("static", str((config_dir / "static").resolve()))] self.static_mounts = static_mounts or [] - if config_dir and (config_dir / "config.json").exists(): - raise StartupError("config.json should be renamed to settings.json") - if config_dir and (config_dir / "settings.json").exists() and not config: - config = json.load((config_dir / "settings.json").open()) - self._settings = dict(DEFAULT_SETTINGS, **(config or {})) + if config_dir and (config_dir / "datasette.json").exists() and not config: + config = json.loads((config_dir / "datasette.json").read_text()) + + config = config or {} + config_settings = config.get("settings") or {} + + # Validate settings from config file + for key, value in config_settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in config file") + # Validate type matches expected type from DEFAULT_SETTINGS + if value is not None: # Allow None/null values + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in config file has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. " + f"Value: {value!r}. " + f"Hint: In YAML/JSON config files, remove quotes from boolean and integer values." 
+ ) + + # Validate settings from constructor parameter + if settings: + for key, value in settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in settings parameter") + if value is not None: + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in settings parameter has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. " + f"Value: {value!r}" + ) + + self.config = config + # CLI settings should overwrite datasette.json settings + self._settings = dict(DEFAULT_SETTINGS, **(config_settings), **(settings or {})) self.renderers = {} # File extension -> (renderer, can_render) functions self.version_note = version_note - self.executor = futures.ThreadPoolExecutor( - max_workers=self.setting("num_sql_threads") - ) + if self.setting("num_sql_threads") == 0: + self.executor = None + else: + self.executor = futures.ThreadPoolExecutor( + max_workers=self.setting("num_sql_threads") + ) self.max_returned_rows = self.setting("max_returned_rows") self.sql_time_limit_ms = self.setting("sql_time_limit_ms") self.page_size = self.setting("default_page_size") @@ -311,28 +515,187 @@ class Datasette: ), ] ) - self.jinja_env = Environment( - loader=template_loader, autoescape=True, enable_async=True + environment = Environment( + loader=template_loader, + autoescape=True, + enable_async=True, + # undefined=StrictUndefined, ) - self.jinja_env.filters["escape_css_string"] = escape_css_string - self.jinja_env.filters["quote_plus"] = lambda u: urllib.parse.quote_plus(u) - self.jinja_env.filters["escape_sqlite"] = escape_sqlite - self.jinja_env.filters["to_css_class"] = to_css_class - # pylint: disable=no-member - pm.hook.prepare_jinja2_environment(env=self.jinja_env) - + environment.filters["escape_css_string"] = escape_css_string + environment.filters["quote_plus"] = urllib.parse.quote_plus + self._jinja_env = 
environment + environment.filters["escape_sqlite"] = escape_sqlite + environment.filters["to_css_class"] = to_css_class self._register_renderers() self._permission_checks = collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) + self.root_enabled = False + self.default_deny = default_deny self.client = DatasetteClient(self) + async def apply_metadata_json(self): + # Apply any metadata entries from metadata.json to the internal tables + # step 1: top-level metadata + for key in self._metadata_local or {}: + if key == "databases": + continue + value = self._metadata_local[key] + await self.set_instance_metadata(key, _to_string(value)) + + # step 2: database-level metadata + for dbname, db in self._metadata_local.get("databases", {}).items(): + for key, value in db.items(): + if key in ("tables", "queries"): + continue + await self.set_database_metadata(dbname, key, _to_string(value)) + + # step 3: table-level metadata + for tablename, table in db.get("tables", {}).items(): + for key, value in table.items(): + if key == "columns": + continue + await self.set_resource_metadata( + dbname, tablename, key, _to_string(value) + ) + + # step 4: column-level metadata (only descriptions in metadata.json) + for columnname, column_description in table.get("columns", {}).items(): + await self.set_column_metadata( + dbname, tablename, columnname, "description", column_description + ) + + # TODO(alex) is metadata.json was loaded in, and --internal is not memory, then log + # a warning to user that they should delete their metadata.json file + + def get_jinja_environment(self, request: Request = None) -> Environment: + environment = self._jinja_env + if request: + for environment in pm.hook.jinja2_environment_from_request( + datasette=self, request=request, env=environment + ): + pass + return environment + + def get_action(self, name_or_abbr: str): + """ + Returns an Action object for the given name or abbreviation. Returns None if not found. 
+ """ + if name_or_abbr in self.actions: + return self.actions[name_or_abbr] + # Try abbreviation + for action in self.actions.values(): + if action.abbr == name_or_abbr: + return action + return None + + async def refresh_schemas(self): + if self._refresh_schemas_lock.locked(): + return + async with self._refresh_schemas_lock: + await self._refresh_schemas() + + async def _refresh_schemas(self): + internal_db = self.get_internal_database() + if not self.internal_db_created: + await init_internal_db(internal_db) + await self.apply_metadata_json() + self.internal_db_created = True + current_schema_versions = { + row["database_name"]: row["schema_version"] + for row in await internal_db.execute( + "select database_name, schema_version from catalog_databases" + ) + } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) + for database_name, db in self.databases.items(): + schema_version = (await db.execute("PRAGMA schema_version")).first()[0] + # Compare schema versions to see if we should skip it + if schema_version == current_schema_versions.get(database_name): + continue + placeholders = "(?, ?, ?, ?)" + values = [database_name, str(db.path), db.is_memory, schema_version] + if db.path is None: + placeholders = "(?, null, ?, ?)" + values = [database_name, db.is_memory, schema_version] + await internal_db.execute_write( + """ + INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) + VALUES {} + """.format( + placeholders + ), + values, + ) + await populate_schema_tables(internal_db, db) + @property def urls(self): return Urls(self) + @property + def pm(self): + """ + Return the global plugin manager instance. 
+ + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. + """ + return pm + async def invoke_startup(self): + # This must be called for Datasette to be in a usable state + if self._startup_invoked: + return + # Register event classes + event_classes = [] + for hook in pm.hook.register_events(datasette=self): + extra_classes = await await_me_maybe(hook) + if extra_classes: + event_classes.extend(extra_classes) + self.event_classes = tuple(event_classes) + + # Register actions, but watch out for duplicate name/abbr + action_names = {} + action_abbrs = {} + for hook in pm.hook.register_actions(datasette=self): + if hook: + for action in hook: + if ( + action.name in action_names + and action != action_names[action.name] + ): + raise StartupError( + "Duplicate action name: {}".format(action.name) + ) + if ( + action.abbr + and action.abbr in action_abbrs + and action != action_abbrs[action.abbr] + ): + raise StartupError( + "Duplicate action abbr: {}".format(action.abbr) + ) + action_names[action.name] = action + if action.abbr: + action_abbrs[action.abbr] = action + self.actions[action.name] = action + + for hook in pm.hook.prepare_jinja2_environment( + env=self._jinja_env, datasette=self + ): + await await_me_maybe(hook) for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) + self._startup_invoked = True def sign(self, value, namespace="default"): return URLSafeSerializer(self._secret, namespace).dumps(value) @@ -340,80 +703,272 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) - def get_database(self, name=None): + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. 
+ """ + return _in_datasette_client.get() + + def create_token( + self, + actor_id: str, + *, + expires_after: int | None = None, + restrict_all: Iterable[str] | None = None, + restrict_database: Dict[str, Iterable[str]] | None = None, + restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, + ): + token = {"a": actor_id, "t": int(time.time())} + if expires_after: + token["d"] = expires_after + + def abbreviate_action(action): + # rename to abbr if possible + action_obj = self.actions.get(action) + if not action_obj: + return action + return action_obj.abbr or action + + if expires_after: + token["d"] = expires_after + if restrict_all or restrict_database or restrict_resource: + token["_r"] = {} + if restrict_all: + token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] + if restrict_database: + token["_r"]["d"] = {} + for database, actions in restrict_database.items(): + token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] + if restrict_resource: + token["_r"]["r"] = {} + for database, resources in restrict_resource.items(): + for resource, actions in resources.items(): + token["_r"]["r"].setdefault(database, {})[resource] = [ + abbreviate_action(a) for a in actions + ] + return "dstok_{}".format(self.sign(token, namespace="token")) + + def get_database(self, name=None, route=None): + if route is not None: + matches = [db for db in self.databases.values() if db.route == route] + if not matches: + raise KeyError + return matches[0] if name is None: - return next(iter(self.databases.values())) + name = [key for key in self.databases.keys()][0] return self.databases[name] - def add_database(self, name, db): - self.databases[name] = db + def add_database(self, db, name=None, route=None): + new_databases = self.databases.copy() + if name is None: + # Pick a unique name for this database + suggestion = db.suggest_name() + name = suggestion + else: + suggestion = name + i = 2 + while name in self.databases: + name = 
"{}_{}".format(suggestion, i) + i += 1 + db.name = name + db.route = route or name + new_databases[name] = db + # don't mutate! that causes race conditions with live import + self.databases = new_databases + return db + + def add_memory_database(self, memory_name, name=None, route=None): + return self.add_database( + Database(self, memory_name=memory_name), name=name, route=route + ) def remove_database(self, name): - self.databases.pop(name) + self.get_database(name).close() + new_databases = self.databases.copy() + new_databases.pop(name) + self.databases = new_databases def setting(self, key): return self._settings.get(key, None) - def config_dict(self): - # Returns a fully resolved config dictionary, useful for templates + def settings_dict(self): + # Returns a fully resolved settings dictionary, useful for templates return {option.name: self.setting(option.name) for option in SETTINGS} - def metadata(self, key=None, database=None, table=None, fallback=True): - """ - Looks up metadata, cascading backwards from specified level. - Returns None if metadata value is not found. 
- """ - assert not ( - database is None and table is not None - ), "Cannot call metadata() with table= specified but not database=" - databases = self._metadata.get("databases") or {} - search_list = [] - if database is not None: - search_list.append(databases.get(database) or {}) - if table is not None: - table_metadata = ((databases.get(database) or {}).get("tables") or {}).get( - table - ) or {} - search_list.insert(0, table_metadata) - search_list.append(self._metadata) - if not fallback: - # No fallback allowed, so just use the first one in the list - search_list = search_list[:1] - if key is not None: - for item in search_list: - if key in item: - return item[key] - return None - else: - # Return the merged list - m = {} - for item in search_list: - m.update(item) - return m + def _metadata_recursive_update(self, orig, updated): + if not isinstance(orig, dict) or not isinstance(updated, dict): + return orig + + for key, upd_value in updated.items(): + if isinstance(upd_value, dict) and isinstance(orig.get(key), dict): + orig[key] = self._metadata_recursive_update(orig[key], upd_value) + else: + orig[key] = upd_value + return orig + + async def get_instance_metadata(self): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_instance + """ + ) + return dict(rows) + + async def get_database_metadata(self, database_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_databases + WHERE database_name = ? + """, + [database_name], + ) + return dict(rows) + + async def get_resource_metadata(self, database_name: str, resource_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_resources + WHERE database_name = ? + AND resource_name = ? 
+ """, + [database_name, resource_name], + ) + return dict(rows) + + async def get_column_metadata( + self, database_name: str, resource_name: str, column_name: str + ): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_columns + WHERE database_name = ? + AND resource_name = ? + AND column_name = ? + """, + [database_name, resource_name, column_name], + ) + return dict(rows) + + async def set_instance_metadata(self, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_instance(key, value) + VALUES(?, ?) + ON CONFLICT(key) DO UPDATE SET value = excluded.value; + """, + [key, value], + ) + + async def set_database_metadata(self, database_name: str, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_databases(database_name, key, value) + VALUES(?, ?, ?) + ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, key, value], + ) + + async def set_resource_metadata( + self, database_name: str, resource_name: str, key: str, value: str + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_resources(database_name, resource_name, key, value) + VALUES(?, ?, ?, ?) + ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, key, value], + ) + + async def set_column_metadata( + self, + database_name: str, + resource_name: str, + column_name: str, + key: str, + value: str, + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_columns(database_name, resource_name, column_name, key, value) + VALUES(?, ?, ?, ?, ?) 
+ ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, column_name, key, value], + ) + + def get_internal_database(self): + return self._internal_database def plugin_config(self, plugin_name, database=None, table=None, fallback=True): - "Return config for plugin, falling back from specified database/table" - plugins = self.metadata( - "plugins", database=database, table=table, fallback=fallback - ) - if plugins is None: - return None - plugin_config = plugins.get(plugin_name) - # Resolve any $file and $env keys - plugin_config = resolve_env_secrets(plugin_config, os.environ) - return plugin_config + """Return config for plugin, falling back from specified database/table""" + if database is None and table is None: + config = self._plugin_config_top(plugin_name) + else: + config = self._plugin_config_nested(plugin_name, database, table, fallback) + + return resolve_env_secrets(config, os.environ) + + def _plugin_config_top(self, plugin_name): + """Returns any top-level plugin configuration for the specified plugin.""" + return ((self.config or {}).get("plugins") or {}).get(plugin_name) + + def _plugin_config_nested(self, plugin_name, database, table=None, fallback=True): + """Returns any database or table-level plugin configuration for the specified plugin.""" + db_config = ((self.config or {}).get("databases") or {}).get(database) + + # if there's no db-level configuration, then return early, falling back to top-level if needed + if not db_config: + return self._plugin_config_top(plugin_name) if fallback else None + + db_plugin_config = (db_config.get("plugins") or {}).get(plugin_name) + + if table: + table_plugin_config = ( + ((db_config.get("tables") or {}).get(table) or {}).get("plugins") or {} + ).get(plugin_name) + + # fallback to db_config or top-level config, in that order, if needed + if table_plugin_config is None and fallback: + return db_plugin_config or 
self._plugin_config_top(plugin_name) + + return table_plugin_config + + # fallback to top-level if needed + if db_plugin_config is None and fallback: + self._plugin_config_top(plugin_name) + + return db_plugin_config def app_css_hash(self): if not hasattr(self, "_app_css_hash"): - self._app_css_hash = hashlib.sha1( - open(os.path.join(str(app_root), "datasette/static/app.css")) - .read() - .encode("utf8") - ).hexdigest()[:6] + with open(os.path.join(str(app_root), "datasette/static/app.css")) as fp: + self._app_css_hash = hashlib.sha1(fp.read().encode("utf8")).hexdigest()[ + :6 + ] return self._app_css_hash async def get_canned_queries(self, database_name, actor): - queries = self.metadata("queries", database=database_name, fallback=False) or {} + queries = {} for more_queries in pm.hook.canned_queries( datasette=self, database=database_name, @@ -435,33 +990,37 @@ class Datasette: if query: return query - def update_with_inherited_metadata(self, metadata): - # Fills in source/license with defaults, if available - metadata.update( - { - "source": metadata.get("source") or self.metadata("source"), - "source_url": metadata.get("source_url") or self.metadata("source_url"), - "license": metadata.get("license") or self.metadata("license"), - "license_url": metadata.get("license_url") - or self.metadata("license_url"), - "about": metadata.get("about") or self.metadata("about"), - "about_url": metadata.get("about_url") or self.metadata("about_url"), - } - ) - def _prepare_connection(self, conn, database): conn.row_factory = sqlite3.Row conn.text_factory = lambda x: str(x, "utf-8", "replace") - for name, num_args, func in self.sqlite_functions: - conn.create_function(name, num_args, func) - if self.sqlite_extensions: + if self.sqlite_extensions and database != INTERNAL_DB_NAME: conn.enable_load_extension(True) for extension in self.sqlite_extensions: - conn.execute(f"SELECT load_extension('{extension}')") + # "extension" is either a string path to the extension + # or a 
2-item tuple that specifies which entrypoint to load. + if isinstance(extension, tuple): + path, entrypoint = extension + conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) + else: + conn.execute("SELECT load_extension(?)", [extension]) if self.setting("cache_size_kb"): conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}") # pylint: disable=no-member - pm.hook.prepare_connection(conn=conn, database=database, datasette=self) + if database != INTERNAL_DB_NAME: + pm.hook.prepare_connection(conn=conn, database=database, datasette=self) + # If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases + if self.crossdb and database == "_memory": + count = 0 + for db_name, db in self.databases.items(): + if count >= SQLITE_LIMIT_ATTACHED or db.is_memory: + continue + sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format( + path=db.path, + qs="mode=ro" if db.is_mutable else "immutable=1", + name=db_name, + ) + conn.execute(sql) + count += 1 def add_message(self, request, message, type=INFO): if not hasattr(request, "_messages"): @@ -485,34 +1044,409 @@ class Datasette: else: return [] - async def permission_allowed(self, actor, action, resource=None, default=False): - "Check permissions using the permissions_allowed plugin hook" - result = None - for check in pm.hook.permission_allowed( + async def _crumb_items(self, request, table=None, database=None): + crumbs = [] + actor = None + if request: + actor = request.actor + # Top-level link + if await self.allowed(action="view-instance", actor=actor): + crumbs.append({"href": self.urls.instance(), "label": "home"}) + # Database link + if database: + if await self.allowed( + action="view-database", + resource=DatabaseResource(database=database), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.database(database), + "label": database, + } + ) + # Table link + if table: + assert database, "table= requires database=" + if await self.allowed( + 
action="view-table", + resource=TableResource(database=database, table=table), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.table(database, table), + "label": table, + } + ) + return crumbs + + async def actors_from_ids( + self, actor_ids: Iterable[str | int] + ) -> Dict[int | str, Dict]: + result = pm.hook.actors_from_ids(datasette=self, actor_ids=actor_ids) + if result is None: + # Do the default thing + return {actor_id: {"id": actor_id} for actor_id in actor_ids} + result = await await_me_maybe(result) + return result + + async def track_event(self, event: Event): + assert isinstance(event, self.event_classes), "Invalid event type: {}".format( + type(event) + ) + for hook in pm.hook.track_event(datasette=self, event=event): + await await_me_maybe(hook) + + def resource_for_action(self, action: str, parent: str | None, child: str | None): + """ + Create a Resource instance for the given action with parent/child values. + + Looks up the action's resource_class and instantiates it with the + provided parent and child identifiers. + + Args: + action: The action name (e.g., "view-table", "view-query") + parent: The parent resource identifier (e.g., database name) + child: The child resource identifier (e.g., table/query name) + + Returns: + A Resource instance of the appropriate subclass + + Raises: + ValueError: If the action is unknown + """ + from datasette.permissions import Resource + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + resource_class = action_obj.resource_class + instance = object.__new__(resource_class) + Resource.__init__(instance, parent=parent, child=child) + return instance + + async def check_visibility( + self, + actor: dict, + action: str, + resource: "Resource" | None = None, + ): + """ + Check if actor can see a resource and if it's private. + + Returns (visible, private) tuple: + - visible: bool - can the actor see it? 
+ - private: bool - if visible, can anonymous users NOT see it? + """ + from datasette.permissions import Resource + + # Validate that resource is a Resource object or None + if resource is not None and not isinstance(resource, Resource): + raise TypeError(f"resource must be a Resource subclass instance or None.") + + # Check if actor can see it + if not await self.allowed(action=action, resource=resource, actor=actor): + return False, False + + # Check if anonymous user can see it (for "private" flag) + if not await self.allowed(action=action, resource=resource, actor=None): + # Actor can see it but anonymous cannot - it's private + return True, True + + # Both actor and anonymous can see it - it's public + return True, False + + async def allowed_resources_sql( + self, + *, + action: str, + actor: dict | None = None, + parent: str | None = None, + include_is_private: bool = False, + ) -> ResourcesSQL: + """ + Build SQL query to get all resources the actor can access for the given action. + + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, include is_private column showing if anonymous cannot access + + Returns a namedtuple of (query: str, params: dict) that can be executed against the internal database. + The query returns rows with (parent, child, reason) columns, plus is_private if requested. 
+ + Example: + query, params = await datasette.allowed_resources_sql( + action="view-table", + actor=actor, + parent="mydb", + include_is_private=True + ) + result = await datasette.get_internal_database().execute(query, params) + """ + from datasette.utils.actions_sql import build_allowed_resources_sql + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + sql, params = await build_allowed_resources_sql( + self, actor, action, parent=parent, include_is_private=include_is_private + ) + return ResourcesSQL(sql, params) + + async def allowed_resources( + self, + action: str, + actor: dict | None = None, + *, + parent: str | None = None, + include_is_private: bool = False, + include_reasons: bool = False, + limit: int = 100, + next: str | None = None, + ) -> PaginatedResources: + """ + Return paginated resources the actor can access for the given action. + + Uses SQL with keyset pagination to efficiently filter resources. + Returns PaginatedResources with list of Resource instances and pagination metadata. 
+ + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, adds a .private attribute to each Resource + include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons + limit: Maximum number of results to return (1-1000, default 100) + next: Keyset token from previous page for pagination + + Returns: + PaginatedResources with: + - resources: List of Resource objects for this page + - next: Token for next page (None if no more results) + + Example: + # Get first page of tables + page = await datasette.allowed_resources("view-table", actor, limit=50) + for table in page.resources: + print(f"{table.parent}/{table.child}") + + # Get next page + if page.next: + next_page = await datasette.allowed_resources( + "view-table", actor, limit=50, next=page.next + ) + + # With reasons for debugging + page = await datasette.allowed_resources( + "view-table", actor, include_reasons=True + ) + for table in page.resources: + print(f"{table.child}: {table.reasons}") + + # Iterate through all results with async generator + page = await datasette.allowed_resources("view-table", actor) + async for table in page.all(): + print(table.child) + """ + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + # Validate and cap limit + limit = min(max(1, limit), 1000) + + # Get base SQL query + query, params = await self.allowed_resources_sql( + action=action, + actor=actor, + parent=parent, + include_is_private=include_is_private, + ) + + # Add keyset pagination WHERE clause if next token provided + if next: + try: + components = urlsafe_components(next) + if len(components) >= 2: + last_parent, last_child = components[0], components[1] + # Keyset condition: (parent > last) OR (parent = last AND child > last) + keyset_where = """ + (parent > :keyset_parent OR 
+ (parent = :keyset_parent AND child > :keyset_child)) + """ + # Wrap original query and add keyset filter + query = f"SELECT * FROM ({query}) WHERE {keyset_where}" + params["keyset_parent"] = last_parent + params["keyset_child"] = last_child + except (ValueError, KeyError): + # Invalid token - ignore and start from beginning + pass + + # Add LIMIT (fetch limit+1 to detect if there are more results) + # Note: query from allowed_resources_sql() already includes ORDER BY parent, child + query = f"{query} LIMIT :limit" + params["limit"] = limit + 1 + + # Execute query + result = await self.get_internal_database().execute(query, params) + rows = list(result.rows) + + # Check if truncated (got more than limit rows) + truncated = len(rows) > limit + if truncated: + rows = rows[:limit] # Remove the extra row + + # Build Resource objects with optional attributes + resources = [] + for row in rows: + # row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested) + resource = self.resource_for_action(action, parent=row[0], child=row[1]) + + # Add reasons if requested + if include_reasons: + reason_json = row[2] + try: + reasons_array = ( + json.loads(reason_json) if isinstance(reason_json, str) else [] + ) + resource.reasons = [r for r in reasons_array if r is not None] + except (json.JSONDecodeError, TypeError): + resource.reasons = [reason_json] if reason_json else [] + + # Add private flag if requested + if include_is_private: + resource.private = bool(row[3]) + + resources.append(resource) + + # Generate next token if there are more results + next_token = None + if truncated and resources: + last_resource = resources[-1] + # Use tilde-encoding like table pagination + next_token = "{},{}".format( + tilde_encode(str(last_resource.parent)), + tilde_encode(str(last_resource.child)), + ) + + return PaginatedResources( + resources=resources, + next=next_token, + _datasette=self, + _action=action, + _actor=actor, + _parent=parent, + 
_include_is_private=include_is_private, + _include_reasons=include_reasons, + _limit=limit, + ) + + async def allowed( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ) -> bool: + """ + Check if actor can perform action on specific resource. + + Uses SQL to check permission for a single resource without fetching all resources. + This is efficient - it does NOT call allowed_resources() and check membership. + + For global actions, resource should be None (or omitted). + + Example: + from datasette.resources import TableResource + can_view = await datasette.allowed( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=actor + ) + + # For global actions, resource can be omitted: + can_debug = await datasette.allowed(action="permissions-debug", actor=actor) + """ + from datasette.utils.actions_sql import check_permission_for_resource + + # For global actions, resource remains None + + # Check if this action has also_requires - if so, check that action first + action_obj = self.actions.get(action) + if action_obj and action_obj.also_requires: + # Must have the required action first + if not await self.allowed( + action=action_obj.also_requires, + resource=resource, + actor=actor, + ): + return False + + # For global actions, resource is None + parent = resource.parent if resource else None + child = resource.child if resource else None + + result = await check_permission_for_resource( datasette=self, actor=actor, action=action, - resource=resource, - ): - check = await await_me_maybe(check) - if check is not None: - result = check - used_default = False - if result is None: - result = default - used_default = True - self._permission_checks.append( - { - "when": datetime.datetime.utcnow().isoformat(), - "actor": actor, - "action": action, - "resource": resource, - "used_default": used_default, - "result": result, - } + parent=parent, + child=child, ) + + # Log the permission check for 
debugging + self._permission_checks.append( + PermissionCheck( + when=datetime.datetime.now(datetime.timezone.utc).isoformat(), + actor=actor, + action=action, + parent=parent, + child=child, + result=result, + ) + ) + return result + async def ensure_permission( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ): + """ + Check if actor can perform action on resource, raising Forbidden if not. + + This is a convenience wrapper around allowed() that raises Forbidden + instead of returning False. Use this when you want to enforce a permission + check and halt execution if it fails. + + Example: + from datasette.resources import TableResource + + # Will raise Forbidden if actor cannot view the table + await datasette.ensure_permission( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=request.actor + ) + + # For instance-level actions, resource can be omitted: + await datasette.ensure_permission( + action="permissions-debug", + actor=request.actor + ) + """ + if not await self.allowed(action=action, resource=resource, actor=actor): + raise Forbidden(action) + async def execute( self, db_name, @@ -532,8 +1466,8 @@ class Datasette: log_sql_errors=log_sql_errors, ) - async def expand_foreign_keys(self, database, table, column, values): - "Returns dict mapping (column, value) -> label" + async def expand_foreign_keys(self, actor, database, table, column, values): + """Returns dict mapping (column, value) -> label""" labeled_fks = {} db = self.databases[database] foreign_keys = await db.foreign_keys_for_table(table) @@ -546,7 +1480,19 @@ class Datasette: ][0] except IndexError: return {} - label_column = await db.label_column_for_table(fk["other_table"]) + # Ensure user has permission to view the referenced table + from datasette.resources import TableResource + + other_table = fk["other_table"] + other_column = fk["other_column"] + visible, _ = await self.check_visibility( + actor, + 
action="view-table", + resource=TableResource(database=database, table=other_table), + ) + if not visible: + return {} + label_column = await db.label_column_for_table(other_table) if not label_column: return {(fk["column"], value): str(value) for value in values} labeled_fks = {} @@ -555,9 +1501,9 @@ class Datasette: from {other_table} where {other_column} in ({placeholders}) """.format( - other_column=escape_sqlite(fk["other_column"]), + other_column=escape_sqlite(other_column), label_column=escape_sqlite(label_column), - other_table=escape_sqlite(fk["other_table"]), + other_table=escape_sqlite(other_table), placeholders=", ".join(["?"] * len(set(values))), ) try: @@ -575,31 +1521,26 @@ class Datasette: url = "https://" + url[len("http://") :] return url - def _register_custom_units(self): - "Register any custom units defined in the metadata.json with Pint" - for unit in self.metadata("custom_units") or []: - ureg.define(unit) - def _connected_databases(self): return [ { "name": d.name, + "route": d.route, "path": d.path, "size": d.size, "is_mutable": d.is_mutable, "is_memory": d.is_memory, "hash": d.hash, } - for d in sorted(self.databases.values(), key=lambda d: d.name) + for name, d in self.databases.items() ] def _versions(self): conn = sqlite3.connect(":memory:") - self._prepare_connection(conn, ":memory:") + self._prepare_connection(conn, "_memory") sqlite_version = conn.execute("select sqlite_version()").fetchone()[0] - sqlite_extensions = {} + sqlite_extensions = {"json1": detect_json1(conn)} for extension, testsql, hasversion in ( - ("json1", "SELECT json('{}')", False), ("spatialite", "SELECT spatialite_version()", True), ): try: @@ -610,6 +1551,17 @@ class Datasette: sqlite_extensions[extension] = None except Exception: pass + # More details on SpatiaLite + if "spatialite" in sqlite_extensions: + spatialite_details = {} + for fn in SPATIALITE_FUNCTIONS: + try: + result = conn.execute("select {}()".format(fn)) + spatialite_details[fn] = 
result.fetchone()[0] + except Exception as e: + spatialite_details[fn] = {"error": str(e)} + sqlite_extensions["spatialite"] = spatialite_details + # Figure out supported FTS versions fts_versions = [] for fts in ("FTS5", "FTS4", "FTS3"): @@ -623,6 +1575,15 @@ class Datasette: datasette_version = {"version": __version__} if self.version_note: datasette_version["note"] = self.version_note + + try: + # Optional import to avoid breaking Pyodide + # https://github.com/simonw/datasette/issues/1733#issuecomment-1115268245 + import uvicorn + + uvicorn_version = uvicorn.__version__ + except ImportError: + uvicorn_version = None info = { "python": { "version": ".".join(map(str, sys.version_info[:3])), @@ -630,7 +1591,7 @@ class Datasette: }, "datasette": datasette_version, "asgi": "3.0", - "uvicorn": uvicorn.__version__, + "uvicorn": uvicorn_version, "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, @@ -643,9 +1604,9 @@ class Datasette: if using_pysqlite3: for package in ("pysqlite3", "pysqlite3-binary"): try: - info["pysqlite3"] = pkg_resources.get_distribution(package).version + info["pysqlite3"] = importlib.metadata.version(package) break - except pkg_resources.DistributionNotFound: + except importlib.metadata.PackageNotFoundError: pass return info @@ -658,18 +1619,21 @@ class Datasette: should_show_all = all if not should_show_all: ps = [p for p in ps if p["name"] not in DEFAULT_PLUGINS] + ps.sort(key=lambda p: p["name"]) return [ { "name": p["name"], "static": p["static_path"] is not None, "templates": p["templates_path"] is not None, "version": p.get("version"), - "hooks": p["hooks"], + "hooks": list(sorted(set(p["hooks"]))), } for p in ps ] def _threads(self): + if self.setting("num_sql_threads") == 0: + return {"num_threads": 0, "threads": []} threads = list(threading.enumerate()) d = { "num_threads": len(threads), @@ -677,31 +1641,46 @@ class Datasette: {"name": t.name, "ident": t.ident, "daemon": t.daemon} for t in threads ], } - # Only 
available in Python 3.7+ - if hasattr(asyncio, "all_tasks"): - tasks = asyncio.all_tasks() - d.update( - { - "num_tasks": len(tasks), - "tasks": [_cleaner_task_str(t) for t in tasks], - } - ) + tasks = asyncio.all_tasks() + d.update( + { + "num_tasks": len(tasks), + "tasks": [_cleaner_task_str(t) for t in tasks], + } + ) return d def _actor(self, request): return {"actor": request.actor} - def table_metadata(self, database, table): - "Fetch table-specific metadata." + def _actions(self): + return [ + { + "name": action.name, + "abbr": action.abbr, + "description": action.description, + "takes_parent": action.takes_parent, + "takes_child": action.takes_child, + "resource_class": ( + action.resource_class.__name__ if action.resource_class else None + ), + "also_requires": action.also_requires, + } + for action in sorted(self.actions.values(), key=lambda a: a.name) + ] + + async def table_config(self, database: str, table: str) -> dict: + """Return dictionary of configuration for specified table""" return ( - (self.metadata("databases") or {}) + (self.config or {}) + .get("databases", {}) .get(database, {}) .get("tables", {}) .get(table, {}) ) def _register_renderers(self): - """ Register output renderers which output data in custom formats. 
""" + """Register output renderers which output data in custom formats.""" # Built-in renderers self.renderers["json"] = (json_renderer, lambda: True) @@ -709,7 +1688,7 @@ class Datasette: hook_renderers = [] # pylint: disable=no-member for hook in pm.hook.register_output_renderer(datasette=self): - if type(hook) == list: + if type(hook) is list: hook_renderers += hook else: hook_renderers.append(hook) @@ -722,15 +1701,23 @@ class Datasette: ) async def render_template( - self, templates, context=None, request=None, view_name=None + self, + templates: List[str] | str | Template, + context: Dict[str, Any] | Context | None = None, + request: Request | None = None, + view_name: str | None = None, ): + if not self._startup_invoked: + raise Exception("render_template() called before await ds.invoke_startup()") context = context or {} if isinstance(templates, Template): template = templates else: if isinstance(templates, str): templates = [templates] - template = self.jinja_env.select_template(templates) + template = self.get_jinja_environment(request).select_template(templates) + if dataclasses.is_dataclass(context): + context = dataclasses.asdict(context) body_scripts = [] # pylint: disable=no-member for extra_script in pm.hook.extra_body_script( @@ -743,7 +1730,13 @@ class Datasette: datasette=self, ): extra_script = await await_me_maybe(extra_script) - body_scripts.append(Markup(extra_script)) + if isinstance(extra_script, dict): + script = extra_script["script"] + module = bool(extra_script.get("module")) + else: + script = extra_script + module = False + body_scripts.append({"script": Markup(script), "module": module}) extra_template_vars = {} # pylint: disable=no-member @@ -765,7 +1758,9 @@ class Datasette: async def menu_links(): links = [] for hook in pm.hook.menu_links( - datasette=self, actor=request.actor if request else None + datasette=self, + actor=request.actor if request else None, + request=request or None, ): extra_links = await await_me_maybe(hook) if 
extra_links: @@ -775,6 +1770,8 @@ class Datasette: template_context = { **context, **{ + "request": request, + "crumb_items": self._crumb_items, "urls": self.urls, "actor": request.actor if request else None, "menu_links": menu_links, @@ -795,16 +1792,29 @@ class Datasette: ), "base_url": self.setting("base_url"), "csrftoken": request.scope["csrftoken"] if request else lambda: "", + "datasette_version": __version__, }, **extra_template_vars, } if request and request.args.get("_context") and self.setting("template_debug"): return "
{}
".format( - jinja2.escape(json.dumps(template_context, default=repr, indent=4)) + escape(json.dumps(template_context, default=repr, indent=4)) ) return await template.render_async(template_context) + def set_actor_cookie( + self, response: Response, actor: dict, expire_after: int | None = None + ): + data = {"a": actor} + if expire_after: + expires_at = int(time.time()) + (24 * 60 * 60) + data["e"] = baseconv.base62.encode(expires_at) + response.set_cookie("ds_actor", self.sign(data, "actor")) + + def delete_actor_cookie(self, response: Response): + response.set_cookie("ds_actor", "", expires=0, max_age=0) + async def _asset_urls(self, key, template, context, request, view_name): # Flatten list-of-lists from plugins: seen_urls = set() @@ -820,42 +1830,49 @@ class Datasette: ): hook = await await_me_maybe(hook) collected.extend(hook) - collected.extend(self.metadata(key) or []) + collected.extend((self.config or {}).get(key) or []) output = [] for url_or_dict in collected: if isinstance(url_or_dict, dict): url = url_or_dict["url"] sri = url_or_dict.get("sri") + module = bool(url_or_dict.get("module")) else: url = url_or_dict sri = None + module = False if url in seen_urls: continue seen_urls.add(url) if url.startswith("/"): # Take base_url into account: url = self.urls.path(url) + script = {"url": url} if sri: - output.append({"url": url, "sri": sri}) - else: - output.append({"url": url}) + script["sri"] = sri + if module: + script["module"] = True + output.append(script) return output - def app(self): - "Returns an ASGI app function that serves the whole of Datasette" + def _config(self): + return redact_keys( + self.config, ("secret", "key", "password", "token", "hash", "dsn") + ) + + def _routes(self): routes = [] - for routes_to_add in pm.hook.register_routes(): + for routes_to_add in pm.hook.register_routes(datasette=self): for regex, view_fn in routes_to_add: routes.append((regex, wrap_view(view_fn, self))) def add_route(view, regex): routes.append((regex, 
view)) - # Generate a regex snippet to match all registered renderer file extensions - renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - - add_route(IndexView.as_view(self), r"/(?P(\.jsono?)?$)") + add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") + add_route(IndexView.as_view(self), r"/-/(\.(?Pjsono?))?$") + add_route(permanent_redirect("/-/"), r"/-$") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") @@ -880,47 +1897,73 @@ class Datasette: ), ) add_route( - JsonDataView.as_view(self, "metadata.json", lambda: self._metadata), - r"/-/metadata(?P(\.json)?)$", + permanent_redirect( + "/_memory", forward_query_string=True, forward_rest=True + ), + r"/:memory:(?P.*)$", ) add_route( JsonDataView.as_view(self, "versions.json", self._versions), - r"/-/versions(?P(\.json)?)$", + r"/-/versions(\.(?Pjson))?$", ) add_route( JsonDataView.as_view( self, "plugins.json", self._plugins, needs_request=True ), - r"/-/plugins(?P(\.json)?)$", + r"/-/plugins(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "settings.json", lambda: self._settings), - r"/-/settings(?P(\.json)?)$", + r"/-/settings(\.(?Pjson))?$", ) add_route( - permanent_redirect("/-/settings.json"), - r"/-/config.json", - ) - add_route( - permanent_redirect("/-/settings"), - r"/-/config", + JsonDataView.as_view(self, "config.json", lambda: self._config()), + r"/-/config(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "threads.json", self._threads), - r"/-/threads(?P(\.json)?)$", + r"/-/threads(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "databases.json", self._connected_databases), - r"/-/databases(?P(\.json)?)$", + r"/-/databases(\.(?Pjson))?$", ) add_route( - JsonDataView.as_view(self, "actor.json", self._actor, needs_request=True), - r"/-/actor(?P(\.json)?)$", + JsonDataView.as_view( + self, "actor.json", self._actor, needs_request=True, permission=None + ), + r"/-/actor(\.(?Pjson))?$", + ) + add_route( + 
JsonDataView.as_view( + self, + "actions.json", + self._actions, + template="debug_actions.html", + permission="permissions-debug", + ), + r"/-/actions(\.(?Pjson))?$", ) add_route( AuthTokenView.as_view(self), r"/-/auth-token$", ) + add_route( + CreateTokenView.as_view(self), + r"/-/create-token$", + ) + add_route( + ApiExplorerView.as_view(self), + r"/-/api$", + ) + add_route( + TablesView.as_view(self), + r"/-/tables(\.(?Pjson))?$", + ) + add_route( + InstanceSchemaView.as_view(self), + r"/-/schema(\.(?Pjson|md))?$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", @@ -929,6 +1972,18 @@ class Datasette: PermissionsDebugView.as_view(self), r"/-/permissions$", ) + add_route( + AllowedResourcesView.as_view(self), + r"/-/allowed(\.(?Pjson))?$", + ) + add_route( + PermissionRulesView.as_view(self), + r"/-/rules(\.(?Pjson))?$", + ) + add_route( + PermissionCheckView.as_view(self), + r"/-/check(\.(?Pjson))?$", + ) add_route( MessagesDebugView.as_view(self), r"/-/messages$", @@ -938,46 +1993,127 @@ class Datasette: r"/-/allow-debug$", ) add_route( - PatternPortfolioView.as_view(self), + wrap_view(PatternPortfolioView, self), r"/-/patterns$", ) add_route( - DatabaseDownload.as_view(self), r"/(?P[^/]+?)(?P\.db)$" + wrap_view(database_download, self), + r"/(?P[^\/\.]+)\.db$", ) add_route( - DatabaseView.as_view(self), - r"/(?P[^/]+?)(?P" - + renderer_regex - + r"|.jsono|\.csv)?$", + wrap_view(DatabaseView, self), + r"/(?P[^\/\.]+)(\.(?P\w+))?$", + ) + add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") + add_route( + DatabaseSchemaView.as_view(self), + r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", ) add_route( - TableView.as_view(self), - r"/(?P[^/]+)/(?P[^/]+?$)", + wrap_view(QueryView, self), + r"/(?P[^\/\.]+)/-/query(\.(?P\w+))?$", + ) + add_route( + wrap_view(table_view, self), + r"/(?P[^\/\.]+)/(?P[^\/\.]+)(\.(?P\w+))?$", ) add_route( RowView.as_view(self), - r"/(?P[^/]+)/(?P
[^/]+?)/(?P[^/]+?)(?P" - + renderer_regex - + r")?$", + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)(\.(?P\w+))?$", ) - self._register_custom_units() + add_route( + TableInsertView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/insert$", + ) + add_route( + TableUpsertView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/upsert$", + ) + add_route( + TableDropView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/drop$", + ) + add_route( + TableSchemaView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/schema(\.(?Pjson|md))?$", + ) + add_route( + RowDeleteView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)/-/delete$", + ) + add_route( + RowUpdateView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)/-/update$", + ) + return [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def resolve_database(self, request): + database_route = tilde_decode(request.url_vars["database"]) + try: + return self.get_database(route=database_route) + except KeyError: + raise DatabaseNotFound(database_route) + + async def resolve_table(self, request): + db = await self.resolve_database(request) + table_name = tilde_decode(request.url_vars["table"]) + # Table must exist + is_view = False + table_exists = await db.table_exists(table_name) + if not table_exists: + is_view = await db.view_exists(table_name) + if not (table_exists or is_view): + raise TableNotFound(db.name, table_name) + return ResolvedTable(db, table_name, is_view) + + async def resolve_row(self, request): + db, table_name, _ = await self.resolve_table(request) + pk_values = urlsafe_components(request.url_vars["pks"]) + sql, params, pks = await row_sql_params_pks(db, table_name, pk_values) + results = await db.execute(sql, params, truncate=True) + row = results.first() + if row is None: + raise RowNotFound(db.name, table_name, pk_values) + return ResolvedRow(db, table_name, sql, params, pks, pk_values, results.first()) + + def app(self): + """Returns an ASGI app function that serves the whole of Datasette""" + routes = self._routes() async def setup_db(): # First time server starts up, calculate table counts for immutable databases - for dbname, database in self.databases.items(): + for database in self.databases.values(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - asgi = AsgiLifespan( - AsgiTracer( - asgi_csrf.asgi_csrf( - DatasetteRouter(self, routes), - signing_secret=self._secret, - cookie_name="ds_csrftoken", - ) + async def custom_csrf_error(scope, send, message_id): + await asgi_send( + send, + content=await self.render_template( + 
"csrf_error.html", + {"message_id": message_id, "message_name": Errors(message_id).name}, + ), + status=403, + content_type="text/html; charset=utf-8", + ) + + asgi = asgi_csrf.asgi_csrf( + DatasetteRouter(self, routes), + signing_secret=self._secret, + cookie_name="ds_csrftoken", + skip_if_scope=lambda scope: any( + pm.hook.skip_csrf(datasette=self, scope=scope) ), - on_startup=setup_db, + send_csrf_failed=custom_csrf_error, ) + if self.setting("trace_debug"): + asgi = AsgiTracer(asgi) + asgi = AsgiLifespan(asgi) + asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) return asgi @@ -986,22 +2122,7 @@ class Datasette: class DatasetteRouter: def __init__(self, datasette, routes): self.ds = datasette - routes = routes or [] - self.routes = [ - # Compile any strings to regular expressions - ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) - for pattern, view in routes - ] - # Build a list of pages/blah/{name}.html matching expressions - pattern_templates = [ - filepath - for filepath in self.ds.jinja_env.list_templates() - if "{" in filepath and filepath.startswith("pages/") - ] - self.page_routes = [ - (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) - for filepath in pattern_templates - ] + self.routes = routes or [] async def __call__(self, scope, receive, send): # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves @@ -1009,6 +2130,7 @@ class DatasetteRouter: raw_path = scope.get("raw_path") if raw_path: path = raw_path.decode("ascii") + path = path.partition("?")[0] return await self.route_path(scope, receive, send, path) async def route_path(self, scope, receive, send, path): @@ -1016,6 +2138,7 @@ class DatasetteRouter: base_url = self.ds.setting("base_url") if base_url != "/" and path.startswith(base_url): path = "/" + path[len(base_url) :] + scope = dict(scope, route_path=path) request = Request(scope, receive) # Populate request_messages if ds_messages cookie is present try: @@ -1042,26 +2165,52 @@ class DatasetteRouter: break scope_modifications["actor"] = actor or default_actor scope = dict(scope, **scope_modifications) - for regex, view in self.routes: - match = regex.match(path) - if match is not None: - new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - request.scope = new_scope - try: - response = await view(request, send) - if response: - self.ds._write_messages_to_response(request, response) - await response.asgi_send(send) - return - except NotFound as exception: - return await self.handle_404(request, send, exception) - except Exception as exception: - return await self.handle_500(request, send, exception) - return await self.handle_404(request, send) + + match, view = resolve_routes(self.routes, path) + + if match is None: + return await self.handle_404(request, send) + + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + request.scope = new_scope + try: + response = await view(request, send) + if response: + self.ds._write_messages_to_response(request, response) + await response.asgi_send(send) + return + except NotFound as exception: + return await self.handle_404(request, send, exception) + except Forbidden as exception: + # Try the forbidden() plugin hook + for custom_response in pm.hook.forbidden( + datasette=self.ds, request=request, message=exception.args[0] + ): + custom_response 
= await await_me_maybe(custom_response) + assert ( + custom_response + ), "Default forbidden() hook should have been called" + return await custom_response.asgi_send(send) + except Exception as exception: + return await self.handle_exception(request, send, exception) async def handle_404(self, request, send, exception=None): + # If path contains % encoding, redirect to tilde encoding + if "%" in request.path: + # Try the same path but with "%" replaced by "~" + # and "~" replaced with "~7E" + # and "." replaced with "~2E" + new_path = ( + request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E") + ) + if request.query_string: + new_path += "?{}".format(request.query_string) + await asgi_send_redirect(send, new_path) + return # If URL has a trailing slash, redirect to URL without it - path = request.scope.get("raw_path", request.scope["path"].encode("utf8")) + path = request.scope.get( + "raw_path", request.scope["path"].encode("utf8") + ).partition(b"?")[0] context = {} if path.endswith(b"/"): path = path.rstrip(b"/") @@ -1070,17 +2219,28 @@ class DatasetteRouter: await asgi_send_redirect(send, path.decode("latin1")) else: # Is there a pages/* template matching this path? 
- template_path = ( - os.path.join("pages", *request.scope["path"].split("/")) + ".html" - ) + route_path = request.scope.get("route_path", request.scope["path"]) + # Jinja requires template names to use "/" even on Windows + template_name = "pages" + route_path + ".html" + # Build a list of pages/blah/{name}.html matching expressions + environment = self.ds.get_jinja_environment(request) + pattern_templates = [ + filepath + for filepath in environment.list_templates() + if "{" in filepath and filepath.startswith("pages/") + ] + page_routes = [ + (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) + for filepath in pattern_templates + ] try: - template = self.ds.jinja_env.select_template([template_path]) + template = environment.select_template([template_name]) except TemplateNotFound: template = None if template is None: # Try for a pages/blah/{name}.html template match - for regex, wildcard_template in self.page_routes: - match = regex.match(request.scope["path"]) + for regex, wildcard_template in page_routes: + match = regex.match(route_path) if match is not None: context.update(match.groupdict()) template = wildcard_template @@ -1122,7 +2282,7 @@ class DatasetteRouter: view_name="page", ) except NotFoundExplicit as e: - await self.handle_500(request, send, e) + await self.handle_exception(request, send, e) return # Pull content-type out into separate parameter content_type = "text/html; charset=utf-8" @@ -1137,72 +2297,23 @@ class DatasetteRouter: content_type=content_type, ) else: - await self.handle_500(request, send, exception or NotFound("404")) + await self.handle_exception(request, send, exception or NotFound("404")) - async def handle_500(self, request, send, exception): - if self.ds.pdb: - import pdb + async def handle_exception(self, request, send, exception): + responses = [] + for hook in pm.hook.handle_exception( + datasette=self.ds, + request=request, + exception=exception, + ): + response = await await_me_maybe(hook) + if response 
is not None: + responses.append(response) - pdb.post_mortem(exception.__traceback__) - - title = None - if isinstance(exception, Forbidden): - status = 403 - info = {} - message = exception.args[0] - # Try the forbidden() plugin hook - for custom_response in pm.hook.forbidden( - datasette=self.ds, request=request, message=message - ): - custom_response = await await_me_maybe(custom_response) - if custom_response is not None: - await custom_response.asgi_send(send) - return - elif isinstance(exception, Base400): - status = exception.status - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.message_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = [f"{status}.html", "error.html"] - info.update( - { - "ok": False, - "error": message, - "status": status, - "title": title, - } - ) - headers = {} - if self.ds.cors: - headers["Access-Control-Allow-Origin"] = "*" - if request.path.split("?")[0].endswith(".json"): - await asgi_send_json(send, info, status=status, headers=headers) - else: - template = self.ds.jinja_env.select_template(templates) - await asgi_send_html( - send, - await template.render_async( - dict( - info, - urls=self.ds.urls, - app_css_hash=self.ds.app_css_hash(), - menu_links=lambda: [], - ) - ), - status=status, - headers=headers, - ) + assert responses, "Default exception handler should have returned something" + # Even if there are multiple responses use just the first one + response = responses[0] + await response.asgi_send(send) _cleaner_task_str_re = re.compile(r"\S*site-packages/") @@ -1216,7 +2327,44 @@ def _cleaner_task_str(task): return _cleaner_task_str_re.sub("", s) -def wrap_view(view_fn, datasette): +def wrap_view(view_fn_or_class, datasette): + is_function = isinstance(view_fn_or_class, 
types.FunctionType) + if is_function: + return wrap_view_function(view_fn_or_class, datasette) + else: + if not isinstance(view_fn_or_class, type): + raise ValueError("view_fn_or_class must be a function or a class") + return wrap_view_class(view_fn_or_class, datasette) + + +def wrap_view_class(view_class, datasette): + async def async_view_for_class(request, send): + instance = view_class() + if inspect.iscoroutinefunction(instance.__call__): + return await async_call_with_supported_arguments( + instance.__call__, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + else: + return call_with_supported_arguments( + instance.__call__, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + + async_view_for_class.view_class = view_class + return async_view_for_class + + +def wrap_view_function(view_fn, datasette): + @functools.wraps(view_fn) async def async_view_fn(request, send): if inspect.iscoroutinefunction(view_fn): response = await async_call_with_supported_arguments( @@ -1242,14 +2390,23 @@ def wrap_view(view_fn, datasette): return async_view_fn -def permanent_redirect(path): +def permanent_redirect(path, forward_query_string=False, forward_rest=False): return wrap_view( - lambda request, send: Response.redirect(path, status=301), + lambda request, send: Response.redirect( + path + + (request.url_vars["rest"] if forward_rest else "") + + ( + ("?" + request.query_string) + if forward_query_string and request.query_string + else "" + ), + status=301, + ), datasette=None, ) -_curly_re = re.compile(r"(\{.*?\})") +_curly_re = re.compile(r"({.*?})") def route_pattern_from_filepath(filepath): @@ -1270,45 +2427,113 @@ class NotFoundExplicit(NotFound): class DatasetteClient: + """Internal HTTP client for making requests to a Datasette instance. 
+ + Used for testing and for internal operations that need to make HTTP requests + to the Datasette app without going through an actual HTTP server. + """ + def __init__(self, ds): self.ds = ds - self.app = ds.app() - def _fix(self, path): - if not isinstance(path, PrefixedUrlString): + @property + def app(self): + return self.ds.app() + + def actor_cookie(self, actor): + # Utility method, mainly for tests + return self.ds.sign({"a": actor}, "actor") + + def _fix(self, path, avoid_path_rewrites=False): + if not isinstance(path, PrefixedUrlString) and not avoid_path_rewrites: path = self.ds.urls.path(path) if path.startswith("/"): path = f"http://localhost{path}" return path - async def get(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.get(self._fix(path), **kwargs) + async def _request(self, method, path, skip_permission_checks=False, **kwargs): + from datasette.permissions import SkipPermissions - async def options(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.options(self._fix(path), **kwargs) + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) - async def head(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.head(self._fix(path), **kwargs) + async def get(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "get", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def post(self, path, **kwargs): - async with 
httpx.AsyncClient(app=self.app) as client: - return await client.post(self._fix(path), **kwargs) + async def options(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "options", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def put(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.put(self._fix(path), **kwargs) + async def head(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "head", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def patch(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.patch(self._fix(path), **kwargs) + async def post(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "post", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def delete(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.delete(self._fix(path), **kwargs) + async def put(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "put", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def request(self, method, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.request(method, self._fix(path), **kwargs) + async def patch(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "patch", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def delete(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "delete", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def request(self, method, path, skip_permission_checks=False, **kwargs): + """Make an HTTP request with the specified method. 
+ + Args: + method: HTTP method (e.g., "GET", "POST", "PUT") + path: The path to request + skip_permission_checks: If True, bypass all permission checks for this request + **kwargs: Additional arguments to pass to httpx + + Returns: + httpx.Response: The response from the request + """ + from datasette.permissions import SkipPermissions + + avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py index 217b3638..4d8c6bea 100644 --- a/datasette/blob_renderer.py +++ b/datasette/blob_renderer.py @@ -34,8 +34,8 @@ async def render_blob(datasette, database, rows, columns, request, table, view_n filename_bits = [] if table: filename_bits.append(to_css_class(table)) - if "pk_path" in request.url_vars: - filename_bits.append(request.url_vars["pk_path"]) + if "pks" in request.url_vars: + filename_bits.append(request.url_vars["pks"]) filename_bits.append(to_css_class(blob_column)) if blob_hash: filename_bits.append(blob_hash[:6]) diff --git a/datasette/cli.py b/datasette/cli.py index 32408d23..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -4,23 +4,34 @@ import click from click import formatting from click.types import CompositeParamType from click_default_group import DefaultGroup +import functools import json import os import pathlib +from runpy import run_module import shutil from subprocess import call import sys -from runpy import run_module 
+import textwrap import webbrowser -from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, pm +from .app import ( + Datasette, + DEFAULT_SETTINGS, + SETTINGS, + SQLITE_LIMIT_ATTACHED, + pm, +) from .utils import ( + LoadExtension, StartupError, check_connection, + deep_dict_update, find_spatialite, parse_metadata, ConnectionProblem, SpatialiteConnectionProblem, initial_path_for_datasette, + pairs_to_nested_config, temporary_docker_directory, value_as_boolean, SpatialiteNotFound, @@ -32,40 +43,24 @@ from .utils.testing import TestClient from .version import __version__ -class Config(click.ParamType): - # This will be removed in Datasette 1.0 in favour of class Setting - name = "config" +def run_sync(coro_func): + """Run an async callable to completion on a fresh event loop.""" + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete(coro_func()) + finally: + asyncio.set_event_loop(None) + loop.close() - def convert(self, config, param, ctx): - if ":" not in config: - self.fail(f'"{config}" should be name:value', param, ctx) - return - name, value = config.split(":", 1) - if name not in DEFAULT_SETTINGS: - self.fail( - f"{name} is not a valid option (--help-config to see all)", - param, - ctx, - ) - return - # Type checking - default = DEFAULT_SETTINGS[name] - if isinstance(default, bool): - try: - return name, value_as_boolean(value) - except ValueAsBooleanError: - self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) - return - elif isinstance(default, int): - if not value.isdigit(): - self.fail(f'"{name}" should be an integer', param, ctx) - return - return name, int(value) - elif isinstance(default, str): - return name, value - else: - # Should never happen: - self.fail("Invalid option") + +# Use Rich for tracebacks if it is installed +try: + from rich.traceback import install + + install(show_locals=True) +except ImportError: + pass class Setting(CompositeParamType): @@ -74,60 +69,89 @@ class 
Setting(CompositeParamType): def convert(self, config, param, ctx): name, value = config - if name not in DEFAULT_SETTINGS: - self.fail( - f"{name} is not a valid option (--help-config to see all)", - param, - ctx, - ) - return - # Type checking - default = DEFAULT_SETTINGS[name] - if isinstance(default, bool): - try: - return name, value_as_boolean(value) - except ValueAsBooleanError: - self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) - return - elif isinstance(default, int): - if not value.isdigit(): - self.fail(f'"{name}" should be an integer', param, ctx) - return - return name, int(value) - elif isinstance(default, str): - return name, value - else: - # Should never happen: - self.fail("Invalid option") + if name in DEFAULT_SETTINGS: + # For backwards compatibility with how this worked prior to + # Datasette 1.0, we turn bare setting names into setting.name + # Type checking for those older settings + default = DEFAULT_SETTINGS[name] + name = "settings.{}".format(name) + if isinstance(default, bool): + try: + return name, "true" if value_as_boolean(value) else "false" + except ValueAsBooleanError: + self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) + elif isinstance(default, int): + if not value.isdigit(): + self.fail(f'"{name}" should be an integer', param, ctx) + return name, value + elif isinstance(default, str): + return name, value + else: + # Should never happen: + self.fail("Invalid option") + return name, value + + +def sqlite_extensions(fn): + fn = click.option( + "sqlite_extensions", + "--load-extension", + type=LoadExtension(), + envvar="DATASETTE_LOAD_EXTENSION", + multiple=True, + help="Path to a SQLite extension to load, and optional entrypoint", + )(fn) + + # Wrap it in a custom error handler + @functools.wraps(fn) + def wrapped(*args, **kwargs): + try: + return fn(*args, **kwargs) + except AttributeError as e: + if "enable_load_extension" in str(e): + raise click.ClickException( + textwrap.dedent( + """ + Your 
Python installation does not have the ability to load SQLite extensions. + + More information: https://datasette.io/help/extensions + """ + ).strip() + ) + raise + + return wrapped @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) @click.version_option(version=__version__) def cli(): """ - Datasette! + Datasette is an open source multi-tool for exploring and publishing data + + \b + About Datasette: https://datasette.io/ + Full documentation: https://docs.datasette.io/ """ @cli.command() @click.argument("files", type=click.Path(exists=True), nargs=-1) @click.option("--inspect-file", default="-") -@click.option( - "sqlite_extensions", - "--load-extension", - envvar="SQLITE_EXTENSIONS", - multiple=True, - help="Path to a SQLite extension to load", -) +@sqlite_extensions def inspect(files, inspect_file, sqlite_extensions): - app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) + """ + Generate JSON summary of provided database files + + This can then be passed to "datasette --inspect-file" to speed up count + operations against immutable database files. 
+ """ + inspect_data = run_sync(lambda: inspect_(files, sqlite_extensions)) if inspect_file == "-": - out = sys.stdout + sys.stdout.write(json.dumps(inspect_data, indent=2)) else: - out = open(inspect_file, "w") - loop = asyncio.get_event_loop() - inspect_data = loop.run_until_complete(inspect_(files, sqlite_extensions)) - out.write(json.dumps(inspect_data, indent=2)) + with open(inspect_file, "w") as fp: + fp.write(json.dumps(inspect_data, indent=2)) async def inspect_(files, sqlite_extensions): @@ -149,7 +173,7 @@ async def inspect_(files, sqlite_extensions): @cli.group() def publish(): - "Publish specified SQLite database files to the internet along with a Datasette-powered interface and API" + """Publish specified SQLite database files to the internet along with a Datasette-powered interface and API""" pass @@ -159,15 +183,23 @@ pm.hook.publish_subcommand(publish=publish) @cli.command() @click.option("--all", help="Include built-in default plugins", is_flag=True) +@click.option( + "--requirements", help="Output requirements.txt of installed plugins", is_flag=True +) @click.option( "--plugins-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), help="Path to directory containing custom plugins", ) -def plugins(all, plugins_dir): - "List currently available plugins" +def plugins(all, requirements, plugins_dir): + """List currently installed plugins""" app = Datasette([], plugins_dir=plugins_dir) - click.echo(json.dumps(app._plugins(all=all), indent=4)) + if requirements: + for plugin in app._plugins(): + if plugin["version"]: + click.echo("{}=={}".format(plugin["name"], plugin["version"])) + else: + click.echo(json.dumps(app._plugins(all=all), indent=4)) @cli.command() @@ -184,7 +216,7 @@ def plugins(all, plugins_dir): help="Path to JSON/YAML file containing metadata to publish", ) @click.option("--extra-options", help="Extra options to pass to datasette serve") -@click.option("--branch", help="Install datasette from a GitHub branch e.g. 
master") +@click.option("--branch", help="Install datasette from a GitHub branch e.g. main") @click.option( "--template-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), @@ -216,6 +248,7 @@ def plugins(all, plugins_dir): "-p", "--port", default=8001, + type=click.IntRange(1, 65535), help="Port to run the server on, defaults to 8001", ) @click.option("--title", help="Title for metadata") @@ -241,7 +274,7 @@ def package( port, **extra_metadata, ): - "Package specified SQLite files into a new datasette Docker container" + """Package SQLite files into a Datasette Docker container""" if not shutil.which("docker"): click.secho( ' The package command requires "docker" to be installed and configured ', @@ -276,15 +309,32 @@ def package( @cli.command() -@click.argument("packages", nargs=-1, required=True) +@click.argument("packages", nargs=-1) @click.option( "-U", "--upgrade", is_flag=True, help="Upgrade packages to latest version" ) -def install(packages, upgrade): - "Install Python packages - e.g. 
Datasette plugins - into the same environment as Datasette" +@click.option( + "-r", + "--requirement", + type=click.Path(exists=True), + help="Install from requirements file", +) +@click.option( + "-e", + "--editable", + help="Install a project in editable mode from this path", +) +def install(packages, upgrade, requirement, editable): + """Install plugins and packages from PyPI into the same environment as Datasette""" + if not packages and not requirement and not editable: + raise click.UsageError("Please specify at least one package to install") args = ["pip", "install"] if upgrade: args += ["--upgrade"] + if editable: + args += ["--editable", editable] + if requirement: + args += ["-r", requirement] args += list(packages) sys.argv = args run_module("pip", run_name="__main__") @@ -294,7 +344,7 @@ def install(packages, upgrade): @click.argument("packages", nargs=-1, required=True) @click.option("-y", "--yes", is_flag=True, help="Don't ask for confirmation") def uninstall(packages, yes): - "Uninstall Python packages (e.g. plugins) from the Datasette environment" + """Uninstall plugins and Python packages from the Datasette environment""" sys.argv = ["pip", "uninstall"] + list(packages) + (["-y"] if yes else []) run_module("pip", run_name="__main__") @@ -322,23 +372,22 @@ def uninstall(packages, yes): "-p", "--port", default=8001, + type=click.IntRange(0, 65535), help="Port for server, defaults to 8001. 
Use -p 0 to automatically assign an available port.", ) +@click.option( + "--uds", + help="Bind to a Unix domain socket", +) @click.option( "--reload", is_flag=True, - help="Automatically reload if database or code change detected - useful for development", + help="Automatically reload if code or metadata change detected - useful for development", ) @click.option( "--cors", is_flag=True, help="Enable CORS by serving Access-Control-Allow-Origin: *" ) -@click.option( - "sqlite_extensions", - "--load-extension", - envvar="SQLITE_EXTENSIONS", - multiple=True, - help="Path to a SQLite extension to load", -) +@sqlite_extensions @click.option( "--inspect-file", help='Path to JSON file created using "datasette inspect"' ) @@ -364,18 +413,19 @@ def uninstall(packages, yes): help="Serve static files from this directory at /MOUNT/...", multiple=True, ) -@click.option("--memory", is_flag=True, help="Make :memory: database available") +@click.option("--memory", is_flag=True, help="Make /_memory database available") @click.option( + "-c", "--config", - type=Config(), - help="Deprecated: set config option using configname:value. 
Use --setting instead.", - multiple=True, + type=click.File(mode="r"), + help="Path to JSON/YAML Datasette configuration file", ) @click.option( + "-s", "--setting", "settings", type=Setting(), - help="Setting, see docs.datasette.io/en/stable/config.html", + help="nested.key, value setting to use in Datasette configuration", multiple=True, ) @click.option( @@ -388,12 +438,30 @@ def uninstall(packages, yes): help="Output URL that sets a cookie authenticating the root user", is_flag=True, ) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) @click.option( "--get", help="Run an HTTP GET request against this path, print results and exit", ) +@click.option( + "--headers", + is_flag=True, + help="Include HTTP headers in --get output", +) +@click.option( + "--token", + help="API token to send with --get requests", +) +@click.option( + "--actor", + help="Actor to use for --get requests (JSON string)", +) @click.option("--version-note", help="Additional note to show on /-/versions") -@click.option("--help-config", is_flag=True, help="Show available config options") +@click.option("--help-settings", is_flag=True, help="Show available settings") @click.option("--pdb", is_flag=True, help="Launch debugger on any errors") @click.option( "-o", @@ -407,11 +475,37 @@ def uninstall(packages, yes): is_flag=True, help="Create database files if they do not exist", ) +@click.option( + "--crossdb", + is_flag=True, + help="Enable cross-database joins using the /_memory database", +) +@click.option( + "--nolock", + is_flag=True, + help="Ignore locking, open locked files in read-only mode", +) +@click.option( + "--ssl-keyfile", + help="SSL key file", + envvar="DATASETTE_SSL_KEYFILE", +) +@click.option( + "--ssl-certfile", + help="SSL certificate file", + envvar="DATASETTE_SSL_CERTFILE", +) +@click.option( + "--internal", + type=click.Path(), + help="Path to a persistent Datasette internal SQLite database", +) def serve( files, immutable, host, 
port, + uds, reload, cors, sqlite_extensions, @@ -425,18 +519,27 @@ def serve( settings, secret, root, + default_deny, get, + headers, + token, + actor, version_note, - help_config, + help_settings, pdb, open_browser, create, + crossdb, + nolock, + ssl_keyfile, + ssl_certfile, + internal, return_instance=False, ): """Serve up specified SQLite database files with a web UI""" - if help_config: + if help_settings: formatter = formatting.HelpFormatter() - with formatter.section("Config options"): + with formatter.section("Settings"): formatter.write_dl( [ (option.name, f"{option.help} (default={option.default})") @@ -451,50 +554,69 @@ def serve( reloader = hupper.start_reloader("datasette.cli.serve") if immutable: reloader.watch_files(immutable) + if config: + reloader.watch_files([config.name]) if metadata: reloader.watch_files([metadata.name]) inspect_data = None if inspect_file: - inspect_data = json.load(open(inspect_file)) + with open(inspect_file) as fp: + inspect_data = json.load(fp) metadata_data = None if metadata: metadata_data = parse_metadata(metadata.read()) - combined_config = {} + config_data = None if config: - click.echo( - "--config name:value will be deprecated in Datasette 1.0, use --setting name value instead", - err=True, - ) - combined_config.update(config) - combined_config.update(settings) + config_data = parse_metadata(config.read()) + + config_data = config_data or {} + + # Merge in settings from -s/--setting + if settings: + settings_updates = pairs_to_nested_config(settings) + # Merge recursively, to avoid over-writing nested values + # https://github.com/simonw/datasette/issues/2389 + deep_dict_update(config_data, settings_updates) kwargs = dict( immutables=immutable, cache_headers=not reload, cors=cors, inspect_data=inspect_data, + config=config_data, metadata=metadata_data, sqlite_extensions=sqlite_extensions, template_dir=template_dir, plugins_dir=plugins_dir, static_mounts=static, - config=combined_config, + settings=None, # These are 
passed in config= now memory=memory, secret=secret, version_note=version_note, pdb=pdb, + crossdb=crossdb, + nolock=nolock, + internal=internal, + default_deny=default_deny, ) - # if files is a single directory, use that as config_dir= - if 1 == len(files) and os.path.isdir(files[0]): - kwargs["config_dir"] = pathlib.Path(files[0]) - files = [] + # Separate directories from files + directories = [f for f in files if os.path.isdir(f)] + file_paths = [f for f in files if not os.path.isdir(f)] + + # Handle config_dir - only one directory allowed + if len(directories) > 1: + raise click.ClickException( + "Cannot pass multiple directories. Pass a single directory as config_dir." + ) + elif len(directories) == 1: + kwargs["config_dir"] = pathlib.Path(directories[0]) # Verify list of files, create if needed (and --create) - for file in files: + for file in file_paths: if not pathlib.Path(file).exists(): if create: sqlite3.connect(file).execute("vacuum") @@ -505,6 +627,33 @@ def serve( ) ) + # Check for duplicate files by resolving all paths to their absolute forms + # Collect all database files that will be loaded (explicit files + config_dir files) + all_db_files = [] + + # Add explicit files + for file in file_paths: + all_db_files.append((file, pathlib.Path(file).resolve())) + + # Add config_dir databases if config_dir is set + if "config_dir" in kwargs: + config_dir = kwargs["config_dir"] + for ext in ("db", "sqlite", "sqlite3"): + for db_file in config_dir.glob(f"*.{ext}"): + all_db_files.append((str(db_file), db_file.resolve())) + + # Check for duplicates + seen = {} + for original_path, resolved_path in all_db_files: + if resolved_path in seen: + raise click.ClickException( + f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' " + f"both refer to {resolved_path}" + ) + seen[resolved_path] = original_path + + files = file_paths + try: ds = Datasette(files, **kwargs) except SpatialiteNotFound: @@ -517,15 +666,38 @@ def serve( return ds # Run the 
"startup" plugin hooks - asyncio.get_event_loop().run_until_complete(ds.invoke_startup()) + run_sync(ds.invoke_startup) - # Run async sanity checks - but only if we're not under pytest - asyncio.get_event_loop().run_until_complete(check_databases(ds)) + # Run async soundness checks - but only if we're not under pytest + run_sync(lambda: check_databases(ds)) + + if headers and not get: + raise click.ClickException("--headers can only be used with --get") + + if token and not get: + raise click.ClickException("--token can only be used with --get") if get: client = TestClient(ds) - response = client.get(get) - click.echo(response.text) + request_headers = {} + if token: + request_headers["Authorization"] = "Bearer {}".format(token) + cookies = {} + if actor: + cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) + response = client.get(get, headers=request_headers, cookies=cookies) + + if headers: + # Output HTTP status code, headers, two newlines, then the response body + click.echo(f"HTTP/1.1 {response.status}") + for key, value in response.headers.items(): + click.echo(f"{key}: {value}") + if response.text: + click.echo() + click.echo(response.text) + else: + click.echo(response.text) + exit_code = 0 if response.status == 200 else 1 sys.exit(exit_code) return @@ -533,21 +705,155 @@ def serve( # Start the server url = None if root: + ds.root_enabled = True url = "http://{}:{}{}?token={}".format( host, port, ds.urls.path("-/auth-token"), ds._root_token ) - print(url) + click.echo(url) if open_browser: if url is None: # Figure out most convenient URL - to table, database or homepage - path = asyncio.get_event_loop().run_until_complete( - initial_path_for_datasette(ds) - ) + path = run_sync(lambda: initial_path_for_datasette(ds)) url = f"http://{host}:{port}{path}" webbrowser.open(url) - uvicorn.run( - ds.app(), host=host, port=port, log_level="info", lifespan="on", workers=1 + uvicorn_kwargs = dict( + host=host, port=port, log_level="info", lifespan="on", 
workers=1
     )
+    if uds:
+        uvicorn_kwargs["uds"] = uds
+    if ssl_keyfile:
+        uvicorn_kwargs["ssl_keyfile"] = ssl_keyfile
+    if ssl_certfile:
+        uvicorn_kwargs["ssl_certfile"] = ssl_certfile
+    uvicorn.run(ds.app(), **uvicorn_kwargs)
+
+
+@cli.command()
+@click.argument("id")
+@click.option(
+    "--secret",
+    help="Secret used for signing the API tokens",
+    envvar="DATASETTE_SECRET",
+    required=True,
+)
+@click.option(
+    "-e",
+    "--expires-after",
+    help="Token should expire after this many seconds",
+    type=int,
+)
+@click.option(
+    "alls",
+    "-a",
+    "--all",
+    type=str,
+    metavar="ACTION",
+    multiple=True,
+    help="Restrict token to this action",
+)
+@click.option(
+    "databases",
+    "-d",
+    "--database",
+    type=(str, str),
+    metavar="DB ACTION",
+    multiple=True,
+    help="Restrict token to this action on this database",
+)
+@click.option(
+    "resources",
+    "-r",
+    "--resource",
+    type=(str, str, str),
+    metavar="DB RESOURCE ACTION",
+    multiple=True,
+    help="Restrict token to this action on this database resource (a table, SQL view or named query)",
+)
+@click.option(
+    "--debug",
+    help="Show decoded token",
+    is_flag=True,
+)
+@click.option(
+    "--plugins-dir",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+    help="Path to directory containing custom plugins",
+)
+def create_token(
+    id, secret, expires_after, alls, databases, resources, debug, plugins_dir
+):
+    """
+    Create a signed API token for the specified actor ID
+
+    Example:
+
+        datasette create-token root --secret mysecret
+
+    To allow only "view-database-download" for all databases:
+
+    \b
+        datasette create-token root --secret mysecret \\
+            --all view-database-download
+
+    To allow "create-table" against a specific database:
+
+    \b
+        datasette create-token root --secret mysecret \\
+            --database mydb create-table
+
+    To allow "insert-row" against a specific table:
+
+    \b
+        datasette create-token root --secret mysecret \\
+            --resource mydb mytable insert-row
+
+    Restricted actions can be specified 
multiple times using + multiple --all, --database, and --resource options. + + Add --debug to see a decoded version of the token. + """ + ds = Datasette(secret=secret, plugins_dir=plugins_dir) + + # Run ds.invoke_startup() in an event loop + run_sync(ds.invoke_startup) + + # Warn about any unknown actions + actions = [] + actions.extend(alls) + actions.extend([p[1] for p in databases]) + actions.extend([p[2] for p in resources]) + for action in actions: + if not ds.actions.get(action): + click.secho( + f" Unknown permission: {action} ", + fg="red", + err=True, + ) + + restrict_database = {} + for database, action in databases: + restrict_database.setdefault(database, []).append(action) + restrict_resource = {} + for database, resource, action in resources: + restrict_resource.setdefault(database, {}).setdefault(resource, []).append( + action + ) + + token = ds.create_token( + id, + expires_after=expires_after, + restrict_all=alls, + restrict_database=restrict_database, + restrict_resource=restrict_resource, + ) + click.echo(token) + if debug: + encoded = token[len("dstok_") :] + click.echo("\nDecoded:\n") + click.echo(json.dumps(ds.unsign(encoded, namespace="token"), indent=2)) + + +pm.hook.register_commands(cli=cli) async def check_databases(ds): @@ -573,3 +879,15 @@ async def check_databases(ds): raise click.UsageError( f"Connection to {database.path} failed check: {str(e.args[0])}" ) + # If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning + if ( + ds.crossdb + and len([db for db in ds.databases.values() if not db.is_memory]) + > SQLITE_LIMIT_ATTACHED + ): + msg = ( + "Warning: --crossdb only works with the first {} attached databases".format( + SQLITE_LIMIT_ATTACHED + ) + ) + click.echo(click.style(msg, bold=True, fg="yellow"), err=True) diff --git a/datasette/database.py b/datasette/database.py index 412e0c59..e5858128 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,7 +1,9 @@ import asyncio +from collections import namedtuple from 
pathlib import Path import janus import queue +import sqlite_utils import sys import threading import uuid @@ -13,71 +15,212 @@ from .utils import ( detect_spatialite, get_all_foreign_keys, get_outbound_foreign_keys, + md5_not_usedforsecurity, sqlite_timelimit, sqlite3, table_columns, table_column_details, ) +from .utils.sqlite import sqlite_version from .inspect import inspect_hash connections = threading.local() +AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) + class Database: - def __init__(self, ds, path=None, is_mutable=False, is_memory=False): + # For table counts stop at this many rows: + count_limit = 10000 + _thread_local_id_counter = 1 + + def __init__( + self, + ds, + path=None, + is_mutable=True, + is_memory=False, + memory_name=None, + mode=None, + ): + self.name = None + self._thread_local_id = f"x{self._thread_local_id_counter}" + Database._thread_local_id_counter += 1 + self.route = None self.ds = ds self.path = path self.is_mutable = is_mutable self.is_memory = is_memory - self.hash = None + self.memory_name = memory_name + if memory_name is not None: + self.is_memory = True + self.cached_hash = None self.cached_size = None - self.cached_table_counts = None + self._cached_table_counts = None self._write_thread = None self._write_queue = None - if not self.is_mutable and not self.is_memory: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size - # Maybe use self.ds.inspect_data to populate cached_table_counts - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.cached_table_counts = { - key: value["count"] - for key, value in self.ds.inspect_data[self.name]["tables"].items() - } + # These are used when in non-threaded mode: + self._read_connection = None + self._write_connection = None + # This is used to track all file connections so they can be closed + self._all_file_connections = [] + self.mode = mode + + @property + def cached_table_counts(self): + if 
self._cached_table_counts is not None: + return self._cached_table_counts + # Maybe use self.ds.inspect_data to populate cached_table_counts + if self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self._cached_table_counts = { + key: value["count"] + for key, value in self.ds.inspect_data[self.name]["tables"].items() + } + return self._cached_table_counts + + @property + def color(self): + if self.hash: + return self.hash[:6] + return md5_not_usedforsecurity(self.name)[:6] + + def suggest_name(self): + if self.path: + return Path(self.path).stem + elif self.memory_name: + return self.memory_name + else: + return "db" def connect(self, write=False): + extra_kwargs = {} + if write: + extra_kwargs["isolation_level"] = "IMMEDIATE" + if self.memory_name: + uri = "file:{}?mode=memory&cache=shared".format(self.memory_name) + conn = sqlite3.connect( + uri, uri=True, check_same_thread=False, **extra_kwargs + ) + if not write: + conn.execute("PRAGMA query_only=1") + return conn if self.is_memory: - return sqlite3.connect(":memory:") + return sqlite3.connect(":memory:", uri=True) + # mode=ro or immutable=1? 
if self.is_mutable: qs = "?mode=ro" + if self.ds.nolock: + qs += "&nolock=1" else: qs = "?immutable=1" assert not (write and not self.is_mutable) if write: qs = "" - return sqlite3.connect( - f"file:{self.path}{qs}", uri=True, check_same_thread=False + if self.mode is not None: + qs = f"?mode={self.mode}" + conn = sqlite3.connect( + f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs ) + self._all_file_connections.append(conn) + return conn - async def execute_write(self, sql, params=None, block=False): + def close(self): + # Close all connections - useful to avoid running out of file handles in tests + for connection in self._all_file_connections: + connection.close() + + async def execute_write(self, sql, params=None, block=True): def _inner(conn): - with conn: - return conn.execute(sql, params or []) + return conn.execute(sql, params or []) - return await self.execute_write_fn(_inner, block=block) + with trace("sql", database=self.name, sql=sql.strip(), params=params): + results = await self.execute_write_fn(_inner, block=block) + return results - async def execute_write_fn(self, fn, block=False): - task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") + async def execute_write_script(self, sql, block=True): + def _inner(conn): + return conn.executescript(sql) + + with trace("sql", database=self.name, sql=sql.strip(), executescript=True): + results = await self.execute_write_fn( + _inner, block=block, transaction=False + ) + return results + + async def execute_write_many(self, sql, params_seq, block=True): + def _inner(conn): + count = 0 + + def count_params(params): + nonlocal count + for param in params: + count += 1 + yield param + + return conn.executemany(sql, count_params(params_seq)), count + + with trace( + "sql", database=self.name, sql=sql.strip(), executemany=True + ) as kwargs: + results, count = await self.execute_write_fn(_inner, block=block) + kwargs["count"] = count + return results + + async def 
execute_isolated_fn(self, fn): + # Open a new connection just for the duration of this function + # blocking the write queue to avoid any writes occurring during it + if self.ds.executor is None: + # non-threaded mode + isolated_connection = self.connect(write=True) + try: + result = fn(isolated_connection) + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + return result + else: + # Threaded mode - send to write thread + return await self._send_to_write_thread(fn, isolated_connection=True) + + async def execute_write_fn(self, fn, block=True, transaction=True): + if self.ds.executor is None: + # non-threaded mode + if self._write_connection is None: + self._write_connection = self.connect(write=True) + self.ds._prepare_connection(self._write_connection, self.name) + if transaction: + with self._write_connection: + return fn(self._write_connection) + else: + return fn(self._write_connection) + else: + return await self._send_to_write_thread( + fn, block=block, transaction=transaction + ) + + async def _send_to_write_thread( + self, fn, block=True, isolated_connection=False, transaction=True + ): if self._write_queue is None: self._write_queue = queue.Queue() if self._write_thread is None: self._write_thread = threading.Thread( target=self._execute_writes, daemon=True ) + self._write_thread.name = "_execute_writes for database {}".format( + self.name + ) self._write_thread.start() + task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") reply_queue = janus.Queue() - self._write_queue.put(WriteTask(fn, task_id, reply_queue)) + self._write_queue.put( + WriteTask(fn, task_id, reply_queue, isolated_connection, transaction) + ) if block: result = await reply_queue.async_q.get() if isinstance(result, Exception): @@ -94,6 +237,7 @@ class Database: conn = None try: conn = self.connect(write=True) + self.ds._prepare_connection(conn, self.name) except 
Exception as e: conn_exception = e while True: @@ -101,21 +245,49 @@ class Database: if conn_exception is not None: result = conn_exception else: - try: - result = task.fn(conn) - except Exception as e: - sys.stderr.write("{}\n".format(e)) - sys.stderr.flush() - result = e + if task.isolated_connection: + isolated_connection = self.connect(write=True) + try: + result = task.fn(isolated_connection) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + else: + try: + if task.transaction: + with conn: + result = task.fn(conn) + else: + result = task.fn(conn) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e task.reply_queue.sync_q.put(result) async def execute_fn(self, fn): + if self.ds.executor is None: + # non-threaded mode + if self._read_connection is None: + self._read_connection = self.connect() + self.ds._prepare_connection(self._read_connection, self.name) + return fn(self._read_connection) + + # threaded mode def in_thread(): - conn = getattr(connections, self.name, None) + conn = getattr(connections, self._thread_local_id, None) if not conn: conn = self.connect() self.ds._prepare_connection(conn, self.name) - setattr(connections, self.name, conn) + setattr(connections, self._thread_local_id, conn) return fn(conn) return await asyncio.get_event_loop().run_in_executor( @@ -175,14 +347,34 @@ class Database: results = await self.execute_fn(sql_operation_in_thread) return results + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + 
self.cached_hash = inspect_hash(p) + return self.cached_hash + @property def size(self): - if self.is_memory: - return 0 if self.cached_size is not None: return self.cached_size - else: + elif self.is_memory: + return 0 + elif self.is_mutable: return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = Path(self.path).stat().st_size + return self.cached_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: @@ -193,7 +385,7 @@ class Database: try: table_count = ( await self.execute( - f"select count(*) from [{table}]", + f"select count(*) from (select * from [{table}] limit {self.count_limit + 1})", custom_time_limit=limit, ) ).rows[0][0] @@ -203,7 +395,7 @@ class Database: except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError): counts[table] = None if not self.is_mutable: - self.cached_table_counts = counts + self._cached_table_counts = counts return counts @property @@ -212,12 +404,18 @@ class Database: return None return Path(self.path).stat().st_mtime_ns - @property - def name(self): - if self.is_memory: - return ":memory:" - else: - return Path(self.path).stem + async def attached_databases(self): + # This used to be: + # select seq, name, file from pragma_database_list() where seq > 0 + # But SQLite prior to 3.16.0 doesn't support pragma functions + results = await self.execute("PRAGMA database_list;") + # {'seq': 0, 'name': 'main', 'file': ''} + return [ + AttachedDatabase(*row) + for row in results.rows + # Filter out the SQLite internal "temp" database, refs #2557 + if row["seq"] > 0 and row["name"] != "temp" + ] async def table_exists(self, table): results = await self.execute( @@ -225,6 +423,12 @@ class Database: ) return bool(results.rows) + async def view_exists(self, table): + results = await self.execute( + "select 1 
from sqlite_master where type='view' and name=?", params=(table,) + ) + return bool(results.rows) + async def table_names(self): results = await self.execute( "select name from sqlite_master where type='table'" @@ -244,14 +448,40 @@ class Database: return await self.execute_fn(lambda conn: detect_fts(conn, table)) async def label_column_for_table(self, table): - explicit_label_column = self.ds.table_metadata(self.name, table).get( + explicit_label_column = (await self.ds.table_config(self.name, table)).get( "label_column" ) if explicit_label_column: return explicit_label_column - column_names = await self.execute_fn(lambda conn: table_columns(conn, table)) + + def column_details(conn): + # Returns {column_name: (type, is_unique)} + db = sqlite_utils.Database(conn) + columns = db[table].columns_dict + indexes = db[table].indexes + details = {} + for name in columns: + is_unique = any( + index + for index in indexes + if index.columns == [name] and index.unique + ) + details[name] = (columns[name], is_unique) + return details + + column_details = await self.execute_fn(column_details) + # Is there just one unique column that's text? + unique_text_columns = [ + name + for name, (type_, is_unique) in column_details.items() + if is_unique and type_ is str + ] + if len(unique_text_columns) == 1: + return unique_text_columns[0] + + column_names = list(column_details.keys()) # Is there a name or title column? 
- name_or_title = [c for c in column_names if c in ("name", "title")] + name_or_title = [c for c in column_names if c.lower() in ("name", "title")] if name_or_title: return name_or_title[0] # If a table has two columns, one of which is ID, then label_column is the other one @@ -259,6 +489,7 @@ class Database: column_names and len(column_names) == 2 and ("id" in column_names or "pk" in column_names) + and not set(column_names) == {"id", "pk"} ): return [c for c in column_names if c not in ("id", "pk")][0] # Couldn't find a label: @@ -270,19 +501,107 @@ class Database: ) async def hidden_table_names(self): - # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r[0] - for r in ( - await self.execute( + hidden_tables = [] + # Add any tables marked as hidden in config + db_config = self.ds.config.get("databases", {}).get(self.name, {}) + if "tables" in db_config: + hidden_tables += [ + t for t in db_config["tables"] if db_config["tables"][t].get("hidden") + ] + + if sqlite_version()[1] >= 37: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with shadow_tables as ( + select name + from pragma_table_list + where [type] = 'shadow' + order by name + ), + core_tables as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + combined as ( + select name from shadow_tables + union all + select name from core_tables + ) + select name from combined order by 1 """ - select name from sqlite_master - where rootpage = 0 - and sql like '%VIRTUAL TABLE%USING FTS%' - """ ) - ).rows + ] + else: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + WITH base AS ( + SELECT name + FROM sqlite_master + WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + fts_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), 
('_config')) + ), + fts5_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' + ), + fts5_shadow_tables AS ( + SELECT + printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name + FROM fts5_names + JOIN fts_suffixes + ), + fts3_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) + ), + fts3_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' + OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' + ), + fts3_shadow_tables AS ( + SELECT + printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name + FROM fts3_names + JOIN fts3_suffixes + ), + final AS ( + SELECT name FROM base + UNION ALL + SELECT name FROM fts5_shadow_tables + UNION ALL + SELECT name FROM fts3_shadow_tables + ) + SELECT name FROM final ORDER BY 1 + """ + ) + ] + # Also hide any FTS tables that have a content= argument + hidden_tables += [ + x[0] + for x in await self.execute( + """ + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%' + AND sql LIKE '%USING FTS%' + AND sql LIKE '%content=%' + """ + ) ] + has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: # Also hide Spatialite internal tables @@ -296,6 +615,9 @@ class Database: "sqlite_sequence", "views_geometry_columns", "virts_geometry_columns", + "data_licenses", + "KNN", + "KNN2", ] + [ r[0] for r in ( @@ -308,21 +630,6 @@ class Database: ) ).rows ] - # Add any from metadata.json - db_metadata = self.ds.metadata(database=self.name) - if "tables" in db_metadata: - hidden_tables += [ - t - for t in db_metadata["tables"] - if db_metadata["tables"][t].get("hidden") - ] - # Also mark as hidden any tables which start with the name of a hidden table - # e.g. 
"searchable_fts" implies "searchable_fts_content" should be hidden - for table_name in await self.table_names(): - for hidden_table in hidden_tables[:]: - if table_name.startswith(hidden_table): - hidden_tables.append(table_name) - continue return hidden_tables @@ -374,16 +681,24 @@ class Database: class WriteTask: - __slots__ = ("fn", "task_id", "reply_queue") + __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") - def __init__(self, fn, task_id, reply_queue): + def __init__(self, fn, task_id, reply_queue, isolated_connection, transaction): self.fn = fn self.task_id = task_id self.reply_queue = reply_queue + self.isolated_connection = isolated_connection + self.transaction = transaction class QueryInterrupted(Exception): - pass + def __init__(self, e, sql, params): + self.e = e + self.sql = sql + self.params = params + + def __str__(self): + return "QueryInterrupted: {}".format(self.e) class MultipleValues(Exception): @@ -412,6 +727,9 @@ class Results: else: raise MultipleValues + def dicts(self): + return [dict(row) for row in self.rows] + def __iter__(self): return iter(self.rows) diff --git a/datasette/default_actions.py b/datasette/default_actions.py new file mode 100644 index 00000000..87d98fac --- /dev/null +++ b/datasette/default_actions.py @@ -0,0 +1,101 @@ +from datasette import hookimpl +from datasette.permissions import Action +from datasette.resources import ( + DatabaseResource, + TableResource, + QueryResource, +) + + +@hookimpl +def register_actions(): + """Register the core Datasette actions.""" + return ( + # Global actions (no resource_class) + Action( + name="view-instance", + abbr="vi", + description="View Datasette instance", + ), + Action( + name="permissions-debug", + abbr="pd", + description="Access permission debug tool", + ), + Action( + name="debug-menu", + abbr="dm", + description="View debug menu items", + ), + # Database-level actions (parent-level) + Action( + name="view-database", + abbr="vd", + 
description="View database", + resource_class=DatabaseResource, + ), + Action( + name="view-database-download", + abbr="vdd", + description="Download database file", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="execute-sql", + abbr="es", + description="Execute read-only SQL queries", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="create-table", + abbr="ct", + description="Create tables", + resource_class=DatabaseResource, + ), + # Table-level actions (child-level) + Action( + name="view-table", + abbr="vt", + description="View table", + resource_class=TableResource, + ), + Action( + name="insert-row", + abbr="ir", + description="Insert rows", + resource_class=TableResource, + ), + Action( + name="delete-row", + abbr="dr", + description="Delete rows", + resource_class=TableResource, + ), + Action( + name="update-row", + abbr="ur", + description="Update rows", + resource_class=TableResource, + ), + Action( + name="alter-table", + abbr="at", + description="Alter tables", + resource_class=TableResource, + ), + Action( + name="drop-table", + abbr="dt", + description="Drop tables", + resource_class=TableResource, + ), + # Query-level actions (child-level) + Action( + name="view-query", + abbr="vq", + description="View named query results", + resource_class=QueryResource, + ), + ) diff --git a/datasette/default_magic_parameters.py b/datasette/default_magic_parameters.py index 0f8f397e..91c1c5aa 100644 --- a/datasette/default_magic_parameters.py +++ b/datasette/default_magic_parameters.py @@ -1,5 +1,4 @@ from datasette import hookimpl -from datasette.utils import escape_fts import datetime import os import time @@ -25,9 +24,12 @@ def now(key, request): if key == "epoch": return int(time.time()) elif key == "date_utc": - return datetime.datetime.utcnow().date().isoformat() + return datetime.datetime.now(datetime.timezone.utc).date().isoformat() elif key == "datetime_utc": - return 
datetime.datetime.utcnow().strftime(r"%Y-%m-%dT%H:%M:%S") + "Z" + return ( + datetime.datetime.now(datetime.timezone.utc).strftime(r"%Y-%m-%dT%H:%M:%S") + + "Z" + ) else: raise KeyError diff --git a/datasette/default_menu_links.py b/datasette/default_menu_links.py index 56f481ef..85032387 100644 --- a/datasette/default_menu_links.py +++ b/datasette/default_menu_links.py @@ -4,7 +4,7 @@ from datasette import hookimpl @hookimpl def menu_links(datasette, actor): async def inner(): - if not await datasette.permission_allowed(actor, "debug-menu"): + if not await datasette.allowed(action="debug-menu", actor=actor): return [] return [ @@ -17,10 +17,6 @@ def menu_links(datasette, actor): "href": datasette.urls.path("/-/versions"), "label": "Version info", }, - { - "href": datasette.urls.path("/-/metadata"), - "label": "Metadata", - }, { "href": datasette.urls.path("/-/settings"), "label": "Settings", diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py deleted file mode 100644 index 9f1d9c62..00000000 --- a/datasette/default_permissions.py +++ /dev/null @@ -1,45 +0,0 @@ -from datasette import hookimpl -from datasette.utils import actor_matches_allow - - -@hookimpl(tryfirst=True) -def permission_allowed(datasette, actor, action, resource): - async def inner(): - if action in ("permissions-debug", "debug-menu"): - if actor and actor.get("id") == "root": - return True - elif action == "view-instance": - allow = datasette.metadata("allow") - if allow is not None: - return actor_matches_allow(actor, allow) - elif action == "view-database": - database_allow = datasette.metadata("allow", database=resource) - if database_allow is None: - return None - return actor_matches_allow(actor, database_allow) - elif action == "view-table": - database, table = resource - tables = datasette.metadata("tables", database=database) or {} - table_allow = (tables.get(table) or {}).get("allow") - if table_allow is None: - return None - return actor_matches_allow(actor, 
table_allow) - elif action == "view-query": - # Check if this query has a "allow" block in metadata - database, query_name = resource - query = await datasette.get_canned_query(database, query_name, actor) - assert query is not None - allow = query.get("allow") - if allow is None: - return None - return actor_matches_allow(actor, allow) - elif action == "execute-sql": - # Use allow_sql block from database block, or from top-level - database_allow_sql = datasette.metadata("allow_sql", database=resource) - if database_allow_sql is None: - database_allow_sql = datasette.metadata("allow_sql") - if database_allow_sql is None: - return None - return actor_matches_allow(actor, database_allow_sql) - - return inner diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. 
The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. + + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if 
self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: 
Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: + """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. 
+ """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, 
table_config or {}) + + # Process queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + 
self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is false (the default), execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. 
+ + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. 
If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. 
+ + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> bool: + """ + Check if restrictions allow the requested action on the requested resource. 
+ + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/datasette/events.py b/datasette/events.py new file mode 100644 index 00000000..5cd5ba3d --- /dev/null +++ b/datasette/events.py @@ -0,0 +1,235 @@ +from abc import ABC, abstractproperty +from dataclasses import asdict, dataclass, field +from datasette.hookspecs import 
hookimpl +from datetime import datetime, timezone + + +@dataclass +class Event(ABC): + @abstractproperty + def name(self): + pass + + created: datetime = field( + init=False, default_factory=lambda: datetime.now(timezone.utc) + ) + actor: dict | None + + def properties(self): + properties = asdict(self) + properties.pop("actor", None) + properties.pop("created", None) + return properties + + +@dataclass +class LoginEvent(Event): + """ + Event name: ``login`` + + A user (represented by ``event.actor``) has logged in. + """ + + name = "login" + + +@dataclass +class LogoutEvent(Event): + """ + Event name: ``logout`` + + A user (represented by ``event.actor``) has logged out. + """ + + name = "logout" + + +@dataclass +class CreateTokenEvent(Event): + """ + Event name: ``create-token`` + + A user created an API token. + + :ivar expires_after: Number of seconds after which this token will expire. + :type expires_after: int or None + :ivar restrict_all: Restricted permissions for this token. + :type restrict_all: list + :ivar restrict_database: Restricted database permissions for this token. + :type restrict_database: dict + :ivar restrict_resource: Restricted resource permissions for this token. + :type restrict_resource: dict + """ + + name = "create-token" + expires_after: int | None + restrict_all: list + restrict_database: dict + restrict_resource: dict + + +@dataclass +class CreateTableEvent(Event): + """ + Event name: ``create-table`` + + A new table has been created in the database. + + :ivar database: The name of the database where the table was created. + :type database: str + :ivar table: The name of the table that was created + :type table: str + :ivar schema: The SQL schema definition for the new table. + :type schema: str + """ + + name = "create-table" + database: str + table: str + schema: str + + +@dataclass +class DropTableEvent(Event): + """ + Event name: ``drop-table`` + + A table has been dropped from the database. 
+ + :ivar database: The name of the database where the table was dropped. + :type database: str + :ivar table: The name of the table that was dropped + :type table: str + """ + + name = "drop-table" + database: str + table: str + + +@dataclass +class AlterTableEvent(Event): + """ + Event name: ``alter-table`` + + A table has been altered. + + :ivar database: The name of the database where the table was altered + :type database: str + :ivar table: The name of the table that was altered + :type table: str + :ivar before_schema: The table's SQL schema before the alteration + :type before_schema: str + :ivar after_schema: The table's SQL schema after the alteration + :type after_schema: str + """ + + name = "alter-table" + database: str + table: str + before_schema: str + after_schema: str + + +@dataclass +class InsertRowsEvent(Event): + """ + Event name: ``insert-rows`` + + Rows were inserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + :ivar ignore: Was ignore set? + :type ignore: bool + :ivar replace: Was replace set? + :type replace: bool + """ + + name = "insert-rows" + database: str + table: str + num_rows: int + ignore: bool + replace: bool + + +@dataclass +class UpsertRowsEvent(Event): + """ + Event name: ``upsert-rows`` + + Rows were upserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + """ + + name = "upsert-rows" + database: str + table: str + num_rows: int + + +@dataclass +class UpdateRowEvent(Event): + """ + Event name: ``update-row`` + + A row was updated in a table. 
+ + :ivar database: The name of the database where the row was updated. + :type database: str + :ivar table: The name of the table where the row was updated. + :type table: str + :ivar pks: The primary key values of the updated row. + """ + + name = "update-row" + database: str + table: str + pks: list + + +@dataclass +class DeleteRowEvent(Event): + """ + Event name: ``delete-row`` + + A row was deleted from a table. + + :ivar database: The name of the database where the row was deleted. + :type database: str + :ivar table: The name of the table where the row was deleted. + :type table: str + :ivar pks: The primary key values of the deleted row. + """ + + name = "delete-row" + database: str + table: str + pks: list + + +@hookimpl +def register_events(): + return [ + LoginEvent, + LogoutEvent, + CreateTableEvent, + CreateTokenEvent, + AlterTableEvent, + DropTableEvent, + InsertRowsEvent, + UpsertRowsEvent, + UpdateRowEvent, + DeleteRowEvent, + ] diff --git a/datasette/facets.py b/datasette/facets.py index 8ad5a423..dd149424 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -1,6 +1,5 @@ import json import urllib -import re from datasette import hookimpl from datasette.database import QueryInterrupted from datasette.utils import ( @@ -8,13 +7,12 @@ from datasette.utils import ( path_with_added_args, path_with_removed_args, detect_json1, - InvalidSql, sqlite3, ) -def load_facet_configs(request, table_metadata): - # Given a request and the metadata configuration for a table, return +def load_facet_configs(request, table_config): + # Given a request and the configuration for a table, return # a dictionary of selected facets, their lists of configs and for each # config whether it came from the request or the metadata. 
# @@ -22,21 +20,21 @@ def load_facet_configs(request, table_metadata): # {"source": "metadata", "config": config1}, # {"source": "request", "config": config2}]} facet_configs = {} - table_metadata = table_metadata or {} - metadata_facets = table_metadata.get("facets", []) - for metadata_config in metadata_facets: - if isinstance(metadata_config, str): + table_config = table_config or {} + table_facet_configs = table_config.get("facets", []) + for facet_config in table_facet_configs: + if isinstance(facet_config, str): type = "column" - metadata_config = {"simple": metadata_config} + facet_config = {"simple": facet_config} else: assert ( - len(metadata_config.values()) == 1 + len(facet_config.values()) == 1 ), "Metadata config dicts should be {type: config}" - type, metadata_config = metadata_config.items()[0] - if isinstance(metadata_config, str): - metadata_config = {"simple": metadata_config} + type, facet_config = list(facet_config.items())[0] + if isinstance(facet_config, str): + facet_config = {"simple": facet_config} facet_configs.setdefault(type, []).append( - {"source": "metadata", "config": metadata_config} + {"source": "metadata", "config": facet_config} ) qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True) for key, values in qs_pairs.items(): @@ -47,13 +45,12 @@ def load_facet_configs(request, table_metadata): elif key.startswith("_facet_"): type = key[len("_facet_") :] for value in values: - # The value is the config - either JSON or not - if value.startswith("{"): - config = json.loads(value) - else: - config = {"simple": value} + # The value is the facet_config - either JSON or not + facet_config = ( + json.loads(value) if value.startswith("{") else {"simple": value} + ) facet_configs.setdefault(type, []).append( - {"source": "request", "config": config} + {"source": "request", "config": facet_config} ) return facet_configs @@ -68,6 +65,8 @@ def register_facet_classes(): class Facet: type = None + # How many rows to consider 
when suggesting facets: + suggest_consider = 1000 def __init__( self, @@ -77,7 +76,7 @@ class Facet: sql=None, table=None, params=None, - metadata=None, + table_config=None, row_count=None, ): assert table or sql, "Must provide either table= or sql=" @@ -88,12 +87,12 @@ class Facet: self.table = table self.sql = sql or f"select * from [{table}]" self.params = params or [] - self.metadata = metadata + self.table_config = table_config # row_count can be None, in which case we calculate it ourselves: self.row_count = row_count def get_configs(self): - configs = load_facet_configs(self.request, self.metadata) + configs = load_facet_configs(self.request, self.table_config) return configs.get(self.type) or [] def get_querystring_pairs(self): @@ -101,6 +100,36 @@ class Facet: # [('_foo', 'bar'), ('_foo', '2'), ('empty', '')] return urllib.parse.parse_qsl(self.request.query_string, keep_blank_values=True) + def get_facet_size(self): + facet_size = self.ds.setting("default_facet_size") + max_returned_rows = self.ds.setting("max_returned_rows") + table_facet_size = None + if self.table: + config_facet_size = ( + self.ds.config.get("databases", {}) + .get(self.database, {}) + .get("tables", {}) + .get(self.table, {}) + .get("facet_size") + ) + if config_facet_size: + table_facet_size = config_facet_size + custom_facet_size = self.request.args.get("_facet_size") + if custom_facet_size: + if custom_facet_size == "max": + facet_size = max_returned_rows + elif custom_facet_size.isdigit(): + facet_size = int(custom_facet_size) + else: + # Invalid value, ignore it + custom_facet_size = None + if table_facet_size and not custom_facet_size: + if table_facet_size == "max": + facet_size = max_returned_rows + else: + facet_size = table_facet_size + return min(facet_size, max_returned_rows) + async def suggest(self): return [] @@ -118,17 +147,6 @@ class Facet: ) ).columns - async def get_row_count(self): - if self.row_count is None: - self.row_count = ( - await self.ds.execute( - 
self.database, - f"select count(*) from ({self.sql})", - self.params, - ) - ).rows[0][0] - return self.row_count - class ColumnFacet(Facet): type = "column" @@ -136,20 +154,23 @@ class ColumnFacet(Facet): async def suggest(self): row_count = await self.get_row_count() columns = await self.get_columns(self.sql, self.params) - facet_size = self.ds.setting("default_facet_size") + facet_size = self.get_facet_size() suggested_facets = [] already_enabled = [c["config"]["simple"] for c in self.get_configs()] for column in columns: if column in already_enabled: continue suggested_facet_sql = """ - select {column}, count(*) as n from ( - {sql} - ) where {column} is not null - group by {column} + with limited as (select * from ({sql}) limit {suggest_consider}) + select {column} as value, count(*) as n from limited + where value is not null + group by value limit {limit} """.format( - column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + column=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, + suggest_consider=self.suggest_consider, ) distinct_values = None try: @@ -162,10 +183,8 @@ class ColumnFacet(Facet): ) num_distinct_values = len(distinct_values) if ( - num_distinct_values - and num_distinct_values > 1 + 1 < num_distinct_values < row_count and num_distinct_values <= facet_size - and num_distinct_values < row_count # And at least one has n > 1 and any(r["n"] > 1 for r in distinct_values) ): @@ -174,7 +193,11 @@ class ColumnFacet(Facet): "name": column, "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args(self.request, {"_facet": column}), + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet": column} + ) + ), ), } ) @@ -182,13 +205,24 @@ class ColumnFacet(Facet): continue return suggested_facets + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})", + 
self.params, + ) + ).rows[0][0] + return self.row_count + async def facet_results(self): - facet_results = {} + facet_results = [] facets_timed_out = [] qs_pairs = self.get_querystring_pairs() - facet_size = self.ds.setting("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -211,34 +245,39 @@ class ColumnFacet(Facet): custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet": column} - ), - "results": facet_results_values, - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "hideable": source != "metadata", + "toggle_url": self.ds.urls.path( + path_with_removed_args(self.request, {"_facet": column}) + ), + "results": facet_results_values, + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] if self.table: # Attempt to expand foreign keys into labels values = [row["value"] for row in facet_rows] expanded = await self.ds.expand_foreign_keys( - self.database, self.table, column, values + self.request.actor, self.database, self.table, column, values ) else: expanded = {} for row in facet_rows: - selected = (column, str(row["value"])) in qs_pairs + column_qs = column + if column.startswith("_"): + column_qs = "{}__exact".format(column) + selected = (column_qs, str(row["value"])) in qs_pairs if selected: toggle_path = path_with_removed_args( - self.request, {column: str(row["value"])} + self.request, {column_qs: str(row["value"])} ) else: toggle_path = path_with_added_args( - self.request, {column: row["value"]} + self.request, {column_qs: row["value"]} ) facet_results_values.append( { @@ -246,7 +285,7 @@ class 
ColumnFacet(Facet): "label": expanded.get((column, row["value"]), row["value"]), "count": row["count"], "toggle_url": self.ds.absolute_url( - self.request, toggle_path + self.request, self.ds.urls.path(toggle_path) ), "selected": selected, } @@ -279,10 +318,14 @@ class ArrayFacet(Facet): continue # Is every value in this column either null or a JSON array? suggested_facet_sql = """ + with limited as (select * from ({sql}) limit {suggest_consider}) select distinct json_type({column}) - from ({sql}) + from limited + where {column} is not null and {column} != '' """.format( - column=escape_sqlite(column), sql=self.sql + column=escape_sqlite(column), + sql=self.sql, + suggest_consider=self.suggest_consider, ) try: results = await self.ds.execute( @@ -295,14 +338,18 @@ class ArrayFacet(Facet): ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - # Now sanity check that first 100 arrays contain only strings + # Now check that first 100 arrays contain only strings first_100 = [ v[0] for v in await self.ds.execute( self.database, - "select {column} from ({sql}) where {column} is not null and json_array_length({column}) > 0 limit 100".format( - column=escape_sqlite(column), sql=self.sql - ), + ( + "select {column} from ({sql}) " + "where {column} is not null " + "and {column} != '' " + "and json_array_length({column}) > 0 " + "limit 100" + ).format(column=escape_sqlite(column), sql=self.sql), self.params, truncate=False, custom_time_limit=self.ds.setting( @@ -320,8 +367,10 @@ class ArrayFacet(Facet): "type": "array", "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args( - self.request, {"_facet_array": column} + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_array": column} + ) ), ), } @@ -332,21 +381,38 @@ class ArrayFacet(Facet): async def facet_results(self): # self.configs should be a plain list of columns - facet_results = {} + facet_results = [] facets_timed_out = [] - facet_size = 
self.ds.setting("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] column = config.get("column") or config["simple"] + # https://github.com/simonw/datasette/issues/448 facet_sql = """ - select j.value as value, count(*) as count from ( - {sql} - ) join json_each({col}) j - group by j.value order by count desc, value limit {limit} + with inner as ({sql}), + deduped_array_items as ( + select + distinct j.value, + inner.* + from + json_each([inner].{col}) j + join inner + ) + select + value as value, + count(*) as count + from + deduped_array_items + group by + value + order by + count(*) desc, value limit {limit} """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + col=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, ) try: facet_rows_results = await self.ds.execute( @@ -357,16 +423,20 @@ class ArrayFacet(Facet): custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_array": column} - ), - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": self.ds.urls.path( + path_with_removed_args( + self.request, {"_facet_array": column} + ) + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] pairs = self.get_querystring_pairs() for row in facet_rows: @@ -410,8 +480,8 @@ class DateFacet(Facet): # Does this column contain any dates in the first 100 rows? 
suggested_facet_sql = """ select date({column}) from ( - {sql} - ) where {column} glob "????-??-*" limit 100; + select * from ({sql}) limit 100 + ) where {column} glob "????-??-*" """.format( column=escape_sqlite(column), sql=self.sql ) @@ -432,8 +502,10 @@ class DateFacet(Facet): "type": "date", "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args( - self.request, {"_facet_date": column} + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_date": column} + ) ), ), } @@ -443,10 +515,10 @@ class DateFacet(Facet): return suggested_facets async def facet_results(self): - facet_results = {} + facet_results = [] facets_timed_out = [] args = dict(self.get_querystring_pairs()) - facet_size = self.ds.setting("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -470,16 +542,18 @@ class DateFacet(Facet): custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_date": column} - ), - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_date": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] for row in facet_rows: selected = str(args.get(f"{column}__date")) == str(row["value"]) diff --git a/datasette/filters.py b/datasette/filters.py index edf2de99..95cc5f37 100644 --- a/datasette/filters.py +++ b/datasette/filters.py @@ -1,7 +1,173 @@ +from datasette import hookimpl +from datasette.resources import 
DatabaseResource +from datasette.views.base import DatasetteError +from datasette.utils.asgi import BadRequest import json -import numbers +from .utils import detect_json1, escape_sqlite, path_with_removed_args -from .utils import detect_json1, escape_sqlite + +@hookimpl(specname="filters_from_request") +def where_filters(request, database, datasette): + # This one deals with ?_where= + async def inner(): + where_clauses = [] + extra_wheres_for_ui = [] + if "_where" in request.args: + if not await datasette.allowed( + action="execute-sql", + resource=DatabaseResource(database=database), + actor=request.actor, + ): + raise DatasetteError("_where= is not allowed", status=403) + else: + where_clauses.extend(request.args.getlist("_where")) + extra_wheres_for_ui = [ + { + "text": text, + "remove_url": path_with_removed_args(request, {"_where": text}), + } + for text in request.args.getlist("_where") + ] + + return FilterArguments( + where_clauses, + extra_context={ + "extra_wheres_for_ui": extra_wheres_for_ui, + }, + ) + + return inner + + +@hookimpl(specname="filters_from_request") +def search_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Figure out which fts_table to use + table_metadata = await datasette.table_config(database, table) + db = datasette.get_database(database) + fts_table = request.args.get("_fts_table") + fts_table = fts_table or table_metadata.get("fts_table") + fts_table = fts_table or await db.fts_table(table) + fts_pk = request.args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) + search_args = { + key: request.args[key] + for key in request.args + if key.startswith("_search") and key != "_searchmode" + } + search = "" + search_mode_raw = table_metadata.get("searchmode") == "raw" + # Or set search mode from the querystring + qs_searchmode = request.args.get("_searchmode") + if qs_searchmode == 
"escaped": + search_mode_raw = False + if qs_searchmode == "raw": + search_mode_raw = True + + extra_context["supports_search"] = bool(fts_table) + + if fts_table and search_args: + if "_search" in search_args: + # Simple ?_search=xxx + search = search_args["_search"] + where_clauses.append( + "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + fts_pk=escape_sqlite(fts_pk), + match_clause=( + ":search" if search_mode_raw else "escape_fts(:search)" + ), + ) + ) + human_descriptions.append(f'search matches "{search}"') + params["search"] = search + extra_context["search"] = search + else: + # More complex: search against specific columns + for i, (key, search_text) in enumerate(search_args.items()): + search_col = key.split("_search_", 1)[1] + if search_col not in await db.table_columns(fts_table): + raise BadRequest("Cannot search by that column") + + where_clauses.append( + "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + search_col=escape_sqlite(search_col), + match_clause=( + ":search_{}".format(i) + if search_mode_raw + else "escape_fts(:search_{})".format(i) + ), + ) + ) + human_descriptions.append( + f'search column "{search_col}" matches "{search_text}"' + ) + params[f"search_{i}"] = search_text + extra_context["search"] = search_text + + return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +@hookimpl(specname="filters_from_request") +def through_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Support for ?_through={table, column, value} + if "_through" in request.args: + for through in request.args.getlist("_through"): + through_data = json.loads(through) + through_table = through_data["table"] + other_column = 
through_data["column"] + value = through_data["value"] + db = datasette.get_database(database) + outgoing_foreign_keys = await db.foreign_keys_for_table(through_table) + try: + fk_to_us = [ + fk for fk in outgoing_foreign_keys if fk["other_table"] == table + ][0] + except IndexError: + raise DatasetteError( + "Invalid _through - could not find corresponding foreign key" + ) + param = f"p{len(params)}" + where_clauses.append( + "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( + through_table=escape_sqlite(through_table), + our_pk=escape_sqlite(fk_to_us["other_column"]), + our_column=escape_sqlite(fk_to_us["column"]), + other_column=escape_sqlite(other_column), + param=param, + ) + ) + params[param] = value + human_descriptions.append(f'{through_table}.{other_column} = "{value}"') + + return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +class FilterArguments: + def __init__( + self, where_clauses, params=None, human_descriptions=None, extra_context=None + ): + self.where_clauses = where_clauses + self.params = params or {} + self.human_descriptions = human_descriptions or [] + self.extra_context = extra_context or {} class Filter: @@ -114,6 +280,13 @@ class Filters: '{c} contains "{v}"', format="%{}%", ), + TemplatedFilter( + "notcontains", + "does not contain", + '"{c}" not like :{p}', + '{c} does not contain "{v}"', + format="%{}%", + ), TemplatedFilter( "endswith", "ends with", @@ -149,19 +322,13 @@ class Filters: TemplatedFilter( "arraycontains", "array contains", - """rowid in ( - select {t}.rowid from {t}, json_each({t}.{c}) j - where j.value = :{p} - )""", + """:{p} in (select value from json_each([{t}].[{c}]))""", '{c} contains "{v}"', ), TemplatedFilter( "arraynotcontains", "array does not contain", - """rowid not in ( - select {t}.rowid from {t}, json_each({t}.{c}) j - where j.value = :{p} - )""", + """:{p} not in (select value from json_each([{t}].[{c}]))""", 
'{c} does not contain "{v}"', ), ] @@ -200,15 +367,11 @@ class Filters: ) _filters_by_key = {f.key: f for f in _filters} - def __init__(self, pairs, units=None, ureg=None): - if units is None: - units = {} + def __init__(self, pairs): self.pairs = pairs - self.units = units - self.ureg = ureg def lookups(self): - "Yields (lookup, display, no_argument) pairs" + """Yields (lookup, display, no_argument) pairs""" for filter in self._filters: yield filter.key, filter.display, filter.no_argument @@ -233,7 +396,7 @@ class Filters: return f"where {s}" def selections(self): - "Yields (column, lookup, value) tuples" + """Yields (column, lookup, value) tuples""" for key, value in self.pairs: if "__" in key: column, lookup = key.rsplit("__", 1) @@ -245,20 +408,6 @@ class Filters: def has_selections(self): return bool(self.pairs) - def convert_unit(self, column, value): - "If the user has provided a unit in the query, convert it into the column unit, if present." - if column not in self.units: - return value - - # Try to interpret the value as a unit - value = self.ureg(value) - if isinstance(value, numbers.Number): - # It's just a bare number, assume it's the column unit - return value - - column_unit = self.ureg(self.units[column]) - return value.to(column_unit).magnitude - def build_where_clauses(self, table): sql_bits = [] params = {} @@ -266,9 +415,7 @@ class Filters: for column, lookup, value in self.selections(): filter = self._filters_by_key.get(lookup, None) if filter: - sql_bit, param = filter.where_clause( - table, column, self.convert_unit(column, value), i - ) + sql_bit, param = filter.where_clause(table, column, value, i) sql_bits.append(sql_bit) if param is not None: if not isinstance(param, list): diff --git a/datasette/forbidden.py b/datasette/forbidden.py new file mode 100644 index 00000000..41c48396 --- /dev/null +++ b/datasette/forbidden.py @@ -0,0 +1,19 @@ +from datasette import hookimpl, Response + + +@hookimpl(trylast=True) +def forbidden(datasette, 
request, message): + async def inner(): + return Response.html( + await datasette.render_template( + "error.html", + { + "title": "Forbidden", + "error": message, + }, + request=request, + ), + status=403, + ) + + return inner diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py new file mode 100644 index 00000000..96398a4c --- /dev/null +++ b/datasette/handle_exception.py @@ -0,0 +1,77 @@ +from datasette import hookimpl, Response +from .utils import add_cors_headers +from .utils.asgi import ( + Base400, +) +from .views.base import DatasetteError +from markupsafe import Markup +import traceback + +try: + import ipdb as pdb +except ImportError: + import pdb + +try: + import rich +except ImportError: + rich = None + + +@hookimpl(trylast=True) +def handle_exception(datasette, request, exception): + async def inner(): + if datasette.pdb: + pdb.post_mortem(exception.__traceback__) + + if rich is not None: + rich.get_console().print_exception(show_locals=True) + + title = None + if isinstance(exception, Base400): + status = exception.status + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.message_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = [f"{status}.html", "error.html"] + info.update( + { + "ok": False, + "error": message, + "status": status, + "title": title, + } + ) + headers = {} + if datasette.cors: + add_cors_headers(headers) + if request.path.split("?")[0].endswith(".json"): + return Response.json(info, status=status, headers=headers) + else: + environment = datasette.get_jinja_environment(request) + template = environment.select_template(templates) + return Response.html( + await template.render_async( + dict( + info, + urls=datasette.urls, + app_css_hash=datasette.app_css_hash(), + 
menu_links=lambda: [], + ) + ), + status=status, + headers=headers, + ) + + return inner diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index a305ca6a..3f6a1425 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -7,108 +7,216 @@ hookimpl = HookimplMarker("datasette") @hookspec def startup(datasette): - "Fires directly after Datasette first starts running" + """Fires directly after Datasette first starts running""" @hookspec def asgi_wrapper(datasette): - "Returns an ASGI middleware callable to wrap our ASGI application with" + """Returns an ASGI middleware callable to wrap our ASGI application with""" @hookspec def prepare_connection(conn, database, datasette): - "Modify SQLite connection in some way e.g. register custom SQL functions" + """Modify SQLite connection in some way e.g. register custom SQL functions""" @hookspec -def prepare_jinja2_environment(env): - "Modify Jinja2 template environment e.g. register custom template tags" +def prepare_jinja2_environment(env, datasette): + """Modify Jinja2 template environment e.g. 
register custom template tags""" @hookspec def extra_css_urls(template, database, table, columns, view_name, request, datasette): - "Extra CSS URLs added by this plugin" + """Extra CSS URLs added by this plugin""" @hookspec def extra_js_urls(template, database, table, columns, view_name, request, datasette): - "Extra JavaScript URLs added by this plugin" + """Extra JavaScript URLs added by this plugin""" @hookspec def extra_body_script( template, database, table, columns, view_name, request, datasette ): - "Extra JavaScript code to be included in - - - + diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html index 4019d448..a624c8a4 100644 --- a/datasette/templates/_codemirror_foot.html +++ b/datasette/templates/_codemirror_foot.html @@ -1,37 +1,42 @@ diff --git a/datasette/templates/_crumbs.html b/datasette/templates/_crumbs.html new file mode 100644 index 00000000..bd1ff0da --- /dev/null +++ b/datasette/templates/_crumbs.html @@ -0,0 +1,15 @@ +{% macro nav(request, database=None, table=None) -%} +{% if crumb_items is defined %} + {% set items=crumb_items(request=request, database=database, table=table) %} + {% if items %} +

+ {% for item in items %} + {{ item.label }} + {% if not loop.last %} + / + {% endif %} + {% endfor %} +

+ {% endif %} +{% endif %} +{%- endmacro %} diff --git a/datasette/templates/_debug_common_functions.html b/datasette/templates/_debug_common_functions.html new file mode 100644 index 00000000..d988a2f3 --- /dev/null +++ b/datasette/templates/_debug_common_functions.html @@ -0,0 +1,50 @@ + diff --git a/datasette/templates/_description_source_license.html b/datasette/templates/_description_source_license.html index a2bc18f2..f852268f 100644 --- a/datasette/templates/_description_source_license.html +++ b/datasette/templates/_description_source_license.html @@ -1,6 +1,6 @@ -{% if metadata.description_html or metadata.description %} +{% if metadata.get("description_html") or metadata.get("description") %}
{% for column in display_columns %} - diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html index 0f1b30f0..1ecc92df 100644 --- a/datasette/templates/allow_debug.html +++ b/datasette/templates/allow_debug.html @@ -33,9 +33,12 @@ p.message-warning {

Debug allow rules

+{% set current_tab = "allow_debug" %} +{% include "_permissions_debug_tabs.html" %} +

Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.

- +

diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html new file mode 100644 index 00000000..dc393c20 --- /dev/null +++ b/datasette/templates/api_explorer.html @@ -0,0 +1,208 @@ +{% extends "base.html" %} + +{% block title %}API Explorer{% endblock %} + +{% block extra_head %} + +{% endblock %} + +{% block content %} + +

API Explorer{% if private %} 🔒{% endif %}

+ +

Use this tool to try out the + {% if datasette_version %} + Datasette API. + {% else %} + Datasette API. + {% endif %} +

+
+ GET + +
+ + + +
+ +
+
+ POST +
+
+ + +
+
+ + +
+

+ +
+ + + + + +{% if example_links %} +

API endpoints

+
    + {% for database in example_links %} +
  • Database: {{ database.name }}
  • +
      + {% for link in database.links %} +
    • {{ link.path }} - {{ link.label }}
    • + {% endfor %} + {% for table in database.tables %} +
    • {{ table.name }} +
        + {% for link in table.links %} +
      • {{ link.path }} - {{ link.label }}
      • + {% endfor %} +
      +
    • + {% endfor %} +
    + {% endfor %} +
+{% endif %} + +{% endblock %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 3ed67164..0d89e11c 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -1,21 +1,27 @@ - - +{% import "_crumbs.html" as crumbs with context %} + {% block title %}{% endblock %} {% for url in extra_css_urls %} - + {% endfor %} + + {% for url in extra_js_urls %} - + {% endfor %} -{% block extra_head %}{% endblock %} +{%- if alternate_url_json -%} + +{%- endif -%} +{%- block extra_head %}{% endblock -%} -
+ {% if not column.sortable %} {{ column.name }} {% else %} {% if column.name == sort %} - {{ column.name }} ▼ + {{ column.name }} ▼ {% else %} - {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} + {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} {% endif %} {% endif %}
+ + + + + + + + + + + + + {% for action in data %} + + + + + + + + + + {% endfor %} + +
NameAbbrDescriptionResourceTakes ParentTakes ChildAlso Requires
{{ action.name }}{% if action.abbr %}{{ action.abbr }}{% endif %}{{ action.description or "" }}{% if action.resource_class %}{{ action.resource_class }}{% endif %}{% if action.takes_parent %}✓{% endif %}{% if action.takes_child %}✓{% endif %}{% if action.also_requires %}{{ action.also_requires }}{% endif %}
+ +{% endblock %} diff --git a/datasette/templates/debug_allowed.html b/datasette/templates/debug_allowed.html new file mode 100644 index 00000000..add3154a --- /dev/null +++ b/datasette/templates/debug_allowed.html @@ -0,0 +1,229 @@ +{% extends "base.html" %} + +{% block title %}Allowed Resources{% endblock %} + +{% block extra_head %} + +{% include "_permission_ui_styles.html" %} +{% include "_debug_common_functions.html" %} +{% endblock %} + +{% block content %} +

Allowed resources

+ +{% set current_tab = "allowed" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to check which resources the current actor is allowed to access for a given permission action. It queries the /-/allowed.json API endpoint.

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + Only certain actions are supported by this endpoint +
+ +
+ + + Filter results to a specific parent resource +
+ +
+ + + Filter results to a specific child resource (requires parent to be set) +
+ +
+ + + Number of results per page (max 200) +
+ +
+ +
+
+
+ + + + + +{% endblock %} diff --git a/datasette/templates/debug_check.html b/datasette/templates/debug_check.html new file mode 100644 index 00000000..c2e7997f --- /dev/null +++ b/datasette/templates/debug_check.html @@ -0,0 +1,270 @@ +{% extends "base.html" %} + +{% block title %}Permission Check{% endblock %} + +{% block extra_head %} + +{% include "_permission_ui_styles.html" %} +{% include "_debug_common_functions.html" %} + +{% endblock %} + +{% block content %} +

Permission check

+ +{% set current_tab = "check" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to test permission checks for the current actor. It queries the /-/check.json API endpoint.

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + The permission action to check +
+ +
+ + + For database-level permissions, specify the database name +
+ +
+ + + For table-level permissions, specify the table name (requires parent) +
+ +
+ +
+
+
+ + + + + +{% endblock %} diff --git a/datasette/templates/debug_permissions_playground.html b/datasette/templates/debug_permissions_playground.html new file mode 100644 index 00000000..91ce1fcf --- /dev/null +++ b/datasette/templates/debug_permissions_playground.html @@ -0,0 +1,166 @@ +{% extends "base.html" %} + +{% block title %}Debug permissions{% endblock %} + +{% block extra_head %} +{% include "_permission_ui_styles.html" %} + +{% endblock %} + +{% block content %} +

Permission playground

+ +{% set current_tab = "permissions" %} +{% include "_permissions_debug_tabs.html" %} + +

This tool lets you simulate an actor and a permission check for that actor.

+ +
+
+ +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ +
+

+    
+
+ + + +

Recent permissions checks

+ +

+ {% if filter != "all" %}All{% else %}All{% endif %}, + {% if filter != "exclude-yours" %}Exclude yours{% else %}Exclude yours{% endif %}, + {% if filter != "only-yours" %}Only yours{% else %}Only yours{% endif %} +

+ +{% if permission_checks %} + + + + + + + + + + + + + {% for check in permission_checks %} + + + + + + + + + {% endfor %} + +
WhenActionParentChildActorResult
{{ check.when.split('T', 1)[0] }}
{{ check.when.split('T', 1)[1].split('+', 1)[0].split('-', 1)[0].split('Z', 1)[0] }}
{{ check.action }}{{ check.parent or '—' }}{{ check.child or '—' }}{% if check.actor %}{{ check.actor|tojson }}{% else %}anonymous{% endif %}{% if check.result %}Allowed{% elif check.result is none %}No opinion{% else %}Denied{% endif %}
+{% else %} +

No permission checks have been recorded yet.

+{% endif %} + +{% endblock %} diff --git a/datasette/templates/debug_rules.html b/datasette/templates/debug_rules.html new file mode 100644 index 00000000..9a290803 --- /dev/null +++ b/datasette/templates/debug_rules.html @@ -0,0 +1,203 @@ +{% extends "base.html" %} + +{% block title %}Permission Rules{% endblock %} + +{% block extra_head %} + +{% include "_permission_ui_styles.html" %} +{% include "_debug_common_functions.html" %} +{% endblock %} + +{% block content %} +

Permission rules

+ +{% set current_tab = "rules" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to view the permission rules that allow the current actor to access resources for a given permission action. It queries the /-/rules.json API endpoint.

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + The permission action to check +
+ +
+ + + Number of results per page (max 200) +
+ +
+ +
+
+
+ + + + + +{% endblock %} diff --git a/datasette/templates/error.html b/datasette/templates/error.html index 5c651d4e..3451d886 100644 --- a/datasette/templates/error.html +++ b/datasette/templates/error.html @@ -2,13 +2,6 @@ {% block title %}{% if title %}{{ title }}{% else %}Error {{ status }}{% endif %}{% endblock %} -{% block nav %} -

- home -

- {{ super() }} -{% endblock %} - {% block content %}

{% if title %}{{ title }}{% else %}Error {{ status }}{% endif %}

diff --git a/datasette/templates/index.html b/datasette/templates/index.html index 06e09635..03349279 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -2,17 +2,26 @@ {% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %} +{% block extra_head %} +{% if noindex %}{% endif %} +{% endblock %} + {% block body_class %}index{% endblock %} {% block content %}

{{ metadata.title or "Datasette" }}{% if private %} 🔒{% endif %}

+{% set action_links, action_title = homepage_actions, "Homepage actions" %} +{% include "_action_menu.html" %} + +{{ top_homepage() }} + {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} {% for database in databases %}

{{ database.name }}{% if database.private %} 🔒{% endif %}

- {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif -%} + {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.hidden_tables_count %}, {% endif -%} {% if database.hidden_tables_count -%} {% if database.show_table_row_counts %}{{ "{:,}".format(database.hidden_table_rows_sum) }} rows in {% endif %}{{ database.hidden_tables_count }} hidden table{% if database.hidden_tables_count != 1 %}s{% endif -%} {% endif -%} diff --git a/datasette/templates/logout.html b/datasette/templates/logout.html index 98738679..c8fc642a 100644 --- a/datasette/templates/logout.html +++ b/datasette/templates/logout.html @@ -2,20 +2,13 @@ {% block title %}Log out{% endblock %} -{% block nav %} -

- home -

- {{ super() }} -{% endblock %} - {% block content %}

Log out

You are logged in as {{ display_actor(actor) }}

-
+
diff --git a/datasette/templates/messages_debug.html b/datasette/templates/messages_debug.html index e0ab9a40..2940cd69 100644 --- a/datasette/templates/messages_debug.html +++ b/datasette/templates/messages_debug.html @@ -8,7 +8,7 @@

Set a message:

- +
diff --git a/datasette/templates/patterns.html b/datasette/templates/patterns.html index 984c1bf6..7770f7d4 100644 --- a/datasette/templates/patterns.html +++ b/datasette/templates/patterns.html @@ -1,5 +1,5 @@ - + Datasette: Pattern Portfolio @@ -9,11 +9,11 @@ -
@@ -45,7 +45,7 @@

Header for /database/table/row and Messages

-
+

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -96,18 +96,24 @@

+
+ @@ -118,10 +124,10 @@

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -158,18 +164,24 @@

+
+ @@ -177,10 +189,10 @@

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -478,10 +490,10 @@