diff --git a/.dockerignore b/.dockerignore
index 490f509e..5078bf47 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,3 +9,5 @@ build
 dist
 scratchpad
 venv
+*.db
+*.sqlite
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000..84e574fd
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,4 @@
+# Applying Black
+35d6ee2790e41e96f243c1ff58be0c9c0519a8ce
+368638555160fb9ac78f462d0f79b1394163fa30
+2b344f6a34d2adaa305996a1a580ece06397f6e4
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..f0bcdbe0
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [simonw]
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..88bb03b1
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+- package-ecosystem: pip
+  directory: "/"
+  schedule:
+    interval: daily
+    time: "13:00"
+  groups:
+    python-packages:
+      patterns:
+        - "*"
diff --git a/.github/workflows/deploy-branch-preview.yml b/.github/workflows/deploy-branch-preview.yml
new file mode 100644
index 00000000..e56d9c27
--- /dev/null
+++ b/.github/workflows/deploy-branch-preview.yml
@@ -0,0 +1,35 @@
+name: Deploy a Datasette branch preview to Vercel
+
+on:
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: "Branch to deploy"
+        required: true
+        type: string
+
+jobs:
+  deploy-branch-preview:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v6
+      with:
+        python-version: "3.11"
+    - name: Install dependencies
+      run: |
+        pip install datasette-publish-vercel
+    - name: Deploy the preview
+      env:
+        VERCEL_TOKEN: ${{ secrets.BRANCH_PREVIEW_VERCEL_TOKEN }}
+      run: |
+        export BRANCH="${{ github.event.inputs.branch }}"
+        wget https://latest.datasette.io/fixtures.db
+        datasette publish vercel fixtures.db \
+          --branch $BRANCH \
+          --project "datasette-preview-$BRANCH" \
+          --token $VERCEL_TOKEN \
+          --scope datasette \
+          --about "Preview of $BRANCH" \
+          --about_url "https://github.com/simonw/datasette/tree/$BRANCH"
diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml
index 8445f1d8..9f53b01e 100644
--- a/.github/workflows/deploy-latest.yml
+++ b/.github/workflows/deploy-latest.yml
@@ -1,27 +1,26 @@
 name: Deploy latest.datasette.io
 
 on:
+  workflow_dispatch:
   push:
     branches:
-    - main
+      - main
+    # - 1.0-dev
+
+permissions:
+  contents: read
 
 jobs:
   deploy:
     runs-on: ubuntu-latest
     steps:
     - name: Check out datasette
-      uses: actions/checkout@v2
+      uses: actions/checkout@v5
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v6
       with:
-        python-version: 3.9
-    - uses: actions/cache@v2
-      name: Configure pip caching
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+        python-version: "3.13"
+        cache: pip
     - name: Install Python dependencies
       run: |
         python -m pip install --upgrade pip
@@ -29,34 +28,105 @@ jobs:
         python -m pip install -e .[docs]
         python -m pip install sphinx-to-sqlite==0.1a1
     - name: Run tests
-      run: pytest
-    - name: Build fixtures.db
-      run: python tests/fixtures.py fixtures.db fixtures.json
+      if: ${{ github.ref == 'refs/heads/main' }}
+      run: |
+        pytest -n auto -m "not serial"
+        pytest -m "serial"
+    - name: Build fixtures.db and other files needed to deploy the demo
+      run: |-
+        python tests/fixtures.py \
+          fixtures.db \
+          fixtures-config.json \
+          fixtures-metadata.json \
+          plugins \
+          --extra-db-filename
extra_database.db - name: Build docs.db + if: ${{ github.ref == 'refs/heads/main' }} run: |- cd docs - sphinx-build -b xml . _build + DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build sphinx-to-sqlite ../docs.db _build cd .. - - name: Set up Cloud Run - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + - name: Set up the alternate-route demo + run: | + echo ' + from datasette import hookimpl + + @hookimpl + def startup(datasette): + db = datasette.get_database("fixtures2") + db.route = "alternative-route" + ' > plugins/alternative_route.py + cp fixtures.db fixtures2.db + - name: And the counters writable canned query demo + run: | + cat > plugins/counters.py < metadata.json + # cat metadata.json + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v3 with: - version: '275.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy to Cloud Run + env: + LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }} run: |- gcloud config set run/region us-central1 gcloud config set project datasette-222320 - datasette publish cloudrun fixtures.db \ - -m fixtures.json \ + export SUFFIX="-${GITHUB_REF#refs/heads/}" + export SUFFIX=${SUFFIX#-main} + # Replace 1.0 with one-dot-zero in SUFFIX + export SUFFIX=${SUFFIX//1.0/one-dot-zero} + datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \ + -m fixtures-metadata.json \ + --plugins-dir=plugins \ --branch=$GITHUB_SHA \ --version-note=$GITHUB_SHA \ - --extra-options="--config template_debug:1" \ - --service=datasette-latest + --extra-options="--setting template_debug 1 --setting trace_debug 1 --crossdb" \ + --install 'datasette-ephemeral-tables>=0.2.2' \ + --service "datasette-latest$SUFFIX" \ + --secret $LATEST_DATASETTE_SECRET + - name: Deploy to docs as well (only for main) + if: ${{ github.ref == 'refs/heads/main' }} + run: |- # Deploy docs.db to a different service datasette publish cloudrun docs.db \ --branch=$GITHUB_SHA \ --version-note=$GITHUB_SHA \ - --extra-options="--config template_debug:1" \ + --extra-options="--setting template_debug 1" \ --service=datasette-docs-latest diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml new file mode 100644 index 00000000..a54bd83a --- /dev/null +++ b/.github/workflows/documentation-links.yml @@ -0,0 +1,16 @@ +name: Read the Docs Pull Request Preview +on: + pull_request_target: + types: + - opened + +permissions: + pull-requests: write + +jobs: + documentation-links: + runs-on: ubuntu-latest + steps: + - uses: readthedocs/actions/preview@v1 + with: + project-slug: "datasette" diff --git a/.github/workflows/mirror-master-and-main.yml b/.github/workflows/mirror-master-and-main.yml deleted file mode 100644 index 8418df40..00000000 --- a/.github/workflows/mirror-master-and-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Mirror "master" and "main" branches -on: - push: - branches: - - master - - main - -jobs: - mirror: - runs-on: ubuntu-latest - steps: - - name: Mirror to "master" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: master - force: false - - name: Mirror to "main" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: main - force: false diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml 
new file mode 100644 index 00000000..77cce7d1 --- /dev/null +++ b/.github/workflows/prettier.yml @@ -0,0 +1,25 @@ +name: Check JavaScript for conformance with Prettier + +on: [push] + +permissions: + contents: read + +jobs: + prettier: + runs-on: ubuntu-latest + steps: + - name: Check out repo + uses: actions/checkout@v4 + - uses: actions/cache@v4 + name: Configure npm caching + with: + path: ~/.npm + key: ${{ runner.OS }}-npm-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.OS }}-npm- + - name: Install dependencies + run: npm ci + - name: Run prettier + run: |- + npm run prettier -- --check diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c1909bbe..e94d0bdd 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,70 +4,106 @@ on: release: types: [created] +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: pyproject.toml - name: Install dependencies run: | pip install -e '.[test]' - name: Run tests run: | pytest + deploy: runs-on: ubuntu-latest needs: [test] + environment: release + permissions: + id-token: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: - python-version: '3.9' - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-publish-pip- + python-version: '3.13' + cache: pip + cache-dependency-path: pyproject.toml - name: Install dependencies run: | - pip install setuptools wheel twine - - name: Publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + pip install setuptools wheel build + - name: Build run: | - python setup.py sdist bdist_wheel - twine upload dist/* + python -m build + - name: Publish + uses: pypa/gh-action-pypi-publish@release/v1 + + deploy_static_docs: + runs-on: ubuntu-latest + needs: [deploy] + if: "!github.event.release.prerelease" + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: pyproject.toml + - name: Install dependencies + run: | + python -m pip install -e .[docs] + python -m pip install sphinx-to-sqlite==0.1a1 + - name: Build docs.db + run: |- + cd docs + DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build + sphinx-to-sqlite ../docs.db _build + cd .. 
+ - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 + - name: Deploy stable-docs.datasette.io to Cloud Run + run: |- + gcloud config set run/region us-central1 + gcloud config set project datasette-222320 + datasette publish cloudrun docs.db \ + --service=datasette-docs-stable + deploy_docker: runs-on: ubuntu-latest needs: [deploy] if: "!github.event.release.prerelease" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Build and push to Docker Hub env: DOCKER_USER: ${{ secrets.DOCKER_USER }} DOCKER_PASS: ${{ secrets.DOCKER_PASS }} run: |- - docker login -u $DOCKER_USER -p $DOCKER_PASS - export REPO=datasetteproject/datasette - docker build -f Dockerfile -t $REPO:${GITHUB_REF#refs/tags/} . - docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest - docker push $REPO + sleep 60 # Give PyPI time to make the new release available + docker login -u $DOCKER_USER -p $DOCKER_PASS + export REPO=datasetteproject/datasette + docker build -f Dockerfile \ + -t $REPO:${GITHUB_REF#refs/tags/} \ + --build-arg VERSION=${GITHUB_REF#refs/tags/} . + docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest + docker push $REPO:${GITHUB_REF#refs/tags/} + docker push $REPO:latest diff --git a/.github/workflows/push_docker_tag.yml b/.github/workflows/push_docker_tag.yml new file mode 100644 index 00000000..afe8d6b2 --- /dev/null +++ b/.github/workflows/push_docker_tag.yml @@ -0,0 +1,28 @@ +name: Push specific Docker tag + +on: + workflow_dispatch: + inputs: + version_tag: + description: Tag to build and push + +permissions: + contents: read + +jobs: + deploy_docker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build and push to Docker Hub + env: + DOCKER_USER: ${{ secrets.DOCKER_USER }} + DOCKER_PASS: ${{ secrets.DOCKER_PASS }} + VERSION_TAG: ${{ github.event.inputs.version_tag }} + run: |- + docker login -u $DOCKER_USER -p $DOCKER_PASS + export REPO=datasetteproject/datasette + docker build -f Dockerfile \ + -t $REPO:${VERSION_TAG} \ + --build-arg VERSION=${VERSION_TAG} . 
+        docker push $REPO:${VERSION_TAG}
diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 00000000..7c5370ce
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,27 @@
+name: Check spelling in documentation
+
+on: [push, pull_request]
+
+permissions:
+  contents: read
+
+jobs:
+  spellcheck:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v6
+      with:
+        python-version: '3.11'
+        cache: 'pip'
+        cache-dependency-path: '**/pyproject.toml'
+    - name: Install dependencies
+      run: |
+        pip install -e '.[docs]'
+    - name: Check spelling
+      run: |
+        codespell README.md --ignore-words docs/codespell-ignore-words.txt
+        codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt
+        codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt
+        codespell tests --ignore-words docs/codespell-ignore-words.txt
diff --git a/.github/workflows/stable-docs.yml b/.github/workflows/stable-docs.yml
new file mode 100644
index 00000000..3119d617
--- /dev/null
+++ b/.github/workflows/stable-docs.yml
@@ -0,0 +1,76 @@
+name: Update Stable Docs
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+    - main
+
+permissions:
+  contents: write
+
+jobs:
+  update_stable_docs:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v5
+      with:
+        fetch-depth: 0  # We need all commits to find docs/ changes
+    - name: Set up Git user
+      run: |
+        git config user.name "Automated"
+        git config user.email "actions@users.noreply.github.com"
+    - name: Create stable branch if it does not yet exist
+      run: |
+        if ! git ls-remote --heads origin stable | grep -qE '\bstable\b'; then
+          # Make sure we have all tags locally
+          git fetch --tags --quiet
+
+          # Latest tag that is just numbers and dots (optionally prefixed with 'v')
+          # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc.
+          LATEST_RELEASE=$(
+            git tag -l --sort=-v:refname \
+              | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \
+              | head -n1
+          )
+
+          git checkout -b stable
+
+          # If there are any stable releases, copy docs/ from the most recent
+          if [ -n "$LATEST_RELEASE" ]; then
+            rm -rf docs/
+            git checkout "$LATEST_RELEASE" -- docs/ || true
+          fi
+
+          git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes"
+          git push -u origin stable
+        fi
+    - name: Handle Release
+      if: github.event_name == 'release' && !github.event.release.prerelease
+      run: |
+        git fetch --all
+        git checkout stable
+        git reset --hard ${GITHUB_REF#refs/tags/}
+        git push origin stable --force
+    - name: Handle Commit to Main
+      if: contains(github.event.head_commit.message, '!stable-docs')
+      run: |
+        git fetch origin
+        git checkout -b stable origin/stable
+        # Get the list of modified files in docs/ from the current commit
+        FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/)
+        # Check if the list of files is non-empty
+        if [[ -n "$FILES" ]]; then
+          # Checkout those files to the stable branch to over-write with their contents
+          for FILE in $FILES; do
+            git checkout ${{ github.sha }} -- $FILE
+          done
+          git add docs/
+          git commit -m "Doc changes from ${{ github.sha }}"
+          git push origin stable
+        else
+          echo "No changes to docs/ in this commit."
+          exit 0
+        fi
diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
index 1d1cf332..8d73b64d 100644
--- a/.github/workflows/test-coverage.yml
+++ b/.github/workflows/test-coverage.yml
@@ -7,23 +7,21 @@ on:
   pull_request:
     branches:
     - main
 
+permissions:
+  contents: read
+
 jobs:
   test:
     runs-on: ubuntu-latest
     steps:
     - name: Check out datasette
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v6
      with:
-        python-version: 3.9
-    - uses: actions/cache@v2
-      name: Configure pip caching
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+        python-version: '3.12'
+        cache: 'pip'
+        cache-dependency-path: '**/pyproject.toml'
     - name: Install Python dependencies
       run: |
         python -m pip install --upgrade pip
@@ -33,7 +31,7 @@ jobs:
       run: |-
         ls -lah
         cat .coveragerc
-        pytest --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term
+        pytest -m "not serial" --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term -x
         ls -lah
     - name: Upload coverage report
       uses: codecov/codecov-action@v1
diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml
new file mode 100644
index 00000000..b490a9bf
--- /dev/null
+++ b/.github/workflows/test-pyodide.yml
@@ -0,0 +1,33 @@
+name: Test in Pyodide with shot-scraper
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v6
+      with:
+        python-version: "3.10"
+        cache: 'pip'
+        cache-dependency-path: '**/pyproject.toml'
+    - name: Cache Playwright browsers
+      uses: actions/cache@v4
+      with:
+        path: ~/.cache/ms-playwright/
+        key: ${{ runner.os }}-browsers
+    - name: Install Playwright dependencies
+      run: |
+        pip install shot-scraper build
+        shot-scraper install
+    - name: Run test
+      run: |
+        ./test-in-pyodide-with-shot-scraper.sh
diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml
new file mode 100644
index 00000000..76ea138a
--- /dev/null
+++ b/.github/workflows/test-sqlite-support.yml
@@ -0,0 +1,53 @@
+name: Test SQLite versions
+
+on: [push, pull_request]
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ${{ matrix.platform }}
+    continue-on-error: true
+    strategy:
+      matrix:
+        platform: [ubuntu-latest]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+        sqlite-version: [
+          #"3", # latest version
+          "3.46",
+          #"3.45",
+          #"3.27",
+          #"3.26",
+          "3.25",
+          #"3.25.3", # 2018-09-25, window functions breaks test_upsert for some reason on 3.10, skip for now
+          #"3.24", # 2018-06-04, added UPSERT support
+          #"3.23.1" # 2018-04-10, before UPSERT
+        ]
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v6
+      with:
+        python-version: ${{ matrix.python-version }}
+        allow-prereleases: true
+        cache: pip
+        cache-dependency-path: pyproject.toml
+    - name: Set up SQLite ${{ matrix.sqlite-version }}
+      uses: asg017/sqlite-versions@71ea0de37ae739c33e447af91ba71dda8fcf22e6
+      with:
+        version: ${{ matrix.sqlite-version }}
+        cflags: "-DSQLITE_ENABLE_DESERIALIZE -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1"
+    - run: python3 -c "import sqlite3; print(sqlite3.sqlite_version)"
+    - run: echo $LD_LIBRARY_PATH
+    - name: Build extension for --load-extension test
+      run: |-
+        (cd tests && gcc ext.c -fPIC -shared -o ext.so)
+    - name: Install dependencies
+      run: |
+        pip install -e '.[test]'
+        pip freeze
+    - name: Run tests
+      run: |
+        pytest -n auto -m "not serial"
+        pytest -m "serial"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a1774213..1e5e03d2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,29 +1,51 @@
 name: Test
 
-on: [push]
+on: [push, pull_request]
+
+permissions:
+  contents: read
 
 jobs:
   test:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v6
       with:
         python-version: ${{ matrix.python-version }}
-    - uses: actions/cache@v2
-      name: Configure pip caching
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+        allow-prereleases: true
+        cache: pip
+        cache-dependency-path: pyproject.toml
+    - name: Build extension for --load-extension test
+      run: |-
+        (cd tests && gcc ext.c -fPIC -shared -o ext.so)
     - name: Install dependencies
       run: |
         pip install -e '.[test]'
+        pip freeze
     - name: Run tests
       run: |
-        pytest
+        pytest -n auto -m "not serial"
+        pytest -m "serial"
+        # And the test that exercises a localhost HTTPS server
+        tests/test_datasette_https_server.sh
+    - name: Install docs dependencies
+      run: |
+        pip install -e '.[docs]'
+    - name: Black
+      run: black --check .
+    - name: Check if cog needs to be run
+      run: |
+        cog --check docs/*.rst
+    - name: Check if blacken-docs needs to be run
+      run: |
+        # This fails on syntax errors, or a diff was applied
+        blacken-docs -l 60 docs/*.rst
+    - name: Test DATASETTE_LOAD_PLUGINS
+      run: |
+        pip install datasette-init datasette-json-html
+        tests/test-datasette-load-plugins.sh
diff --git a/.github/workflows/tmate-mac.yml b/.github/workflows/tmate-mac.yml
new file mode 100644
index 00000000..fcee0f21
--- /dev/null
+++ b/.github/workflows/tmate-mac.yml
@@ -0,0 +1,15 @@
+name: tmate session mac
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: macos-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Setup tmate session
+      uses: mxschmitt/action-tmate@v3
diff --git a/.github/workflows/tmate.yml b/.github/workflows/tmate.yml
index 02e7bd33..123f6c71 100644
--- a/.github/workflows/tmate.yml
+++ b/.github/workflows/tmate.yml
@@ -3,6 +3,10 @@ name: tmate session
 on:
   workflow_dispatch:
 
+permissions:
+  contents: read
+  models: read
+
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -10,3 +14,5 @@ jobs:
     - uses: actions/checkout@v2
     - name: Setup tmate session
       uses: mxschmitt/action-tmate@v3
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 47418755..70e6bbeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,9 @@ scratchpad
 
 .vscode
 
+uv.lock
+data.db
+
 # We don't use Pipfile, so ignore them
 Pipfile
 Pipfile.lock
@@ -116,3 +119,11 @@ ENV/
 
 # macOS files
 .DS_Store
+node_modules
+.*.swp
+
+# In case someone compiled tests/ext.c for test_load_extensions, don't
+# include it in source control.
+tests/*.dylib +tests/*.so +tests/*.dll diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 00000000..222861c3 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,4 @@ +{ + "tabWidth": 2, + "useTabs": false +} diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..5b30e75a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,16 @@ +version: 2 + +build: + os: ubuntu-20.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py + +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..14d4c567 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`swillison+datasette-code-of-conduct@gmail.com`. +All complaints will be reviewed and investigated promptly and fairly. 
+ +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/Dockerfile b/Dockerfile index f008ff69..9a8f06cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,42 +1,18 @@ -FROM python:3.7.2-slim-stretch as build +FROM python:3.11.0-slim-bullseye as build -# Setup build dependencies -RUN apt update \ -&& apt install -y python3-dev build-essential wget libxml2-dev libproj-dev libgeos-dev libsqlite3-dev zlib1g-dev pkg-config git \ - && apt clean +# Version of Datasette to install, e.g. 0.55 +# docker build . 
-t datasette --build-arg VERSION=0.55 +ARG VERSION +RUN apt-get update && \ + apt-get install -y --no-install-recommends libsqlite3-mod-spatialite && \ + apt clean && \ + rm -rf /var/lib/apt && \ + rm -rf /var/lib/dpkg/info/* -RUN wget "https://www.sqlite.org/2020/sqlite-autoconf-3310100.tar.gz" && tar xzf sqlite-autoconf-3310100.tar.gz \ - && cd sqlite-autoconf-3310100 && ./configure --disable-static --enable-fts5 --enable-json1 CFLAGS="-g -O2 -DSQLITE_ENABLE_FTS3=1 -DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_RTREE=1 -DSQLITE_ENABLE_JSON1" \ - && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/freexl-sources/freexl-1.0.5.tar.gz" && tar zxf freexl-1.0.5.tar.gz \ - && cd freexl-1.0.5 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/libspatialite-sources/libspatialite-4.4.0-RC0.tar.gz" && tar zxf libspatialite-4.4.0-RC0.tar.gz \ - && cd libspatialite-4.4.0-RC0 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/readosm-sources/readosm-1.1.0.tar.gz" && tar zxf readosm-1.1.0.tar.gz && cd readosm-1.1.0 && ./configure && make && make install - -RUN wget "http://www.gaia-gis.it/gaia-sins/spatialite-tools-sources/spatialite-tools-4.4.0-RC0.tar.gz" && tar zxf spatialite-tools-4.4.0-RC0.tar.gz \ - && cd spatialite-tools-4.4.0-RC0 && ./configure && make && make install - - -# Add local code to the image instead of fetching from pypi. -COPY . /datasette - -RUN pip install /datasette - -FROM python:3.7.2-slim-stretch - -# Copy python dependencies and spatialite libraries -COPY --from=build /usr/local/lib/ /usr/local/lib/ -# Copy executables -COPY --from=build /usr/local/bin /usr/local/bin -# Copy spatial extensions -COPY --from=build /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu - -ENV LD_LIBRARY_PATH=/usr/local/lib +RUN pip install https://github.com/simonw/datasette/archive/refs/tags/${VERSION}.zip && \ + find /usr/local/lib -name '__pycache__' | xargs rm -r && \ + rm -rf /root/.cache/pip EXPOSE 8001 CMD ["datasette"] diff --git a/Justfile b/Justfile new file mode 100644 index 00000000..a47662c3 --- /dev/null +++ b/Justfile @@ -0,0 +1,56 @@ +export DATASETTE_SECRET := "not_a_secret" + +# Run tests and linters +@default: test lint + +# Setup project +@init: + uv sync --extra test --extra docs + +# Run pytest with supplied options +@test *options: init + uv run pytest -n auto {{options}} + +@codespell: + uv run codespell README.md --ignore-words docs/codespell-ignore-words.txt + uv run codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt + uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt + uv run codespell tests --ignore-words docs/codespell-ignore-words.txt + +# Run linters: black, flake8, mypy, cog +@lint: codespell + uv run black . --check + uv run flake8 + uv run --extra test cog --check README.md docs/*.rst + +# Rebuild docs with cog +@cog: + uv run --extra test cog -r README.md docs/*.rst + +# Serve live docs on localhost:8000 +@docs: cog blacken-docs + uv run --extra docs make -C docs livehtml + +# Build docs as static HTML +@docs-build: cog blacken-docs + rm -rf docs/_build && cd docs && uv run make html + +# Apply Black +@black: + uv run black . 
+ +# Apply blacken-docs +@blacken-docs: + uv run blacken-docs -l 60 docs/*.rst + +# Apply prettier +@prettier: + npm run fix + +# Format code with both black and prettier +@format: black prettier blacken-docs + +@serve *options: + uv run sqlite-utils create-database data.db + uv run sqlite-utils create-table data.db docs id integer title text --pk id --ignore + uv run python -m datasette data.db --root --reload {{options}} diff --git a/README.md b/README.md index 8670936c..393e8e5c 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,42 @@ -# Datasette +Datasette [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) -[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/stable/changelog.html) +[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/latest/changelog.html) [![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) [![Tests](https://github.com/simonw/datasette/workflows/Test/badge.svg)](https://github.com/simonw/datasette/actions?query=workflow%3ATest) [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) [![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) +[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord) *An open source multi-tool for exploring and publishing data* Datasette is a tool for exploring and publishing data. It helps people take data of any shape or size and publish that as an interactive, explorable website and accompanying API. -Datasette is aimed at data journalists, museum curators, archivists, local governments and anyone else who has data that they wish to share with the world. +Datasette is aimed at data journalists, museum curators, archivists, local governments, scientists, researchers and anyone else who has data that they wish to share with the world. -[Explore a demo](https://fivethirtyeight.datasettes.com/fivethirtyeight), watch [a video about the project](https://www.youtube.com/watch?v=pTr1uLQTJNE) or try it out by [uploading and publishing your own CSV data](https://simonwillison.net/2019/Apr/23/datasette-glitch/). +[Explore a demo](https://datasette.io/global-power-plants/global-power-plants), watch [a video about the project](https://simonwillison.net/2021/Feb/7/video/) or try it out [on GitHub Codespaces](https://github.com/datasette/datasette-studio). +* [datasette.io](https://datasette.io/) is the official project website +* Latest [Datasette News](https://datasette.io/news) * Comprehensive documentation: https://docs.datasette.io/ -* Examples: https://github.com/simonw/datasette/wiki/Datasettes -* Live demo of current main: https://latest.datasette.io/ -* Support questions, feedback? Join our [GitHub Discussions forum](https://github.com/simonw/datasette/discussions) +* Examples: https://datasette.io/examples +* Live demo of current `main` branch: https://latest.datasette.io/ +* Questions, feedback or want to talk about the project? Join our [Discord](https://datasette.io/discord) -Want to stay up-to-date with the project? 
Subscribe to the [Datasette Weekly newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. - -## News - - * 9th October 2020: [Datasette 0.50](https://docs.datasette.io/en/stable/changelog.html#v0-50) - New column actions menu. `datasette.client` object for plugins to make internal API requests. Improved documentation on deploying Datasette. [Annotated release notes](https://simonwillison.net/2020/Oct/9/datasette-0-50/). - * 14th September 2020: [Datasette 0.49](https://docs.datasette.io/en/stable/changelog.html#v0-49) - JSON API for writable canned queries, path parameters for custom pages. See also [Datasette 0.49: The annotated release notes](https://simonwillison.net/2020/Sep/15/datasette-0-49/). - * 16th August 2020: [Datasette 0.48](https://docs.datasette.io/en/stable/changelog.html#v0-48) - Documentation now lives at [docs.datasette.io](https://docs.datasette.io/), improvements to the `extra_template_vars`, `extra_css_urls`, `extra_js_urls` and `extra_body_script` plugin hooks. - * 11th August 2020: [Datasette 0.47](https://docs.datasette.io/en/stable/changelog.html#v0-47) - Datasette can now be installed using Homebrew! `brew install simonw/datasette/datasette`. Also new: `datasette install name-of-plugin` and `datasette uninstall name-of-plugin` commands, and `datasette --get '/-/versions.json'` to output the result of Datasette HTTP calls on the command-line. - * 9th August 2020: [Datasette 0.46](https://docs.datasette.io/en/stable/changelog.html#v0-46) - security fix relating to CSRF protection for writable canned queries, a new logo, new debugging tools, improved file downloads and more. - * 6th August 2020: [GraphQL in Datasette with the new datasette-graphql plugin](https://simonwillison.net/2020/Aug/7/datasette-graphql/) - * 24th July 2020: Two new plugins: [datasette-copyable and datasette-insert-api](https://simonwillison.net/2020/Jul/23/datasette-copyable-datasette-insert-api/). `datasette-copyable` adds copy-and-paste export options, and `datasette-insert-api` lets you create tables and insert or update data by POSTing JSON directly to Datasette. - * 1st July 2020: [Datasette 0.45](https://docs.datasette.io/en/stable/changelog.html#v0-45) - [Magic parameters for canned queries](https://docs.datasette.io/en/stable/sql_queries.html#canned-queries-magic-parameters), a log out feature, improved plugin documentation and four new plugin hooks. See also [Datasette 0.45: The annotated release notes](https://simonwillison.net/2020/Jul/1/datasette-045/). - * 20th June 2020: [A cookiecutter template for writing Datasette plugins](https://simonwillison.net/2020/Jun/20/cookiecutter-plugins/) - * 11th June 2020: [Datasette 0.44](https://docs.datasette.io/en/stable/changelog.html#v0-44) - [Authentication and permissions](https://docs.datasette.io/en/stable/authentication.html), [writable canned queries](https://docs.datasette.io/en/stable/sql_queries.html#writable-canned-queries), flash messages, new plugin hooks and much, much more. - * 28th May 2020: [Datasette 0.43](https://docs.datasette.io/en/stable/changelog.html#v0-43) - Redesigned [register_output_renderer](https://docs.datasette.io/en/stable/plugins.html#plugin-register-output-renderer) plugin hook and various small improvements and fixes. - * 8th May 2020: [Datasette 0.42](https://docs.datasette.io/en/stable/changelog.html#v0-42) - Documented internal methods for plugins to execute read queries against a database. 
- * 6th May 2020: [Datasette 0.41](https://docs.datasette.io/en/stable/changelog.html#v0-41) - New mechanism for [creating custom pages](https://docs.datasette.io/en/0.41/custom_templates.html#custom-pages), new [configuration directory mode](https://docs.datasette.io/en/0.41/config.html#configuration-directory-mode), new `?column__notlike=` table filter and various other smaller improvements. - * 21st April 2020: [Datasette 0.40](https://docs.datasette.io/en/stable/changelog.html#v0-40) - Metadata can now be provided as YAML instead of JSON. Publishing to Zeit Now v1 is no longer supported, but Now v2 support is provided by the new [datasette-publish-now](https://github.com/simonw/datasette-publish-now) plugin. Various bug fixes. - * 24th March 2020: [Datasette 0.39](https://docs.datasette.io/en/stable/changelog.html#v0-39) - New `base_url` configuration option for running Datasette under a different URL prefix, `"sort"` and `"sort_desc"` metadata options for setting a default sort order for a table. - * 8th March 2020: [Datasette 0.38](https://docs.datasette.io/en/stable/changelog.html#v0-38) - New `--memory` option for `datasete publish cloudrun`, [Docker image](https://hub.docker.com/r/datasetteproject/datasette) upgraded to SQLite 3.31.1. - * 25th February 2020: [Datasette 0.37](https://docs.datasette.io/en/stable/changelog.html#v0-37) - new internal APIs enabling plugins to safely write to databases. Read more here: [Datasette Writes](https://simonwillison.net/2020/Feb/26/weeknotes-datasette-writes/). - * 21st February 2020: [Datasette 0.36](https://docs.datasette.io/en/stable/changelog.html#v0-36) - new internals documentation for plugins, `prepare_connection()` now accepts optional `database` and `datasette` arguments. - * 4th February 2020: [Datasette 0.35](https://docs.datasette.io/en/stable/changelog.html#v0-35) - new `.render_template()` method for plugins. - * 29th January 2020: [Datasette 0.34](https://docs.datasette.io/en/stable/changelog.html#v0-34) - improvements to search, `datasette publish cloudrun` and `datasette package`. - * 21st January 2020: [Deploying a data API using GitHub Actions and Cloud Run](https://simonwillison.net/2020/Jan/21/github-actions-cloud-run/) - how to use GitHub Actions and Google Cloud Run to automatically scrape data and deploy the result as an API with Datasette. - * 22nd December 2019: [Datasette 0.33](https://docs.datasette.io/en/stable/changelog.html#v0-33) - various small improvements. - * 19th December 2019: [Building tools to bring data-driven reporting to more newsrooms](https://medium.com/jsk-class-of-2020/building-tools-to-bring-data-driven-reporting-to-more-newsrooms-4520a0c9b3f2) - some notes on my JSK fellowship so far. - * 2nd December 2019: [Niche Museums](https://www.niche-museums.com/) is a new site entirely powered by Datasette, using custom templates and plugins. [niche-museums.com, powered by Datasette](https://simonwillison.net/2019/Nov/25/niche-museums/) describes how the site works, and [datasette-atom: Define an Atom feed using a custom SQL query](https://simonwillison.net/2019/Dec/3/datasette-atom/) describes how the new [datasette-atom plugin](https://github.com/simonw/datasette-atom) was used to add an Atom syndication feed to the site. - * 14th November 2019: [Datasette 0.32](https://docs.datasette.io/en/stable/changelog.html#v0-32) now uses asynchronous rendering in Jinja templates, which means template functions can perform asynchronous operations such as executing SQL queries. 
[datasette-template-sql](https://github.com/simonw/datasette-template-sql) is a new plugin uses this capability to add a new custom `sql(sql_query)` template function. - * 11th November 2019: [Datasette 0.31](https://docs.datasette.io/en/stable/changelog.html#v0-31) - the first version of Datasette to support Python 3.8, which means dropping support for Python 3.5. - * 18th October 2019: [Datasette 0.30](https://docs.datasette.io/en/stable/changelog.html#v0-30) - * 13th July 2019: [Single sign-on against GitHub using ASGI middleware](https://simonwillison.net/2019/Jul/14/sso-asgi/) talks about the implementation of [datasette-auth-github](https://github.com/simonw/datasette-auth-github) in more detail. - * 7th July 2019: [Datasette 0.29](https://docs.datasette.io/en/stable/changelog.html#v0-29) - ASGI, new plugin hooks, facet by date and much, much more... - * [datasette-auth-github](https://github.com/simonw/datasette-auth-github) - a new plugin for Datasette 0.29 that lets you require users to authenticate against GitHub before accessing your Datasette instance. You can whitelist specific users, or you can restrict access to members of specific GitHub organizations or teams. - * [datasette-cors](https://github.com/simonw/datasette-cors) - a plugin that lets you configure CORS access from a list of domains (or a set of domain wildcards) so you can make JavaScript calls to a Datasette instance from a specific set of other hosts. - * 23rd June 2019: [Porting Datasette to ASGI, and Turtles all the way down](https://simonwillison.net/2019/Jun/23/datasette-asgi/) - * 21st May 2019: The anonymized raw data from [the Stack Overflow Developer Survey 2019](https://stackoverflow.blog/2019/05/21/public-data-release-of-stack-overflows-2019-developer-survey/) has been [published in partnership with Glitch](https://glitch.com/culture/discover-insights-explore-developer-survey-results-2019/), powered by Datasette. - * 19th May 2019: [Datasette 0.28](https://docs.datasette.io/en/stable/changelog.html#v0-28) - a salmagundi of new features! - * No longer immutable! Datasette now supports [databases that change](https://docs.datasette.io/en/stable/changelog.html#supporting-databases-that-change). - * [Faceting improvements](https://docs.datasette.io/en/stable/changelog.html#faceting-improvements-and-faceting-plugins) including facet-by-JSON-array and the ability to define custom faceting using plugins. - * [datasette publish cloudrun](https://docs.datasette.io/en/stable/changelog.html#datasette-publish-cloudrun) lets you publish databases to Google's new Cloud Run hosting service. - * New [register_output_renderer](https://docs.datasette.io/en/stable/changelog.html#register-output-renderer-plugins) plugin hook for adding custom output extensions to Datasette in addition to the default `.json` and `.csv`. - * Dozens of other smaller features and tweaks - see [the release notes](https://docs.datasette.io/en/stable/changelog.html#v0-28) for full details. - * Read more about this release here: [Datasette 0.28—and why master should always be releasable](https://simonwillison.net/2019/May/19/datasette-0-28/) - * 24th February 2019: [ -sqlite-utils: a Python library and CLI tool for building SQLite databases](https://simonwillison.net/2019/Feb/25/sqlite-utils/) - a partner tool for easily creating SQLite databases for use with Datasette. 
- * 31st Janary 2019: [Datasette 0.27](https://docs.datasette.io/en/stable/changelog.html#v0-27) - `datasette plugins` command, newline-delimited JSON export option, new documentation on [The Datasette Ecosystem](https://docs.datasette.io/en/stable/ecosystem.html). - * 10th January 2019: [Datasette 0.26.1](https://docs.datasette.io/en/stable/changelog.html#v0-26-1) - SQLite upgrade in Docker image, `/-/versions` now shows SQLite compile options. - * 2nd January 2019: [Datasette 0.26](https://docs.datasette.io/en/stable/changelog.html#v0-26) - minor bug fixes, `datasette publish now --alias` argument. -* 18th December 2018: [Fast Autocomplete Search for Your Website](https://24ways.org/2018/fast-autocomplete-search-for-your-website/) - a new tutorial on using Datasette to build a JavaScript autocomplete search engine. -* 3rd October 2018: [The interesting ideas in Datasette](https://simonwillison.net/2018/Oct/4/datasette-ideas/) - a write-up of some of the less obvious interesting ideas embedded in the Datasette project. -* 19th September 2018: [Datasette 0.25](https://docs.datasette.io/en/stable/changelog.html#v0-25) - New plugin hooks, improved database view support and an easier way to use more recent versions of SQLite. -* 23rd July 2018: [Datasette 0.24](https://docs.datasette.io/en/stable/changelog.html#v0-24) - a number of small new features -* 29th June 2018: [datasette-vega](https://github.com/simonw/datasette-vega), a new plugin for visualizing data as bar, line or scatter charts -* 21st June 2018: [Datasette 0.23.1](https://docs.datasette.io/en/stable/changelog.html#v0-23-1) - minor bug fixes -* 18th June 2018: [Datasette 0.23: CSV, SpatiaLite and more](https://docs.datasette.io/en/stable/changelog.html#v0-23) - CSV export, foreign key expansion in JSON and CSV, new config options, improved support for SpatiaLite and a bunch of other improvements -* 23rd May 2018: [Datasette 0.22.1 bugfix](https://github.com/simonw/datasette/releases/tag/0.22.1) plus we now use [versioneer](https://github.com/warner/python-versioneer) -* 20th May 2018: [Datasette 0.22: Datasette Facets](https://simonwillison.net/2018/May/20/datasette-facets) -* 5th May 2018: [Datasette 0.21: New _shape=, new _size=, search within columns](https://github.com/simonw/datasette/releases/tag/0.21) -* 25th April 2018: [Exploring the UK Register of Members Interests with SQL and Datasette](https://simonwillison.net/2018/Apr/25/register-members-interests/) - a tutorial describing how [register-of-members-interests.datasettes.com](https://register-of-members-interests.datasettes.com/) was built ([source code here](https://github.com/simonw/register-of-members-interests)) -* 20th April 2018: [Datasette plugins, and building a clustered map visualization](https://simonwillison.net/2018/Apr/20/datasette-plugins/) - introducing Datasette's new plugin system and [datasette-cluster-map](https://pypi.org/project/datasette-cluster-map/), a plugin for visualizing data on a map -* 20th April 2018: [Datasette 0.20: static assets and templates for plugins](https://github.com/simonw/datasette/releases/tag/0.20) -* 16th April 2018: [Datasette 0.19: plugins preview](https://github.com/simonw/datasette/releases/tag/0.19) -* 14th April 2018: [Datasette 0.18: units](https://github.com/simonw/datasette/releases/tag/0.18) -* 9th April 2018: [Datasette 0.15: sort by column](https://github.com/simonw/datasette/releases/tag/0.15) -* 28th March 2018: [Baltimore Sun Public Salary 
Records](https://simonwillison.net/2018/Mar/28/datasette-in-the-wild/) - a data journalism project from the Baltimore Sun powered by Datasette - source code [is available here](https://github.com/baltimore-sun-data/salaries-datasette) -* 27th March 2018: [Cloud-first: Rapid webapp deployment using containers](https://wwwf.imperial.ac.uk/blog/research-software-engineering/2018/03/27/cloud-first-rapid-webapp-deployment-using-containers/) - a tutorial covering deploying Datasette using Microsoft Azure by the Research Software Engineering team at Imperial College London -* 28th January 2018: [Analyzing my Twitter followers with Datasette](https://simonwillison.net/2018/Jan/28/analyzing-my-twitter-followers/) - a tutorial on using Datasette to analyze follower data pulled from the Twitter API -* 17th January 2018: [Datasette Publish: a web app for publishing CSV files as an online database](https://simonwillison.net/2018/Jan/17/datasette-publish/) -* 12th December 2017: [Building a location to time zone API with SpatiaLite, OpenStreetMap and Datasette](https://simonwillison.net/2017/Dec/12/building-a-location-time-zone-api/) -* 9th December 2017: [Datasette 0.14: customization edition](https://github.com/simonw/datasette/releases/tag/0.14) -* 25th November 2017: [New in Datasette: filters, foreign keys and search](https://simonwillison.net/2017/Nov/25/new-in-datasette/) -* 13th November 2017: [Datasette: instantly create and publish an API for your SQLite databases](https://simonwillison.net/2017/Nov/13/datasette/) +Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. ## Installation - pip3 install datasette +If you are on a Mac, [Homebrew](https://brew.sh/) is the easiest way to install Datasette: -Datasette requires Python 3.6 or higher. We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. + brew install datasette + +You can also install it using `pip` or `pipx`: + + pip install datasette + +Datasette requires Python 3.8 or higher. We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. ## Basic usage @@ -111,45 +48,12 @@ This will start a web server on port 8001 - visit http://localhost:8001/ to acce Use Chrome on OS X? You can run datasette against your browser history like so: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History + datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: ![Downloads table rendered by datasette](https://static.simonwillison.net/static/2017/datasette-downloads.png) -## datasette serve options - - Usage: datasette serve [OPTIONS] [FILES]... - - Serve up specified SQLite database files with a web UI - - Options: - -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means - only connections from the local machine will be - allowed. Use 0.0.0.0 to listen to all IPs and - allow access from other machines. 
- -p, --port INTEGER Port for server, defaults to 8001 - --reload Automatically reload if database or code change - detected - useful for development - --cors Enable CORS by serving Access-Control-Allow- - Origin: * - --load-extension PATH Path to a SQLite extension to load - --inspect-file TEXT Path to JSON file created using "datasette - inspect" - -m, --metadata FILENAME Path to JSON file containing license/source - metadata - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static - files - --memory Make :memory: database available - --config CONFIG Set config option using configname:value - docs.datasette.io/en/stable/config.html - --version-note TEXT Additional note to show on /-/versions - --help-config Show available config options - --help Show this message and exit. - ## metadata.json If you want to include licensing and source information in the generated datasette website you can do so using a JSON file that looks something like this: @@ -181,3 +85,7 @@ Or: This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Heroku or Cloud Run and give you a URL to access the resulting website and API. See [Publishing data](https://docs.datasette.io/en/stable/publish.html) in the documentation for more details. + +## Datasette Lite + +[Datasette Lite](https://lite.datasette.io/) is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. Read more about that in the [Datasette Lite documentation](https://github.com/simonw/datasette-lite/blob/main/README.md). 
diff --git a/datasette/__init__.py b/datasette/__init__.py index 0e59760a..47d2b4f6 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,3 +1,8 @@ +from datasette.permissions import Permission # noqa from datasette.version import __version_info__, __version__ # noqa +from datasette.events import Event # noqa +from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa +from datasette.utils import actor_matches_allow # noqa +from datasette.views import Context # noqa from .hookspecs import hookimpl # noqa from .hookspecs import hookspec # noqa diff --git a/datasette/actor_auth_cookie.py b/datasette/actor_auth_cookie.py index 15ecd331..368213af 100644 --- a/datasette/actor_auth_cookie.py +++ b/datasette/actor_auth_cookie.py @@ -1,6 +1,6 @@ from datasette import hookimpl from itsdangerous import BadSignature -import baseconv +from datasette.utils import baseconv import time diff --git a/datasette/app.py b/datasette/app.py index 8cff6577..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,10 +1,21 @@ +from __future__ import annotations + +from asgi_csrf import Errors import asyncio +import contextvars +from typing import TYPE_CHECKING, Any, Dict, Iterable, List + +if TYPE_CHECKING: + from datasette.permissions import AllowedResource, Resource import asgi_csrf import collections +import dataclasses import datetime +import functools import glob import hashlib import httpx +import importlib.metadata import inspect from itsdangerous import BadSignature import json @@ -13,160 +24,273 @@ import re import secrets import sys import threading -import traceback +import time +import types import urllib.parse from concurrent import futures from pathlib import Path -from markupsafe import Markup +from markupsafe import Markup, escape from itsdangerous import URLSafeSerializer -import jinja2 -from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader, escape +from jinja2 import ( + ChoiceLoader, + Environment, + FileSystemLoader, + PrefixLoader, +) from jinja2.environment import Template from jinja2.exceptions import TemplateNotFound -import uvicorn -from .views.base import DatasetteError, ureg -from .views.database import DatabaseDownload, DatabaseView +from .events import Event +from .views import Context +from .views.database import database_download, DatabaseView, TableCreateView, QueryView from .views.index import IndexView from .views.special import ( JsonDataView, PatternPortfolioView, AuthTokenView, + ApiExplorerView, + CreateTokenView, LogoutView, AllowDebugView, PermissionsDebugView, MessagesDebugView, + AllowedResourcesView, + PermissionRulesView, + PermissionCheckView, + TablesView, + InstanceSchemaView, + DatabaseSchemaView, + TableSchemaView, ) -from .views.table import RowView, TableView +from .views.table import ( + TableInsertView, + TableUpsertView, + TableDropView, + table_view, +) +from .views.row import RowView, RowDeleteView, RowUpdateView from .renderer import json_renderer +from .url_builder import Urls from .database import Database, QueryInterrupted from .utils import ( + PaginatedResources, + PrefixedUrlString, + SPATIALITE_FUNCTIONS, + StartupError, async_call_with_supported_arguments, await_me_maybe, + baseconv, call_with_supported_arguments, + detect_json1, display_actor, escape_css_string, escape_sqlite, find_spatialite, format_bytes, module_from_path, + move_plugins_and_allow, + move_table_config, parse_metadata, resolve_env_secrets, - sqlite3, + resolve_routes, + tilde_decode, + tilde_encode, to_css_class, 
- HASH_LENGTH, + urlsafe_components, + redact_keys, + row_sql_params_pks, ) from .utils.asgi import ( AsgiLifespan, - Base400, Forbidden, NotFound, + DatabaseNotFound, + TableNotFound, + RowNotFound, Request, + Response, + AsgiRunOnFirstRequest, asgi_static, asgi_send, - asgi_send_html, - asgi_send_json, + asgi_send_file, asgi_send_redirect, ) +from .utils.internal_db import init_internal_db, populate_schema_tables +from .utils.sqlite import ( + sqlite3, + using_pysqlite3, +) from .tracer import AsgiTracer from .plugins import pm, DEFAULT_PLUGINS, get_plugins from .version import __version__ +from .resources import DatabaseResource, TableResource + app_root = Path(__file__).parent.parent -MEMORY = object() -ConfigOption = collections.namedtuple("ConfigOption", ("name", "default", "help")) -CONFIG_OPTIONS = ( - ConfigOption("default_page_size", 100, "Default page size for the table view"), - ConfigOption( +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) + + +class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" + + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False + + +@dataclasses.dataclass +class PermissionCheck: + """Represents a logged permission check for debugging purposes.""" + + when: str + actor: Dict[str, Any] | None + action: str + parent: str | None + child: str | None + result: bool + + +# https://github.com/simonw/datasette/issues/283#issuecomment-781591015 +SQLITE_LIMIT_ATTACHED = 10 + +INTERNAL_DB_NAME = "__INTERNAL__" + +Setting = collections.namedtuple("Setting", ("name", "default", "help")) +SETTINGS = ( + Setting("default_page_size", 100, "Default page size for the table view"), + Setting( "max_returned_rows", 1000, "Maximum rows that can be returned from a table or custom query", ), - ConfigOption( + Setting( + "max_insert_rows", + 100, + "Maximum rows that can be inserted at a time using the bulk insert API", + ), + Setting( "num_sql_threads", 3, "Number of threads in the thread pool for executing SQLite queries", ), - ConfigOption( - "sql_time_limit_ms", 1000, "Time limit for a SQL query in milliseconds" - ), - ConfigOption( + Setting("sql_time_limit_ms", 1000, "Time limit for a SQL query in milliseconds"), + Setting( "default_facet_size", 30, "Number of values to return for requested facets" ), - ConfigOption( - "facet_time_limit_ms", 200, "Time limit for calculating a requested facet" - ), - ConfigOption( + Setting("facet_time_limit_ms", 200, "Time limit for calculating a requested facet"), + Setting( "facet_suggest_time_limit_ms", 50, "Time limit for calculating a suggested facet", ), - ConfigOption( - "hash_urls", - False, - "Include DB file contents hash in URLs, for far-future caching", - ), - ConfigOption( + Setting( "allow_facet", True, "Allow users to specify columns to facet using ?_facet= parameter", ), - ConfigOption( + Setting( "allow_download", True, "Allow users to download the original SQLite database files", ), - ConfigOption("suggest_facets", True, "Calculate and display suggested facets"), - ConfigOption( + Setting( + "allow_signed_tokens", + True, + "Allow users to create and use signed API tokens", + ), + Setting( + "default_allow_sql", + True, + "Allow anyone to run arbitrary SQL queries", + ), + Setting( + 
"max_signed_tokens_ttl", + 0, + "Maximum allowed expiry time for signed API tokens", + ), + Setting("suggest_facets", True, "Calculate and display suggested facets"), + Setting( "default_cache_ttl", 5, "Default HTTP cache TTL (used in Cache-Control: max-age= header)", ), - ConfigOption( - "default_cache_ttl_hashed", - 365 * 24 * 60 * 60, - "Default HTTP cache TTL for hashed URL pages", - ), - ConfigOption( - "cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)" - ), - ConfigOption( + Setting("cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)"), + Setting( "allow_csv_stream", True, "Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)", ), - ConfigOption( + Setting( "max_csv_mb", 100, "Maximum size allowed for CSV export in MB - set 0 to disable this limit", ), - ConfigOption( + Setting( "truncate_cells_html", 2048, "Truncate cells longer than this in HTML table view - set 0 to disable", ), - ConfigOption( + Setting( "force_https_urls", False, "Force URLs in API output to always use https:// protocol", ), - ConfigOption( + Setting( "template_debug", False, "Allow display of template debug information with ?_context=1", ), - ConfigOption("base_url", "/", "Datasette URLs should use this base path"), + Setting( + "trace_debug", + False, + "Allow display of SQL trace debug information with ?_trace=1", + ), + Setting("base_url", "/", "Datasette URLs should use this base path"), ) +_HASH_URLS_REMOVED = "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead" +OBSOLETE_SETTINGS = { + "hash_urls": _HASH_URLS_REMOVED, + "default_cache_ttl_hashed": _HASH_URLS_REMOVED, +} +DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} -DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} +FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" + +DEFAULT_NOT_SET = object() + + +ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) async def favicon(request, send): - await asgi_send(send, "", 200) + await asgi_send_file( + send, + str(FAVICON_PATH), + content_type="image/png", + headers={"Cache-Control": "max-age=3600, immutable, public"}, + ) + + +ResolvedTable = collections.namedtuple("ResolvedTable", ("db", "table", "is_view")) +ResolvedRow = collections.namedtuple( + "ResolvedRow", ("db", "table", "sql", "params", "pks", "pk_values", "row") +) + + +def _to_string(value): + if isinstance(value, str): + return value + else: + return json.dumps(value, default=str) class Datasette: @@ -177,62 +301,90 @@ class Datasette: def __init__( self, - files, + files=None, immutables=None, cache_headers=True, cors=False, inspect_data=None, + config=None, metadata=None, sqlite_extensions=None, template_dir=None, plugins_dir=None, static_mounts=None, memory=False, - config=None, + settings=None, secret=None, version_note=None, config_dir=None, pdb=False, + crossdb=False, + nolock=False, + internal=None, + default_deny=False, ): + self._startup_invoked = False assert config_dir is None or isinstance( config_dir, Path ), "config_dir= should be a pathlib.Path" + self.config_dir = config_dir self.pdb = pdb self._secret = secret or secrets.token_hex(32) - self.files = tuple(files) + tuple(immutables or []) + if files is not None and isinstance(files, str): + raise ValueError("files= must be a list of paths, not a string") + self.files = tuple(files or []) + tuple(immutables or []) if config_dir: - self.files += tuple([str(p) for p in config_dir.glob("*.db")]) + db_files = [] + 
for ext in ("db", "sqlite", "sqlite3"): + db_files.extend(config_dir.glob("*.{}".format(ext))) + self.files += tuple(str(f) for f in db_files) if ( config_dir and (config_dir / "inspect-data.json").exists() and not inspect_data ): - inspect_data = json.load((config_dir / "inspect-data.json").open()) - if immutables is None: + inspect_data = json.loads((config_dir / "inspect-data.json").read_text()) + if not immutables: immutable_filenames = [i["file"] for i in inspect_data.values()] immutables = [ f for f in self.files if Path(f).name in immutable_filenames ] self.inspect_data = inspect_data self.immutables = set(immutables or []) - if not self.files: - self.files = [MEMORY] - elif memory: - self.files = (MEMORY,) + self.files self.databases = collections.OrderedDict() + self.actions = {} # .invoke_startup() will populate this + try: + self._refresh_schemas_lock = asyncio.Lock() + except RuntimeError as rex: + # Workaround for intermittent test failure, see: + # https://github.com/simonw/datasette/issues/1802 + if "There is no current event loop in thread" in str(rex): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + self._refresh_schemas_lock = asyncio.Lock() + else: + raise + self.crossdb = crossdb + self.nolock = nolock + if memory or crossdb or not self.files: + self.add_database( + Database(self, is_mutable=False, is_memory=True), name="_memory" + ) for file in self.files: - path = file - is_memory = False - if file is MEMORY: - path = None - is_memory = True - is_mutable = path not in self.immutables - db = Database(self, path, is_mutable=is_mutable, is_memory=is_memory) - if db.name in self.databases: - raise Exception("Multiple files with same stem: {}".format(db.name)) - self.add_database(db.name, db) + self.add_database( + Database(self, file, is_mutable=file not in self.immutables) + ) + + self.internal_db_created = False + if internal is None: + self._internal_database = Database(self, memory_name=secrets.token_hex()) + else: + self._internal_database = Database(self, path=internal, mode="rwc") + self._internal_database.name = INTERNAL_DB_NAME + self.cache_headers = cache_headers self.cors = cors + config_files = [] metadata_files = [] if config_dir: metadata_files = [ @@ -240,11 +392,27 @@ class Datasette: for filename in ("metadata.json", "metadata.yaml", "metadata.yml") if (config_dir / filename).exists() ] + config_files = [ + config_dir / filename + for filename in ("datasette.json", "datasette.yaml", "datasette.yml") + if (config_dir / filename).exists() + ] if config_dir and metadata_files and not metadata: with metadata_files[0].open() as fp: metadata = parse_metadata(fp.read()) - self._metadata = metadata or {} - self.sqlite_functions = [] + + if config_dir and config_files and not config: + with config_files[0].open() as fp: + config = parse_metadata(fp.read()) + + # Move any "plugins" and "allow" settings from metadata to config - updates them in place + metadata = metadata or {} + config = config or {} + metadata, config = move_plugins_and_allow(metadata, config) + # Now migrate any known table configuration settings over as well + metadata, config = move_table_config(metadata, config) + + self._metadata_local = metadata or {} self.sqlite_extensions = [] for extension in sqlite_extensions or []: # Resolve spatialite, if requested @@ -262,17 +430,57 @@ class Datasette: if config_dir and (config_dir / "static").is_dir() and not static_mounts: static_mounts = [("static", str((config_dir / "static").resolve()))] self.static_mounts = static_mounts or [] 
- if config_dir and (config_dir / "config.json").exists() and not config: - config = json.load((config_dir / "config.json").open()) - self._config = dict(DEFAULT_CONFIG, **(config or {})) + if config_dir and (config_dir / "datasette.json").exists() and not config: + config = json.loads((config_dir / "datasette.json").read_text()) + + config = config or {} + config_settings = config.get("settings") or {} + + # Validate settings from config file + for key, value in config_settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in config file") + # Validate type matches expected type from DEFAULT_SETTINGS + if value is not None: # Allow None/null values + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in config file has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. " + f"Value: {value!r}. " + f"Hint: In YAML/JSON config files, remove quotes from boolean and integer values." + ) + + # Validate settings from constructor parameter + if settings: + for key, value in settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in settings parameter") + if value is not None: + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in settings parameter has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. " + f"Value: {value!r}" + ) + + self.config = config + # CLI settings should overwrite datasette.json settings + self._settings = dict(DEFAULT_SETTINGS, **(config_settings), **(settings or {})) self.renderers = {} # File extension -> (renderer, can_render) functions self.version_note = version_note - self.executor = futures.ThreadPoolExecutor( - max_workers=self.config("num_sql_threads") - ) - self.max_returned_rows = self.config("max_returned_rows") - self.sql_time_limit_ms = self.config("sql_time_limit_ms") - self.page_size = self.config("default_page_size") + if self.setting("num_sql_threads") == 0: + self.executor = None + else: + self.executor = futures.ThreadPoolExecutor( + max_workers=self.setting("num_sql_threads") + ) + self.max_returned_rows = self.setting("max_returned_rows") + self.sql_time_limit_ms = self.setting("sql_time_limit_ms") + self.page_size = self.setting("default_page_size") # Execute plugins in constructor, to ensure they are available # when the rest of `datasette inspect` executes if self.plugins_dir: @@ -307,28 +515,187 @@ class Datasette: ), ] ) - self.jinja_env = Environment( - loader=template_loader, autoescape=True, enable_async=True + environment = Environment( + loader=template_loader, + autoescape=True, + enable_async=True, + # undefined=StrictUndefined, ) - self.jinja_env.filters["escape_css_string"] = escape_css_string - self.jinja_env.filters["quote_plus"] = lambda u: urllib.parse.quote_plus(u) - self.jinja_env.filters["escape_sqlite"] = escape_sqlite - self.jinja_env.filters["to_css_class"] = to_css_class - # pylint: disable=no-member - pm.hook.prepare_jinja2_environment(env=self.jinja_env) - + environment.filters["escape_css_string"] = escape_css_string + environment.filters["quote_plus"] = urllib.parse.quote_plus + self._jinja_env = environment + environment.filters["escape_sqlite"] = escape_sqlite + environment.filters["to_css_class"] = to_css_class self._register_renderers() self._permission_checks = 
collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) + self.root_enabled = False + self.default_deny = default_deny self.client = DatasetteClient(self) + async def apply_metadata_json(self): + # Apply any metadata entries from metadata.json to the internal tables + # step 1: top-level metadata + for key in self._metadata_local or {}: + if key == "databases": + continue + value = self._metadata_local[key] + await self.set_instance_metadata(key, _to_string(value)) + + # step 2: database-level metadata + for dbname, db in self._metadata_local.get("databases", {}).items(): + for key, value in db.items(): + if key in ("tables", "queries"): + continue + await self.set_database_metadata(dbname, key, _to_string(value)) + + # step 3: table-level metadata + for tablename, table in db.get("tables", {}).items(): + for key, value in table.items(): + if key == "columns": + continue + await self.set_resource_metadata( + dbname, tablename, key, _to_string(value) + ) + + # step 4: column-level metadata (only descriptions in metadata.json) + for columnname, column_description in table.get("columns", {}).items(): + await self.set_column_metadata( + dbname, tablename, columnname, "description", column_description + ) + + # TODO(alex) is metadata.json was loaded in, and --internal is not memory, then log + # a warning to user that they should delete their metadata.json file + + def get_jinja_environment(self, request: Request = None) -> Environment: + environment = self._jinja_env + if request: + for environment in pm.hook.jinja2_environment_from_request( + datasette=self, request=request, env=environment + ): + pass + return environment + + def get_action(self, name_or_abbr: str): + """ + Returns an Action object for the given name or abbreviation. Returns None if not found. 
+ """ + if name_or_abbr in self.actions: + return self.actions[name_or_abbr] + # Try abbreviation + for action in self.actions.values(): + if action.abbr == name_or_abbr: + return action + return None + + async def refresh_schemas(self): + if self._refresh_schemas_lock.locked(): + return + async with self._refresh_schemas_lock: + await self._refresh_schemas() + + async def _refresh_schemas(self): + internal_db = self.get_internal_database() + if not self.internal_db_created: + await init_internal_db(internal_db) + await self.apply_metadata_json() + self.internal_db_created = True + current_schema_versions = { + row["database_name"]: row["schema_version"] + for row in await internal_db.execute( + "select database_name, schema_version from catalog_databases" + ) + } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) + for database_name, db in self.databases.items(): + schema_version = (await db.execute("PRAGMA schema_version")).first()[0] + # Compare schema versions to see if we should skip it + if schema_version == current_schema_versions.get(database_name): + continue + placeholders = "(?, ?, ?, ?)" + values = [database_name, str(db.path), db.is_memory, schema_version] + if db.path is None: + placeholders = "(?, null, ?, ?)" + values = [database_name, db.is_memory, schema_version] + await internal_db.execute_write( + """ + INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) + VALUES {} + """.format( + placeholders + ), + values, + ) + await populate_schema_tables(internal_db, db) + @property def urls(self): return Urls(self) + @property + def pm(self): + """ + Return the global plugin manager instance. + + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. 
+ """ + return pm + async def invoke_startup(self): + # This must be called for Datasette to be in a usable state + if self._startup_invoked: + return + # Register event classes + event_classes = [] + for hook in pm.hook.register_events(datasette=self): + extra_classes = await await_me_maybe(hook) + if extra_classes: + event_classes.extend(extra_classes) + self.event_classes = tuple(event_classes) + + # Register actions, but watch out for duplicate name/abbr + action_names = {} + action_abbrs = {} + for hook in pm.hook.register_actions(datasette=self): + if hook: + for action in hook: + if ( + action.name in action_names + and action != action_names[action.name] + ): + raise StartupError( + "Duplicate action name: {}".format(action.name) + ) + if ( + action.abbr + and action.abbr in action_abbrs + and action != action_abbrs[action.abbr] + ): + raise StartupError( + "Duplicate action abbr: {}".format(action.abbr) + ) + action_names[action.name] = action + if action.abbr: + action_abbrs[action.abbr] = action + self.actions[action.name] = action + + for hook in pm.hook.prepare_jinja2_environment( + env=self._jinja_env, datasette=self + ): + await await_me_maybe(hook) for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) + self._startup_invoked = True def sign(self, value, namespace="default"): return URLSafeSerializer(self._secret, namespace).dumps(value) @@ -336,80 +703,272 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) - def get_database(self, name=None): + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. 
+ """ + return _in_datasette_client.get() + + def create_token( + self, + actor_id: str, + *, + expires_after: int | None = None, + restrict_all: Iterable[str] | None = None, + restrict_database: Dict[str, Iterable[str]] | None = None, + restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, + ): + token = {"a": actor_id, "t": int(time.time())} + if expires_after: + token["d"] = expires_after + + def abbreviate_action(action): + # rename to abbr if possible + action_obj = self.actions.get(action) + if not action_obj: + return action + return action_obj.abbr or action + + if expires_after: + token["d"] = expires_after + if restrict_all or restrict_database or restrict_resource: + token["_r"] = {} + if restrict_all: + token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] + if restrict_database: + token["_r"]["d"] = {} + for database, actions in restrict_database.items(): + token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] + if restrict_resource: + token["_r"]["r"] = {} + for database, resources in restrict_resource.items(): + for resource, actions in resources.items(): + token["_r"]["r"].setdefault(database, {})[resource] = [ + abbreviate_action(a) for a in actions + ] + return "dstok_{}".format(self.sign(token, namespace="token")) + + def get_database(self, name=None, route=None): + if route is not None: + matches = [db for db in self.databases.values() if db.route == route] + if not matches: + raise KeyError + return matches[0] if name is None: - return next(iter(self.databases.values())) + name = [key for key in self.databases.keys()][0] return self.databases[name] - def add_database(self, name, db): - self.databases[name] = db + def add_database(self, db, name=None, route=None): + new_databases = self.databases.copy() + if name is None: + # Pick a unique name for this database + suggestion = db.suggest_name() + name = suggestion + else: + suggestion = name + i = 2 + while name in self.databases: + name = "{}_{}".format(suggestion, i) + i += 1 + db.name = name + db.route = route or name + new_databases[name] = db + # don't mutate! that causes race conditions with live import + self.databases = new_databases + return db + + def add_memory_database(self, memory_name, name=None, route=None): + return self.add_database( + Database(self, memory_name=memory_name), name=name, route=route + ) def remove_database(self, name): - self.databases.pop(name) + self.get_database(name).close() + new_databases = self.databases.copy() + new_databases.pop(name) + self.databases = new_databases - def config(self, key): - return self._config.get(key, None) + def setting(self, key): + return self._settings.get(key, None) - def config_dict(self): - # Returns a fully resolved config dictionary, useful for templates - return {option.name: self.config(option.name) for option in CONFIG_OPTIONS} + def settings_dict(self): + # Returns a fully resolved settings dictionary, useful for templates + return {option.name: self.setting(option.name) for option in SETTINGS} - def metadata(self, key=None, database=None, table=None, fallback=True): - """ - Looks up metadata, cascading backwards from specified level. - Returns None if metadata value is not found. 
- """ - assert not ( - database is None and table is not None - ), "Cannot call metadata() with table= specified but not database=" - databases = self._metadata.get("databases") or {} - search_list = [] - if database is not None: - search_list.append(databases.get(database) or {}) - if table is not None: - table_metadata = ((databases.get(database) or {}).get("tables") or {}).get( - table - ) or {} - search_list.insert(0, table_metadata) - search_list.append(self._metadata) - if not fallback: - # No fallback allowed, so just use the first one in the list - search_list = search_list[:1] - if key is not None: - for item in search_list: - if key in item: - return item[key] - return None - else: - # Return the merged list - m = {} - for item in search_list: - m.update(item) - return m + def _metadata_recursive_update(self, orig, updated): + if not isinstance(orig, dict) or not isinstance(updated, dict): + return orig + + for key, upd_value in updated.items(): + if isinstance(upd_value, dict) and isinstance(orig.get(key), dict): + orig[key] = self._metadata_recursive_update(orig[key], upd_value) + else: + orig[key] = upd_value + return orig + + async def get_instance_metadata(self): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_instance + """ + ) + return dict(rows) + + async def get_database_metadata(self, database_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_databases + WHERE database_name = ? + """, + [database_name], + ) + return dict(rows) + + async def get_resource_metadata(self, database_name: str, resource_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_resources + WHERE database_name = ? + AND resource_name = ? + """, + [database_name, resource_name], + ) + return dict(rows) + + async def get_column_metadata( + self, database_name: str, resource_name: str, column_name: str + ): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_columns + WHERE database_name = ? + AND resource_name = ? + AND column_name = ? + """, + [database_name, resource_name, column_name], + ) + return dict(rows) + + async def set_instance_metadata(self, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_instance(key, value) + VALUES(?, ?) + ON CONFLICT(key) DO UPDATE SET value = excluded.value; + """, + [key, value], + ) + + async def set_database_metadata(self, database_name: str, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_databases(database_name, key, value) + VALUES(?, ?, ?) + ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, key, value], + ) + + async def set_resource_metadata( + self, database_name: str, resource_name: str, key: str, value: str + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_resources(database_name, resource_name, key, value) + VALUES(?, ?, ?, ?) 
+ ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, key, value], + ) + + async def set_column_metadata( + self, + database_name: str, + resource_name: str, + column_name: str, + key: str, + value: str, + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_columns(database_name, resource_name, column_name, key, value) + VALUES(?, ?, ?, ?, ?) + ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, column_name, key, value], + ) + + def get_internal_database(self): + return self._internal_database def plugin_config(self, plugin_name, database=None, table=None, fallback=True): - "Return config for plugin, falling back from specified database/table" - plugins = self.metadata( - "plugins", database=database, table=table, fallback=fallback - ) - if plugins is None: - return None - plugin_config = plugins.get(plugin_name) - # Resolve any $file and $env keys - plugin_config = resolve_env_secrets(plugin_config, os.environ) - return plugin_config + """Return config for plugin, falling back from specified database/table""" + if database is None and table is None: + config = self._plugin_config_top(plugin_name) + else: + config = self._plugin_config_nested(plugin_name, database, table, fallback) + + return resolve_env_secrets(config, os.environ) + + def _plugin_config_top(self, plugin_name): + """Returns any top-level plugin configuration for the specified plugin.""" + return ((self.config or {}).get("plugins") or {}).get(plugin_name) + + def _plugin_config_nested(self, plugin_name, database, table=None, fallback=True): + """Returns any database or table-level plugin configuration for the specified plugin.""" + db_config = ((self.config or {}).get("databases") or {}).get(database) + + # if there's no db-level configuration, then return early, falling back to top-level if needed + if not db_config: + return self._plugin_config_top(plugin_name) if fallback else None + + db_plugin_config = (db_config.get("plugins") or {}).get(plugin_name) + + if table: + table_plugin_config = ( + ((db_config.get("tables") or {}).get(table) or {}).get("plugins") or {} + ).get(plugin_name) + + # fallback to db_config or top-level config, in that order, if needed + if table_plugin_config is None and fallback: + return db_plugin_config or self._plugin_config_top(plugin_name) + + return table_plugin_config + + # fallback to top-level if needed + if db_plugin_config is None and fallback: + self._plugin_config_top(plugin_name) + + return db_plugin_config def app_css_hash(self): if not hasattr(self, "_app_css_hash"): - self._app_css_hash = hashlib.sha1( - open(os.path.join(str(app_root), "datasette/static/app.css")) - .read() - .encode("utf8") - ).hexdigest()[:6] + with open(os.path.join(str(app_root), "datasette/static/app.css")) as fp: + self._app_css_hash = hashlib.sha1(fp.read().encode("utf8")).hexdigest()[ + :6 + ] return self._app_css_hash async def get_canned_queries(self, database_name, actor): - queries = self.metadata("queries", database=database_name, fallback=False) or {} + queries = {} for more_queries in pm.hook.canned_queries( datasette=self, database=database_name, @@ -431,33 +990,37 @@ class Datasette: if query: return query - def update_with_inherited_metadata(self, metadata): - # Fills in source/license with defaults, if available - metadata.update( - { - "source": 
metadata.get("source") or self.metadata("source"), - "source_url": metadata.get("source_url") or self.metadata("source_url"), - "license": metadata.get("license") or self.metadata("license"), - "license_url": metadata.get("license_url") - or self.metadata("license_url"), - "about": metadata.get("about") or self.metadata("about"), - "about_url": metadata.get("about_url") or self.metadata("about_url"), - } - ) - def _prepare_connection(self, conn, database): conn.row_factory = sqlite3.Row conn.text_factory = lambda x: str(x, "utf-8", "replace") - for name, num_args, func in self.sqlite_functions: - conn.create_function(name, num_args, func) - if self.sqlite_extensions: + if self.sqlite_extensions and database != INTERNAL_DB_NAME: conn.enable_load_extension(True) for extension in self.sqlite_extensions: - conn.execute("SELECT load_extension('{}')".format(extension)) - if self.config("cache_size_kb"): - conn.execute("PRAGMA cache_size=-{}".format(self.config("cache_size_kb"))) + # "extension" is either a string path to the extension + # or a 2-item tuple that specifies which entrypoint to load. + if isinstance(extension, tuple): + path, entrypoint = extension + conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) + else: + conn.execute("SELECT load_extension(?)", [extension]) + if self.setting("cache_size_kb"): + conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}") # pylint: disable=no-member - pm.hook.prepare_connection(conn=conn, database=database, datasette=self) + if database != INTERNAL_DB_NAME: + pm.hook.prepare_connection(conn=conn, database=database, datasette=self) + # If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases + if self.crossdb and database == "_memory": + count = 0 + for db_name, db in self.databases.items(): + if count >= SQLITE_LIMIT_ATTACHED or db.is_memory: + continue + sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format( + path=db.path, + qs="mode=ro" if db.is_mutable else "immutable=1", + name=db_name, + ) + conn.execute(sql) + count += 1 def add_message(self, request, message, type=INFO): if not hasattr(request, "_messages"): @@ -481,34 +1044,409 @@ class Datasette: else: return [] - async def permission_allowed(self, actor, action, resource=None, default=False): - "Check permissions using the permissions_allowed plugin hook" - result = None - for check in pm.hook.permission_allowed( + async def _crumb_items(self, request, table=None, database=None): + crumbs = [] + actor = None + if request: + actor = request.actor + # Top-level link + if await self.allowed(action="view-instance", actor=actor): + crumbs.append({"href": self.urls.instance(), "label": "home"}) + # Database link + if database: + if await self.allowed( + action="view-database", + resource=DatabaseResource(database=database), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.database(database), + "label": database, + } + ) + # Table link + if table: + assert database, "table= requires database=" + if await self.allowed( + action="view-table", + resource=TableResource(database=database, table=table), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.table(database, table), + "label": table, + } + ) + return crumbs + + async def actors_from_ids( + self, actor_ids: Iterable[str | int] + ) -> Dict[int | str, Dict]: + result = pm.hook.actors_from_ids(datasette=self, actor_ids=actor_ids) + if result is None: + # Do the default thing + return {actor_id: {"id": actor_id} for actor_id in actor_ids} + result = await 
await_me_maybe(result) + return result + + async def track_event(self, event: Event): + assert isinstance(event, self.event_classes), "Invalid event type: {}".format( + type(event) + ) + for hook in pm.hook.track_event(datasette=self, event=event): + await await_me_maybe(hook) + + def resource_for_action(self, action: str, parent: str | None, child: str | None): + """ + Create a Resource instance for the given action with parent/child values. + + Looks up the action's resource_class and instantiates it with the + provided parent and child identifiers. + + Args: + action: The action name (e.g., "view-table", "view-query") + parent: The parent resource identifier (e.g., database name) + child: The child resource identifier (e.g., table/query name) + + Returns: + A Resource instance of the appropriate subclass + + Raises: + ValueError: If the action is unknown + """ + from datasette.permissions import Resource + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + resource_class = action_obj.resource_class + instance = object.__new__(resource_class) + Resource.__init__(instance, parent=parent, child=child) + return instance + + async def check_visibility( + self, + actor: dict, + action: str, + resource: "Resource" | None = None, + ): + """ + Check if actor can see a resource and if it's private. + + Returns (visible, private) tuple: + - visible: bool - can the actor see it? + - private: bool - if visible, can anonymous users NOT see it? + """ + from datasette.permissions import Resource + + # Validate that resource is a Resource object or None + if resource is not None and not isinstance(resource, Resource): + raise TypeError(f"resource must be a Resource subclass instance or None.") + + # Check if actor can see it + if not await self.allowed(action=action, resource=resource, actor=actor): + return False, False + + # Check if anonymous user can see it (for "private" flag) + if not await self.allowed(action=action, resource=resource, actor=None): + # Actor can see it but anonymous cannot - it's private + return True, True + + # Both actor and anonymous can see it - it's public + return True, False + + async def allowed_resources_sql( + self, + *, + action: str, + actor: dict | None = None, + parent: str | None = None, + include_is_private: bool = False, + ) -> ResourcesSQL: + """ + Build SQL query to get all resources the actor can access for the given action. + + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, include is_private column showing if anonymous cannot access + + Returns a namedtuple of (query: str, params: dict) that can be executed against the internal database. + The query returns rows with (parent, child, reason) columns, plus is_private if requested. 
+ + Example: + query, params = await datasette.allowed_resources_sql( + action="view-table", + actor=actor, + parent="mydb", + include_is_private=True + ) + result = await datasette.get_internal_database().execute(query, params) + """ + from datasette.utils.actions_sql import build_allowed_resources_sql + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + sql, params = await build_allowed_resources_sql( + self, actor, action, parent=parent, include_is_private=include_is_private + ) + return ResourcesSQL(sql, params) + + async def allowed_resources( + self, + action: str, + actor: dict | None = None, + *, + parent: str | None = None, + include_is_private: bool = False, + include_reasons: bool = False, + limit: int = 100, + next: str | None = None, + ) -> PaginatedResources: + """ + Return paginated resources the actor can access for the given action. + + Uses SQL with keyset pagination to efficiently filter resources. + Returns PaginatedResources with list of Resource instances and pagination metadata. + + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, adds a .private attribute to each Resource + include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons + limit: Maximum number of results to return (1-1000, default 100) + next: Keyset token from previous page for pagination + + Returns: + PaginatedResources with: + - resources: List of Resource objects for this page + - next: Token for next page (None if no more results) + + Example: + # Get first page of tables + page = await datasette.allowed_resources("view-table", actor, limit=50) + for table in page.resources: + print(f"{table.parent}/{table.child}") + + # Get next page + if page.next: + next_page = await datasette.allowed_resources( + "view-table", actor, limit=50, next=page.next + ) + + # With reasons for debugging + page = await datasette.allowed_resources( + "view-table", actor, include_reasons=True + ) + for table in page.resources: + print(f"{table.child}: {table.reasons}") + + # Iterate through all results with async generator + page = await datasette.allowed_resources("view-table", actor) + async for table in page.all(): + print(table.child) + """ + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + # Validate and cap limit + limit = min(max(1, limit), 1000) + + # Get base SQL query + query, params = await self.allowed_resources_sql( + action=action, + actor=actor, + parent=parent, + include_is_private=include_is_private, + ) + + # Add keyset pagination WHERE clause if next token provided + if next: + try: + components = urlsafe_components(next) + if len(components) >= 2: + last_parent, last_child = components[0], components[1] + # Keyset condition: (parent > last) OR (parent = last AND child > last) + keyset_where = """ + (parent > :keyset_parent OR + (parent = :keyset_parent AND child > :keyset_child)) + """ + # Wrap original query and add keyset filter + query = f"SELECT * FROM ({query}) WHERE {keyset_where}" + params["keyset_parent"] = last_parent + params["keyset_child"] = last_child + except (ValueError, KeyError): + # Invalid token - ignore and start from beginning + pass + + # Add LIMIT (fetch limit+1 to detect if there are more results) + # Note: query from allowed_resources_sql() already includes ORDER BY 
parent, child + query = f"{query} LIMIT :limit" + params["limit"] = limit + 1 + + # Execute query + result = await self.get_internal_database().execute(query, params) + rows = list(result.rows) + + # Check if truncated (got more than limit rows) + truncated = len(rows) > limit + if truncated: + rows = rows[:limit] # Remove the extra row + + # Build Resource objects with optional attributes + resources = [] + for row in rows: + # row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested) + resource = self.resource_for_action(action, parent=row[0], child=row[1]) + + # Add reasons if requested + if include_reasons: + reason_json = row[2] + try: + reasons_array = ( + json.loads(reason_json) if isinstance(reason_json, str) else [] + ) + resource.reasons = [r for r in reasons_array if r is not None] + except (json.JSONDecodeError, TypeError): + resource.reasons = [reason_json] if reason_json else [] + + # Add private flag if requested + if include_is_private: + resource.private = bool(row[3]) + + resources.append(resource) + + # Generate next token if there are more results + next_token = None + if truncated and resources: + last_resource = resources[-1] + # Use tilde-encoding like table pagination + next_token = "{},{}".format( + tilde_encode(str(last_resource.parent)), + tilde_encode(str(last_resource.child)), + ) + + return PaginatedResources( + resources=resources, + next=next_token, + _datasette=self, + _action=action, + _actor=actor, + _parent=parent, + _include_is_private=include_is_private, + _include_reasons=include_reasons, + _limit=limit, + ) + + async def allowed( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ) -> bool: + """ + Check if actor can perform action on specific resource. + + Uses SQL to check permission for a single resource without fetching all resources. + This is efficient - it does NOT call allowed_resources() and check membership. + + For global actions, resource should be None (or omitted). 
+ + Example: + from datasette.resources import TableResource + can_view = await datasette.allowed( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=actor + ) + + # For global actions, resource can be omitted: + can_debug = await datasette.allowed(action="permissions-debug", actor=actor) + """ + from datasette.utils.actions_sql import check_permission_for_resource + + # For global actions, resource remains None + + # Check if this action has also_requires - if so, check that action first + action_obj = self.actions.get(action) + if action_obj and action_obj.also_requires: + # Must have the required action first + if not await self.allowed( + action=action_obj.also_requires, + resource=resource, + actor=actor, + ): + return False + + # For global actions, resource is None + parent = resource.parent if resource else None + child = resource.child if resource else None + + result = await check_permission_for_resource( datasette=self, actor=actor, action=action, - resource=resource, - ): - check = await await_me_maybe(check) - if check is not None: - result = check - used_default = False - if result is None: - result = default - used_default = True - self._permission_checks.append( - { - "when": datetime.datetime.utcnow().isoformat(), - "actor": actor, - "action": action, - "resource": resource, - "used_default": used_default, - "result": result, - } + parent=parent, + child=child, ) + + # Log the permission check for debugging + self._permission_checks.append( + PermissionCheck( + when=datetime.datetime.now(datetime.timezone.utc).isoformat(), + actor=actor, + action=action, + parent=parent, + child=child, + result=result, + ) + ) + return result + async def ensure_permission( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ): + """ + Check if actor can perform action on resource, raising Forbidden if not. + + This is a convenience wrapper around allowed() that raises Forbidden + instead of returning False. Use this when you want to enforce a permission + check and halt execution if it fails. 
+ + Example: + from datasette.resources import TableResource + + # Will raise Forbidden if actor cannot view the table + await datasette.ensure_permission( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=request.actor + ) + + # For instance-level actions, resource can be omitted: + await datasette.ensure_permission( + action="permissions-debug", + actor=request.actor + ) + """ + if not await self.allowed(action=action, resource=resource, actor=actor): + raise Forbidden(action) + async def execute( self, db_name, @@ -528,8 +1466,8 @@ class Datasette: log_sql_errors=log_sql_errors, ) - async def expand_foreign_keys(self, database, table, column, values): - "Returns dict mapping (column, value) -> label" + async def expand_foreign_keys(self, actor, database, table, column, values): + """Returns dict mapping (column, value) -> label""" labeled_fks = {} db = self.databases[database] foreign_keys = await db.foreign_keys_for_table(table) @@ -542,7 +1480,19 @@ class Datasette: ][0] except IndexError: return {} - label_column = await db.label_column_for_table(fk["other_table"]) + # Ensure user has permission to view the referenced table + from datasette.resources import TableResource + + other_table = fk["other_table"] + other_column = fk["other_column"] + visible, _ = await self.check_visibility( + actor, + action="view-table", + resource=TableResource(database=database, table=other_table), + ) + if not visible: + return {} + label_column = await db.label_column_for_table(other_table) if not label_column: return {(fk["column"], value): str(value) for value in values} labeled_fks = {} @@ -551,9 +1501,9 @@ class Datasette: from {other_table} where {other_column} in ({placeholders}) """.format( - other_column=escape_sqlite(fk["other_column"]), + other_column=escape_sqlite(other_column), label_column=escape_sqlite(label_column), - other_table=escape_sqlite(fk["other_table"]), + other_table=escape_sqlite(other_table), placeholders=", ".join(["?"] * len(set(values))), ) try: @@ -567,35 +1517,30 @@ class Datasette: def absolute_url(self, request, path): url = urllib.parse.urljoin(request.url, path) - if url.startswith("http://") and self.config("force_https_urls"): + if url.startswith("http://") and self.setting("force_https_urls"): url = "https://" + url[len("http://") :] return url - def _register_custom_units(self): - "Register any custom units defined in the metadata.json with Pint" - for unit in self.metadata("custom_units") or []: - ureg.define(unit) - def _connected_databases(self): return [ { "name": d.name, + "route": d.route, "path": d.path, "size": d.size, "is_mutable": d.is_mutable, "is_memory": d.is_memory, "hash": d.hash, } - for d in sorted(self.databases.values(), key=lambda d: d.name) + for name, d in self.databases.items() ] def _versions(self): conn = sqlite3.connect(":memory:") - self._prepare_connection(conn, ":memory:") + self._prepare_connection(conn, "_memory") sqlite_version = conn.execute("select sqlite_version()").fetchone()[0] - sqlite_extensions = {} + sqlite_extensions = {"json1": detect_json1(conn)} for extension, testsql, hasversion in ( - ("json1", "SELECT json('{}')", False), ("spatialite", "SELECT spatialite_version()", True), ): try: @@ -606,6 +1551,17 @@ class Datasette: sqlite_extensions[extension] = None except Exception: pass + # More details on SpatiaLite + if "spatialite" in sqlite_extensions: + spatialite_details = {} + for fn in SPATIALITE_FUNCTIONS: + try: + result = conn.execute("select {}()".format(fn)) + 
spatialite_details[fn] = result.fetchone()[0] + except Exception as e: + spatialite_details[fn] = {"error": str(e)} + sqlite_extensions["spatialite"] = spatialite_details + # Figure out supported FTS versions fts_versions = [] for fts in ("FTS5", "FTS4", "FTS3"): @@ -619,14 +1575,23 @@ class Datasette: datasette_version = {"version": __version__} if self.version_note: datasette_version["note"] = self.version_note - return { + + try: + # Optional import to avoid breaking Pyodide + # https://github.com/simonw/datasette/issues/1733#issuecomment-1115268245 + import uvicorn + + uvicorn_version = uvicorn.__version__ + except ImportError: + uvicorn_version = None + info = { "python": { "version": ".".join(map(str, sys.version_info[:3])), "full": sys.version, }, "datasette": datasette_version, "asgi": "3.0", - "uvicorn": uvicorn.__version__, + "uvicorn": uvicorn_version, "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, @@ -636,6 +1601,14 @@ class Datasette: ], }, } + if using_pysqlite3: + for package in ("pysqlite3", "pysqlite3-binary"): + try: + info["pysqlite3"] = importlib.metadata.version(package) + break + except importlib.metadata.PackageNotFoundError: + pass + return info def _plugins(self, request=None, all=False): ps = list(get_plugins()) @@ -646,18 +1619,21 @@ class Datasette: should_show_all = all if not should_show_all: ps = [p for p in ps if p["name"] not in DEFAULT_PLUGINS] + ps.sort(key=lambda p: p["name"]) return [ { "name": p["name"], "static": p["static_path"] is not None, "templates": p["templates_path"] is not None, "version": p.get("version"), - "hooks": p["hooks"], + "hooks": list(sorted(set(p["hooks"]))), } for p in ps ] def _threads(self): + if self.setting("num_sql_threads") == 0: + return {"num_threads": 0, "threads": []} threads = list(threading.enumerate()) d = { "num_threads": len(threads), @@ -665,31 +1641,46 @@ class Datasette: {"name": t.name, "ident": t.ident, "daemon": t.daemon} for t in threads ], } - # Only available in Python 3.7+ - if hasattr(asyncio, "all_tasks"): - tasks = asyncio.all_tasks() - d.update( - { - "num_tasks": len(tasks), - "tasks": [_cleaner_task_str(t) for t in tasks], - } - ) + tasks = asyncio.all_tasks() + d.update( + { + "num_tasks": len(tasks), + "tasks": [_cleaner_task_str(t) for t in tasks], + } + ) return d def _actor(self, request): return {"actor": request.actor} - def table_metadata(self, database, table): - "Fetch table-specific metadata." + def _actions(self): + return [ + { + "name": action.name, + "abbr": action.abbr, + "description": action.description, + "takes_parent": action.takes_parent, + "takes_child": action.takes_child, + "resource_class": ( + action.resource_class.__name__ if action.resource_class else None + ), + "also_requires": action.also_requires, + } + for action in sorted(self.actions.values(), key=lambda a: a.name) + ] + + async def table_config(self, database: str, table: str) -> dict: + """Return dictionary of configuration for specified table""" return ( - (self.metadata("databases") or {}) + (self.config or {}) + .get("databases", {}) .get(database, {}) .get("tables", {}) .get(table, {}) ) def _register_renderers(self): - """ Register output renderers which output data in custom formats. 
""" + """Register output renderers which output data in custom formats.""" # Built-in renderers self.renderers["json"] = (json_renderer, lambda: True) @@ -697,7 +1688,7 @@ class Datasette: hook_renderers = [] # pylint: disable=no-member for hook in pm.hook.register_output_renderer(datasette=self): - if type(hook) == list: + if type(hook) is list: hook_renderers += hook else: hook_renderers.append(hook) @@ -710,15 +1701,23 @@ class Datasette: ) async def render_template( - self, templates, context=None, request=None, view_name=None + self, + templates: List[str] | str | Template, + context: Dict[str, Any] | Context | None = None, + request: Request | None = None, + view_name: str | None = None, ): + if not self._startup_invoked: + raise Exception("render_template() called before await ds.invoke_startup()") context = context or {} if isinstance(templates, Template): template = templates else: if isinstance(templates, str): templates = [templates] - template = self.jinja_env.select_template(templates) + template = self.get_jinja_environment(request).select_template(templates) + if dataclasses.is_dataclass(context): + context = dataclasses.asdict(context) body_scripts = [] # pylint: disable=no-member for extra_script in pm.hook.extra_body_script( @@ -731,7 +1730,13 @@ class Datasette: datasette=self, ): extra_script = await await_me_maybe(extra_script) - body_scripts.append(Markup(extra_script)) + if isinstance(extra_script, dict): + script = extra_script["script"] + module = bool(extra_script.get("module")) + else: + script = extra_script + module = False + body_scripts.append({"script": Markup(script), "module": module}) extra_template_vars = {} # pylint: disable=no-member @@ -753,7 +1758,9 @@ class Datasette: async def menu_links(): links = [] for hook in pm.hook.menu_links( - datasette=self, actor=request.actor if request else None + datasette=self, + actor=request.actor if request else None, + request=request or None, ): extra_links = await await_me_maybe(hook) if extra_links: @@ -763,6 +1770,8 @@ class Datasette: template_context = { **context, **{ + "request": request, + "crumb_items": self._crumb_items, "urls": self.urls, "actor": request.actor if request else None, "menu_links": menu_links, @@ -781,18 +1790,31 @@ class Datasette: "extra_js_urls": await self._asset_urls( "extra_js_urls", template, context, request, view_name ), - "base_url": self.config("base_url"), + "base_url": self.setting("base_url"), "csrftoken": request.scope["csrftoken"] if request else lambda: "", + "datasette_version": __version__, }, **extra_template_vars, } - if request and request.args.get("_context") and self.config("template_debug"): + if request and request.args.get("_context") and self.setting("template_debug"): return "
<pre>{}</pre>
".format( - jinja2.escape(json.dumps(template_context, default=repr, indent=4)) + escape(json.dumps(template_context, default=repr, indent=4)) ) return await template.render_async(template_context) + def set_actor_cookie( + self, response: Response, actor: dict, expire_after: int | None = None + ): + data = {"a": actor} + if expire_after: + expires_at = int(time.time()) + (24 * 60 * 60) + data["e"] = baseconv.base62.encode(expires_at) + response.set_cookie("ds_actor", self.sign(data, "actor")) + + def delete_actor_cookie(self, response: Response): + response.set_cookie("ds_actor", "", expires=0, max_age=0) + async def _asset_urls(self, key, template, context, request, view_name): # Flatten list-of-lists from plugins: seen_urls = set() @@ -808,39 +1830,49 @@ class Datasette: ): hook = await await_me_maybe(hook) collected.extend(hook) - collected.extend(self.metadata(key) or []) + collected.extend((self.config or {}).get(key) or []) output = [] for url_or_dict in collected: if isinstance(url_or_dict, dict): url = url_or_dict["url"] sri = url_or_dict.get("sri") + module = bool(url_or_dict.get("module")) else: url = url_or_dict sri = None + module = False if url in seen_urls: continue seen_urls.add(url) + if url.startswith("/"): + # Take base_url into account: + url = self.urls.path(url) + script = {"url": url} if sri: - output.append({"url": url, "sri": sri}) - else: - output.append({"url": url}) + script["sri"] = sri + if module: + script["module"] = True + output.append(script) return output - def app(self): - "Returns an ASGI app function that serves the whole of Datasette" + def _config(self): + return redact_keys( + self.config, ("secret", "key", "password", "token", "hash", "dsn") + ) + + def _routes(self): routes = [] - for routes_to_add in pm.hook.register_routes(): + for routes_to_add in pm.hook.register_routes(datasette=self): for regex, view_fn in routes_to_add: routes.append((regex, wrap_view(view_fn, self))) def add_route(view, regex): routes.append((regex, view)) - # Generate a regex snippet to match all registered renderer file extensions - renderer_regex = "|".join(r"\." 
+ key for key in self.renderers.keys()) - - add_route(IndexView.as_view(self), r"/(?P(\.jsono?)?$)") + add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") + add_route(IndexView.as_view(self), r"/-/(\.(?Pjsono?))?$") + add_route(permanent_redirect("/-/"), r"/-$") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") @@ -855,7 +1887,7 @@ class Datasette: if plugin["static_path"]: add_route( asgi_static(plugin["static_path"]), - "/-/static-plugins/{}/(?P.*)$".format(plugin["name"]), + f"/-/static-plugins/{plugin['name']}/(?P.*)$", ) # Support underscores in name in addition to hyphens, see https://github.com/simonw/datasette/issues/611 add_route( @@ -865,39 +1897,73 @@ class Datasette: ), ) add_route( - JsonDataView.as_view(self, "metadata.json", lambda: self._metadata), - r"/-/metadata(?P(\.json)?)$", + permanent_redirect( + "/_memory", forward_query_string=True, forward_rest=True + ), + r"/:memory:(?P.*)$", ) add_route( JsonDataView.as_view(self, "versions.json", self._versions), - r"/-/versions(?P(\.json)?)$", + r"/-/versions(\.(?Pjson))?$", ) add_route( JsonDataView.as_view( self, "plugins.json", self._plugins, needs_request=True ), - r"/-/plugins(?P(\.json)?)$", + r"/-/plugins(\.(?Pjson))?$", ) add_route( - JsonDataView.as_view(self, "config.json", lambda: self._config), - r"/-/config(?P(\.json)?)$", + JsonDataView.as_view(self, "settings.json", lambda: self._settings), + r"/-/settings(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view(self, "config.json", lambda: self._config()), + r"/-/config(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "threads.json", self._threads), - r"/-/threads(?P(\.json)?)$", + r"/-/threads(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "databases.json", self._connected_databases), - r"/-/databases(?P(\.json)?)$", + r"/-/databases(\.(?Pjson))?$", ) add_route( - JsonDataView.as_view(self, "actor.json", self._actor, needs_request=True), - r"/-/actor(?P(\.json)?)$", + JsonDataView.as_view( + self, "actor.json", self._actor, needs_request=True, permission=None + ), + r"/-/actor(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view( + self, + "actions.json", + self._actions, + template="debug_actions.html", + permission="permissions-debug", + ), + r"/-/actions(\.(?Pjson))?$", ) add_route( AuthTokenView.as_view(self), r"/-/auth-token$", ) + add_route( + CreateTokenView.as_view(self), + r"/-/create-token$", + ) + add_route( + ApiExplorerView.as_view(self), + r"/-/api$", + ) + add_route( + TablesView.as_view(self), + r"/-/tables(\.(?Pjson))?$", + ) + add_route( + InstanceSchemaView.as_view(self), + r"/-/schema(\.(?Pjson|md))?$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", @@ -906,6 +1972,18 @@ class Datasette: PermissionsDebugView.as_view(self), r"/-/permissions$", ) + add_route( + AllowedResourcesView.as_view(self), + r"/-/allowed(\.(?Pjson))?$", + ) + add_route( + PermissionRulesView.as_view(self), + r"/-/rules(\.(?Pjson))?$", + ) + add_route( + PermissionCheckView.as_view(self), + r"/-/check(\.(?Pjson))?$", + ) add_route( MessagesDebugView.as_view(self), r"/-/messages$", @@ -915,46 +1993,127 @@ class Datasette: r"/-/allow-debug$", ) add_route( - PatternPortfolioView.as_view(self), + wrap_view(PatternPortfolioView, self), r"/-/patterns$", ) add_route( - DatabaseDownload.as_view(self), r"/(?P[^/]+?)(?P\.db)$" + wrap_view(database_download, self), + r"/(?P[^\/\.]+)\.db$", ) add_route( - DatabaseView.as_view(self), - r"/(?P[^/]+?)(?P" - + renderer_regex - + r"|.jsono|\.csv)?$", + 
wrap_view(DatabaseView, self), + r"/(?P[^\/\.]+)(\.(?P\w+))?$", + ) + add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") + add_route( + DatabaseSchemaView.as_view(self), + r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", ) add_route( - TableView.as_view(self), - r"/(?P[^/]+)/(?P[^/]+?$)", + wrap_view(QueryView, self), + r"/(?P[^\/\.]+)/-/query(\.(?P\w+))?$", + ) + add_route( + wrap_view(table_view, self), + r"/(?P[^\/\.]+)/(?P[^\/\.]+)(\.(?P\w+))?$", ) add_route( RowView.as_view(self), - r"/(?P[^/]+)/(?P
[^/]+?)/(?P[^/]+?)(?P" - + renderer_regex - + r")?$", + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P<pks>[^/]+?)(\.(?P<format>\w+))?$", ) - self._register_custom_units() + add_route( + TableInsertView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/insert$", + ) + add_route( + TableUpsertView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/upsert$", + ) + add_route( + TableDropView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/drop$", + ) + add_route( + TableSchemaView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/schema(\.(?P<format>json|md))?$", + ) + add_route( + RowDeleteView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P<pks>[^/]+?)/-/delete$", + ) + add_route( + RowUpdateView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P[^/]+?)/-/update$", + ) + return [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def resolve_database(self, request): + database_route = tilde_decode(request.url_vars["database"]) + try: + return self.get_database(route=database_route) + except KeyError: + raise DatabaseNotFound(database_route) + + async def resolve_table(self, request): + db = await self.resolve_database(request) + table_name = tilde_decode(request.url_vars["table"]) + # Table must exist + is_view = False + table_exists = await db.table_exists(table_name) + if not table_exists: + is_view = await db.view_exists(table_name) + if not (table_exists or is_view): + raise TableNotFound(db.name, table_name) + return ResolvedTable(db, table_name, is_view) + + async def resolve_row(self, request): + db, table_name, _ = await self.resolve_table(request) + pk_values = urlsafe_components(request.url_vars["pks"]) + sql, params, pks = await row_sql_params_pks(db, table_name, pk_values) + results = await db.execute(sql, params, truncate=True) + row = results.first() + if row is None: + raise RowNotFound(db.name, table_name, pk_values) + return ResolvedRow(db, table_name, sql, params, pks, pk_values, results.first()) + + def app(self): + """Returns an ASGI app function that serves the whole of Datasette""" + routes = self._routes() async def setup_db(): # First time server starts up, calculate table counts for immutable databases - for dbname, database in self.databases.items(): + for database in self.databases.values(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - asgi = AsgiLifespan( - AsgiTracer( - asgi_csrf.asgi_csrf( - DatasetteRouter(self, routes), - signing_secret=self._secret, - cookie_name="ds_csrftoken", - ) + async def custom_csrf_error(scope, send, message_id): + await asgi_send( + send, + content=await self.render_template( + "csrf_error.html", + {"message_id": message_id, "message_name": Errors(message_id).name}, + ), + status=403, + content_type="text/html; charset=utf-8", + ) + + asgi = asgi_csrf.asgi_csrf( + DatasetteRouter(self, routes), + signing_secret=self._secret, + cookie_name="ds_csrftoken", + skip_if_scope=lambda scope: any( + pm.hook.skip_csrf(datasette=self, scope=scope) ), - on_startup=setup_db, + send_csrf_failed=custom_csrf_error, ) + if self.setting("trace_debug"): + asgi = AsgiTracer(asgi) + asgi = AsgiLifespan(asgi) + asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) return asgi @@ -963,22 +2122,7 @@ class Datasette: class DatasetteRouter: def __init__(self, datasette, routes): self.ds = datasette - routes = routes or [] - self.routes = [ - # Compile any strings to regular expressions - ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) - for pattern, view in routes - ] - # Build a list of pages/blah/{name}.html matching expressions - pattern_templates = [ - filepath - for filepath in self.ds.jinja_env.list_templates() - if "{" in filepath and filepath.startswith("pages/") - ] - self.page_routes = [ - (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) - for filepath in pattern_templates - ] + self.routes = routes or [] async def __call__(self, scope, receive, send): # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves @@ -986,13 +2130,15 @@ class DatasetteRouter: raw_path = scope.get("raw_path") if raw_path: path = raw_path.decode("ascii") + path = path.partition("?")[0] return await self.route_path(scope, receive, send, path) async def route_path(self, scope, receive, send, path): # Strip off base_url if present before routing - base_url = self.ds.config("base_url") + base_url = self.ds.setting("base_url") if base_url != "/" and path.startswith(base_url): path = "/" + path[len(base_url) :] + scope = dict(scope, route_path=path) request = Request(scope, receive) # Populate request_messages if ds_messages cookie is present try: @@ -1005,7 +2151,7 @@ class DatasetteRouter: scope_modifications = {} # Apply force_https_urls, if set if ( - self.ds.config("force_https_urls") + self.ds.setting("force_https_urls") and scope["type"] == "http" and scope.get("scheme") != "https" ): @@ -1019,26 +2165,52 @@ class DatasetteRouter: break scope_modifications["actor"] = actor or default_actor scope = dict(scope, **scope_modifications) - for regex, view in self.routes: - match = regex.match(path) - if match is not None: - new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - request.scope = new_scope - try: - response = await view(request, send) - if response: - self.ds._write_messages_to_response(request, response) - await response.asgi_send(send) - return - except NotFound as exception: - return await self.handle_404(request, send, exception) - except Exception as exception: - return await self.handle_500(request, send, exception) - return await self.handle_404(request, send) + + match, view = resolve_routes(self.routes, path) + + if match is None: + return await self.handle_404(request, send) + + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + request.scope = new_scope + try: + response = await view(request, send) + if response: + self.ds._write_messages_to_response(request, response) + await response.asgi_send(send) + return + except NotFound as exception: + return await self.handle_404(request, send, exception) + except Forbidden as exception: + # Try the forbidden() plugin hook + for custom_response in pm.hook.forbidden( + datasette=self.ds, request=request, message=exception.args[0] + ): + custom_response = await await_me_maybe(custom_response) + assert ( + custom_response + ), "Default forbidden() hook should have been called" + return await custom_response.asgi_send(send) + except Exception as exception: + return await self.handle_exception(request, send, exception) async def handle_404(self, request, send, exception=None): + # If path contains % encoding, redirect to tilde encoding + if "%" in request.path: + # Try the same path but with "%" replaced by "~" + # and "~" replaced with "~7E" + # and "." replaced with "~2E" + new_path = ( + request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E") + ) + if request.query_string: + new_path += "?{}".format(request.query_string) + await asgi_send_redirect(send, new_path) + return # If URL has a trailing slash, redirect to URL without it - path = request.scope.get("raw_path", request.scope["path"].encode("utf8")) + path = request.scope.get( + "raw_path", request.scope["path"].encode("utf8") + ).partition(b"?")[0] context = {} if path.endswith(b"/"): path = path.rstrip(b"/") @@ -1047,17 +2219,28 @@ class DatasetteRouter: await asgi_send_redirect(send, path.decode("latin1")) else: # Is there a pages/* template matching this path? 
- template_path = ( - os.path.join("pages", *request.scope["path"].split("/")) + ".html" - ) + route_path = request.scope.get("route_path", request.scope["path"]) + # Jinja requires template names to use "/" even on Windows + template_name = "pages" + route_path + ".html" + # Build a list of pages/blah/{name}.html matching expressions + environment = self.ds.get_jinja_environment(request) + pattern_templates = [ + filepath + for filepath in environment.list_templates() + if "{" in filepath and filepath.startswith("pages/") + ] + page_routes = [ + (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) + for filepath in pattern_templates + ] try: - template = self.ds.jinja_env.select_template([template_path]) + template = environment.select_template([template_name]) except TemplateNotFound: template = None if template is None: # Try for a pages/blah/{name}.html template match - for regex, wildcard_template in self.page_routes: - match = regex.match(request.scope["path"]) + for regex, wildcard_template in page_routes: + match = regex.match(route_path) if match is not None: context.update(match.groupdict()) template = wildcard_template @@ -1099,7 +2282,7 @@ class DatasetteRouter: view_name="page", ) except NotFoundExplicit as e: - await self.handle_500(request, send, e) + await self.handle_exception(request, send, e) return # Pull content-type out into separate parameter content_type = "text/html; charset=utf-8" @@ -1114,72 +2297,23 @@ class DatasetteRouter: content_type=content_type, ) else: - await self.handle_500(request, send, exception or NotFound("404")) + await self.handle_exception(request, send, exception or NotFound("404")) - async def handle_500(self, request, send, exception): - if self.ds.pdb: - import pdb + async def handle_exception(self, request, send, exception): + responses = [] + for hook in pm.hook.handle_exception( + datasette=self.ds, + request=request, + exception=exception, + ): + response = await await_me_maybe(hook) + if response is not None: + responses.append(response) - pdb.post_mortem(exception.__traceback__) - - title = None - if isinstance(exception, Forbidden): - status = 403 - info = {} - message = exception.args[0] - # Try the forbidden() plugin hook - for custom_response in pm.hook.forbidden( - datasette=self.ds, request=request, message=message - ): - custom_response = await await_me_maybe(custom_response) - if custom_response is not None: - await custom_response.asgi_send(send) - return - elif isinstance(exception, Base400): - status = exception.status - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.messagge_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = ["{}.html".format(status), "error.html"] - info.update( - { - "ok": False, - "error": message, - "status": status, - "title": title, - } - ) - headers = {} - if self.ds.cors: - headers["Access-Control-Allow-Origin"] = "*" - if request.path.split("?")[0].endswith(".json"): - await asgi_send_json(send, info, status=status, headers=headers) - else: - template = self.ds.jinja_env.select_template(templates) - await asgi_send_html( - send, - await template.render_async( - dict( - info, - urls=self.ds.urls, - app_css_hash=self.ds.app_css_hash(), - menu_links=lambda: [], - ) - ), - status=status, - headers=headers, - ) + assert responses, "Default 
exception handler should have returned something" + # Even if there are multiple responses use just the first one + response = responses[0] + await response.asgi_send(send) _cleaner_task_str_re = re.compile(r"\S*site-packages/") @@ -1193,7 +2327,44 @@ def _cleaner_task_str(task): return _cleaner_task_str_re.sub("", s) -def wrap_view(view_fn, datasette): +def wrap_view(view_fn_or_class, datasette): + is_function = isinstance(view_fn_or_class, types.FunctionType) + if is_function: + return wrap_view_function(view_fn_or_class, datasette) + else: + if not isinstance(view_fn_or_class, type): + raise ValueError("view_fn_or_class must be a function or a class") + return wrap_view_class(view_fn_or_class, datasette) + + +def wrap_view_class(view_class, datasette): + async def async_view_for_class(request, send): + instance = view_class() + if inspect.iscoroutinefunction(instance.__call__): + return await async_call_with_supported_arguments( + instance.__call__, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + else: + return call_with_supported_arguments( + instance.__call__, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + + async_view_for_class.view_class = view_class + return async_view_for_class + + +def wrap_view_function(view_fn, datasette): + @functools.wraps(view_fn) async def async_view_fn(request, send): if inspect.iscoroutinefunction(view_fn): response = await async_call_with_supported_arguments( @@ -1219,7 +2390,23 @@ def wrap_view(view_fn, datasette): return async_view_fn -_curly_re = re.compile(r"(\{.*?\})") +def permanent_redirect(path, forward_query_string=False, forward_rest=False): + return wrap_view( + lambda request, send: Response.redirect( + path + + (request.url_vars["rest"] if forward_rest else "") + + ( + ("?" + request.query_string) + if forward_query_string and request.query_string + else "" + ), + status=301, + ), + datasette=None, + ) + + +_curly_re = re.compile(r"({.*?})") def route_pattern_from_filepath(filepath): @@ -1229,7 +2416,7 @@ def route_pattern_from_filepath(filepath): re_bits = ["/"] for bit in _curly_re.split(filepath): if _curly_re.match(bit): - re_bits.append("(?P<{}>[^/]*)".format(bit[1:-1])) + re_bits.append(f"(?P<{bit[1:-1]}>[^/]*)") else: re_bits.append(re.escape(bit)) return re.compile("^" + "".join(re_bits) + "$") @@ -1240,85 +2427,113 @@ class NotFoundExplicit(NotFound): class DatasetteClient: - def __init__(self, ds): - self.app = ds.app() + """Internal HTTP client for making requests to a Datasette instance. - def _fix(self, path): - if path.startswith("/"): - path = "http://localhost{}".format(path) - return path + Used for testing and for internal operations that need to make HTTP requests + to the Datasette app without going through an actual HTTP server. 
+ """ - async def get(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.get(self._fix(path), **kwargs) - - async def options(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.options(self._fix(path), **kwargs) - - async def head(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.head(self._fix(path), **kwargs) - - async def post(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.post(self._fix(path), **kwargs) - - async def put(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.put(self._fix(path), **kwargs) - - async def patch(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.patch(self._fix(path), **kwargs) - - async def delete(self, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.delete(self._fix(path), **kwargs) - - async def request(self, method, path, **kwargs): - async with httpx.AsyncClient(app=self.app) as client: - return await client.request(method, self._fix(path), **kwargs) - - -class Urls: def __init__(self, ds): self.ds = ds - def path(self, path): + @property + def app(self): + return self.ds.app() + + def actor_cookie(self, actor): + # Utility method, mainly for tests + return self.ds.sign({"a": actor}, "actor") + + def _fix(self, path, avoid_path_rewrites=False): + if not isinstance(path, PrefixedUrlString) and not avoid_path_rewrites: + path = self.ds.urls.path(path) if path.startswith("/"): - path = path[1:] - return self.ds.config("base_url") + path + path = f"http://localhost{path}" + return path - def instance(self): - return self.path("") + async def _request(self, method, path, skip_permission_checks=False, **kwargs): + from datasette.permissions import SkipPermissions - def static(self, path): - return self.path("-/static/{}".format(path)) + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) - def static_plugins(self, plugin, path): - return self.path("-/static-plugins/{}/{}".format(plugin, path)) - - def logout(self): - return self.path("-/logout") - - def database(self, database): - db = self.ds.databases[database] - if self.ds.config("hash_urls") and db.hash: - return self.path("{}-{}".format(database, db.hash[:HASH_LENGTH])) - else: - return self.path(database) - - def table(self, database, table): - return "{}/{}".format(self.database(database), urllib.parse.quote_plus(table)) - - def query(self, database, query): - return "{}/{}".format(self.database(database), urllib.parse.quote_plus(query)) - - def row(self, database, table, row_path): - return "{}/{}".format(self.table(database, table), row_path) - - def row_blob(self, database, table, row_path, column): - return self.table(database, table) + "/{}.blob?_blob_column={}".format( - row_path, urllib.parse.quote_plus(column) + async def get(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "get", 
path, skip_permission_checks=skip_permission_checks, **kwargs ) + + async def options(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "options", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def head(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "head", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def post(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "post", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def put(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "put", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def patch(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "patch", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def delete(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "delete", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def request(self, method, path, skip_permission_checks=False, **kwargs): + """Make an HTTP request with the specified method. + + Args: + method: HTTP method (e.g., "GET", "POST", "PUT") + path: The path to request + skip_permission_checks: If True, bypass all permission checks for this request + **kwargs: Additional arguments to pass to httpx + + Returns: + httpx.Response: The response from the request + """ + from datasette.permissions import SkipPermissions + + avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py index 794b153e..4d8c6bea 100644 --- a/datasette/blob_renderer.py +++ b/datasette/blob_renderer.py @@ -9,10 +9,10 @@ _BLOB_HASH = "_blob_hash" async def render_blob(datasette, database, rows, columns, request, table, view_name): if _BLOB_COLUMN not in request.args: - raise BadRequest("?{}= is required".format(_BLOB_COLUMN)) + raise BadRequest(f"?{_BLOB_COLUMN}= is required") blob_column = request.args[_BLOB_COLUMN] if blob_column not in columns: - raise BadRequest("{} is not a valid column".format(blob_column)) + raise BadRequest(f"{blob_column} is not a valid column") # If ?_blob_hash= provided, use that to select the row - otherwise use first row blob_hash = None @@ -34,15 +34,15 @@ async def render_blob(datasette, database, rows, columns, request, table, view_n filename_bits = [] if table: filename_bits.append(to_css_class(table)) - if "pk_path" in request.url_vars: - filename_bits.append(request.url_vars["pk_path"]) + if "pks" in request.url_vars: + filename_bits.append(request.url_vars["pks"]) filename_bits.append(to_css_class(blob_column)) if blob_hash: filename_bits.append(blob_hash[:6]) filename = "-".join(filename_bits) + ".blob" headers = { "X-Content-Type-Options": "nosniff", - "Content-Disposition": 'attachment; 
filename="{}"'.format(filename), + "Content-Disposition": f'attachment; filename="{filename}"', } return Response( body=value or b"", diff --git a/datasette/cli.py b/datasette/cli.py index 04d2950b..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -2,96 +2,156 @@ import asyncio import uvicorn import click from click import formatting +from click.types import CompositeParamType from click_default_group import DefaultGroup +import functools import json import os import pathlib +from runpy import run_module import shutil from subprocess import call import sys -from runpy import run_module +import textwrap import webbrowser -from .app import Datasette, DEFAULT_CONFIG, CONFIG_OPTIONS, pm +from .app import ( + Datasette, + DEFAULT_SETTINGS, + SETTINGS, + SQLITE_LIMIT_ATTACHED, + pm, +) from .utils import ( + LoadExtension, + StartupError, check_connection, + deep_dict_update, + find_spatialite, parse_metadata, ConnectionProblem, SpatialiteConnectionProblem, initial_path_for_datasette, + pairs_to_nested_config, temporary_docker_directory, value_as_boolean, SpatialiteNotFound, StaticMount, ValueAsBooleanError, ) +from .utils.sqlite import sqlite3 from .utils.testing import TestClient from .version import __version__ -class Config(click.ParamType): - name = "config" +def run_sync(coro_func): + """Run an async callable to completion on a fresh event loop.""" + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete(coro_func()) + finally: + asyncio.set_event_loop(None) + loop.close() + + +# Use Rich for tracebacks if it is installed +try: + from rich.traceback import install + + install(show_locals=True) +except ImportError: + pass + + +class Setting(CompositeParamType): + name = "setting" + arity = 2 def convert(self, config, param, ctx): - if ":" not in config: - self.fail('"{}" should be name:value'.format(config), param, ctx) - return - name, value = config.split(":", 1) - if name not in DEFAULT_CONFIG: - self.fail( - "{} is not a valid option (--help-config to see all)".format(name), - param, - ctx, - ) - return - # Type checking - default = DEFAULT_CONFIG[name] - if isinstance(default, bool): - try: - return name, value_as_boolean(value) - except ValueAsBooleanError: - self.fail( - '"{}" should be on/off/true/false/1/0'.format(name), param, ctx + name, value = config + if name in DEFAULT_SETTINGS: + # For backwards compatibility with how this worked prior to + # Datasette 1.0, we turn bare setting names into setting.name + # Type checking for those older settings + default = DEFAULT_SETTINGS[name] + name = "settings.{}".format(name) + if isinstance(default, bool): + try: + return name, "true" if value_as_boolean(value) else "false" + except ValueAsBooleanError: + self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) + elif isinstance(default, int): + if not value.isdigit(): + self.fail(f'"{name}" should be an integer', param, ctx) + return name, value + elif isinstance(default, str): + return name, value + else: + # Should never happen: + self.fail("Invalid option") + return name, value + + +def sqlite_extensions(fn): + fn = click.option( + "sqlite_extensions", + "--load-extension", + type=LoadExtension(), + envvar="DATASETTE_LOAD_EXTENSION", + multiple=True, + help="Path to a SQLite extension to load, and optional entrypoint", + )(fn) + + # Wrap it in a custom error handler + @functools.wraps(fn) + def wrapped(*args, **kwargs): + try: + return fn(*args, **kwargs) + except AttributeError as e: + if 
"enable_load_extension" in str(e): + raise click.ClickException( + textwrap.dedent( + """ + Your Python installation does not have the ability to load SQLite extensions. + + More information: https://datasette.io/help/extensions + """ + ).strip() ) - return - elif isinstance(default, int): - if not value.isdigit(): - self.fail('"{}" should be an integer'.format(name), param, ctx) - return - return name, int(value) - elif isinstance(default, str): - return name, value - else: - # Should never happen: - self.fail("Invalid option") + raise + + return wrapped @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) @click.version_option(version=__version__) def cli(): """ - Datasette! + Datasette is an open source multi-tool for exploring and publishing data + + \b + About Datasette: https://datasette.io/ + Full documentation: https://docs.datasette.io/ """ @cli.command() @click.argument("files", type=click.Path(exists=True), nargs=-1) @click.option("--inspect-file", default="-") -@click.option( - "sqlite_extensions", - "--load-extension", - envvar="SQLITE_EXTENSIONS", - multiple=True, - help="Path to a SQLite extension to load", -) +@sqlite_extensions def inspect(files, inspect_file, sqlite_extensions): - app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) + """ + Generate JSON summary of provided database files + + This can then be passed to "datasette --inspect-file" to speed up count + operations against immutable database files. + """ + inspect_data = run_sync(lambda: inspect_(files, sqlite_extensions)) if inspect_file == "-": - out = sys.stdout + sys.stdout.write(json.dumps(inspect_data, indent=2)) else: - out = open(inspect_file, "w") - loop = asyncio.get_event_loop() - inspect_data = loop.run_until_complete(inspect_(files, sqlite_extensions)) - out.write(json.dumps(inspect_data, indent=2)) + with open(inspect_file, "w") as fp: + fp.write(json.dumps(inspect_data, indent=2)) async def inspect_(files, sqlite_extensions): @@ -113,7 +173,7 @@ async def inspect_(files, sqlite_extensions): @cli.group() def publish(): - "Publish specified SQLite database files to the internet along with a Datasette-powered interface and API" + """Publish specified SQLite database files to the internet along with a Datasette-powered interface and API""" pass @@ -123,15 +183,23 @@ pm.hook.publish_subcommand(publish=publish) @cli.command() @click.option("--all", help="Include built-in default plugins", is_flag=True) +@click.option( + "--requirements", help="Output requirements.txt of installed plugins", is_flag=True +) @click.option( "--plugins-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), help="Path to directory containing custom plugins", ) -def plugins(all, plugins_dir): - "List currently available plugins" +def plugins(all, requirements, plugins_dir): + """List currently installed plugins""" app = Datasette([], plugins_dir=plugins_dir) - click.echo(json.dumps(app._plugins(all=all), indent=4)) + if requirements: + for plugin in app._plugins(): + if plugin["version"]: + click.echo("{}=={}".format(plugin["name"], plugin["version"])) + else: + click.echo(json.dumps(app._plugins(all=all), indent=4)) @cli.command() @@ -148,7 +216,7 @@ def plugins(all, plugins_dir): help="Path to JSON/YAML file containing metadata to publish", ) @click.option("--extra-options", help="Extra options to pass to datasette serve") -@click.option("--branch", help="Install datasette from a GitHub branch e.g. 
master") +@click.option("--branch", help="Install datasette from a GitHub branch e.g. main") @click.option( "--template-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), @@ -180,6 +248,7 @@ def plugins(all, plugins_dir): "-p", "--port", default=8001, + type=click.IntRange(1, 65535), help="Port to run the server on, defaults to 8001", ) @click.option("--title", help="Title for metadata") @@ -203,9 +272,9 @@ def package( version_note, secret, port, - **extra_metadata + **extra_metadata, ): - "Package specified SQLite files into a new datasette Docker container" + """Package SQLite files into a Datasette Docker container""" if not shutil.which("docker"): click.secho( ' The package command requires "docker" to be installed and configured ', @@ -240,15 +309,32 @@ def package( @cli.command() -@click.argument("packages", nargs=-1, required=True) +@click.argument("packages", nargs=-1) @click.option( "-U", "--upgrade", is_flag=True, help="Upgrade packages to latest version" ) -def install(packages, upgrade): - "Install Python packages - e.g. Datasette plugins - into the same environment as Datasette" +@click.option( + "-r", + "--requirement", + type=click.Path(exists=True), + help="Install from requirements file", +) +@click.option( + "-e", + "--editable", + help="Install a project in editable mode from this path", +) +def install(packages, upgrade, requirement, editable): + """Install plugins and packages from PyPI into the same environment as Datasette""" + if not packages and not requirement and not editable: + raise click.UsageError("Please specify at least one package to install") args = ["pip", "install"] if upgrade: args += ["--upgrade"] + if editable: + args += ["--editable", editable] + if requirement: + args += ["-r", requirement] args += list(packages) sys.argv = args run_module("pip", run_name="__main__") @@ -258,13 +344,13 @@ def install(packages, upgrade): @click.argument("packages", nargs=-1, required=True) @click.option("-y", "--yes", is_flag=True, help="Don't ask for confirmation") def uninstall(packages, yes): - "Uninstall Python packages (e.g. plugins) from the Datasette environment" + """Uninstall plugins and Python packages from the Datasette environment""" sys.argv = ["pip", "uninstall"] + list(packages) + (["-y"] if yes else []) run_module("pip", run_name="__main__") @cli.command() -@click.argument("files", type=click.Path(exists=True), nargs=-1) +@click.argument("files", type=click.Path(), nargs=-1) @click.option( "-i", "--immutable", @@ -286,23 +372,22 @@ def uninstall(packages, yes): "-p", "--port", default=8001, + type=click.IntRange(0, 65535), help="Port for server, defaults to 8001. 
Use -p 0 to automatically assign an available port.", ) +@click.option( + "--uds", + help="Bind to a Unix domain socket", +) @click.option( "--reload", is_flag=True, - help="Automatically reload if database or code change detected - useful for development", + help="Automatically reload if code or metadata change detected - useful for development", ) @click.option( "--cors", is_flag=True, help="Enable CORS by serving Access-Control-Allow-Origin: *" ) -@click.option( - "sqlite_extensions", - "--load-extension", - envvar="SQLITE_EXTENSIONS", - multiple=True, - help="Path to a SQLite extension to load", -) +@sqlite_extensions @click.option( "--inspect-file", help='Path to JSON file created using "datasette inspect"' ) @@ -328,11 +413,19 @@ def uninstall(packages, yes): help="Serve static files from this directory at /MOUNT/...", multiple=True, ) -@click.option("--memory", is_flag=True, help="Make :memory: database available") +@click.option("--memory", is_flag=True, help="Make /_memory database available") @click.option( + "-c", "--config", - type=Config(), - help="Set config option using configname:value docs.datasette.io/en/stable/config.html", + type=click.File(mode="r"), + help="Path to JSON/YAML Datasette configuration file", +) +@click.option( + "-s", + "--setting", + "settings", + type=Setting(), + help="nested.key, value setting to use in Datasette configuration", multiple=True, ) @click.option( @@ -345,12 +438,30 @@ def uninstall(packages, yes): help="Output URL that sets a cookie authenticating the root user", is_flag=True, ) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) @click.option( "--get", help="Run an HTTP GET request against this path, print results and exit", ) +@click.option( + "--headers", + is_flag=True, + help="Include HTTP headers in --get output", +) +@click.option( + "--token", + help="API token to send with --get requests", +) +@click.option( + "--actor", + help="Actor to use for --get requests (JSON string)", +) @click.option("--version-note", help="Additional note to show on /-/versions") -@click.option("--help-config", is_flag=True, help="Show available config options") +@click.option("--help-settings", is_flag=True, help="Show available settings") @click.option("--pdb", is_flag=True, help="Launch debugger on any errors") @click.option( "-o", @@ -359,11 +470,42 @@ def uninstall(packages, yes): is_flag=True, help="Open Datasette in your web browser", ) +@click.option( + "--create", + is_flag=True, + help="Create database files if they do not exist", +) +@click.option( + "--crossdb", + is_flag=True, + help="Enable cross-database joins using the /_memory database", +) +@click.option( + "--nolock", + is_flag=True, + help="Ignore locking, open locked files in read-only mode", +) +@click.option( + "--ssl-keyfile", + help="SSL key file", + envvar="DATASETTE_SSL_KEYFILE", +) +@click.option( + "--ssl-certfile", + help="SSL certificate file", + envvar="DATASETTE_SSL_CERTFILE", +) +@click.option( + "--internal", + type=click.Path(), + help="Path to a persistent Datasette internal SQLite database", +) def serve( files, immutable, host, port, + uds, reload, cors, sqlite_extensions, @@ -374,23 +516,34 @@ def serve( static, memory, config, + settings, secret, root, + default_deny, get, + headers, + token, + actor, version_note, - help_config, + help_settings, pdb, open_browser, + create, + crossdb, + nolock, + ssl_keyfile, + ssl_certfile, + internal, return_instance=False, ): """Serve up specified SQLite database files with a web 
UI""" - if help_config: + if help_settings: formatter = formatting.HelpFormatter() - with formatter.section("Config options"): + with formatter.section("Settings"): formatter.write_dl( [ - (option.name, "{} (default={})".format(option.help, option.default)) - for option in CONFIG_OPTIONS + (option.name, f"{option.help} (default={option.default})") + for option in SETTINGS ] ) click.echo(formatter.getvalue()) @@ -401,58 +554,150 @@ def serve( reloader = hupper.start_reloader("datasette.cli.serve") if immutable: reloader.watch_files(immutable) + if config: + reloader.watch_files([config.name]) if metadata: reloader.watch_files([metadata.name]) inspect_data = None if inspect_file: - inspect_data = json.load(open(inspect_file)) + with open(inspect_file) as fp: + inspect_data = json.load(fp) metadata_data = None if metadata: metadata_data = parse_metadata(metadata.read()) + config_data = None + if config: + config_data = parse_metadata(config.read()) + + config_data = config_data or {} + + # Merge in settings from -s/--setting + if settings: + settings_updates = pairs_to_nested_config(settings) + # Merge recursively, to avoid over-writing nested values + # https://github.com/simonw/datasette/issues/2389 + deep_dict_update(config_data, settings_updates) + kwargs = dict( immutables=immutable, cache_headers=not reload, cors=cors, inspect_data=inspect_data, + config=config_data, metadata=metadata_data, sqlite_extensions=sqlite_extensions, template_dir=template_dir, plugins_dir=plugins_dir, static_mounts=static, - config=dict(config), + settings=None, # These are passed in config= now memory=memory, secret=secret, version_note=version_note, pdb=pdb, + crossdb=crossdb, + nolock=nolock, + internal=internal, + default_deny=default_deny, ) - # if files is a single directory, use that as config_dir= - if 1 == len(files) and os.path.isdir(files[0]): - kwargs["config_dir"] = pathlib.Path(files[0]) - files = [] + # Separate directories from files + directories = [f for f in files if os.path.isdir(f)] + file_paths = [f for f in files if not os.path.isdir(f)] + + # Handle config_dir - only one directory allowed + if len(directories) > 1: + raise click.ClickException( + "Cannot pass multiple directories. Pass a single directory as config_dir." 
+ ) + elif len(directories) == 1: + kwargs["config_dir"] = pathlib.Path(directories[0]) + + # Verify list of files, create if needed (and --create) + for file in file_paths: + if not pathlib.Path(file).exists(): + if create: + sqlite3.connect(file).execute("vacuum") + else: + raise click.ClickException( + "Invalid value for '[FILES]...': Path '{}' does not exist.".format( + file + ) + ) + + # Check for duplicate files by resolving all paths to their absolute forms + # Collect all database files that will be loaded (explicit files + config_dir files) + all_db_files = [] + + # Add explicit files + for file in file_paths: + all_db_files.append((file, pathlib.Path(file).resolve())) + + # Add config_dir databases if config_dir is set + if "config_dir" in kwargs: + config_dir = kwargs["config_dir"] + for ext in ("db", "sqlite", "sqlite3"): + for db_file in config_dir.glob(f"*.{ext}"): + all_db_files.append((str(db_file), db_file.resolve())) + + # Check for duplicates + seen = {} + for original_path, resolved_path in all_db_files: + if resolved_path in seen: + raise click.ClickException( + f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' " + f"both refer to {resolved_path}" + ) + seen[resolved_path] = original_path + + files = file_paths try: ds = Datasette(files, **kwargs) except SpatialiteNotFound: raise click.ClickException("Could not find SpatiaLite extension") + except StartupError as e: + raise click.ClickException(e.args[0]) if return_instance: # Private utility mechanism for writing unit tests return ds # Run the "startup" plugin hooks - asyncio.get_event_loop().run_until_complete(ds.invoke_startup()) + run_sync(ds.invoke_startup) - # Run async sanity checks - but only if we're not under pytest - asyncio.get_event_loop().run_until_complete(check_databases(ds)) + # Run async soundness checks - but only if we're not under pytest + run_sync(lambda: check_databases(ds)) + + if headers and not get: + raise click.ClickException("--headers can only be used with --get") + + if token and not get: + raise click.ClickException("--token can only be used with --get") if get: client = TestClient(ds) - response = client.get(get) - click.echo(response.text) + request_headers = {} + if token: + request_headers["Authorization"] = "Bearer {}".format(token) + cookies = {} + if actor: + cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) + response = client.get(get, headers=request_headers, cookies=cookies) + + if headers: + # Output HTTP status code, headers, two newlines, then the response body + click.echo(f"HTTP/1.1 {response.status}") + for key, value in response.headers.items(): + click.echo(f"{key}: {value}") + if response.text: + click.echo() + click.echo(response.text) + else: + click.echo(response.text) + exit_code = 0 if response.status == 200 else 1 sys.exit(exit_code) return @@ -460,21 +705,155 @@ def serve( # Start the server url = None if root: + ds.root_enabled = True url = "http://{}:{}{}?token={}".format( host, port, ds.urls.path("-/auth-token"), ds._root_token ) - print(url) + click.echo(url) if open_browser: if url is None: # Figure out most convenient URL - to table, database or homepage - path = asyncio.get_event_loop().run_until_complete( - initial_path_for_datasette(ds) - ) - url = "http://{}:{}{}".format(host, port, path) + path = run_sync(lambda: initial_path_for_datasette(ds)) + url = f"http://{host}:{port}{path}" webbrowser.open(url) - uvicorn.run( - ds.app(), host=host, port=port, log_level="info", lifespan="on", workers=1 + uvicorn_kwargs = dict( 
+ host=host, port=port, log_level="info", lifespan="on", workers=1 ) + if uds: + uvicorn_kwargs["uds"] = uds + if ssl_keyfile: + uvicorn_kwargs["ssl_keyfile"] = ssl_keyfile + if ssl_certfile: + uvicorn_kwargs["ssl_certfile"] = ssl_certfile + uvicorn.run(ds.app(), **uvicorn_kwargs) + + +@cli.command() +@click.argument("id") +@click.option( + "--secret", + help="Secret used for signing the API tokens", + envvar="DATASETTE_SECRET", + required=True, +) +@click.option( + "-e", + "--expires-after", + help="Token should expire after this many seconds", + type=int, +) +@click.option( + "alls", + "-a", + "--all", + type=str, + metavar="ACTION", + multiple=True, + help="Restrict token to this action", +) +@click.option( + "databases", + "-d", + "--database", + type=(str, str), + metavar="DB ACTION", + multiple=True, + help="Restrict token to this action on this database", +) +@click.option( + "resources", + "-r", + "--resource", + type=(str, str, str), + metavar="DB RESOURCE ACTION", + multiple=True, + help="Restrict token to this action on this database resource (a table, SQL view or named query)", +) +@click.option( + "--debug", + help="Show decoded token", + is_flag=True, +) +@click.option( + "--plugins-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom plugins", +) +def create_token( + id, secret, expires_after, alls, databases, resources, debug, plugins_dir +): + """ + Create a signed API token for the specified actor ID + + Example: + + datasette create-token root --secret mysecret + + To allow only "view-database-download" for all databases: + + \b + datasette create-token root --secret mysecret \\ + --all view-database-download + + To allow "create-table" against a specific database: + + \b + datasette create-token root --secret mysecret \\ + --database mydb create-table + + To allow "insert-row" against a specific table: + + \b + datasette create-token root --secret myscret \\ + --resource mydb mytable insert-row + + Restricted actions can be specified multiple times using + multiple --all, --database, and --resource options. + + Add --debug to see a decoded version of the token. 
+ """ + ds = Datasette(secret=secret, plugins_dir=plugins_dir) + + # Run ds.invoke_startup() in an event loop + run_sync(ds.invoke_startup) + + # Warn about any unknown actions + actions = [] + actions.extend(alls) + actions.extend([p[1] for p in databases]) + actions.extend([p[2] for p in resources]) + for action in actions: + if not ds.actions.get(action): + click.secho( + f" Unknown permission: {action} ", + fg="red", + err=True, + ) + + restrict_database = {} + for database, action in databases: + restrict_database.setdefault(database, []).append(action) + restrict_resource = {} + for database, resource, action in resources: + restrict_resource.setdefault(database, {}).setdefault(resource, []).append( + action + ) + + token = ds.create_token( + id, + expires_after=expires_after, + restrict_all=alls, + restrict_database=restrict_database, + restrict_resource=restrict_resource, + ) + click.echo(token) + if debug: + encoded = token[len("dstok_") :] + click.echo("\nDecoded:\n") + click.echo(json.dumps(ds.unsign(encoded, namespace="token"), indent=2)) + + +pm.hook.register_commands(cli=cli) async def check_databases(ds): @@ -484,14 +863,31 @@ async def check_databases(ds): try: await database.execute_fn(check_connection) except SpatialiteConnectionProblem: + suggestion = "" + try: + find_spatialite() + suggestion = "\n\nTry adding the --load-extension=spatialite option." + except SpatialiteNotFound: + pass raise click.UsageError( "It looks like you're trying to load a SpatiaLite" - " database without first loading the SpatiaLite module." - "\n\nRead more: https://docs.datasette.io/en/stable/spatialite.html" + + " database without first loading the SpatiaLite module." + + suggestion + + "\n\nRead more: https://docs.datasette.io/en/stable/spatialite.html" ) except ConnectionProblem as e: raise click.UsageError( - "Connection to {} failed check: {}".format( - database.path, str(e.args[0]) - ) + f"Connection to {database.path} failed check: {str(e.args[0])}" ) + # If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning + if ( + ds.crossdb + and len([db for db in ds.databases.values() if not db.is_memory]) + > SQLITE_LIMIT_ATTACHED + ): + msg = ( + "Warning: --crossdb only works with the first {} attached databases".format( + SQLITE_LIMIT_ATTACHED + ) + ) + click.echo(click.style(msg, bold=True, fg="yellow"), err=True) diff --git a/datasette/database.py b/datasette/database.py index a9f39253..e5858128 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,8 +1,10 @@ import asyncio -import contextlib +from collections import namedtuple from pathlib import Path import janus import queue +import sqlite_utils +import sys import threading import uuid @@ -13,71 +15,212 @@ from .utils import ( detect_spatialite, get_all_foreign_keys, get_outbound_foreign_keys, + md5_not_usedforsecurity, sqlite_timelimit, sqlite3, table_columns, table_column_details, ) +from .utils.sqlite import sqlite_version from .inspect import inspect_hash connections = threading.local() +AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) + class Database: - def __init__(self, ds, path=None, is_mutable=False, is_memory=False): + # For table counts stop at this many rows: + count_limit = 10000 + _thread_local_id_counter = 1 + + def __init__( + self, + ds, + path=None, + is_mutable=True, + is_memory=False, + memory_name=None, + mode=None, + ): + self.name = None + self._thread_local_id = f"x{self._thread_local_id_counter}" + Database._thread_local_id_counter += 1 + self.route = None self.ds 
= ds self.path = path self.is_mutable = is_mutable self.is_memory = is_memory - self.hash = None + self.memory_name = memory_name + if memory_name is not None: + self.is_memory = True + self.cached_hash = None self.cached_size = None - self.cached_table_counts = None + self._cached_table_counts = None self._write_thread = None self._write_queue = None - if not self.is_mutable and not self.is_memory: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size - # Maybe use self.ds.inspect_data to populate cached_table_counts - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.cached_table_counts = { - key: value["count"] - for key, value in self.ds.inspect_data[self.name]["tables"].items() - } + # These are used when in non-threaded mode: + self._read_connection = None + self._write_connection = None + # This is used to track all file connections so they can be closed + self._all_file_connections = [] + self.mode = mode + + @property + def cached_table_counts(self): + if self._cached_table_counts is not None: + return self._cached_table_counts + # Maybe use self.ds.inspect_data to populate cached_table_counts + if self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self._cached_table_counts = { + key: value["count"] + for key, value in self.ds.inspect_data[self.name]["tables"].items() + } + return self._cached_table_counts + + @property + def color(self): + if self.hash: + return self.hash[:6] + return md5_not_usedforsecurity(self.name)[:6] + + def suggest_name(self): + if self.path: + return Path(self.path).stem + elif self.memory_name: + return self.memory_name + else: + return "db" def connect(self, write=False): + extra_kwargs = {} + if write: + extra_kwargs["isolation_level"] = "IMMEDIATE" + if self.memory_name: + uri = "file:{}?mode=memory&cache=shared".format(self.memory_name) + conn = sqlite3.connect( + uri, uri=True, check_same_thread=False, **extra_kwargs + ) + if not write: + conn.execute("PRAGMA query_only=1") + return conn if self.is_memory: - return sqlite3.connect(":memory:") + return sqlite3.connect(":memory:", uri=True) + # mode=ro or immutable=1? 
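+ # immutable=1 tells SQLite the file can never change, so it skips locking and change detection entirely; mode=ro opens read-only but still honours other writers' locks.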
if self.is_mutable: qs = "?mode=ro" + if self.ds.nolock: + qs += "&nolock=1" else: qs = "?immutable=1" assert not (write and not self.is_mutable) if write: qs = "" - return sqlite3.connect( - "file:{}{}".format(self.path, qs), uri=True, check_same_thread=False + if self.mode is not None: + qs = f"?mode={self.mode}" + conn = sqlite3.connect( + f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs ) + self._all_file_connections.append(conn) + return conn - async def execute_write(self, sql, params=None, block=False): + def close(self): + # Close all connections - useful to avoid running out of file handles in tests + for connection in self._all_file_connections: + connection.close() + + async def execute_write(self, sql, params=None, block=True): def _inner(conn): - with conn: - return conn.execute(sql, params or []) + return conn.execute(sql, params or []) - return await self.execute_write_fn(_inner, block=block) + with trace("sql", database=self.name, sql=sql.strip(), params=params): + results = await self.execute_write_fn(_inner, block=block) + return results - async def execute_write_fn(self, fn, block=False): - task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") + async def execute_write_script(self, sql, block=True): + def _inner(conn): + return conn.executescript(sql) + + with trace("sql", database=self.name, sql=sql.strip(), executescript=True): + results = await self.execute_write_fn( + _inner, block=block, transaction=False + ) + return results + + async def execute_write_many(self, sql, params_seq, block=True): + def _inner(conn): + count = 0 + + def count_params(params): + nonlocal count + for param in params: + count += 1 + yield param + + return conn.executemany(sql, count_params(params_seq)), count + + with trace( + "sql", database=self.name, sql=sql.strip(), executemany=True + ) as kwargs: + results, count = await self.execute_write_fn(_inner, block=block) + kwargs["count"] = count + return results + + async def execute_isolated_fn(self, fn): + # Open a new connection just for the duration of this function + # blocking the write queue to avoid any writes occurring during it + if self.ds.executor is None: + # non-threaded mode + isolated_connection = self.connect(write=True) + try: + result = fn(isolated_connection) + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + return result + else: + # Threaded mode - send to write thread + return await self._send_to_write_thread(fn, isolated_connection=True) + + async def execute_write_fn(self, fn, block=True, transaction=True): + if self.ds.executor is None: + # non-threaded mode + if self._write_connection is None: + self._write_connection = self.connect(write=True) + self.ds._prepare_connection(self._write_connection, self.name) + if transaction: + with self._write_connection: + return fn(self._write_connection) + else: + return fn(self._write_connection) + else: + return await self._send_to_write_thread( + fn, block=block, transaction=transaction + ) + + async def _send_to_write_thread( + self, fn, block=True, isolated_connection=False, transaction=True + ): if self._write_queue is None: self._write_queue = queue.Queue() if self._write_thread is None: self._write_thread = threading.Thread( target=self._execute_writes, daemon=True ) + self._write_thread.name = "_execute_writes for database {}".format( + self.name + ) self._write_thread.start() + task_id = uuid.uuid5(uuid.NAMESPACE_DNS, 
"datasette.io") reply_queue = janus.Queue() - self._write_queue.put(WriteTask(fn, task_id, reply_queue)) + self._write_queue.put( + WriteTask(fn, task_id, reply_queue, isolated_connection, transaction) + ) if block: result = await reply_queue.async_q.get() if isinstance(result, Exception): @@ -94,6 +237,7 @@ class Database: conn = None try: conn = self.connect(write=True) + self.ds._prepare_connection(conn, self.name) except Exception as e: conn_exception = e while True: @@ -101,20 +245,49 @@ class Database: if conn_exception is not None: result = conn_exception else: - try: - result = task.fn(conn) - except Exception as e: - print(e) - result = e + if task.isolated_connection: + isolated_connection = self.connect(write=True) + try: + result = task.fn(isolated_connection) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + else: + try: + if task.transaction: + with conn: + result = task.fn(conn) + else: + result = task.fn(conn) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e task.reply_queue.sync_q.put(result) async def execute_fn(self, fn): + if self.ds.executor is None: + # non-threaded mode + if self._read_connection is None: + self._read_connection = self.connect() + self.ds._prepare_connection(self._read_connection, self.name) + return fn(self._read_connection) + + # threaded mode def in_thread(): - conn = getattr(connections, self.name, None) + conn = getattr(connections, self._thread_local_id, None) if not conn: conn = self.connect() self.ds._prepare_connection(conn, self.name) - setattr(connections, self.name, conn) + setattr(connections, self._thread_local_id, conn) return fn(conn) return await asyncio.get_event_loop().run_in_executor( @@ -156,11 +329,12 @@ class Database: if e.args == ("interrupted",): raise QueryInterrupted(e, sql, params) if log_sql_errors: - print( - "ERROR: conn={}, sql = {}, params = {}: {}".format( + sys.stderr.write( + "ERROR: conn={}, sql = {}, params = {}: {}\n".format( conn, repr(sql), params, e ) ) + sys.stderr.flush() raise if truncate: @@ -173,14 +347,34 @@ class Database: results = await self.execute_fn(sql_operation_in_thread) return results + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + self.cached_hash = inspect_hash(p) + return self.cached_hash + @property def size(self): - if self.is_memory: - return 0 if self.cached_size is not None: return self.cached_size - else: + elif self.is_memory: + return 0 + elif self.is_mutable: return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = Path(self.path).stat().st_size + return self.cached_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: @@ -191,7 +385,7 @@ class Database: try: table_count = ( await self.execute( - "select count(*) from [{}]".format(table), + f"select count(*) from (select * from [{table}] limit {self.count_limit + 1})", 
custom_time_limit=limit, ) ).rows[0][0] @@ -201,7 +395,7 @@ class Database: except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError): counts[table] = None if not self.is_mutable: - self.cached_table_counts = counts + self._cached_table_counts = counts return counts @property @@ -210,12 +404,18 @@ class Database: return None return Path(self.path).stat().st_mtime_ns - @property - def name(self): - if self.is_memory: - return ":memory:" - else: - return Path(self.path).stem + async def attached_databases(self): + # This used to be: + # select seq, name, file from pragma_database_list() where seq > 0 + # But SQLite prior to 3.16.0 doesn't support pragma functions + results = await self.execute("PRAGMA database_list;") + # {'seq': 0, 'name': 'main', 'file': ''} + return [ + AttachedDatabase(*row) + for row in results.rows + # Filter out the SQLite internal "temp" database, refs #2557 + if row["seq"] > 0 and row["name"] != "temp" + ] async def table_exists(self, table): results = await self.execute( @@ -223,6 +423,12 @@ class Database: ) return bool(results.rows) + async def view_exists(self, table): + results = await self.execute( + "select 1 from sqlite_master where type='view' and name=?", params=(table,) + ) + return bool(results.rows) + async def table_names(self): results = await self.execute( "select name from sqlite_master where type='table'" @@ -242,21 +448,48 @@ class Database: return await self.execute_fn(lambda conn: detect_fts(conn, table)) async def label_column_for_table(self, table): - explicit_label_column = self.ds.table_metadata(self.name, table).get( + explicit_label_column = (await self.ds.table_config(self.name, table)).get( "label_column" ) if explicit_label_column: return explicit_label_column - # If a table has two columns, one of which is ID, then label_column is the other one - column_names = await self.execute_fn(lambda conn: table_columns(conn, table)) + + def column_details(conn): + # Returns {column_name: (type, is_unique)} + db = sqlite_utils.Database(conn) + columns = db[table].columns_dict + indexes = db[table].indexes + details = {} + for name in columns: + is_unique = any( + index + for index in indexes + if index.columns == [name] and index.unique + ) + details[name] = (columns[name], is_unique) + return details + + column_details = await self.execute_fn(column_details) + # Is there just one unique column that's text? + unique_text_columns = [ + name + for name, (type_, is_unique) in column_details.items() + if is_unique and type_ is str + ] + if len(unique_text_columns) == 1: + return unique_text_columns[0] + + column_names = list(column_details.keys()) # Is there a name or title column? 
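+ # The check below is case-insensitive, so columns named "Name" or "TITLE" also qualify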
- name_or_title = [c for c in column_names if c in ("name", "title")] + name_or_title = [c for c in column_names if c.lower() in ("name", "title")] if name_or_title: return name_or_title[0] + # If a table has two columns, one of which is ID, then label_column is the other one if ( column_names and len(column_names) == 2 and ("id" in column_names or "pk" in column_names) + and not set(column_names) == {"id", "pk"} ): return [c for c in column_names if c not in ("id", "pk")][0] # Couldn't find a label: @@ -268,19 +501,107 @@ class Database: ) async def hidden_table_names(self): - # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r[0] - for r in ( - await self.execute( + hidden_tables = [] + # Add any tables marked as hidden in config + db_config = self.ds.config.get("databases", {}).get(self.name, {}) + if "tables" in db_config: + hidden_tables += [ + t for t in db_config["tables"] if db_config["tables"][t].get("hidden") + ] + + if sqlite_version()[1] >= 37: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with shadow_tables as ( + select name + from pragma_table_list + where [type] = 'shadow' + order by name + ), + core_tables as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + combined as ( + select name from shadow_tables + union all + select name from core_tables + ) + select name from combined order by 1 """ - select name from sqlite_master - where rootpage = 0 - and sql like '%VIRTUAL TABLE%USING FTS%' - """ ) - ).rows + ] + else: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + WITH base AS ( + SELECT name + FROM sqlite_master + WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + fts_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), ('_config')) + ), + fts5_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' + ), + fts5_shadow_tables AS ( + SELECT + printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name + FROM fts5_names + JOIN fts_suffixes + ), + fts3_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) + ), + fts3_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' + OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' + ), + fts3_shadow_tables AS ( + SELECT + printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name + FROM fts3_names + JOIN fts3_suffixes + ), + final AS ( + SELECT name FROM base + UNION ALL + SELECT name FROM fts5_shadow_tables + UNION ALL + SELECT name FROM fts3_shadow_tables + ) + SELECT name FROM final ORDER BY 1 + """ + ) + ] + # Also hide any FTS tables that have a content= argument + hidden_tables += [ + x[0] + for x in await self.execute( + """ + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%' + AND sql LIKE '%USING FTS%' + AND sql LIKE '%content=%' + """ + ) ] + has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: # Also hide Spatialite internal tables @@ -294,6 +615,9 @@ class Database: "sqlite_sequence", "views_geometry_columns", "virts_geometry_columns", + "data_licenses", + "KNN", + "KNN2", ] + [ r[0] for r in ( @@ -306,21 +630,6 @@ class Database: ) ).rows ] - # Add any from metadata.json - db_metadata = self.ds.metadata(database=self.name) - if "tables" in db_metadata: - 
hidden_tables += [ - t - for t in db_metadata["tables"] - if db_metadata["tables"][t].get("hidden") - ] - # Also mark as hidden any tables which start with the name of a hidden table - # e.g. "searchable_fts" implies "searchable_fts_content" should be hidden - for table_name in await self.table_names(): - for hidden_table in hidden_tables[:]: - if table_name.startswith(hidden_table): - hidden_tables.append(table_name) - continue return hidden_tables @@ -362,26 +671,34 @@ class Database: if self.is_memory: tags.append("memory") if self.hash: - tags.append("hash={}".format(self.hash)) + tags.append(f"hash={self.hash}") if self.size is not None: - tags.append("size={}".format(self.size)) + tags.append(f"size={self.size}") tags_str = "" if tags: - tags_str = " ({})".format(", ".join(tags)) - return "".format(self.name, tags_str) + tags_str = f" ({', '.join(tags)})" + return f"" class WriteTask: - __slots__ = ("fn", "task_id", "reply_queue") + __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") - def __init__(self, fn, task_id, reply_queue): + def __init__(self, fn, task_id, reply_queue, isolated_connection, transaction): self.fn = fn self.task_id = task_id self.reply_queue = reply_queue + self.isolated_connection = isolated_connection + self.transaction = transaction class QueryInterrupted(Exception): - pass + def __init__(self, e, sql, params): + self.e = e + self.sql = sql + self.params = params + + def __str__(self): + return "QueryInterrupted: {}".format(self.e) class MultipleValues(Exception): @@ -410,6 +727,9 @@ class Results: else: raise MultipleValues + def dicts(self): + return [dict(row) for row in self.rows] + def __iter__(self): return iter(self.rows) diff --git a/datasette/default_actions.py b/datasette/default_actions.py new file mode 100644 index 00000000..87d98fac --- /dev/null +++ b/datasette/default_actions.py @@ -0,0 +1,101 @@ +from datasette import hookimpl +from datasette.permissions import Action +from datasette.resources import ( + DatabaseResource, + TableResource, + QueryResource, +) + + +@hookimpl +def register_actions(): + """Register the core Datasette actions.""" + return ( + # Global actions (no resource_class) + Action( + name="view-instance", + abbr="vi", + description="View Datasette instance", + ), + Action( + name="permissions-debug", + abbr="pd", + description="Access permission debug tool", + ), + Action( + name="debug-menu", + abbr="dm", + description="View debug menu items", + ), + # Database-level actions (parent-level) + Action( + name="view-database", + abbr="vd", + description="View database", + resource_class=DatabaseResource, + ), + Action( + name="view-database-download", + abbr="vdd", + description="Download database file", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="execute-sql", + abbr="es", + description="Execute read-only SQL queries", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="create-table", + abbr="ct", + description="Create tables", + resource_class=DatabaseResource, + ), + # Table-level actions (child-level) + Action( + name="view-table", + abbr="vt", + description="View table", + resource_class=TableResource, + ), + Action( + name="insert-row", + abbr="ir", + description="Insert rows", + resource_class=TableResource, + ), + Action( + name="delete-row", + abbr="dr", + description="Delete rows", + resource_class=TableResource, + ), + Action( + name="update-row", + abbr="ur", + description="Update rows", + 
resource_class=TableResource, + ), + Action( + name="alter-table", + abbr="at", + description="Alter tables", + resource_class=TableResource, + ), + Action( + name="drop-table", + abbr="dt", + description="Drop tables", + resource_class=TableResource, + ), + # Query-level actions (child-level) + Action( + name="view-query", + abbr="vq", + description="View named query results", + resource_class=QueryResource, + ), + ) diff --git a/datasette/default_magic_parameters.py b/datasette/default_magic_parameters.py index 0f8f397e..91c1c5aa 100644 --- a/datasette/default_magic_parameters.py +++ b/datasette/default_magic_parameters.py @@ -1,5 +1,4 @@ from datasette import hookimpl -from datasette.utils import escape_fts import datetime import os import time @@ -25,9 +24,12 @@ def now(key, request): if key == "epoch": return int(time.time()) elif key == "date_utc": - return datetime.datetime.utcnow().date().isoformat() + return datetime.datetime.now(datetime.timezone.utc).date().isoformat() elif key == "datetime_utc": - return datetime.datetime.utcnow().strftime(r"%Y-%m-%dT%H:%M:%S") + "Z" + return ( + datetime.datetime.now(datetime.timezone.utc).strftime(r"%Y-%m-%dT%H:%M:%S") + + "Z" + ) else: raise KeyError diff --git a/datasette/default_menu_links.py b/datasette/default_menu_links.py index 11374fb5..85032387 100644 --- a/datasette/default_menu_links.py +++ b/datasette/default_menu_links.py @@ -3,7 +3,10 @@ from datasette import hookimpl @hookimpl def menu_links(datasette, actor): - if actor and actor.get("id") == "root": + async def inner(): + if not await datasette.allowed(action="debug-menu", actor=actor): + return [] + return [ {"href": datasette.urls.path("/-/databases"), "label": "Databases"}, { @@ -15,12 +18,8 @@ def menu_links(datasette, actor): "label": "Version info", }, { - "href": datasette.urls.path("/-/metadata"), - "label": "Metadata", - }, - { - "href": datasette.urls.path("/-/config"), - "label": "Config", + "href": datasette.urls.path("/-/settings"), + "label": "Settings", }, { "href": datasette.urls.path("/-/permissions"), @@ -38,3 +37,5 @@ def menu_links(datasette, actor): {"href": datasette.urls.path("/-/actor"), "label": "Debug actor"}, {"href": datasette.urls.path("/-/patterns"), "label": "Pattern portfolio"}, ] + + return inner diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py deleted file mode 100644 index ddd45940..00000000 --- a/datasette/default_permissions.py +++ /dev/null @@ -1,45 +0,0 @@ -from datasette import hookimpl -from datasette.utils import actor_matches_allow - - -@hookimpl(tryfirst=True) -def permission_allowed(datasette, actor, action, resource): - async def inner(): - if action == "permissions-debug": - if actor and actor.get("id") == "root": - return True - elif action == "view-instance": - allow = datasette.metadata("allow") - if allow is not None: - return actor_matches_allow(actor, allow) - elif action == "view-database": - database_allow = datasette.metadata("allow", database=resource) - if database_allow is None: - return None - return actor_matches_allow(actor, database_allow) - elif action == "view-table": - database, table = resource - tables = datasette.metadata("tables", database=database) or {} - table_allow = (tables.get(table) or {}).get("allow") - if table_allow is None: - return None - return actor_matches_allow(actor, table_allow) - elif action == "view-query": - # Check if this query has a "allow" block in metadata - database, query_name = resource - query = await datasette.get_canned_query(database, 
query_name, actor) - assert query is not None - allow = query.get("allow") - if allow is None: - return None - return actor_matches_allow(actor, allow) - elif action == "execute-sql": - # Use allow_sql block from database block, or from top-level - database_allow_sql = datasette.metadata("allow_sql", database=resource) - if database_allow_sql is None: - database_allow_sql = datasette.metadata("allow_sql") - if database_allow_sql is None: - return None - return actor_matches_allow(actor, database_allow_sql) - - return inner diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. 
+ + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: 
+ """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. + """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, table_config or {}) + + # Process 
queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is false (the default), execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. + + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. + + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> 
bool: + """ + Check if restrictions allow the requested action on the requested resource. + + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
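[Editor's example] restrictions_allow_action() above walks the _r allowlist from global ("a") to database ("d") to table ("r") level. A quick illustrative check, assuming Datasette(memory=True) and the re-export from datasette.default_permissions shown earlier on this branch; the database and table names are invented:

    import asyncio
    from datasette.app import Datasette
    from datasette.default_permissions import restrictions_allow_action

    async def demo():
        ds = Datasette(memory=True)
        await ds.invoke_startup()  # make sure plugin hooks, including default actions, have run
        # Allowlist as it would appear in a restricted token's _r key:
        restrictions = {"d": {"mydb": ["view-table"]}}
        # A database-level grant covers any table in that database:
        print(restrictions_allow_action(ds, restrictions, "view-table", ("mydb", "users")))     # True
        # Other databases and other actions are rejected:
        print(restrictions_allow_action(ds, restrictions, "view-table", ("otherdb", "users")))  # False
        print(restrictions_allow_action(ds, restrictions, "insert-row", ("mydb", "users")))     # False

    asyncio.run(demo())
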
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/datasette/events.py b/datasette/events.py new file mode 100644 index 00000000..5cd5ba3d --- /dev/null +++ b/datasette/events.py @@ -0,0 +1,235 @@ +from abc import ABC, abstractproperty +from dataclasses import asdict, dataclass, field +from datasette.hookspecs import hookimpl +from datetime import datetime, timezone + + +@dataclass +class Event(ABC): + @abstractproperty + def name(self): + pass + + created: datetime = field( + init=False, default_factory=lambda: datetime.now(timezone.utc) + ) + actor: dict | None + + def properties(self): + properties = asdict(self) + properties.pop("actor", None) + properties.pop("created", None) + return properties + + +@dataclass +class LoginEvent(Event): + """ + Event name: ``login`` + + A user (represented by ``event.actor``) has logged in. + """ + + name = "login" + + +@dataclass +class LogoutEvent(Event): + """ + Event name: ``logout`` + + A user (represented by ``event.actor``) has logged out. + """ + + name = "logout" + + +@dataclass +class CreateTokenEvent(Event): + """ + Event name: ``create-token`` + + A user created an API token. + + :ivar expires_after: Number of seconds after which this token will expire. + :type expires_after: int or None + :ivar restrict_all: Restricted permissions for this token. + :type restrict_all: list + :ivar restrict_database: Restricted database permissions for this token. + :type restrict_database: dict + :ivar restrict_resource: Restricted resource permissions for this token. + :type restrict_resource: dict + """ + + name = "create-token" + expires_after: int | None + restrict_all: list + restrict_database: dict + restrict_resource: dict + + +@dataclass +class CreateTableEvent(Event): + """ + Event name: ``create-table`` + + A new table has been created in the database. 
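[Editor's example] For reference, a token that the dstok_ verification above will accept can be minted with datasette.sign() using the same "token" namespace and the a/t/d payload keys it unsigns. This is only a sketch of the signing step, not the public token-creation API, and the actor id is invented:

    import time
    from datasette.app import Datasette

    ds = Datasette(memory=True)
    # actor id, created timestamp, optional TTL in seconds;
    # an "_r" key could be added to restrict the token's actions
    payload = {"a": "alice", "t": int(time.time()), "d": 3600}
    token = "dstok_" + ds.sign(payload, namespace="token")
    # Send it as an HTTP header:  Authorization: Bearer dstok_...
    print(token[:24] + "...")
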
+ + :ivar database: The name of the database where the table was created. + :type database: str + :ivar table: The name of the table that was created + :type table: str + :ivar schema: The SQL schema definition for the new table. + :type schema: str + """ + + name = "create-table" + database: str + table: str + schema: str + + +@dataclass +class DropTableEvent(Event): + """ + Event name: ``drop-table`` + + A table has been dropped from the database. + + :ivar database: The name of the database where the table was dropped. + :type database: str + :ivar table: The name of the table that was dropped + :type table: str + """ + + name = "drop-table" + database: str + table: str + + +@dataclass +class AlterTableEvent(Event): + """ + Event name: ``alter-table`` + + A table has been altered. + + :ivar database: The name of the database where the table was altered + :type database: str + :ivar table: The name of the table that was altered + :type table: str + :ivar before_schema: The table's SQL schema before the alteration + :type before_schema: str + :ivar after_schema: The table's SQL schema after the alteration + :type after_schema: str + """ + + name = "alter-table" + database: str + table: str + before_schema: str + after_schema: str + + +@dataclass +class InsertRowsEvent(Event): + """ + Event name: ``insert-rows`` + + Rows were inserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + :ivar ignore: Was ignore set? + :type ignore: bool + :ivar replace: Was replace set? + :type replace: bool + """ + + name = "insert-rows" + database: str + table: str + num_rows: int + ignore: bool + replace: bool + + +@dataclass +class UpsertRowsEvent(Event): + """ + Event name: ``upsert-rows`` + + Rows were upserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + """ + + name = "upsert-rows" + database: str + table: str + num_rows: int + + +@dataclass +class UpdateRowEvent(Event): + """ + Event name: ``update-row`` + + A row was updated in a table. + + :ivar database: The name of the database where the row was updated. + :type database: str + :ivar table: The name of the table where the row was updated. + :type table: str + :ivar pks: The primary key values of the updated row. + """ + + name = "update-row" + database: str + table: str + pks: list + + +@dataclass +class DeleteRowEvent(Event): + """ + Event name: ``delete-row`` + + A row was deleted from a table. + + :ivar database: The name of the database where the row was deleted. + :type database: str + :ivar table: The name of the table where the row was deleted. + :type table: str + :ivar pks: The primary key values of the deleted row. 
+ """ + + name = "delete-row" + database: str + table: str + pks: list + + +@hookimpl +def register_events(): + return [ + LoginEvent, + LogoutEvent, + CreateTableEvent, + CreateTokenEvent, + AlterTableEvent, + DropTableEvent, + InsertRowsEvent, + UpsertRowsEvent, + UpdateRowEvent, + DeleteRowEvent, + ] diff --git a/datasette/facets.py b/datasette/facets.py index 1712db9b..dd149424 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -1,6 +1,5 @@ import json import urllib -import re from datasette import hookimpl from datasette.database import QueryInterrupted from datasette.utils import ( @@ -8,13 +7,12 @@ from datasette.utils import ( path_with_added_args, path_with_removed_args, detect_json1, - InvalidSql, sqlite3, ) -def load_facet_configs(request, table_metadata): - # Given a request and the metadata configuration for a table, return +def load_facet_configs(request, table_config): + # Given a request and the configuration for a table, return # a dictionary of selected facets, their lists of configs and for each # config whether it came from the request or the metadata. # @@ -22,21 +20,21 @@ def load_facet_configs(request, table_metadata): # {"source": "metadata", "config": config1}, # {"source": "request", "config": config2}]} facet_configs = {} - table_metadata = table_metadata or {} - metadata_facets = table_metadata.get("facets", []) - for metadata_config in metadata_facets: - if isinstance(metadata_config, str): + table_config = table_config or {} + table_facet_configs = table_config.get("facets", []) + for facet_config in table_facet_configs: + if isinstance(facet_config, str): type = "column" - metadata_config = {"simple": metadata_config} + facet_config = {"simple": facet_config} else: assert ( - len(metadata_config.values()) == 1 + len(facet_config.values()) == 1 ), "Metadata config dicts should be {type: config}" - type, metadata_config = metadata_config.items()[0] - if isinstance(metadata_config, str): - metadata_config = {"simple": metadata_config} + type, facet_config = list(facet_config.items())[0] + if isinstance(facet_config, str): + facet_config = {"simple": facet_config} facet_configs.setdefault(type, []).append( - {"source": "metadata", "config": metadata_config} + {"source": "metadata", "config": facet_config} ) qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True) for key, values in qs_pairs.items(): @@ -47,13 +45,12 @@ def load_facet_configs(request, table_metadata): elif key.startswith("_facet_"): type = key[len("_facet_") :] for value in values: - # The value is the config - either JSON or not - if value.startswith("{"): - config = json.loads(value) - else: - config = {"simple": value} + # The value is the facet_config - either JSON or not + facet_config = ( + json.loads(value) if value.startswith("{") else {"simple": value} + ) facet_configs.setdefault(type, []).append( - {"source": "request", "config": config} + {"source": "request", "config": facet_config} ) return facet_configs @@ -68,6 +65,8 @@ def register_facet_classes(): class Facet: type = None + # How many rows to consider when suggesting facets: + suggest_consider = 1000 def __init__( self, @@ -77,7 +76,7 @@ class Facet: sql=None, table=None, params=None, - metadata=None, + table_config=None, row_count=None, ): assert table or sql, "Must provide either table= or sql=" @@ -86,14 +85,14 @@ class Facet: self.database = database # For foreign key expansion. Can be None for e.g. 
canned SQL queries: self.table = table - self.sql = sql or "select * from [{}]".format(table) + self.sql = sql or f"select * from [{table}]" self.params = params or [] - self.metadata = metadata + self.table_config = table_config # row_count can be None, in which case we calculate it ourselves: self.row_count = row_count def get_configs(self): - configs = load_facet_configs(self.request, self.metadata) + configs = load_facet_configs(self.request, self.table_config) return configs.get(self.type) or [] def get_querystring_pairs(self): @@ -101,6 +100,36 @@ class Facet: # [('_foo', 'bar'), ('_foo', '2'), ('empty', '')] return urllib.parse.parse_qsl(self.request.query_string, keep_blank_values=True) + def get_facet_size(self): + facet_size = self.ds.setting("default_facet_size") + max_returned_rows = self.ds.setting("max_returned_rows") + table_facet_size = None + if self.table: + config_facet_size = ( + self.ds.config.get("databases", {}) + .get(self.database, {}) + .get("tables", {}) + .get(self.table, {}) + .get("facet_size") + ) + if config_facet_size: + table_facet_size = config_facet_size + custom_facet_size = self.request.args.get("_facet_size") + if custom_facet_size: + if custom_facet_size == "max": + facet_size = max_returned_rows + elif custom_facet_size.isdigit(): + facet_size = int(custom_facet_size) + else: + # Invalid value, ignore it + custom_facet_size = None + if table_facet_size and not custom_facet_size: + if table_facet_size == "max": + facet_size = max_returned_rows + else: + facet_size = table_facet_size + return min(facet_size, max_returned_rows) + async def suggest(self): return [] @@ -114,21 +143,10 @@ class Facet: # Detect column names using the "limit 0" trick return ( await self.ds.execute( - self.database, "select * from ({}) limit 0".format(sql), params or [] + self.database, f"select * from ({sql}) limit 0", params or [] ) ).columns - async def get_row_count(self): - if self.row_count is None: - self.row_count = ( - await self.ds.execute( - self.database, - "select count(*) from ({})".format(self.sql), - self.params, - ) - ).rows[0][0] - return self.row_count - class ColumnFacet(Facet): type = "column" @@ -136,20 +154,23 @@ class ColumnFacet(Facet): async def suggest(self): row_count = await self.get_row_count() columns = await self.get_columns(self.sql, self.params) - facet_size = self.ds.config("default_facet_size") + facet_size = self.get_facet_size() suggested_facets = [] already_enabled = [c["config"]["simple"] for c in self.get_configs()] for column in columns: if column in already_enabled: continue suggested_facet_sql = """ - select {column}, count(*) as n from ( - {sql} - ) where {column} is not null - group by {column} + with limited as (select * from ({sql}) limit {suggest_consider}) + select {column} as value, count(*) as n from limited + where value is not null + group by value limit {limit} """.format( - column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + column=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, + suggest_consider=self.suggest_consider, ) distinct_values = None try: @@ -158,14 +179,12 @@ class ColumnFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), ) num_distinct_values = len(distinct_values) if ( - num_distinct_values - and num_distinct_values > 1 + 1 < num_distinct_values < row_count and num_distinct_values <= facet_size - and num_distinct_values < row_count 
# And at least one has n > 1 and any(r["n"] > 1 for r in distinct_values) ): @@ -174,7 +193,11 @@ class ColumnFacet(Facet): "name": column, "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args(self.request, {"_facet": column}), + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet": column} + ) + ), ), } ) @@ -182,13 +205,24 @@ class ColumnFacet(Facet): continue return suggested_facets + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})", + self.params, + ) + ).rows[0][0] + return self.row_count + async def facet_results(self): - facet_results = {} + facet_results = [] facets_timed_out = [] qs_pairs = self.get_querystring_pairs() - facet_size = self.ds.config("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -208,37 +242,42 @@ class ColumnFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet": column} - ), - "results": facet_results_values, - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "hideable": source != "metadata", + "toggle_url": self.ds.urls.path( + path_with_removed_args(self.request, {"_facet": column}) + ), + "results": facet_results_values, + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] if self.table: # Attempt to expand foreign keys into labels values = [row["value"] for row in facet_rows] expanded = await self.ds.expand_foreign_keys( - self.database, self.table, column, values + self.request.actor, self.database, self.table, column, values ) else: expanded = {} for row in facet_rows: - selected = (column, str(row["value"])) in qs_pairs + column_qs = column + if column.startswith("_"): + column_qs = "{}__exact".format(column) + selected = (column_qs, str(row["value"])) in qs_pairs if selected: toggle_path = path_with_removed_args( - self.request, {column: str(row["value"])} + self.request, {column_qs: str(row["value"])} ) else: toggle_path = path_with_added_args( - self.request, {column: row["value"]} + self.request, {column_qs: row["value"]} ) facet_results_values.append( { @@ -246,7 +285,7 @@ class ColumnFacet(Facet): "label": expanded.get((column, row["value"]), row["value"]), "count": row["count"], "toggle_url": self.ds.absolute_url( - self.request, toggle_path + self.request, self.ds.urls.path(toggle_path) ), "selected": selected, } @@ -279,10 +318,14 @@ class ArrayFacet(Facet): continue # Is every value in this column either null or a JSON array? 
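[Editor's example] The array facet logic around this point samples json_type() to confirm a column holds JSON arrays, then counts individual values with SQLite's json_each(), as in the queries that follow. A toy version of that counting step, with an invented posts table; it requires the JSON1 functions and skips the per-row de-duplication the real facet SQL below adds:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("create table posts (id integer primary key, tags text)")
    conn.executemany(
        "insert into posts (tags) values (?)",
        [('["python", "sqlite"]',), ('["python"]',), ('["datasette"]',)],
    )
    rows = conn.execute(
        """
        select j.value as value, count(*) as count
        from posts join json_each(posts.tags) j
        group by j.value
        order by count desc, value
        """
    ).fetchall()
    print(rows)  # [('python', 2), ('datasette', 1), ('sqlite', 1)]
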
suggested_facet_sql = """ + with limited as (select * from ({sql}) limit {suggest_consider}) select distinct json_type({column}) - from ({sql}) + from limited + where {column} is not null and {column} != '' """.format( - column=escape_sqlite(column), sql=self.sql + column=escape_sqlite(column), + sql=self.sql, + suggest_consider=self.suggest_consider, ) try: results = await self.ds.execute( @@ -290,22 +333,26 @@ class ArrayFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), log_sql_errors=False, ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - # Now sanity check that first 100 arrays contain only strings + # Now check that first 100 arrays contain only strings first_100 = [ v[0] for v in await self.ds.execute( self.database, - "select {column} from ({sql}) where {column} is not null and json_array_length({column}) > 0 limit 100".format( - column=escape_sqlite(column), sql=self.sql - ), + ( + "select {column} from ({sql}) " + "where {column} is not null " + "and {column} != '' " + "and json_array_length({column}) > 0 " + "limit 100" + ).format(column=escape_sqlite(column), sql=self.sql), self.params, truncate=False, - custom_time_limit=self.ds.config( + custom_time_limit=self.ds.setting( "facet_suggest_time_limit_ms" ), log_sql_errors=False, @@ -320,8 +367,10 @@ class ArrayFacet(Facet): "type": "array", "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args( - self.request, {"_facet_array": column} + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_array": column} + ) ), ), } @@ -332,21 +381,38 @@ class ArrayFacet(Facet): async def facet_results(self): # self.configs should be a plain list of columns - facet_results = {} + facet_results = [] facets_timed_out = [] - facet_size = self.ds.config("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] column = config.get("column") or config["simple"] + # https://github.com/simonw/datasette/issues/448 facet_sql = """ - select j.value as value, count(*) as count from ( - {sql} - ) join json_each({col}) j - group by j.value order by count desc, value limit {limit} + with inner as ({sql}), + deduped_array_items as ( + select + distinct j.value, + inner.* + from + json_each([inner].{col}) j + join inner + ) + select + value as value, + count(*) as count + from + deduped_array_items + group by + value + order by + count(*) desc, value limit {limit} """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + col=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, ) try: facet_rows_results = await self.ds.execute( @@ -354,31 +420,35 @@ class ArrayFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_array": column} - ), - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": 
self.ds.urls.path( + path_with_removed_args( + self.request, {"_facet_array": column} + ) + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] pairs = self.get_querystring_pairs() for row in facet_rows: value = str(row["value"]) - selected = ("{}__arraycontains".format(column), value) in pairs + selected = (f"{column}__arraycontains", value) in pairs if selected: toggle_path = path_with_removed_args( - self.request, {"{}__arraycontains".format(column): value} + self.request, {f"{column}__arraycontains": value} ) else: toggle_path = path_with_added_args( - self.request, {"{}__arraycontains".format(column): value} + self.request, {f"{column}__arraycontains": value} ) facet_results_values.append( { @@ -410,8 +480,8 @@ class DateFacet(Facet): # Does this column contain any dates in the first 100 rows? suggested_facet_sql = """ select date({column}) from ( - {sql} - ) where {column} glob "????-??-*" limit 100; + select * from ({sql}) limit 100 + ) where {column} glob "????-??-*" """.format( column=escape_sqlite(column), sql=self.sql ) @@ -421,7 +491,7 @@ class DateFacet(Facet): suggested_facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), log_sql_errors=False, ) values = tuple(r[0] for r in results.rows) @@ -432,8 +502,10 @@ class DateFacet(Facet): "type": "date", "toggle_url": self.ds.absolute_url( self.request, - path_with_added_args( - self.request, {"_facet_date": column} + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_date": column} + ) ), ), } @@ -443,10 +515,10 @@ class DateFacet(Facet): return suggested_facets async def facet_results(self): - facet_results = {} + facet_results = [] facets_timed_out = [] args = dict(self.get_querystring_pairs()) - facet_size = self.ds.config("default_facet_size") + facet_size = self.get_facet_size() for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] @@ -467,31 +539,31 @@ class DateFacet(Facet): facet_sql, self.params, truncate=False, - custom_time_limit=self.ds.config("facet_time_limit_ms"), + custom_time_limit=self.ds.setting("facet_time_limit_ms"), ) facet_results_values = [] - facet_results[column] = { - "name": column, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_date": column} - ), - "truncated": len(facet_rows_results) > facet_size, - } + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_date": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) facet_rows = facet_rows_results.rows[:facet_size] for row in facet_rows: - selected = str(args.get("{}__date".format(column))) == str( - row["value"] - ) + selected = str(args.get(f"{column}__date")) == str(row["value"]) if selected: toggle_path = path_with_removed_args( - self.request, {"{}__date".format(column): str(row["value"])} + self.request, {f"{column}__date": str(row["value"])} ) else: toggle_path = path_with_added_args( - self.request, {"{}__date".format(column): row["value"]} + self.request, {f"{column}__date": row["value"]} ) facet_results_values.append( { diff --git a/datasette/filters.py b/datasette/filters.py index 4891154a..95cc5f37 100644 
--- a/datasette/filters.py +++ b/datasette/filters.py @@ -1,7 +1,173 @@ +from datasette import hookimpl +from datasette.resources import DatabaseResource +from datasette.views.base import DatasetteError +from datasette.utils.asgi import BadRequest import json -import numbers +from .utils import detect_json1, escape_sqlite, path_with_removed_args -from .utils import detect_json1, escape_sqlite + +@hookimpl(specname="filters_from_request") +def where_filters(request, database, datasette): + # This one deals with ?_where= + async def inner(): + where_clauses = [] + extra_wheres_for_ui = [] + if "_where" in request.args: + if not await datasette.allowed( + action="execute-sql", + resource=DatabaseResource(database=database), + actor=request.actor, + ): + raise DatasetteError("_where= is not allowed", status=403) + else: + where_clauses.extend(request.args.getlist("_where")) + extra_wheres_for_ui = [ + { + "text": text, + "remove_url": path_with_removed_args(request, {"_where": text}), + } + for text in request.args.getlist("_where") + ] + + return FilterArguments( + where_clauses, + extra_context={ + "extra_wheres_for_ui": extra_wheres_for_ui, + }, + ) + + return inner + + +@hookimpl(specname="filters_from_request") +def search_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Figure out which fts_table to use + table_metadata = await datasette.table_config(database, table) + db = datasette.get_database(database) + fts_table = request.args.get("_fts_table") + fts_table = fts_table or table_metadata.get("fts_table") + fts_table = fts_table or await db.fts_table(table) + fts_pk = request.args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) + search_args = { + key: request.args[key] + for key in request.args + if key.startswith("_search") and key != "_searchmode" + } + search = "" + search_mode_raw = table_metadata.get("searchmode") == "raw" + # Or set search mode from the querystring + qs_searchmode = request.args.get("_searchmode") + if qs_searchmode == "escaped": + search_mode_raw = False + if qs_searchmode == "raw": + search_mode_raw = True + + extra_context["supports_search"] = bool(fts_table) + + if fts_table and search_args: + if "_search" in search_args: + # Simple ?_search=xxx + search = search_args["_search"] + where_clauses.append( + "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + fts_pk=escape_sqlite(fts_pk), + match_clause=( + ":search" if search_mode_raw else "escape_fts(:search)" + ), + ) + ) + human_descriptions.append(f'search matches "{search}"') + params["search"] = search + extra_context["search"] = search + else: + # More complex: search against specific columns + for i, (key, search_text) in enumerate(search_args.items()): + search_col = key.split("_search_", 1)[1] + if search_col not in await db.table_columns(fts_table): + raise BadRequest("Cannot search by that column") + + where_clauses.append( + "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + search_col=escape_sqlite(search_col), + match_clause=( + ":search_{}".format(i) + if search_mode_raw + else "escape_fts(:search_{})".format(i) + ), + ) + ) + human_descriptions.append( + f'search column "{search_col}" matches "{search_text}"' + ) + params[f"search_{i}"] = search_text + extra_context["search"] = search_text + 
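# Illustration (not part of the upstream diff): third-party plugins can register
# filters_from_request implementations in the same shape as the built-ins above,
# returning a FilterArguments (the class introduced later in this file). The
# ?_status= parameter and the status column here are hypothetical.
from datasette import hookimpl
from datasette.filters import FilterArguments


@hookimpl
def filters_from_request(request, database, table, datasette):
    async def inner():
        where_clauses = []
        params = {}
        descriptions = []
        status = request.args.get("_status")
        if status:
            where_clauses.append("status = :status")
            params["status"] = status
            descriptions.append(f'status is "{status}"')
        return FilterArguments(where_clauses, params, descriptions)

    return inner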
+ return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +@hookimpl(specname="filters_from_request") +def through_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Support for ?_through={table, column, value} + if "_through" in request.args: + for through in request.args.getlist("_through"): + through_data = json.loads(through) + through_table = through_data["table"] + other_column = through_data["column"] + value = through_data["value"] + db = datasette.get_database(database) + outgoing_foreign_keys = await db.foreign_keys_for_table(through_table) + try: + fk_to_us = [ + fk for fk in outgoing_foreign_keys if fk["other_table"] == table + ][0] + except IndexError: + raise DatasetteError( + "Invalid _through - could not find corresponding foreign key" + ) + param = f"p{len(params)}" + where_clauses.append( + "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( + through_table=escape_sqlite(through_table), + our_pk=escape_sqlite(fk_to_us["other_column"]), + our_column=escape_sqlite(fk_to_us["column"]), + other_column=escape_sqlite(other_column), + param=param, + ) + ) + params[param] = value + human_descriptions.append(f'{through_table}.{other_column} = "{value}"') + + return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +class FilterArguments: + def __init__( + self, where_clauses, params=None, human_descriptions=None, extra_context=None + ): + self.where_clauses = where_clauses + self.params = params or {} + self.human_descriptions = human_descriptions or [] + self.extra_context = extra_context or {} class Filter: @@ -43,7 +209,7 @@ class TemplatedFilter(Filter): kwargs = {"c": column} converted = None else: - kwargs = {"c": column, "p": "p{}".format(param_counter), "t": table} + kwargs = {"c": column, "p": f"p{param_counter}", "t": table} return self.sql_template.format(**kwargs), converted def human_clause(self, column, value): @@ -69,12 +235,12 @@ class InFilter(Filter): def where_clause(self, table, column, value, param_counter): values = self.split_value(value) - params = [":p{}".format(param_counter + i) for i in range(len(values))] - sql = "{} in ({})".format(escape_sqlite(column), ", ".join(params)) + params = [f":p{param_counter + i}" for i in range(len(values))] + sql = f"{escape_sqlite(column)} in ({', '.join(params)})" return sql, values def human_clause(self, column, value): - return "{} in {}".format(column, json.dumps(self.split_value(value))) + return f"{column} in {json.dumps(self.split_value(value))}" class NotInFilter(InFilter): @@ -83,12 +249,12 @@ class NotInFilter(InFilter): def where_clause(self, table, column, value, param_counter): values = self.split_value(value) - params = [":p{}".format(param_counter + i) for i in range(len(values))] - sql = "{} not in ({})".format(escape_sqlite(column), ", ".join(params)) + params = [f":p{param_counter + i}" for i in range(len(values))] + sql = f"{escape_sqlite(column)} not in ({', '.join(params)})" return sql, values def human_clause(self, column, value): - return "{} not in {}".format(column, json.dumps(self.split_value(value))) + return f"{column} not in {json.dumps(self.split_value(value))}" class Filters: @@ -114,6 +280,13 @@ class Filters: '{c} contains "{v}"', format="%{}%", ), + TemplatedFilter( + "notcontains", + "does not contain", + '"{c}" not 
like :{p}', + '{c} does not contain "{v}"', + format="%{}%", + ), TemplatedFilter( "endswith", "ends with", @@ -149,12 +322,15 @@ class Filters: TemplatedFilter( "arraycontains", "array contains", - """rowid in ( - select {t}.rowid from {t}, json_each({t}.{c}) j - where j.value = :{p} - )""", + """:{p} in (select value from json_each([{t}].[{c}]))""", '{c} contains "{v}"', - ) + ), + TemplatedFilter( + "arraynotcontains", + "array does not contain", + """:{p} not in (select value from json_each([{t}].[{c}]))""", + '{c} does not contain "{v}"', + ), ] if detect_json1() else [] @@ -191,15 +367,11 @@ class Filters: ) _filters_by_key = {f.key: f for f in _filters} - def __init__(self, pairs, units=None, ureg=None): - if units is None: - units = {} + def __init__(self, pairs): self.pairs = pairs - self.units = units - self.ureg = ureg def lookups(self): - "Yields (lookup, display, no_argument) pairs" + """Yields (lookup, display, no_argument) pairs""" for filter in self._filters: yield filter.key, filter.display, filter.no_argument @@ -221,10 +393,10 @@ class Filters: s = " and ".join(and_bits) if not s: return "" - return "where {}".format(s) + return f"where {s}" def selections(self): - "Yields (column, lookup, value) tuples" + """Yields (column, lookup, value) tuples""" for key, value in self.pairs: if "__" in key: column, lookup = key.rsplit("__", 1) @@ -236,20 +408,6 @@ class Filters: def has_selections(self): return bool(self.pairs) - def convert_unit(self, column, value): - "If the user has provided a unit in the query, convert it into the column unit, if present." - if column not in self.units: - return value - - # Try to interpret the value as a unit - value = self.ureg(value) - if isinstance(value, numbers.Number): - # It's just a bare number, assume it's the column unit - return value - - column_unit = self.ureg(self.units[column]) - return value.to(column_unit).magnitude - def build_where_clauses(self, table): sql_bits = [] params = {} @@ -257,15 +415,13 @@ class Filters: for column, lookup, value in self.selections(): filter = self._filters_by_key.get(lookup, None) if filter: - sql_bit, param = filter.where_clause( - table, column, self.convert_unit(column, value), i - ) + sql_bit, param = filter.where_clause(table, column, value, i) sql_bits.append(sql_bit) if param is not None: if not isinstance(param, list): param = [param] for individual_param in param: - param_id = "p{}".format(i) + param_id = f"p{i}" params[param_id] = individual_param i += 1 return sql_bits, params diff --git a/datasette/forbidden.py b/datasette/forbidden.py new file mode 100644 index 00000000..41c48396 --- /dev/null +++ b/datasette/forbidden.py @@ -0,0 +1,19 @@ +from datasette import hookimpl, Response + + +@hookimpl(trylast=True) +def forbidden(datasette, request, message): + async def inner(): + return Response.html( + await datasette.render_template( + "error.html", + { + "title": "Forbidden", + "error": message, + }, + request=request, + ), + status=403, + ) + + return inner diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py new file mode 100644 index 00000000..96398a4c --- /dev/null +++ b/datasette/handle_exception.py @@ -0,0 +1,77 @@ +from datasette import hookimpl, Response +from .utils import add_cors_headers +from .utils.asgi import ( + Base400, +) +from .views.base import DatasetteError +from markupsafe import Markup +import traceback + +try: + import ipdb as pdb +except ImportError: + import pdb + +try: + import rich +except ImportError: + rich = None + + 
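# Illustration (not part of the upstream diff): because the default forbidden()
# implementation above is registered with trylast=True, a plugin's own forbidden
# hook is consulted before it and can take over the response. The "/-/login"
# path below is hypothetical.
from datasette import hookimpl, Response


@hookimpl
def forbidden(datasette, request, message):
    async def inner():
        if request.actor is None:
            # Anonymous visitor: send them to a (hypothetical) login page
            return Response.redirect("/-/login")
        # Otherwise fall back to a plain-text explanation
        return Response.text(message, status=403)

    return inner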
+@hookimpl(trylast=True)
+def handle_exception(datasette, request, exception):
+    async def inner():
+        if datasette.pdb:
+            pdb.post_mortem(exception.__traceback__)
+
+        if rich is not None:
+            rich.get_console().print_exception(show_locals=True)
+
+        title = None
+        if isinstance(exception, Base400):
+            status = exception.status
+            info = {}
+            message = exception.args[0]
+        elif isinstance(exception, DatasetteError):
+            status = exception.status
+            info = exception.error_dict
+            message = exception.message
+            if exception.message_is_html:
+                message = Markup(message)
+            title = exception.title
+        else:
+            status = 500
+            info = {}
+            message = str(exception)
+            traceback.print_exc()
+        templates = [f"{status}.html", "error.html"]
+        info.update(
+            {
+                "ok": False,
+                "error": message,
+                "status": status,
+                "title": title,
+            }
+        )
+        headers = {}
+        if datasette.cors:
+            add_cors_headers(headers)
+        if request.path.split("?")[0].endswith(".json"):
+            return Response.json(info, status=status, headers=headers)
+        else:
+            environment = datasette.get_jinja_environment(request)
+            template = environment.select_template(templates)
+            return Response.html(
+                await template.render_async(
+                    dict(
+                        info,
+                        urls=datasette.urls,
+                        app_css_hash=datasette.app_css_hash(),
+                        menu_links=lambda: [],
+                    )
+                ),
+                status=status,
+                headers=headers,
+            )
+
+    return inner
diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py
index 78070e67..3f6a1425 100644
--- a/datasette/hookspecs.py
+++ b/datasette/hookspecs.py
@@ -7,103 +7,216 @@ hookimpl = HookimplMarker("datasette")

 @hookspec
 def startup(datasette):
-    "Fires directly after Datasette first starts running"
+    """Fires directly after Datasette first starts running"""


 @hookspec
 def asgi_wrapper(datasette):
-    "Returns an ASGI middleware callable to wrap our ASGI application with"
+    """Returns an ASGI middleware callable to wrap our ASGI application with"""


 @hookspec
 def prepare_connection(conn, database, datasette):
-    "Modify SQLite connection in some way e.g. register custom SQL functions"
+    """Modify SQLite connection in some way e.g. register custom SQL functions"""


 @hookspec
-def prepare_jinja2_environment(env):
-    "Modify Jinja2 template environment e.g. register custom template tags"
+def prepare_jinja2_environment(env, datasette):
+    """Modify Jinja2 template environment e.g. register custom template tags"""


 @hookspec
 def extra_css_urls(template, database, table, columns, view_name, request, datasette):
-    "Extra CSS URLs added by this plugin"
+    """Extra CSS URLs added by this plugin"""


 @hookspec
 def extra_js_urls(template, database, table, columns, view_name, request, datasette):
-    "Extra JavaScript URLs added by this plugin"
+    """Extra JavaScript URLs added by this plugin"""


 @hookspec
 def extra_body_script(
     template, database, table, columns, view_name, request, datasette
 ):
-    "Extra JavaScript code to be included in
diff --git a/datasette/templates/_codemirror.html b/datasette/templates/_codemirror.html
index b31235d2..c4629aeb 100644
--- a/datasette/templates/_codemirror.html
+++ b/datasette/templates/_codemirror.html
@@ -1,8 +1,16 @@
- - - +
diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html
index 4019d448..a624c8a4 100644
--- a/datasette/templates/_codemirror_foot.html
+++ b/datasette/templates/_codemirror_foot.html
@@ -1,37 +1,42 @@
diff --git a/datasette/templates/_crumbs.html b/datasette/templates/_crumbs.html
new file mode 100644
index 00000000..bd1ff0da
--- /dev/null
+++ b/datasette/templates/_crumbs.html
@@ -0,0 +1,15 @@
+{% macro nav(request, database=None, table=None) -%}
+{% if crumb_items is defined %}
+  {% set items=crumb_items(request=request, database=database, table=table) %}
+  {% if items %}
+

+ {% for item in items %} + {{ item.label }} + {% if not loop.last %} + / + {% endif %} + {% endfor %} +

+ {% endif %} +{% endif %} +{%- endmacro %} diff --git a/datasette/templates/_debug_common_functions.html b/datasette/templates/_debug_common_functions.html new file mode 100644 index 00000000..d988a2f3 --- /dev/null +++ b/datasette/templates/_debug_common_functions.html @@ -0,0 +1,50 @@ + diff --git a/datasette/templates/_description_source_license.html b/datasette/templates/_description_source_license.html index a2bc18f2..f852268f 100644 --- a/datasette/templates/_description_source_license.html +++ b/datasette/templates/_description_source_license.html @@ -1,6 +1,6 @@ -{% if metadata.description_html or metadata.description %} +{% if metadata.get("description_html") or metadata.get("description") %}
{% for column in display_columns %} - diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html index 0f1b30f0..1ecc92df 100644 --- a/datasette/templates/allow_debug.html +++ b/datasette/templates/allow_debug.html @@ -33,9 +33,12 @@ p.message-warning {

Debug allow rules

+{% set current_tab = "allow_debug" %} +{% include "_permissions_debug_tabs.html" %} +

Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.
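# Illustration (not part of the upstream diff): the sort of input this page
# accepts - an actor and an "allow" block to evaluate against it. These
# specific values are made up.
actor = {"id": "editor", "roles": ["staff"]}
allow = {"id": ["editor", "admin"]}  # matches any actor whose id is editor or admin
# Pasting these into the form reports whether the actor passes the allow block.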

- +

diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html
new file mode 100644
index 00000000..dc393c20
--- /dev/null
+++ b/datasette/templates/api_explorer.html
@@ -0,0 +1,208 @@
+{% extends "base.html" %}
+
+{% block title %}API Explorer{% endblock %}
+
+{% block extra_head %}
+
+{% endblock %}
+
+{% block content %}
+
+

API Explorer{% if private %} 🔒{% endif %}

+ +

Use this tool to try out the + {% if datasette_version %} + Datasette API. + {% else %} + Datasette API. + {% endif %} +

+
+ GET + +
+ + + +
+ +
+
+ POST +
+
+ + +
+
+ + +
+

+ +
+ + + + + +{% if example_links %} +

API endpoints

+
    + {% for database in example_links %} +
  • Database: {{ database.name }}
  • +
      + {% for link in database.links %} +
    • {{ link.path }} - {{ link.label }}
    • + {% endfor %} + {% for table in database.tables %} +
    • {{ table.name }} +
        + {% for link in table.links %} +
      • {{ link.path }} - {{ link.label }}
      • + {% endfor %} +
      +
    • + {% endfor %} +
    + {% endfor %} +
+{% endif %} + +{% endblock %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index d860df37..0d89e11c 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -1,21 +1,27 @@ - - +{% import "_crumbs.html" as crumbs with context %} + {% block title %}{% endblock %} {% for url in extra_css_urls %} - + {% endfor %} + + {% for url in extra_js_urls %} - + {% endfor %} -{% block extra_head %}{% endblock %} +{%- if alternate_url_json -%} + +{%- endif -%} +{%- block extra_head %}{% endblock -%} -
+ {% if not column.sortable %} {{ column.name }} {% else %} {% if column.name == sort %} - {{ column.name }} ▼ + {{ column.name }} ▼ {% else %} - {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} + {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} {% endif %} {% endif %}
+ + + + + + + + + + + + + {% for action in data %} + + + + + + + + + + {% endfor %} + +
NameAbbrDescriptionResourceTakes ParentTakes ChildAlso Requires
{{ action.name }}{% if action.abbr %}{{ action.abbr }}{% endif %}{{ action.description or "" }}{% if action.resource_class %}{{ action.resource_class }}{% endif %}{% if action.takes_parent %}✓{% endif %}{% if action.takes_child %}✓{% endif %}{% if action.also_requires %}{{ action.also_requires }}{% endif %}
+
+{% endblock %}
diff --git a/datasette/templates/debug_allowed.html b/datasette/templates/debug_allowed.html
new file mode 100644
index 00000000..add3154a
--- /dev/null
+++ b/datasette/templates/debug_allowed.html
@@ -0,0 +1,229 @@
+{% extends "base.html" %}
+
+{% block title %}Allowed Resources{% endblock %}
+
+{% block extra_head %}
+
+{% include "_permission_ui_styles.html" %}
+{% include "_debug_common_functions.html" %}
+{% endblock %}
+
+{% block content %}
+

Allowed resources

+ +{% set current_tab = "allowed" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to check which resources the current actor is allowed to access for a given permission action. It queries the /-/allowed.json API endpoint.
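# Illustration (not part of the upstream diff): the same kind of check this page
# performs, issued directly against the JSON endpoint as the current (here
# anonymous) actor. The action and parent values are hypothetical; parameter
# names mirror the form fields below.
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({"action": "view-table", "parent": "fixtures"})
with urllib.request.urlopen(f"http://localhost:8001/-/allowed.json?{params}") as response:
    print(json.load(response))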

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + Only certain actions are supported by this endpoint +
+ +
+ + + Filter results to a specific parent resource +
+ +
+ + + Filter results to a specific child resource (requires parent to be set) +
+ +
+ + + Number of results per page (max 200) +
+ +
+ +
+
+
+ + + + + +{% endblock %} diff --git a/datasette/templates/debug_check.html b/datasette/templates/debug_check.html new file mode 100644 index 00000000..c2e7997f --- /dev/null +++ b/datasette/templates/debug_check.html @@ -0,0 +1,270 @@ +{% extends "base.html" %} + +{% block title %}Permission Check{% endblock %} + +{% block extra_head %} + +{% include "_permission_ui_styles.html" %} +{% include "_debug_common_functions.html" %} + +{% endblock %} + +{% block content %} +

Permission check

+ +{% set current_tab = "check" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to test permission checks for the current actor. It queries the /-/check.json API endpoint.

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + The permission action to check +
+ +
+ + + For database-level permissions, specify the database name +
+ +
+ + + For table-level permissions, specify the table name (requires parent) +
+ +
+ +
+
+
+ + + + + +{% endblock %} diff --git a/datasette/templates/debug_permissions_playground.html b/datasette/templates/debug_permissions_playground.html new file mode 100644 index 00000000..91ce1fcf --- /dev/null +++ b/datasette/templates/debug_permissions_playground.html @@ -0,0 +1,166 @@ +{% extends "base.html" %} + +{% block title %}Debug permissions{% endblock %} + +{% block extra_head %} +{% include "_permission_ui_styles.html" %} + +{% endblock %} + +{% block content %} +

Permission playground

+ +{% set current_tab = "permissions" %} +{% include "_permissions_debug_tabs.html" %} + +

This tool lets you simulate an actor and a permission check for that actor.

+ +
+
+ +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ +
+

+    
+
+ + + +

Recent permissions checks

+ +

+ {% if filter != "all" %}All{% else %}All{% endif %}, + {% if filter != "exclude-yours" %}Exclude yours{% else %}Exclude yours{% endif %}, + {% if filter != "only-yours" %}Only yours{% else %}Only yours{% endif %} +

+ +{% if permission_checks %} + + + + + + + + + + + + + {% for check in permission_checks %} + + + + + + + + + {% endfor %} + +
WhenActionParentChildActorResult
{{ check.when.split('T', 1)[0] }}
{{ check.when.split('T', 1)[1].split('+', 1)[0].split('-', 1)[0].split('Z', 1)[0] }}
{{ check.action }}{{ check.parent or '—' }}{{ check.child or '—' }}{% if check.actor %}{{ check.actor|tojson }}{% else %}anonymous{% endif %}{% if check.result %}Allowed{% elif check.result is none %}No opinion{% else %}Denied{% endif %}
+{% else %} +

No permission checks have been recorded yet.

+{% endif %} + +{% endblock %} diff --git a/datasette/templates/debug_rules.html b/datasette/templates/debug_rules.html new file mode 100644 index 00000000..9a290803 --- /dev/null +++ b/datasette/templates/debug_rules.html @@ -0,0 +1,203 @@ +{% extends "base.html" %} + +{% block title %}Permission Rules{% endblock %} + +{% block extra_head %} + +{% include "_permission_ui_styles.html" %} +{% include "_debug_common_functions.html" %} +{% endblock %} + +{% block content %} +

Permission rules

+ +{% set current_tab = "rules" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to view the permission rules that allow the current actor to access resources for a given permission action. It queries the /-/rules.json API endpoint.

+ +{% if request.actor %} +

Current actor: {{ request.actor.get("id", "anonymous") }}

+{% else %} +

Current actor: anonymous (not logged in)

+{% endif %} + +
+
+
+ + + The permission action to check +
+ +
+ + + Number of results per page (max 200) +
+ +
+ +
+
+
+
+
+
+
+
+{% endblock %}
diff --git a/datasette/templates/error.html b/datasette/templates/error.html
index 5c651d4e..3451d886 100644
--- a/datasette/templates/error.html
+++ b/datasette/templates/error.html
@@ -2,13 +2,6 @@
 {% block title %}{% if title %}{{ title }}{% else %}Error {{ status }}{% endif %}{% endblock %}

-{% block nav %}
-

- home -

- {{ super() }} -{% endblock %} - {% block content %}

{% if title %}{{ title }}{% else %}Error {{ status }}{% endif %}

diff --git a/datasette/templates/index.html b/datasette/templates/index.html
index 06e09635..03349279 100644
--- a/datasette/templates/index.html
+++ b/datasette/templates/index.html
@@ -2,17 +2,26 @@
 {% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %}

+{% block extra_head %}
+{% if noindex %}{% endif %}
+{% endblock %}
+
 {% block body_class %}index{% endblock %}

 {% block content %}

{{ metadata.title or "Datasette" }}{% if private %} 🔒{% endif %}

+{% set action_links, action_title = homepage_actions, "Homepage actions" %} +{% include "_action_menu.html" %} + +{{ top_homepage() }} + {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} {% for database in databases %}

{{ database.name }}{% if database.private %} 🔒{% endif %}

- {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif -%} + {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.hidden_tables_count %}, {% endif -%} {% if database.hidden_tables_count -%} {% if database.show_table_row_counts %}{{ "{:,}".format(database.hidden_table_rows_sum) }} rows in {% endif %}{{ database.hidden_tables_count }} hidden table{% if database.hidden_tables_count != 1 %}s{% endif -%} {% endif -%} diff --git a/datasette/templates/logout.html b/datasette/templates/logout.html index 98738679..c8fc642a 100644 --- a/datasette/templates/logout.html +++ b/datasette/templates/logout.html @@ -2,20 +2,13 @@ {% block title %}Log out{% endblock %} -{% block nav %} -

- home -

- {{ super() }} -{% endblock %} - {% block content %}

Log out

You are logged in as {{ display_actor(actor) }}

-
+
diff --git a/datasette/templates/messages_debug.html b/datasette/templates/messages_debug.html index e0ab9a40..2940cd69 100644 --- a/datasette/templates/messages_debug.html +++ b/datasette/templates/messages_debug.html @@ -8,7 +8,7 @@

Set a message:

- +
diff --git a/datasette/templates/patterns.html b/datasette/templates/patterns.html index ac9e2e46..7770f7d4 100644 --- a/datasette/templates/patterns.html +++ b/datasette/templates/patterns.html @@ -1,5 +1,5 @@ - + Datasette: Pattern Portfolio @@ -9,19 +9,33 @@ +
-
-
- -
- - -
- -
- - -

Pattern Portfolio

@@ -31,18 +45,15 @@

Header for /database/table/row and Messages

-
+
@@ -51,15 +62,6 @@

Example message

Example message

- - - - - - - - -

.bd for /

Datasette Fixtures

@@ -68,10 +70,10 @@

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -90,24 +92,42 @@

names, foo

- - - - - -

.bd for /database

-

fixtures

+ +
+ +
+

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -139,20 +159,40 @@

- - - - -

.bd for /database/table

+
-

roadside_attraction_characteristics

+ +
+ +
+

Data license: - Apache License 2.0 + Apache License 2.0 · Data source: - + tests/fixtures.py · About: @@ -246,7 +286,6 @@

-

2 extra where clauses

    @@ -258,7 +297,6 @@
-

View and edit SQL

@@ -267,11 +305,6 @@ Suggested facets: tags, created (date), tags (array)

- - - - -
@@ -409,16 +442,6 @@ ); - - - - - - - - - -

.bd for /database/table/row

roadside_attractions: 2

@@ -463,29 +486,22 @@
- - - - - - - - - -

.ft

-