diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..6ca0fac8
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+omit = datasette/_version.py, datasette/utils/shutil_backport.py
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..5078bf47
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,13 @@
+.DS_Store
+.cache
+.eggs
+.gitignore
+.ipynb_checkpoints
+build
+*.spec
+*.egg-info
+dist
+scratchpad
+venv
+*.db
+*.sqlite
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000..84e574fd
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,4 @@
+# Applying Black
+35d6ee2790e41e96f243c1ff58be0c9c0519a8ce
+368638555160fb9ac78f462d0f79b1394163fa30
+2b344f6a34d2adaa305996a1a580ece06397f6e4
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..744258eb
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+datasette/static/codemirror-* linguist-vendored
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..f0bcdbe0
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [simonw]
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..88bb03b1
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+- package-ecosystem: pip
+  directory: "/"
+  schedule:
+    interval: daily
+    time: "13:00"
+  groups:
+    python-packages:
+      patterns:
+        - "*"
diff --git a/.github/workflows/deploy-branch-preview.yml b/.github/workflows/deploy-branch-preview.yml
new file mode 100644
index 00000000..e56d9c27
--- /dev/null
+++ b/.github/workflows/deploy-branch-preview.yml
@@ -0,0 +1,35 @@
+name: Deploy a Datasette branch preview to Vercel
+
+on:
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: "Branch to deploy"
+        required: true
+        type: string
+
+jobs:
+  deploy-branch-preview:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          pip install datasette-publish-vercel
+      - name: Deploy the preview
+        env:
+          VERCEL_TOKEN: ${{ secrets.BRANCH_PREVIEW_VERCEL_TOKEN }}
+        run: |
+          export BRANCH="${{ github.event.inputs.branch }}"
+          wget https://latest.datasette.io/fixtures.db
+          datasette publish vercel fixtures.db \
+            --branch $BRANCH \
+            --project "datasette-preview-$BRANCH" \
+            --token $VERCEL_TOKEN \
+            --scope datasette \
+            --about "Preview of $BRANCH" \
+            --about_url "https://github.com/simonw/datasette/tree/$BRANCH"
diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml
new file mode 100644
index 00000000..9f53b01e
--- /dev/null
+++ b/.github/workflows/deploy-latest.yml
@@ -0,0 +1,132 @@
+name: Deploy latest.datasette.io
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+      # - 1.0-dev
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out datasette
+        uses: actions/checkout@v5
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+          cache: pip
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e .[test]
+          python -m pip install -e .[docs]
+          python -m pip install sphinx-to-sqlite==0.1a1
+      - name: Run tests
+        if: ${{ github.ref == 'refs/heads/main' }}
+        run: |
+          pytest -n auto -m "not serial"
+          pytest -m "serial"
+      - name: Build fixtures.db and other files needed to deploy the demo
+        run: |-
+          python tests/fixtures.py \
+            fixtures.db \
+            fixtures-config.json \
+            fixtures-metadata.json \
+            plugins \
+            --extra-db-filename extra_database.db
+      - name: Build docs.db
+        if: ${{ github.ref == 'refs/heads/main' }}
+        run: |-
+          cd docs
+          DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build
+          sphinx-to-sqlite ../docs.db _build
+          cd ..
+      - name: Set up the alternate-route demo
+        run: |
+          echo '
+          from datasette import hookimpl
+
+          @hookimpl
+          def startup(datasette):
+              db = datasette.get_database("fixtures2")
+              db.route = "alternative-route"
+          ' > plugins/alternative_route.py
+          cp fixtures.db fixtures2.db
+      - name: And the counters writable canned query demo
+        run: |
+          cat > plugins/counters.py < metadata.json
+          # cat metadata.json
+      - id: auth
+        name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v3
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+      - name: Set up Cloud SDK
+        uses: google-github-actions/setup-gcloud@v3
+      - name: Deploy to Cloud Run
+        env:
+          LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }}
+        run: |-
+          gcloud config set run/region us-central1
+          gcloud config set project datasette-222320
+          export SUFFIX="-${GITHUB_REF#refs/heads/}"
+          export SUFFIX=${SUFFIX#-main}
+          # Replace 1.0 with one-dot-zero in SUFFIX
+          export SUFFIX=${SUFFIX//1.0/one-dot-zero}
+          datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \
+            -m fixtures-metadata.json \
+            --plugins-dir=plugins \
+            --branch=$GITHUB_SHA \
+            --version-note=$GITHUB_SHA \
+            --extra-options="--setting template_debug 1 --setting trace_debug 1 --crossdb" \
+            --install 'datasette-ephemeral-tables>=0.2.2' \
+            --service "datasette-latest$SUFFIX" \
+            --secret $LATEST_DATASETTE_SECRET
+      - name: Deploy to docs as well (only for main)
+        if: ${{ github.ref == 'refs/heads/main' }}
+        run: |-
+          # Deploy docs.db to a different service
+          datasette publish cloudrun docs.db \
+            --branch=$GITHUB_SHA \
+            --version-note=$GITHUB_SHA \
+            --extra-options="--setting template_debug 1" \
+            --service=datasette-docs-latest
diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml
new file mode 100644
index 00000000..a54bd83a
--- /dev/null
+++ b/.github/workflows/documentation-links.yml
@@ -0,0 +1,16 @@
+name: Read the Docs Pull Request Preview
+on:
+  pull_request_target:
+    types:
+      - opened
+
+permissions:
+  pull-requests: write
+
+jobs:
+  documentation-links:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: readthedocs/actions/preview@v1
+        with:
+          project-slug: "datasette"
diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml
new file mode 100644
index 00000000..77cce7d1
--- /dev/null
+++ b/.github/workflows/prettier.yml
@@ -0,0 +1,25 @@
+name: Check JavaScript for conformance with Prettier
+
+on: [push]
+
+permissions:
+  contents: read
+
+jobs:
+  prettier:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+      - uses: actions/cache@v4
+        name: Configure npm caching
+        with:
+          path: ~/.npm
+          key: ${{ runner.OS }}-npm-${{ hashFiles('**/package-lock.json') }}
+          restore-keys: |
+            ${{ runner.OS }}-npm-
+      - name: Install dependencies
+        run: npm ci
+      - name: Run prettier
+        run: |-
+          npm run prettier -- --check
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 00000000..e94d0bdd
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,109 @@
+name: Publish Python Package
+
+on:
+  release:
+    types: [created]
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Install dependencies
+        run: |
+          pip install -e '.[test]'
+      - name: Run tests
+        run: |
+          pytest
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: [test]
+    environment: release
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.13'
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Install dependencies
+        run: |
+          pip install setuptools wheel build
+      - name: Build
+        run: |
+          python -m build
+      - name: Publish
+        uses: pypa/gh-action-pypi-publish@release/v1
+
+  deploy_static_docs:
+    runs-on: ubuntu-latest
+    needs: [deploy]
+    if: "!github.event.release.prerelease"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.10'
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Install dependencies
+        run: |
+          python -m pip install -e .[docs]
+          python -m pip install sphinx-to-sqlite==0.1a1
+      - name: Build docs.db
+        run: |-
+          cd docs
+          DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build
+          sphinx-to-sqlite ../docs.db _build
+          cd ..
+      - id: auth
+        name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+      - name: Set up Cloud SDK
+        uses: google-github-actions/setup-gcloud@v3
+      - name: Deploy stable-docs.datasette.io to Cloud Run
+        run: |-
+          gcloud config set run/region us-central1
+          gcloud config set project datasette-222320
+          datasette publish cloudrun docs.db \
+            --service=datasette-docs-stable
+
+  deploy_docker:
+    runs-on: ubuntu-latest
+    needs: [deploy]
+    if: "!github.event.release.prerelease"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build and push to Docker Hub
+        env:
+          DOCKER_USER: ${{ secrets.DOCKER_USER }}
+          DOCKER_PASS: ${{ secrets.DOCKER_PASS }}
+        run: |-
+          sleep 60 # Give PyPI time to make the new release available
+          docker login -u $DOCKER_USER -p $DOCKER_PASS
+          export REPO=datasetteproject/datasette
+          docker build -f Dockerfile \
+            -t $REPO:${GITHUB_REF#refs/tags/} \
+            --build-arg VERSION=${GITHUB_REF#refs/tags/} .
+          docker tag $REPO:${GITHUB_REF#refs/tags/} $REPO:latest
+          docker push $REPO:${GITHUB_REF#refs/tags/}
+          docker push $REPO:latest
diff --git a/.github/workflows/push_docker_tag.yml b/.github/workflows/push_docker_tag.yml
new file mode 100644
index 00000000..afe8d6b2
--- /dev/null
+++ b/.github/workflows/push_docker_tag.yml
@@ -0,0 +1,28 @@
+name: Push specific Docker tag
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_tag:
+        description: Tag to build and push
+
+permissions:
+  contents: read
+
+jobs:
+  deploy_docker:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build and push to Docker Hub
+        env:
+          DOCKER_USER: ${{ secrets.DOCKER_USER }}
+          DOCKER_PASS: ${{ secrets.DOCKER_PASS }}
+          VERSION_TAG: ${{ github.event.inputs.version_tag }}
+        run: |-
+          docker login -u $DOCKER_USER -p $DOCKER_PASS
+          export REPO=datasetteproject/datasette
+          docker build -f Dockerfile \
+            -t $REPO:${VERSION_TAG} \
+            --build-arg VERSION=${VERSION_TAG} .
+          docker push $REPO:${VERSION_TAG}
diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 00000000..7c5370ce
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,27 @@
+name: Check spelling in documentation
+
+on: [push, pull_request]
+
+permissions:
+  contents: read
+
+jobs:
+  spellcheck:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+          cache-dependency-path: '**/pyproject.toml'
+      - name: Install dependencies
+        run: |
+          pip install -e '.[docs]'
+      - name: Check spelling
+        run: |
+          codespell README.md --ignore-words docs/codespell-ignore-words.txt
+          codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt
+          codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt
+          codespell tests --ignore-words docs/codespell-ignore-words.txt
diff --git a/.github/workflows/stable-docs.yml b/.github/workflows/stable-docs.yml
new file mode 100644
index 00000000..3119d617
--- /dev/null
+++ b/.github/workflows/stable-docs.yml
@@ -0,0 +1,76 @@
+name: Update Stable Docs
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: write
+
+jobs:
+  update_stable_docs:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0 # We need all commits to find docs/ changes
+      - name: Set up Git user
+        run: |
+          git config user.name "Automated"
+          git config user.email "actions@users.noreply.github.com"
+      - name: Create stable branch if it does not yet exist
+        run: |
+          if ! git ls-remote --heads origin stable | grep -qE '\bstable\b'; then
+            # Make sure we have all tags locally
+            git fetch --tags --quiet
+
+            # Latest tag that is just numbers and dots (optionally prefixed with 'v')
+            # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc.
+            LATEST_RELEASE=$(
+              git tag -l --sort=-v:refname \
+                | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \
+                | head -n1
+            )
+
+            git checkout -b stable
+
+            # If there are any stable releases, copy docs/ from the most recent
+            if [ -n "$LATEST_RELEASE" ]; then
+              rm -rf docs/
+              git checkout "$LATEST_RELEASE" -- docs/ || true
+            fi
+
+            git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes"
+            git push -u origin stable
+          fi
+      - name: Handle Release
+        if: github.event_name == 'release' && !github.event.release.prerelease
+        run: |
+          git fetch --all
+          git checkout stable
+          git reset --hard ${GITHUB_REF#refs/tags/}
+          git push origin stable --force
+      - name: Handle Commit to Main
+        if: contains(github.event.head_commit.message, '!stable-docs')
+        run: |
+          git fetch origin
+          git checkout -b stable origin/stable
+          # Get the list of modified files in docs/ from the current commit
+          FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/)
+          # Check if the list of files is non-empty
+          if [[ -n "$FILES" ]]; then
+            # Checkout those files to the stable branch to over-write with their contents
+            for FILE in $FILES; do
+              git checkout ${{ github.sha }} -- $FILE
+            done
+            git add docs/
+            git commit -m "Doc changes from ${{ github.sha }}"
+            git push origin stable
+          else
+            echo "No changes to docs/ in this commit."
+ exit 0 + fi diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml new file mode 100644 index 00000000..8d73b64d --- /dev/null +++ b/.github/workflows/test-coverage.yml @@ -0,0 +1,40 @@ +name: Calculate test coverage + +on: + push: + branches: + - main + pull_request: + branches: + - main +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Check out datasette + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e .[test] + python -m pip install pytest-cov + - name: Run tests + run: |- + ls -lah + cat .coveragerc + pytest -m "not serial" --cov=datasette --cov-config=.coveragerc --cov-report xml:coverage.xml --cov-report term -x + ls -lah + - name: Upload coverage report + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: coverage.xml diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml new file mode 100644 index 00000000..b490a9bf --- /dev/null +++ b/.github/workflows/test-pyodide.yml @@ -0,0 +1,33 @@ +name: Test in Pyodide with shot-scraper + +on: + push: + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v6 + with: + python-version: "3.10" + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + - name: Cache Playwright browsers + uses: actions/cache@v4 + with: + path: ~/.cache/ms-playwright/ + key: ${{ runner.os }}-browsers + - name: Install Playwright dependencies + run: | + pip install shot-scraper build + shot-scraper install + - name: Run test + run: | + ./test-in-pyodide-with-shot-scraper.sh diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml new file mode 100644 index 00000000..76ea138a --- /dev/null +++ b/.github/workflows/test-sqlite-support.yml @@ -0,0 +1,53 @@ +name: Test SQLite versions + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + test: + runs-on: ${{ matrix.platform }} + continue-on-error: true + strategy: + matrix: + platform: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + sqlite-version: [ + #"3", # latest version + "3.46", + #"3.45", + #"3.27", + #"3.26", + "3.25", + #"3.25.3", # 2018-09-25, window functions breaks test_upsert for some reason on 3.10, skip for now + #"3.24", # 2018-06-04, added UPSERT support + #"3.23.1" # 2018-04-10, before UPSERT + ] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + cache: pip + cache-dependency-path: pyproject.toml + - name: Set up SQLite ${{ matrix.sqlite-version }} + uses: asg017/sqlite-versions@71ea0de37ae739c33e447af91ba71dda8fcf22e6 + with: + version: ${{ matrix.sqlite-version }} + cflags: "-DSQLITE_ENABLE_DESERIALIZE -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1" + - run: python3 -c "import sqlite3; print(sqlite3.sqlite_version)" + - run: echo $LD_LIBRARY_PATH + - name: Build extension for --load-extension test + run: |- + (cd tests && gcc ext.c -fPIC -shared -o ext.so) + - 
name: Install dependencies + run: | + pip install -e '.[test]' + pip freeze + - name: Run tests + run: | + pytest -n auto -m "not serial" + pytest -m "serial" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..1e5e03d2 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,51 @@ +name: Test + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + cache: pip + cache-dependency-path: pyproject.toml + - name: Build extension for --load-extension test + run: |- + (cd tests && gcc ext.c -fPIC -shared -o ext.so) + - name: Install dependencies + run: | + pip install -e '.[test]' + pip freeze + - name: Run tests + run: | + pytest -n auto -m "not serial" + pytest -m "serial" + # And the test that exceeds a localhost HTTPS server + tests/test_datasette_https_server.sh + - name: Install docs dependencies + run: | + pip install -e '.[docs]' + - name: Black + run: black --check . + - name: Check if cog needs to be run + run: | + cog --check docs/*.rst + - name: Check if blacken-docs needs to be run + run: | + # This fails on syntax errors, or a diff was applied + blacken-docs -l 60 docs/*.rst + - name: Test DATASETTE_LOAD_PLUGINS + run: | + pip install datasette-init datasette-json-html + tests/test-datasette-load-plugins.sh diff --git a/.github/workflows/tmate-mac.yml b/.github/workflows/tmate-mac.yml new file mode 100644 index 00000000..fcee0f21 --- /dev/null +++ b/.github/workflows/tmate-mac.yml @@ -0,0 +1,15 @@ +name: tmate session mac + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + build: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 diff --git a/.github/workflows/tmate.yml b/.github/workflows/tmate.yml new file mode 100644 index 00000000..123f6c71 --- /dev/null +++ b/.github/workflows/tmate.yml @@ -0,0 +1,18 @@ +name: tmate session + +on: + workflow_dispatch: + +permissions: + contents: read + models: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index e664d927..70e6bbeb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,19 @@ build-metadata.json datasets.json -# SQLite databases -*.db -*.sqlite +scratchpad + +.vscode + +uv.lock +data.db + +# We don't use Pipfile, so ignore them +Pipfile +Pipfile.lock + +fixtures.db +*test.db # Byte-compiled / optimized / DLL files __pycache__/ @@ -107,3 +117,13 @@ ENV/ # mypy .mypy_cache/ +# macOS files +.DS_Store +node_modules +.*.swp + +# In case someone compiled tests/ext.c for test_load_extensions, don't +# include it in source control. 
+tests/*.dylib +tests/*.so +tests/*.dll diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..0cece53b --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,3 @@ +[settings] +multi_line_output=3 + diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 00000000..222861c3 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,4 @@ +{ + "tabWidth": 2, + "useTabs": false +} diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..5b30e75a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,16 @@ +version: 2 + +build: + os: ubuntu-20.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py + +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e2db159c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ -language: python - -python: - - 3.6 - -script: - - python setup.py test diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..14d4c567 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`swillison+datasette-code-of-conduct@gmail.com`. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..9a8f06cf --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11.0-slim-bullseye as build + +# Version of Datasette to install, e.g. 0.55 +# docker build . 
-t datasette --build-arg VERSION=0.55 +ARG VERSION + +RUN apt-get update && \ + apt-get install -y --no-install-recommends libsqlite3-mod-spatialite && \ + apt clean && \ + rm -rf /var/lib/apt && \ + rm -rf /var/lib/dpkg/info/* + +RUN pip install https://github.com/simonw/datasette/archive/refs/tags/${VERSION}.zip && \ + find /usr/local/lib -name '__pycache__' | xargs rm -r && \ + rm -rf /root/.cache/pip + +EXPOSE 8001 +CMD ["datasette"] diff --git a/Justfile b/Justfile new file mode 100644 index 00000000..a47662c3 --- /dev/null +++ b/Justfile @@ -0,0 +1,56 @@ +export DATASETTE_SECRET := "not_a_secret" + +# Run tests and linters +@default: test lint + +# Setup project +@init: + uv sync --extra test --extra docs + +# Run pytest with supplied options +@test *options: init + uv run pytest -n auto {{options}} + +@codespell: + uv run codespell README.md --ignore-words docs/codespell-ignore-words.txt + uv run codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt + uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt + uv run codespell tests --ignore-words docs/codespell-ignore-words.txt + +# Run linters: black, flake8, mypy, cog +@lint: codespell + uv run black . --check + uv run flake8 + uv run --extra test cog --check README.md docs/*.rst + +# Rebuild docs with cog +@cog: + uv run --extra test cog -r README.md docs/*.rst + +# Serve live docs on localhost:8000 +@docs: cog blacken-docs + uv run --extra docs make -C docs livehtml + +# Build docs as static HTML +@docs-build: cog blacken-docs + rm -rf docs/_build && cd docs && uv run make html + +# Apply Black +@black: + uv run black . + +# Apply blacken-docs +@blacken-docs: + uv run blacken-docs -l 60 docs/*.rst + +# Apply prettier +@prettier: + npm run fix + +# Format code with both black and prettier +@format: black prettier blacken-docs + +@serve *options: + uv run sqlite-utils create-database data.db + uv run sqlite-utils create-table data.db docs id integer title text --pk id --ignore + uv run python -m datasette data.db --root --reload {{options}} diff --git a/MANIFEST.in b/MANIFEST.in index 696c1146..8c5e3ee6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,5 @@ recursive-include datasette/static * +recursive-include datasette/templates * +include versioneer.py +include datasette/_version.py +include LICENSE diff --git a/README.md b/README.md index 3d861718..393e8e5c 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,42 @@ -# datasette +Datasette -[![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.python.org/pypi/datasette) -*An instant JSON API for your SQLite database* +[![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) +[![Changelog](https://img.shields.io/github/v/release/simonw/datasette?label=changelog)](https://docs.datasette.io/en/latest/changelog.html) +[![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) +[![Tests](https://github.com/simonw/datasette/workflows/Test/badge.svg)](https://github.com/simonw/datasette/actions?query=workflow%3ATest) +[![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) +[![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) 
+[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord) -Datasette provides an instant, read-only JSON API for any SQLite database. It also provides tools for packaging the database up as a Docker container and deploying that container to hosting providers such as [Zeit Now](https://zeit.co/now). +*An open source multi-tool for exploring and publishing data* + +Datasette is a tool for exploring and publishing data. It helps people take data of any shape or size and publish that as an interactive, explorable website and accompanying API. + +Datasette is aimed at data journalists, museum curators, archivists, local governments, scientists, researchers and anyone else who has data that they wish to share with the world. + +[Explore a demo](https://datasette.io/global-power-plants/global-power-plants), watch [a video about the project](https://simonwillison.net/2021/Feb/7/video/) or try it out [on GitHub Codespaces](https://github.com/datasette/datasette-studio). + +* [datasette.io](https://datasette.io/) is the official project website +* Latest [Datasette News](https://datasette.io/news) +* Comprehensive documentation: https://docs.datasette.io/ +* Examples: https://datasette.io/examples +* Live demo of current `main` branch: https://latest.datasette.io/ +* Questions, feedback or want to talk about the project? Join our [Discord](https://datasette.io/discord) + +Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. ## Installation - pip3 install datasette +If you are on a Mac, [Homebrew](https://brew.sh/) is the easiest way to install Datasette: -Datasette requires Python 3.5 or higher. + brew install datasette + +You can also install it using `pip` or `pipx`: + + pip install datasette + +Datasette requires Python 3.8 or higher. We also have [detailed installation instructions](https://docs.datasette.io/en/stable/installation.html) covering other options such as Docker. ## Basic usage @@ -21,77 +48,12 @@ This will start a web server on port 8001 - visit http://localhost:8001/ to acce Use Chrome on OS X? You can run datasette against your browser history like so: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History + datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: ![Downloads table rendered by datasette](https://static.simonwillison.net/static/2017/datasette-downloads.png) -http://localhost:8001/History/downloads.json will return that data as JSON: - - { - "database": "History", - "columns": [ - "id", - "current_path", - "target_path", - "start_time", - "received_bytes", - "total_bytes", - ... - ], - "table_rows": 576, - "rows": [ - [ - 1, - "/Users/simonw/Downloads/DropboxInstaller.dmg", - "/Users/simonw/Downloads/DropboxInstaller.dmg", - 13097290269022132, - 626688, - 0, - ... - ] - ] - } - - -http://localhost:8001/History/downloads.jsono will return that data as JSON in a more convenient but less efficient format: - - { - ... - "rows": [ - { - "start_time": 13097290269022132, - "interrupt_reason": 0, - "hash": "", - "id": 1, - "site_url": "", - "referrer": "https://www.dropbox.com/downloading?src=index", - ... - } - ] - } - -## datasette serve options - - $ datasette serve --help - Usage: datasette serve [OPTIONS] [FILES]... 
- - Serve up specified SQLite database files with a web UI - - Options: - -h, --host TEXT host for server, defaults to 0.0.0.0 - -p, --port INTEGER port for server, defaults to 8001 - --debug Enable debug mode - useful for development - --reload Automatically reload if code change detected - - useful for development - --cors Enable CORS by serving Access-Control-Allow-Origin: - * - --inspect-file TEXT Path to JSON file created using "datasette build" - -m, --metadata FILENAME Path to JSON file containing license/source - metadata - --help Show this message and exit. - ## metadata.json If you want to include licensing and source information in the generated datasette website you can do so using a JSON file that looks something like this: @@ -104,43 +66,26 @@ If you want to include licensing and source information in the generated dataset "source_url": "https://github.com/fivethirtyeight/data" } +Save this in `metadata.json` and run Datasette like so: + + datasette serve fivethirtyeight.db -m metadata.json + The license and source information will be displayed on the index page and in the footer. They will also be included in the JSON produced by the API. ## datasette publish -If you have [Zeit Now](https://zeit.co/now) installed, datasette can deploy one or more SQLite databases to the internet with a single command: +If you have [Heroku](https://heroku.com/) or [Google Cloud Run](https://cloud.google.com/run/) configured, Datasette can deploy one or more SQLite databases to the internet with a single command: - datasette publish now database.db + datasette publish heroku database.db -This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Zeit Now and give you a URL to access the API. +Or: - $ datasette publish --help - Usage: datasette publish [OPTIONS] PUBLISHER [FILES]... + datasette publish cloudrun database.db - Publish specified SQLite database files to the internet along with a - datasette API. +This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Heroku or Cloud Run and give you a URL to access the resulting website and API. - Only current option for PUBLISHER is 'now'. You must have Zeit Now - installed: https://zeit.co/now +See [Publishing data](https://docs.datasette.io/en/stable/publish.html) in the documentation for more details. - Example usage: datasette publish now my-database.db +## Datasette Lite - Options: - -n, --name TEXT Application name to use when deploying to Now - -m, --metadata FILENAME Path to JSON file containing metadata to publish - --help Show this message and exit. - -## datasette package - -If you have docker installed you can use `datasette package` to create a new Docker image in your local repository containing the datasette app and selected SQLite databases: - - $ datasette package --help - Usage: datasette package [OPTIONS] FILES... - - Package specified SQLite files into a new datasette Docker container - - Options: - -t, --tag TEXT Name for the resulting Docker container, can - optionally use name:tag format - -m, --metadata FILENAME Path to JSON file containing metadata to publish - --help Show this message and exit. +[Datasette Lite](https://lite.datasette.io/) is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. 
Read more about that in the [Datasette Lite documentation](https://github.com/simonw/datasette-lite/blob/main/README.md). diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..bfdc9877 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true diff --git a/datasette/.DS_Store b/datasette/.DS_Store deleted file mode 100644 index 459d1977..00000000 Binary files a/datasette/.DS_Store and /dev/null differ diff --git a/datasette/__init__.py b/datasette/__init__.py index e69de29b..47d2b4f6 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -0,0 +1,8 @@ +from datasette.permissions import Permission # noqa +from datasette.version import __version_info__, __version__ # noqa +from datasette.events import Event # noqa +from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa +from datasette.utils import actor_matches_allow # noqa +from datasette.views import Context # noqa +from .hookspecs import hookimpl # noqa +from .hookspecs import hookspec # noqa diff --git a/datasette/__main__.py b/datasette/__main__.py new file mode 100644 index 00000000..4adef844 --- /dev/null +++ b/datasette/__main__.py @@ -0,0 +1,4 @@ +from datasette.cli import cli + +if __name__ == "__main__": + cli() diff --git a/datasette/actor_auth_cookie.py b/datasette/actor_auth_cookie.py new file mode 100644 index 00000000..368213af --- /dev/null +++ b/datasette/actor_auth_cookie.py @@ -0,0 +1,23 @@ +from datasette import hookimpl +from itsdangerous import BadSignature +from datasette.utils import baseconv +import time + + +@hookimpl +def actor_from_request(datasette, request): + if "ds_actor" not in request.cookies: + return None + try: + decoded = datasette.unsign(request.cookies["ds_actor"], "actor") + # If it has "e" and "a" keys process the "e" expiry + if not isinstance(decoded, dict) or "a" not in decoded: + return None + expires_at = decoded.get("e") + if expires_at: + timestamp = int(baseconv.base62.decode(expires_at)) + if time.time() > timestamp: + return None + return decoded["a"] + except BadSignature: + return None diff --git a/datasette/app.py b/datasette/app.py index fe238eb1..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,560 +1,2539 @@ -from sanic import Sanic -from sanic import response -from sanic.exceptions import NotFound -from sanic.views import HTTPMethodView -from sanic_jinja2 import SanicJinja2 -from jinja2 import FileSystemLoader -import sqlite3 -from pathlib import Path -from concurrent import futures +from __future__ import annotations + +from asgi_csrf import Errors import asyncio -import threading -import urllib.parse -import json +import contextvars +from typing import TYPE_CHECKING, Any, Dict, Iterable, List + +if TYPE_CHECKING: + from datasette.permissions import AllowedResource, Resource +import asgi_csrf +import collections +import dataclasses +import datetime +import functools +import glob import hashlib +import httpx +import importlib.metadata +import inspect +from itsdangerous import BadSignature +import json +import os +import re +import secrets +import sys +import threading import time -from .utils import ( - build_where_clauses, - CustomJSONEncoder, - escape_css_string, - escape_sqlite_table_name, - InvalidSql, - path_from_row_pks, - path_with_added_args, - path_with_ext, - compound_pks_from_path, - sqlite_timelimit, - validate_sql_select, +import types +import urllib.parse +from concurrent import futures +from pathlib 
import Path + +from markupsafe import Markup, escape +from itsdangerous import URLSafeSerializer +from jinja2 import ( + ChoiceLoader, + Environment, + FileSystemLoader, + PrefixLoader, ) +from jinja2.environment import Template +from jinja2.exceptions import TemplateNotFound + +from .events import Event +from .views import Context +from .views.database import database_download, DatabaseView, TableCreateView, QueryView +from .views.index import IndexView +from .views.special import ( + JsonDataView, + PatternPortfolioView, + AuthTokenView, + ApiExplorerView, + CreateTokenView, + LogoutView, + AllowDebugView, + PermissionsDebugView, + MessagesDebugView, + AllowedResourcesView, + PermissionRulesView, + PermissionCheckView, + TablesView, + InstanceSchemaView, + DatabaseSchemaView, + TableSchemaView, +) +from .views.table import ( + TableInsertView, + TableUpsertView, + TableDropView, + table_view, +) +from .views.row import RowView, RowDeleteView, RowUpdateView +from .renderer import json_renderer +from .url_builder import Urls +from .database import Database, QueryInterrupted + +from .utils import ( + PaginatedResources, + PrefixedUrlString, + SPATIALITE_FUNCTIONS, + StartupError, + async_call_with_supported_arguments, + await_me_maybe, + baseconv, + call_with_supported_arguments, + detect_json1, + display_actor, + escape_css_string, + escape_sqlite, + find_spatialite, + format_bytes, + module_from_path, + move_plugins_and_allow, + move_table_config, + parse_metadata, + resolve_env_secrets, + resolve_routes, + tilde_decode, + tilde_encode, + to_css_class, + urlsafe_components, + redact_keys, + row_sql_params_pks, +) +from .utils.asgi import ( + AsgiLifespan, + Forbidden, + NotFound, + DatabaseNotFound, + TableNotFound, + RowNotFound, + Request, + Response, + AsgiRunOnFirstRequest, + asgi_static, + asgi_send, + asgi_send_file, + asgi_send_redirect, +) +from .utils.internal_db import init_internal_db, populate_schema_tables +from .utils.sqlite import ( + sqlite3, + using_pysqlite3, +) +from .tracer import AsgiTracer +from .plugins import pm, DEFAULT_PLUGINS, get_plugins +from .version import __version__ + +from .resources import DatabaseResource, TableResource app_root = Path(__file__).parent.parent -HASH_BLOCK_SIZE = 1024 * 1024 -SQL_TIME_LIMIT_MS = 1000 -connections = threading.local() +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) -class BaseView(HTTPMethodView): - template = None +class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" - def __init__(self, datasette): - self.ds = datasette - self.files = datasette.files - self.jinja = datasette.jinja - self.executor = datasette.executor - self.page_size = datasette.page_size - self.cache_headers = datasette.cache_headers + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self - def options(self, request, *args, **kwargs): - r = response.text('ok') - if self.ds.cors: - r.headers['Access-Control-Allow-Origin'] = '*' - return r - - def redirect(self, request, path): - if request.query_string: - path = '{}?{}'.format( - path, request.query_string - ) - r = response.redirect(path) - r.headers['Link'] = '<{}>; rel=preload'.format(path) - if self.ds.cors: - r.headers['Access-Control-Allow-Origin'] = '*' - return r - - async def pks_for_table(self, name, table): - rows = [ - row for row in await self.execute( - name, - 'PRAGMA 
table_info("{}")'.format(table) - ) - if row[-1] - ] - rows.sort(key=lambda row: row[-1]) - return [str(r[1]) for r in rows] - - def resolve_db_name(self, db_name, **kwargs): - databases = self.ds.inspect() - hash = None - name = None - if '-' in db_name: - # Might be name-and-hash, or might just be - # a name with a hyphen in it - name, hash = db_name.rsplit('-', 1) - if name not in databases: - # Try the whole name - name = db_name - hash = None - else: - name = db_name - # Verify the hash - try: - info = databases[name] - except KeyError: - raise NotFound('Database not found: {}'.format(name)) - expected = info['hash'][:7] - if expected != hash: - should_redirect = '/{}-{}'.format( - name, expected, - ) - if 'table' in kwargs: - should_redirect += '/' + kwargs['table'] - if 'as_json' in kwargs: - should_redirect += kwargs['as_json'] - if 'as_db' in kwargs: - should_redirect += kwargs['as_db'] - return name, expected, should_redirect - return name, expected, None - - async def execute(self, db_name, sql, params=None): - """Executes sql against db_name in a thread""" - def sql_operation_in_thread(): - conn = getattr(connections, db_name, None) - if not conn: - info = self.ds.inspect()[db_name] - conn = sqlite3.connect( - 'file:{}?immutable=1'.format(info['file']), - uri=True, - check_same_thread=False, - ) - conn.row_factory = sqlite3.Row - conn.text_factory = lambda x: str(x, 'utf-8', 'replace') - setattr(connections, db_name, conn) - - with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS): - try: - rows = conn.execute(sql, params or {}) - except Exception: - print('ERROR: conn={}, sql = {}, params = {}'.format( - conn, repr(sql), params - )) - raise - return rows - - return await asyncio.get_event_loop().run_in_executor( - self.executor, sql_operation_in_thread - ) - - async def get(self, request, db_name, **kwargs): - name, hash, should_redirect = self.resolve_db_name(db_name, **kwargs) - if should_redirect: - return self.redirect(request, should_redirect) - return await self.view_get(request, name, hash, **kwargs) - - async def view_get(self, request, name, hash, **kwargs): - try: - as_json = kwargs.pop('as_json') - except KeyError: - as_json = False - extra_template_data = {} - start = time.time() - template = self.template - status_code = 200 - try: - data, extra_template_data = await self.data( - request, name, hash, **kwargs - ) - except (sqlite3.OperationalError, InvalidSql) as e: - data = { - 'ok': False, - 'error': str(e), - 'database': name, - 'database_hash': hash, - } - template = 'error.html' - status_code = 400 - end = time.time() - data['query_ms'] = (end - start) * 1000 - for key in ('source', 'source_url', 'license', 'license_url'): - value = self.ds.metadata.get(key) - if value: - data[key] = value - if as_json: - # Special case for .jsono extension - if as_json == '.jsono': - columns = data.get('columns') - rows = data.get('rows') - if rows and columns: - data['rows'] = [ - dict(zip(columns, row)) - for row in rows - ] - headers = {} - if self.ds.cors: - headers['Access-Control-Allow-Origin'] = '*' - r = response.HTTPResponse( - json.dumps( - data, cls=CustomJSONEncoder - ), - status=status_code, - content_type='application/json', - headers=headers, - ) - else: - context = {**data, **dict( - extra_template_data() - if callable(extra_template_data) - else extra_template_data - ), **{ - 'url_json': path_with_ext(request, '.json'), - 'url_jsono': path_with_ext(request, '.jsono'), - 'metadata': self.ds.metadata, - }} - r = self.jinja.render( - template, - request, - **context, - 
) - r.status = status_code - # Set far-future cache expiry - if self.cache_headers: - r.headers['Cache-Control'] = 'max-age={}'.format( - 365 * 24 * 60 * 60 - ) - return r + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False -class IndexView(HTTPMethodView): - def __init__(self, datasette): - self.ds = datasette - self.files = datasette.files - self.jinja = datasette.jinja - self.executor = datasette.executor +@dataclasses.dataclass +class PermissionCheck: + """Represents a logged permission check for debugging purposes.""" - async def get(self, request, as_json): - databases = [] - for key, info in sorted(self.ds.inspect().items()): - database = { - 'name': key, - 'hash': info['hash'], - 'path': '{}-{}'.format(key, info['hash'][:7]), - 'tables_truncated': sorted( - info['tables'].items(), - key=lambda p: p[1], - reverse=True - )[:5], - 'tables_count': len(info['tables'].items()), - 'tables_more': len(info['tables'].items()) > 5, - 'table_rows': sum(info['tables'].values()), - } - databases.append(database) - if as_json: - return response.HTTPResponse( - json.dumps( - {db['name']: db for db in databases}, - cls=CustomJSONEncoder - ), - content_type='application/json', - headers={ - 'Access-Control-Allow-Origin': '*' - } - ) - else: - return self.jinja.render( - 'index.html', - request, - databases=databases, - metadata=self.ds.metadata, - ) + when: str + actor: Dict[str, Any] | None + action: str + parent: str | None + child: str | None + result: bool -async def favicon(request): - return response.text('') +# https://github.com/simonw/datasette/issues/283#issuecomment-781591015 +SQLITE_LIMIT_ATTACHED = 10 + +INTERNAL_DB_NAME = "__INTERNAL__" + +Setting = collections.namedtuple("Setting", ("name", "default", "help")) +SETTINGS = ( + Setting("default_page_size", 100, "Default page size for the table view"), + Setting( + "max_returned_rows", + 1000, + "Maximum rows that can be returned from a table or custom query", + ), + Setting( + "max_insert_rows", + 100, + "Maximum rows that can be inserted at a time using the bulk insert API", + ), + Setting( + "num_sql_threads", + 3, + "Number of threads in the thread pool for executing SQLite queries", + ), + Setting("sql_time_limit_ms", 1000, "Time limit for a SQL query in milliseconds"), + Setting( + "default_facet_size", 30, "Number of values to return for requested facets" + ), + Setting("facet_time_limit_ms", 200, "Time limit for calculating a requested facet"), + Setting( + "facet_suggest_time_limit_ms", + 50, + "Time limit for calculating a suggested facet", + ), + Setting( + "allow_facet", + True, + "Allow users to specify columns to facet using ?_facet= parameter", + ), + Setting( + "allow_download", + True, + "Allow users to download the original SQLite database files", + ), + Setting( + "allow_signed_tokens", + True, + "Allow users to create and use signed API tokens", + ), + Setting( + "default_allow_sql", + True, + "Allow anyone to run arbitrary SQL queries", + ), + Setting( + "max_signed_tokens_ttl", + 0, + "Maximum allowed expiry time for signed API tokens", + ), + Setting("suggest_facets", True, "Calculate and display suggested facets"), + Setting( + "default_cache_ttl", + 5, + "Default HTTP cache TTL (used in Cache-Control: max-age= header)", + ), + Setting("cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)"), + Setting( + "allow_csv_stream", + True, + "Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)", + ), + Setting( + "max_csv_mb", + 100, 
+ "Maximum size allowed for CSV export in MB - set 0 to disable this limit", + ), + Setting( + "truncate_cells_html", + 2048, + "Truncate cells longer than this in HTML table view - set 0 to disable", + ), + Setting( + "force_https_urls", + False, + "Force URLs in API output to always use https:// protocol", + ), + Setting( + "template_debug", + False, + "Allow display of template debug information with ?_context=1", + ), + Setting( + "trace_debug", + False, + "Allow display of SQL trace debug information with ?_trace=1", + ), + Setting("base_url", "/", "Datasette URLs should use this base path"), +) +_HASH_URLS_REMOVED = "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead" +OBSOLETE_SETTINGS = { + "hash_urls": _HASH_URLS_REMOVED, + "default_cache_ttl_hashed": _HASH_URLS_REMOVED, +} +DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} + +FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" + +DEFAULT_NOT_SET = object() -class DatabaseView(BaseView): - template = 'database.html' - - async def data(self, request, name, hash): - if request.args.get('sql'): - return await self.custom_sql(request, name, hash) - tables = [] - table_inspect = self.ds.inspect()[name]['tables'] - for table_name, table_rows in table_inspect.items(): - rows = await self.execute( - name, - 'PRAGMA table_info([{}]);'.format(table_name) - ) - tables.append({ - 'name': table_name, - 'columns': [r[1] for r in rows], - 'table_rows': table_rows, - }) - tables.sort(key=lambda t: t['name']) - views = await self.execute(name, 'select name from sqlite_master where type = "view"') - return { - 'database': name, - 'tables': tables, - 'views': [v[0] for v in views], - }, { - 'database_hash': hash, - } - - async def custom_sql(self, request, name, hash): - params = request.raw_args - sql = params.pop('sql') - validate_sql_select(sql) - rows = await self.execute(name, sql, params) - columns = [r[0] for r in rows.description] - return { - 'database': name, - 'rows': rows, - 'columns': columns, - 'query': { - 'sql': sql, - 'params': params, - } - }, { - 'database_hash': hash, - 'custom_sql': True, - } +ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) -class DatabaseDownload(BaseView): - async def view_get(self, request, name, hash, **kwargs): - filepath = self.ds.inspect()[name]['file'] - return await response.file_stream( - filepath, headers={ - 'Content-Disposition': 'attachment; filename="{}"'.format(filepath) - } - ) +async def favicon(request, send): + await asgi_send_file( + send, + str(FAVICON_PATH), + content_type="image/png", + headers={"Cache-Control": "max-age=3600, immutable, public"}, + ) -class TableView(BaseView): - template = 'table.html' - - async def data(self, request, name, hash, table): - table = urllib.parse.unquote_plus(table) - pks = await self.pks_for_table(name, table) - is_view = bool(list(await self.execute(name, "SELECT count(*) from sqlite_master WHERE type = 'view' and name=:n", { - 'n': table, - }))[0][0]) - view_definition = None - table_definition = None - if is_view: - view_definition = list(await self.execute(name, 'select sql from sqlite_master where name = :n and type="view"', { - 'n': table, - }))[0][0] - else: - table_definition = list(await self.execute(name, 'select sql from sqlite_master where name = :n and type="table"', { - 'n': table, - }))[0][0] - use_rowid = not pks and not is_view - if use_rowid: - select = 'rowid, *' - order_by = 'rowid' - else: - select = '*' - order_by = ', '.join(pks) - - if 
is_view: - order_by = '' - - # Special args start with _ and do not contain a __ - # That's so if there is a column that starts with _ - # it can still be queried using ?_col__exact=blah - special_args = {} - other_args = {} - for key, value in request.args.items(): - if key.startswith('_') and '__' not in key: - special_args[key] = value[0] - else: - other_args[key] = value[0] - - if other_args: - where_clauses, params = build_where_clauses(other_args) - else: - where_clauses = [] - params = {} - - after = special_args.get('_after') - if after: - if use_rowid: - where_clauses.append( - 'rowid > :p{}'.format( - len(params), - ) - ) - params['p{}'.format(len(params))] = after - else: - pk_values = compound_pks_from_path(after) - if len(pk_values) == len(pks): - param_counter = len(params) - for pk, value in zip(pks, pk_values): - where_clauses.append( - '"{}" > :p{}'.format( - pk, param_counter, - ) - ) - params['p{}'.format(param_counter)] = value - param_counter += 1 - - where_clause = '' - if where_clauses: - where_clause = 'where {} '.format(' and '.join(where_clauses)) - - if order_by: - order_by = 'order by {} '.format(order_by) - - sql = 'select {} from {} {}{}limit {}'.format( - select, escape_sqlite_table_name(table), where_clause, order_by, self.page_size + 1, - ) - - rows = await self.execute(name, sql, params) - - columns = [r[0] for r in rows.description] - display_columns = columns - if use_rowid: - display_columns = display_columns[1:] - rows = list(rows) - info = self.ds.inspect() - table_rows = info[name]['tables'].get(table) - after = None - after_link = None - if len(rows) > self.page_size: - after = path_from_row_pks(rows[-2], pks, use_rowid) - after_link = path_with_added_args(request, {'_after': after}) - return { - 'database': name, - 'table': table, - 'is_view': is_view, - 'view_definition': view_definition, - 'table_definition': table_definition, - 'rows': rows[:self.page_size], - 'table_rows': table_rows, - 'columns': columns, - 'primary_keys': pks, - 'query': { - 'sql': sql, - 'params': params, - }, - 'after': after, - }, lambda: { - 'database_hash': hash, - 'use_rowid': use_rowid, - 'row_link': lambda row: path_from_row_pks(row, pks, use_rowid), - 'display_columns': display_columns, - 'after_link': after_link, - } +ResolvedTable = collections.namedtuple("ResolvedTable", ("db", "table", "is_view")) +ResolvedRow = collections.namedtuple( + "ResolvedRow", ("db", "table", "sql", "params", "pks", "pk_values", "row") +) -class RowView(BaseView): - template = 'row.html' - - async def data(self, request, name, hash, table, pk_path): - table = urllib.parse.unquote_plus(table) - pk_values = compound_pks_from_path(pk_path) - pks = await self.pks_for_table(name, table) - use_rowid = not pks - select = '*' - if use_rowid: - select = 'rowid, *' - pks = ['rowid'] - wheres = [ - '"{}"=:p{}'.format(pk, i) - for i, pk in enumerate(pks) - ] - sql = 'select {} from "{}" where {}'.format( - select, table, ' AND '.join(wheres) - ) - params = {} - for i, pk_value in enumerate(pk_values): - params['p{}'.format(i)] = pk_value - rows = await self.execute(name, sql, params) - columns = [r[0] for r in rows.description] - rows = list(rows) - if not rows: - raise NotFound('Record not found: {}'.format(pk_values)) - return { - 'database': name, - 'table': table, - 'rows': rows, - 'columns': columns, - 'primary_keys': pks, - 'primary_key_values': pk_values, - }, { - 'database_hash': hash, - 'row_link': None, - } +def _to_string(value): + if isinstance(value, str): + return value + else: + 
return json.dumps(value, default=str) class Datasette: - def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, cors=False, inspect_data=None, metadata=None): - self.files = files - self.num_threads = num_threads - self.executor = futures.ThreadPoolExecutor( - max_workers=num_threads - ) + # Message constants: + INFO = 1 + WARNING = 2 + ERROR = 3 + + def __init__( + self, + files=None, + immutables=None, + cache_headers=True, + cors=False, + inspect_data=None, + config=None, + metadata=None, + sqlite_extensions=None, + template_dir=None, + plugins_dir=None, + static_mounts=None, + memory=False, + settings=None, + secret=None, + version_note=None, + config_dir=None, + pdb=False, + crossdb=False, + nolock=False, + internal=None, + default_deny=False, + ): + self._startup_invoked = False + assert config_dir is None or isinstance( + config_dir, Path + ), "config_dir= should be a pathlib.Path" + self.config_dir = config_dir + self.pdb = pdb + self._secret = secret or secrets.token_hex(32) + if files is not None and isinstance(files, str): + raise ValueError("files= must be a list of paths, not a string") + self.files = tuple(files or []) + tuple(immutables or []) + if config_dir: + db_files = [] + for ext in ("db", "sqlite", "sqlite3"): + db_files.extend(config_dir.glob("*.{}".format(ext))) + self.files += tuple(str(f) for f in db_files) + if ( + config_dir + and (config_dir / "inspect-data.json").exists() + and not inspect_data + ): + inspect_data = json.loads((config_dir / "inspect-data.json").read_text()) + if not immutables: + immutable_filenames = [i["file"] for i in inspect_data.values()] + immutables = [ + f for f in self.files if Path(f).name in immutable_filenames + ] + self.inspect_data = inspect_data + self.immutables = set(immutables or []) + self.databases = collections.OrderedDict() + self.actions = {} # .invoke_startup() will populate this + try: + self._refresh_schemas_lock = asyncio.Lock() + except RuntimeError as rex: + # Workaround for intermittent test failure, see: + # https://github.com/simonw/datasette/issues/1802 + if "There is no current event loop in thread" in str(rex): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + self._refresh_schemas_lock = asyncio.Lock() + else: + raise + self.crossdb = crossdb + self.nolock = nolock + if memory or crossdb or not self.files: + self.add_database( + Database(self, is_mutable=False, is_memory=True), name="_memory" + ) + for file in self.files: + self.add_database( + Database(self, file, is_mutable=file not in self.immutables) + ) + + self.internal_db_created = False + if internal is None: + self._internal_database = Database(self, memory_name=secrets.token_hex()) + else: + self._internal_database = Database(self, path=internal, mode="rwc") + self._internal_database.name = INTERNAL_DB_NAME + self.cache_headers = cache_headers - self.page_size = page_size self.cors = cors - self._inspect = inspect_data - self.metadata = metadata or {} + config_files = [] + metadata_files = [] + if config_dir: + metadata_files = [ + config_dir / filename + for filename in ("metadata.json", "metadata.yaml", "metadata.yml") + if (config_dir / filename).exists() + ] + config_files = [ + config_dir / filename + for filename in ("datasette.json", "datasette.yaml", "datasette.yml") + if (config_dir / filename).exists() + ] + if config_dir and metadata_files and not metadata: + with metadata_files[0].open() as fp: + metadata = parse_metadata(fp.read()) - def inspect(self): - if not self._inspect: - self._inspect = {} - 
for filename in self.files: - path = Path(filename) - name = path.stem - if name in self._inspect: - raise Exception('Multiple files with same stem %s' % name) - # Calculate hash, efficiently - m = hashlib.sha256() - with path.open('rb') as fp: - while True: - data = fp.read(HASH_BLOCK_SIZE) - if not data: - break - m.update(data) - # List tables and their row counts - tables = {} - with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn: - conn.row_factory = sqlite3.Row - table_names = [ - r['name'] - for r in conn.execute('select * from sqlite_master where type="table"') - ] - for table in table_names: - tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0] + if config_dir and config_files and not config: + with config_files[0].open() as fp: + config = parse_metadata(fp.read()) - self._inspect[name] = { - 'hash': m.hexdigest(), - 'file': str(path), - 'tables': tables, - } - return self._inspect + # Move any "plugins" and "allow" settings from metadata to config - updates them in place + metadata = metadata or {} + config = config or {} + metadata, config = move_plugins_and_allow(metadata, config) + # Now migrate any known table configuration settings over as well + metadata, config = move_table_config(metadata, config) + + self._metadata_local = metadata or {} + self.sqlite_extensions = [] + for extension in sqlite_extensions or []: + # Resolve spatialite, if requested + if extension == "spatialite": + # Could raise SpatialiteNotFound + self.sqlite_extensions.append(find_spatialite()) + else: + self.sqlite_extensions.append(extension) + if config_dir and (config_dir / "templates").is_dir() and not template_dir: + template_dir = str((config_dir / "templates").resolve()) + self.template_dir = template_dir + if config_dir and (config_dir / "plugins").is_dir() and not plugins_dir: + plugins_dir = str((config_dir / "plugins").resolve()) + self.plugins_dir = plugins_dir + if config_dir and (config_dir / "static").is_dir() and not static_mounts: + static_mounts = [("static", str((config_dir / "static").resolve()))] + self.static_mounts = static_mounts or [] + if config_dir and (config_dir / "datasette.json").exists() and not config: + config = json.loads((config_dir / "datasette.json").read_text()) + + config = config or {} + config_settings = config.get("settings") or {} + + # Validate settings from config file + for key, value in config_settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in config file") + # Validate type matches expected type from DEFAULT_SETTINGS + if value is not None: # Allow None/null values + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in config file has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. " + f"Value: {value!r}. " + f"Hint: In YAML/JSON config files, remove quotes from boolean and integer values." + ) + + # Validate settings from constructor parameter + if settings: + for key, value in settings.items(): + if key not in DEFAULT_SETTINGS: + raise StartupError(f"Invalid setting '{key}' in settings parameter") + if value is not None: + expected_type = type(DEFAULT_SETTINGS[key]) + actual_type = type(value) + if actual_type != expected_type: + raise StartupError( + f"Setting '{key}' in settings parameter has incorrect type. " + f"Expected {expected_type.__name__}, got {actual_type.__name__}. 
" + f"Value: {value!r}" + ) + + self.config = config + # CLI settings should overwrite datasette.json settings + self._settings = dict(DEFAULT_SETTINGS, **(config_settings), **(settings or {})) + self.renderers = {} # File extension -> (renderer, can_render) functions + self.version_note = version_note + if self.setting("num_sql_threads") == 0: + self.executor = None + else: + self.executor = futures.ThreadPoolExecutor( + max_workers=self.setting("num_sql_threads") + ) + self.max_returned_rows = self.setting("max_returned_rows") + self.sql_time_limit_ms = self.setting("sql_time_limit_ms") + self.page_size = self.setting("default_page_size") + # Execute plugins in constructor, to ensure they are available + # when the rest of `datasette inspect` executes + if self.plugins_dir: + for filepath in glob.glob(os.path.join(self.plugins_dir, "*.py")): + if not os.path.isfile(filepath): + continue + mod = module_from_path(filepath, name=os.path.basename(filepath)) + try: + pm.register(mod) + except ValueError: + # Plugin already registered + pass + + # Configure Jinja + default_templates = str(app_root / "datasette" / "templates") + template_paths = [] + if self.template_dir: + template_paths.append(self.template_dir) + plugin_template_paths = [ + plugin["templates_path"] + for plugin in get_plugins() + if plugin["templates_path"] + ] + template_paths.extend(plugin_template_paths) + template_paths.append(default_templates) + template_loader = ChoiceLoader( + [ + FileSystemLoader(template_paths), + # Support {% extends "default:table.html" %}: + PrefixLoader( + {"default": FileSystemLoader(default_templates)}, delimiter=":" + ), + ] + ) + environment = Environment( + loader=template_loader, + autoescape=True, + enable_async=True, + # undefined=StrictUndefined, + ) + environment.filters["escape_css_string"] = escape_css_string + environment.filters["quote_plus"] = urllib.parse.quote_plus + self._jinja_env = environment + environment.filters["escape_sqlite"] = escape_sqlite + environment.filters["to_css_class"] = to_css_class + self._register_renderers() + self._permission_checks = collections.deque(maxlen=200) + self._root_token = secrets.token_hex(32) + self.root_enabled = False + self.default_deny = default_deny + self.client = DatasetteClient(self) + + async def apply_metadata_json(self): + # Apply any metadata entries from metadata.json to the internal tables + # step 1: top-level metadata + for key in self._metadata_local or {}: + if key == "databases": + continue + value = self._metadata_local[key] + await self.set_instance_metadata(key, _to_string(value)) + + # step 2: database-level metadata + for dbname, db in self._metadata_local.get("databases", {}).items(): + for key, value in db.items(): + if key in ("tables", "queries"): + continue + await self.set_database_metadata(dbname, key, _to_string(value)) + + # step 3: table-level metadata + for tablename, table in db.get("tables", {}).items(): + for key, value in table.items(): + if key == "columns": + continue + await self.set_resource_metadata( + dbname, tablename, key, _to_string(value) + ) + + # step 4: column-level metadata (only descriptions in metadata.json) + for columnname, column_description in table.get("columns", {}).items(): + await self.set_column_metadata( + dbname, tablename, columnname, "description", column_description + ) + + # TODO(alex) is metadata.json was loaded in, and --internal is not memory, then log + # a warning to user that they should delete their metadata.json file + + def get_jinja_environment(self, request: 
Request = None) -> Environment: + environment = self._jinja_env + if request: + for environment in pm.hook.jinja2_environment_from_request( + datasette=self, request=request, env=environment + ): + pass + return environment + + def get_action(self, name_or_abbr: str): + """ + Returns an Action object for the given name or abbreviation. Returns None if not found. + """ + if name_or_abbr in self.actions: + return self.actions[name_or_abbr] + # Try abbreviation + for action in self.actions.values(): + if action.abbr == name_or_abbr: + return action + return None + + async def refresh_schemas(self): + if self._refresh_schemas_lock.locked(): + return + async with self._refresh_schemas_lock: + await self._refresh_schemas() + + async def _refresh_schemas(self): + internal_db = self.get_internal_database() + if not self.internal_db_created: + await init_internal_db(internal_db) + await self.apply_metadata_json() + self.internal_db_created = True + current_schema_versions = { + row["database_name"]: row["schema_version"] + for row in await internal_db.execute( + "select database_name, schema_version from catalog_databases" + ) + } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) + for database_name, db in self.databases.items(): + schema_version = (await db.execute("PRAGMA schema_version")).first()[0] + # Compare schema versions to see if we should skip it + if schema_version == current_schema_versions.get(database_name): + continue + placeholders = "(?, ?, ?, ?)" + values = [database_name, str(db.path), db.is_memory, schema_version] + if db.path is None: + placeholders = "(?, null, ?, ?)" + values = [database_name, db.is_memory, schema_version] + await internal_db.execute_write( + """ + INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) + VALUES {} + """.format( + placeholders + ), + values, + ) + await populate_schema_tables(internal_db, db) + + @property + def urls(self): + return Urls(self) + + @property + def pm(self): + """ + Return the global plugin manager instance. + + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. 
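As a rough, hypothetical illustration (helper name is invented; the `menu_links` hook and `await_me_maybe()` usage mirror `render_template()` later in this file), calling a hook through the plugin manager looks something like this:

```python
from datasette.utils import await_me_maybe

async def collect_menu_links(datasette, request=None, actor=None):
    # Sketch only: gather menu links contributed by plugins.
    links = []
    for hook_result in datasette.pm.hook.menu_links(
        datasette=datasette, actor=actor, request=request
    ):
        # Hooks may return a value or a coroutine; normalize either way.
        hook_result = await await_me_maybe(hook_result)
        if hook_result:
            links.extend(hook_result)
    return links
```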
+ """ + return pm + + async def invoke_startup(self): + # This must be called for Datasette to be in a usable state + if self._startup_invoked: + return + # Register event classes + event_classes = [] + for hook in pm.hook.register_events(datasette=self): + extra_classes = await await_me_maybe(hook) + if extra_classes: + event_classes.extend(extra_classes) + self.event_classes = tuple(event_classes) + + # Register actions, but watch out for duplicate name/abbr + action_names = {} + action_abbrs = {} + for hook in pm.hook.register_actions(datasette=self): + if hook: + for action in hook: + if ( + action.name in action_names + and action != action_names[action.name] + ): + raise StartupError( + "Duplicate action name: {}".format(action.name) + ) + if ( + action.abbr + and action.abbr in action_abbrs + and action != action_abbrs[action.abbr] + ): + raise StartupError( + "Duplicate action abbr: {}".format(action.abbr) + ) + action_names[action.name] = action + if action.abbr: + action_abbrs[action.abbr] = action + self.actions[action.name] = action + + for hook in pm.hook.prepare_jinja2_environment( + env=self._jinja_env, datasette=self + ): + await await_me_maybe(hook) + for hook in pm.hook.startup(datasette=self): + await await_me_maybe(hook) + self._startup_invoked = True + + def sign(self, value, namespace="default"): + return URLSafeSerializer(self._secret, namespace).dumps(value) + + def unsign(self, signed, namespace="default"): + return URLSafeSerializer(self._secret, namespace).loads(signed) + + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. + """ + return _in_datasette_client.get() + + def create_token( + self, + actor_id: str, + *, + expires_after: int | None = None, + restrict_all: Iterable[str] | None = None, + restrict_database: Dict[str, Iterable[str]] | None = None, + restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, + ): + token = {"a": actor_id, "t": int(time.time())} + if expires_after: + token["d"] = expires_after + + def abbreviate_action(action): + # rename to abbr if possible + action_obj = self.actions.get(action) + if not action_obj: + return action + return action_obj.abbr or action + + if expires_after: + token["d"] = expires_after + if restrict_all or restrict_database or restrict_resource: + token["_r"] = {} + if restrict_all: + token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] + if restrict_database: + token["_r"]["d"] = {} + for database, actions in restrict_database.items(): + token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] + if restrict_resource: + token["_r"]["r"] = {} + for database, resources in restrict_resource.items(): + for resource, actions in resources.items(): + token["_r"]["r"].setdefault(database, {})[resource] = [ + abbreviate_action(a) for a in actions + ] + return "dstok_{}".format(self.sign(token, namespace="token")) + + def get_database(self, name=None, route=None): + if route is not None: + matches = [db for db in self.databases.values() if db.route == route] + if not matches: + raise KeyError + return matches[0] + if name is None: + name = [key for key in self.databases.keys()][0] + return self.databases[name] + + def add_database(self, db, name=None, route=None): + new_databases = self.databases.copy() + if name is None: + # Pick a unique name for this database + suggestion = db.suggest_name() + name = suggestion + else: + 
suggestion = name + i = 2 + while name in self.databases: + name = "{}_{}".format(suggestion, i) + i += 1 + db.name = name + db.route = route or name + new_databases[name] = db + # don't mutate! that causes race conditions with live import + self.databases = new_databases + return db + + def add_memory_database(self, memory_name, name=None, route=None): + return self.add_database( + Database(self, memory_name=memory_name), name=name, route=route + ) + + def remove_database(self, name): + self.get_database(name).close() + new_databases = self.databases.copy() + new_databases.pop(name) + self.databases = new_databases + + def setting(self, key): + return self._settings.get(key, None) + + def settings_dict(self): + # Returns a fully resolved settings dictionary, useful for templates + return {option.name: self.setting(option.name) for option in SETTINGS} + + def _metadata_recursive_update(self, orig, updated): + if not isinstance(orig, dict) or not isinstance(updated, dict): + return orig + + for key, upd_value in updated.items(): + if isinstance(upd_value, dict) and isinstance(orig.get(key), dict): + orig[key] = self._metadata_recursive_update(orig[key], upd_value) + else: + orig[key] = upd_value + return orig + + async def get_instance_metadata(self): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_instance + """ + ) + return dict(rows) + + async def get_database_metadata(self, database_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_databases + WHERE database_name = ? + """, + [database_name], + ) + return dict(rows) + + async def get_resource_metadata(self, database_name: str, resource_name: str): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_resources + WHERE database_name = ? + AND resource_name = ? + """, + [database_name, resource_name], + ) + return dict(rows) + + async def get_column_metadata( + self, database_name: str, resource_name: str, column_name: str + ): + rows = await self.get_internal_database().execute( + """ + SELECT + key, + value + FROM metadata_columns + WHERE database_name = ? + AND resource_name = ? + AND column_name = ? + """, + [database_name, resource_name, column_name], + ) + return dict(rows) + + async def set_instance_metadata(self, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_instance(key, value) + VALUES(?, ?) + ON CONFLICT(key) DO UPDATE SET value = excluded.value; + """, + [key, value], + ) + + async def set_database_metadata(self, database_name: str, key: str, value: str): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_databases(database_name, key, value) + VALUES(?, ?, ?) + ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, key, value], + ) + + async def set_resource_metadata( + self, database_name: str, resource_name: str, key: str, value: str + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_resources(database_name, resource_name, key, value) + VALUES(?, ?, ?, ?) 
+ ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, key, value], + ) + + async def set_column_metadata( + self, + database_name: str, + resource_name: str, + column_name: str, + key: str, + value: str, + ): + # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) + await self.get_internal_database().execute_write( + """ + INSERT INTO metadata_columns(database_name, resource_name, column_name, key, value) + VALUES(?, ?, ?, ?, ?) + ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; + """, + [database_name, resource_name, column_name, key, value], + ) + + def get_internal_database(self): + return self._internal_database + + def plugin_config(self, plugin_name, database=None, table=None, fallback=True): + """Return config for plugin, falling back from specified database/table""" + if database is None and table is None: + config = self._plugin_config_top(plugin_name) + else: + config = self._plugin_config_nested(plugin_name, database, table, fallback) + + return resolve_env_secrets(config, os.environ) + + def _plugin_config_top(self, plugin_name): + """Returns any top-level plugin configuration for the specified plugin.""" + return ((self.config or {}).get("plugins") or {}).get(plugin_name) + + def _plugin_config_nested(self, plugin_name, database, table=None, fallback=True): + """Returns any database or table-level plugin configuration for the specified plugin.""" + db_config = ((self.config or {}).get("databases") or {}).get(database) + + # if there's no db-level configuration, then return early, falling back to top-level if needed + if not db_config: + return self._plugin_config_top(plugin_name) if fallback else None + + db_plugin_config = (db_config.get("plugins") or {}).get(plugin_name) + + if table: + table_plugin_config = ( + ((db_config.get("tables") or {}).get(table) or {}).get("plugins") or {} + ).get(plugin_name) + + # fallback to db_config or top-level config, in that order, if needed + if table_plugin_config is None and fallback: + return db_plugin_config or self._plugin_config_top(plugin_name) + + return table_plugin_config + + # fallback to top-level if needed + if db_plugin_config is None and fallback: + self._plugin_config_top(plugin_name) + + return db_plugin_config + + def app_css_hash(self): + if not hasattr(self, "_app_css_hash"): + with open(os.path.join(str(app_root), "datasette/static/app.css")) as fp: + self._app_css_hash = hashlib.sha1(fp.read().encode("utf8")).hexdigest()[ + :6 + ] + return self._app_css_hash + + async def get_canned_queries(self, database_name, actor): + queries = {} + for more_queries in pm.hook.canned_queries( + datasette=self, + database=database_name, + actor=actor, + ): + more_queries = await await_me_maybe(more_queries) + queries.update(more_queries or {}) + # Fix any {"name": "select ..."} queries to be {"name": {"sql": "select ..."}} + for key in queries: + if not isinstance(queries[key], dict): + queries[key] = {"sql": queries[key]} + # Also make sure "name" is available: + queries[key]["name"] = key + return queries + + async def get_canned_query(self, database_name, query_name, actor): + queries = await self.get_canned_queries(database_name, actor) + query = queries.get(query_name) + if query: + return query + + def _prepare_connection(self, conn, database): + conn.row_factory = sqlite3.Row + conn.text_factory = lambda x: str(x, "utf-8", "replace") + if self.sqlite_extensions and database != INTERNAL_DB_NAME: + 
conn.enable_load_extension(True) + for extension in self.sqlite_extensions: + # "extension" is either a string path to the extension + # or a 2-item tuple that specifies which entrypoint to load. + if isinstance(extension, tuple): + path, entrypoint = extension + conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) + else: + conn.execute("SELECT load_extension(?)", [extension]) + if self.setting("cache_size_kb"): + conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}") + # pylint: disable=no-member + if database != INTERNAL_DB_NAME: + pm.hook.prepare_connection(conn=conn, database=database, datasette=self) + # If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases + if self.crossdb and database == "_memory": + count = 0 + for db_name, db in self.databases.items(): + if count >= SQLITE_LIMIT_ATTACHED or db.is_memory: + continue + sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format( + path=db.path, + qs="mode=ro" if db.is_mutable else "immutable=1", + name=db_name, + ) + conn.execute(sql) + count += 1 + + def add_message(self, request, message, type=INFO): + if not hasattr(request, "_messages"): + request._messages = [] + request._messages_should_clear = False + request._messages.append((message, type)) + + def _write_messages_to_response(self, request, response): + if getattr(request, "_messages", None): + # Set those messages + response.set_cookie("ds_messages", self.sign(request._messages, "messages")) + elif getattr(request, "_messages_should_clear", False): + response.set_cookie("ds_messages", "", expires=0, max_age=0) + + def _show_messages(self, request): + if getattr(request, "_messages", None): + request._messages_should_clear = True + messages = request._messages + request._messages = [] + return messages + else: + return [] + + async def _crumb_items(self, request, table=None, database=None): + crumbs = [] + actor = None + if request: + actor = request.actor + # Top-level link + if await self.allowed(action="view-instance", actor=actor): + crumbs.append({"href": self.urls.instance(), "label": "home"}) + # Database link + if database: + if await self.allowed( + action="view-database", + resource=DatabaseResource(database=database), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.database(database), + "label": database, + } + ) + # Table link + if table: + assert database, "table= requires database=" + if await self.allowed( + action="view-table", + resource=TableResource(database=database, table=table), + actor=actor, + ): + crumbs.append( + { + "href": self.urls.table(database, table), + "label": table, + } + ) + return crumbs + + async def actors_from_ids( + self, actor_ids: Iterable[str | int] + ) -> Dict[int | str, Dict]: + result = pm.hook.actors_from_ids(datasette=self, actor_ids=actor_ids) + if result is None: + # Do the default thing + return {actor_id: {"id": actor_id} for actor_id in actor_ids} + result = await await_me_maybe(result) + return result + + async def track_event(self, event: Event): + assert isinstance(event, self.event_classes), "Invalid event type: {}".format( + type(event) + ) + for hook in pm.hook.track_event(datasette=self, event=event): + await await_me_maybe(hook) + + def resource_for_action(self, action: str, parent: str | None, child: str | None): + """ + Create a Resource instance for the given action with parent/child values. + + Looks up the action's resource_class and instantiates it with the + provided parent and child identifiers. 
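A quick hedged sketch (illustrative database/table names borrowed from the permission examples later in this file; the action registry is populated by `invoke_startup()`):

```python
# Resolve the Resource subclass registered for a table-level action.
resource = datasette.resource_for_action(
    "view-table", parent="analytics", child="users"
)
print(resource.parent, resource.child)  # -> analytics users
```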
+ + Args: + action: The action name (e.g., "view-table", "view-query") + parent: The parent resource identifier (e.g., database name) + child: The child resource identifier (e.g., table/query name) + + Returns: + A Resource instance of the appropriate subclass + + Raises: + ValueError: If the action is unknown + """ + from datasette.permissions import Resource + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + resource_class = action_obj.resource_class + instance = object.__new__(resource_class) + Resource.__init__(instance, parent=parent, child=child) + return instance + + async def check_visibility( + self, + actor: dict, + action: str, + resource: "Resource" | None = None, + ): + """ + Check if actor can see a resource and if it's private. + + Returns (visible, private) tuple: + - visible: bool - can the actor see it? + - private: bool - if visible, can anonymous users NOT see it? + """ + from datasette.permissions import Resource + + # Validate that resource is a Resource object or None + if resource is not None and not isinstance(resource, Resource): + raise TypeError(f"resource must be a Resource subclass instance or None.") + + # Check if actor can see it + if not await self.allowed(action=action, resource=resource, actor=actor): + return False, False + + # Check if anonymous user can see it (for "private" flag) + if not await self.allowed(action=action, resource=resource, actor=None): + # Actor can see it but anonymous cannot - it's private + return True, True + + # Both actor and anonymous can see it - it's public + return True, False + + async def allowed_resources_sql( + self, + *, + action: str, + actor: dict | None = None, + parent: str | None = None, + include_is_private: bool = False, + ) -> ResourcesSQL: + """ + Build SQL query to get all resources the actor can access for the given action. + + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, include is_private column showing if anonymous cannot access + + Returns a namedtuple of (query: str, params: dict) that can be executed against the internal database. + The query returns rows with (parent, child, reason) columns, plus is_private if requested. + + Example: + query, params = await datasette.allowed_resources_sql( + action="view-table", + actor=actor, + parent="mydb", + include_is_private=True + ) + result = await datasette.get_internal_database().execute(query, params) + """ + from datasette.utils.actions_sql import build_allowed_resources_sql + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + sql, params = await build_allowed_resources_sql( + self, actor, action, parent=parent, include_is_private=include_is_private + ) + return ResourcesSQL(sql, params) + + async def allowed_resources( + self, + action: str, + actor: dict | None = None, + *, + parent: str | None = None, + include_is_private: bool = False, + include_reasons: bool = False, + limit: int = 100, + next: str | None = None, + ) -> PaginatedResources: + """ + Return paginated resources the actor can access for the given action. + + Uses SQL with keyset pagination to efficiently filter resources. + Returns PaginatedResources with list of Resource instances and pagination metadata. 
+ + Args: + action: The action name (e.g., "view-table") + actor: The actor dict (or None for unauthenticated) + parent: Optional parent filter (e.g., database name) to limit results + include_is_private: If True, adds a .private attribute to each Resource + include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons + limit: Maximum number of results to return (1-1000, default 100) + next: Keyset token from previous page for pagination + + Returns: + PaginatedResources with: + - resources: List of Resource objects for this page + - next: Token for next page (None if no more results) + + Example: + # Get first page of tables + page = await datasette.allowed_resources("view-table", actor, limit=50) + for table in page.resources: + print(f"{table.parent}/{table.child}") + + # Get next page + if page.next: + next_page = await datasette.allowed_resources( + "view-table", actor, limit=50, next=page.next + ) + + # With reasons for debugging + page = await datasette.allowed_resources( + "view-table", actor, include_reasons=True + ) + for table in page.resources: + print(f"{table.child}: {table.reasons}") + + # Iterate through all results with async generator + page = await datasette.allowed_resources("view-table", actor) + async for table in page.all(): + print(table.child) + """ + + action_obj = self.actions.get(action) + if not action_obj: + raise ValueError(f"Unknown action: {action}") + + # Validate and cap limit + limit = min(max(1, limit), 1000) + + # Get base SQL query + query, params = await self.allowed_resources_sql( + action=action, + actor=actor, + parent=parent, + include_is_private=include_is_private, + ) + + # Add keyset pagination WHERE clause if next token provided + if next: + try: + components = urlsafe_components(next) + if len(components) >= 2: + last_parent, last_child = components[0], components[1] + # Keyset condition: (parent > last) OR (parent = last AND child > last) + keyset_where = """ + (parent > :keyset_parent OR + (parent = :keyset_parent AND child > :keyset_child)) + """ + # Wrap original query and add keyset filter + query = f"SELECT * FROM ({query}) WHERE {keyset_where}" + params["keyset_parent"] = last_parent + params["keyset_child"] = last_child + except (ValueError, KeyError): + # Invalid token - ignore and start from beginning + pass + + # Add LIMIT (fetch limit+1 to detect if there are more results) + # Note: query from allowed_resources_sql() already includes ORDER BY parent, child + query = f"{query} LIMIT :limit" + params["limit"] = limit + 1 + + # Execute query + result = await self.get_internal_database().execute(query, params) + rows = list(result.rows) + + # Check if truncated (got more than limit rows) + truncated = len(rows) > limit + if truncated: + rows = rows[:limit] # Remove the extra row + + # Build Resource objects with optional attributes + resources = [] + for row in rows: + # row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested) + resource = self.resource_for_action(action, parent=row[0], child=row[1]) + + # Add reasons if requested + if include_reasons: + reason_json = row[2] + try: + reasons_array = ( + json.loads(reason_json) if isinstance(reason_json, str) else [] + ) + resource.reasons = [r for r in reasons_array if r is not None] + except (json.JSONDecodeError, TypeError): + resource.reasons = [reason_json] if reason_json else [] + + # Add private flag if requested + if include_is_private: + resource.private = bool(row[3]) + + resources.append(resource) + + # Generate next token if 
there are more results + next_token = None + if truncated and resources: + last_resource = resources[-1] + # Use tilde-encoding like table pagination + next_token = "{},{}".format( + tilde_encode(str(last_resource.parent)), + tilde_encode(str(last_resource.child)), + ) + + return PaginatedResources( + resources=resources, + next=next_token, + _datasette=self, + _action=action, + _actor=actor, + _parent=parent, + _include_is_private=include_is_private, + _include_reasons=include_reasons, + _limit=limit, + ) + + async def allowed( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ) -> bool: + """ + Check if actor can perform action on specific resource. + + Uses SQL to check permission for a single resource without fetching all resources. + This is efficient - it does NOT call allowed_resources() and check membership. + + For global actions, resource should be None (or omitted). + + Example: + from datasette.resources import TableResource + can_view = await datasette.allowed( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=actor + ) + + # For global actions, resource can be omitted: + can_debug = await datasette.allowed(action="permissions-debug", actor=actor) + """ + from datasette.utils.actions_sql import check_permission_for_resource + + # For global actions, resource remains None + + # Check if this action has also_requires - if so, check that action first + action_obj = self.actions.get(action) + if action_obj and action_obj.also_requires: + # Must have the required action first + if not await self.allowed( + action=action_obj.also_requires, + resource=resource, + actor=actor, + ): + return False + + # For global actions, resource is None + parent = resource.parent if resource else None + child = resource.child if resource else None + + result = await check_permission_for_resource( + datasette=self, + actor=actor, + action=action, + parent=parent, + child=child, + ) + + # Log the permission check for debugging + self._permission_checks.append( + PermissionCheck( + when=datetime.datetime.now(datetime.timezone.utc).isoformat(), + actor=actor, + action=action, + parent=parent, + child=child, + result=result, + ) + ) + + return result + + async def ensure_permission( + self, + *, + action: str, + resource: "Resource" = None, + actor: dict | None = None, + ): + """ + Check if actor can perform action on resource, raising Forbidden if not. + + This is a convenience wrapper around allowed() that raises Forbidden + instead of returning False. Use this when you want to enforce a permission + check and halt execution if it fails. 
+ + Example: + from datasette.resources import TableResource + + # Will raise Forbidden if actor cannot view the table + await datasette.ensure_permission( + action="view-table", + resource=TableResource(database="analytics", table="users"), + actor=request.actor + ) + + # For instance-level actions, resource can be omitted: + await datasette.ensure_permission( + action="permissions-debug", + actor=request.actor + ) + """ + if not await self.allowed(action=action, resource=resource, actor=actor): + raise Forbidden(action) + + async def execute( + self, + db_name, + sql, + params=None, + truncate=False, + custom_time_limit=None, + page_size=None, + log_sql_errors=True, + ): + return await self.databases[db_name].execute( + sql, + params=params, + truncate=truncate, + custom_time_limit=custom_time_limit, + page_size=page_size, + log_sql_errors=log_sql_errors, + ) + + async def expand_foreign_keys(self, actor, database, table, column, values): + """Returns dict mapping (column, value) -> label""" + labeled_fks = {} + db = self.databases[database] + foreign_keys = await db.foreign_keys_for_table(table) + # Find the foreign_key for this column + try: + fk = [ + foreign_key + for foreign_key in foreign_keys + if foreign_key["column"] == column + ][0] + except IndexError: + return {} + # Ensure user has permission to view the referenced table + from datasette.resources import TableResource + + other_table = fk["other_table"] + other_column = fk["other_column"] + visible, _ = await self.check_visibility( + actor, + action="view-table", + resource=TableResource(database=database, table=other_table), + ) + if not visible: + return {} + label_column = await db.label_column_for_table(other_table) + if not label_column: + return {(fk["column"], value): str(value) for value in values} + labeled_fks = {} + sql = """ + select {other_column}, {label_column} + from {other_table} + where {other_column} in ({placeholders}) + """.format( + other_column=escape_sqlite(other_column), + label_column=escape_sqlite(label_column), + other_table=escape_sqlite(other_table), + placeholders=", ".join(["?"] * len(set(values))), + ) + try: + results = await self.execute(database, sql, list(set(values))) + except QueryInterrupted: + pass + else: + for id, value in results: + labeled_fks[(fk["column"], id)] = value + return labeled_fks + + def absolute_url(self, request, path): + url = urllib.parse.urljoin(request.url, path) + if url.startswith("http://") and self.setting("force_https_urls"): + url = "https://" + url[len("http://") :] + return url + + def _connected_databases(self): + return [ + { + "name": d.name, + "route": d.route, + "path": d.path, + "size": d.size, + "is_mutable": d.is_mutable, + "is_memory": d.is_memory, + "hash": d.hash, + } + for name, d in self.databases.items() + ] + + def _versions(self): + conn = sqlite3.connect(":memory:") + self._prepare_connection(conn, "_memory") + sqlite_version = conn.execute("select sqlite_version()").fetchone()[0] + sqlite_extensions = {"json1": detect_json1(conn)} + for extension, testsql, hasversion in ( + ("spatialite", "SELECT spatialite_version()", True), + ): + try: + result = conn.execute(testsql) + if hasversion: + sqlite_extensions[extension] = result.fetchone()[0] + else: + sqlite_extensions[extension] = None + except Exception: + pass + # More details on SpatiaLite + if "spatialite" in sqlite_extensions: + spatialite_details = {} + for fn in SPATIALITE_FUNCTIONS: + try: + result = conn.execute("select {}()".format(fn)) + spatialite_details[fn] = 
result.fetchone()[0] + except Exception as e: + spatialite_details[fn] = {"error": str(e)} + sqlite_extensions["spatialite"] = spatialite_details + + # Figure out supported FTS versions + fts_versions = [] + for fts in ("FTS5", "FTS4", "FTS3"): + try: + conn.execute( + "CREATE VIRTUAL TABLE v{fts} USING {fts} (data)".format(fts=fts) + ) + fts_versions.append(fts) + except sqlite3.OperationalError: + continue + datasette_version = {"version": __version__} + if self.version_note: + datasette_version["note"] = self.version_note + + try: + # Optional import to avoid breaking Pyodide + # https://github.com/simonw/datasette/issues/1733#issuecomment-1115268245 + import uvicorn + + uvicorn_version = uvicorn.__version__ + except ImportError: + uvicorn_version = None + info = { + "python": { + "version": ".".join(map(str, sys.version_info[:3])), + "full": sys.version, + }, + "datasette": datasette_version, + "asgi": "3.0", + "uvicorn": uvicorn_version, + "sqlite": { + "version": sqlite_version, + "fts_versions": fts_versions, + "extensions": sqlite_extensions, + "compile_options": [ + r[0] for r in conn.execute("pragma compile_options;").fetchall() + ], + }, + } + if using_pysqlite3: + for package in ("pysqlite3", "pysqlite3-binary"): + try: + info["pysqlite3"] = importlib.metadata.version(package) + break + except importlib.metadata.PackageNotFoundError: + pass + return info + + def _plugins(self, request=None, all=False): + ps = list(get_plugins()) + should_show_all = False + if request is not None: + should_show_all = request.args.get("all") + else: + should_show_all = all + if not should_show_all: + ps = [p for p in ps if p["name"] not in DEFAULT_PLUGINS] + ps.sort(key=lambda p: p["name"]) + return [ + { + "name": p["name"], + "static": p["static_path"] is not None, + "templates": p["templates_path"] is not None, + "version": p.get("version"), + "hooks": list(sorted(set(p["hooks"]))), + } + for p in ps + ] + + def _threads(self): + if self.setting("num_sql_threads") == 0: + return {"num_threads": 0, "threads": []} + threads = list(threading.enumerate()) + d = { + "num_threads": len(threads), + "threads": [ + {"name": t.name, "ident": t.ident, "daemon": t.daemon} for t in threads + ], + } + tasks = asyncio.all_tasks() + d.update( + { + "num_tasks": len(tasks), + "tasks": [_cleaner_task_str(t) for t in tasks], + } + ) + return d + + def _actor(self, request): + return {"actor": request.actor} + + def _actions(self): + return [ + { + "name": action.name, + "abbr": action.abbr, + "description": action.description, + "takes_parent": action.takes_parent, + "takes_child": action.takes_child, + "resource_class": ( + action.resource_class.__name__ if action.resource_class else None + ), + "also_requires": action.also_requires, + } + for action in sorted(self.actions.values(), key=lambda a: a.name) + ] + + async def table_config(self, database: str, table: str) -> dict: + """Return dictionary of configuration for specified table""" + return ( + (self.config or {}) + .get("databases", {}) + .get(database, {}) + .get("tables", {}) + .get(table, {}) + ) + + def _register_renderers(self): + """Register output renderers which output data in custom formats.""" + # Built-in renderers + self.renderers["json"] = (json_renderer, lambda: True) + + # Hooks + hook_renderers = [] + # pylint: disable=no-member + for hook in pm.hook.register_output_renderer(datasette=self): + if type(hook) is list: + hook_renderers += hook + else: + hook_renderers.append(hook) + + for renderer in hook_renderers: + 
self.renderers[renderer["extension"]] = ( + # It used to be called "callback" - remove this in Datasette 1.0 + renderer.get("render") or renderer["callback"], + renderer.get("can_render") or (lambda: True), + ) + + async def render_template( + self, + templates: List[str] | str | Template, + context: Dict[str, Any] | Context | None = None, + request: Request | None = None, + view_name: str | None = None, + ): + if not self._startup_invoked: + raise Exception("render_template() called before await ds.invoke_startup()") + context = context or {} + if isinstance(templates, Template): + template = templates + else: + if isinstance(templates, str): + templates = [templates] + template = self.get_jinja_environment(request).select_template(templates) + if dataclasses.is_dataclass(context): + context = dataclasses.asdict(context) + body_scripts = [] + # pylint: disable=no-member + for extra_script in pm.hook.extra_body_script( + template=template.name, + database=context.get("database"), + table=context.get("table"), + columns=context.get("columns"), + view_name=view_name, + request=request, + datasette=self, + ): + extra_script = await await_me_maybe(extra_script) + if isinstance(extra_script, dict): + script = extra_script["script"] + module = bool(extra_script.get("module")) + else: + script = extra_script + module = False + body_scripts.append({"script": Markup(script), "module": module}) + + extra_template_vars = {} + # pylint: disable=no-member + for extra_vars in pm.hook.extra_template_vars( + template=template.name, + database=context.get("database"), + table=context.get("table"), + columns=context.get("columns"), + view_name=view_name, + request=request, + datasette=self, + ): + extra_vars = await await_me_maybe(extra_vars) + assert isinstance(extra_vars, dict), "extra_vars is of type {}".format( + type(extra_vars) + ) + extra_template_vars.update(extra_vars) + + async def menu_links(): + links = [] + for hook in pm.hook.menu_links( + datasette=self, + actor=request.actor if request else None, + request=request or None, + ): + extra_links = await await_me_maybe(hook) + if extra_links: + links.extend(extra_links) + return links + + template_context = { + **context, + **{ + "request": request, + "crumb_items": self._crumb_items, + "urls": self.urls, + "actor": request.actor if request else None, + "menu_links": menu_links, + "display_actor": display_actor, + "show_logout": request is not None + and "ds_actor" in request.cookies + and request.actor, + "app_css_hash": self.app_css_hash(), + "zip": zip, + "body_scripts": body_scripts, + "format_bytes": format_bytes, + "show_messages": lambda: self._show_messages(request), + "extra_css_urls": await self._asset_urls( + "extra_css_urls", template, context, request, view_name + ), + "extra_js_urls": await self._asset_urls( + "extra_js_urls", template, context, request, view_name + ), + "base_url": self.setting("base_url"), + "csrftoken": request.scope["csrftoken"] if request else lambda: "", + "datasette_version": __version__, + }, + **extra_template_vars, + } + if request and request.args.get("_context") and self.setting("template_debug"): + return "
<pre>{}</pre>
".format( + escape(json.dumps(template_context, default=repr, indent=4)) + ) + + return await template.render_async(template_context) + + def set_actor_cookie( + self, response: Response, actor: dict, expire_after: int | None = None + ): + data = {"a": actor} + if expire_after: + expires_at = int(time.time()) + (24 * 60 * 60) + data["e"] = baseconv.base62.encode(expires_at) + response.set_cookie("ds_actor", self.sign(data, "actor")) + + def delete_actor_cookie(self, response: Response): + response.set_cookie("ds_actor", "", expires=0, max_age=0) + + async def _asset_urls(self, key, template, context, request, view_name): + # Flatten list-of-lists from plugins: + seen_urls = set() + collected = [] + for hook in getattr(pm.hook, key)( + template=template.name, + database=context.get("database"), + table=context.get("table"), + columns=context.get("columns"), + view_name=view_name, + request=request, + datasette=self, + ): + hook = await await_me_maybe(hook) + collected.extend(hook) + collected.extend((self.config or {}).get(key) or []) + output = [] + for url_or_dict in collected: + if isinstance(url_or_dict, dict): + url = url_or_dict["url"] + sri = url_or_dict.get("sri") + module = bool(url_or_dict.get("module")) + else: + url = url_or_dict + sri = None + module = False + if url in seen_urls: + continue + seen_urls.add(url) + if url.startswith("/"): + # Take base_url into account: + url = self.urls.path(url) + script = {"url": url} + if sri: + script["sri"] = sri + if module: + script["module"] = True + output.append(script) + return output + + def _config(self): + return redact_keys( + self.config, ("secret", "key", "password", "token", "hash", "dsn") + ) + + def _routes(self): + routes = [] + + for routes_to_add in pm.hook.register_routes(datasette=self): + for regex, view_fn in routes_to_add: + routes.append((regex, wrap_view(view_fn, self))) + + def add_route(view, regex): + routes.append((regex, view)) + + add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") + add_route(IndexView.as_view(self), r"/-/(\.(?Pjsono?))?$") + add_route(permanent_redirect("/-/"), r"/-$") + # TODO: /favicon.ico and /-/static/ deserve far-future cache expires + add_route(favicon, "/favicon.ico") + + add_route( + asgi_static(app_root / "datasette" / "static"), r"/-/static/(?P.*)$" + ) + for path, dirname in self.static_mounts: + add_route(asgi_static(dirname), r"/" + path + "/(?P.*)$") + + # Mount any plugin static/ directories + for plugin in get_plugins(): + if plugin["static_path"]: + add_route( + asgi_static(plugin["static_path"]), + f"/-/static-plugins/{plugin['name']}/(?P.*)$", + ) + # Support underscores in name in addition to hyphens, see https://github.com/simonw/datasette/issues/611 + add_route( + asgi_static(plugin["static_path"]), + "/-/static-plugins/{}/(?P.*)$".format( + plugin["name"].replace("-", "_") + ), + ) + add_route( + permanent_redirect( + "/_memory", forward_query_string=True, forward_rest=True + ), + r"/:memory:(?P.*)$", + ) + add_route( + JsonDataView.as_view(self, "versions.json", self._versions), + r"/-/versions(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view( + self, "plugins.json", self._plugins, needs_request=True + ), + r"/-/plugins(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view(self, "settings.json", lambda: self._settings), + r"/-/settings(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view(self, "config.json", lambda: self._config()), + r"/-/config(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view(self, "threads.json", self._threads), + 
r"/-/threads(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view(self, "databases.json", self._connected_databases), + r"/-/databases(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view( + self, "actor.json", self._actor, needs_request=True, permission=None + ), + r"/-/actor(\.(?Pjson))?$", + ) + add_route( + JsonDataView.as_view( + self, + "actions.json", + self._actions, + template="debug_actions.html", + permission="permissions-debug", + ), + r"/-/actions(\.(?Pjson))?$", + ) + add_route( + AuthTokenView.as_view(self), + r"/-/auth-token$", + ) + add_route( + CreateTokenView.as_view(self), + r"/-/create-token$", + ) + add_route( + ApiExplorerView.as_view(self), + r"/-/api$", + ) + add_route( + TablesView.as_view(self), + r"/-/tables(\.(?Pjson))?$", + ) + add_route( + InstanceSchemaView.as_view(self), + r"/-/schema(\.(?Pjson|md))?$", + ) + add_route( + LogoutView.as_view(self), + r"/-/logout$", + ) + add_route( + PermissionsDebugView.as_view(self), + r"/-/permissions$", + ) + add_route( + AllowedResourcesView.as_view(self), + r"/-/allowed(\.(?Pjson))?$", + ) + add_route( + PermissionRulesView.as_view(self), + r"/-/rules(\.(?Pjson))?$", + ) + add_route( + PermissionCheckView.as_view(self), + r"/-/check(\.(?Pjson))?$", + ) + add_route( + MessagesDebugView.as_view(self), + r"/-/messages$", + ) + add_route( + AllowDebugView.as_view(self), + r"/-/allow-debug$", + ) + add_route( + wrap_view(PatternPortfolioView, self), + r"/-/patterns$", + ) + add_route( + wrap_view(database_download, self), + r"/(?P[^\/\.]+)\.db$", + ) + add_route( + wrap_view(DatabaseView, self), + r"/(?P[^\/\.]+)(\.(?P\w+))?$", + ) + add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") + add_route( + DatabaseSchemaView.as_view(self), + r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", + ) + add_route( + wrap_view(QueryView, self), + r"/(?P[^\/\.]+)/-/query(\.(?P\w+))?$", + ) + add_route( + wrap_view(table_view, self), + r"/(?P[^\/\.]+)/(?P[^\/\.]+)(\.(?P\w+))?$", + ) + add_route( + RowView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P<pks>[^/]+?)(\.(?P<format>\w+))?$", + ) + add_route( + TableInsertView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/insert$", + ) + add_route( + TableUpsertView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/upsert$", + ) + add_route( + TableDropView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/drop$", + ) + add_route( + TableSchemaView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/schema(\.(?P<format>json|md))?$", + ) + add_route( + RowDeleteView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P<pks>[^/]+?)/-/delete$", + ) + add_route( + RowUpdateView.as_view(self), + r"/(?P<database>[^\/\.]+)/(?P<table>
[^/]+?)/(?P[^/]+?)/-/update$", + ) + return [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def resolve_database(self, request): + database_route = tilde_decode(request.url_vars["database"]) + try: + return self.get_database(route=database_route) + except KeyError: + raise DatabaseNotFound(database_route) + + async def resolve_table(self, request): + db = await self.resolve_database(request) + table_name = tilde_decode(request.url_vars["table"]) + # Table must exist + is_view = False + table_exists = await db.table_exists(table_name) + if not table_exists: + is_view = await db.view_exists(table_name) + if not (table_exists or is_view): + raise TableNotFound(db.name, table_name) + return ResolvedTable(db, table_name, is_view) + + async def resolve_row(self, request): + db, table_name, _ = await self.resolve_table(request) + pk_values = urlsafe_components(request.url_vars["pks"]) + sql, params, pks = await row_sql_params_pks(db, table_name, pk_values) + results = await db.execute(sql, params, truncate=True) + row = results.first() + if row is None: + raise RowNotFound(db.name, table_name, pk_values) + return ResolvedRow(db, table_name, sql, params, pks, pk_values, results.first()) def app(self): - app = Sanic(__name__) - self.jinja = SanicJinja2( - app, - loader=FileSystemLoader([ - str(app_root / 'datasette' / 'templates') - ]) + """Returns an ASGI app function that serves the whole of Datasette""" + routes = self._routes() + + async def setup_db(): + # First time server starts up, calculate table counts for immutable databases + for database in self.databases.values(): + if not database.is_mutable: + await database.table_counts(limit=60 * 60 * 1000) + + async def custom_csrf_error(scope, send, message_id): + await asgi_send( + send, + content=await self.render_template( + "csrf_error.html", + {"message_id": message_id, "message_name": Errors(message_id).name}, + ), + status=403, + content_type="text/html; charset=utf-8", + ) + + asgi = asgi_csrf.asgi_csrf( + DatasetteRouter(self, routes), + signing_secret=self._secret, + cookie_name="ds_csrftoken", + skip_if_scope=lambda scope: any( + pm.hook.skip_csrf(datasette=self, scope=scope) + ), + send_csrf_failed=custom_csrf_error, ) - self.jinja.add_env('escape_css_string', escape_css_string, 'filters') - self.jinja.add_env('quote_plus', lambda u: urllib.parse.quote_plus(u), 'filters') - self.jinja.add_env('escape_table_name', escape_sqlite_table_name, 'filters') - app.add_route(IndexView.as_view(self), '/') - # TODO: /favicon.ico and /-/static/ deserve far-future cache expires - app.add_route(favicon, '/favicon.ico') - app.static('/-/static/', str(app_root / 'datasette' / 'static')) - app.add_route( - DatabaseView.as_view(self), - '/' + if self.setting("trace_debug"): + asgi = AsgiTracer(asgi) + asgi = AsgiLifespan(asgi) + asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) + for wrapper in pm.hook.asgi_wrapper(datasette=self): + asgi = wrapper(asgi) + return asgi + + +class DatasetteRouter: + def __init__(self, datasette, routes): + self.ds = datasette + self.routes = routes or [] + + async def __call__(self, scope, receive, send): + # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves + path = scope["path"] + raw_path = scope.get("raw_path") + if raw_path: + path = raw_path.decode("ascii") + path = path.partition("?")[0] + return await self.route_path(scope, receive, send, path) + + async def route_path(self, scope, receive, send, path): + # Strip off base_url if present before routing + base_url = self.ds.setting("base_url") + if base_url != "/" and path.startswith(base_url): + path = "/" + path[len(base_url) :] + scope = dict(scope, route_path=path) + request = Request(scope, receive) + # Populate request_messages if ds_messages cookie is present + try: + request._messages = self.ds.unsign( + request.cookies.get("ds_messages", ""), "messages" + ) + except BadSignature: + pass + + scope_modifications = {} + # Apply force_https_urls, if set + if ( + self.ds.setting("force_https_urls") + and scope["type"] == "http" + and scope.get("scheme") != "https" + ): + scope_modifications["scheme"] = "https" + # Handle authentication + default_actor = scope.get("actor") or None + actor = None + for actor in pm.hook.actor_from_request(datasette=self.ds, request=request): + actor = await await_me_maybe(actor) + if actor: + break + scope_modifications["actor"] = actor or default_actor + scope = dict(scope, **scope_modifications) + + match, view = resolve_routes(self.routes, path) + + if match is None: + return await self.handle_404(request, send) + + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + request.scope = new_scope + try: + response = await view(request, send) + if response: + self.ds._write_messages_to_response(request, response) + await response.asgi_send(send) + return + except NotFound as exception: + return await self.handle_404(request, send, exception) + except Forbidden as exception: + # Try the forbidden() plugin hook + for custom_response in pm.hook.forbidden( + datasette=self.ds, request=request, message=exception.args[0] + ): + custom_response = await await_me_maybe(custom_response) + assert ( + custom_response + ), "Default forbidden() hook should have been called" + return await custom_response.asgi_send(send) + except Exception as exception: + return await self.handle_exception(request, send, exception) + + async def handle_404(self, request, send, exception=None): + # If path contains % encoding, redirect to tilde encoding + if "%" in request.path: + # Try the same path but with "%" replaced by "~" + # and "~" replaced with "~7E" + # and "." replaced with "~2E" + new_path = ( + request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E") + ) + if request.query_string: + new_path += "?{}".format(request.query_string) + await asgi_send_redirect(send, new_path) + return + # If URL has a trailing slash, redirect to URL without it + path = request.scope.get( + "raw_path", request.scope["path"].encode("utf8") + ).partition(b"?")[0] + context = {} + if path.endswith(b"/"): + path = path.rstrip(b"/") + if request.scope["query_string"]: + path += b"?" + request.scope["query_string"] + await asgi_send_redirect(send, path.decode("latin1")) + else: + # Is there a pages/* template matching this path? 
+ route_path = request.scope.get("route_path", request.scope["path"]) + # Jinja requires template names to use "/" even on Windows + template_name = "pages" + route_path + ".html" + # Build a list of pages/blah/{name}.html matching expressions + environment = self.ds.get_jinja_environment(request) + pattern_templates = [ + filepath + for filepath in environment.list_templates() + if "{" in filepath and filepath.startswith("pages/") + ] + page_routes = [ + (route_pattern_from_filepath(filepath[len("pages/") :]), filepath) + for filepath in pattern_templates + ] + try: + template = environment.select_template([template_name]) + except TemplateNotFound: + template = None + if template is None: + # Try for a pages/blah/{name}.html template match + for regex, wildcard_template in page_routes: + match = regex.match(route_path) + if match is not None: + context.update(match.groupdict()) + template = wildcard_template + break + + if template: + headers = {} + status = [200] + + def custom_header(name, value): + headers[name] = value + return "" + + def custom_status(code): + status[0] = code + return "" + + def custom_redirect(location, code=302): + status[0] = code + headers["Location"] = location + return "" + + def raise_404(message=""): + raise NotFoundExplicit(message) + + context.update( + { + "custom_header": custom_header, + "custom_status": custom_status, + "custom_redirect": custom_redirect, + "raise_404": raise_404, + } + ) + try: + body = await self.ds.render_template( + template, + context, + request=request, + view_name="page", + ) + except NotFoundExplicit as e: + await self.handle_exception(request, send, e) + return + # Pull content-type out into separate parameter + content_type = "text/html; charset=utf-8" + matches = [k for k in headers if k.lower() == "content-type"] + if matches: + content_type = headers[matches[0]] + await asgi_send( + send, + body, + status=status[0], + headers=headers, + content_type=content_type, + ) + else: + await self.handle_exception(request, send, exception or NotFound("404")) + + async def handle_exception(self, request, send, exception): + responses = [] + for hook in pm.hook.handle_exception( + datasette=self.ds, + request=request, + exception=exception, + ): + response = await await_me_maybe(hook) + if response is not None: + responses.append(response) + + assert responses, "Default exception handler should have returned something" + # Even if there are multiple responses use just the first one + response = responses[0] + await response.asgi_send(send) + + +_cleaner_task_str_re = re.compile(r"\S*site-packages/") + + +def _cleaner_task_str(task): + s = str(task) + # This has something like the following in it: + # running at /Users/simonw/Dropbox/Development/datasette/venv-3.7.5/lib/python3.7/site-packages/uvicorn/main.py:361> + # Clean up everything up to and including site-packages + return _cleaner_task_str_re.sub("", s) + + +def wrap_view(view_fn_or_class, datasette): + is_function = isinstance(view_fn_or_class, types.FunctionType) + if is_function: + return wrap_view_function(view_fn_or_class, datasette) + else: + if not isinstance(view_fn_or_class, type): + raise ValueError("view_fn_or_class must be a function or a class") + return wrap_view_class(view_fn_or_class, datasette) + + +def wrap_view_class(view_class, datasette): + async def async_view_for_class(request, send): + instance = view_class() + if inspect.iscoroutinefunction(instance.__call__): + return await async_call_with_supported_arguments( + instance.__call__, + 
scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + else: + return call_with_supported_arguments( + instance.__call__, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + + async_view_for_class.view_class = view_class + return async_view_for_class + + +def wrap_view_function(view_fn, datasette): + @functools.wraps(view_fn) + async def async_view_fn(request, send): + if inspect.iscoroutinefunction(view_fn): + response = await async_call_with_supported_arguments( + view_fn, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + else: + response = call_with_supported_arguments( + view_fn, + scope=request.scope, + receive=request.receive, + send=send, + request=request, + datasette=datasette, + ) + if response is not None: + return response + + return async_view_fn + + +def permanent_redirect(path, forward_query_string=False, forward_rest=False): + return wrap_view( + lambda request, send: Response.redirect( + path + + (request.url_vars["rest"] if forward_rest else "") + + ( + ("?" + request.query_string) + if forward_query_string and request.query_string + else "" + ), + status=301, + ), + datasette=None, + ) + + +_curly_re = re.compile(r"({.*?})") + + +def route_pattern_from_filepath(filepath): + # Drop the ".html" suffix + if filepath.endswith(".html"): + filepath = filepath[: -len(".html")] + re_bits = ["/"] + for bit in _curly_re.split(filepath): + if _curly_re.match(bit): + re_bits.append(f"(?P<{bit[1:-1]}>[^/]*)") + else: + re_bits.append(re.escape(bit)) + return re.compile("^" + "".join(re_bits) + "$") + + +class NotFoundExplicit(NotFound): + pass + + +class DatasetteClient: + """Internal HTTP client for making requests to a Datasette instance. + + Used for testing and for internal operations that need to make HTTP requests + to the Datasette app without going through an actual HTTP server. 
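+
+    A minimal illustrative example (assumes "datasette" is an existing
+    Datasette instance; the path shown is one of the built-in endpoints):
+
+        response = await datasette.client.get("/-/versions.json")
+        data = response.json()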
+ """ + + def __init__(self, ds): + self.ds = ds + + @property + def app(self): + return self.ds.app() + + def actor_cookie(self, actor): + # Utility method, mainly for tests + return self.ds.sign({"a": actor}, "actor") + + def _fix(self, path, avoid_path_rewrites=False): + if not isinstance(path, PrefixedUrlString) and not avoid_path_rewrites: + path = self.ds.urls.path(path) + if path.startswith("/"): + path = f"http://localhost{path}" + return path + + async def _request(self, method, path, skip_permission_checks=False, **kwargs): + from datasette.permissions import SkipPermissions + + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + + async def get(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "get", path, skip_permission_checks=skip_permission_checks, **kwargs ) - app.add_route( - DatabaseDownload.as_view(self), - '/' + + async def options(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "options", path, skip_permission_checks=skip_permission_checks, **kwargs ) - app.add_route( - TableView.as_view(self), - '//' + + async def head(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "head", path, skip_permission_checks=skip_permission_checks, **kwargs ) - app.add_route( - RowView.as_view(self), - '///' + + async def post(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "post", path, skip_permission_checks=skip_permission_checks, **kwargs ) - return app + + async def put(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "put", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def patch(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "patch", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def delete(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "delete", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def request(self, method, path, skip_permission_checks=False, **kwargs): + """Make an HTTP request with the specified method. 
+ + Args: + method: HTTP method (e.g., "GET", "POST", "PUT") + path: The path to request + skip_permission_checks: If True, bypass all permission checks for this request + **kwargs: Additional arguments to pass to httpx + + Returns: + httpx.Response: The response from the request + """ + from datasette.permissions import SkipPermissions + + avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py new file mode 100644 index 00000000..4d8c6bea --- /dev/null +++ b/datasette/blob_renderer.py @@ -0,0 +1,61 @@ +from datasette import hookimpl +from datasette.utils.asgi import Response, BadRequest +from datasette.utils import to_css_class +import hashlib + +_BLOB_COLUMN = "_blob_column" +_BLOB_HASH = "_blob_hash" + + +async def render_blob(datasette, database, rows, columns, request, table, view_name): + if _BLOB_COLUMN not in request.args: + raise BadRequest(f"?{_BLOB_COLUMN}= is required") + blob_column = request.args[_BLOB_COLUMN] + if blob_column not in columns: + raise BadRequest(f"{blob_column} is not a valid column") + + # If ?_blob_hash= provided, use that to select the row - otherwise use first row + blob_hash = None + if _BLOB_HASH in request.args: + blob_hash = request.args[_BLOB_HASH] + for row in rows: + value = row[blob_column] + if hashlib.sha256(value).hexdigest() == blob_hash: + break + else: + # Loop did not break + raise BadRequest( + "Link has expired - the requested binary content has changed or could not be found." 
+ ) + else: + row = rows[0] + + value = row[blob_column] + filename_bits = [] + if table: + filename_bits.append(to_css_class(table)) + if "pks" in request.url_vars: + filename_bits.append(request.url_vars["pks"]) + filename_bits.append(to_css_class(blob_column)) + if blob_hash: + filename_bits.append(blob_hash[:6]) + filename = "-".join(filename_bits) + ".blob" + headers = { + "X-Content-Type-Options": "nosniff", + "Content-Disposition": f'attachment; filename="{filename}"', + } + return Response( + body=value or b"", + status=200, + headers=headers, + content_type="application/binary", + ) + + +@hookimpl +def register_output_renderer(): + return { + "extension": "blob", + "render": render_blob, + "can_render": lambda: False, + } diff --git a/datasette/cli.py b/datasette/cli.py index a089a373..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -1,126 +1,893 @@ +import asyncio +import uvicorn import click +from click import formatting +from click.types import CompositeParamType from click_default_group import DefaultGroup +import functools import json +import os +import pathlib +from runpy import run_module import shutil from subprocess import call import sys -from .app import Datasette -from .utils import ( - temporary_docker_directory, +import textwrap +import webbrowser +from .app import ( + Datasette, + DEFAULT_SETTINGS, + SETTINGS, + SQLITE_LIMIT_ATTACHED, + pm, ) +from .utils import ( + LoadExtension, + StartupError, + check_connection, + deep_dict_update, + find_spatialite, + parse_metadata, + ConnectionProblem, + SpatialiteConnectionProblem, + initial_path_for_datasette, + pairs_to_nested_config, + temporary_docker_directory, + value_as_boolean, + SpatialiteNotFound, + StaticMount, + ValueAsBooleanError, +) +from .utils.sqlite import sqlite3 +from .utils.testing import TestClient +from .version import __version__ -@click.group(cls=DefaultGroup, default='serve', default_if_no_args=True) +def run_sync(coro_func): + """Run an async callable to completion on a fresh event loop.""" + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete(coro_func()) + finally: + asyncio.set_event_loop(None) + loop.close() + + +# Use Rich for tracebacks if it is installed +try: + from rich.traceback import install + + install(show_locals=True) +except ImportError: + pass + + +class Setting(CompositeParamType): + name = "setting" + arity = 2 + + def convert(self, config, param, ctx): + name, value = config + if name in DEFAULT_SETTINGS: + # For backwards compatibility with how this worked prior to + # Datasette 1.0, we turn bare setting names into setting.name + # Type checking for those older settings + default = DEFAULT_SETTINGS[name] + name = "settings.{}".format(name) + if isinstance(default, bool): + try: + return name, "true" if value_as_boolean(value) else "false" + except ValueAsBooleanError: + self.fail(f'"{name}" should be on/off/true/false/1/0', param, ctx) + elif isinstance(default, int): + if not value.isdigit(): + self.fail(f'"{name}" should be an integer', param, ctx) + return name, value + elif isinstance(default, str): + return name, value + else: + # Should never happen: + self.fail("Invalid option") + return name, value + + +def sqlite_extensions(fn): + fn = click.option( + "sqlite_extensions", + "--load-extension", + type=LoadExtension(), + envvar="DATASETTE_LOAD_EXTENSION", + multiple=True, + help="Path to a SQLite extension to load, and optional entrypoint", + )(fn) + + # Wrap it in a custom error handler + 
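+    # The wrapper below converts the AttributeError raised when Python's
+    # sqlite3 module was built without enable_load_extension support into a
+    # friendlier ClickException pointing at further documentation.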
@functools.wraps(fn) + def wrapped(*args, **kwargs): + try: + return fn(*args, **kwargs) + except AttributeError as e: + if "enable_load_extension" in str(e): + raise click.ClickException( + textwrap.dedent( + """ + Your Python installation does not have the ability to load SQLite extensions. + + More information: https://datasette.io/help/extensions + """ + ).strip() + ) + raise + + return wrapped + + +@click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) +@click.version_option(version=__version__) def cli(): """ - Datasette! + Datasette is an open source multi-tool for exploring and publishing data + + \b + About Datasette: https://datasette.io/ + Full documentation: https://docs.datasette.io/ """ @cli.command() -@click.argument('files', type=click.Path(exists=True), nargs=-1) -@click.option('--inspect-file', default='inspect-data.json') -def build(files, inspect_file): - app = Datasette(files) - open(inspect_file, 'w').write(json.dumps(app.inspect(), indent=2)) +@click.argument("files", type=click.Path(exists=True), nargs=-1) +@click.option("--inspect-file", default="-") +@sqlite_extensions +def inspect(files, inspect_file, sqlite_extensions): + """ + Generate JSON summary of provided database files + + This can then be passed to "datasette --inspect-file" to speed up count + operations against immutable database files. + """ + inspect_data = run_sync(lambda: inspect_(files, sqlite_extensions)) + if inspect_file == "-": + sys.stdout.write(json.dumps(inspect_data, indent=2)) + else: + with open(inspect_file, "w") as fp: + fp.write(json.dumps(inspect_data, indent=2)) + + +async def inspect_(files, sqlite_extensions): + app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) + data = {} + for name, database in app.databases.items(): + counts = await database.table_counts(limit=3600 * 1000) + data[name] = { + "hash": database.hash, + "size": database.size, + "file": database.path, + "tables": { + table_name: {"count": table_count} + for table_name, table_count in counts.items() + }, + } + return data + + +@cli.group() +def publish(): + """Publish specified SQLite database files to the internet along with a Datasette-powered interface and API""" + pass + + +# Register publish plugins +pm.hook.publish_subcommand(publish=publish) @cli.command() -@click.argument('publisher', type=click.Choice(['now'])) -@click.argument('files', type=click.Path(exists=True), nargs=-1) +@click.option("--all", help="Include built-in default plugins", is_flag=True) @click.option( - '-n', '--name', default='datasette', - help='Application name to use when deploying to Now' + "--requirements", help="Output requirements.txt of installed plugins", is_flag=True ) @click.option( - '-m', '--metadata', type=click.File(mode='r'), - help='Path to JSON file containing metadata to publish' + "--plugins-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom plugins", ) -def publish(publisher, files, name, metadata): - """ - Publish specified SQLite database files to the internet along with a datasette API. - - Only current option for PUBLISHER is 'now'. 
You must have Zeit Now installed: - https://zeit.co/now - - Example usage: datasette publish now my-database.db - """ - if not shutil.which('now'): - click.secho( - ' The publish command requires "now" to be installed and configured ', - bg='red', - fg='white', - bold=True, - err=True, - ) - click.echo('Follow the instructions at https://zeit.co/now#whats-now', err=True) - sys.exit(1) - - with temporary_docker_directory(files, name, metadata): - call('now') +def plugins(all, requirements, plugins_dir): + """List currently installed plugins""" + app = Datasette([], plugins_dir=plugins_dir) + if requirements: + for plugin in app._plugins(): + if plugin["version"]: + click.echo("{}=={}".format(plugin["name"], plugin["version"])) + else: + click.echo(json.dumps(app._plugins(all=all), indent=4)) @cli.command() -@click.argument('files', type=click.Path(exists=True), nargs=-1, required=True) +@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True) @click.option( - '-t', '--tag', - help='Name for the resulting Docker container, can optionally use name:tag format' + "-t", + "--tag", + help="Name for the resulting Docker container, can optionally use name:tag format", ) @click.option( - '-m', '--metadata', type=click.File(mode='r'), - help='Path to JSON file containing metadata to publish' + "-m", + "--metadata", + type=click.File(mode="r"), + help="Path to JSON/YAML file containing metadata to publish", ) -def package(files, tag, metadata): - "Package specified SQLite files into a new datasette Docker container" - if not shutil.which('docker'): +@click.option("--extra-options", help="Extra options to pass to datasette serve") +@click.option("--branch", help="Install datasette from a GitHub branch e.g. main") +@click.option( + "--template-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom templates", +) +@click.option( + "--plugins-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom plugins", +) +@click.option( + "--static", + type=StaticMount(), + help="Serve static files from this directory at /MOUNT/...", + multiple=True, +) +@click.option( + "--install", help="Additional packages (e.g. 
plugins) to install", multiple=True +) +@click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension") +@click.option("--version-note", help="Additional note to show on /-/versions") +@click.option( + "--secret", + help="Secret used for signing secure values, such as signed cookies", + envvar="DATASETTE_PUBLISH_SECRET", + default=lambda: os.urandom(32).hex(), +) +@click.option( + "-p", + "--port", + default=8001, + type=click.IntRange(1, 65535), + help="Port to run the server on, defaults to 8001", +) +@click.option("--title", help="Title for metadata") +@click.option("--license", help="License label for metadata") +@click.option("--license_url", help="License URL for metadata") +@click.option("--source", help="Source label for metadata") +@click.option("--source_url", help="Source URL for metadata") +@click.option("--about", help="About label for metadata") +@click.option("--about_url", help="About URL for metadata") +def package( + files, + tag, + metadata, + extra_options, + branch, + template_dir, + plugins_dir, + static, + install, + spatialite, + version_note, + secret, + port, + **extra_metadata, +): + """Package SQLite files into a Datasette Docker container""" + if not shutil.which("docker"): click.secho( ' The package command requires "docker" to be installed and configured ', - bg='red', - fg='white', + bg="red", + fg="white", bold=True, err=True, ) sys.exit(1) - with temporary_docker_directory(files, 'datasette', metadata): - args = ['docker', 'build'] + with temporary_docker_directory( + files, + "datasette", + metadata=metadata, + extra_options=extra_options, + branch=branch, + template_dir=template_dir, + plugins_dir=plugins_dir, + static=static, + install=install, + spatialite=spatialite, + version_note=version_note, + secret=secret, + extra_metadata=extra_metadata, + port=port, + ): + args = ["docker", "build"] if tag: - args.append('-t') + args.append("-t") args.append(tag) - args.append('.') + args.append(".") call(args) @cli.command() -@click.argument('files', type=click.Path(exists=True), nargs=-1) -@click.option('-h', '--host', default='0.0.0.0', help='host for server, defaults to 0.0.0.0') -@click.option('-p', '--port', default=8001, help='port for server, defaults to 8001') -@click.option('--debug', is_flag=True, help='Enable debug mode - useful for development') -@click.option('--reload', is_flag=True, help='Automatically reload if code change detected - useful for development') -@click.option('--cors', is_flag=True, help='Enable CORS by serving Access-Control-Allow-Origin: *') -@click.option('--inspect-file', help='Path to JSON file created using "datasette build"') -@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata') -def serve(files, host, port, debug, reload, cors, inspect_file, metadata): +@click.argument("packages", nargs=-1) +@click.option( + "-U", "--upgrade", is_flag=True, help="Upgrade packages to latest version" +) +@click.option( + "-r", + "--requirement", + type=click.Path(exists=True), + help="Install from requirements file", +) +@click.option( + "-e", + "--editable", + help="Install a project in editable mode from this path", +) +def install(packages, upgrade, requirement, editable): + """Install plugins and packages from PyPI into the same environment as Datasette""" + if not packages and not requirement and not editable: + raise click.UsageError("Please specify at least one package to install") + args = ["pip", "install"] + if upgrade: + args += ["--upgrade"] + 
if editable: + args += ["--editable", editable] + if requirement: + args += ["-r", requirement] + args += list(packages) + sys.argv = args + run_module("pip", run_name="__main__") + + +@cli.command() +@click.argument("packages", nargs=-1, required=True) +@click.option("-y", "--yes", is_flag=True, help="Don't ask for confirmation") +def uninstall(packages, yes): + """Uninstall plugins and Python packages from the Datasette environment""" + sys.argv = ["pip", "uninstall"] + list(packages) + (["-y"] if yes else []) + run_module("pip", run_name="__main__") + + +@cli.command() +@click.argument("files", type=click.Path(), nargs=-1) +@click.option( + "-i", + "--immutable", + type=click.Path(exists=True), + help="Database files to open in immutable mode", + multiple=True, +) +@click.option( + "-h", + "--host", + default="127.0.0.1", + help=( + "Host for server. Defaults to 127.0.0.1 which means only connections " + "from the local machine will be allowed. Use 0.0.0.0 to listen to " + "all IPs and allow access from other machines." + ), +) +@click.option( + "-p", + "--port", + default=8001, + type=click.IntRange(0, 65535), + help="Port for server, defaults to 8001. Use -p 0 to automatically assign an available port.", +) +@click.option( + "--uds", + help="Bind to a Unix domain socket", +) +@click.option( + "--reload", + is_flag=True, + help="Automatically reload if code or metadata change detected - useful for development", +) +@click.option( + "--cors", is_flag=True, help="Enable CORS by serving Access-Control-Allow-Origin: *" +) +@sqlite_extensions +@click.option( + "--inspect-file", help='Path to JSON file created using "datasette inspect"' +) +@click.option( + "-m", + "--metadata", + type=click.File(mode="r"), + help="Path to JSON/YAML file containing license/source metadata", +) +@click.option( + "--template-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom templates", +) +@click.option( + "--plugins-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom plugins", +) +@click.option( + "--static", + type=StaticMount(), + help="Serve static files from this directory at /MOUNT/...", + multiple=True, +) +@click.option("--memory", is_flag=True, help="Make /_memory database available") +@click.option( + "-c", + "--config", + type=click.File(mode="r"), + help="Path to JSON/YAML Datasette configuration file", +) +@click.option( + "-s", + "--setting", + "settings", + type=Setting(), + help="nested.key, value setting to use in Datasette configuration", + multiple=True, +) +@click.option( + "--secret", + help="Secret used for signing secure values, such as signed cookies", + envvar="DATASETTE_SECRET", +) +@click.option( + "--root", + help="Output URL that sets a cookie authenticating the root user", + is_flag=True, +) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) +@click.option( + "--get", + help="Run an HTTP GET request against this path, print results and exit", +) +@click.option( + "--headers", + is_flag=True, + help="Include HTTP headers in --get output", +) +@click.option( + "--token", + help="API token to send with --get requests", +) +@click.option( + "--actor", + help="Actor to use for --get requests (JSON string)", +) +@click.option("--version-note", help="Additional note to show on /-/versions") +@click.option("--help-settings", is_flag=True, help="Show available settings") +@click.option("--pdb", is_flag=True, help="Launch 
debugger on any errors") +@click.option( + "-o", + "--open", + "open_browser", + is_flag=True, + help="Open Datasette in your web browser", +) +@click.option( + "--create", + is_flag=True, + help="Create database files if they do not exist", +) +@click.option( + "--crossdb", + is_flag=True, + help="Enable cross-database joins using the /_memory database", +) +@click.option( + "--nolock", + is_flag=True, + help="Ignore locking, open locked files in read-only mode", +) +@click.option( + "--ssl-keyfile", + help="SSL key file", + envvar="DATASETTE_SSL_KEYFILE", +) +@click.option( + "--ssl-certfile", + help="SSL certificate file", + envvar="DATASETTE_SSL_CERTFILE", +) +@click.option( + "--internal", + type=click.Path(), + help="Path to a persistent Datasette internal SQLite database", +) +def serve( + files, + immutable, + host, + port, + uds, + reload, + cors, + sqlite_extensions, + inspect_file, + metadata, + template_dir, + plugins_dir, + static, + memory, + config, + settings, + secret, + root, + default_deny, + get, + headers, + token, + actor, + version_note, + help_settings, + pdb, + open_browser, + create, + crossdb, + nolock, + ssl_keyfile, + ssl_certfile, + internal, + return_instance=False, +): """Serve up specified SQLite database files with a web UI""" + if help_settings: + formatter = formatting.HelpFormatter() + with formatter.section("Settings"): + formatter.write_dl( + [ + (option.name, f"{option.help} (default={option.default})") + for option in SETTINGS + ] + ) + click.echo(formatter.getvalue()) + sys.exit(0) if reload: import hupper - hupper.start_reloader('datasette.cli.serve') + + reloader = hupper.start_reloader("datasette.cli.serve") + if immutable: + reloader.watch_files(immutable) + if config: + reloader.watch_files([config.name]) + if metadata: + reloader.watch_files([metadata.name]) inspect_data = None if inspect_file: - inspect_data = json.load(open(inspect_file)) + with open(inspect_file) as fp: + inspect_data = json.load(fp) metadata_data = None if metadata: - metadata_data = json.loads(metadata.read()) + metadata_data = parse_metadata(metadata.read()) - click.echo('Serve! files={} on port {}'.format(files, port)) - ds = Datasette( - files, - cache_headers=not debug and not reload, + config_data = None + if config: + config_data = parse_metadata(config.read()) + + config_data = config_data or {} + + # Merge in settings from -s/--setting + if settings: + settings_updates = pairs_to_nested_config(settings) + # Merge recursively, to avoid over-writing nested values + # https://github.com/simonw/datasette/issues/2389 + deep_dict_update(config_data, settings_updates) + + kwargs = dict( + immutables=immutable, + cache_headers=not reload, cors=cors, inspect_data=inspect_data, + config=config_data, metadata=metadata_data, + sqlite_extensions=sqlite_extensions, + template_dir=template_dir, + plugins_dir=plugins_dir, + static_mounts=static, + settings=None, # These are passed in config= now + memory=memory, + secret=secret, + version_note=version_note, + pdb=pdb, + crossdb=crossdb, + nolock=nolock, + internal=internal, + default_deny=default_deny, ) - # Force initial hashing/table counting - ds.inspect() - ds.app().run(host=host, port=port, debug=debug) + + # Separate directories from files + directories = [f for f in files if os.path.isdir(f)] + file_paths = [f for f in files if not os.path.isdir(f)] + + # Handle config_dir - only one directory allowed + if len(directories) > 1: + raise click.ClickException( + "Cannot pass multiple directories. 
Pass a single directory as config_dir." + ) + elif len(directories) == 1: + kwargs["config_dir"] = pathlib.Path(directories[0]) + + # Verify list of files, create if needed (and --create) + for file in file_paths: + if not pathlib.Path(file).exists(): + if create: + sqlite3.connect(file).execute("vacuum") + else: + raise click.ClickException( + "Invalid value for '[FILES]...': Path '{}' does not exist.".format( + file + ) + ) + + # Check for duplicate files by resolving all paths to their absolute forms + # Collect all database files that will be loaded (explicit files + config_dir files) + all_db_files = [] + + # Add explicit files + for file in file_paths: + all_db_files.append((file, pathlib.Path(file).resolve())) + + # Add config_dir databases if config_dir is set + if "config_dir" in kwargs: + config_dir = kwargs["config_dir"] + for ext in ("db", "sqlite", "sqlite3"): + for db_file in config_dir.glob(f"*.{ext}"): + all_db_files.append((str(db_file), db_file.resolve())) + + # Check for duplicates + seen = {} + for original_path, resolved_path in all_db_files: + if resolved_path in seen: + raise click.ClickException( + f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' " + f"both refer to {resolved_path}" + ) + seen[resolved_path] = original_path + + files = file_paths + + try: + ds = Datasette(files, **kwargs) + except SpatialiteNotFound: + raise click.ClickException("Could not find SpatiaLite extension") + except StartupError as e: + raise click.ClickException(e.args[0]) + + if return_instance: + # Private utility mechanism for writing unit tests + return ds + + # Run the "startup" plugin hooks + run_sync(ds.invoke_startup) + + # Run async soundness checks - but only if we're not under pytest + run_sync(lambda: check_databases(ds)) + + if headers and not get: + raise click.ClickException("--headers can only be used with --get") + + if token and not get: + raise click.ClickException("--token can only be used with --get") + + if get: + client = TestClient(ds) + request_headers = {} + if token: + request_headers["Authorization"] = "Bearer {}".format(token) + cookies = {} + if actor: + cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) + response = client.get(get, headers=request_headers, cookies=cookies) + + if headers: + # Output HTTP status code, headers, two newlines, then the response body + click.echo(f"HTTP/1.1 {response.status}") + for key, value in response.headers.items(): + click.echo(f"{key}: {value}") + if response.text: + click.echo() + click.echo(response.text) + else: + click.echo(response.text) + + exit_code = 0 if response.status == 200 else 1 + sys.exit(exit_code) + return + + # Start the server + url = None + if root: + ds.root_enabled = True + url = "http://{}:{}{}?token={}".format( + host, port, ds.urls.path("-/auth-token"), ds._root_token + ) + click.echo(url) + if open_browser: + if url is None: + # Figure out most convenient URL - to table, database or homepage + path = run_sync(lambda: initial_path_for_datasette(ds)) + url = f"http://{host}:{port}{path}" + webbrowser.open(url) + uvicorn_kwargs = dict( + host=host, port=port, log_level="info", lifespan="on", workers=1 + ) + if uds: + uvicorn_kwargs["uds"] = uds + if ssl_keyfile: + uvicorn_kwargs["ssl_keyfile"] = ssl_keyfile + if ssl_certfile: + uvicorn_kwargs["ssl_certfile"] = ssl_certfile + uvicorn.run(ds.app(), **uvicorn_kwargs) + + +@cli.command() +@click.argument("id") +@click.option( + "--secret", + help="Secret used for signing the API tokens", + envvar="DATASETTE_SECRET", + 
required=True, +) +@click.option( + "-e", + "--expires-after", + help="Token should expire after this many seconds", + type=int, +) +@click.option( + "alls", + "-a", + "--all", + type=str, + metavar="ACTION", + multiple=True, + help="Restrict token to this action", +) +@click.option( + "databases", + "-d", + "--database", + type=(str, str), + metavar="DB ACTION", + multiple=True, + help="Restrict token to this action on this database", +) +@click.option( + "resources", + "-r", + "--resource", + type=(str, str, str), + metavar="DB RESOURCE ACTION", + multiple=True, + help="Restrict token to this action on this database resource (a table, SQL view or named query)", +) +@click.option( + "--debug", + help="Show decoded token", + is_flag=True, +) +@click.option( + "--plugins-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to directory containing custom plugins", +) +def create_token( + id, secret, expires_after, alls, databases, resources, debug, plugins_dir +): + """ + Create a signed API token for the specified actor ID + + Example: + + datasette create-token root --secret mysecret + + To allow only "view-database-download" for all databases: + + \b + datasette create-token root --secret mysecret \\ + --all view-database-download + + To allow "create-table" against a specific database: + + \b + datasette create-token root --secret mysecret \\ + --database mydb create-table + + To allow "insert-row" against a specific table: + + \b + datasette create-token root --secret myscret \\ + --resource mydb mytable insert-row + + Restricted actions can be specified multiple times using + multiple --all, --database, and --resource options. + + Add --debug to see a decoded version of the token. + """ + ds = Datasette(secret=secret, plugins_dir=plugins_dir) + + # Run ds.invoke_startup() in an event loop + run_sync(ds.invoke_startup) + + # Warn about any unknown actions + actions = [] + actions.extend(alls) + actions.extend([p[1] for p in databases]) + actions.extend([p[2] for p in resources]) + for action in actions: + if not ds.actions.get(action): + click.secho( + f" Unknown permission: {action} ", + fg="red", + err=True, + ) + + restrict_database = {} + for database, action in databases: + restrict_database.setdefault(database, []).append(action) + restrict_resource = {} + for database, resource, action in resources: + restrict_resource.setdefault(database, {}).setdefault(resource, []).append( + action + ) + + token = ds.create_token( + id, + expires_after=expires_after, + restrict_all=alls, + restrict_database=restrict_database, + restrict_resource=restrict_resource, + ) + click.echo(token) + if debug: + encoded = token[len("dstok_") :] + click.echo("\nDecoded:\n") + click.echo(json.dumps(ds.unsign(encoded, namespace="token"), indent=2)) + + +pm.hook.register_commands(cli=cli) + + +async def check_databases(ds): + # Run check_connection against every connected database + # to confirm they are all usable + for database in list(ds.databases.values()): + try: + await database.execute_fn(check_connection) + except SpatialiteConnectionProblem: + suggestion = "" + try: + find_spatialite() + suggestion = "\n\nTry adding the --load-extension=spatialite option." + except SpatialiteNotFound: + pass + raise click.UsageError( + "It looks like you're trying to load a SpatiaLite" + + " database without first loading the SpatiaLite module." 
+ + suggestion + + "\n\nRead more: https://docs.datasette.io/en/stable/spatialite.html" + ) + except ConnectionProblem as e: + raise click.UsageError( + f"Connection to {database.path} failed check: {str(e.args[0])}" + ) + # If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning + if ( + ds.crossdb + and len([db for db in ds.databases.values() if not db.is_memory]) + > SQLITE_LIMIT_ATTACHED + ): + msg = ( + "Warning: --crossdb only works with the first {} attached databases".format( + SQLITE_LIMIT_ATTACHED + ) + ) + click.echo(click.style(msg, bold=True, fg="yellow"), err=True) diff --git a/datasette/database.py b/datasette/database.py new file mode 100644 index 00000000..e5858128 --- /dev/null +++ b/datasette/database.py @@ -0,0 +1,737 @@ +import asyncio +from collections import namedtuple +from pathlib import Path +import janus +import queue +import sqlite_utils +import sys +import threading +import uuid + +from .tracer import trace +from .utils import ( + detect_fts, + detect_primary_keys, + detect_spatialite, + get_all_foreign_keys, + get_outbound_foreign_keys, + md5_not_usedforsecurity, + sqlite_timelimit, + sqlite3, + table_columns, + table_column_details, +) +from .utils.sqlite import sqlite_version +from .inspect import inspect_hash + +connections = threading.local() + +AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) + + +class Database: + # For table counts stop at this many rows: + count_limit = 10000 + _thread_local_id_counter = 1 + + def __init__( + self, + ds, + path=None, + is_mutable=True, + is_memory=False, + memory_name=None, + mode=None, + ): + self.name = None + self._thread_local_id = f"x{self._thread_local_id_counter}" + Database._thread_local_id_counter += 1 + self.route = None + self.ds = ds + self.path = path + self.is_mutable = is_mutable + self.is_memory = is_memory + self.memory_name = memory_name + if memory_name is not None: + self.is_memory = True + self.cached_hash = None + self.cached_size = None + self._cached_table_counts = None + self._write_thread = None + self._write_queue = None + # These are used when in non-threaded mode: + self._read_connection = None + self._write_connection = None + # This is used to track all file connections so they can be closed + self._all_file_connections = [] + self.mode = mode + + @property + def cached_table_counts(self): + if self._cached_table_counts is not None: + return self._cached_table_counts + # Maybe use self.ds.inspect_data to populate cached_table_counts + if self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self._cached_table_counts = { + key: value["count"] + for key, value in self.ds.inspect_data[self.name]["tables"].items() + } + return self._cached_table_counts + + @property + def color(self): + if self.hash: + return self.hash[:6] + return md5_not_usedforsecurity(self.name)[:6] + + def suggest_name(self): + if self.path: + return Path(self.path).stem + elif self.memory_name: + return self.memory_name + else: + return "db" + + def connect(self, write=False): + extra_kwargs = {} + if write: + extra_kwargs["isolation_level"] = "IMMEDIATE" + if self.memory_name: + uri = "file:{}?mode=memory&cache=shared".format(self.memory_name) + conn = sqlite3.connect( + uri, uri=True, check_same_thread=False, **extra_kwargs + ) + if not write: + conn.execute("PRAGMA query_only=1") + return conn + if self.is_memory: + return sqlite3.connect(":memory:", uri=True) + + # mode=ro or immutable=1? 
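+        # Illustrative URI forms built below (the filename is hypothetical):
+        #   read connection to a mutable file:    file:data.db?mode=ro
+        #   read connection to an immutable file: file:data.db?immutable=1
+        # Write connections drop the query string entirely.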
+ if self.is_mutable: + qs = "?mode=ro" + if self.ds.nolock: + qs += "&nolock=1" + else: + qs = "?immutable=1" + assert not (write and not self.is_mutable) + if write: + qs = "" + if self.mode is not None: + qs = f"?mode={self.mode}" + conn = sqlite3.connect( + f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs + ) + self._all_file_connections.append(conn) + return conn + + def close(self): + # Close all connections - useful to avoid running out of file handles in tests + for connection in self._all_file_connections: + connection.close() + + async def execute_write(self, sql, params=None, block=True): + def _inner(conn): + return conn.execute(sql, params or []) + + with trace("sql", database=self.name, sql=sql.strip(), params=params): + results = await self.execute_write_fn(_inner, block=block) + return results + + async def execute_write_script(self, sql, block=True): + def _inner(conn): + return conn.executescript(sql) + + with trace("sql", database=self.name, sql=sql.strip(), executescript=True): + results = await self.execute_write_fn( + _inner, block=block, transaction=False + ) + return results + + async def execute_write_many(self, sql, params_seq, block=True): + def _inner(conn): + count = 0 + + def count_params(params): + nonlocal count + for param in params: + count += 1 + yield param + + return conn.executemany(sql, count_params(params_seq)), count + + with trace( + "sql", database=self.name, sql=sql.strip(), executemany=True + ) as kwargs: + results, count = await self.execute_write_fn(_inner, block=block) + kwargs["count"] = count + return results + + async def execute_isolated_fn(self, fn): + # Open a new connection just for the duration of this function + # blocking the write queue to avoid any writes occurring during it + if self.ds.executor is None: + # non-threaded mode + isolated_connection = self.connect(write=True) + try: + result = fn(isolated_connection) + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + return result + else: + # Threaded mode - send to write thread + return await self._send_to_write_thread(fn, isolated_connection=True) + + async def execute_write_fn(self, fn, block=True, transaction=True): + if self.ds.executor is None: + # non-threaded mode + if self._write_connection is None: + self._write_connection = self.connect(write=True) + self.ds._prepare_connection(self._write_connection, self.name) + if transaction: + with self._write_connection: + return fn(self._write_connection) + else: + return fn(self._write_connection) + else: + return await self._send_to_write_thread( + fn, block=block, transaction=transaction + ) + + async def _send_to_write_thread( + self, fn, block=True, isolated_connection=False, transaction=True + ): + if self._write_queue is None: + self._write_queue = queue.Queue() + if self._write_thread is None: + self._write_thread = threading.Thread( + target=self._execute_writes, daemon=True + ) + self._write_thread.name = "_execute_writes for database {}".format( + self.name + ) + self._write_thread.start() + task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") + reply_queue = janus.Queue() + self._write_queue.put( + WriteTask(fn, task_id, reply_queue, isolated_connection, transaction) + ) + if block: + result = await reply_queue.async_q.get() + if isinstance(result, Exception): + raise result + else: + return result + else: + return task_id + + def _execute_writes(self): + # Infinite 
looping thread that protects the single write connection + # to this database + conn_exception = None + conn = None + try: + conn = self.connect(write=True) + self.ds._prepare_connection(conn, self.name) + except Exception as e: + conn_exception = e + while True: + task = self._write_queue.get() + if conn_exception is not None: + result = conn_exception + else: + if task.isolated_connection: + isolated_connection = self.connect(write=True) + try: + result = task.fn(isolated_connection) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e + finally: + isolated_connection.close() + try: + self._all_file_connections.remove(isolated_connection) + except ValueError: + # Was probably a memory connection + pass + else: + try: + if task.transaction: + with conn: + result = task.fn(conn) + else: + result = task.fn(conn) + except Exception as e: + sys.stderr.write("{}\n".format(e)) + sys.stderr.flush() + result = e + task.reply_queue.sync_q.put(result) + + async def execute_fn(self, fn): + if self.ds.executor is None: + # non-threaded mode + if self._read_connection is None: + self._read_connection = self.connect() + self.ds._prepare_connection(self._read_connection, self.name) + return fn(self._read_connection) + + # threaded mode + def in_thread(): + conn = getattr(connections, self._thread_local_id, None) + if not conn: + conn = self.connect() + self.ds._prepare_connection(conn, self.name) + setattr(connections, self._thread_local_id, conn) + return fn(conn) + + return await asyncio.get_event_loop().run_in_executor( + self.ds.executor, in_thread + ) + + async def execute( + self, + sql, + params=None, + truncate=False, + custom_time_limit=None, + page_size=None, + log_sql_errors=True, + ): + """Executes sql against db_name in a thread""" + page_size = page_size or self.ds.page_size + + def sql_operation_in_thread(conn): + time_limit_ms = self.ds.sql_time_limit_ms + if custom_time_limit and custom_time_limit < time_limit_ms: + time_limit_ms = custom_time_limit + + with sqlite_timelimit(conn, time_limit_ms): + try: + cursor = conn.cursor() + cursor.execute(sql, params if params is not None else {}) + max_returned_rows = self.ds.max_returned_rows + if max_returned_rows == page_size: + max_returned_rows += 1 + if max_returned_rows and truncate: + rows = cursor.fetchmany(max_returned_rows + 1) + truncated = len(rows) > max_returned_rows + rows = rows[:max_returned_rows] + else: + rows = cursor.fetchall() + truncated = False + except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: + if e.args == ("interrupted",): + raise QueryInterrupted(e, sql, params) + if log_sql_errors: + sys.stderr.write( + "ERROR: conn={}, sql = {}, params = {}: {}\n".format( + conn, repr(sql), params, e + ) + ) + sys.stderr.flush() + raise + + if truncate: + return Results(rows, truncated, cursor.description) + + else: + return Results(rows, False, cursor.description) + + with trace("sql", database=self.name, sql=sql.strip(), params=params): + results = await self.execute_fn(sql_operation_in_thread) + return results + + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + self.cached_hash = inspect_hash(p) + return self.cached_hash + + @property + def size(self): + if self.cached_size is not None: + 
return self.cached_size + elif self.is_memory: + return 0 + elif self.is_mutable: + return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = Path(self.path).stat().st_size + return self.cached_size + + async def table_counts(self, limit=10): + if not self.is_mutable and self.cached_table_counts is not None: + return self.cached_table_counts + # Try to get counts for each table, $limit timeout for each count + counts = {} + for table in await self.table_names(): + try: + table_count = ( + await self.execute( + f"select count(*) from (select * from [{table}] limit {self.count_limit + 1})", + custom_time_limit=limit, + ) + ).rows[0][0] + counts[table] = table_count + # In some cases I saw "SQL Logic Error" here in addition to + # QueryInterrupted - so we catch that too: + except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError): + counts[table] = None + if not self.is_mutable: + self._cached_table_counts = counts + return counts + + @property + def mtime_ns(self): + if self.is_memory: + return None + return Path(self.path).stat().st_mtime_ns + + async def attached_databases(self): + # This used to be: + # select seq, name, file from pragma_database_list() where seq > 0 + # But SQLite prior to 3.16.0 doesn't support pragma functions + results = await self.execute("PRAGMA database_list;") + # {'seq': 0, 'name': 'main', 'file': ''} + return [ + AttachedDatabase(*row) + for row in results.rows + # Filter out the SQLite internal "temp" database, refs #2557 + if row["seq"] > 0 and row["name"] != "temp" + ] + + async def table_exists(self, table): + results = await self.execute( + "select 1 from sqlite_master where type='table' and name=?", params=(table,) + ) + return bool(results.rows) + + async def view_exists(self, table): + results = await self.execute( + "select 1 from sqlite_master where type='view' and name=?", params=(table,) + ) + return bool(results.rows) + + async def table_names(self): + results = await self.execute( + "select name from sqlite_master where type='table'" + ) + return [r[0] for r in results.rows] + + async def table_columns(self, table): + return await self.execute_fn(lambda conn: table_columns(conn, table)) + + async def table_column_details(self, table): + return await self.execute_fn(lambda conn: table_column_details(conn, table)) + + async def primary_keys(self, table): + return await self.execute_fn(lambda conn: detect_primary_keys(conn, table)) + + async def fts_table(self, table): + return await self.execute_fn(lambda conn: detect_fts(conn, table)) + + async def label_column_for_table(self, table): + explicit_label_column = (await self.ds.table_config(self.name, table)).get( + "label_column" + ) + if explicit_label_column: + return explicit_label_column + + def column_details(conn): + # Returns {column_name: (type, is_unique)} + db = sqlite_utils.Database(conn) + columns = db[table].columns_dict + indexes = db[table].indexes + details = {} + for name in columns: + is_unique = any( + index + for index in indexes + if index.columns == [name] and index.unique + ) + details[name] = (columns[name], is_unique) + return details + + column_details = await self.execute_fn(column_details) + # Is there just one unique column that's text? 
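+        # Illustrative example (hypothetical columns): for a table defined as
+        # (id INTEGER PRIMARY KEY, slug TEXT UNIQUE, body TEXT) the only unique
+        # text column is "slug", so it would be chosen as the label column.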
+ unique_text_columns = [ + name + for name, (type_, is_unique) in column_details.items() + if is_unique and type_ is str + ] + if len(unique_text_columns) == 1: + return unique_text_columns[0] + + column_names = list(column_details.keys()) + # Is there a name or title column? + name_or_title = [c for c in column_names if c.lower() in ("name", "title")] + if name_or_title: + return name_or_title[0] + # If a table has two columns, one of which is ID, then label_column is the other one + if ( + column_names + and len(column_names) == 2 + and ("id" in column_names or "pk" in column_names) + and not set(column_names) == {"id", "pk"} + ): + return [c for c in column_names if c not in ("id", "pk")][0] + # Couldn't find a label: + return None + + async def foreign_keys_for_table(self, table): + return await self.execute_fn( + lambda conn: get_outbound_foreign_keys(conn, table) + ) + + async def hidden_table_names(self): + hidden_tables = [] + # Add any tables marked as hidden in config + db_config = self.ds.config.get("databases", {}).get(self.name, {}) + if "tables" in db_config: + hidden_tables += [ + t for t in db_config["tables"] if db_config["tables"][t].get("hidden") + ] + + if sqlite_version()[1] >= 37: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with shadow_tables as ( + select name + from pragma_table_list + where [type] = 'shadow' + order by name + ), + core_tables as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + combined as ( + select name from shadow_tables + union all + select name from core_tables + ) + select name from combined order by 1 + """ + ) + ] + else: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + WITH base AS ( + SELECT name + FROM sqlite_master + WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + fts_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), ('_config')) + ), + fts5_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' + ), + fts5_shadow_tables AS ( + SELECT + printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name + FROM fts5_names + JOIN fts_suffixes + ), + fts3_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) + ), + fts3_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' + OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' + ), + fts3_shadow_tables AS ( + SELECT + printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name + FROM fts3_names + JOIN fts3_suffixes + ), + final AS ( + SELECT name FROM base + UNION ALL + SELECT name FROM fts5_shadow_tables + UNION ALL + SELECT name FROM fts3_shadow_tables + ) + SELECT name FROM final ORDER BY 1 + """ + ) + ] + # Also hide any FTS tables that have a content= argument + hidden_tables += [ + x[0] + for x in await self.execute( + """ + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%' + AND sql LIKE '%USING FTS%' + AND sql LIKE '%content=%' + """ + ) + ] + + has_spatialite = await self.execute_fn(detect_spatialite) + if has_spatialite: + # Also hide Spatialite internal tables + hidden_tables += [ + "ElementaryGeometries", + "SpatialIndex", + "geometry_columns", + "spatial_ref_sys", + "spatialite_history", + "sql_statements_log", + "sqlite_sequence", + "views_geometry_columns", + 
"virts_geometry_columns", + "data_licenses", + "KNN", + "KNN2", + ] + [ + r[0] + for r in ( + await self.execute( + """ + select name from sqlite_master + where name like "idx_%" + and type = "table" + """ + ) + ).rows + ] + + return hidden_tables + + async def view_names(self): + results = await self.execute("select name from sqlite_master where type='view'") + return [r[0] for r in results.rows] + + async def get_all_foreign_keys(self): + return await self.execute_fn(get_all_foreign_keys) + + async def get_table_definition(self, table, type_="table"): + table_definition_rows = list( + await self.execute( + "select sql from sqlite_master where name = :n and type=:t", + {"n": table, "t": type_}, + ) + ) + if not table_definition_rows: + return None + bits = [table_definition_rows[0][0] + ";"] + # Add on any indexes + index_rows = list( + await self.execute( + "select sql from sqlite_master where tbl_name = :n and type='index' and sql is not null", + {"n": table}, + ) + ) + for index_row in index_rows: + bits.append(index_row[0] + ";") + return "\n".join(bits) + + async def get_view_definition(self, view): + return await self.get_table_definition(view, "view") + + def __repr__(self): + tags = [] + if self.is_mutable: + tags.append("mutable") + if self.is_memory: + tags.append("memory") + if self.hash: + tags.append(f"hash={self.hash}") + if self.size is not None: + tags.append(f"size={self.size}") + tags_str = "" + if tags: + tags_str = f" ({', '.join(tags)})" + return f"" + + +class WriteTask: + __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") + + def __init__(self, fn, task_id, reply_queue, isolated_connection, transaction): + self.fn = fn + self.task_id = task_id + self.reply_queue = reply_queue + self.isolated_connection = isolated_connection + self.transaction = transaction + + +class QueryInterrupted(Exception): + def __init__(self, e, sql, params): + self.e = e + self.sql = sql + self.params = params + + def __str__(self): + return "QueryInterrupted: {}".format(self.e) + + +class MultipleValues(Exception): + pass + + +class Results: + def __init__(self, rows, truncated, description): + self.rows = rows + self.truncated = truncated + self.description = description + + @property + def columns(self): + return [d[0] for d in self.description] + + def first(self): + if self.rows: + return self.rows[0] + else: + return None + + def single_value(self): + if self.rows and 1 == len(self.rows) and 1 == len(self.rows[0]): + return self.rows[0][0] + else: + raise MultipleValues + + def dicts(self): + return [dict(row) for row in self.rows] + + def __iter__(self): + return iter(self.rows) + + def __len__(self): + return len(self.rows) diff --git a/datasette/default_actions.py b/datasette/default_actions.py new file mode 100644 index 00000000..87d98fac --- /dev/null +++ b/datasette/default_actions.py @@ -0,0 +1,101 @@ +from datasette import hookimpl +from datasette.permissions import Action +from datasette.resources import ( + DatabaseResource, + TableResource, + QueryResource, +) + + +@hookimpl +def register_actions(): + """Register the core Datasette actions.""" + return ( + # Global actions (no resource_class) + Action( + name="view-instance", + abbr="vi", + description="View Datasette instance", + ), + Action( + name="permissions-debug", + abbr="pd", + description="Access permission debug tool", + ), + Action( + name="debug-menu", + abbr="dm", + description="View debug menu items", + ), + # Database-level actions (parent-level) + Action( + 
name="view-database", + abbr="vd", + description="View database", + resource_class=DatabaseResource, + ), + Action( + name="view-database-download", + abbr="vdd", + description="Download database file", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="execute-sql", + abbr="es", + description="Execute read-only SQL queries", + resource_class=DatabaseResource, + also_requires="view-database", + ), + Action( + name="create-table", + abbr="ct", + description="Create tables", + resource_class=DatabaseResource, + ), + # Table-level actions (child-level) + Action( + name="view-table", + abbr="vt", + description="View table", + resource_class=TableResource, + ), + Action( + name="insert-row", + abbr="ir", + description="Insert rows", + resource_class=TableResource, + ), + Action( + name="delete-row", + abbr="dr", + description="Delete rows", + resource_class=TableResource, + ), + Action( + name="update-row", + abbr="ur", + description="Update rows", + resource_class=TableResource, + ), + Action( + name="alter-table", + abbr="at", + description="Alter tables", + resource_class=TableResource, + ), + Action( + name="drop-table", + abbr="dt", + description="Drop tables", + resource_class=TableResource, + ), + # Query-level actions (child-level) + Action( + name="view-query", + abbr="vq", + description="View named query results", + resource_class=QueryResource, + ), + ) diff --git a/datasette/default_magic_parameters.py b/datasette/default_magic_parameters.py new file mode 100644 index 00000000..91c1c5aa --- /dev/null +++ b/datasette/default_magic_parameters.py @@ -0,0 +1,57 @@ +from datasette import hookimpl +import datetime +import os +import time + + +def header(key, request): + key = key.replace("_", "-").encode("utf-8") + headers_dict = dict(request.scope["headers"]) + return headers_dict.get(key, b"").decode("utf-8") + + +def actor(key, request): + if request.actor is None: + raise KeyError + return request.actor[key] + + +def cookie(key, request): + return request.cookies[key] + + +def now(key, request): + if key == "epoch": + return int(time.time()) + elif key == "date_utc": + return datetime.datetime.now(datetime.timezone.utc).date().isoformat() + elif key == "datetime_utc": + return ( + datetime.datetime.now(datetime.timezone.utc).strftime(r"%Y-%m-%dT%H:%M:%S") + + "Z" + ) + else: + raise KeyError + + +def random(key, request): + if key.startswith("chars_") and key.split("chars_")[-1].isdigit(): + num_chars = int(key.split("chars_")[-1]) + if num_chars % 2 == 1: + urandom_len = (num_chars + 1) / 2 + else: + urandom_len = num_chars / 2 + return os.urandom(int(urandom_len)).hex()[:num_chars] + else: + raise KeyError + + +@hookimpl +def register_magic_parameters(): + return [ + ("header", header), + ("actor", actor), + ("cookie", cookie), + ("now", now), + ("random", random), + ] diff --git a/datasette/default_menu_links.py b/datasette/default_menu_links.py new file mode 100644 index 00000000..85032387 --- /dev/null +++ b/datasette/default_menu_links.py @@ -0,0 +1,41 @@ +from datasette import hookimpl + + +@hookimpl +def menu_links(datasette, actor): + async def inner(): + if not await datasette.allowed(action="debug-menu", actor=actor): + return [] + + return [ + {"href": datasette.urls.path("/-/databases"), "label": "Databases"}, + { + "href": datasette.urls.path("/-/plugins"), + "label": "Installed plugins", + }, + { + "href": datasette.urls.path("/-/versions"), + "label": "Version info", + }, + { + "href": datasette.urls.path("/-/settings"), + 
"label": "Settings", + }, + { + "href": datasette.urls.path("/-/permissions"), + "label": "Debug permissions", + }, + { + "href": datasette.urls.path("/-/messages"), + "label": "Debug messages", + }, + { + "href": datasette.urls.path("/-/allow-debug"), + "label": "Debug allow rules", + }, + {"href": datasette.urls.path("/-/threads"), "label": "Debug threads"}, + {"href": datasette.urls.path("/-/actor"), "label": "Debug actor"}, + {"href": datasette.urls.path("/-/patterns"), "label": "Pattern portfolio"}, + ] + + return inner diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. 
+ + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: 
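The evaluate_allow_block() method above delegates to datasette.utils.actor_matches_allow; a minimal sketch of that helper's semantics, with made-up actor values:

from datasette.utils import actor_matches_allow

actor_matches_allow({"id": "admin"}, {"id": "admin"})           # True: exact id match
actor_matches_allow({"id": "alice"}, {"id": ["admin", "bob"]})  # False: not in the id list
actor_matches_allow(None, {"unauthenticated": True})            # True: anonymous access allowed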
+ """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. + """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, table_config or {}) + + # Process 
queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is false (the default), execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. + + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
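A sketch of what PermissionRowCollector.to_permission_sql() produces for two collected rows; the database and table names are invented, and the output shown in comments follows the implementation above:

from datasette.default_permissions.helpers import PermissionRowCollector

collector = PermissionRowCollector(prefix="cfg")
collector.add("mydb", None, True, "config allow on mydb")
collector.add("mydb", "users", False, "config deny on mydb/users")
psql = collector.to_permission_sql()
# psql.sql:
#   SELECT :cfg_0_parent AS parent, :cfg_0_child AS child, :cfg_0_allow AS allow, :cfg_0_reason AS reason
#   UNION ALL
#   SELECT :cfg_1_parent AS parent, :cfg_1_child AS child, :cfg_1_allow AS allow, :cfg_1_reason AS reason
# psql.params:
#   {"cfg_0_parent": "mydb", "cfg_0_child": None, "cfg_0_allow": 1, "cfg_0_reason": "config allow on mydb",
#    "cfg_1_parent": "mydb", "cfg_1_child": "users", "cfg_1_allow": 0, "cfg_1_reason": "config deny on mydb/users"}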
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. + + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> 
bool: + """ + Check if restrictions allow the requested action on the requested resource. + + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
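A quick sketch of restrictions_allow_action() against a hand-written _r block; ds is assumed to be a Datasette instance with the core actions registered, so abbreviations such as "vt" and "ir" resolve:

from datasette.default_permissions.restrictions import restrictions_allow_action

restrictions = {"d": {"mydb": ["vt"]}, "r": {"mydb": {"users": ["ir"]}}}
restrictions_allow_action(ds, restrictions, "view-table", "mydb")             # True: database-level allowlist
restrictions_allow_action(ds, restrictions, "insert-row", ("mydb", "users"))  # True: table-level allowlist
restrictions_allow_action(ds, restrictions, "execute-sql", "mydb")            # False: not allowlisted anywhere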
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/datasette/events.py b/datasette/events.py new file mode 100644 index 00000000..5cd5ba3d --- /dev/null +++ b/datasette/events.py @@ -0,0 +1,235 @@ +from abc import ABC, abstractproperty +from dataclasses import asdict, dataclass, field +from datasette.hookspecs import hookimpl +from datetime import datetime, timezone + + +@dataclass +class Event(ABC): + @abstractproperty + def name(self): + pass + + created: datetime = field( + init=False, default_factory=lambda: datetime.now(timezone.utc) + ) + actor: dict | None + + def properties(self): + properties = asdict(self) + properties.pop("actor", None) + properties.pop("created", None) + return properties + + +@dataclass +class LoginEvent(Event): + """ + Event name: ``login`` + + A user (represented by ``event.actor``) has logged in. + """ + + name = "login" + + +@dataclass +class LogoutEvent(Event): + """ + Event name: ``logout`` + + A user (represented by ``event.actor``) has logged out. + """ + + name = "logout" + + +@dataclass +class CreateTokenEvent(Event): + """ + Event name: ``create-token`` + + A user created an API token. + + :ivar expires_after: Number of seconds after which this token will expire. + :type expires_after: int or None + :ivar restrict_all: Restricted permissions for this token. + :type restrict_all: list + :ivar restrict_database: Restricted database permissions for this token. + :type restrict_database: dict + :ivar restrict_resource: Restricted resource permissions for this token. + :type restrict_resource: dict + """ + + name = "create-token" + expires_after: int | None + restrict_all: list + restrict_database: dict + restrict_resource: dict + + +@dataclass +class CreateTableEvent(Event): + """ + Event name: ``create-table`` + + A new table has been created in the database. 
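For a token that passes the checks above, the hook returns an actor dict of roughly this shape (values here are illustrative):

import time

decoded = {"a": "alice", "t": int(time.time()), "d": 3600, "_r": {"a": ["vi"]}}
actor = {
    "id": decoded["a"],        # "alice"
    "token": "dstok",
    "_r": decoded["_r"],       # restrictions copied through
    "token_expires": decoded["t"] + decoded["d"],
}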
+ + :ivar database: The name of the database where the table was created. + :type database: str + :ivar table: The name of the table that was created + :type table: str + :ivar schema: The SQL schema definition for the new table. + :type schema: str + """ + + name = "create-table" + database: str + table: str + schema: str + + +@dataclass +class DropTableEvent(Event): + """ + Event name: ``drop-table`` + + A table has been dropped from the database. + + :ivar database: The name of the database where the table was dropped. + :type database: str + :ivar table: The name of the table that was dropped + :type table: str + """ + + name = "drop-table" + database: str + table: str + + +@dataclass +class AlterTableEvent(Event): + """ + Event name: ``alter-table`` + + A table has been altered. + + :ivar database: The name of the database where the table was altered + :type database: str + :ivar table: The name of the table that was altered + :type table: str + :ivar before_schema: The table's SQL schema before the alteration + :type before_schema: str + :ivar after_schema: The table's SQL schema after the alteration + :type after_schema: str + """ + + name = "alter-table" + database: str + table: str + before_schema: str + after_schema: str + + +@dataclass +class InsertRowsEvent(Event): + """ + Event name: ``insert-rows`` + + Rows were inserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + :ivar ignore: Was ignore set? + :type ignore: bool + :ivar replace: Was replace set? + :type replace: bool + """ + + name = "insert-rows" + database: str + table: str + num_rows: int + ignore: bool + replace: bool + + +@dataclass +class UpsertRowsEvent(Event): + """ + Event name: ``upsert-rows`` + + Rows were upserted into a table. + + :ivar database: The name of the database where the rows were inserted. + :type database: str + :ivar table: The name of the table where the rows were inserted. + :type table: str + :ivar num_rows: The number of rows that were requested to be inserted. + :type num_rows: int + """ + + name = "upsert-rows" + database: str + table: str + num_rows: int + + +@dataclass +class UpdateRowEvent(Event): + """ + Event name: ``update-row`` + + A row was updated in a table. + + :ivar database: The name of the database where the row was updated. + :type database: str + :ivar table: The name of the table where the row was updated. + :type table: str + :ivar pks: The primary key values of the updated row. + """ + + name = "update-row" + database: str + table: str + pks: list + + +@dataclass +class DeleteRowEvent(Event): + """ + Event name: ``delete-row`` + + A row was deleted from a table. + + :ivar database: The name of the database where the row was deleted. + :type database: str + :ivar table: The name of the table where the row was deleted. + :type table: str + :ivar pks: The primary key values of the deleted row. 
+ """ + + name = "delete-row" + database: str + table: str + pks: list + + +@hookimpl +def register_events(): + return [ + LoginEvent, + LogoutEvent, + CreateTableEvent, + CreateTokenEvent, + AlterTableEvent, + DropTableEvent, + InsertRowsEvent, + UpsertRowsEvent, + UpdateRowEvent, + DeleteRowEvent, + ] diff --git a/datasette/facets.py b/datasette/facets.py new file mode 100644 index 00000000..dd149424 --- /dev/null +++ b/datasette/facets.py @@ -0,0 +1,582 @@ +import json +import urllib +from datasette import hookimpl +from datasette.database import QueryInterrupted +from datasette.utils import ( + escape_sqlite, + path_with_added_args, + path_with_removed_args, + detect_json1, + sqlite3, +) + + +def load_facet_configs(request, table_config): + # Given a request and the configuration for a table, return + # a dictionary of selected facets, their lists of configs and for each + # config whether it came from the request or the metadata. + # + # return {type: [ + # {"source": "metadata", "config": config1}, + # {"source": "request", "config": config2}]} + facet_configs = {} + table_config = table_config or {} + table_facet_configs = table_config.get("facets", []) + for facet_config in table_facet_configs: + if isinstance(facet_config, str): + type = "column" + facet_config = {"simple": facet_config} + else: + assert ( + len(facet_config.values()) == 1 + ), "Metadata config dicts should be {type: config}" + type, facet_config = list(facet_config.items())[0] + if isinstance(facet_config, str): + facet_config = {"simple": facet_config} + facet_configs.setdefault(type, []).append( + {"source": "metadata", "config": facet_config} + ) + qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True) + for key, values in qs_pairs.items(): + if key.startswith("_facet"): + # Figure out the facet type + if key == "_facet": + type = "column" + elif key.startswith("_facet_"): + type = key[len("_facet_") :] + for value in values: + # The value is the facet_config - either JSON or not + facet_config = ( + json.loads(value) if value.startswith("{") else {"simple": value} + ) + facet_configs.setdefault(type, []).append( + {"source": "request", "config": facet_config} + ) + return facet_configs + + +@hookimpl +def register_facet_classes(): + classes = [ColumnFacet, DateFacet] + if detect_json1(): + classes.append(ArrayFacet) + return classes + + +class Facet: + type = None + # How many rows to consider when suggesting facets: + suggest_consider = 1000 + + def __init__( + self, + ds, + request, + database, + sql=None, + table=None, + params=None, + table_config=None, + row_count=None, + ): + assert table or sql, "Must provide either table= or sql=" + self.ds = ds + self.request = request + self.database = database + # For foreign key expansion. Can be None for e.g. 
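To make the structure returned by load_facet_configs() concrete, this sketch feeds it a request-like object carrying ?_facet=city&_facet_array=tags (hypothetical column names); only the query_string attribute is needed:

from types import SimpleNamespace
from datasette.facets import load_facet_configs

request = SimpleNamespace(query_string="_facet=city&_facet_array=tags")
load_facet_configs(request, None)
# {
#     "column": [{"source": "request", "config": {"simple": "city"}}],
#     "array": [{"source": "request", "config": {"simple": "tags"}}],
# }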
canned SQL queries: + self.table = table + self.sql = sql or f"select * from [{table}]" + self.params = params or [] + self.table_config = table_config + # row_count can be None, in which case we calculate it ourselves: + self.row_count = row_count + + def get_configs(self): + configs = load_facet_configs(self.request, self.table_config) + return configs.get(self.type) or [] + + def get_querystring_pairs(self): + # ?_foo=bar&_foo=2&empty= becomes: + # [('_foo', 'bar'), ('_foo', '2'), ('empty', '')] + return urllib.parse.parse_qsl(self.request.query_string, keep_blank_values=True) + + def get_facet_size(self): + facet_size = self.ds.setting("default_facet_size") + max_returned_rows = self.ds.setting("max_returned_rows") + table_facet_size = None + if self.table: + config_facet_size = ( + self.ds.config.get("databases", {}) + .get(self.database, {}) + .get("tables", {}) + .get(self.table, {}) + .get("facet_size") + ) + if config_facet_size: + table_facet_size = config_facet_size + custom_facet_size = self.request.args.get("_facet_size") + if custom_facet_size: + if custom_facet_size == "max": + facet_size = max_returned_rows + elif custom_facet_size.isdigit(): + facet_size = int(custom_facet_size) + else: + # Invalid value, ignore it + custom_facet_size = None + if table_facet_size and not custom_facet_size: + if table_facet_size == "max": + facet_size = max_returned_rows + else: + facet_size = table_facet_size + return min(facet_size, max_returned_rows) + + async def suggest(self): + return [] + + async def facet_results(self): + # returns ([results], [timed_out]) + # TODO: Include "hideable" with each one somehow, which indicates if it was + # defined in metadata (in which case you cannot turn it off) + raise NotImplementedError + + async def get_columns(self, sql, params=None): + # Detect column names using the "limit 0" trick + return ( + await self.ds.execute( + self.database, f"select * from ({sql}) limit 0", params or [] + ) + ).columns + + +class ColumnFacet(Facet): + type = "column" + + async def suggest(self): + row_count = await self.get_row_count() + columns = await self.get_columns(self.sql, self.params) + facet_size = self.get_facet_size() + suggested_facets = [] + already_enabled = [c["config"]["simple"] for c in self.get_configs()] + for column in columns: + if column in already_enabled: + continue + suggested_facet_sql = """ + with limited as (select * from ({sql}) limit {suggest_consider}) + select {column} as value, count(*) as n from limited + where value is not null + group by value + limit {limit} + """.format( + column=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, + suggest_consider=self.suggest_consider, + ) + distinct_values = None + try: + distinct_values = await self.ds.execute( + self.database, + suggested_facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + ) + num_distinct_values = len(distinct_values) + if ( + 1 < num_distinct_values < row_count + and num_distinct_values <= facet_size + # And at least one has n > 1 + and any(r["n"] > 1 for r in distinct_values) + ): + suggested_facets.append( + { + "name": column, + "toggle_url": self.ds.absolute_url( + self.request, + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet": column} + ) + ), + ), + } + ) + except QueryInterrupted: + continue + return suggested_facets + + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + f"select count(*) from 
(select * from ({self.sql}) limit {self.suggest_consider})", + self.params, + ) + ).rows[0][0] + return self.row_count + + async def facet_results(self): + facet_results = [] + facets_timed_out = [] + + qs_pairs = self.get_querystring_pairs() + + facet_size = self.get_facet_size() + for source_and_config in self.get_configs(): + config = source_and_config["config"] + source = source_and_config["source"] + column = config.get("column") or config["simple"] + facet_sql = """ + select {col} as value, count(*) as count from ( + {sql} + ) + where {col} is not null + group by {col} order by count desc, value limit {limit} + """.format( + col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + ) + try: + facet_rows_results = await self.ds.execute( + self.database, + facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_time_limit_ms"), + ) + facet_results_values = [] + facet_results.append( + { + "name": column, + "type": self.type, + "hideable": source != "metadata", + "toggle_url": self.ds.urls.path( + path_with_removed_args(self.request, {"_facet": column}) + ), + "results": facet_results_values, + "truncated": len(facet_rows_results) > facet_size, + } + ) + facet_rows = facet_rows_results.rows[:facet_size] + if self.table: + # Attempt to expand foreign keys into labels + values = [row["value"] for row in facet_rows] + expanded = await self.ds.expand_foreign_keys( + self.request.actor, self.database, self.table, column, values + ) + else: + expanded = {} + for row in facet_rows: + column_qs = column + if column.startswith("_"): + column_qs = "{}__exact".format(column) + selected = (column_qs, str(row["value"])) in qs_pairs + if selected: + toggle_path = path_with_removed_args( + self.request, {column_qs: str(row["value"])} + ) + else: + toggle_path = path_with_added_args( + self.request, {column_qs: row["value"]} + ) + facet_results_values.append( + { + "value": row["value"], + "label": expanded.get((column, row["value"]), row["value"]), + "count": row["count"], + "toggle_url": self.ds.absolute_url( + self.request, self.ds.urls.path(toggle_path) + ), + "selected": selected, + } + ) + except QueryInterrupted: + facets_timed_out.append(column) + + return facet_results, facets_timed_out + + +class ArrayFacet(Facet): + type = "array" + + def _is_json_array_of_strings(self, json_string): + try: + array = json.loads(json_string) + except ValueError: + return False + for item in array: + if not isinstance(item, str): + return False + return True + + async def suggest(self): + columns = await self.get_columns(self.sql, self.params) + suggested_facets = [] + already_enabled = [c["config"]["simple"] for c in self.get_configs()] + for column in columns: + if column in already_enabled: + continue + # Is every value in this column either null or a JSON array? 
+ suggested_facet_sql = """ + with limited as (select * from ({sql}) limit {suggest_consider}) + select distinct json_type({column}) + from limited + where {column} is not null and {column} != '' + """.format( + column=escape_sqlite(column), + sql=self.sql, + suggest_consider=self.suggest_consider, + ) + try: + results = await self.ds.execute( + self.database, + suggested_facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + log_sql_errors=False, + ) + types = tuple(r[0] for r in results.rows) + if types in (("array",), ("array", None)): + # Now check that first 100 arrays contain only strings + first_100 = [ + v[0] + for v in await self.ds.execute( + self.database, + ( + "select {column} from ({sql}) " + "where {column} is not null " + "and {column} != '' " + "and json_array_length({column}) > 0 " + "limit 100" + ).format(column=escape_sqlite(column), sql=self.sql), + self.params, + truncate=False, + custom_time_limit=self.ds.setting( + "facet_suggest_time_limit_ms" + ), + log_sql_errors=False, + ) + ] + if first_100 and all( + self._is_json_array_of_strings(r) for r in first_100 + ): + suggested_facets.append( + { + "name": column, + "type": "array", + "toggle_url": self.ds.absolute_url( + self.request, + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_array": column} + ) + ), + ), + } + ) + except (QueryInterrupted, sqlite3.OperationalError): + continue + return suggested_facets + + async def facet_results(self): + # self.configs should be a plain list of columns + facet_results = [] + facets_timed_out = [] + + facet_size = self.get_facet_size() + for source_and_config in self.get_configs(): + config = source_and_config["config"] + source = source_and_config["source"] + column = config.get("column") or config["simple"] + # https://github.com/simonw/datasette/issues/448 + facet_sql = """ + with inner as ({sql}), + deduped_array_items as ( + select + distinct j.value, + inner.* + from + json_each([inner].{col}) j + join inner + ) + select + value as value, + count(*) as count + from + deduped_array_items + group by + value + order by + count(*) desc, value limit {limit} + """.format( + col=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, + ) + try: + facet_rows_results = await self.ds.execute( + self.database, + facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_time_limit_ms"), + ) + facet_results_values = [] + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": self.ds.urls.path( + path_with_removed_args( + self.request, {"_facet_array": column} + ) + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) + facet_rows = facet_rows_results.rows[:facet_size] + pairs = self.get_querystring_pairs() + for row in facet_rows: + value = str(row["value"]) + selected = (f"{column}__arraycontains", value) in pairs + if selected: + toggle_path = path_with_removed_args( + self.request, {f"{column}__arraycontains": value} + ) + else: + toggle_path = path_with_added_args( + self.request, {f"{column}__arraycontains": value} + ) + facet_results_values.append( + { + "value": value, + "label": value, + "count": row["count"], + "toggle_url": self.ds.absolute_url( + self.request, toggle_path + ), + "selected": selected, + } + ) + except QueryInterrupted: + facets_timed_out.append(column) + + return facet_results, facets_timed_out + + +class DateFacet(Facet): + type = 
"date" + + async def suggest(self): + columns = await self.get_columns(self.sql, self.params) + already_enabled = [c["config"]["simple"] for c in self.get_configs()] + suggested_facets = [] + for column in columns: + if column in already_enabled: + continue + # Does this column contain any dates in the first 100 rows? + suggested_facet_sql = """ + select date({column}) from ( + select * from ({sql}) limit 100 + ) where {column} glob "????-??-*" + """.format( + column=escape_sqlite(column), sql=self.sql + ) + try: + results = await self.ds.execute( + self.database, + suggested_facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"), + log_sql_errors=False, + ) + values = tuple(r[0] for r in results.rows) + if any(values): + suggested_facets.append( + { + "name": column, + "type": "date", + "toggle_url": self.ds.absolute_url( + self.request, + self.ds.urls.path( + path_with_added_args( + self.request, {"_facet_date": column} + ) + ), + ), + } + ) + except (QueryInterrupted, sqlite3.OperationalError): + continue + return suggested_facets + + async def facet_results(self): + facet_results = [] + facets_timed_out = [] + args = dict(self.get_querystring_pairs()) + facet_size = self.get_facet_size() + for source_and_config in self.get_configs(): + config = source_and_config["config"] + source = source_and_config["source"] + column = config.get("column") or config["simple"] + # TODO: does this query break if inner sql produces value or count columns? + facet_sql = """ + select date({col}) as value, count(*) as count from ( + {sql} + ) + where date({col}) is not null + group by date({col}) order by count desc, value limit {limit} + """.format( + col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + ) + try: + facet_rows_results = await self.ds.execute( + self.database, + facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.setting("facet_time_limit_ms"), + ) + facet_results_values = [] + facet_results.append( + { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_date": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } + ) + facet_rows = facet_rows_results.rows[:facet_size] + for row in facet_rows: + selected = str(args.get(f"{column}__date")) == str(row["value"]) + if selected: + toggle_path = path_with_removed_args( + self.request, {f"{column}__date": str(row["value"])} + ) + else: + toggle_path = path_with_added_args( + self.request, {f"{column}__date": row["value"]} + ) + facet_results_values.append( + { + "value": row["value"], + "label": row["value"], + "count": row["count"], + "toggle_url": self.ds.absolute_url( + self.request, toggle_path + ), + "selected": selected, + } + ) + except QueryInterrupted: + facets_timed_out.append(column) + + return facet_results, facets_timed_out diff --git a/datasette/filters.py b/datasette/filters.py new file mode 100644 index 00000000..95cc5f37 --- /dev/null +++ b/datasette/filters.py @@ -0,0 +1,427 @@ +from datasette import hookimpl +from datasette.resources import DatabaseResource +from datasette.views.base import DatasetteError +from datasette.utils.asgi import BadRequest +import json +from .utils import detect_json1, escape_sqlite, path_with_removed_args + + +@hookimpl(specname="filters_from_request") +def where_filters(request, database, datasette): + # This one deals with ?_where= + async def inner(): + where_clauses = [] + 
extra_wheres_for_ui = [] + if "_where" in request.args: + if not await datasette.allowed( + action="execute-sql", + resource=DatabaseResource(database=database), + actor=request.actor, + ): + raise DatasetteError("_where= is not allowed", status=403) + else: + where_clauses.extend(request.args.getlist("_where")) + extra_wheres_for_ui = [ + { + "text": text, + "remove_url": path_with_removed_args(request, {"_where": text}), + } + for text in request.args.getlist("_where") + ] + + return FilterArguments( + where_clauses, + extra_context={ + "extra_wheres_for_ui": extra_wheres_for_ui, + }, + ) + + return inner + + +@hookimpl(specname="filters_from_request") +def search_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Figure out which fts_table to use + table_metadata = await datasette.table_config(database, table) + db = datasette.get_database(database) + fts_table = request.args.get("_fts_table") + fts_table = fts_table or table_metadata.get("fts_table") + fts_table = fts_table or await db.fts_table(table) + fts_pk = request.args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) + search_args = { + key: request.args[key] + for key in request.args + if key.startswith("_search") and key != "_searchmode" + } + search = "" + search_mode_raw = table_metadata.get("searchmode") == "raw" + # Or set search mode from the querystring + qs_searchmode = request.args.get("_searchmode") + if qs_searchmode == "escaped": + search_mode_raw = False + if qs_searchmode == "raw": + search_mode_raw = True + + extra_context["supports_search"] = bool(fts_table) + + if fts_table and search_args: + if "_search" in search_args: + # Simple ?_search=xxx + search = search_args["_search"] + where_clauses.append( + "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + fts_pk=escape_sqlite(fts_pk), + match_clause=( + ":search" if search_mode_raw else "escape_fts(:search)" + ), + ) + ) + human_descriptions.append(f'search matches "{search}"') + params["search"] = search + extra_context["search"] = search + else: + # More complex: search against specific columns + for i, (key, search_text) in enumerate(search_args.items()): + search_col = key.split("_search_", 1)[1] + if search_col not in await db.table_columns(fts_table): + raise BadRequest("Cannot search by that column") + + where_clauses.append( + "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( + fts_table=escape_sqlite(fts_table), + search_col=escape_sqlite(search_col), + match_clause=( + ":search_{}".format(i) + if search_mode_raw + else "escape_fts(:search_{})".format(i) + ), + ) + ) + human_descriptions.append( + f'search column "{search_col}" matches "{search_text}"' + ) + params[f"search_{i}"] = search_text + extra_context["search"] = search_text + + return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +@hookimpl(specname="filters_from_request") +def through_filters(request, database, table, datasette): + # ?_search= and _search_colname= + async def inner(): + where_clauses = [] + params = {} + human_descriptions = [] + extra_context = {} + + # Support for ?_through={table, column, value} + if "_through" in request.args: + for through in request.args.getlist("_through"): + through_data = json.loads(through) + through_table = through_data["table"] + 
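Roughly, for ?_search=hello against a table backed by a hypothetical FTS table named articles_fts, search_filters() above contributes a clause and parameter along these lines (exact quoting is decided by escape_sqlite):

where_clause = (
    "rowid in (select rowid from articles_fts "
    "where articles_fts match escape_fts(:search))"
)
params = {"search": "hello"}
human_description = 'search matches "hello"'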
other_column = through_data["column"] + value = through_data["value"] + db = datasette.get_database(database) + outgoing_foreign_keys = await db.foreign_keys_for_table(through_table) + try: + fk_to_us = [ + fk for fk in outgoing_foreign_keys if fk["other_table"] == table + ][0] + except IndexError: + raise DatasetteError( + "Invalid _through - could not find corresponding foreign key" + ) + param = f"p{len(params)}" + where_clauses.append( + "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( + through_table=escape_sqlite(through_table), + our_pk=escape_sqlite(fk_to_us["other_column"]), + our_column=escape_sqlite(fk_to_us["column"]), + other_column=escape_sqlite(other_column), + param=param, + ) + ) + params[param] = value + human_descriptions.append(f'{through_table}.{other_column} = "{value}"') + + return FilterArguments(where_clauses, params, human_descriptions, extra_context) + + return inner + + +class FilterArguments: + def __init__( + self, where_clauses, params=None, human_descriptions=None, extra_context=None + ): + self.where_clauses = where_clauses + self.params = params or {} + self.human_descriptions = human_descriptions or [] + self.extra_context = extra_context or {} + + +class Filter: + key = None + display = None + no_argument = False + + def where_clause(self, table, column, value, param_counter): + raise NotImplementedError + + def human_clause(self, column, value): + raise NotImplementedError + + +class TemplatedFilter(Filter): + def __init__( + self, + key, + display, + sql_template, + human_template, + format="{}", + numeric=False, + no_argument=False, + ): + self.key = key + self.display = display + self.sql_template = sql_template + self.human_template = human_template + self.format = format + self.numeric = numeric + self.no_argument = no_argument + + def where_clause(self, table, column, value, param_counter): + converted = self.format.format(value) + if self.numeric and converted.isdigit(): + converted = int(converted) + if self.no_argument: + kwargs = {"c": column} + converted = None + else: + kwargs = {"c": column, "p": f"p{param_counter}", "t": table} + return self.sql_template.format(**kwargs), converted + + def human_clause(self, column, value): + if callable(self.human_template): + template = self.human_template(column, value) + else: + template = self.human_template + if self.no_argument: + return template.format(c=column) + else: + return template.format(c=column, v=value) + + +class InFilter(Filter): + key = "in" + display = "in" + + def split_value(self, value): + if value.startswith("["): + return json.loads(value) + else: + return [v.strip() for v in value.split(",")] + + def where_clause(self, table, column, value, param_counter): + values = self.split_value(value) + params = [f":p{param_counter + i}" for i in range(len(values))] + sql = f"{escape_sqlite(column)} in ({', '.join(params)})" + return sql, values + + def human_clause(self, column, value): + return f"{column} in {json.dumps(self.split_value(value))}" + + +class NotInFilter(InFilter): + key = "notin" + display = "not in" + + def where_clause(self, table, column, value, param_counter): + values = self.split_value(value) + params = [f":p{param_counter + i}" for i in range(len(values))] + sql = f"{escape_sqlite(column)} not in ({', '.join(params)})" + return sql, values + + def human_clause(self, column, value): + return f"{column} not in {json.dumps(self.split_value(value))}" + + +class Filters: + _filters = ( + [ + # key, display, sql_template, 
human_template, format=, numeric=, no_argument= + TemplatedFilter( + "exact", + "=", + '"{c}" = :{p}', + lambda c, v: "{c} = {v}" if v.isdigit() else '{c} = "{v}"', + ), + TemplatedFilter( + "not", + "!=", + '"{c}" != :{p}', + lambda c, v: "{c} != {v}" if v.isdigit() else '{c} != "{v}"', + ), + TemplatedFilter( + "contains", + "contains", + '"{c}" like :{p}', + '{c} contains "{v}"', + format="%{}%", + ), + TemplatedFilter( + "notcontains", + "does not contain", + '"{c}" not like :{p}', + '{c} does not contain "{v}"', + format="%{}%", + ), + TemplatedFilter( + "endswith", + "ends with", + '"{c}" like :{p}', + '{c} ends with "{v}"', + format="%{}", + ), + TemplatedFilter( + "startswith", + "starts with", + '"{c}" like :{p}', + '{c} starts with "{v}"', + format="{}%", + ), + TemplatedFilter("gt", ">", '"{c}" > :{p}', "{c} > {v}", numeric=True), + TemplatedFilter( + "gte", "\u2265", '"{c}" >= :{p}', "{c} \u2265 {v}", numeric=True + ), + TemplatedFilter("lt", "<", '"{c}" < :{p}', "{c} < {v}", numeric=True), + TemplatedFilter( + "lte", "\u2264", '"{c}" <= :{p}', "{c} \u2264 {v}", numeric=True + ), + TemplatedFilter("like", "like", '"{c}" like :{p}', '{c} like "{v}"'), + TemplatedFilter( + "notlike", "not like", '"{c}" not like :{p}', '{c} not like "{v}"' + ), + TemplatedFilter("glob", "glob", '"{c}" glob :{p}', '{c} glob "{v}"'), + InFilter(), + NotInFilter(), + ] + + ( + [ + TemplatedFilter( + "arraycontains", + "array contains", + """:{p} in (select value from json_each([{t}].[{c}]))""", + '{c} contains "{v}"', + ), + TemplatedFilter( + "arraynotcontains", + "array does not contain", + """:{p} not in (select value from json_each([{t}].[{c}]))""", + '{c} does not contain "{v}"', + ), + ] + if detect_json1() + else [] + ) + + [ + TemplatedFilter( + "date", "date", 'date("{c}") = :{p}', '"{c}" is on date {v}' + ), + TemplatedFilter( + "isnull", "is null", '"{c}" is null', "{c} is null", no_argument=True + ), + TemplatedFilter( + "notnull", + "is not null", + '"{c}" is not null', + "{c} is not null", + no_argument=True, + ), + TemplatedFilter( + "isblank", + "is blank", + '("{c}" is null or "{c}" = "")', + "{c} is blank", + no_argument=True, + ), + TemplatedFilter( + "notblank", + "is not blank", + '("{c}" is not null and "{c}" != "")', + "{c} is not blank", + no_argument=True, + ), + ] + ) + _filters_by_key = {f.key: f for f in _filters} + + def __init__(self, pairs): + self.pairs = pairs + + def lookups(self): + """Yields (lookup, display, no_argument) pairs""" + for filter in self._filters: + yield filter.key, filter.display, filter.no_argument + + def human_description_en(self, extra=None): + bits = [] + if extra: + bits.extend(extra) + for column, lookup, value in self.selections(): + filter = self._filters_by_key.get(lookup, None) + if filter: + bits.append(filter.human_clause(column, value)) + # Comma separated, with an ' and ' at the end + and_bits = [] + commas, tail = bits[:-1], bits[-1:] + if commas: + and_bits.append(", ".join(commas)) + if tail: + and_bits.append(tail[0]) + s = " and ".join(and_bits) + if not s: + return "" + return f"where {s}" + + def selections(self): + """Yields (column, lookup, value) tuples""" + for key, value in self.pairs: + if "__" in key: + column, lookup = key.rsplit("__", 1) + else: + column = key + lookup = "exact" + yield column, lookup, value + + def has_selections(self): + return bool(self.pairs) + + def build_where_clauses(self, table): + sql_bits = [] + params = {} + i = 0 + for column, lookup, value in self.selections(): + filter = 
self._filters_by_key.get(lookup, None) + if filter: + sql_bit, param = filter.where_clause(table, column, value, i) + sql_bits.append(sql_bit) + if param is not None: + if not isinstance(param, list): + param = [param] + for individual_param in param: + param_id = f"p{i}" + params[param_id] = individual_param + i += 1 + return sql_bits, params diff --git a/datasette/forbidden.py b/datasette/forbidden.py new file mode 100644 index 00000000..41c48396 --- /dev/null +++ b/datasette/forbidden.py @@ -0,0 +1,19 @@ +from datasette import hookimpl, Response + + +@hookimpl(trylast=True) +def forbidden(datasette, request, message): + async def inner(): + return Response.html( + await datasette.render_template( + "error.html", + { + "title": "Forbidden", + "error": message, + }, + request=request, + ), + status=403, + ) + + return inner diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py new file mode 100644 index 00000000..96398a4c --- /dev/null +++ b/datasette/handle_exception.py @@ -0,0 +1,77 @@ +from datasette import hookimpl, Response +from .utils import add_cors_headers +from .utils.asgi import ( + Base400, +) +from .views.base import DatasetteError +from markupsafe import Markup +import traceback + +try: + import ipdb as pdb +except ImportError: + import pdb + +try: + import rich +except ImportError: + rich = None + + +@hookimpl(trylast=True) +def handle_exception(datasette, request, exception): + async def inner(): + if datasette.pdb: + pdb.post_mortem(exception.__traceback__) + + if rich is not None: + rich.get_console().print_exception(show_locals=True) + + title = None + if isinstance(exception, Base400): + status = exception.status + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.message_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = [f"{status}.html", "error.html"] + info.update( + { + "ok": False, + "error": message, + "status": status, + "title": title, + } + ) + headers = {} + if datasette.cors: + add_cors_headers(headers) + if request.path.split("?")[0].endswith(".json"): + return Response.json(info, status=status, headers=headers) + else: + environment = datasette.get_jinja_environment(request) + template = environment.select_template(templates) + return Response.html( + await template.render_async( + dict( + info, + urls=datasette.urls, + app_css_hash=datasette.app_css_hash(), + menu_links=lambda: [], + ) + ), + status=status, + headers=headers, + ) + + return inner diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py new file mode 100644 index 00000000..3f6a1425 --- /dev/null +++ b/datasette/hookspecs.py @@ -0,0 +1,222 @@ +from pluggy import HookimplMarker +from pluggy import HookspecMarker + +hookspec = HookspecMarker("datasette") +hookimpl = HookimplMarker("datasette") + + +@hookspec +def startup(datasette): + """Fires directly after Datasette first starts running""" + + +@hookspec +def asgi_wrapper(datasette): + """Returns an ASGI middleware callable to wrap our ASGI application with""" + + +@hookspec +def prepare_connection(conn, database, datasette): + """Modify SQLite connection in some way e.g. register custom SQL functions""" + + +@hookspec +def prepare_jinja2_environment(env, datasette): + """Modify Jinja2 template environment e.g. 
register custom template tags""" + + +@hookspec +def extra_css_urls(template, database, table, columns, view_name, request, datasette): + """Extra CSS URLs added by this plugin""" + + +@hookspec +def extra_js_urls(template, database, table, columns, view_name, request, datasette): + """Extra JavaScript URLs added by this plugin""" + + +@hookspec +def extra_body_script( + template, database, table, columns, view_name, request, datasette +): + """Extra JavaScript code to be included in diff --git a/datasette/templates/_codemirror.html b/datasette/templates/_codemirror.html new file mode 100644 index 00000000..c4629aeb --- /dev/null +++ b/datasette/templates/_codemirror.html @@ -0,0 +1,16 @@ + + + diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html new file mode 100644 index 00000000..a624c8a4 --- /dev/null +++ b/datasette/templates/_codemirror_foot.html @@ -0,0 +1,42 @@ + diff --git a/datasette/templates/_crumbs.html b/datasette/templates/_crumbs.html new file mode 100644 index 00000000..bd1ff0da --- /dev/null +++ b/datasette/templates/_crumbs.html @@ -0,0 +1,15 @@ +{% macro nav(request, database=None, table=None) -%} +{% if crumb_items is defined %} + {% set items=crumb_items(request=request, database=database, table=table) %} + {% if items %} +

+ {% for item in items %} + {{ item.label }} + {% if not loop.last %} + / + {% endif %} + {% endfor %} +

+ {% endif %} +{% endif %} +{%- endmacro %} diff --git a/datasette/templates/_debug_common_functions.html b/datasette/templates/_debug_common_functions.html new file mode 100644 index 00000000..d988a2f3 --- /dev/null +++ b/datasette/templates/_debug_common_functions.html @@ -0,0 +1,50 @@ + diff --git a/datasette/templates/_description_source_license.html b/datasette/templates/_description_source_license.html new file mode 100644 index 00000000..f852268f --- /dev/null +++ b/datasette/templates/_description_source_license.html @@ -0,0 +1,30 @@ +{% if metadata.get("description_html") or metadata.get("description") %} + +{% endif %} +{% if metadata.license or metadata.license_url or metadata.source or metadata.source_url %} +

+ {% if metadata.license or metadata.license_url %}Data license: + {% if metadata.license_url %} + {{ metadata.license or metadata.license_url }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source or metadata.source_url %}{% if metadata.license or metadata.license_url %}·{% endif %} + Data source: {% if metadata.source_url %} + + {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} + {% endif %} + {% if metadata.about or metadata.about_url %}{% if metadata.license or metadata.license_url or metadata.source or metadata.source_url %}·{% endif %} + About: {% if metadata.about_url %} + + {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} + {% endif %} +

+{% endif %} diff --git a/datasette/templates/_facet_results.html b/datasette/templates/_facet_results.html new file mode 100644 index 00000000..034e9678 --- /dev/null +++ b/datasette/templates/_facet_results.html @@ -0,0 +1,28 @@ +
+ {% for facet_info in sorted_facet_results %} +
+

+ {{ facet_info.name }}{% if facet_info.type != "column" %} ({{ facet_info.type }}){% endif %} + {% if facet_info.truncated %}>{% endif %}{{ facet_info.results|length }} + + {% if facet_info.hideable %} + + {% endif %} +

+
    + {% for facet_value in facet_info.results %} + {% if not facet_value.selected %} +
  • {{ (facet_value.label | string()) or "-" }} {{ "{:,}".format(facet_value.count) }}
  • + {% else %} +
  • {{ facet_value.label or "-" }} · {{ "{:,}".format(facet_value.count) }}
  • + {% endif %} + {% endfor %} + {% if facet_info.truncated %} +
  • {% if request.args._facet_size != "max" -%} + {% else -%}…{% endif %} +
  • + {% endif %} +
+
+ {% endfor %} +
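
For reference, each entry in the sorted_facet_results list rendered by the template above follows the structure assembled by the facet_results() methods earlier in this diff (for example DateFacet.facet_results()). A minimal Python sketch, with invented values and hypothetical URLs:

    # Sketch of one facet result as consumed by _facet_results.html.
    # Keys mirror those set in DateFacet.facet_results() above; the values
    # and URLs are invented for illustration only.
    facet_info = {
        "name": "created",          # column the facet was calculated for
        "type": "date",             # facet type; plain column facets use "column"
        "hideable": True,           # False when the facet was configured in metadata
        "toggle_url": "/db/table",  # hypothetical; real value drops the _facet_date argument
        "truncated": False,         # True when more than facet_size values were found
        "results": [
            {
                "value": "2024-01-01",
                "label": "2024-01-01",
                "count": 3,
                "toggle_url": "https://example.com/db/table?created__date=2024-01-01",  # hypothetical
                "selected": False,
            },
        ],
    }
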
diff --git a/datasette/templates/_footer.html b/datasette/templates/_footer.html new file mode 100644 index 00000000..074270f1 --- /dev/null +++ b/datasette/templates/_footer.html @@ -0,0 +1,21 @@ +Powered by Datasette +{% if query_ms %}· Queries took {{ query_ms|round(3) }}ms{% endif %} +{% if metadata %} + {% if metadata.license or metadata.license_url %}· Data license: + {% if metadata.license_url %} + {{ metadata.license or metadata.license_url }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source or metadata.source_url %}· + Data source: {% if metadata.source_url %} + + {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} + {% endif %} + {% if metadata.about or metadata.about_url %}· + About: {% if metadata.about_url %} + + {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} + {% endif %} +{% endif %} diff --git a/datasette/templates/_permission_ui_styles.html b/datasette/templates/_permission_ui_styles.html new file mode 100644 index 00000000..53a824f1 --- /dev/null +++ b/datasette/templates/_permission_ui_styles.html @@ -0,0 +1,145 @@ + diff --git a/datasette/templates/_permissions_debug_tabs.html b/datasette/templates/_permissions_debug_tabs.html new file mode 100644 index 00000000..d7203c1e --- /dev/null +++ b/datasette/templates/_permissions_debug_tabs.html @@ -0,0 +1,54 @@ +{% if has_debug_permission %} +{% set query_string = '?' + request.query_string if request.query_string else '' %} + + + + +{% endif %} diff --git a/datasette/templates/_suggested_facets.html b/datasette/templates/_suggested_facets.html new file mode 100644 index 00000000..b80208c3 --- /dev/null +++ b/datasette/templates/_suggested_facets.html @@ -0,0 +1,3 @@ +

+ Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if facet.get("type") %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %} +
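
Each entry in suggested_facets rendered here carries the keys appended by the suggest() methods (for example DateFacet.suggest() earlier in this diff). An invented example:

    # Sketch of one suggested facet entry as rendered by _suggested_facets.html.
    # Keys match those built in DateFacet.suggest() above; values are invented.
    suggested_facet = {
        "name": "created",
        "type": "date",  # optional - the template reads it with facet.get("type")
        "toggle_url": "https://example.com/db/table?_facet_date=created",  # hypothetical
    }
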

diff --git a/datasette/templates/_table.html b/datasette/templates/_table.html new file mode 100644 index 00000000..a1329ba7 --- /dev/null +++ b/datasette/templates/_table.html @@ -0,0 +1,36 @@ + +
+{% if display_rows %} +
+
+ + + {% for column in display_columns %} + + {% endfor %} + + + + {% for row in display_rows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} + +
+ {% if not column.sortable %} + {{ column.name }} + {% else %} + {% if column.name == sort %} + {{ column.name }} ▼ + {% else %} + {{ column.name }}{% if column.name == sort_desc %} ▲{% endif %} + {% endif %} + {% endif %} +
{{ cell.value }}
+ +{% else %} +

0 records

+{% endif %} diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html new file mode 100644 index 00000000..1ecc92df --- /dev/null +++ b/datasette/templates/allow_debug.html @@ -0,0 +1,61 @@ +{% extends "base.html" %} + +{% block title %}Debug allow rules{% endblock %} + +{% block extra_head %} + +{% endblock %} + +{% block content %} + +

Debug allow rules

+ +{% set current_tab = "allow_debug" %} +{% include "_permissions_debug_tabs.html" %} + +

Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.

+ +
+
+

+ +
+
+

+ +
+
+ +
+
+ +{% if error %}

{{ error }}

{% endif %} + +{% if result == "True" %}

Result: allow

{% endif %} + +{% if result == "False" %}

Result: deny

{% endif %} + +{% endblock %} diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html new file mode 100644 index 00000000..dc393c20 --- /dev/null +++ b/datasette/templates/api_explorer.html @@ -0,0 +1,208 @@ +{% extends "base.html" %} + +{% block title %}API Explorer{% endblock %} + +{% block extra_head %} + +{% endblock %} + +{% block content %} + +

API Explorer{% if private %} 🔒{% endif %}

+ +

Use this tool to try out the + {% if datasette_version %} + Datasette API. + {% else %} + Datasette API. + {% endif %} +

+
+ GET +
+
+ + + +
+
+
+
+ POST +
+
+ + +
+
+ + +
+

+
+
+ + + + + +{% if example_links %} +

API endpoints

+
    + {% for database in example_links %} +
  • Database: {{ database.name }}
  • +
      + {% for link in database.links %} +
    • {{ link.path }} - {{ link.label }}
    • + {% endfor %} + {% for table in database.tables %} +
    • {{ table.name }} +
        + {% for link in table.links %} +
      • {{ link.path }} - {{ link.label }}
      • + {% endfor %} +
      +
    • + {% endfor %} +
    + {% endfor %} +
+{% endif %} + +{% endblock %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index d779f12c..0d89e11c 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -1,34 +1,78 @@ - - +{% import "_crumbs.html" as crumbs with context %} + {% block title %}{% endblock %} - + -{% block extra_head %}{% endblock %} +{% for url in extra_css_urls %} + +{% endfor %} + + +{% for url in extra_js_urls %} + +{% endfor %} +{%- if alternate_url_json -%} + +{%- endif -%} +{%- block extra_head %}{% endblock -%} - + +