From 354d7a28732b701d5ebee334fc32a6e6e74ce0b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 9 Nov 2025 15:42:11 -0800 Subject: [PATCH 01/53] Bump a few versions, deploy on push to main Refs: - #2511 --- .github/workflows/deploy-latest.yml | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 8ffdbfd5..9f53b01e 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -2,10 +2,10 @@ name: Deploy latest.datasette.io on: workflow_dispatch: - # push: - # branches: - # - main - # - 1.0-dev + push: + branches: + - main + # - 1.0-dev permissions: contents: read @@ -15,19 +15,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v3 + uses: actions/checkout@v5 - name: Set up Python uses: actions/setup-python@v6 - # Using Python 3.10 for gcloud compatibility: with: - python-version: "3.10" - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }} - restore-keys: | - ${{ runner.os }}-pip- + python-version: "3.13" + cache: pip - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -104,7 +97,7 @@ jobs: # cat metadata.json - id: auth name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 + uses: google-github-actions/auth@v3 with: credentials_json: ${{ secrets.GCP_SA_KEY }} - name: Set up Cloud SDK From 291f71ec6b52bb7d346f8cad74ca60122db392e3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 11 Nov 2025 21:59:26 -0800 Subject: [PATCH 02/53] Remove out-dated plugin_hook_permission_allowed references --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7696fd89..66d46bce 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -278,7 +278,7 @@ To 
avoid similar mistakes in the future the ``datasette.permission_allowed()`` m Permission checks now consider opinions from every plugin ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the :ref:`permission_allowed() ` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) +The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the ``permission_allowed()`` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) Datasette now consults every plugin and checks to see if any of them returned ``False`` (the veto rule), and if none of them did, it then checks to see if any of them returned ``True``. @@ -1397,7 +1397,7 @@ You can use the new ``"allow"`` block syntax in ``metadata.json`` (or ``metadata See :ref:`authentication_permissions_allow` for more details. -Plugins can implement their own custom permission checks using the new :ref:`plugin_hook_permission_allowed` hook. +Plugins can implement their own custom permission checks using the new ``permission_allowed()`` plugin hook. A new debug page at ``/-/permissions`` shows recent permission checks, to help administrators and plugin authors understand exactly what checks are being performed. This tool defaults to only being available to the root user, but can be exposed to other users by plugins that respond to the ``permissions-debug`` permission. (:issue:`788`) From 32a425868cd6b58c66d9e255fd59017be0cd34c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 06:07:16 -0800 Subject: [PATCH 03/53] Bump black from 25.9.0 to 25.11.0 in the python-packages group (#2590) Bumps the python-packages group with 1 update: [black](https://github.com/psf/black).
Updates `black` from 25.9.0 to 25.11.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.9.0...25.11.0) --- updated-dependencies: - dependency-name: black dependency-version: 25.11.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1395ce82..4f487458 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ test = [ "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", - "black==25.9.0", + "black==25.11.0", "blacken-docs==1.20.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 23a640d38bebd55d9cc3b13a83ef6bc89d717fab Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 12 Nov 2025 16:14:21 -0800 Subject: [PATCH 04/53] datasette serve --default-deny option (#2593) Closes #2592 --- datasette/app.py | 2 + datasette/cli.py | 7 ++ datasette/default_permissions.py | 4 + docs/authentication.rst | 33 ++++++++ docs/cli-reference.rst | 1 + tests/test_cli.py | 1 + tests/test_default_deny.py | 129 +++++++++++++++++++++++++++++++ 7 files changed, 177 insertions(+) create mode 100644 tests/test_default_deny.py diff --git a/datasette/app.py b/datasette/app.py index 60a20032..5f2a484e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -304,6 +304,7 @@ class Datasette: crossdb=False, nolock=False, internal=None, + default_deny=False, ): self._startup_invoked = False assert config_dir is None or isinstance( @@ -512,6 +513,7 @@ class Datasette: self._permission_checks = collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) self.root_enabled = False + self.default_deny = default_deny self.client = 
DatasetteClient(self) async def apply_metadata_json(self): diff --git a/datasette/cli.py b/datasette/cli.py index aaf1b244..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -438,6 +438,11 @@ def uninstall(packages, yes): help="Output URL that sets a cookie authenticating the root user", is_flag=True, ) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) @click.option( "--get", help="Run an HTTP GET request against this path, print results and exit", @@ -514,6 +519,7 @@ def serve( settings, secret, root, + default_deny, get, headers, token, @@ -594,6 +600,7 @@ def serve( crossdb=crossdb, nolock=nolock, internal=internal, + default_deny=default_deny, ) # Separate directories from files diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 5642cdfe..12e6c1ef 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -352,6 +352,10 @@ async def default_action_permissions_sql(datasette, actor, action): With the INTERSECT-based restriction approach, these defaults are always generated and then filtered by restriction_sql if the actor has restrictions. """ + # Skip default allow rules if default_deny is enabled + if datasette.default_deny: + return None + default_allow_actions = { "view-instance", "view-database", diff --git a/docs/authentication.rst b/docs/authentication.rst index e69b0aa4..69a6f606 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -83,6 +83,39 @@ Datasette's built-in view actions (``view-database``, ``view-table`` etc) are al Other actions, including those introduced by plugins, will default to *deny*. +.. _authentication_default_deny: + +Denying all permissions by default +---------------------------------- + +By default, Datasette allows unauthenticated access to view databases, tables, and execute SQL queries. 
+ +You may want to run Datasette in a mode where **all** access is denied by default, and you explicitly grant permissions only to authenticated users, either using the :ref:`--root mechanism ` or through :ref:`configuration file rules ` or plugins. + +Use the ``--default-deny`` command-line option to run Datasette in this mode:: + + datasette --default-deny data.db --root + +With ``--default-deny`` enabled: + +* Anonymous users are denied access to view the instance, databases, tables, and queries +* Authenticated users are also denied access unless they're explicitly granted permissions +* The root user (when using ``--root``) still has access to everything +* You can grant permissions using :ref:`configuration file rules ` or plugins + +For example, to allow only a specific user to access your instance:: + + datasette --default-deny data.db --config datasette.yaml + +Where ``datasette.yaml`` contains: + +.. code-block:: yaml + + allow: + id: alice + +This configuration will deny access to everyone except the user with ``id`` of ``alice``. + .. 
_authentication_permissions_explained: How permissions are resolved diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index f002d05a..7ca88c4e 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -119,6 +119,7 @@ Once started you can access it at ``http://localhost:8001`` signed cookies --root Output URL that sets a cookie authenticating the root user + --default-deny Deny all permissions by default --get TEXT Run an HTTP GET request against this path, print results and exit --headers Include HTTP headers in --get output diff --git a/tests/test_cli.py b/tests/test_cli.py index 3bb360fb..21b86569 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -142,6 +142,7 @@ def test_metadata_yaml(): settings=[], secret=None, root=False, + default_deny=False, token=None, actor=None, version_note=None, diff --git a/tests/test_default_deny.py b/tests/test_default_deny.py new file mode 100644 index 00000000..81e95b84 --- /dev/null +++ b/tests/test_default_deny.py @@ -0,0 +1,129 @@ +import pytest +from datasette.app import Datasette +from datasette.resources import DatabaseResource, TableResource + + +@pytest.mark.asyncio +async def test_default_deny_denies_default_permissions(): + """Test that default_deny=True denies default permissions""" + # Without default_deny, anonymous users can view instance/database/tables + ds_normal = Datasette() + await ds_normal.invoke_startup() + + # Add a test database + db = ds_normal.add_memory_database("test_db_normal") + await db.execute_write("create table test_table (id integer primary key)") + await ds_normal._refresh_schemas() # Trigger catalog refresh + + # Test default behavior - anonymous user should be able to view + response = await ds_normal.client.get("/") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal/test_table") + assert response.status_code == 200 + + # 
With default_deny=True, anonymous users should be denied + ds_deny = Datasette(default_deny=True) + await ds_deny.invoke_startup() + + # Add the same test database + db = ds_deny.add_memory_database("test_db_deny") + await db.execute_write("create table test_table (id integer primary key)") + await ds_deny._refresh_schemas() # Trigger catalog refresh + + # Anonymous user should be denied + response = await ds_deny.client.get("/") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny/test_table") + assert response.status_code == 403 + + +@pytest.mark.asyncio +async def test_default_deny_with_root_user(): + """Test that root user still has access when default_deny=True""" + ds = Datasette(default_deny=True) + ds.root_enabled = True + await ds.invoke_startup() + + root_actor = {"id": "root"} + + # Root user should have all permissions even with default_deny + assert await ds.allowed(action="view-instance", actor=root_actor) is True + assert ( + await ds.allowed( + action="view-database", + actor=root_actor, + resource=DatabaseResource("test_db"), + ) + is True + ) + assert ( + await ds.allowed( + action="view-table", + actor=root_actor, + resource=TableResource("test_db", "test_table"), + ) + is True + ) + assert ( + await ds.allowed( + action="execute-sql", actor=root_actor, resource=DatabaseResource("test_db") + ) + is True + ) + + +@pytest.mark.asyncio +async def test_default_deny_with_config_allow(): + """Test that config allow rules still work with default_deny=True""" + ds = Datasette(default_deny=True, config={"allow": {"id": "user1"}}) + await ds.invoke_startup() + + # Anonymous user should be denied + assert await ds.allowed(action="view-instance", actor=None) is False + + # Authenticated user with explicit permission should have access + assert await ds.allowed(action="view-instance", actor={"id": "user1"}) is True + + # 
Different user should be denied + assert await ds.allowed(action="view-instance", actor={"id": "user2"}) is False + + +@pytest.mark.asyncio +async def test_default_deny_basic_permissions(): + """Test that default_deny=True denies basic permissions""" + ds = Datasette(default_deny=True) + await ds.invoke_startup() + + # Anonymous user should be denied all default permissions + assert await ds.allowed(action="view-instance", actor=None) is False + assert ( + await ds.allowed( + action="view-database", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + assert ( + await ds.allowed( + action="view-table", + actor=None, + resource=TableResource("test_db", "test_table"), + ) + is False + ) + assert ( + await ds.allowed( + action="execute-sql", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + + # Authenticated user without explicit permission should also be denied + assert await ds.allowed(action="view-instance", actor={"id": "user"}) is False From 5125bef5735c0823b72b27088cb11a189502e323 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 09:56:06 -0800 Subject: [PATCH 05/53] datasette.in_client() method, closes #2594 --- datasette/app.py | 63 ++++++++++++----- docs/internals.rst | 22 ++++++ tests/test_internals_datasette_client.py | 86 ++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 18 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5f2a484e..a5efdad5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2,6 +2,7 @@ from __future__ import annotations from asgi_csrf import Errors import asyncio +import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: @@ -130,6 +131,22 @@ from .resources import DatabaseResource, TableResource app_root = Path(__file__).parent.parent +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) + + 
+class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" + + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False + + @dataclasses.dataclass class PermissionCheck: """Represents a logged permission check for debugging purposes.""" @@ -666,6 +683,14 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. + """ + return _in_datasette_client.get() + def create_token( self, actor_id: str, @@ -2406,19 +2431,20 @@ class DatasetteClient: async def _request(self, method, path, skip_permission_checks=False, **kwargs): from datasette.permissions import SkipPermissions - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), ) as client: return await getattr(client, method)(self._fix(path), **kwargs) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) async def get(self, path, skip_permission_checks=False, **kwargs): return await self._request( @@ -2470,8 +2496,17 @@ class DatasetteClient: from datasette.permissions import SkipPermissions 
avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), @@ -2479,11 +2514,3 @@ class DatasetteClient: return await client.request( method, self._fix(path, avoid_path_rewrites), **kwargs ) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) diff --git a/docs/internals.rst b/docs/internals.rst index 2e01a8e8..09fb7572 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,28 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +Detecting internal client requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``datasette.in_client()`` - returns bool + Returns ``True`` if the current code is executing within a ``datasette.client`` request, ``False`` otherwise. + +This method is useful for plugins that need to behave differently when called through ``datasette.client`` versus when handling external HTTP requests. + +Example usage: + +.. code-block:: python + + async def fetch_documents(datasette): + if not datasette.in_client(): + return Response.text( + "Only available via internal client requests", + status=403 + ) + ... 
+ +Note that ``datasette.in_client()`` is independent of ``skip_permission_checks``. A request made through ``datasette.client`` will always have ``in_client()`` return ``True``, regardless of whether ``skip_permission_checks`` is set. + .. _internals_datasette_urls: datasette.urls diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index a15d294f..b254c5e4 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -227,3 +227,89 @@ async def test_skip_permission_checks_shows_denied_tables(): table_names = [match["name"] for match in data["matches"]] # Should see fixtures tables when permission checks are skipped assert "fixtures: test_table" in table_names + + +@pytest.mark.asyncio +async def test_in_client_returns_false_outside_request(datasette): + """Test that datasette.in_client() returns False outside of a client request""" + assert datasette.in_client() is False + + +@pytest.mark.asyncio +async def test_in_client_returns_true_inside_request(): + """Test that datasette.in_client() returns True inside a client request""" + from datasette import hookimpl, Response + from datasette.plugins import pm + + class TestPlugin: + __name__ = "test_in_client_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + # Assert in_client() returns True within the view + assert datasette.in_client() is True + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_plugin") + try: + ds = Datasette() + await ds.invoke_startup() + + # Outside of a client request, should be False + assert ds.in_client() is False + + # Make a request via datasette.client + response = await ds.client.get("/-/test-in-client") + assert response.status_code == 200 + assert response.json()["in_client"] is True + + # After the request, should be False again + assert 
ds.in_client() is False + finally: + pm.unregister(name="test_in_client_plugin") + + +@pytest.mark.asyncio +async def test_in_client_with_skip_permission_checks(): + """Test that in_client() works regardless of skip_permission_checks value""" + from datasette import hookimpl + from datasette.plugins import pm + from datasette.utils.asgi import Response + + in_client_values = [] + + class TestPlugin: + __name__ = "test_in_client_skip_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + in_client_values.append(datasette.in_client()) + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_skip_plugin") + try: + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + + # Request without skip_permission_checks + await ds.client.get("/-/test-in-client") + # Request with skip_permission_checks=True + await ds.client.get("/-/test-in-client", skip_permission_checks=True) + + # Both should have detected in_client as True + assert ( + len(in_client_values) == 2 + ), f"Expected 2 values, got {len(in_client_values)}" + assert all(in_client_values), f"Expected all True, got {in_client_values}" + finally: + pm.unregister(name="test_in_client_skip_plugin") From 4b4add4d311ce9c8b3e6b08b2f81db1bbd9cbf7e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:31:03 -0800 Subject: [PATCH 06/53] datasette.pm property, closes #2595 --- datasette/app.py | 16 +++++++++- datasette/plugins.py | 19 +++++++----- docs/internals.rst | 2 +- docs/testing_plugins.rst | 9 +++--- tests/test_actions_sql.py | 25 ++++++++-------- tests/test_allowed_resources.py | 25 ++++++++-------- tests/test_docs_plugins.py | 8 ++--- tests/test_internals_datasette_client.py | 18 +++++------ tests/test_permission_endpoints.py | 10 +++---- tests/test_plugins.py | 38 ++++++++++++------------ 
tests/test_restriction_sql.py | 20 ++++++------- 11 files changed, 101 insertions(+), 89 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index a5efdad5..2d8283a4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -631,6 +631,17 @@ class Datasette: def urls(self): return Urls(self) + @property + def pm(self): + """ + Return the global plugin manager instance. + + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. + """ + return pm + async def invoke_startup(self): # This must be called for Datasette to be in a usable state if self._startup_invoked: @@ -2415,7 +2426,10 @@ class DatasetteClient: def __init__(self, ds): self.ds = ds - self.app = ds.app() + + @property + def app(self): + return self.ds.app() def actor_cookie(self, actor): # Utility method, mainly for tests diff --git a/datasette/plugins.py b/datasette/plugins.py index 392ab60d..e9818885 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -94,21 +94,24 @@ def get_plugins(): for plugin in pm.get_plugins(): static_path = None templates_path = None - if plugin.__name__ not in DEFAULT_PLUGINS: + plugin_name = ( + plugin.__name__ + if hasattr(plugin, "__name__") + else plugin.__class__.__name__ + ) + if plugin_name not in DEFAULT_PLUGINS: try: - if (importlib_resources.files(plugin.__name__) / "static").is_dir(): - static_path = str( - importlib_resources.files(plugin.__name__) / "static" - ) - if (importlib_resources.files(plugin.__name__) / "templates").is_dir(): + if (importlib_resources.files(plugin_name) / "static").is_dir(): + static_path = str(importlib_resources.files(plugin_name) / "static") + if (importlib_resources.files(plugin_name) / "templates").is_dir(): templates_path = str( - importlib_resources.files(plugin.__name__) / "templates" + importlib_resources.files(plugin_name) / "templates" ) except (TypeError, ModuleNotFoundError): # Caused by --plugins_dir= 
plugins pass plugin_info = { - "name": plugin.__name__, + "name": plugin_name, "static_path": static_path, "templates_path": templates_path, "hooks": [h.name for h in pm.get_hookcallers(plugin)], diff --git a/docs/internals.rst b/docs/internals.rst index 09fb7572..09d45c90 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1093,7 +1093,7 @@ Example usage: if not datasette.in_client(): return Response.text( "Only available via internal client requests", - status=403 + status=403, ) ... diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index e4fad500..fc1aa6f6 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -283,13 +283,12 @@ Here's a test for that plugin that mocks the HTTPX outbound request: Registering a plugin for the duration of a test ----------------------------------------------- -When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``pm.register()`` and ``pm.unregister()`` like this: +When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``datasette.pm.register()`` and ``datasette.pm.unregister()`` like this: .. 
code-block:: python from datasette import hookimpl from datasette.app import Datasette - from datasette.plugins import pm import pytest @@ -305,14 +304,14 @@ When writing tests for plugins you may find it useful to register a test plugin (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() try: # The test implementation goes here - datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") response = await datasette.client.get("/error") assert response.status_code == 500 finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") To reuse the same temporary plugin in multiple tests, you can register it inside a fixture in your ``conftest.py`` file like this: diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 734a427d..863d2529 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -11,7 +11,6 @@ These tests verify: import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette.resources import TableResource from datasette import hookimpl @@ -67,7 +66,7 @@ async def test_allowed_resources_global_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the new allowed_resources() method @@ -87,7 +86,7 @@ async def test_allowed_resources_global_allow(test_ds): assert ("production", "orders") in table_set finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -106,7 +105,7 @@ async def test_allowed_specific_resource(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -130,7 +129,7 @@ async def 
test_allowed_specific_resource(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -148,7 +147,7 @@ async def test_allowed_resources_include_reasons(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use allowed_resources with include_reasons to get debugging info @@ -170,7 +169,7 @@ async def test_allowed_resources_include_reasons(test_ds): assert "analyst access" in reasons_text finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -190,7 +189,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -219,7 +218,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -239,7 +238,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "carol"} @@ -264,7 +263,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -288,7 +287,7 @@ async def test_sql_does_filtering_not_python(test_ds): return PermissionSQL(sql=sql) plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "dave"} @@ -314,4 +313,4 @@ async def 
test_sql_does_filtering_not_python(test_ds): assert tables[0].child == "users" finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index cecffbe2..0cd48ea9 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -8,7 +8,6 @@ based on permission rules from plugins and configuration. import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette import hookimpl @@ -62,7 +61,7 @@ async def test_tables_endpoint_global_access(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the allowed_resources API directly @@ -87,7 +86,7 @@ async def test_tables_endpoint_global_access(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -102,7 +101,7 @@ async def test_tables_endpoint_database_restriction(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -130,7 +129,7 @@ async def test_tables_endpoint_database_restriction(test_ds): # Note: default_permissions.py provides default allows, so we just check analytics are present finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -149,7 +148,7 @@ async def test_tables_endpoint_table_exception(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await 
test_ds.allowed_resources("view-table", {"id": "carol"}) @@ -172,7 +171,7 @@ async def test_tables_endpoint_table_exception(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -191,7 +190,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -214,7 +213,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -257,7 +256,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "dave"}) @@ -280,7 +279,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -295,7 +294,7 @@ async def test_tables_endpoint_empty_result(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "blocked"}) @@ -311,7 +310,7 @@ async def test_tables_endpoint_empty_result(test_ds): assert len(result) == 0 finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio diff --git a/tests/test_docs_plugins.py b/tests/test_docs_plugins.py index 
92b4514c..c51858d3 100644 --- a/tests/test_docs_plugins.py +++ b/tests/test_docs_plugins.py @@ -2,7 +2,6 @@ # -- start datasette_with_plugin_fixture -- from datasette import hookimpl from datasette.app import Datasette -from datasette.plugins import pm import pytest import pytest_asyncio @@ -18,11 +17,12 @@ async def datasette_with_plugin(): (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") try: - yield Datasette() + yield datasette finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") # -- end datasette_with_plugin_fixture -- diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index b254c5e4..326fcdc0 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -239,7 +239,6 @@ async def test_in_client_returns_false_outside_request(datasette): async def test_in_client_returns_true_inside_request(): """Test that datasette.in_client() returns True inside a client request""" from datasette import hookimpl, Response - from datasette.plugins import pm class TestPlugin: __name__ = "test_in_client_plugin" @@ -255,10 +254,10 @@ async def test_in_client_returns_true_inside_request(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_plugin") + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_plugin") try: - ds = Datasette() - await ds.invoke_startup() # Outside of a client request, should be False assert ds.in_client() is False @@ -271,14 +270,13 @@ async def test_in_client_returns_true_inside_request(): # After the request, should be False again assert ds.in_client() is False finally: - pm.unregister(name="test_in_client_plugin") + ds.pm.unregister(name="test_in_client_plugin") @pytest.mark.asyncio async def test_in_client_with_skip_permission_checks(): """Test that in_client() 
works regardless of skip_permission_checks value""" from datasette import hookimpl - from datasette.plugins import pm from datasette.utils.asgi import Response in_client_values = [] @@ -296,10 +294,10 @@ async def test_in_client_with_skip_permission_checks(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_skip_plugin") + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_skip_plugin") try: - ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) - await ds.invoke_startup() # Request without skip_permission_checks await ds.client.get("/-/test-in-client") @@ -312,4 +310,4 @@ async def test_in_client_with_skip_permission_checks(): ), f"Expected 2 values, got {len(in_client_values)}" assert all(in_client_values), f"Expected all True, got {in_client_values}" finally: - pm.unregister(name="test_in_client_skip_plugin") + ds.pm.unregister(name="test_in_client_skip_plugin") diff --git a/tests/test_permission_endpoints.py b/tests/test_permission_endpoints.py index d7b7bf07..84f3370f 100644 --- a/tests/test_permission_endpoints.py +++ b/tests/test_permission_endpoints.py @@ -439,7 +439,6 @@ async def test_execute_sql_requires_view_database(): be able to execute SQL on that database. 
""" from datasette.permissions import PermissionSQL - from datasette.plugins import pm from datasette import hookimpl class TestPermissionPlugin: @@ -464,11 +463,12 @@ async def test_execute_sql_requires_view_database(): return [] plugin = TestPermissionPlugin() - pm.register(plugin, name="test_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="test_plugin") try: - ds = Datasette() - await ds.invoke_startup() ds.add_memory_database("secret") await ds.refresh_schemas() @@ -498,4 +498,4 @@ async def test_execute_sql_requires_view_database(): f"but got {response.status_code}" ) finally: - pm.unregister(plugin) + ds.pm.unregister(plugin) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 4a8c60d7..42995c0d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -691,7 +691,7 @@ async def test_hook_permission_resources_sql(): await ds.invoke_startup() collected = [] - for block in pm.hook.permission_resources_sql( + for block in ds.pm.hook.permission_resources_sql( datasette=ds, actor={"id": "alice"}, action="view-table", @@ -1161,12 +1161,12 @@ async def test_hook_filters_from_request(ds_client): if request.args.get("_nothing"): return FilterArguments(["1 = 0"], human_descriptions=["NOTHING"]) - pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") + ds_client.ds.pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") response = await ds_client.get("/fixtures/facetable?_nothing=1") assert "0 rows\n where NOTHING" in response.text json_response = await ds_client.get("/fixtures/facetable.json?_nothing=1") assert json_response.json()["rows"] == [] - pm.unregister(name="ReturnNothingPlugin") + ds_client.ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1327,7 +1327,7 @@ async def test_hook_actors_from_ids(): return inner try: - pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") + ds.pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") actors2 = await 
ds.actors_from_ids(["3", "5", "7"]) assert actors2 == { "3": {"id": "3", "name": "Cate Blanchett"}, @@ -1335,7 +1335,7 @@ async def test_hook_actors_from_ids(): "7": {"id": "7", "name": "Sarah Paulson"}, } finally: - pm.unregister(name="ReturnNothingPlugin") + ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1350,14 +1350,14 @@ async def test_plugin_is_installed(): return {} try: - pm.register(DummyPlugin(), name="DummyPlugin") + datasette.pm.register(DummyPlugin(), name="DummyPlugin") response = await datasette.client.get("/-/plugins.json") assert response.status_code == 200 installed_plugins = {p["name"] for p in response.json()} assert "DummyPlugin" in installed_plugins finally: - pm.unregister(name="DummyPlugin") + datasette.pm.unregister(name="DummyPlugin") @pytest.mark.asyncio @@ -1384,7 +1384,7 @@ async def test_hook_jinja2_environment_from_request(tmpdir): datasette = Datasette(memory=True) try: - pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") + datasette.pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") response = await datasette.client.get("/") assert response.status_code == 200 assert "Hello museums!" not in response.text @@ -1395,7 +1395,7 @@ async def test_hook_jinja2_environment_from_request(tmpdir): assert response2.status_code == 200 assert "Hello museums!" 
in response2.text finally: - pm.unregister(name="EnvironmentPlugin") + datasette.pm.unregister(name="EnvironmentPlugin") class SlotPlugin: @@ -1433,48 +1433,48 @@ class SlotPlugin: @pytest.mark.asyncio async def test_hook_top_homepage(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/?z=foo") assert response.status_code == 200 assert "Xtop_homepage:foo" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_database(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/_memory?z=bar") assert response.status_code == 200 assert "Xtop_database:_memory:bar" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_table(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facetable?z=baz") assert response.status_code == 200 assert "Xtop_table:fixtures:facetable:baz" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_row(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facet_cities/1?z=bax") assert response.status_code == 200 assert "Xtop_row:fixtures:facet_cities:San Francisco:bax" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio diff 
--git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py index 7d6d8a5a..f23eb839 100644 --- a/tests/test_restriction_sql.py +++ b/tests/test_restriction_sql.py @@ -13,7 +13,6 @@ async def test_multiple_restriction_sources_intersect(): provide restriction_sql - both must pass for access to be granted. """ from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -29,11 +28,12 @@ async def test_multiple_restriction_sources_intersect(): return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_intersect") db2 = ds.add_memory_database("db2_multi_intersect") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -55,7 +55,7 @@ async def test_multiple_restriction_sources_intersect(): assert ("db1_multi_intersect", "t1") in resources assert ("db2_multi_intersect", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") @pytest.mark.asyncio @@ -265,7 +265,6 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() provide restriction_sql - both must pass for access to be granted. 
""" from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -281,11 +280,12 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_restrictions") db2 = ds.add_memory_database("db2_multi_restrictions") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -312,4 +312,4 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() assert ("db1_multi_restrictions", "t1") in resources assert ("db2_multi_restrictions", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") From 93b455239a4063c80d52da795db700c6a88e4d16 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:40:24 -0800 Subject: [PATCH 07/53] Release notes for 1.0a22, closes #2596 --- docs/changelog.rst | 9 +++++++++ docs/internals.rst | 2 ++ 2 files changed, 11 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 66d46bce..feba9390 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,15 @@ Changelog ========= +.. _v1_0_a22: + +1.0a22 (2025-11-13) +------------------- + +- ``datasette serve --default-deny`` option for running Datasette configured to :ref:`deny all permissions by default `. (:issue:`2592`) +- ``datasette.is_client()`` method for detecting if code is :ref:`executing inside a datasette.client request `. (:issue:`2594`) +- ``datasette.pm`` property can now be used to :ref:`register and unregister plugins in tests `. (:issue:`2595`) + .. 
_v1_0_a21: 1.0a21 (2025-11-05) diff --git a/docs/internals.rst b/docs/internals.rst index 09d45c90..cfd78593 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,8 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +.. _internals_datasette_is_client: + Detecting internal client requests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 2125115cd9b609def872cd8051912ac80179f510 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:41:02 -0800 Subject: [PATCH 08/53] Release 1.0a22 Refs #2592, #2594, #2595, #2596 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 01f00fcd..d0ff6ab1 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a21" +__version__ = "1.0a22" __version_info__ = tuple(__version__.split(".")) From 68f1179bac991b5e37b99a5482c40134f317c04f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Nov 2025 17:12:52 -0800 Subject: [PATCH 09/53] Fix for text None shown on /-/actions, closes #2599 --- datasette/templates/debug_actions.html | 2 +- tests/test_html.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/datasette/templates/debug_actions.html b/datasette/templates/debug_actions.html index 6dd5ac0e..0ef7b329 100644 --- a/datasette/templates/debug_actions.html +++ b/datasette/templates/debug_actions.html @@ -31,7 +31,7 @@ {{ action.name }} {% if action.abbr %}{{ action.abbr }}{% endif %} {{ action.description or "" }} - {{ action.resource_class }} + {% if action.resource_class %}{{ action.resource_class }}{% endif %} {% if action.takes_parent %}✓{% endif %} {% if action.takes_child %}✓{% endif %} {% if 
action.also_requires %}{{ action.also_requires }}{% endif %} diff --git a/tests/test_html.py b/tests/test_html.py index 35b839ec..7b667301 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1194,6 +1194,21 @@ async def test_actions_page(ds_client): ds_client.ds.root_enabled = original_root_enabled +@pytest.mark.asyncio +async def test_actions_page_does_not_display_none_string(ds_client): + """Ensure the Resource column doesn't display the string 'None' for null values.""" + # https://github.com/simonw/datasette/issues/2599 + original_root_enabled = ds_client.ds.root_enabled + try: + ds_client.ds.root_enabled = True + cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} + response = await ds_client.get("/-/actions", cookies=cookies) + assert response.status_code == 200 + assert "None" not in response.text + finally: + ds_client.ds.root_enabled = original_root_enabled + + @pytest.mark.asyncio async def test_permission_debug_tabs_with_query_string(ds_client): """Test that navigation tabs persist query strings across Check, Allowed, and Rules pages""" From c6c2a238c3e890384eef6bf9bca062fd784d9157 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 16:22:42 -0800 Subject: [PATCH 10/53] Fix for stale internal database bug, closes #2605 --- datasette/utils/internal_db.py | 3 +++ tests/test_internal_db.py | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index a3afbab2..587ea7b1 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,6 +116,9 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): + conn.execute( + "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] + ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) diff --git a/tests/test_internal_db.py b/tests/test_internal_db.py index 
59516225..7a0d1630 100644 --- a/tests/test_internal_db.py +++ b/tests/test_internal_db.py @@ -91,3 +91,51 @@ async def test_internal_foreign_key_references(ds_client): ) await internal_db.execute_fn(inner) + + +@pytest.mark.asyncio +async def test_stale_catalog_entry_database_fix(tmp_path): + """ + Test for https://github.com/simonw/datasette/issues/2605 + + When the internal database persists across restarts and has entries in + catalog_databases for databases that no longer exist, accessing the + index page should not cause a 500 error (KeyError). + """ + from datasette.app import Datasette + + internal_db_path = str(tmp_path / "internal.db") + data_db_path = str(tmp_path / "data.db") + + # Create a data database file + import sqlite3 + + conn = sqlite3.connect(data_db_path) + conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY)") + conn.close() + + # First Datasette instance: with the data database and persistent internal db + ds1 = Datasette(files=[data_db_path], internal=internal_db_path) + await ds1.invoke_startup() + + # Access the index page to populate the internal catalog + response = await ds1.client.get("/") + assert "data" in ds1.databases + assert response.status_code == 200 + + # Second Datasette instance: reusing internal.db but WITHOUT the data database + # This simulates restarting Datasette after removing a database + ds2 = Datasette(internal=internal_db_path) + await ds2.invoke_startup() + + # The database is not in ds2.databases + assert "data" not in ds2.databases + + # Accessing the index page should NOT cause a 500 error + # This is the bug: it currently raises KeyError when trying to + # access ds.databases["data"] for the stale catalog entry + response = await ds2.client.get("/") + assert response.status_code == 200, ( + f"Index page should return 200, not {response.status_code}. " + "This fails due to stale catalog entries causing KeyError." 
+ ) From 170b3ff61c1c7bc49b999ecbe43853af9727f2f1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:00:13 -0800 Subject: [PATCH 11/53] Better fix for stale catalog_databases, closes #2606 Refs 2605 --- datasette/app.py | 9 +++++++++ datasette/utils/internal_db.py | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2d8283a4..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -606,6 +606,15 @@ class Datasette: "select database_name, schema_version from catalog_databases" ) } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) for database_name, db in self.databases.items(): schema_version = (await db.execute("PRAGMA schema_version")).first()[0] # Compare schema versions to see if we should skip it diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 587ea7b1..a3afbab2 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,9 +116,6 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): - conn.execute( - "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] - ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) From 0a924524be06a331f20d2e1314ec82370995630b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:11:31 -0800 Subject: [PATCH 12/53] Split default_permissions.py into a package (#2603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Split default_permissions.py into a package, refs #2602 * Remove unused is_resource_allowed() method, improve test coverage - Remove dead code: 
is_resource_allowed() method was never called - Change isinstance check to assertion with error message - Add test cases for table-level restrictions in restrictions_allow_action() - Coverage for restrictions.py improved from 79% to 99% 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * Additional permission test for gap spotted by coverage --- datasette/default_permissions.py | 494 ------------------ datasette/default_permissions/__init__.py | 59 +++ datasette/default_permissions/config.py | 442 ++++++++++++++++ datasette/default_permissions/defaults.py | 70 +++ datasette/default_permissions/helpers.py | 85 +++ datasette/default_permissions/restrictions.py | 195 +++++++ datasette/default_permissions/root.py | 29 + datasette/default_permissions/tokens.py | 95 ++++ tests/test_permissions.py | 59 +++ 9 files changed, 1034 insertions(+), 494 deletions(-) delete mode 100644 datasette/default_permissions.py create mode 100644 datasette/default_permissions/__init__.py create mode 100644 datasette/default_permissions/config.py create mode 100644 datasette/default_permissions/defaults.py create mode 100644 datasette/default_permissions/helpers.py create mode 100644 datasette/default_permissions/restrictions.py create mode 100644 datasette/default_permissions/root.py create mode 100644 datasette/default_permissions/tokens.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py deleted file mode 100644 index 12e6c1ef..00000000 --- a/datasette/default_permissions.py +++ /dev/null @@ -1,494 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL -from datasette.utils import actor_matches_allow -import itsdangerous -import time - - -@hookimpl(specname="permission_resources_sql") -async def actor_restrictions_sql(datasette, actor, action): - """Handle 
actor restriction-based permission rules (_r key).""" - if not actor: - return None - - restrictions = actor.get("_r") if isinstance(actor, dict) else None - if restrictions is None: - return [] - - # Check if this action appears in restrictions (with abbreviations) - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - # Check if globally allowed in restrictions - global_actions = restrictions.get("a", []) - is_globally_allowed = action_checks.intersection(global_actions) - - if is_globally_allowed: - # Globally allowed - no restriction filtering needed - return [] - - # Not globally allowed - build restriction_sql that lists allowlisted resources - restriction_selects = [] - restriction_params = {} - param_counter = 0 - - # Add database-level allowlisted resources - db_restrictions = restrictions.get("d", {}) - for db_name, db_actions in db_restrictions.items(): - if action_checks.intersection(db_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, NULL AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - - # Add table-level allowlisted resources - resource_restrictions = restrictions.get("r", {}) - for db_name, tables in resource_restrictions.items(): - for table_name, table_actions in tables.items(): - if action_checks.intersection(table_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - restriction_params[f"{prefix}_child"] = table_name - - if not restriction_selects: - # Action not in allowlist - return empty restriction (INTERSECT will return no results) - return [ - PermissionSQL( - params={"deny": f"actor restrictions: {action} not in allowlist"}, - restriction_sql="SELECT NULL AS parent, NULL AS child 
WHERE 0", # Empty set - ) - ] - - # Build restriction SQL that returns allowed (parent, child) pairs - restriction_sql = "\nUNION ALL\n".join(restriction_selects) - - # Return restriction-only PermissionSQL (sql=None means no permission rules) - # The restriction_sql does the actual filtering via INTERSECT - return [ - PermissionSQL( - params=restriction_params, - restriction_sql=restriction_sql, - ) - ] - - -@hookimpl(specname="permission_resources_sql") -async def root_user_permissions_sql(datasette, actor, action): - """Grant root user full permissions when enabled.""" - if datasette.root_enabled and actor and actor.get("id") == "root": - # Add a single global-level allow rule (NULL, NULL) for root - # This allows root to access everything by default, but database-level - # and table-level deny rules in config can still block specific resources - return PermissionSQL.allow(reason="root user") - return None - - -@hookimpl(specname="permission_resources_sql") -async def config_permissions_sql(datasette, actor, action): - """Apply config-based permission rules from datasette.yaml.""" - config = datasette.config or {} - - def evaluate(allow_block): - if allow_block is None: - return None - return actor_matches_allow(actor, allow_block) - - has_restrictions = actor and "_r" in actor if actor else False - restrictions = actor.get("_r", {}) if actor else {} - - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - restricted_databases: set[str] = set() - restricted_tables: set[tuple[str, str]] = set() - if has_restrictions: - restricted_databases = { - db_name - for db_name, db_actions in (restrictions.get("d") or {}).items() - if action_checks.intersection(db_actions) - } - restricted_tables = { - (db_name, table_name) - for db_name, tables in (restrictions.get("r") or {}).items() - for table_name, table_actions in tables.items() - if action_checks.intersection(table_actions) 
- } - # Tables implicitly reference their parent databases - restricted_databases.update(db for db, _ in restricted_tables) - - def is_in_restriction_allowlist(parent, child, action_name): - """Check if a resource is in the actor's restriction allowlist for this action""" - if not has_restrictions: - return True # No restrictions, all resources allowed - - # Check global allowlist - if action_checks.intersection(restrictions.get("a", [])): - return True - - # Check database-level allowlist - if parent and action_checks.intersection( - restrictions.get("d", {}).get(parent, []) - ): - return True - - # Check table-level allowlist - if parent: - table_restrictions = (restrictions.get("r", {}) or {}).get(parent, {}) - if child: - table_actions = table_restrictions.get(child, []) - if action_checks.intersection(table_actions): - return True - else: - # Parent query should proceed if any child in this database is allowlisted - for table_actions in table_restrictions.values(): - if action_checks.intersection(table_actions): - return True - - # Parent/child both None: include if any restrictions exist for this action - if parent is None and child is None: - if action_checks.intersection(restrictions.get("a", [])): - return True - if restricted_databases: - return True - if restricted_tables: - return True - - return False - - rows = [] - - def add_row(parent, child, result, scope): - if result is None: - return - rows.append( - ( - parent, - child, - bool(result), - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - - def add_row_allow_block(parent, child, allow_block, scope): - """For 'allow' blocks, always add a row if the block exists - deny if no match""" - if allow_block is None: - return - - # If actor has restrictions and this resource is NOT in allowlist, skip this config rule - # Restrictions act as a gating filter - config cannot grant access to restricted-out resources - if not is_in_restriction_allowlist(parent, child, action): - return - - result = 
evaluate(allow_block) - bool_result = bool(result) - # If result is None (no match) or False, treat as deny - rows.append( - ( - parent, - child, - bool_result, # None becomes False, False stays False, True stays True - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - if has_restrictions and not bool_result and child is None: - reason = f"config deny {scope} (restriction gate)" - if parent is None: - # Root-level deny: add more specific denies for restricted resources - if action_obj and action_obj.takes_parent: - for db_name in restricted_databases: - rows.append((db_name, None, 0, reason)) - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - rows.append((db_name, table_name, 0, reason)) - else: - # Database-level deny: add child-level denies for restricted tables - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - if db_name == parent: - rows.append((db_name, table_name, 0, reason)) - - root_perm = (config.get("permissions") or {}).get(action) - add_row(None, None, evaluate(root_perm), f"permissions for {action}") - - for db_name, db_config in (config.get("databases") or {}).items(): - db_perm = (db_config.get("permissions") or {}).get(action) - add_row( - db_name, None, evaluate(db_perm), f"permissions for {action} on {db_name}" - ) - - for table_name, table_config in (db_config.get("tables") or {}).items(): - table_perm = (table_config.get("permissions") or {}).get(action) - add_row( - db_name, - table_name, - evaluate(table_perm), - f"permissions for {action} on {db_name}/{table_name}", - ) - - if action == "view-table": - table_allow = (table_config or {}).get("allow") - add_row_allow_block( - db_name, - table_name, - table_allow, - f"allow for {action} on {db_name}/{table_name}", - ) - - for query_name, query_config in (db_config.get("queries") or {}).items(): - # query_config can be a string (just SQL) or a dict (with SQL and options) - if isinstance(query_config, 
dict): - query_perm = (query_config.get("permissions") or {}).get(action) - add_row( - db_name, - query_name, - evaluate(query_perm), - f"permissions for {action} on {db_name}/{query_name}", - ) - if action == "view-query": - query_allow = query_config.get("allow") - add_row_allow_block( - db_name, - query_name, - query_allow, - f"allow for {action} on {db_name}/{query_name}", - ) - - if action == "view-database": - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "execute-sql": - db_allow_sql = db_config.get("allow_sql") - add_row_allow_block(db_name, None, db_allow_sql, f"allow_sql for {db_name}") - - if action == "view-table": - # Database-level allow block affects all tables in that database - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "view-query": - # Database-level allow block affects all queries in that database - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - # Root-level allow block applies to all view-* actions - if action == "view-instance": - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-instance") - - if action == "view-database": - # Root-level allow block also applies to view-database - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-database") - - if action == "view-table": - # Root-level allow block also applies to view-table - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-table") - - if action == "view-query": - # Root-level allow block also applies to view-query - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-query") - - if action == "execute-sql": - allow_sql = 
config.get("allow_sql") - add_row_allow_block(None, None, allow_sql, "allow_sql") - - if not rows: - return [] - - parts = [] - params = {} - for idx, (parent, child, allow, reason) in enumerate(rows): - key = f"cfg_{idx}" - parts.append( - f"SELECT :{key}_parent AS parent, :{key}_child AS child, :{key}_allow AS allow, :{key}_reason AS reason" - ) - params[f"{key}_parent"] = parent - params[f"{key}_child"] = child - params[f"{key}_allow"] = 1 if allow else 0 - params[f"{key}_reason"] = reason - - sql = "\nUNION ALL\n".join(parts) - return [PermissionSQL(sql=sql, params=params)] - - -@hookimpl(specname="permission_resources_sql") -async def default_allow_sql_check(datasette, actor, action): - """Enforce default_allow_sql setting for execute-sql action.""" - if action == "execute-sql" and not datasette.setting("default_allow_sql"): - return PermissionSQL.deny(reason="default_allow_sql is false") - return None - - -@hookimpl(specname="permission_resources_sql") -async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions. - - With the INTERSECT-based restriction approach, these defaults are always generated - and then filtered by restriction_sql if the actor has restrictions. - """ - # Skip default allow rules if default_deny is enabled - if datasette.default_deny: - return None - - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - return PermissionSQL.allow(reason=reason) - - return None - - -def restrictions_allow_action( - datasette: "Datasette", - restrictions: dict, - action: str, - resource: str | tuple[str, str], -): - """ - Check if actor restrictions allow the requested action against the requested resource. 
- - Restrictions work on an exact-match basis: if an actor has view-table permission, - they can view tables, but NOT automatically view-instance or view-database. - Each permission is checked independently without implication logic. - """ - # Does this action have an abbreviation? - to_check = {action} - action_obj = datasette.actions.get(action) - if action_obj and action_obj.abbr: - to_check.add(action_obj.abbr) - - # Check if restrictions explicitly allow this action - # Restrictions can be at three levels: - # - "a": global (any resource) - # - "d": per-database - # - "r": per-table/resource - - # Check global level (any resource) - all_allowed = restrictions.get("a") - if all_allowed is not None: - assert isinstance(all_allowed, list) - if to_check.intersection(all_allowed): - return True - - # Check database level - if resource: - if isinstance(resource, str): - database_name = resource - else: - database_name = resource[0] - database_allowed = restrictions.get("d", {}).get(database_name) - if database_allowed is not None: - assert isinstance(database_allowed, list) - if to_check.intersection(database_allowed): - return True - - # Check table/resource level - if resource is not None and not isinstance(resource, str) and len(resource) == 2: - database, table = resource - table_allowed = restrictions.get("r", {}).get(database, {}).get(table) - if table_allowed is not None: - assert isinstance(table_allowed, list) - if to_check.intersection(table_allowed): - return True - - # This action is not explicitly allowed, so reject it - return False - - -@hookimpl -def actor_from_request(datasette, request): - prefix = "dstok_" - if not datasette.setting("allow_signed_tokens"): - return None - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - authorization = request.headers.get("authorization") - if not authorization: - return None - if not authorization.startswith("Bearer "): - return None - token = authorization[len("Bearer ") :] - if not 
token.startswith(prefix): - return None - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - if "t" not in decoded: - # Missing timestamp - return None - created = decoded["t"] - if not isinstance(created, int): - # Invalid timestamp - return None - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - # Invalid duration - return None - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - if duration: - if time.time() - created > duration: - # Expired - return None - actor = {"id": decoded["a"], "token": "dstok"} - if "_r" in decoded: - actor["_r"] = decoded["_r"] - if duration: - actor["token_expires"] = created + duration - return actor - - -@hookimpl -def skip_csrf(scope): - # Skip CSRF check for requests with content-type: application/json - if scope["type"] == "http": - headers = scope.get("headers") or {} - if dict(headers).get(b"content-type") == b"application/json": - return True - - -@hookimpl -def canned_queries(datasette, database, actor): - """Return canned queries from datasette configuration.""" - queries = ( - ((datasette.config or {}).get("databases") or {}).get(database) or {} - ).get("queries") or {} - return queries diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. 
Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. + + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if 
self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: 
Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: + """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. 
+ """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, 
table_config or {}) + + # Process queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + 
self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is false (the default), execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. 
+ + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. 
If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. 
+ + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> bool: + """ + Check if restrictions allow the requested action on the requested resource. 
+ + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 6def3840..e2dd92b8 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1323,6 +1323,20 @@ async def test_actor_restrictions( ("dbname2", "tablename"), False, ), + # Table-level restriction 
allows access to that specific table + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "tablename"), + True, + ), + # But not to a different table in the same database + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "other_table"), + False, + ), ), ) async def test_restrictions_allow_action(restrictions, action, resource, expected): @@ -1653,3 +1667,48 @@ async def test_permission_check_view_requires_debug_permission(): data = response.json() assert data["action"] == "view-instance" assert data["allowed"] is True + + +@pytest.mark.asyncio +async def test_root_allow_block_with_table_restricted_actor(): + """ + Test that root-level allow: blocks are processed for actors with + table-level restrictions. + + This covers the case in config.py is_in_restriction_allowlist() where + parent=None, child=None and actor has table restrictions but not global. + """ + from datasette.resources import TableResource + + # Config with root-level allow block that denies non-admin users + ds = Datasette( + config={ + "allow": {"id": "admin"}, # Root-level allow block + } + ) + await ds.invoke_startup() + db = ds.add_memory_database("mydb") + await db.execute_write("create table t1 (id integer primary key)") + await ds.client.get("/") # Trigger catalog refresh + + # Actor with table-level restrictions only (not global) + actor = {"id": "user", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + + # The root-level allow: {id: admin} should be processed and deny this user + # because they're not "admin", even though they have table restrictions + result = await ds.allowed( + action="view-table", + resource=TableResource("mydb", "t1"), + actor=actor, + ) + # Should be False because root allow: {id: admin} denies non-admin users + assert result is False + + # But admin with same restrictions should be allowed + admin_actor = {"id": "admin", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + result = await ds.allowed( + 
action="view-table", + resource=TableResource("mydb", "t1"), + actor=admin_actor, + ) + assert result is True From 3eca3ad6d45c94da16a09b51a648052bbeeeaf2f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:16:39 -0800 Subject: [PATCH 13/53] Better recipe for 'just docs' --- Justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Justfile b/Justfile index abb134a6..a47662c3 100644 --- a/Justfile +++ b/Justfile @@ -29,7 +29,7 @@ export DATASETTE_SECRET := "not_a_secret" # Serve live docs on localhost:8000 @docs: cog blacken-docs - uv sync --extra docs && cd docs && uv run make livehtml + uv run --extra docs make -C docs livehtml # Build docs as static HTML @docs-build: cog blacken-docs From 03ab3592083c6677bde58f1bd20002963c980344 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:19:48 -0800 Subject: [PATCH 14/53] tool.uv.package = true --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4f487458..8ec1c6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,3 +93,6 @@ datasette = ["templates/*.html"] [tool.setuptools.dynamic] version = {attr = "datasette.version.__version__"} + +[tool.uv] +package = true From 2ca00b6c75b165c3318d06e6dc6eb228b9b60338 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:20:43 -0800 Subject: [PATCH 15/53] Release 1.0a23 Refs #2605, #2599 --- datasette/version.py | 2 +- docs/changelog.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index d0ff6ab1..fff37a72 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a22" +__version__ = "1.0a23" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index feba9390..feba7e86 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. 
_v1_0_a23: + +1.0a23 (2025-12-02) +------------------- + +- Fix for bug where a stale database entry in ``internal.db`` could cause a 500 error on the homepage. (:issue:`2605`) +- Cosmetic improvement to ``/-/actions`` page. (:issue:`2599`) + .. _v1_0_a22: 1.0a22 (2025-11-13) From 1d4448fc5603f479f11b37b9da0ee11c2b1a19e4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 4 Dec 2025 21:36:39 -0800 Subject: [PATCH 16/53] Use subtests in tests/test_docs.py (#2609) Closes #2608 --- pyproject.toml | 2 +- tests/test_docs.py | 53 +++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ec1c6b7..f3053447 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ docs = [ "ruamel.yaml", ] test = [ - "pytest>=5.2.2", + "pytest>=9", "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", diff --git a/tests/test_docs.py b/tests/test_docs.py index 953224dd..b94a6f23 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -28,9 +28,10 @@ def settings_headings(): return get_headings((docs_path / "settings.rst").read_text(), "~") -@pytest.mark.parametrize("setting", app.SETTINGS) -def test_settings_are_documented(settings_headings, setting): - assert setting.name in settings_headings +def test_settings_are_documented(settings_headings, subtests): + for setting in app.SETTINGS: + with subtests.test(setting=setting.name): + assert setting.name in settings_headings @pytest.fixture(scope="session") @@ -38,21 +39,21 @@ def plugin_hooks_content(): return (docs_path / "plugin_hooks.rst").read_text() -@pytest.mark.parametrize( - "plugin", [name for name in dir(app.pm.hook) if not name.startswith("_")] -) -def test_plugin_hooks_are_documented(plugin, plugin_hooks_content): +def test_plugin_hooks_are_documented(plugin_hooks_content, subtests): headings = set() headings.update(get_headings(plugin_hooks_content, "-")) 
headings.update(get_headings(plugin_hooks_content, "~")) - assert plugin in headings - hook_caller = getattr(app.pm.hook, plugin) - arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] - # Check for plugin_name(arg1, arg2, arg3) - expected = f"{plugin}({', '.join(arg_names)})" - assert ( - expected in plugin_hooks_content - ), f"Missing from plugin hook documentation: {expected}" + plugins = [name for name in dir(app.pm.hook) if not name.startswith("_")] + for plugin in plugins: + with subtests.test(plugin=plugin): + assert plugin in headings + hook_caller = getattr(app.pm.hook, plugin) + arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] + # Check for plugin_name(arg1, arg2, arg3) + expected = f"{plugin}({', '.join(arg_names)})" + assert ( + expected in plugin_hooks_content + ), f"Missing from plugin hook documentation: {expected}" @pytest.fixture(scope="session") @@ -68,9 +69,11 @@ def documented_views(): return view_labels -@pytest.mark.parametrize("view_class", [v for v in dir(app) if v.endswith("View")]) -def test_view_classes_are_documented(documented_views, view_class): - assert view_class in documented_views +def test_view_classes_are_documented(documented_views, subtests): + view_classes = [v for v in dir(app) if v.endswith("View")] + for view_class in view_classes: + with subtests.test(view_class=view_class): + assert view_class in documented_views @pytest.fixture(scope="session") @@ -85,9 +88,10 @@ def documented_table_filters(): } -@pytest.mark.parametrize("filter", [f.key for f in Filters._filters]) -def test_table_filters_are_documented(documented_table_filters, filter): - assert filter in documented_table_filters +def test_table_filters_are_documented(documented_table_filters, subtests): + for f in Filters._filters: + with subtests.test(filter=f.key): + assert f.key in documented_table_filters @pytest.fixture(scope="session") @@ -101,9 +105,10 @@ def documented_fns(): } -@pytest.mark.parametrize("fn", 
utils.functions_marked_as_documented) -def test_functions_marked_with_documented_are_documented(documented_fns, fn): - assert fn.__name__ in documented_fns +def test_functions_marked_with_documented_are_documented(documented_fns, subtests): + for fn in utils.functions_marked_as_documented: + with subtests.test(fn=fn.__name__): + assert fn.__name__ in documented_fns def test_rst_heading_underlines_match_title_length(): From 4cbdfcc07d36c36ac77243d586836b91f90be67c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 11 Dec 2025 17:32:58 -0800 Subject: [PATCH 17/53] dependency-groups and uv (#2611) * dependency-groups and uv, closes #2610 * New .readthedocs config for --group dev --- .github/workflows/deploy-latest.yml | 3 +- .github/workflows/publish.yml | 4 +- .github/workflows/spellcheck.yml | 2 +- .github/workflows/test-coverage.yml | 2 +- .github/workflows/test-sqlite-support.yml | 2 +- .github/workflows/test.yml | 5 +- .readthedocs.yaml | 25 ++++----- Justfile | 8 +-- docs/contributing.rst | 65 ++++++++++------------- pyproject.toml | 28 +++++----- 10 files changed, 67 insertions(+), 77 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 9f53b01e..7349a1ab 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -24,8 +24,7 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - python -m pip install -e .[test] - python -m pip install -e .[docs] + python -m pip install . --group dev python -m pip install sphinx-to-sqlite==0.1a1 - name: Run tests if: ${{ github.ref == 'refs/heads/main' }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e94d0bdd..2e8cea9c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -23,7 +23,7 @@ jobs: cache-dependency-path: pyproject.toml - name: Install dependencies run: | - pip install -e '.[test]' + pip install . 
--group dev - name: Run tests run: | pytest @@ -65,7 +65,7 @@ jobs: cache-dependency-path: pyproject.toml - name: Install dependencies run: | - python -m pip install -e .[docs] + python -m pip install . --group dev python -m pip install sphinx-to-sqlite==0.1a1 - name: Build docs.db run: |- diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 7c5370ce..d42ae96b 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -18,7 +18,7 @@ jobs: cache-dependency-path: '**/pyproject.toml' - name: Install dependencies run: | - pip install -e '.[docs]' + pip install . --group dev - name: Check spelling run: | codespell README.md --ignore-words docs/codespell-ignore-words.txt diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 8d73b64d..1b3d2f2c 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -25,7 +25,7 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - python -m pip install -e .[test] + python -m pip install . --group dev python -m pip install pytest-cov - name: Run tests run: |- diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml index 76ea138a..c81a3c0b 100644 --- a/.github/workflows/test-sqlite-support.yml +++ b/.github/workflows/test-sqlite-support.yml @@ -45,7 +45,7 @@ jobs: (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | - pip install -e '.[test]' + pip install . --group dev pip freeze - name: Run tests run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1e5e03d2..3790c788 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,7 @@ jobs: (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | - pip install -e '.[test]' + pip install . 
--group dev pip freeze - name: Run tests run: | @@ -33,9 +33,6 @@ jobs: pytest -m "serial" # And the test that exceeds a localhost HTTPS server tests/test_datasette_https_server.sh - - name: Install docs dependencies - run: | - pip install -e '.[docs]' - name: Black run: black --check . - name: Check if cog needs to be run diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 5b30e75a..8b3e54aa 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,16 +1,17 @@ version: 2 -build: - os: ubuntu-20.04 - tools: - python: "3.11" - sphinx: - configuration: docs/conf.py + configuration: docs/conf.py -python: - install: - - method: pip - path: . - extra_requirements: - - docs +build: + os: ubuntu-24.04 + tools: + python: "3.13" + jobs: + install: + - pip install --upgrade pip + - pip install . --group dev + +formats: +- pdf +- epub diff --git a/Justfile b/Justfile index a47662c3..8c50e5ca 100644 --- a/Justfile +++ b/Justfile @@ -5,7 +5,7 @@ export DATASETTE_SECRET := "not_a_secret" # Setup project @init: - uv sync --extra test --extra docs + uv sync # Run pytest with supplied options @test *options: init @@ -21,15 +21,15 @@ export DATASETTE_SECRET := "not_a_secret" @lint: codespell uv run black . 
--check uv run flake8 - uv run --extra test cog --check README.md docs/*.rst + uv run cog --check README.md docs/*.rst # Rebuild docs with cog @cog: - uv run --extra test cog -r README.md docs/*.rst + uv run cog -r README.md docs/*.rst # Serve live docs on localhost:8000 @docs: cog blacken-docs - uv run --extra docs make -C docs livehtml + uv run make -C docs livehtml # Build docs as static HTML @docs-build: cog blacken-docs diff --git a/docs/contributing.rst b/docs/contributing.rst index 6be0247c..3d41a125 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -32,17 +32,18 @@ If you want to get started without creating your own fork, you can do this inste git clone git@github.com:simonw/datasette -The next step is to create a virtual environment for your project and use it to install Datasette's dependencies:: +The quickest way to set up a development environment is to use `uv `__. From the repository root you can run the tests directly:: cd datasette - # Create a virtual environment in ./venv - python3 -m venv ./venv - # Now activate the virtual environment, so pip can install into it - source venv/bin/activate - # Install Datasette and its testing dependencies - python3 -m pip install -e '.[test]' + uv run pytest -That last line does most of the work: ``pip install -e`` means "install this package in a way that allows me to edit the source code in place". The ``.[test]`` option means "install the optional testing dependencies as well". +This will create a local ``.venv/`` and install Datasette plus its development dependencies. + +If you prefer to manage your own virtual environment with pip, create and activate one and then install the development dependency group:: + + python3 -m venv ./venv + source venv/bin/activate + python3 -m pip install -e . --group dev .. 
_contributing_running_tests: @@ -51,15 +52,15 @@ Running the tests Once you have done this, you can run the Datasette unit tests from inside your ``datasette/`` directory using `pytest `__ like so:: - pytest + uv run pytest You can run the tests faster using multiple CPU cores with `pytest-xdist `__ like this:: - pytest -n auto -m "not serial" + uv run pytest -n auto -m "not serial" ``-n auto`` detects the number of available cores automatically. The ``-m "not serial"`` skips tests that don't work well in a parallel test environment. You can run those tests separately like so:: - pytest -m "serial" + uv run pytest -m "serial" .. _contributing_using_fixtures: @@ -72,11 +73,11 @@ You're going to need at least one SQLite database. A quick way to get started is You can create a copy of that database by running this command:: - python tests/fixtures.py fixtures.db + uv run python tests/fixtures.py fixtures.db Now you can run Datasette against the new fixtures database like so:: - datasette fixtures.db + uv run datasette fixtures.db This will start a server at ``http://127.0.0.1:8001/``. @@ -84,15 +85,14 @@ Any changes you make in the ``datasette/templates`` or ``datasette/static`` fold If you want to change Datasette's Python code you can use the ``--reload`` option to cause Datasette to automatically reload any time the underlying code changes:: - datasette --reload fixtures.db + uv run datasette --reload fixtures.db You can also use the ``fixtures.py`` script to recreate the testing version of ``metadata.json`` used by the unit tests. 
To do that:: - python tests/fixtures.py fixtures.db fixtures-metadata.json - + uv run python tests/fixtures.py fixtures.db fixtures-metadata.json Or to output the plugins used by the tests, run this:: - python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins + uv run python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins Test tables written to fixtures.db - metadata written to fixtures-metadata.json Wrote plugin: fixtures-plugins/register_output_renderer.py @@ -103,7 +103,7 @@ Or to output the plugins used by the tests, run this:: Then run Datasette like this:: - datasette fixtures.db -m fixtures-metadata.json --plugins-dir=fixtures-plugins/ + uv run datasette fixtures.db -m fixtures-metadata.json --plugins-dir=fixtures-plugins/ .. _contributing_debugging: @@ -114,11 +114,11 @@ Any errors that occur while Datasette is running while display a stack trace on You can tell Datasette to open an interactive ``pdb`` (or ``ipdb``, if present) debugger session if an error occurs using the ``--pdb`` option:: - datasette --pdb fixtures.db + uv run datasette --pdb fixtures.db For `ipdb `__, first run this:: - datasette install ipdb + uv run datasette install ipdb .. _contributing_formatting: @@ -145,9 +145,9 @@ Or run both at the same time:: Running Black ~~~~~~~~~~~~~ -Black will be installed when you run ``pip install -e '.[test]'``. To test that your code complies with Black, run the following in your root ``datasette`` repository checkout:: +Black is installed as part of the development dependency group. To test that your code complies with Black, run the following in your root ``datasette`` repository checkout:: - black . --check + uv run black . --check :: @@ -156,7 +156,7 @@ Black will be installed when you run ``pip install -e '.[test]'``. To test that If any of your code does not conform to Black you can run this to automatically fix those problems:: - black . + uv run black . 
:: @@ -171,7 +171,7 @@ blacken-docs The `blacken-docs `__ command applies Black formatting rules to code examples in the documentation. Run it like this:: - blacken-docs -l 60 docs/*.rst + uv run blacken-docs -l 60 docs/*.rst .. _contributing_formatting_prettier: @@ -208,17 +208,10 @@ Datasette's documentation lives in the ``docs/`` directory and is deployed autom The documentation is written using reStructuredText. You may find this article on `The subset of reStructuredText worth committing to memory `__ useful. -You can build it locally by installing ``sphinx`` and ``sphinx_rtd_theme`` in your Datasette development environment and then running ``make html`` directly in the ``docs/`` directory:: +You can build it locally once you have installed the development dependency group (which includes Sphinx and related tools) and then running ``make html`` directly in the ``docs/`` directory:: - # You may first need to activate your virtual environment: - source venv/bin/activate - - # Install the dependencies needed to build the docs - pip install -e .[docs] - - # Now build the docs cd docs/ - make html + uv run make html This will create the HTML version of the documentation in ``docs/_build/html``. You can open it in your browser like so:: @@ -228,9 +221,9 @@ Any time you make changes to a ``.rst`` file you can re-run ``make html`` to upd For added productivity, you can use use `sphinx-autobuild `__ to run Sphinx in auto-build mode. This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. -``sphinx-autobuild`` will have been installed when you ran ``pip install -e .[docs]``. In your ``docs/`` directory you can start the server by running the following:: +``sphinx-autobuild`` is included in the development dependency group. 
In your ``docs/`` directory you can start the server by running the following:: - make livehtml + uv run make livehtml Now browse to ``http://localhost:8000/`` to view the documentation. Any edits you make should be instantly reflected in your browser. @@ -243,7 +236,7 @@ Some pages of documentation (in particular the :ref:`cli_reference`) are automat To update these pages, run the following command:: - cog -r docs/*.rst + uv run cog -r docs/*.rst .. _contributing_continuous_deployment: diff --git a/pyproject.toml b/pyproject.toml index f3053447..87884341 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,20 +55,8 @@ CI = "https://github.com/simonw/datasette/actions?query=workflow%3ATest" [project.scripts] datasette = "datasette.cli:cli" -[project.optional-dependencies] -docs = [ - "Sphinx==7.4.7", - "furo==2025.9.25", - "sphinx-autobuild", - "codespell>=2.2.5", - "blacken-docs", - "sphinx-copybutton", - "sphinx-inline-tabs", - "myst-parser", - "sphinx-markdown-builder", - "ruamel.yaml", -] -test = [ +[dependency-groups] +dev = [ "pytest>=9", "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", @@ -78,7 +66,19 @@ test = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + # docs + "Sphinx==7.4.7", + "furo==2025.9.25", + "sphinx-autobuild", + "codespell>=2.2.5", + "sphinx-copybutton", + "sphinx-inline-tabs", + "myst-parser", + "sphinx-markdown-builder", + "ruamel.yaml", ] + +[project.optional-dependencies] rich = ["rich"] [build-system] From 3b4c7e1abed15c8343a46ff9bc0a8171511a3624 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 12 Dec 2025 21:43:00 -0800 Subject: [PATCH 18/53] {"ok": true} on row API, to be consistent with table --- datasette/views/row.py | 1 + tests/test_api.py | 1 + 2 files changed, 2 insertions(+) diff --git a/datasette/views/row.py b/datasette/views/row.py index c9b74b12..4f896632 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -95,6 +95,7 @@ class RowView(DataView): } data = { + "ok": True, "database": 
database, "table": table, "rows": rows, diff --git a/tests/test_api.py b/tests/test_api.py index 859c5809..16e1d8e6 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -710,6 +710,7 @@ async def test_invalid_custom_sql(ds_client): async def test_row(ds_client): response = await ds_client.get("/fixtures/simple_primary_key/1.json?_shape=objects") assert response.status_code == 200 + assert response.json()["ok"] is True assert response.json()["rows"] == [{"id": 1, "content": "hello"}] From 232a404743ad007285b02838c222845ee4d39cbd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 12 Dec 2025 22:18:35 -0800 Subject: [PATCH 19/53] Switch searchable_fts test table to FTS5, closes #2613 --- tests/fixtures.py | 5 +-- tests/test_api.py | 68 ++++++++++++-------------------- tests/test_internals_database.py | 6 +-- 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 8d600c9b..01c501f2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -536,9 +536,8 @@ INSERT INTO searchable_tags (searchable_id, tag) VALUES ; CREATE VIRTUAL TABLE "searchable_fts" - USING FTS4 (text1, text2, [name with . and spaces], content="searchable"); -INSERT INTO "searchable_fts" (rowid, text1, text2, [name with . and spaces]) - SELECT rowid, text1, text2, [name with . and spaces] FROM searchable; + USING FTS5 (text1, text2, [name with . and spaces], content="searchable", content_rowid="pk"); +INSERT INTO "searchable_fts" (searchable_fts) VALUES ('rebuild'); CREATE TABLE [select] ( [group] text, diff --git a/tests/test_api.py b/tests/test_api.py index 16e1d8e6..008fc42b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -515,22 +515,13 @@ async def test_database_page(ds_client): "private": False, }, { - "columns": Either( - [ - "text1", - "text2", - "name with . and spaces", - "searchable_fts", - "docid", - "__langid", - ], - # Get tests to pass on SQLite 3.25 as well - [ - "text1", - "text2", - "name with . 
and spaces", - ], - ), + "columns": [ + "text1", + "text2", + "name with . and spaces", + "searchable_fts", + "rank", + ], "count": 2, "foreign_keys": {"incoming": [], "outgoing": []}, "fts_table": "searchable_fts", @@ -540,26 +531,9 @@ async def test_database_page(ds_client): "private": False, }, { - "name": "searchable_fts_docsize", - "columns": ["docid", "size"], - "primary_keys": ["docid"], - "count": 2, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_segdir", - "columns": [ - "level", - "idx", - "start_block", - "leaves_end_block", - "end_block", - "root", - ], - "primary_keys": ["level", "idx"], + "name": "searchable_fts_config", + "columns": ["k", "v"], + "primary_keys": ["k"], "count": 1, "hidden": True, "fts_table": None, @@ -567,19 +541,29 @@ async def test_database_page(ds_client): "private": False, }, { - "name": "searchable_fts_segments", - "columns": ["blockid", "block"], - "primary_keys": ["blockid"], - "count": 0, + "name": "searchable_fts_data", + "columns": ["id", "block"], + "primary_keys": ["id"], + "count": 3, "hidden": True, "fts_table": None, "foreign_keys": {"incoming": [], "outgoing": []}, "private": False, }, { - "name": "searchable_fts_stat", - "columns": ["id", "value"], + "name": "searchable_fts_docsize", + "columns": ["id", "sz"], "primary_keys": ["id"], + "count": 2, + "hidden": True, + "fts_table": None, + "foreign_keys": {"incoming": [], "outgoing": []}, + "private": False, + }, + { + "name": "searchable_fts_idx", + "columns": ["segid", "term", "pgno"], + "primary_keys": ["segid", "term"], "count": 1, "hidden": True, "fts_table": None, diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 4a078f75..d2e06073 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -436,10 +436,10 @@ async def test_table_names(db): "searchable", "searchable_tags", "searchable_fts", - 
"searchable_fts_segments", - "searchable_fts_segdir", + "searchable_fts_data", + "searchable_fts_idx", "searchable_fts_docsize", - "searchable_fts_stat", + "searchable_fts_config", "select", "infinity", "facet_cities", From 97496d5a672c78271735dd77abde3248eea8b967 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 19:52:49 -0800 Subject: [PATCH 20/53] ?_extra=render_cells for tables, refs #2619 --- datasette/views/table.py | 31 ++++++++++++++++++++ tests/test_table_api.py | 62 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/datasette/views/table.py b/datasette/views/table.py index 007c0c85..c8f209d6 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1492,6 +1492,36 @@ async def table_view_data( async def extra_display_rows(run_display_columns_and_rows): return run_display_columns_and_rows["rows"] + async def extra_render_cells(): + "Rendered HTML for each cell using the render_cell plugin hook" + columns = [col[0] for col in results.description] + rendered_rows = [] + for row in rows: + rendered_row = {} + for value, column in zip(row, columns): + # Call render_cell plugin hook + plugin_display_value = None + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table_name, + database=database_name, + datasette=datasette, + request=request, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break + if plugin_display_value: + rendered_row[column] = str(plugin_display_value) + else: + # Default: convert value to string + rendered_row[column] = "" if value is None else str(value) + rendered_rows.append(rendered_row) + return rendered_rows + async def extra_query(): "Details of the underlying SQL query" return { @@ -1678,6 +1708,7 @@ async def table_view_data( run_display_columns_and_rows, extra_display_columns, extra_display_rows, + extra_render_cells, extra_debug, extra_request, 
extra_query, diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 653679e4..d5a8ca41 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1383,3 +1383,65 @@ async def test_table_extras(ds_client, extra, expected_json): ) assert response.status_code == 200 assert response.json() == expected_json + + +@pytest.mark.asyncio +async def test_extra_render_cells(): + """Test that _extra=render_cells returns rendered HTML from render_cell plugin hook""" + from datasette import hookimpl + from datasette.app import Datasette + + class TestRenderCellPlugin: + __name__ = "TestRenderCellPlugin" + + @hookimpl + def render_cell(self, value, column, table, database): + # Only modify cells in our test table + if table == "test_render" and column == "name": + return f"{value}" + return None + + ds = Datasette(memory=True) + await ds.invoke_startup() + db = ds.add_memory_database("test") + await db.execute_write( + "create table test_render (id integer primary key, name text)" + ) + await db.execute_write("insert into test_render values (1, 'Alice')") + await db.execute_write("insert into test_render values (2, 'Bob')") + + # Register our test plugin + ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") + + try: + # Request with _extra=render_cells + response = await ds.client.get("/test/test_render.json?_extra=render_cells") + assert response.status_code == 200 + data = response.json() + + # Verify the response structure + assert "render_cells" in data + assert "rows" in data + + # render_cells should be a list of rows, each row being a dict of column -> rendered HTML + render_cells = data["render_cells"] + assert len(render_cells) == 2 + + # First row: id=1, name='Alice' + # The 'name' column should be rendered by our plugin as Alice + assert render_cells[0]["name"] == "Alice" + # The 'id' column should use default rendering (just the value as string) + assert render_cells[0]["id"] == "1" + + # Second row: id=2, name='Bob' + assert 
render_cells[1]["name"] == "Bob" + assert render_cells[1]["id"] == "2" + + # The regular rows should still contain raw values + assert data["rows"] == [ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, + ] + + finally: + ds.pm.unregister(name="TestRenderCellPlugin") From eae94dc2c3db39ac2574a1f6394d67f1f07cc9fc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 20:03:10 -0800 Subject: [PATCH 21/53] Initial render_cell and foreign_key_tables extras for row Closes #2619, refs #2050 --- datasette/views/row.py | 39 +++++++++++++++++++++++++++- datasette/views/table.py | 4 +-- tests/test_api.py | 56 ++++++++++++++++++++++++++++++++++++++++ tests/test_table_api.py | 28 ++++++++++---------- 4 files changed, 111 insertions(+), 16 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 4f896632..077c33c2 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -12,7 +12,7 @@ from datasette.utils import ( from datasette.plugins import pm import json import sqlite_utils -from .table import display_columns_and_rows +from .table import display_columns_and_rows, _get_extras class RowView(DataView): @@ -104,11 +104,48 @@ class RowView(DataView): "primary_key_values": pk_values, } + # Handle _extra parameter (new style) + extras = _get_extras(request) + + # Also support legacy _extras parameter for backward compatibility if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): + extras.add("foreign_key_tables") + + # Process extras + if "foreign_key_tables" in extras: data["foreign_key_tables"] = await self.foreign_key_tables( database, table, pk_values ) + if "render_cell" in extras: + # Call render_cell plugin hook for each cell + rendered_rows = [] + for row in rows: + rendered_row = {} + for value, column in zip(row, columns): + # Call render_cell plugin hook + plugin_display_value = None + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table, + 
database=database, + datasette=self.ds, + request=request, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break + if plugin_display_value: + rendered_row[column] = str(plugin_display_value) + else: + # Default: convert value to string + rendered_row[column] = "" if value is None else str(value) + rendered_rows.append(rendered_row) + data["render_cell"] = rendered_rows + return ( data, template_data, diff --git a/datasette/views/table.py b/datasette/views/table.py index c8f209d6..9a3ae69f 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1492,7 +1492,7 @@ async def table_view_data( async def extra_display_rows(run_display_columns_and_rows): return run_display_columns_and_rows["rows"] - async def extra_render_cells(): + async def extra_render_cell(): "Rendered HTML for each cell using the render_cell plugin hook" columns = [col[0] for col in results.description] rendered_rows = [] @@ -1708,7 +1708,7 @@ async def table_view_data( run_display_columns_and_rows, extra_display_columns, extra_display_rows, - extra_render_cells, + extra_render_cell, extra_debug, extra_request, extra_query, diff --git a/tests/test_api.py b/tests/test_api.py index 008fc42b..1571fd5d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -752,6 +752,62 @@ async def test_row_foreign_key_tables(ds_client): ] +@pytest.mark.asyncio +async def test_row_extra_render_cell(): + """Test that _extra=render_cell returns rendered HTML from render_cell plugin hook on row pages""" + from datasette import hookimpl + from datasette.app import Datasette + + class TestRenderCellPlugin: + __name__ = "TestRenderCellPlugin" + + @hookimpl + def render_cell(self, value, column, table, database): + # Only modify cells in our test table + if table == "test_render" and column == "name": + return f"{value}" + return None + + ds = Datasette(memory=True) + await ds.invoke_startup() + db = 
ds.add_memory_database("test_row_render") + await db.execute_write( + "create table test_render (id integer primary key, name text)" + ) + await db.execute_write("insert into test_render values (1, 'Alice')") + + # Register our test plugin + ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") + + try: + # Request row with _extra=render_cell + response = await ds.client.get( + "/test_row_render/test_render/1.json?_extra=render_cell" + ) + assert response.status_code == 200 + data = response.json() + + # Verify the response structure + assert "render_cell" in data + assert "rows" in data + + # render_cell should be a list with one row (since this is a row page) + render_cell = data["render_cell"] + assert len(render_cell) == 1 + + # The row: id=1, name='Alice' + # The 'name' column should be rendered by our plugin as Alice + assert render_cell[0]["name"] == "Alice" + # The 'id' column should use default rendering (just the value as string) + assert render_cell[0]["id"] == "1" + + # The regular rows should still contain raw values + assert data["rows"] == [{"id": 1, "name": "Alice"}] + + finally: + ds.pm.unregister(name="TestRenderCellPlugin") + + def test_databases_json(app_client_two_attached_databases_one_immutable): response = app_client_two_attached_databases_one_immutable.get("/-/databases.json") databases = response.json diff --git a/tests/test_table_api.py b/tests/test_table_api.py index d5a8ca41..25419bb8 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1386,8 +1386,8 @@ async def test_table_extras(ds_client, extra, expected_json): @pytest.mark.asyncio -async def test_extra_render_cells(): - """Test that _extra=render_cells returns rendered HTML from render_cell plugin hook""" +async def test_extra_render_cell(): + """Test that _extra=render_cell returns rendered HTML from render_cell plugin hook""" from datasette import hookimpl from datasette.app import Datasette @@ -1403,7 +1403,7 @@ async def 
test_extra_render_cells(): ds = Datasette(memory=True) await ds.invoke_startup() - db = ds.add_memory_database("test") + db = ds.add_memory_database("test_table_render") await db.execute_write( "create table test_render (id integer primary key, name text)" ) @@ -1414,28 +1414,30 @@ async def test_extra_render_cells(): ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") try: - # Request with _extra=render_cells - response = await ds.client.get("/test/test_render.json?_extra=render_cells") + # Request with _extra=render_cell + response = await ds.client.get( + "/test_table_render/test_render.json?_extra=render_cell" + ) assert response.status_code == 200 data = response.json() # Verify the response structure - assert "render_cells" in data + assert "render_cell" in data assert "rows" in data - # render_cells should be a list of rows, each row being a dict of column -> rendered HTML - render_cells = data["render_cells"] - assert len(render_cells) == 2 + # render_cell should be a list of rows, each row being a dict of column -> rendered HTML + render_cell = data["render_cell"] + assert len(render_cell) == 2 # First row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice - assert render_cells[0]["name"] == "Alice" + assert render_cell[0]["name"] == "Alice" # The 'id' column should use default rendering (just the value as string) - assert render_cells[0]["id"] == "1" + assert render_cell[0]["id"] == "1" # Second row: id=2, name='Bob' - assert render_cells[1]["name"] == "Bob" - assert render_cells[1]["id"] == "2" + assert render_cell[1]["name"] == "Bob" + assert render_cell[1]["id"] == "2" # The regular rows should still contain raw values assert data["rows"] == [ From 6fede23a2ebb586c9f5dd6159907e259ff8f3082 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 20:18:26 -0800 Subject: [PATCH 22/53] Only return render_cell columns that differ from default, refs #2619 --- datasette/views/row.py | 3 --- 
datasette/views/table.py | 3 --- tests/test_api.py | 5 +++-- tests/test_table_api.py | 7 ++++--- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 077c33c2..718ee00c 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -140,9 +140,6 @@ class RowView(DataView): break if plugin_display_value: rendered_row[column] = str(plugin_display_value) - else: - # Default: convert value to string - rendered_row[column] = "" if value is None else str(value) rendered_rows.append(rendered_row) data["render_cell"] = rendered_rows diff --git a/datasette/views/table.py b/datasette/views/table.py index 9a3ae69f..b07b62ae 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1516,9 +1516,6 @@ async def table_view_data( break if plugin_display_value: rendered_row[column] = str(plugin_display_value) - else: - # Default: convert value to string - rendered_row[column] = "" if value is None else str(value) rendered_rows.append(rendered_row) return rendered_rows diff --git a/tests/test_api.py b/tests/test_api.py index 1571fd5d..41bad84e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -792,14 +792,15 @@ async def test_row_extra_render_cell(): assert "rows" in data # render_cell should be a list with one row (since this is a row page) + # Only columns modified by plugins are included (sparse output) render_cell = data["render_cell"] assert len(render_cell) == 1 # The row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice assert render_cell[0]["name"] == "Alice" - # The 'id' column should use default rendering (just the value as string) - assert render_cell[0]["id"] == "1" + # The 'id' column is not included since no plugin modified it + assert "id" not in render_cell[0] # The regular rows should still contain raw values assert data["rows"] == [{"id": 1, "name": "Alice"}] diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 
25419bb8..527550fb 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1426,18 +1426,19 @@ async def test_extra_render_cell(): assert "rows" in data # render_cell should be a list of rows, each row being a dict of column -> rendered HTML + # Only columns modified by plugins are included (sparse output) render_cell = data["render_cell"] assert len(render_cell) == 2 # First row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice assert render_cell[0]["name"] == "Alice" - # The 'id' column should use default rendering (just the value as string) - assert render_cell[0]["id"] == "1" + # The 'id' column is not included since no plugin modified it + assert "id" not in render_cell[0] # Second row: id=2, name='Bob' assert render_cell[1]["name"] == "Bob" - assert render_cell[1]["id"] == "2" + assert "id" not in render_cell[1] # The regular rows should still contain raw values assert data["rows"] == [ From 757ce92bafb91bc40c74f41fffd9c3d3c6fffdec Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Jan 2026 07:58:18 -0800 Subject: [PATCH 23/53] datasette.utils.StartupError() now becomes a click exception, closes #2624 --- datasette/cli.py | 10 ++++++++-- docs/plugin_hooks.rst | 8 +++++--- tests/test_cli.py | 26 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 21420491..1d0cb022 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -666,7 +666,10 @@ def serve( return ds # Run the "startup" plugin hooks - run_sync(ds.invoke_startup) + try: + run_sync(ds.invoke_startup) + except StartupError as e: + raise click.ClickException(e.args[0]) # Run async soundness checks - but only if we're not under pytest run_sync(lambda: check_databases(ds)) @@ -815,7 +818,10 @@ def create_token( ds = Datasette(secret=secret, plugins_dir=plugins_dir) # Run ds.invoke_startup() in an event loop - run_sync(ds.invoke_startup) + try: + run_sync(ds.invoke_startup) + 
except StartupError as e: + raise click.ClickException(e.args[0]) # Warn about any unknown actions actions = [] diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 118a6bde..da49811a 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -965,12 +965,13 @@ Here is an example that validates required plugin configuration. The server will .. code-block:: python + from datasette.utils import StartupError + @hookimpl def startup(datasette): config = datasette.plugin_config("my-plugin") or {} - assert ( - "required-setting" in config - ), "my-plugin requires setting required-setting" + if "required-setting" not in config: + raise StartupError("my-plugin requires setting required-setting") You can also return an async function, which will be awaited on startup. Use this option if you need to execute any database queries, for example this function which creates the ``my_table`` database table if it does not yet exist: @@ -994,6 +995,7 @@ Potential use-cases: * Run some initialization code for the plugin * Create database tables that a plugin needs on startup * Validate the configuration for a plugin on startup, and raise an error if it is invalid +* Raise a ``datasette.utils.StartupError("message")`` exception to prevent Datasette from starting and display that message to the user. .. 
note:: diff --git a/tests/test_cli.py b/tests/test_cli.py index 21b86569..36d90e82 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -304,6 +304,32 @@ def test_plugin_s_overwrite(): ) +def test_startup_error_from_plugin_is_click_exception(tmp_path): + plugins_dir = tmp_path / "plugins" + plugins_dir.mkdir() + (plugins_dir / "startup_error.py").write_text( + "from datasette import hookimpl\n" + "from datasette.utils import StartupError\n" + "\n" + "@hookimpl\n" + "def startup(datasette):\n" + ' raise StartupError("boom")\n', + "utf-8", + ) + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--plugins-dir", + str(plugins_dir), + "--get", + "/", + ], + ) + assert result.exit_code == 1 + assert "Error: boom" in result.output + + def test_setting_type_validation(): runner = CliRunner() result = runner.invoke(cli, ["--setting", "default_page_size", "dog"]) From b52655e85684c44690105e22a7028bad36ee5557 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Jan 2026 07:59:07 -0800 Subject: [PATCH 24/53] Ignore *.db in gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 70e6bbeb..ce256606 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ scratchpad uv.lock data.db +# test databases +*.db + # We don't use Pipfile, so ignore them Pipfile Pipfile.lock From b0436faa5e3c35977607da6a653425fc6bf43403 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 22 Jan 2026 07:03:05 -0800 Subject: [PATCH 25/53] Fix test isolation bug in test_startup_error_from_plugin_is_click_exception (#2627) * Fix test isolation bug in test_startup_error_from_plugin_is_click_exception The test creates a plugin that raises StartupError("boom") and registers it in the global plugin manager (pm). Without cleanup, this plugin leaks to subsequent tests, causing test_setting_boolean_validation_false_values to fail with "Error: boom" instead of "Forbidden". 
Add try/finally block to ensure the plugin is unregistered after the test completes, following the established cleanup pattern used elsewhere in the test suite. * Fix blacken-docs formatting in plugin_hooks.rst Apply blacken-docs formatting to code example that exceeded the 60 character line limit. --------- Co-authored-by: Claude --- docs/plugin_hooks.rst | 5 ++++- tests/test_cli.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index da49811a..ad4a70f8 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -967,11 +967,14 @@ Here is an example that validates required plugin configuration. The server will from datasette.utils import StartupError + @hookimpl def startup(datasette): config = datasette.plugin_config("my-plugin") or {} if "required-setting" not in config: - raise StartupError("my-plugin requires setting required-setting") + raise StartupError( + "my-plugin requires setting required-setting" + ) You can also return an async function, which will be awaited on startup. 
Use this option if you need to execute any database queries, for example this function which creates the ``my_table`` database table if it does not yet exist: diff --git a/tests/test_cli.py b/tests/test_cli.py index 36d90e82..6cdfd924 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,7 +4,7 @@ from .fixtures import ( EXPECTED_PLUGINS, ) from datasette.app import SETTINGS -from datasette.plugins import DEFAULT_PLUGINS +from datasette.plugins import DEFAULT_PLUGINS, pm from datasette.cli import cli, serve from datasette.version import __version__ from datasette.utils import tilde_encode @@ -326,8 +326,16 @@ def test_startup_error_from_plugin_is_click_exception(tmp_path): "/", ], ) - assert result.exit_code == 1 - assert "Error: boom" in result.output + try: + assert result.exit_code == 1 + assert "Error: boom" in result.output + finally: + # Cleanup: Unregister the plugin to avoid test isolation issues + to_unregister = [ + p for p in pm.get_plugins() if p.__name__ == "startup_error.py" + ] + if to_unregister: + pm.unregister(to_unregister[0]) def test_setting_type_validation(): From 66d2a033f8ad124e08cf4f0b488454c76dfdb63f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 23 Jan 2026 20:43:16 -0800 Subject: [PATCH 26/53] Switch to ruff and fix all lint errors, refs #2630 --- .github/workflows/test.yml | 2 ++ Justfile | 12 +++++++---- datasette/app.py | 4 ++-- datasette/default_permissions/__init__.py | 18 ++++++++-------- datasette/views/base.py | 1 - pyproject.toml | 5 +++++ setup.cfg | 3 --- tests/test_allowed_resources.py | 1 - tests/test_api.py | 26 ++++++----------------- tests/test_config_dir.py | 2 +- tests/test_crossdb.py | 2 +- tests/test_csv.py | 6 ------ tests/test_filters.py | 21 ------------------ tests/test_html.py | 9 +------- tests/test_internals_datasette.py | 2 +- tests/test_permissions.py | 3 +-- tests/test_plugins.py | 6 ++---- tests/test_restriction_sql.py | 4 ++-- tests/test_schema_endpoints.py | 1 - tests/test_table_api.py | 
9 +------- tests/test_table_html.py | 8 ++----- 21 files changed, 44 insertions(+), 101 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3790c788..b1ba3232 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,8 @@ jobs: tests/test_datasette_https_server.sh - name: Black run: black --check . + - name: Ruff + run: ruff check datasette tests - name: Check if cog needs to be run run: | cog --check docs/*.rst diff --git a/Justfile b/Justfile index 8c50e5ca..657881be 100644 --- a/Justfile +++ b/Justfile @@ -17,12 +17,16 @@ export DATASETTE_SECRET := "not_a_secret" uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt uv run codespell tests --ignore-words docs/codespell-ignore-words.txt -# Run linters: black, flake8, mypy, cog +# Run linters: black, ruff, cog @lint: codespell - uv run black . --check - uv run flake8 + uv run black datasette tests --check + uv run ruff check datasette tests uv run cog --check README.md docs/*.rst +# Apply ruff fixes +@fix: + uv run ruff check --fix datasette tests + # Rebuild docs with cog @cog: uv run cog -r README.md docs/*.rst @@ -37,7 +41,7 @@ export DATASETTE_SECRET := "not_a_secret" # Apply Black @black: - uv run black . 
+ uv run black datasette tests # Apply blacken-docs @blacken-docs: diff --git a/datasette/app.py b/datasette/app.py index b9955925..a5cd75c5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -6,7 +6,7 @@ import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: - from datasette.permissions import AllowedResource, Resource + from datasette.permissions import Resource import asgi_csrf import collections import dataclasses @@ -1144,7 +1144,7 @@ class Datasette: # Validate that resource is a Resource object or None if resource is not None and not isinstance(resource, Resource): - raise TypeError(f"resource must be a Resource subclass instance or None.") + raise TypeError("resource must be a Resource subclass instance or None.") # Check if actor can see it if not await self.allowed(action=action, resource=resource, actor=actor): diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 4c82d705..40373fa7 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -26,18 +26,18 @@ from datasette import hookimpl # Re-export all hooks and public utilities from .restrictions import ( - actor_restrictions_sql, - restrictions_allow_action, - ActorRestrictions, + actor_restrictions_sql as actor_restrictions_sql, + restrictions_allow_action as restrictions_allow_action, + ActorRestrictions as ActorRestrictions, ) -from .root import root_user_permissions_sql -from .config import config_permissions_sql +from .root import root_user_permissions_sql as root_user_permissions_sql +from .config import config_permissions_sql as config_permissions_sql from .defaults import ( - default_allow_sql_check, - default_action_permissions_sql, - DEFAULT_ALLOW_ACTIONS, + default_allow_sql_check as default_allow_sql_check, + default_action_permissions_sql as default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS as DEFAULT_ALLOW_ACTIONS, ) -from .tokens import 
actor_from_signed_api_token +from .tokens import actor_from_signed_api_token as actor_from_signed_api_token @hookimpl diff --git a/datasette/views/base.py b/datasette/views/base.py index 5216924f..bdc9f742 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -1,7 +1,6 @@ import asyncio import csv import hashlib -import json import sys import textwrap import time diff --git a/pyproject.toml b/pyproject.toml index 87884341..6fca673d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dev = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + "ruff>=0.9", # docs "Sphinx==7.4.7", "furo==2025.9.25", @@ -94,5 +95,9 @@ datasette = ["templates/*.html"] [tool.setuptools.dynamic] version = {attr = "datasette.version.__version__"} +[tool.ruff] +line-length = 160 +select = ["E", "F", "W"] + [tool.uv] package = true diff --git a/setup.cfg b/setup.cfg index ebf43062..b7e47898 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,2 @@ [aliases] test=pytest - -[flake8] -max-line-length = 160 diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index 0cd48ea9..08adbe48 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -117,7 +117,6 @@ async def test_tables_endpoint_database_restriction(test_ds): # Bob should only see analytics tables analytics_tables = [m for m in result if m["name"].startswith("analytics/")] - production_tables = [m for m in result if m["name"].startswith("production/")] assert len(analytics_tables) == 3 table_names = {m["name"] for m in analytics_tables} diff --git a/tests/test_api.py b/tests/test_api.py index 41bad84e..907d7445 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,21 +1,7 @@ from datasette.app import Datasette from datasette.plugins import DEFAULT_PLUGINS from datasette.version import __version__ -from .fixtures import ( # noqa - app_client, - app_client_no_files, - app_client_with_dot, - app_client_shorter_time_limit, - 
app_client_two_attached_databases_one_immutable, - app_client_larger_cache_size, - app_client_with_cors, - app_client_two_attached_databases, - app_client_conflicting_database_names, - app_client_immutable_and_inspect_file, - make_app_client, - EXPECTED_PLUGINS, - METADATA, -) +from .fixtures import make_app_client, EXPECTED_PLUGINS import pathlib import pytest import sys @@ -815,14 +801,14 @@ def test_databases_json(app_client_two_attached_databases_one_immutable): assert 2 == len(databases) extra_database, fixtures_database = databases assert "extra database" == extra_database["name"] - assert None == extra_database["hash"] - assert True == extra_database["is_mutable"] - assert False == extra_database["is_memory"] + assert extra_database["hash"] is None + assert extra_database["is_mutable"] is True + assert extra_database["is_memory"] is False assert "fixtures" == fixtures_database["name"] assert fixtures_database["hash"] is not None - assert False == fixtures_database["is_mutable"] - assert False == fixtures_database["is_memory"] + assert fixtures_database["is_mutable"] is False + assert fixtures_database["is_memory"] is False @pytest.mark.asyncio diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index 0598a4a6..f9a90fbe 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -87,7 +87,7 @@ def test_invalid_settings(config_dir): ) try: with pytest.raises(StartupError) as ex: - ds = Datasette([], config_dir=config_dir) + Datasette([], config_dir=config_dir) assert ex.value.args[0] == "Invalid setting 'invalid' in config file" finally: (config_dir / "datasette.json").write_text(previous, "utf-8") diff --git a/tests/test_crossdb.py b/tests/test_crossdb.py index 1ec1a05c..7807cd5d 100644 --- a/tests/test_crossdb.py +++ b/tests/test_crossdb.py @@ -67,7 +67,7 @@ def test_crossdb_attached_database_list_display( ): app_client = app_client_two_attached_databases_crossdb_enabled response = app_client.get("/_memory") - response2 = 
app_client.get("/") + app_client.get("/") for fragment in ( "databases are attached to this connection", "
  • fixtures - ", diff --git a/tests/test_csv.py b/tests/test_csv.py index b4a71169..5589bd97 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -1,12 +1,6 @@ from datasette.app import Datasette from bs4 import BeautifulSoup as Soup import pytest -from .fixtures import ( # noqa - app_client, - app_client_csv_max_mb_one, - app_client_with_cors, - app_client_with_trace, -) import urllib.parse EXPECTED_TABLE_CSV = """id,content diff --git a/tests/test_filters.py b/tests/test_filters.py index a3fada98..eda9e9a1 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -103,27 +103,6 @@ async def test_through_filters_from_request(ds_client): assert filter_args.extra_context == {} -@pytest.mark.asyncio -async def test_through_filters_from_request(ds_client): - request = Request.fake( - '/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' - ) - filter_args = await through_filters( - request=request, - datasette=ds_client.ds, - table="roadside_attractions", - database="fixtures", - )() - assert filter_args.where_clauses == [ - "pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)" - ] - assert filter_args.params == {"p0": "1"} - assert filter_args.human_descriptions == [ - 'roadside_attraction_characteristics.characteristic_id = "1"' - ] - assert filter_args.extra_context == {} - - @pytest.mark.asyncio async def test_where_filters_from_request(ds_client): await ds_client.ds.invoke_startup() diff --git a/tests/test_html.py b/tests/test_html.py index 7b667301..8fad5764 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,14 +1,7 @@ from bs4 import BeautifulSoup as Soup from datasette.app import Datasette from datasette.utils import allowed_pragmas -from .fixtures import ( # noqa - app_client, - app_client_base_url_prefix, - app_client_shorter_time_limit, - app_client_two_attached_databases, - make_app_client, - METADATA, -) +from .fixtures import 
make_app_client from .utils import assert_footer_links, inner_html import copy import json diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index c64620a6..b378a158 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -158,7 +158,7 @@ def test_datasette_error_if_string_not_list(tmpdir): # https://github.com/simonw/datasette/issues/1985 db_path = str(tmpdir / "data.db") with pytest.raises(ValueError): - ds = Datasette(db_path) + Datasette(db_path) @pytest.mark.asyncio diff --git a/tests/test_permissions.py b/tests/test_permissions.py index e2dd92b8..96c0cf6f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -2,7 +2,7 @@ import collections from datasette.app import Datasette from datasette.cli import cli from datasette.default_permissions import restrictions_allow_action -from .fixtures import app_client, assert_permissions_checked, make_app_client +from .fixtures import assert_permissions_checked, make_app_client from click.testing import CliRunner from bs4 import BeautifulSoup as Soup import copy @@ -1481,7 +1481,6 @@ async def test_actor_restrictions_view_instance_only(perms_ds): assert response.status_code == 200 # But no databases should be visible (no view-database permission) - data = response.json() # The instance is visible but databases list should be empty or minimal # Actually, let's check via allowed_resources page = await perms_ds.allowed_resources("view-database", actor) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 42995c0d..6c23b3ef 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1172,8 +1172,6 @@ async def test_hook_filters_from_request(ds_client): @pytest.mark.asyncio @pytest.mark.parametrize("extra_metadata", (False, True)) async def test_hook_register_actions(extra_metadata): - from datasette.permissions import Action - from datasette.resources import DatabaseResource, InstanceResource ds = Datasette( config=( @@ 
-1527,7 +1525,7 @@ async def test_hook_register_events(): @pytest.mark.asyncio -async def test_hook_register_actions(): +async def test_hook_register_actions_view_collection(): datasette = Datasette(memory=True, plugins_dir=PLUGINS_DIR) await datasette.invoke_startup() # Check that the custom action from my_plugin.py is registered @@ -1545,7 +1543,7 @@ async def test_hook_register_actions_with_custom_resources(): - A parent-level action (DocumentCollectionResource) - A child-level action (DocumentResource) """ - from datasette.permissions import Resource, Action + from datasette.permissions import Resource # Define custom Resource classes class DocumentCollectionResource(Resource): diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py index f23eb839..df6abd29 100644 --- a/tests/test_restriction_sql.py +++ b/tests/test_restriction_sql.py @@ -182,8 +182,8 @@ async def test_also_requires_with_restrictions(): """ ds = Datasette() await ds.invoke_startup() - db1 = ds.add_memory_database("db1_also_requires") - db2 = ds.add_memory_database("db2_also_requires") + ds.add_memory_database("db1_also_requires") + ds.add_memory_database("db2_also_requires") await ds._refresh_schemas() # Actor restricted to only db1_also_requires for view-database diff --git a/tests/test_schema_endpoints.py b/tests/test_schema_endpoints.py index 5500a7b0..50742df2 100644 --- a/tests/test_schema_endpoints.py +++ b/tests/test_schema_endpoints.py @@ -1,4 +1,3 @@ -import asyncio import pytest import pytest_asyncio from datasette.app import Datasette diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 527550fb..49df3ad5 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1,13 +1,6 @@ from datasette.utils import detect_json1 from datasette.utils.sqlite import sqlite_version -from .fixtures import ( # noqa - app_client, - app_client_with_trace, - app_client_returned_rows_matches_page_size, - generate_compound_rows, - generate_sortable_rows, - 
make_app_client, -) +from .fixtures import generate_compound_rows, generate_sortable_rows, make_app_client import json import pytest import urllib diff --git a/tests/test_table_html.py b/tests/test_table_html.py index e3ddb4b0..90be591a 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -1,10 +1,6 @@ from datasette.app import Datasette from bs4 import BeautifulSoup as Soup -from .fixtures import ( # noqa - app_client, - make_app_client, - app_client_with_dot, -) +from .fixtures import make_app_client import pathlib import pytest import urllib.parse @@ -1263,7 +1259,7 @@ async def test_foreign_key_labels_obey_permissions(config): "insert or replace into b (id, name, a_id) values (1, 'world', 1)" ) # Anonymous user can see table b but not table a - blah = await ds.client.get("/foreign_key_labels.json") + await ds.client.get("/foreign_key_labels.json") anon_a = await ds.client.get("/foreign_key_labels/a.json?_labels=on") assert anon_a.status_code == 403 anon_b = await ds.client.get("/foreign_key_labels/b.json?_labels=on") From 7915c46ddd50e058cfc441c6b061cee177d6c562 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 23 Jan 2026 20:57:25 -0800 Subject: [PATCH 27/53] Fix flaky test_database_page test with deterministic ordering (#2628) * Fix flaky test_database_page test with deterministic ordering - Add ORDER BY to table_names() query in database.py - Sort foreign keys deterministically in get_all_foreign_keys() - Refactor test_database_page to use property-based assertions instead of 500+ lines of hardcoded expected data - Run blacken-docs on plugin_hooks.rst * Update test_row_foreign_key_tables for new deterministic FK ordering The foreign keys are now sorted by (other_table, column, other_column), so complex_foreign_keys comes before foreign_key_references alphabetically. * Update test_table_names for new alphabetical ordering The table_names() method now returns tables sorted alphabetically. 
* Fix for test that fails prior to SQLite 3.37 --------- Co-authored-by: Claude --- datasette/database.py | 2 +- datasette/utils/__init__.py | 14 +- tests/test_api.py | 725 +++++++++---------------------- tests/test_internals_database.py | 45 +- 4 files changed, 243 insertions(+), 543 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index e5858128..8e4ee2b6 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -431,7 +431,7 @@ class Database: async def table_names(self): results = await self.execute( - "select name from sqlite_master where type='table'" + "select name from sqlite_master where type='table' order by name" ) return [r[0] for r in results.rows] diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index ac2c74da..fb864077 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -612,7 +612,10 @@ def get_outbound_foreign_keys(conn, table): def get_all_foreign_keys(conn): tables = [ - r[0] for r in conn.execute('select name from sqlite_master where type="table"') + r[0] + for r in conn.execute( + 'select name from sqlite_master where type="table" order by name' + ) ] table_to_foreign_keys = {} for table in tables: @@ -634,6 +637,15 @@ def get_all_foreign_keys(conn): {"other_table": table_name, "column": from_, "other_column": to_} ) + # Sort foreign keys for deterministic ordering + for table in table_to_foreign_keys: + table_to_foreign_keys[table]["incoming"].sort( + key=lambda fk: (fk["other_table"], fk["column"], fk["other_column"]) + ) + table_to_foreign_keys[table]["outgoing"].sort( + key=lambda fk: (fk["other_table"], fk["column"], fk["other_column"]) + ) + return table_to_foreign_keys diff --git a/tests/test_api.py b/tests/test_api.py index 907d7445..e3951df9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,6 @@ from datasette.app import Datasette from datasette.plugins import DEFAULT_PLUGINS +from datasette.utils.sqlite import sqlite_version from 
datasette.version import __version__ from .fixtures import make_app_client, EXPECTED_PLUGINS import pathlib @@ -59,504 +60,189 @@ async def test_database_page(ds_client): assert response.status_code == 200 data = response.json() assert data["database"] == "fixtures" - assert data["tables"] == [ - { - "name": "123_starts_with_digits", - "columns": ["content"], - "primary_keys": [], - "count": 0, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "Table With Space In Name", - "columns": ["pk", "content"], - "primary_keys": ["pk"], - "count": 0, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "attraction_characteristic", - "columns": ["pk", "name"], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "roadside_attraction_characteristics", - "column": "pk", - "other_column": "characteristic_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "binary_data", - "columns": ["data"], - "primary_keys": [], - "count": 3, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "complex_foreign_keys", - "columns": ["pk", "f1", "f2", "f3"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "simple_primary_key", - "column": "f3", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "f2", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "f1", - "other_column": "id", - }, - ], - }, - "private": False, - }, - { - "name": "compound_primary_key", - "columns": ["pk1", "pk2", "content"], - "primary_keys": ["pk1", "pk2"], - "count": 2, - "hidden": False, - "fts_table": None, - 
"foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "compound_three_primary_keys", - "columns": ["pk1", "pk2", "pk3", "content"], - "primary_keys": ["pk1", "pk2", "pk3"], - "count": 1001, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "custom_foreign_key_label", - "columns": ["pk", "foreign_key_with_custom_label"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "primary_key_multiple_columns_explicit_label", - "column": "foreign_key_with_custom_label", - "other_column": "id", - } - ], - }, - "private": False, - }, - { - "name": "facet_cities", - "columns": ["id", "name"], - "primary_keys": ["id"], - "count": 4, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "facetable", - "column": "id", - "other_column": "_city_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "facetable", - "columns": [ - "pk", - "created", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - "primary_keys": ["pk"], - "count": 15, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "facet_cities", - "column": "_city_id", - "other_column": "id", - } - ], - }, - "private": False, - }, - { - "name": "foreign_key_references", - "columns": [ - "pk", - "foreign_key_with_label", - "foreign_key_with_blank_label", - "foreign_key_with_no_label", - "foreign_key_compound_pk1", - "foreign_key_compound_pk2", - ], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "primary_key_multiple_columns", - "column": "foreign_key_with_no_label", - "other_column": "id", - 
}, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_blank_label", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_label", - "other_column": "id", - }, - ], - }, - "private": False, - }, - ] + [ - { - "name": "infinity", - "columns": ["value"], - "primary_keys": [], - "count": 3, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "primary_key_multiple_columns", - "columns": ["id", "content", "content2"], - "primary_keys": ["id"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_no_label", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "primary_key_multiple_columns_explicit_label", - "columns": ["id", "content", "content2"], - "primary_keys": ["id"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "custom_foreign_key_label", - "column": "id", - "other_column": "foreign_key_with_custom_label", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "roadside_attraction_characteristics", - "columns": ["attraction_id", "characteristic_id"], - "primary_keys": [], - "count": 5, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "attraction_characteristic", - "column": "characteristic_id", - "other_column": "pk", - }, - { - "other_table": "roadside_attractions", - "column": "attraction_id", - "other_column": "pk", - }, - ], - }, - "private": False, - }, - { - "name": "roadside_attractions", - "columns": ["pk", "name", "address", "url", "latitude", "longitude"], - "primary_keys": ["pk"], - "count": 4, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": 
"roadside_attraction_characteristics", - "column": "pk", - "other_column": "attraction_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "searchable", - "columns": ["pk", "text1", "text2", "name with . and spaces"], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": "searchable_fts", - "foreign_keys": { - "incoming": [ - { - "other_table": "searchable_tags", - "column": "pk", - "other_column": "searchable_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "searchable_tags", - "columns": ["searchable_id", "tag"], - "primary_keys": ["searchable_id", "tag"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - {"other_table": "tags", "column": "tag", "other_column": "tag"}, - { - "other_table": "searchable", - "column": "searchable_id", - "other_column": "pk", - }, - ], - }, - "private": False, - }, - { - "name": "select", - "columns": ["group", "having", "and", "json"], - "primary_keys": [], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "simple_primary_key", - "columns": ["id", "content"], - "primary_keys": ["id"], - "count": 5, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_blank_label", - }, - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_label", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f3", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f2", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f1", - }, - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "sortable", - "columns": [ - "pk1", - "pk2", - "content", - "sortable", - 
"sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], - "primary_keys": ["pk1", "pk2"], - "count": 201, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "table/with/slashes.csv", - "columns": ["pk", "content"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "tags", - "columns": ["tag"], - "primary_keys": ["tag"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "searchable_tags", - "column": "tag", - "other_column": "tag", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "no_primary_key", - "columns": ["content", "a", "b", "c"], - "primary_keys": [], - "count": 201, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "columns": [ - "text1", - "text2", - "name with . 
and spaces", - "searchable_fts", - "rank", - ], - "count": 2, - "foreign_keys": {"incoming": [], "outgoing": []}, - "fts_table": "searchable_fts", - "hidden": True, - "name": "searchable_fts", - "primary_keys": [], - "private": False, - }, - { - "name": "searchable_fts_config", - "columns": ["k", "v"], - "primary_keys": ["k"], - "count": 1, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_data", - "columns": ["id", "block"], - "primary_keys": ["id"], - "count": 3, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_docsize", - "columns": ["id", "sz"], - "primary_keys": ["id"], - "count": 2, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_idx", - "columns": ["segid", "term", "pgno"], - "primary_keys": ["segid", "term"], - "count": 1, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - ] + + # Build lookup for easier assertions + tables = data["tables"] + tables_by_name = {t["name"]: t for t in tables} + + # Verify tables are sorted by (hidden, name) - visible first, then hidden + table_names = [t["name"] for t in tables] + expected_order = sorted(tables, key=lambda t: (t["hidden"], t["name"])) + assert table_names == [t["name"] for t in expected_order] + + # Expected visible tables (not hidden) + expected_visible_tables = { + "123_starts_with_digits", + "Table With Space In Name", + "attraction_characteristic", + "binary_data", + "complex_foreign_keys", + "compound_primary_key", + "compound_three_primary_keys", + "custom_foreign_key_label", + "facet_cities", + "facetable", + "foreign_key_references", + "infinity", + "primary_key_multiple_columns", + "primary_key_multiple_columns_explicit_label", + 
"roadside_attraction_characteristics", + "roadside_attractions", + "searchable", + "searchable_tags", + "select", + "simple_primary_key", + "sortable", + "table/with/slashes.csv", + "tags", + } + + # Expected hidden tables + expected_hidden_tables = { + "no_primary_key", + "searchable_fts", + "searchable_fts_config", + "searchable_fts_data", + "searchable_fts_docsize", + "searchable_fts_idx", + } + + # Verify all expected tables exist + assert expected_visible_tables.issubset(tables_by_name.keys()) + assert expected_hidden_tables.issubset(tables_by_name.keys()) + + # Verify hidden status + visible_tables = {t["name"] for t in tables if not t["hidden"]} + hidden_tables = {t["name"] for t in tables if t["hidden"]} + assert expected_visible_tables == visible_tables + assert expected_hidden_tables == hidden_tables + + # Helper to compare foreign keys (order-insensitive) + def fk_set(fks): + return {(fk["other_table"], fk["column"], fk["other_column"]) for fk in fks} + + # Test specific table properties + # -- facetable: has outgoing FK to facet_cities + facetable = tables_by_name["facetable"] + assert facetable["count"] == 15 + assert facetable["primary_keys"] == ["pk"] + assert facetable["fts_table"] is None + assert facetable["private"] is False + assert fk_set(facetable["foreign_keys"]["outgoing"]) == { + ("facet_cities", "_city_id", "id") + } + assert fk_set(facetable["foreign_keys"]["incoming"]) == set() + + # -- facet_cities: has incoming FK from facetable + facet_cities = tables_by_name["facet_cities"] + assert facet_cities["count"] == 4 + assert facet_cities["columns"] == ["id", "name"] + assert fk_set(facet_cities["foreign_keys"]["incoming"]) == { + ("facetable", "id", "_city_id") + } + + # -- simple_primary_key: has multiple incoming FKs + simple_pk = tables_by_name["simple_primary_key"] + assert simple_pk["count"] == 5 + assert simple_pk["columns"] == ["id", "content"] + assert simple_pk["primary_keys"] == ["id"] + # Should have incoming FKs from 
complex_foreign_keys (f1, f2, f3) and foreign_key_references + incoming = fk_set(simple_pk["foreign_keys"]["incoming"]) + assert ("complex_foreign_keys", "id", "f1") in incoming + assert ("complex_foreign_keys", "id", "f2") in incoming + assert ("complex_foreign_keys", "id", "f3") in incoming + assert ("foreign_key_references", "id", "foreign_key_with_label") in incoming + assert ("foreign_key_references", "id", "foreign_key_with_blank_label") in incoming + + # -- complex_foreign_keys: has multiple outgoing FKs to same table + complex_fk = tables_by_name["complex_foreign_keys"] + assert complex_fk["count"] == 1 + assert complex_fk["columns"] == ["pk", "f1", "f2", "f3"] + outgoing = fk_set(complex_fk["foreign_keys"]["outgoing"]) + assert outgoing == { + ("simple_primary_key", "f1", "id"), + ("simple_primary_key", "f2", "id"), + ("simple_primary_key", "f3", "id"), + } + + # -- searchable: has FTS table association + searchable = tables_by_name["searchable"] + assert searchable["count"] == 2 + assert searchable["fts_table"] == "searchable_fts" + assert searchable["columns"] == ["pk", "text1", "text2", "name with . 
and spaces"] + + # -- searchable_fts: is the FTS virtual table (hidden) + searchable_fts = tables_by_name["searchable_fts"] + assert searchable_fts["hidden"] is True + assert searchable_fts["fts_table"] == "searchable_fts" + # The "rank" column became visible in pragma_table_info in SQLite 3.37+ + if sqlite_version() >= (3, 37, 0): + assert "rank" in searchable_fts["columns"] + + # -- compound primary keys + compound_pk = tables_by_name["compound_primary_key"] + assert compound_pk["primary_keys"] == ["pk1", "pk2"] + assert compound_pk["count"] == 2 + + compound_three = tables_by_name["compound_three_primary_keys"] + assert compound_three["primary_keys"] == ["pk1", "pk2", "pk3"] + assert compound_three["count"] == 1001 + + # -- sortable: generated data + sortable = tables_by_name["sortable"] + assert sortable["count"] == 201 + assert sortable["primary_keys"] == ["pk1", "pk2"] + + # -- no_primary_key: hidden table with generated data + no_pk = tables_by_name["no_primary_key"] + assert no_pk["hidden"] is True + assert no_pk["count"] == 201 + assert no_pk["primary_keys"] == [] + + # -- roadside attractions relationship chain + attractions = tables_by_name["roadside_attractions"] + assert attractions["count"] == 4 + assert fk_set(attractions["foreign_keys"]["incoming"]) == { + ("roadside_attraction_characteristics", "pk", "attraction_id") + } + + characteristics = tables_by_name["attraction_characteristic"] + assert characteristics["count"] == 2 + assert fk_set(characteristics["foreign_keys"]["incoming"]) == { + ("roadside_attraction_characteristics", "pk", "characteristic_id") + } + + # -- searchable_tags: multiple outgoing FKs + searchable_tags = tables_by_name["searchable_tags"] + assert searchable_tags["primary_keys"] == ["searchable_id", "tag"] + outgoing = fk_set(searchable_tags["foreign_keys"]["outgoing"]) + assert outgoing == { + ("searchable", "searchable_id", "pk"), + ("tags", "tag", "tag"), + } + + # -- tables with special names + assert 
"123_starts_with_digits" in tables_by_name + assert "Table With Space In Name" in tables_by_name + assert "table/with/slashes.csv" in tables_by_name + assert "select" in tables_by_name # SQL reserved word + + # Verify select table has SQL reserved word columns + select_table = tables_by_name["select"] + assert set(select_table["columns"]) == {"group", "having", "and", "json"} + + # Verify all tables have required fields + for table in tables: + assert "name" in table + assert "columns" in table + assert "primary_keys" in table + assert "count" in table + assert "hidden" in table + assert "fts_table" in table + assert "foreign_keys" in table + assert "private" in table + assert "incoming" in table["foreign_keys"] + assert "outgoing" in table["foreign_keys"] def test_no_files_uses_memory_database(app_client_no_files): @@ -699,7 +385,29 @@ async def test_row_foreign_key_tables(ds_client): "/fixtures/simple_primary_key/1.json?_extras=foreign_key_tables" ) assert response.status_code == 200 + # Foreign keys are sorted by (other_table, column, other_column) assert response.json()["foreign_key_tables"] == [ + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f1", + "count": 1, + "link": "/fixtures/complex_foreign_keys?f1=1", + }, + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f2", + "count": 0, + "link": "/fixtures/complex_foreign_keys?f2=1", + }, + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f3", + "count": 1, + "link": "/fixtures/complex_foreign_keys?f3=1", + }, { "other_table": "foreign_key_references", "column": "id", @@ -714,27 +422,6 @@ async def test_row_foreign_key_tables(ds_client): "count": 1, "link": "/fixtures/foreign_key_references?foreign_key_with_label=1", }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f3", - "count": 1, - "link": "/fixtures/complex_foreign_keys?f3=1", - }, - { - "other_table": "complex_foreign_keys", - 
"column": "id", - "other_column": "f2", - "count": 0, - "link": "/fixtures/complex_foreign_keys?f2=1", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f1", - "count": 1, - "link": "/fixtures/complex_foreign_keys?f1=1", - }, ] diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index d2e06073..02c67bfc 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -418,36 +418,37 @@ async def test_get_all_foreign_keys(db): @pytest.mark.asyncio async def test_table_names(db): table_names = await db.table_names() + # Tables are sorted alphabetically by name assert table_names == [ - "simple_primary_key", - "primary_key_multiple_columns", - "primary_key_multiple_columns_explicit_label", - "compound_primary_key", - "compound_three_primary_keys", - "foreign_key_references", - "sortable", - "no_primary_key", "123_starts_with_digits", "Table With Space In Name", - "table/with/slashes.csv", + "attraction_characteristic", + "binary_data", "complex_foreign_keys", + "compound_primary_key", + "compound_three_primary_keys", "custom_foreign_key_label", - "tags", - "searchable", - "searchable_tags", - "searchable_fts", - "searchable_fts_data", - "searchable_fts_idx", - "searchable_fts_docsize", - "searchable_fts_config", - "select", - "infinity", "facet_cities", "facetable", - "binary_data", - "roadside_attractions", - "attraction_characteristic", + "foreign_key_references", + "infinity", + "no_primary_key", + "primary_key_multiple_columns", + "primary_key_multiple_columns_explicit_label", "roadside_attraction_characteristics", + "roadside_attractions", + "searchable", + "searchable_fts", + "searchable_fts_config", + "searchable_fts_data", + "searchable_fts_docsize", + "searchable_fts_idx", + "searchable_tags", + "select", + "simple_primary_key", + "sortable", + "table/with/slashes.csv", + "tags", ] From 7988a179fe317cdb3dfa5c13d879d192ae36898d Mon Sep 17 00:00:00 2001 From: Simon Willison 
Date: Fri, 23 Jan 2026 21:03:16 -0800 Subject: [PATCH 28/53] Throttle schema refreshes to at most once per second, refs #2629 --- datasette/app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index a5cd75c5..75f6071e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -589,6 +589,10 @@ class Datasette: return None async def refresh_schemas(self): + # Throttle schema refreshes to at most once per second + if time.monotonic() - getattr(self, "_last_schema_refresh", 0) < 1.0: + return + self._last_schema_refresh = time.monotonic() if self._refresh_schemas_lock.locked(): return async with self._refresh_schemas_lock: From 2f7b120177f3285a8d504d5810fb081711d1b979 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 24 Jan 2026 22:07:54 -0800 Subject: [PATCH 29/53] Minor speedup for remove_infinites, refs #2629 --- datasette/utils/__init__.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index fb864077..4aaed967 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -901,18 +901,26 @@ _infinities = {float("inf"), float("-inf")} def remove_infinites(row): - to_check = row + """ + Replace float('inf') and float('-inf') with None in a row. + + Returns the original row object unchanged if no infinities are found. 
+ """ if isinstance(row, dict): - to_check = row.values() - if not any((c in _infinities) if isinstance(c, float) else 0 for c in to_check): - return row - if isinstance(row, dict): - return { - k: (None if (isinstance(v, float) and v in _infinities) else v) - for k, v in row.items() - } + for v in row.values(): + if isinstance(v, float) and v in _infinities: + return { + k: (None if isinstance(v2, float) and v2 in _infinities else v2) + for k, v2 in row.items() + } else: - return [None if (isinstance(c, float) and c in _infinities) else c for c in row] + for v in row: + if isinstance(v, float) and v in _infinities: + return [ + None if isinstance(v2, float) and v2 in _infinities else v2 + for v2 in row + ] + return row class StaticMount(click.ParamType): From 3f8f97e92a2ec058d38dbc151eef40245cb234a3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 09:55:25 -0800 Subject: [PATCH 30/53] Close more connections in test suite To try and avoid too many open files on macOS --- tests/test_api_write.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 3a76e655..05835e51 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -20,7 +20,12 @@ def ds_write(tmp_path_factory): ds = Datasette([db_path], immutables=[db_path_immutable]) ds.root_enabled = True yield ds - db.close() + # Close both setup connections plus any Datasette-managed connections. 
+ db1.close() + db2.close() + for database in ds.databases.values(): + if not database.is_memory: + database.close() def write_token(ds, actor_id="root", permissions=None): From ffadb5f74cf4e649671be42d9f56d0c233d381fb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 18:34:00 -0800 Subject: [PATCH 31/53] Workaround for intermittent test failure on SQLite 3.25.3 Closes: - #2632 --- datasette/utils/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 4aaed967..d0d216eb 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -706,8 +706,11 @@ def table_column_details(conn, table): ).fetchall() ] else: - # Treat hidden as 0 for all columns + # First trigger a query against sqlite_master to fix an intermittent + # test failure, see https://github.com/simonw/datasette/issues/2632 + conn.execute("select 1 from sqlite_master limit 1").fetchall() return [ + # Treat hidden as 0 for all columns. Column(*(list(r) + [0])) for r in conn.execute( f"PRAGMA table_info({escape_sqlite(table)});" From 40a37307ded36311a07eb2577cb74c92a2639f9d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 18:41:03 -0800 Subject: [PATCH 32/53] Add request.form() for multipart form data and file uploads * Add request.form() for multipart form data and file uploads New Request.form() method that handles both application/x-www-form-urlencoded and multipart/form-data content types with streaming parsing. 
Features: - Streaming multipart parser that doesn't buffer entire body in memory - Files spill to disk above 1MB threshold via SpooledTemporaryFile - files=False (default) discards file content, files=True stores them - Security limits: max_request_size, max_file_size, max_fields, max_files - FormData container with dict-like access and getlist() for multiple values - UploadedFile class with async read(), seek(), filename, content_type, size - Support for RFC 5987 filename* encoding for international filenames Uses multipart-form-data-conformance test suite for validation. * Update views to use request.form() and document new API - Migrate PermissionsDebugView, MessagesDebugView, and CreateTokenView from post_vars() to form() - Add documentation for request.form(), FormData, and UploadedFile classes Centralize multipart defaults and expose stricter limits via Request.form(). Enforce header, part, file, and disk space limits even when files are discarded; detect truncated bodies and client disconnects; and move blocking work off the event loop. Add FormData close/aclose context managers, update internals docs, and expand multipart tests (including len semantics and stricter conformance expectations). 
--- datasette/utils/asgi.py | 81 +++ datasette/utils/multipart.py | 757 ++++++++++++++++++++++ datasette/views/special.py | 26 +- docs/internals.rst | 131 +++- pyproject.toml | 1 + tests/test_multipart.py | 1152 ++++++++++++++++++++++++++++++++++ 6 files changed, 2133 insertions(+), 15 deletions(-) create mode 100644 datasette/utils/multipart.py create mode 100644 tests/test_multipart.py diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 7f3329a6..35f243b6 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,5 +1,21 @@ import json +from typing import Optional from datasette.utils import MultiParams, calculate_etag +from datasette.utils.multipart import ( + parse_form_data, + MultipartParseError, + FormData, + DEFAULT_MAX_FILE_SIZE, + DEFAULT_MAX_REQUEST_SIZE, + DEFAULT_MAX_FIELDS, + DEFAULT_MAX_FILES, + DEFAULT_MAX_PARTS, + DEFAULT_MAX_FIELD_SIZE, + DEFAULT_MAX_MEMORY_FILE_SIZE, + DEFAULT_MAX_PART_HEADER_BYTES, + DEFAULT_MAX_PART_HEADER_LINES, + DEFAULT_MIN_FREE_DISK_BYTES, +) from mimetypes import guess_type from urllib.parse import parse_qs, urlunparse, parse_qsl from pathlib import Path @@ -139,6 +155,71 @@ class Request: body = await self.post_body() return dict(parse_qsl(body.decode("utf-8"), keep_blank_values=True)) + async def form( + self, + files: bool = False, + max_file_size: int = DEFAULT_MAX_FILE_SIZE, + max_request_size: int = DEFAULT_MAX_REQUEST_SIZE, + max_fields: int = DEFAULT_MAX_FIELDS, + max_files: int = DEFAULT_MAX_FILES, + max_parts: Optional[int] = DEFAULT_MAX_PARTS, + max_field_size: int = DEFAULT_MAX_FIELD_SIZE, + max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE, + max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES, + max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES, + min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES, + ) -> FormData: + """ + Parse form data from the request body. + + Supports both application/x-www-form-urlencoded and multipart/form-data. 
+ + Args: + files: If True, store file uploads; if False (default), discard them + max_file_size: Maximum size per file in bytes (default 50MB) + max_request_size: Maximum total request size in bytes (default 100MB) + max_fields: Maximum number of form fields (default 1000) + max_files: Maximum number of file uploads (default 100) + max_parts: Maximum number of multipart parts (default max_fields + max_files) + max_field_size: Maximum size of a text field value in bytes (default 100KB) + max_memory_file_size: Threshold before files spill to disk (default 1MB) + max_part_header_bytes: Maximum bytes allowed in part headers (default 16KB) + max_part_header_lines: Maximum header lines per part (default 100) + min_free_disk_bytes: Minimum free bytes required in temp dir (default 50MB) + + Returns: + FormData object with dict-like access to fields and files. + Use form["key"] for first value, form.getlist("key") for all values. + + Raises: + BadRequest: If content-type is missing, unsupported, or parsing fails + """ + content_type = self.headers.get("content-type", "") + if not content_type: + raise BadRequest( + "Missing Content-Type header; expected application/x-www-form-urlencoded " + "or multipart/form-data" + ) + + try: + return await parse_form_data( + receive=self.receive, + content_type=content_type, + files=files, + max_file_size=max_file_size, + max_request_size=max_request_size, + max_fields=max_fields, + max_files=max_files, + max_parts=max_parts, + max_field_size=max_field_size, + max_memory_file_size=max_memory_file_size, + max_part_header_bytes=max_part_header_bytes, + max_part_header_lines=max_part_header_lines, + min_free_disk_bytes=min_free_disk_bytes, + ) + except MultipartParseError as e: + raise BadRequest(str(e)) + @classmethod def fake(cls, path_with_query_string, method="GET", scheme="http", url_vars=None): """Useful for constructing Request objects for tests""" diff --git a/datasette/utils/multipart.py b/datasette/utils/multipart.py new file 
mode 100644 index 00000000..cfa77486 --- /dev/null +++ b/datasette/utils/multipart.py @@ -0,0 +1,757 @@ +""" +Streaming multipart/form-data parser for ASGI applications. + +Supports: +- Streaming parsing without buffering entire body in memory +- Files spill to disk above configurable threshold +- Security limits on request size, file size, field count +- Both multipart/form-data and application/x-www-form-urlencoded +""" + +import asyncio +import shutil +import tempfile +from dataclasses import dataclass, field +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) +from urllib.parse import parse_qsl + +# Centralized defaults for multipart/form-data parsing +DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +DEFAULT_MAX_REQUEST_SIZE = 100 * 1024 * 1024 # 100MB +DEFAULT_MAX_FIELDS = 1000 +DEFAULT_MAX_FILES = 100 +# If max_parts is not specified, it defaults to max_fields + max_files +DEFAULT_MAX_PARTS: Optional[int] = None +DEFAULT_MAX_FIELD_SIZE = 100 * 1024 # 100KB +DEFAULT_MAX_MEMORY_FILE_SIZE = 1024 * 1024 # 1MB +DEFAULT_MAX_PART_HEADER_BYTES = 16 * 1024 # 16KB +DEFAULT_MAX_PART_HEADER_LINES = 100 +DEFAULT_MIN_FREE_DISK_BYTES = 50 * 1024 * 1024 # 50MB + + +class MultipartParseError(Exception): + """Raised when multipart parsing fails.""" + + pass + + +@dataclass +class UploadedFile: + """ + Represents an uploaded file from a multipart form. 
+ + Attributes: + name: The form field name + filename: The original filename from the upload + content_type: The MIME type of the file + size: Size in bytes + """ + + name: str + filename: str + content_type: Optional[str] + size: int + _file: tempfile.SpooledTemporaryFile = field(repr=False) + + async def read(self, size: int = -1) -> bytes: + """Read file contents.""" + return await asyncio.to_thread(self._file.read, size) + + async def seek(self, offset: int, whence: int = 0) -> int: + """Seek to position in file.""" + return await asyncio.to_thread(self._file.seek, offset, whence) + + async def close(self) -> None: + """Close the underlying file.""" + await asyncio.to_thread(self._file.close) + + def close_sync(self) -> None: + """Close the underlying file synchronously.""" + self._file.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() + + def __del__(self): + try: + self._file.close() + except Exception: + pass + + +class FormData: + """ + Container for parsed form data, supporting both fields and files. + + Provides dict-like access with support for multiple values per key. 
+ """ + + def __init__(self): + self._data: List[Tuple[str, Union[str, UploadedFile]]] = [] + + def append(self, key: str, value: Union[str, UploadedFile]) -> None: + """Add a key-value pair.""" + self._data.append((key, value)) + + def __getitem__(self, key: str) -> Union[str, UploadedFile]: + """Get the first value for a key.""" + for k, v in self._data: + if k == key: + return v + raise KeyError(key) + + def get(self, key: str, default: Any = None) -> Optional[Union[str, UploadedFile]]: + """Get the first value for a key, or default if not found.""" + try: + return self[key] + except KeyError: + return default + + def getlist(self, key: str) -> List[Union[str, UploadedFile]]: + """Get all values for a key.""" + return [v for k, v in self._data if k == key] + + def __contains__(self, key: str) -> bool: + """Check if key exists.""" + return any(k == key for k, _ in self._data) + + def __len__(self) -> int: + """Return number of items.""" + return len(self._data) + + def __iter__(self): + """Iterate over unique keys.""" + seen = set() + for k, _ in self._data: + if k not in seen: + seen.add(k) + yield k + + def keys(self): + """Return unique keys.""" + return list(self) + + def items(self) -> List[Tuple[str, Union[str, UploadedFile]]]: + """Return all key-value pairs.""" + return list(self._data) + + def values(self) -> List[Union[str, UploadedFile]]: + """Return all values.""" + return [v for _, v in self._data] + + def _uploaded_files(self) -> List[UploadedFile]: + """Return UploadedFile instances contained in this form.""" + return [v for _, v in self._data if isinstance(v, UploadedFile)] + + def close(self) -> None: + """ + Close any uploaded files. + + This provides deterministic cleanup for spooled temp files. 
def _split_header_params(header: str) -> List[str]:
    """Split a header value on semicolons that are not inside a quoted string."""
    parts: List[str] = []
    current: List[str] = []
    in_quotes = False
    for i, char in enumerate(header):
        # A double quote toggles quoting unless it is backslash-escaped
        if char == '"' and (i == 0 or header[i - 1] != "\\"):
            in_quotes = not in_quotes
            current.append(char)
        elif char == ";" and not in_quotes:
            parts.append("".join(current).strip())
            current = []
        else:
            current.append(char)
    tail = "".join(current).strip()
    if tail:
        parts.append(tail)
    return parts


def _strip_path_components(filename: str) -> str:
    """Return only the final path segment of a client-supplied filename."""
    # Treat both Windows and Unix separators as path delimiters
    return filename.replace("\\", "/").rsplit("/", 1)[-1]


def parse_content_disposition(header: str) -> Dict[str, Optional[str]]:
    """
    Parse a Content-Disposition header value.

    Args:
        header: The raw header value, e.g. ``form-data; name="a"; filename="b.txt"``.

    Returns:
        Dict with ``name`` and ``filename`` keys; either is ``None`` when the
        parameter is absent. ``filename*`` (RFC 5987/8187 extended value) takes
        precedence over ``filename`` regardless of parameter order. Filenames
        from both parameters have directory components removed.
    """
    from urllib.parse import unquote

    result: Dict[str, Optional[str]] = {"name": None, "filename": None}

    # The first segment is the disposition type ("form-data"); skip it
    for part in _split_header_params(header)[1:]:
        if "=" not in part:
            continue

        key, _, value = part.partition("=")
        key = key.strip().lower()
        value = value.strip()

        if key == "filename*":
            # Format: charset'language'percent-encoded-value
            pieces = value.split("'", 2)
            if len(pieces) == 3:
                charset = pieces[0].strip().lower()
                # Only the two charsets the RFC names are honoured; anything
                # else is decoded as UTF-8 rather than trusting a
                # client-chosen codec name.
                if charset not in ("utf-8", "iso-8859-1"):
                    charset = "utf-8"
                decoded = unquote(pieces[2], encoding=charset)
                # Same sanitization as plain filename=, so an encoded
                # "../" cannot smuggle a path through.
                result["filename"] = _strip_path_components(decoded)
            continue

        # Remove surrounding quotes and undo backslash escapes
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
            value = value.replace('\\"', '"').replace("\\\\", "\\")

        if key == "name":
            result["name"] = value
        elif key == "filename" and result["filename"] is None:
            # Only used when filename* has not already supplied a value
            result["filename"] = _strip_path_components(value)

    return result


def parse_content_type(header: str) -> Tuple[str, Dict[str, str]]:
    """
    Parse a Content-Type header value.

    Args:
        header: The raw header value, e.g. ``multipart/form-data; boundary=xyz``.

    Returns:
        ``(media_type, params)`` where ``media_type`` is lower-cased and
        ``params`` maps lower-cased parameter names to their values with
        surrounding double quotes removed (value case is preserved).
    """
    parts = header.split(";")
    media_type = parts[0].strip().lower()
    params: Dict[str, str] = {}

    for part in parts[1:]:
        part = part.strip()
        if "=" not in part:
            continue
        key, _, value = part.partition("=")
        value = value.strip()
        # Remove quotes if present
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        params[key.strip().lower()] = value

    return media_type, params
class MultipartParser:
    """
    Streaming multipart/form-data parser.

    Body bytes are supplied incrementally through ``feed()``; ``finalize()``
    returns the resulting ``FormData``. Only a small tail of the input is
    buffered at any time, so a body is never held in memory as a whole.
    Text fields are collected as strings. File parts are either written to
    spooled temporary files (``handle_files=True``) or counted, size-checked
    and discarded.

    Any limit violation or malformed input raises ``MultipartParseError``.
    When ``feed()`` or ``finalize()`` raises, spooled files created so far
    are closed before the exception propagates.
    """

    # Parser states
    STATE_PREAMBLE = 0  # Looking for the first boundary
    STATE_HEADER = 1  # Reading the header lines of a part
    STATE_BODY = 2  # Reading the content of a part
    STATE_DONE = 3  # Closing boundary seen; further input is ignored

    def __init__(
        self,
        boundary: bytes,
        max_file_size: int = DEFAULT_MAX_FILE_SIZE,
        max_request_size: int = DEFAULT_MAX_REQUEST_SIZE,
        max_fields: int = DEFAULT_MAX_FIELDS,
        max_files: int = DEFAULT_MAX_FILES,
        max_parts: Optional[int] = DEFAULT_MAX_PARTS,
        max_field_size: int = DEFAULT_MAX_FIELD_SIZE,
        max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE,
        max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES,
        max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES,
        min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES,
        handle_files: bool = False,
    ):
        """
        Args:
            boundary: Boundary token from the Content-Type header, without
                the leading ``--``.
            max_file_size: Maximum bytes accepted for a single file part.
            max_request_size: Maximum total bytes accepted by ``feed()``.
            max_fields: Maximum number of non-file parts.
            max_files: Maximum number of file parts.
            max_parts: Maximum number of parts overall; ``None`` means
                ``max_fields + max_files``.
            max_field_size: Maximum bytes accepted for a single text field.
            max_memory_file_size: Spool threshold passed to
                ``SpooledTemporaryFile(max_size=...)``.
            max_part_header_bytes: Maximum header bytes per part.
            max_part_header_lines: Maximum header lines per part.
            min_free_disk_bytes: Minimum free bytes required in the temp
                directory while storing files; ``<= 0`` disables the check.
            handle_files: Store file parts if True, discard them if False.
        """
        self.boundary = b"--" + boundary
        self.end_boundary = self.boundary + b"--"
        self.max_file_size = max_file_size
        self.max_request_size = max_request_size
        self.max_fields = max_fields
        self.max_files = max_files
        # If not specified, tie max_parts to the other cardinality limits
        if max_parts is None:
            max_parts = max_fields + max_files
        self.max_parts = max_parts
        self.max_field_size = max_field_size
        self.max_memory_file_size = max_memory_file_size
        self.max_part_header_bytes = max_part_header_bytes
        self.max_part_header_lines = max_part_header_lines
        self.min_free_disk_bytes = min_free_disk_bytes
        self.handle_files = handle_files

        self.state = self.STATE_PREAMBLE
        self.buffer = bytearray()
        self.total_bytes = 0
        self.field_count = 0
        self.file_count = 0
        self.part_count = 0
        self.current_part_size = 0
        self.current_header_bytes = 0
        self.current_header_lines = 0

        self.form_data = FormData()
        self._disk_check_interval_bytes = 1024 * 1024  # 1MB between disk checks
        self._bytes_since_disk_check = 0
        self._tempdir = tempfile.gettempdir()

        # Current part state
        self.current_headers: Dict[str, str] = {}
        self.current_file: Optional[tempfile.SpooledTemporaryFile] = None
        self.current_body = bytearray()
        self.current_name: Optional[str] = None
        self.current_filename: Optional[str] = None
        self.current_content_type: Optional[str] = None

    def feed(self, chunk: bytes) -> None:
        """
        Feed a chunk of body data to the parser.

        Raises:
            MultipartParseError: If a configured limit is exceeded.
        """
        try:
            self.total_bytes += len(chunk)
            if self.total_bytes > self.max_request_size:
                raise MultipartParseError("Request body too large")
            self.buffer.extend(chunk)
            self._process()
        except Exception:
            # The parse is unusable after a failure; release temp files now
            # instead of waiting for garbage collection.
            self._release_files()
            raise

    def finalize(self) -> FormData:
        """
        Finish parsing and return the collected form data.

        Raises:
            MultipartParseError: If the closing boundary was never seen.
        """
        try:
            # Process any remaining buffered data
            self._process()
            if self.state != self.STATE_DONE:
                raise MultipartParseError(
                    "Truncated multipart body (missing closing boundary)"
                )
        except Exception:
            self._release_files()
            raise
        return self.form_data

    def _process(self) -> None:
        """Run the state machine until it needs more data or is done."""
        while True:
            if self.state == self.STATE_PREAMBLE:
                if not self._process_preamble():
                    break
            elif self.state == self.STATE_HEADER:
                if not self._process_header():
                    break
            elif self.state == self.STATE_BODY:
                if not self._process_body():
                    break
            elif self.state == self.STATE_DONE:
                break

    def _begin_headers(self) -> None:
        """Switch to header parsing for a new part with fresh header counters."""
        self.state = self.STATE_HEADER
        self.current_headers = {}
        self.current_header_bytes = 0
        self.current_header_lines = 0

    def _process_preamble(self) -> bool:
        """Skip the preamble and consume the first boundary line."""
        idx = self.buffer.find(self.boundary)
        if idx == -1:
            # Keep a potential partial boundary at the end of the buffer
            keep = len(self.boundary) - 1
            if len(self.buffer) > keep:
                del self.buffer[:-keep]
            return False

        after_boundary = idx + len(self.boundary)
        if len(self.buffer) - after_boundary < 2:
            # The two bytes after the boundary decide between "--" (closing)
            # and a line ending (new part). Wait for them so a feed that ends
            # right after the boundary is not misclassified.
            del self.buffer[:idx]
            return False

        trailer = bytes(self.buffer[after_boundary : after_boundary + 2])
        if trailer == b"--":
            # Closing boundary: the body contains no parts
            self.state = self.STATE_DONE
            return False

        # Skip CRLF or LF after boundary
        if trailer == b"\r\n":
            after_boundary += 2
        elif trailer.startswith(b"\n"):
            after_boundary += 1

        del self.buffer[:after_boundary]
        self._begin_headers()
        return True

    def _process_header(self) -> bool:
        """Parse header lines of the current part up to the blank line."""
        while True:
            lf_idx = self.buffer.find(b"\n")
            if lf_idx == -1:
                # Guard against unbounded buffering if no newline is ever
                # sent. Buffered bytes will all count toward this part's
                # header total once the line completes, so include them now.
                pending = self.current_header_bytes + len(self.buffer)
                if pending > self.max_part_header_bytes:
                    raise MultipartParseError("Part headers too large")
                return False  # Need more data

            # Accept CRLF or bare LF as the line terminator
            line_end = lf_idx
            if lf_idx > 0 and self.buffer[lf_idx - 1] == 0x0D:
                line_end = lf_idx - 1
            line = bytes(self.buffer[:line_end])
            del self.buffer[: lf_idx + 1]

            self.current_header_lines += 1
            self.current_header_bytes += lf_idx + 1
            if (
                self.current_header_lines > self.max_part_header_lines
                or self.current_header_bytes > self.max_part_header_bytes
            ):
                raise MultipartParseError("Part headers too large")

            if not line:
                # Empty line = end of headers
                self._start_body()
                self.state = self.STATE_BODY
                return True

            # errors="replace" never raises, so no fallback decode is needed
            line_str = line.decode("utf-8", errors="replace")
            if ":" in line_str:
                name, _, value = line_str.partition(":")
                self.current_headers[name.strip().lower()] = value.strip()

    def _start_body(self) -> None:
        """Initialize per-part state from the headers just parsed."""
        self.part_count += 1
        if self.part_count > self.max_parts:
            raise MultipartParseError("Too many parts")

        cd = self.current_headers.get("content-disposition", "")
        parsed = parse_content_disposition(cd)
        self.current_name = parsed.get("name")
        self.current_filename = parsed.get("filename")
        self.current_content_type = self.current_headers.get("content-type")
        self.current_part_size = 0
        self.current_file = None

        if self.current_filename is not None:
            # It's a file
            self.file_count += 1
            if self.file_count > self.max_files:
                raise MultipartParseError("Too many files")
            if self.handle_files:
                # Check disk space before allocating, so a failed check
                # cannot leave a freshly created temp file behind.
                self._ensure_disk_space()
                self.current_file = tempfile.SpooledTemporaryFile(
                    max_size=self.max_memory_file_size
                )
            # else: file content will be discarded
        else:
            # It's a text field
            self.field_count += 1
            if self.field_count > self.max_fields:
                raise MultipartParseError("Too many fields")
            self.current_body = bytearray()

    def _process_body(self) -> bool:
        """Consume body data of the current part up to the next boundary."""
        # The boundary is preceded by \r\n (or \n for lenient parsing)
        search_boundary = b"\r\n" + self.boundary
        idx = self.buffer.find(search_boundary)
        if idx == -1:
            # Try LF-only boundary (lenient)
            search_boundary_lf = b"\n" + self.boundary
            idx = self.buffer.find(search_boundary_lf)
            if idx != -1:
                search_boundary = search_boundary_lf

        if idx == -1:
            # No boundary yet: emit everything except a tail long enough to
            # hold a boundary that is split across chunks.
            safe_len = len(self.buffer) - len(search_boundary) - 1
            if safe_len > 0:
                self._write_body_data(bytes(self.buffer[:safe_len]))
                del self.buffer[:safe_len]
            return False

        after_boundary = idx + len(search_boundary)
        if len(self.buffer) - after_boundary < 2:
            # Same rule as in the preamble: wait for the two bytes after the
            # boundary. Everything before the delimiter is body data and can
            # be written out already.
            if idx > 0:
                self._write_body_data(bytes(self.buffer[:idx]))
                del self.buffer[:idx]
            return False

        # Found a complete delimiter - write remaining body data
        self._write_body_data(bytes(self.buffer[:idx]))

        trailer = bytes(self.buffer[after_boundary : after_boundary + 2])
        if trailer == b"--":
            # End boundary
            self._finish_part()
            self.state = self.STATE_DONE
            return False

        # Skip CRLF or LF after boundary
        if trailer == b"\r\n":
            after_boundary += 2
        elif trailer.startswith(b"\n"):
            after_boundary += 1

        del self.buffer[:after_boundary]
        self._finish_part()
        self._begin_headers()
        return True

    def _write_body_data(self, data: bytes) -> None:
        """Append data to the current part, enforcing the size limits."""
        if not data:
            return

        self.current_part_size += len(data)

        if self.current_filename is not None:
            # File data
            if self.current_part_size > self.max_file_size:
                raise MultipartParseError("File too large")
            if self.handle_files and self.current_file is not None:
                self._bytes_since_disk_check += len(data)
                if self._bytes_since_disk_check >= self._disk_check_interval_bytes:
                    self._ensure_disk_space()
                    self._bytes_since_disk_check = 0
                self.current_file.write(data)
            # else: discard file data
        else:
            # Field data
            if self.current_part_size > self.max_field_size:
                raise MultipartParseError("Field value too large")
            self.current_body.extend(data)

    def _finish_part(self) -> None:
        """Record the finished part in form_data and reset per-part state."""
        if self.current_name is None:
            # A part without a name is dropped, but a spooled file created
            # for it must still be released.
            if self.current_file is not None:
                self.current_file.close()
        elif self.current_filename is not None:
            # File
            if self.handle_files and self.current_file is not None:
                self.current_file.seek(0)
                uploaded = UploadedFile(
                    name=self.current_name,
                    filename=self.current_filename,
                    content_type=self.current_content_type,
                    size=self.current_part_size,
                    _file=self.current_file,
                )
                self.form_data.append(self.current_name, uploaded)
            # else: file was discarded
        else:
            # Text field: UTF-8, falling back to latin-1 which cannot fail
            raw = bytes(self.current_body)
            try:
                value = raw.decode("utf-8")
            except UnicodeDecodeError:
                value = raw.decode("latin-1")
            self.form_data.append(self.current_name, value)

        # Reset part state
        self.current_file = None
        self.current_body = bytearray()
        self.current_name = None
        self.current_filename = None
        self.current_content_type = None

    def _release_files(self) -> None:
        """Best-effort close of the in-progress file and all stored uploads."""
        if self.current_file is not None:
            try:
                self.current_file.close()
            except OSError:
                # Cleanup must not mask the error that triggered it
                pass
            self.current_file = None
        self.form_data.close()

    def _ensure_disk_space(self) -> None:
        """
        Ensure there is enough free space on the temp filesystem.

        This is a best-effort guard against filling the disk with uploads.

        Raises:
            MultipartParseError: If free space in the temp directory is below
                ``min_free_disk_bytes``.
        """
        if not self.handle_files:
            return
        if self.min_free_disk_bytes <= 0:
            return
        free_bytes = shutil.disk_usage(self._tempdir).free
        if free_bytes < self.min_free_disk_bytes:
            raise MultipartParseError("Insufficient disk space for uploads")
async def _receive_body_chunk(receive: Callable) -> Tuple[bytes, bool]:
    """
    Await the next ``http.request`` message and return ``(body, more_body)``.

    Messages of any other type are skipped, except ``http.disconnect`` which
    raises ``MultipartParseError``.
    """
    while True:
        message = await receive()
        message_type = message.get("type")
        if message_type == "http.disconnect":
            raise MultipartParseError("Client disconnected during request body")
        if message_type is not None and message_type != "http.request":
            continue
        return message.get("body", b""), bool(message.get("more_body", False))


def _discard_partial_uploads(parser: "MultipartParser") -> None:
    """Best-effort close of every spooled file a failed parse has created."""
    current = parser.current_file
    if current is not None:
        try:
            current.close()
        except OSError:
            # Cleanup must not mask the error that triggered it
            pass
    parser.form_data.close()


async def parse_form_data(
    receive: Callable,
    content_type: str,
    files: bool = False,
    max_file_size: int = DEFAULT_MAX_FILE_SIZE,
    max_request_size: int = DEFAULT_MAX_REQUEST_SIZE,
    max_fields: int = DEFAULT_MAX_FIELDS,
    max_files: int = DEFAULT_MAX_FILES,
    max_parts: Optional[int] = DEFAULT_MAX_PARTS,
    max_field_size: int = DEFAULT_MAX_FIELD_SIZE,
    max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE,
    max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES,
    max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES,
    min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES,
) -> FormData:
    """
    Parse form data from an ASGI receive callable.

    Supports both application/x-www-form-urlencoded and multipart/form-data.

    Args:
        receive: ASGI receive callable
        content_type: Content-Type header value
        files: If True, store file uploads; if False, discard them
        max_file_size: Maximum size per file in bytes
        max_request_size: Maximum total request size in bytes
        max_fields: Maximum number of form fields (multipart only)
        max_files: Maximum number of file uploads
        max_parts: Maximum number of multipart parts in total; ``None`` means
            ``max_fields + max_files``
        max_field_size: Maximum size of a text field value
        max_memory_file_size: File size threshold before spilling to disk
        max_part_header_bytes: Maximum total header bytes per part
        max_part_header_lines: Maximum header lines per part
        min_free_disk_bytes: Minimum free bytes required in the temp
            directory while storing uploads

    Returns:
        FormData object containing parsed fields and files

    Raises:
        MultipartParseError: On an unsupported Content-Type, a missing
            boundary, a client disconnect, or any exceeded limit.
    """
    media_type, params = parse_content_type(content_type)

    if media_type == "application/x-www-form-urlencoded":
        # Read entire body for URL-encoded forms (they're typically small)
        body = bytearray()
        more_body = True
        while more_body:
            chunk, more_body = await _receive_body_chunk(receive)
            body.extend(chunk)
            if len(body) > max_request_size:
                raise MultipartParseError("Request body too large")

        raw = bytes(body)
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            text = raw.decode("latin-1")

        form_data = FormData()
        for key, value in parse_qsl(text, keep_blank_values=True):
            form_data.append(key, value)
        return form_data

    if media_type == "multipart/form-data":
        boundary = params.get("boundary")
        if not boundary:
            raise MultipartParseError("Missing boundary in Content-Type")

        parser = MultipartParser(
            boundary=boundary.encode("utf-8"),
            max_file_size=max_file_size,
            max_request_size=max_request_size,
            max_fields=max_fields,
            max_files=max_files,
            max_parts=max_parts,
            max_field_size=max_field_size,
            max_memory_file_size=max_memory_file_size,
            max_part_header_bytes=max_part_header_bytes,
            max_part_header_lines=max_part_header_lines,
            min_free_disk_bytes=min_free_disk_bytes,
            handle_files=files,
        )

        # Small ASGI messages are batched so each worker-thread hop carries
        # at least batch_target bytes.
        batch_target = 64 * 1024
        batch = bytearray()

        async def flush_batch() -> None:
            if batch:
                data = bytes(batch)
                batch.clear()
                await asyncio.to_thread(parser.feed, data)

        try:
            more_body = True
            while more_body:
                chunk, more_body = await _receive_body_chunk(receive)
                if chunk:
                    batch.extend(chunk)
                    if len(batch) >= batch_target:
                        await flush_batch()

            await flush_batch()
            return await asyncio.to_thread(parser.finalize)
        except Exception:
            # No worker thread is running once an exception reaches this
            # point, so the files can be closed safely. Without this, spooled
            # uploads stay open until garbage collection.
            _discard_partial_uploads(parser)
            raise

    raise MultipartParseError(
        f"Unsupported Content-Type: {media_type}. "
        "Expected application/x-www-form-urlencoded or multipart/form-data"
    )
post(self, request): await self.ds.ensure_permission(action="view-instance", actor=request.actor) - post = await request.post_vars() - message = post.get("message", "") - message_type = post.get("message_type") or "INFO" + form = await request.form() + message = form.get("message", "") + message_type = form.get("message_type") or "INFO" assert message_type in ("INFO", "WARNING", "ERROR", "all") datasette = self.ds if message_type == "all": @@ -688,11 +688,11 @@ class CreateTokenView(BaseView): async def post(self, request): self.check_permission(request) - post = await request.post_vars() + form = await request.form() errors = [] expires_after = None - if post.get("expire_type"): - duration_string = post.get("expire_duration") + if form.get("expire_type"): + duration_string = form.get("expire_duration") if ( not duration_string or not duration_string.isdigit() @@ -700,7 +700,7 @@ class CreateTokenView(BaseView): ): errors.append("Invalid expire duration") else: - unit = post["expire_type"] + unit = form["expire_type"] if unit == "minutes": expires_after = int(duration_string) * 60 elif unit == "hours": @@ -715,7 +715,7 @@ class CreateTokenView(BaseView): restrict_database = {} restrict_resource = {} - for key in post: + for key in form: if key.startswith("all:") and key.count(":") == 1: restrict_all.append(key.split(":")[1]) elif key.startswith("database:") and key.count(":") == 2: diff --git a/docs/internals.rst b/docs/internals.rst index cfd78593..0491c1f7 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -52,10 +52,59 @@ The request object is passed to various plugin hooks. It represents an incoming ``.actor`` - dictionary (str -> Any) or None The currently authenticated actor (see :ref:`actors `), or ``None`` if the request is unauthenticated. -The object also has two awaitable methods: +The object also has the following awaitable methods: + +``await request.form(files=False, ...)`` - FormData + Parses form data from the request body. 
Supports both ``application/x-www-form-urlencoded`` and ``multipart/form-data`` content types. + + Returns a :ref:`internals_formdata` object with dict-like access to form fields and uploaded files. + + Requirements and errors: + + - A ``Content-Type`` header is required. Missing or unsupported content types raise ``BadRequest``. + - For ``multipart/form-data``, the ``boundary=...`` parameter is required. + + Parameters: + + - ``files`` (bool, default ``False``): If ``True``, uploaded files are stored and accessible. If ``False`` (default), file content is discarded but form fields are still available. + - ``max_file_size`` (int, default 50MB): Maximum size per uploaded file in bytes. + - ``max_request_size`` (int, default 100MB): Maximum total request body size in bytes. + - ``max_fields`` (int, default 1000): Maximum number of form fields. + - ``max_files`` (int, default 100): Maximum number of uploaded files. + - ``max_parts`` (int, default ``max_fields + max_files``): Maximum number of multipart parts in total. + - ``max_field_size`` (int, default 100KB): Maximum size of a text field value in bytes. + - ``max_memory_file_size`` (int, default 1MB): File size threshold before uploads spill to disk. + - ``max_part_header_bytes`` (int, default 16KB): Maximum total bytes allowed in part headers. + - ``max_part_header_lines`` (int, default 100): Maximum header lines per part. + - ``min_free_disk_bytes`` (int, default 50MB): Minimum free bytes required in the temp directory before accepting file uploads. + + Example usage: + + .. 
code-block:: python + + # Parse form fields only (files are discarded) + form = await request.form() + username = form["username"] + tags = form.getlist("tags") # For multiple values + + # Parse form fields AND files + form = await request.form(files=True) + uploaded = form["avatar"] + content = await uploaded.read() + print( + uploaded.filename, uploaded.content_type, uploaded.size + ) + + Cleanup note: + + When using ``files=True``, call ``await form.aclose()`` once you are done with the uploads + to ensure spooled temporary files are closed promptly. You can also use + ``async with form: ...`` for automatic cleanup. + + Don't forget to read about :ref:`internals_csrf`! ``await request.post_vars()`` - dictionary - Returns a dictionary of form variables that were submitted in the request body via ``POST``. Don't forget to read about :ref:`internals_csrf`! + Returns a dictionary of form variables that were submitted in the request body via ``POST`` using ``application/x-www-form-urlencoded`` encoding. For multipart forms or file uploads, use ``request.form()`` instead. ``await request.post_body()`` - bytes Returns the un-parsed body of a request submitted by ``POST`` - useful for things like incoming JSON data. @@ -117,6 +166,84 @@ Consider the query string ``?foo=1&foo=2&bar=3`` - with two values for ``foo`` a ``len(request.args)`` - integer Returns the number of keys. +.. _internals_formdata: + +The FormData class +================== + +``await request.form()`` returns a ``FormData`` object - a dictionary-like object which provides access to form fields and uploaded files. It has a similar interface to ``MultiParams``. + +``form[key]`` - string or UploadedFile + Returns the first value for that key, or raises a ``KeyError`` if the key is missing. + +``form.get(key)`` - string, UploadedFile, or None + Returns the first value for that key, or ``None`` if the key is missing. Pass a second argument to specify a different default. 
+ +``form.getlist(key)`` - list + Returns the list of values for that key. If the key is missing an empty list will be returned. + +``form.keys()`` - list of strings + Returns the list of available keys. + +``key in form`` - True or False + You can use ``if key in form`` to check if a key is present. + +``for key in form`` - iterator + This lets you loop through every available key. + +``len(form)`` - integer + Returns the total number of submitted values. + +.. _internals_uploadedfile: + +The UploadedFile class +====================== + +When parsing multipart form data with ``files=True``, file uploads are returned as ``UploadedFile`` objects with the following properties and methods: + +``uploaded_file.name`` - string + The form field name. + +``uploaded_file.filename`` - string + The original filename provided by the client. Note: This is sanitized to remove path components for security. + +``uploaded_file.content_type`` - string or None + The MIME type of the uploaded file, if provided by the client. + +``uploaded_file.size`` - integer + The size of the uploaded file in bytes. + +``await uploaded_file.read(size=-1)`` - bytes + Read and return up to ``size`` bytes from the file. If ``size`` is -1 (default), read the entire file. + +``await uploaded_file.seek(offset, whence=0)`` - integer + Seek to the given position in the file. Returns the new position. + +``await uploaded_file.close()`` + Close the underlying file. This is called automatically when the object is garbage collected. + +Files smaller than 1MB are stored in memory. Larger files are automatically spilled to temporary files on disk and cleaned up when the request completes. + +Example: + +.. 
code-block:: python + + form = await request.form(files=True) + uploaded = form["document"] + + # Check file metadata + print(f"Filename: {uploaded.filename}") + print(f"Content-Type: {uploaded.content_type}") + print(f"Size: {uploaded.size} bytes") + + # Read file content + content = await uploaded.read() + + # Or read in chunks + await uploaded.seek(0) + while chunk := await uploaded.read(8192): + process_chunk(chunk) + .. _internals_response: Response class diff --git a/pyproject.toml b/pyproject.toml index 6fca673d..d9ef2a73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dev = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + "multipart-form-data-conformance==0.1a0", "ruff>=0.9", # docs "Sphinx==7.4.7", diff --git a/tests/test_multipart.py b/tests/test_multipart.py new file mode 100644 index 00000000..0dc3ecd7 --- /dev/null +++ b/tests/test_multipart.py @@ -0,0 +1,1152 @@ +""" +Tests for request.form() multipart form data parsing. + +Uses TDD approach - these tests are written first, then implementation follows. 
+""" + +import base64 +import json +import pytest +from collections import namedtuple + +from multipart_form_data_conformance import get_tests_dir + +from datasette.utils.asgi import Request, BadRequest + + +def make_receive(body: bytes): + """Create an async receive callable that yields body in chunks.""" + consumed = False + + async def receive(): + nonlocal consumed + if consumed: + return {"type": "http.request", "body": b"", "more_body": False} + consumed = True + return {"type": "http.request", "body": body, "more_body": False} + + return receive + + +def make_chunked_receive(body: bytes, chunk_size: int = 64): + """Create an async receive callable that yields body in small chunks.""" + offset = 0 + + async def receive(): + nonlocal offset + chunk = body[offset : offset + chunk_size] + offset += chunk_size + more_body = offset < len(body) + return {"type": "http.request", "body": chunk, "more_body": more_body} + + return receive + + +def make_receive_with_noise(body: bytes): + """ + Create an async receive callable that includes an unexpected ASGI message. + + The parser should ignore the unknown message type and continue. + """ + messages = [ + {"type": "http.response.start", "status": 200, "headers": []}, + {"type": "http.request", "body": body, "more_body": False}, + ] + index = 0 + + async def receive(): + nonlocal index + if index >= len(messages): + return {"type": "http.request", "body": b"", "more_body": False} + message = messages[index] + index += 1 + return message + + return receive + + +def make_disconnect_receive(body: bytes, chunk_size: int = 64): + """ + Create an async receive callable that disconnects mid-request. + + The parser should raise on the disconnect. 
+ """ + offset = 0 + disconnected = False + + async def receive(): + nonlocal offset, disconnected + if disconnected: + return {"type": "http.disconnect"} + chunk = body[offset : offset + chunk_size] + offset += chunk_size + more_body = offset < len(body) + if more_body: + disconnected = True + return {"type": "http.request", "body": chunk, "more_body": more_body} + + return receive + + +class TestFormUrlEncoded: + """Test request.form() with application/x-www-form-urlencoded data.""" + + @pytest.mark.asyncio + async def test_basic_form_fields(self): + """Basic URL-encoded form should be parseable via request.form().""" + body = b"username=john&password=secret" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["username"] == "john" + assert form["password"] == "secret" + + @pytest.mark.asyncio + async def test_form_with_multiple_values(self): + """Multiple values for same key should be accessible via getlist().""" + body = b"tag=python&tag=web&tag=api" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["tag"] == "python" # First value + assert form.getlist("tag") == ["python", "web", "api"] + + @pytest.mark.asyncio + async def test_empty_form(self): + """Empty form should return empty FormData.""" + body = b"" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert len(form) == 0 + + @pytest.mark.asyncio + async def test_form_with_special_characters(self): + """URL-encoded special characters should be decoded properly.""" + body = 
b"message=hello%20world&emoji=%F0%9F%91%8B" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["message"] == "hello world" + assert form["emoji"] == "👋" + + +class TestMultipartBasic: + """Test request.form() with multipart/form-data (fields only, no files).""" + + @pytest.mark.asyncio + async def test_single_text_field(self): + """Single text field in multipart should be parseable.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="username"\r\n' + b"\r\n" + b"john_doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["username"] == "john_doe" + + @pytest.mark.asyncio + async def test_multiple_text_fields(self): + """Multiple text fields in multipart should all be accessible.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="first_name"\r\n' + b"\r\n" + b"John\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="last_name"\r\n' + b"\r\n" + b"Doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["first_name"] == "John" + assert form["last_name"] == "Doe" + + @pytest.mark.asyncio + async def test_file_discarded_when_files_false(self): + """File content should be discarded when files=False (default).""" + boundary = "----TestBoundary123" + body = ( + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="title"\r\n' + b"\r\n" + b"My Document\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File content here\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="description"\r\n' + b"\r\n" + b"A sample document\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() # files=False is default + + # Text fields should be present + assert form["title"] == "My Document" + assert form["description"] == "A sample document" + # File should NOT be present + assert "file" not in form + + @pytest.mark.asyncio + async def test_chunked_body_parsing(self): + """Multipart should work when body arrives in small chunks.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="username"\r\n' + b"\r\n" + b"john_doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + # Use small chunks to test streaming parser + request = Request(scope, make_chunked_receive(body, chunk_size=16)) + + form = await request.form() + + assert form["username"] == "john_doe" + + +class TestMultipartWithFiles: + """Test request.form(files=True) for file uploads.""" + + @pytest.mark.asyncio + async def test_single_file_upload(self): + """Single file upload should create UploadedFile object.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="document"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello, 
World!\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["document"] + assert uploaded_file.filename == "test.txt" + assert uploaded_file.content_type == "text/plain" + assert await uploaded_file.read() == b"Hello, World!" + assert uploaded_file.size == 13 + + @pytest.mark.asyncio + async def test_mixed_fields_and_files(self): + """Mixed form fields and files should all be accessible.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="title"\r\n' + b"\r\n" + b"My Document\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Document content\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="description"\r\n' + b"\r\n" + b"A sample\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + # Text fields + assert form["title"] == "My Document" + assert form["description"] == "A sample" + # File + uploaded_file = form["file"] + assert uploaded_file.filename == "doc.txt" + assert await uploaded_file.read() == b"Document content" + + @pytest.mark.asyncio + async def test_multiple_files_same_name(self): + """Multiple files with same name should be accessible via getlist().""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="files"; filename="a.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File A\r\n" + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="files"; filename="b.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File B\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + files = form.getlist("files") + assert len(files) == 2 + assert files[0].filename == "a.txt" + assert files[1].filename == "b.txt" + + @pytest.mark.asyncio + async def test_large_file_spills_to_disk(self): + """Files larger than threshold should spill to temp file.""" + boundary = "----TestBoundary123" + # Create a body larger than the in-memory threshold (1MB) + large_content = b"x" * (2 * 1024 * 1024) # 2MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="bigfile"; filename="large.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["bigfile"] + assert uploaded_file.size == len(large_content) + # Content should still be readable + content = await uploaded_file.read() + assert content == large_content + + @pytest.mark.asyncio + async def test_uploaded_file_seek_and_read(self): + """UploadedFile should support seek and multiple reads.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello, World!\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + 
(b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + uploaded_file = form["file"] + + # First read + content1 = await uploaded_file.read() + assert content1 == b"Hello, World!" + + # Seek back to start + await uploaded_file.seek(0) + + # Second read + content2 = await uploaded_file.read() + assert content2 == b"Hello, World!" + + +class TestMultipartCleanup: + """Test deterministic cleanup of uploaded files.""" + + @pytest.mark.asyncio + async def test_formdata_close_closes_uploaded_files(self): + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + form = await request.form(files=True) + uploaded_file = form["file"] + + form.close() + + with pytest.raises(ValueError): + await uploaded_file.read() + + @pytest.mark.asyncio + async def test_formdata_async_context_manager_closes_files(self): + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + form = await request.form(files=True) + uploaded_file = form["file"] + + async with form: + pass + + with pytest.raises(ValueError): + await uploaded_file.read() + + +class TestMultipartEdgeCases: + """Test edge cases in multipart parsing.""" + 
+ @pytest.mark.asyncio + async def test_empty_file_upload(self): + """Empty file (filename but no content) should be handled.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="empty.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["file"] + assert uploaded_file.filename == "empty.txt" + assert uploaded_file.size == 0 + assert await uploaded_file.read() == b"" + + @pytest.mark.asyncio + async def test_filename_with_path(self): + """Filename containing path should extract just the filename.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="C:\\Users\\test\\doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"content\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + # Should extract just the filename, not the full path + uploaded_file = form["file"] + assert uploaded_file.filename == "doc.txt" + + @pytest.mark.asyncio + async def test_missing_content_type_header(self): + """Missing content-type in request should raise BadRequest.""" + body = b"some body" + scope = { + "type": "http", + "method": "POST", + "headers": [], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + @pytest.mark.asyncio + async def test_invalid_content_type(self): + """Non-form content-type should raise 
BadRequest.""" + body = b'{"key": "value"}' + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/json"), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + @pytest.mark.asyncio + async def test_missing_boundary(self): + """Multipart without boundary should raise BadRequest.""" + body = b"some body" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"multipart/form-data"), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + +class TestSecurityLimits: + """Test security limits on form parsing.""" + + @pytest.mark.asyncio + async def test_max_fields_limit(self): + """Should reject requests with too many fields.""" + boundary = "----TestBoundary123" + # Create body with many fields + parts = [] + for i in range(1001): # Default max is 1000 + parts.append( + f"------TestBoundary123\r\n" + f'Content-Disposition: form-data; name="field{i}"\r\n' + f"\r\n" + f"value{i}\r\n" + ) + parts.append("------TestBoundary123--\r\n") + body = "".join(parts).encode() + + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)too many"): + await request.form(max_fields=1000) + + @pytest.mark.asyncio + async def test_max_file_size_limit(self): + """Should reject files exceeding size limit.""" + boundary = "----TestBoundary123" + large_content = b"x" * (11 * 1024 * 1024) # 11MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", 
f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)file.*too large|too large"): + await request.form(files=True, max_file_size=10 * 1024 * 1024) + + @pytest.mark.asyncio + async def test_max_request_size_limit(self): + """Should reject requests exceeding total size limit.""" + boundary = "----TestBoundary123" + large_content = b"x" * (6 * 1024 * 1024) # 6MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)too large|request.*too large"): + await request.form(files=True, max_request_size=5 * 1024 * 1024) + + +class TestMultipartStrictnessAndLimits: + """Tests that enforce stricter ASGI and multipart behaviors.""" + + @pytest.mark.asyncio + async def test_multipart_truncated_body_is_error(self): + """Truncated multipart without closing boundary should raise.""" + boundary = "----TestBoundary123" + # Missing the final closing boundary line + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Truncated multipart body"): + await request.form() + + @pytest.mark.asyncio + async def test_disconnect_mid_body_is_error(self): + """Client disconnect during body streaming should raise.""" + boundary = "----TestBoundary123" + body = ( + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_disconnect_receive(body, chunk_size=16)) + + with pytest.raises(BadRequest, match="disconnected"): + await request.form() + + @pytest.mark.asyncio + async def test_unknown_asgi_message_type_is_ignored(self): + """Unexpected ASGI message types should be ignored.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive_with_noise(body)) + + form = await request.form() + assert form["field"] == "value" + + @pytest.mark.asyncio + async def test_max_files_enforced_even_when_files_false(self): + """File count limits should apply even when file handling is disabled.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="f1"; filename="a.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"a\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="f2"; filename="b.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"b\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Too many files"): + await request.form(files=False, max_files=1) + + @pytest.mark.asyncio + async def test_max_parts_limit(self): + """Total part 
count should be bounded.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="a"\r\n' + b"\r\n" + b"1\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="b"\r\n' + b"\r\n" + b"2\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Too many parts"): + await request.form(max_parts=1) + + @pytest.mark.asyncio + async def test_max_file_size_enforced_even_when_files_false(self): + """File size limits should apply even when file handling is disabled.""" + boundary = "----TestBoundary123" + big_content = b"x" * 2048 + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + big_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="File too large"): + await request.form(files=False, max_file_size=1024) + + @pytest.mark.asyncio + async def test_part_header_limits(self): + """Overly large part headers should be rejected.""" + boundary = "----TestBoundary123" + huge_header_value = "x" * 5000 + body = ( + b"------TestBoundary123\r\n" + + f'Content-Disposition: form-data; name="field"; foo="{huge_header_value}"\r\n'.encode() + + b"\r\n" + + b"value\r\n" + + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with 
pytest.raises(BadRequest, match="headers too large"): + await request.form(max_part_header_bytes=1024) + + @pytest.mark.asyncio + async def test_insufficient_disk_space_rejects_upload(self, monkeypatch): + """Uploads should be rejected when free disk is below the floor.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + + DiskUsage = namedtuple("DiskUsage", ("total", "used", "free")) + monkeypatch.setattr( + "datasette.utils.multipart.shutil.disk_usage", + lambda path: DiskUsage(total=100, used=95, free=5), + ) + + request = Request(scope, make_receive(body)) + with pytest.raises(BadRequest, match="Insufficient disk space"): + await request.form(files=True, min_free_disk_bytes=50) + + @pytest.mark.asyncio + async def test_low_disk_space_does_not_block_field_only_forms(self, monkeypatch): + """Low disk space should not reject multipart forms with no file parts.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + + DiskUsage = namedtuple("DiskUsage", ("total", "used", "free")) + monkeypatch.setattr( + "datasette.utils.multipart.shutil.disk_usage", + lambda path: DiskUsage(total=100, used=99, free=1), + ) + + request = Request(scope, make_receive(body)) + form = await request.form(files=True, min_free_disk_bytes=50) + assert form["field"] == "value" + + @pytest.mark.asyncio + async def 
test_headers_without_newline_hit_header_byte_limit(self): + """Headers that never terminate should still hit the header byte limit.""" + boundary = "----TestBoundary123" + huge = b"x" * 5000 + # No CRLF is included after the header line + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"; foo="' + huge + b'"' + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="headers too large"): + await request.form(max_part_header_bytes=1024) + + +class TestFormDataLenSemantics: + """Test that FormData.__len__ reflects number of items, not unique keys.""" + + @pytest.mark.asyncio + async def test_len_counts_items(self): + body = b"tag=python&tag=web&tag=api" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + assert len(form) == 3 + + +# Conformance test suite using multipart-form-data-conformance + +# Tests where our parser intentionally differs from strict spec for security/practicality +# Our parser sanitizes filenames (strips paths) while the conformance suite expects raw +FILENAME_SANITIZATION_TESTS = { + "026-filename-with-backslash", # We strip backslash-separated path components, but the test expects the raw filename + "029-filename-path-traversal", # We strip path components for security +} + +# Tests for optional/lenient features we don't implement +OPTIONAL_TESTS = { + "085-header-folding", # Obsolete header folding feature +} + +# Tests for malformed input where we're lenient instead of erroring +LENIENT_PARSING_TESTS = { + "203-missing-content-disposition", + "204-invalid-content-disposition", +} + + +def load_conformance_test_cases(): + """Load all test cases from multipart-form-data-conformance.""" + tests_dir = 
get_tests_dir() + test_cases = [] + + for category_dir in sorted(tests_dir.iterdir()): + if not category_dir.is_dir(): + continue + for test_dir in sorted(category_dir.iterdir()): + if not test_dir.is_dir(): + continue + test_json = test_dir / "test.json" + headers_json = test_dir / "headers.json" + input_raw = test_dir / "input.raw" + + if not all(f.exists() for f in [test_json, headers_json, input_raw]): + continue + + with open(test_json) as f: + test_spec = json.load(f) + with open(headers_json) as f: + headers = json.load(f) + with open(input_raw, "rb") as f: + body = f.read() + + test_id = test_spec["id"] + + # Add marks for tests we handle differently + marks = [] + if test_id in FILENAME_SANITIZATION_TESTS: + marks.append( + pytest.mark.xfail(reason="Parser sanitizes filenames for security") + ) + elif test_id in OPTIONAL_TESTS: + marks.append( + pytest.mark.xfail(reason="Optional feature not implemented") + ) + elif test_id in LENIENT_PARSING_TESTS: + marks.append( + pytest.mark.xfail(reason="Parser is lenient with malformed input") + ) + + test_cases.append( + pytest.param( + test_spec, + headers, + body, + id=test_id, + marks=marks, + ) + ) + + return test_cases + + +CONFORMANCE_TEST_CASES = load_conformance_test_cases() + + +@pytest.mark.parametrize("test_spec,headers,body", CONFORMANCE_TEST_CASES) +@pytest.mark.asyncio +async def test_conformance(test_spec, headers, body): + """ + Run conformance test cases from multipart-form-data-conformance. 
+ + Each test case specifies: + - headers: HTTP headers including Content-Type with boundary + - body: Raw multipart body bytes + - expected: Expected parse result (valid/invalid, parts list) + """ + scope = { + "type": "http", + "method": "POST", + "headers": [(k.encode(), v.encode()) for k, v in headers.items()], + } + request = Request(scope, make_receive(body)) + + expected = test_spec["expected"] + + if not expected["valid"]: + # Should raise an error for invalid input + with pytest.raises((BadRequest, ValueError)): + await request.form(files=True) + return + + # Parse form data + form = await request.form(files=True) + + # Verify each expected part + for i, expected_part in enumerate(expected["parts"]): + name = expected_part["name"] + + # Get value(s) for this name + values = form.getlist(name) + + # Find the value at the correct index for this name + # (handles multiple values with same name) + same_name_count = sum(1 for p in expected["parts"][:i] if p["name"] == name) + + if same_name_count >= len(values): + pytest.fail( + f"Expected part {name} at index {same_name_count} but only {len(values)} found" + ) + + value = values[same_name_count] + + # Determine expected content + if "body_base64" in expected_part: + expected_content = base64.b64decode(expected_part["body_base64"]) + elif "body_text" in expected_part: + expected_content = expected_part["body_text"].encode("utf-8") + else: + expected_content = None + + # Check for file vs field + # A part is a file if it has a filename OR filename_star + is_file = ( + expected_part.get("filename") is not None + or expected_part.get("filename_star") is not None + ) + + if is_file: + # It's a file + assert hasattr(value, "filename"), f"Expected file for {name}" + + # Check filename - use filename_star if present, else filename + expected_filename = expected_part.get("filename_star") or expected_part.get( + "filename" + ) + if expected_filename: + assert ( + value.filename == expected_filename + ), f"Filename 
mismatch: expected {expected_filename!r}, got {value.filename!r}" + + if expected_part.get("content_type"): + assert value.content_type == expected_part["content_type"] + + content = await value.read() + assert ( + len(content) == expected_part["body_size"] + ), f"Size mismatch: expected {expected_part['body_size']}, got {len(content)}" + if expected_content is not None: + assert content == expected_content + else: + # It's a text field + if hasattr(value, "filename"): + pytest.fail(f"Expected text field for {name}, got file") + + if expected_content is not None: + # For text fields, value is a string + try: + expected_text = expected_content.decode("utf-8") + except UnicodeDecodeError: + expected_text = expected_content.decode("latin-1") + assert ( + value == expected_text + ), f"Value mismatch: expected {expected_text!r}, got {value!r}" From b771e930bc16e128b48da80c9ccbba20cba177b5 Mon Sep 17 00:00:00 2001 From: Daniel Olasubomi Sobowale Date: Wed, 28 Jan 2026 20:41:58 -0600 Subject: [PATCH 33/53] Fix filter-input and search-input zoom on iOS Safari Closes #2346 --- .gitignore | 2 ++ datasette/static/app.css | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ce256606..12acd87e 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,5 @@ node_modules tests/*.dylib tests/*.so tests/*.dll + +.idea \ No newline at end of file diff --git a/datasette/static/app.css b/datasette/static/app.css index a3117152..a7fc7fa3 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -647,10 +647,14 @@ button.core[type=button] { border-radius: 3px; -webkit-appearance: none; padding: 9px 4px; - font-size: 1em; + font-size: 16px; font-family: Helvetica, sans-serif; } +#_search { + font-size: 16px; +} + From 5873578d49a894e358f8480fee27e17e37f6c97e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 29 Jan 2026 09:00:22 -0800 Subject: [PATCH 34/53] Release 1.0a24 Refs #2050, #2346, #2608, #2609, #2610, #2611, 
#2613, #2619, #2624, #2627, #2628, #2629, #2630, #2632 --- datasette/version.py | 2 +- docs/changelog.rst | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index fff37a72..de7585ca 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a23" +__version__ = "1.0a24" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index feba7e86..67ceeece 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,61 @@ Changelog ========= +.. _v1_0_a24: + +1.0a24 (2026-01-29) +------------------- + +``request.form()`` method for POST data and file uploads +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Datasette now includes a ``request.form()`` method for parsing form submissions, including handling file uploads. (`#2626 `__) + +This supports both ``application/x-www-form-urlencoded`` and ``multipart/form-data`` content types, and uses a new streaming multipart parser that processes uploads without buffering entire request bodies in memory. + +.. code-block:: python + + # Parse form fields (files are discarded by default) + form = await request.form() + username = form["username"] + + # Parse form fields AND file uploads + form = await request.form(files=True) + uploaded = form["avatar"] + content = await uploaded.read() + +The returned :ref:`FormData ` object provides dictionary-style access with support for multiple values per key via ``form.getlist("key")``. Uploaded files are represented as :ref:`UploadedFile ` objects with ``filename``, ``content_type``, ``size`` properties and async ``read()`` and ``seek()`` methods. + +Files smaller than 1MB are held in memory; larger files automatically spill to temporary files on disk. Configurable limits control maximum file size, request size, field counts and more. 
+ +Several internal views (permissions debug, messages debug, create token) now use ``request.form()`` instead of ``request.post_vars()``. + +``request.post_vars()`` remains available for backwards compatibility but is no longer the recommended API for handling POST data. + +``render_cell`` and ``foreign_key_tables`` extras for the JSON API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The table JSON API now supports ``?_extra=render_cell``, which returns the rendered HTML for each cell as produced by the :ref:`render_cell plugin hook `. Only columns whose rendered output differs from the default are included. (:issue:`2619`) + +The row JSON API also gains ``?_extra=render_cell`` and ``?_extra=foreign_key_tables`` extras, bringing it closer to parity with the table API. + +The row JSON API now returns ``"ok": true`` in its response, for consistency with the table API. + +``uv run pytest`` with a ``dev=`` dependency group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The recommended development environment for Datasette now uses `uv `__. You can now set up a development environment and run the test suite with just ``uv run pytest`` — no manual virtualenv or ``pip install`` step required. (:issue:`2611`) + +Other changes +~~~~~~~~~~~~~ + +- Plugins that raise ``datasette.utils.StartupError()`` during startup now display a clean error message instead of a full traceback. (:issue:`2624`) +- Schema refreshes are now throttled to at most once per second, providing a small performance increase. (:issue:`2629`) +- Minor performance improvement to ``remove_infinites`` — rows without infinity values now skip the list/dict reconstruction step. (:issue:`2629`) +- Filter inputs and the search input no longer trigger unwanted zoom on iOS Safari. Thanks, `Daniel Olasubomi Sobowale `__. (:issue:`2346`) +- ``table_names()`` and ``get_all_foreign_keys()`` now return results in deterministic sorted order. 
(:issue:`2628`) +- Switched linting to `ruff `__ and fixed all lint errors. (:issue:`2630`) + .. _v1_0_a23: 1.0a23 (2025-12-02) From 80b7f987cad59113896f28a29828ffe856218216 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Feb 2026 13:20:33 -0800 Subject: [PATCH 35/53] write_wrapper plugin hook for intercepting write operations (#2636) * Implement write_wrapper plugin hook for intercepting database writes Add a new `write_wrapper` plugin hook that lets plugins wrap write operations with before/after logic using a generator-based context manager pattern. The hook receives (datasette, database, request, transaction) and returns a generator function that takes a conn, yields once to let the write execute, and can run cleanup after. The write result is sent back via `generator.send()` and exceptions are thrown via `generator.throw()`, giving plugins full visibility. Also adds `request=None` parameter to execute_write, execute_write_fn, execute_write_script, and execute_write_many, and threads request through all view-layer call sites (insert, upsert, update, delete, drop, create table, canned queries). * Add documentation for wrap_write hook, fix lint issues Document the wrap_write plugin hook in plugin_hooks.rst with parameter descriptions and two examples: a simple logging wrapper and an advanced SQLite authorizer-based table protection pattern. Also fix black formatting and remove unused variable flagged by ruff. * Rename wrap_write hook to write_wrapper for consistency with asgi_wrapper * Move write_wrapper docs to just below prepare_connection * Refactor write_wrapper tests to use pytest.parametrize Consolidate duplicate test cases: merge before/after tests for execute_write_fn and execute_write into one parametrized test, and merge three parameter-passing tests into one parametrized test. 
Claude Code transcript: https://gisthost.github.io/?c4c12079434e69677e4aa8ac664b21b8/index.html --- datasette/database.py | 77 ++++++- datasette/hookspecs.py | 22 ++ datasette/views/database.py | 6 +- datasette/views/row.py | 4 +- datasette/views/table.py | 4 +- docs/plugin_hooks.rst | 87 ++++++++ tests/test_plugins.py | 30 +++ tests/test_write_wrapper.py | 387 ++++++++++++++++++++++++++++++++++++ 8 files changed, 604 insertions(+), 13 deletions(-) create mode 100644 tests/test_write_wrapper.py diff --git a/datasette/database.py b/datasette/database.py index 8e4ee2b6..1e6f9032 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -130,25 +130,25 @@ class Database: for connection in self._all_file_connections: connection.close() - async def execute_write(self, sql, params=None, block=True): + async def execute_write(self, sql, params=None, block=True, request=None): def _inner(conn): return conn.execute(sql, params or []) with trace("sql", database=self.name, sql=sql.strip(), params=params): - results = await self.execute_write_fn(_inner, block=block) + results = await self.execute_write_fn(_inner, block=block, request=request) return results - async def execute_write_script(self, sql, block=True): + async def execute_write_script(self, sql, block=True, request=None): def _inner(conn): return conn.executescript(sql) with trace("sql", database=self.name, sql=sql.strip(), executescript=True): results = await self.execute_write_fn( - _inner, block=block, transaction=False + _inner, block=block, transaction=False, request=request ) return results - async def execute_write_many(self, sql, params_seq, block=True): + async def execute_write_many(self, sql, params_seq, block=True, request=None): def _inner(conn): count = 0 @@ -163,7 +163,9 @@ class Database: with trace( "sql", database=self.name, sql=sql.strip(), executemany=True ) as kwargs: - results, count = await self.execute_write_fn(_inner, block=block) + results, count = await self.execute_write_fn( + 
_inner, block=block, request=request + ) kwargs["count"] = count return results @@ -187,7 +189,8 @@ class Database: # Threaded mode - send to write thread return await self._send_to_write_thread(fn, isolated_connection=True) - async def execute_write_fn(self, fn, block=True, transaction=True): + async def execute_write_fn(self, fn, block=True, transaction=True, request=None): + fn = self._wrap_fn_with_hooks(fn, request, transaction) if self.ds.executor is None: # non-threaded mode if self._write_connection is None: @@ -203,6 +206,25 @@ class Database: fn, block=block, transaction=transaction ) + def _wrap_fn_with_hooks(self, fn, request, transaction): + from .plugins import pm + + wrappers = pm.hook.write_wrapper( + datasette=self.ds, + database=self.name, + request=request, + transaction=transaction, + ) + wrappers = [w for w in wrappers if w is not None] + if not wrappers: + return fn + # Build the wrapped fn by nesting context manager generators. + # The first wrapper returned by pluggy is outermost. + original_fn = fn + for wrapper_factory in reversed(wrappers): + original_fn = _apply_write_wrapper(original_fn, wrapper_factory) + return original_fn + async def _send_to_write_thread( self, fn, block=True, isolated_connection=False, transaction=True ): @@ -680,6 +702,47 @@ class Database: return f"" +def _apply_write_wrapper(fn, wrapper_factory): + """Apply a single write_wrapper context manager around fn. + + ``wrapper_factory`` is a callable that takes ``(conn)`` and returns a + generator that yields exactly once. Code before the yield runs before + ``fn(conn)``, code after the yield runs after. The result of + ``fn(conn)`` is sent into the generator via ``.send()``, and any + exception raised by ``fn(conn)`` is thrown via ``.throw()``. 
+ """ + + def wrapped(conn): + gen = wrapper_factory(conn) + # Advance to the yield point (run "before" code) + try: + next(gen) + except StopIteration: + # Generator didn't yield — just run fn unchanged + return fn(conn) + + # Execute the actual write + try: + result = fn(conn) + except Exception: + # Throw exception into generator so it can handle it + try: + gen.throw(*sys.exc_info()) + except StopIteration: + pass + # Re-raise the original exception + raise + else: + # Send the result back through the yield + try: + gen.send(result) + except StopIteration: + pass + return result + + return wrapped + + class WriteTask: __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 3f6a1425..b993fb61 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -220,3 +220,25 @@ def top_query(datasette, request, database, sql): @hookspec def top_canned_query(datasette, request, database, query_name): """HTML to include at the top of the canned query page""" + + +@hookspec +def write_wrapper(datasette, database, request, transaction): + """Called when a write function is about to execute. + + Return a generator function that accepts a ``conn`` argument. + The generator should ``yield`` exactly once: code before the + ``yield`` runs before the write, code after the ``yield`` runs + after the write completes. The result of the write is sent + back through the ``yield``, so you can capture it with + ``result = yield``. + + If the write raises an exception, it is thrown into the generator + so you can handle it with a try/except around the ``yield``. + + ``request`` may be ``None`` for writes not originating from an + HTTP request. ``transaction`` is ``True`` if the write will + be wrapped in a transaction. + + Return ``None`` to skip wrapping. 
+ """ diff --git a/datasette/views/database.py b/datasette/views/database.py index 51c752a0..e5f2cf16 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -466,7 +466,9 @@ class QueryView(View): ok = None redirect_url = None try: - cursor = await db.execute_write(canned_query["sql"], params_for_query) + cursor = await db.execute_write( + canned_query["sql"], params_for_query, request=request + ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO @@ -1119,7 +1121,7 @@ class TableCreateView(BaseView): return table.schema try: - schema = await db.execute_write_fn(create_table) + schema = await db.execute_write_fn(create_table, request=request) except Exception as e: return _error([str(e)]) diff --git a/datasette/views/row.py b/datasette/views/row.py index 718ee00c..ff0a3594 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -245,7 +245,7 @@ class RowDeleteView(BaseView): sqlite_utils.Database(conn)[resolved.table].delete(resolved.pk_values) try: - await resolved.db.execute_write_fn(delete_row) + await resolved.db.execute_write_fn(delete_row, request=request) except Exception as e: return _error([str(e)], 500) @@ -305,7 +305,7 @@ class RowUpdateView(BaseView): ) try: - await resolved.db.execute_write_fn(update_row) + await resolved.db.execute_write_fn(update_row, request=request) except Exception as e: return _error([str(e)], 400) diff --git a/datasette/views/table.py b/datasette/views/table.py index b07b62ae..d4dbc194 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -550,7 +550,7 @@ class TableInsertView(BaseView): method_all(rows, **kwargs) try: - rows = await db.execute_write_fn(insert_or_upsert_rows) + rows = await db.execute_write_fn(insert_or_upsert_rows, request=request) except Exception as e: return _error([str(e)]) result = {"ok": True} @@ -670,7 +670,7 @@ class TableDropView(BaseView): def drop_table(conn): 
sqlite_utils.Database(conn)[table_name].drop() - await db.execute_write_fn(drop_table) + await db.execute_write_fn(drop_table, request=request) await self.ds.track_event( DropTableEvent( actor=request.actor, database=database_name, table=table_name diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index ad4a70f8..468b0ade 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -61,6 +61,92 @@ arguments and can be called like this:: Examples: `datasette-jellyfish `__, `datasette-jq `__, `datasette-haversine `__, `datasette-rure `__ +.. _plugin_hook_write_wrapper: + +write_wrapper(datasette, database, request, transaction) +-------------------------------------------------------- + +``datasette`` - :ref:`internals_datasette` + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. + +``database`` - string + The name of the database being written to. + +``request`` - :ref:`internals_request` or ``None`` + The HTTP request that triggered this write, if available. This will be ``None`` for writes that do not originate from an HTTP request (e.g. writes triggered by plugins during startup). + +``transaction`` - bool + ``True`` if the write will be wrapped in a database transaction. + +Return a generator function that accepts a ``conn`` argument (a SQLite connection object). The generator should ``yield`` exactly once. Code before the ``yield`` runs before the write function executes; code after the ``yield`` runs after it completes. + +The result of the write function is sent back through the ``yield``, so you can capture it with ``result = yield``. + +If the write function raises an exception, it is thrown into the generator so you can handle it with a ``try`` / ``except`` around the ``yield``. + +Return ``None`` to skip wrapping for this particular write. + +This example logs every write operation: + +.. 
code-block:: python + + from datasette import hookimpl + + + @hookimpl + def write_wrapper(datasette, database, request): + def wrapper(conn): + print(f"Before write to {database}") + result = yield + print(f"After write to {database}") + + return wrapper + +This more advanced example uses the SQLite authorizer callback to block writes to a specific table for non-admin users: + +.. code-block:: python + + import sqlite3 + from datasette import hookimpl + + WRITE_ACTIONS = ( + sqlite3.SQLITE_INSERT, + sqlite3.SQLITE_UPDATE, + sqlite3.SQLITE_DELETE, + ) + + + @hookimpl + def write_wrapper(datasette, database, request): + actor = None + if request: + actor = request.actor + if actor and actor.get("id") == "admin": + return None + + def wrapper(conn): + def authorizer( + action, arg1, arg2, db_name, trigger + ): + if ( + action in WRITE_ACTIONS + and arg1 == "protected_table" + ): + return sqlite3.SQLITE_DENY + return sqlite3.SQLITE_OK + + conn.set_authorizer(authorizer) + try: + yield + finally: + conn.set_authorizer(None) + + return wrapper + +The ``conn`` object passed to the generator is the same connection that the write function will use. Because the generator and the write function execute together in a single call on the write thread, any state you set on the connection (authorizers, pragmas, temporary tables) is visible to the write and can be cleaned up afterwards. + +When multiple plugins implement ``write_wrapper``, they are nested following pluggy's default calling convention. + .. 
_plugin_hook_prepare_jinja2_environment: prepare_jinja2_environment(env, datasette) @@ -2249,3 +2335,4 @@ The plugin can then call ``datasette.track_event(...)`` to send a ``ban-user`` e await datasette.track_event( BanUserEvent(user={"id": 1, "username": "cleverbot"}) ) + diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 6c23b3ef..7c2180e8 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1524,6 +1524,36 @@ async def test_hook_register_events(): assert any(k.__name__ == "OneEvent" for k in datasette.event_classes) +@pytest.mark.asyncio +async def test_hook_write_wrapper(): + datasette = Datasette(memory=True) + log = [] + + class WrapWritePlugin: + __name__ = "WrapWritePlugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if database != "_memory": + return None + + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(WrapWritePlugin(), name="WrapWritePluginTest") + try: + db = datasette.get_database("_memory") + await db.execute_write("create table t (id integer primary key)") + assert log == ["before", "after"] + finally: + pm.unregister(name="WrapWritePluginTest") + + @pytest.mark.asyncio async def test_hook_register_actions_view_collection(): datasette = Datasette(memory=True, plugins_dir=PLUGINS_DIR) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py new file mode 100644 index 00000000..e05a2a9f --- /dev/null +++ b/tests/test_write_wrapper.py @@ -0,0 +1,387 @@ +""" +Tests for the write_wrapper plugin hook. 
+""" + +from datasette.app import Datasette +from datasette.hookspecs import hookimpl +from datasette.plugins import pm +import pytest +import time + + +@pytest.fixture +def datasette(tmp_path): + db_path = str(tmp_path / "test.db") + ds = Datasette([db_path]) + return ds + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "use_execute_write", + (False, True), + ids=["execute_write_fn", "execute_write"], +) +async def test_write_wrapper_before_and_after(datasette, use_execute_write): + """Test that code before and after yield both execute.""" + log = [] + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(Plugin(), name="test_before_after") + try: + db = datasette.get_database("test") + if use_execute_write: + await db.execute_write( + "create table if not exists t (id integer primary key)" + ) + else: + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t (id integer primary key)" + ) + ) + assert log == ["before", "after"] + finally: + pm.unregister(name="test_before_after") + + +@pytest.mark.asyncio +async def test_write_wrapper_receives_result_via_yield(datasette): + """Test that the result of fn(conn) is sent back through yield.""" + captured = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + result = yield + captured["result"] = result + + return wrapper + + pm.register(Plugin(), name="test_result") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t2 (id integer primary key)" + ) + ) + assert "result" in captured + # Should be a sqlite3 Cursor + assert captured["result"] is not None + finally: + pm.unregister(name="test_result") + + 
+@pytest.mark.asyncio +async def test_write_wrapper_exception_thrown_into_generator(datasette): + """Test that exceptions from fn(conn) are thrown into the generator.""" + caught = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + try: + yield + except Exception as e: + caught["error"] = e + + return wrapper + + pm.register(Plugin(), name="test_exception") + try: + db = datasette.get_database("test") + with pytest.raises(Exception, match="deliberate"): + await db.execute_write_fn( + lambda conn: (_ for _ in ()).throw(Exception("deliberate")) + ) + assert "error" in caught + assert str(caught["error"]) == "deliberate" + finally: + pm.unregister(name="test_exception") + + +@pytest.mark.asyncio +async def test_write_wrapper_conn_is_usable(datasette): + """Test that the conn passed to the wrapper can execute SQL.""" + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + conn.execute("create table if not exists hook_log (msg text)") + conn.execute("insert into hook_log values ('before')") + yield + conn.execute("insert into hook_log values ('after')") + + return wrapper + + pm.register(Plugin(), name="test_conn") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t3 (id integer primary key)" + ) + ) + result = await db.execute("select msg from hook_log order by rowid") + messages = [row[0] for row in result.rows] + assert messages == ["before", "after"] + finally: + pm.unregister(name="test_conn") + + +@pytest.mark.asyncio +async def test_write_wrapper_multiple_plugins_nest(datasette): + """Test that multiple write_wrapper plugins nest correctly.""" + log = [] + + class PluginA: + __name__ = "PluginA" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, 
transaction): + def wrapper(conn): + log.append("A-before") + yield + log.append("A-after") + + return wrapper + + class PluginB: + __name__ = "PluginB" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + log.append("B-before") + yield + log.append("B-after") + + return wrapper + + pm.register(PluginA(), name="PluginA") + pm.register(PluginB(), name="PluginB") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t4 (id integer primary key)" + ) + ) + assert set(log) == {"A-before", "A-after", "B-before", "B-after"} + # Verify proper nesting: each plugin's before/after should be + # symmetric around the write + a_before = log.index("A-before") + a_after = log.index("A-after") + b_before = log.index("B-before") + b_after = log.index("B-after") + if a_before < b_before: + assert a_after > b_after, "A is outer so A-after should come after B-after" + else: + assert b_after > a_after, "B is outer so B-after should come after A-after" + finally: + pm.unregister(name="PluginA") + pm.unregister(name="PluginB") + + +@pytest.mark.asyncio +async def test_write_wrapper_return_none_skips(datasette): + """Test that returning None from write_wrapper means no wrapping.""" + log = [] + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + log.append("hook-called") + return None + + pm.register(Plugin(), name="test_skip") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t5 (id integer primary key)" + ) + ) + assert log == ["hook-called"] + finally: + pm.unregister(name="test_skip") + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "request_value,transaction_value,expected_request,expected_transaction", + ( + ("fake-request", True, "fake-request", True), + (None, True, None, 
True), + (None, False, None, False), + ), + ids=["with-request", "request-none-by-default", "transaction-false"], +) +async def test_write_wrapper_hook_parameters( + datasette, + request_value, + transaction_value, + expected_request, + expected_transaction, +): + """Test that request and transaction parameters are passed through.""" + captured = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + captured["request"] = request + captured["database"] = database + captured["transaction"] = transaction + + pm.register(Plugin(), name="test_params") + try: + db = datasette.get_database("test") + kwargs = {"transaction": transaction_value} + if request_value is not None: + kwargs["request"] = request_value + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t6 (id integer primary key)" + ), + **kwargs, + ) + assert captured["request"] == expected_request + assert captured["database"] == "test" + assert captured["transaction"] == expected_transaction + finally: + pm.unregister(name="test_params") + + +@pytest.mark.asyncio +async def test_write_wrapper_via_api(tmp_path): + """Test that write_wrapper fires for API write operations.""" + log = [] + + db_path = str(tmp_path / "test.db") + ds = Datasette([db_path], pdb=False) + ds.root_enabled = True + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if database != "test": + return None + + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(Plugin(), name="test_api") + try: + db = ds.get_database("test") + await db.execute_write( + "create table if not exists api_test (id integer primary key, name text)" + ) + log.clear() + + token = "dstok_{}".format( + ds.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, + namespace="token", + ) + ) + response = await 
ds.client.post( + "/test/api_test/-/insert", + json={"row": {"name": "test"}, "return": True}, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201, response.json() + assert log == ["before", "after"] + finally: + pm.unregister(name="test_api") + + +@pytest.mark.asyncio +async def test_write_wrapper_change_group_pattern(datasette): + """Test the motivating use case: activating a change group around a write.""" + db = datasette.get_database("test") + + await db.execute_write( + "create table if not exists groups (id integer primary key, current integer)" + ) + await db.execute_write( + "create table if not exists data (id integer primary key, value text)" + ) + await db.execute_write("insert into groups (id, current) values (1, null)") + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if request and getattr(request, "group_id", None): + group_id = request.group_id + + def wrapper(conn): + conn.execute( + "update groups set current = 1 where id = ?", [group_id] + ) + yield + conn.execute("update groups set current = null where current = 1") + + return wrapper + + pm.register(Plugin(), name="test_change_group") + try: + + class FakeRequest: + group_id = 1 + + await db.execute_write_fn( + lambda conn: conn.execute("insert into data (value) values ('test')"), + request=FakeRequest(), + ) + + result = await db.execute("select current from groups where id = 1") + assert result.rows[0][0] is None + finally: + pm.unregister(name="test_change_group") From 8a315f3d7df8c668fdca216bbb55fe7ef44626dd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Feb 2026 13:27:23 -0800 Subject: [PATCH 36/53] Added a test to exercise the write_wrapper example This example in the docs is now duplicated in a test: 
https://github.com/simonw/datasette/blob/80b7f987cad59113896f28a29828ffe856218216/docs/plugin_hooks.rst#write-wrapper-datasette-database-request-transaction Refs #2637 --- tests/test_write_wrapper.py | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index e05a2a9f..38e5c94e 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -6,6 +6,7 @@ from datasette.app import Datasette from datasette.hookspecs import hookimpl from datasette.plugins import pm import pytest +import sqlite3 import time @@ -385,3 +386,92 @@ async def test_write_wrapper_change_group_pattern(datasette): assert result.rows[0][0] is None finally: pm.unregister(name="test_change_group") + + +WRITE_ACTIONS = ( + sqlite3.SQLITE_INSERT, + sqlite3.SQLITE_UPDATE, + sqlite3.SQLITE_DELETE, +) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "actor,table,should_deny", + ( + (None, "protected_table", True), + ({"id": "regular"}, "protected_table", True), + ({"id": "admin"}, "protected_table", False), + (None, "other_table", False), + ({"id": "regular"}, "other_table", False), + ), + ids=[ + "no-actor-protected", + "regular-user-protected", + "admin-protected", + "no-actor-other", + "regular-user-other", + ], +) +async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny): + """Test the docs example that uses set_authorizer to block writes to a protected table.""" + db = datasette.get_database("test") + await db.execute_write( + "create table if not exists protected_table (id integer primary key, value text)" + ) + await db.execute_write( + "create table if not exists other_table (id integer primary key, value text)" + ) + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + actor = None + if request: + actor = request.actor + if actor and actor.get("id") == "admin": + return None + + 
def wrapper(conn): + def authorizer(action, arg1, arg2, db_name, trigger): + if action in WRITE_ACTIONS and arg1 == "protected_table": + return sqlite3.SQLITE_DENY + return sqlite3.SQLITE_OK + + conn.set_authorizer(authorizer) + try: + yield + finally: + conn.set_authorizer(None) + + return wrapper + + class FakeRequest: + def __init__(self, actor): + self.actor = actor + + pm.register(Plugin(), name="test_set_authorizer") + try: + request = FakeRequest(actor) + if should_deny: + with pytest.raises(Exception): + await db.execute_write_fn( + lambda conn: conn.execute( + f"insert into {table} (value) values ('test')" + ), + request=request, + ) + else: + await db.execute_write_fn( + lambda conn: conn.execute( + f"insert into {table} (value) values ('test')" + ), + request=request, + ) + result = await db.execute(f"select value from {table} order by rowid desc limit 1") + assert result.rows[0][0] == "test" + finally: + pm.unregister(name="test_set_authorizer") From 170f9de774fd3d7487a40c9f67dc12a2c626e96e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:21:25 +0000 Subject: [PATCH 37/53] Add pks parameter to render_cell() plugin hook The render_cell() hook now receives a pks parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. For tables without an explicit primary key, pks is ["rowid"]. For custom SQL queries and views, pks is an empty list []. 
https://claude.ai/code/session_01HFYfevAziq4fSYTNRD9ZCh --- datasette/hookspecs.py | 2 +- datasette/views/database.py | 1 + datasette/views/row.py | 1 + datasette/views/table.py | 3 +++ docs/plugin_hooks.rst | 9 +++++--- tests/fixtures.py | 2 ++ tests/plugins/my_plugin.py | 3 ++- tests/test_plugins.py | 46 +++++++++++++++++++++++++++++++++++++ 8 files changed, 62 insertions(+), 5 deletions(-) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index b993fb61..89be6a65 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -55,7 +55,7 @@ def publish_subcommand(publish): @hookspec -def render_cell(row, value, column, table, database, datasette, request): +def render_cell(row, value, column, table, pks, database, datasette, request): """Customize rendering of HTML table cell values""" diff --git a/datasette/views/database.py b/datasette/views/database.py index e5f2cf16..a42ac758 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1205,6 +1205,7 @@ async def display_rows(datasette, database, request, rows, columns): value=value, column=column, table=None, + pks=[], database=database, datasette=datasette, request=request, diff --git a/datasette/views/row.py b/datasette/views/row.py index ff0a3594..9c59cd3b 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -130,6 +130,7 @@ class RowView(DataView): value=value, column=column, table=table, + pks=resolved.pks, database=database, datasette=self.ds, request=request, diff --git a/datasette/views/table.py b/datasette/views/table.py index d4dbc194..594e925e 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -235,6 +235,7 @@ async def display_columns_and_rows( value=value, column=column, table=table_name, + pks=pks_for_display, database=database_name, datasette=datasette, request=request, @@ -1494,6 +1495,7 @@ async def table_view_data( async def extra_render_cell(): "Rendered HTML for each cell using the render_cell plugin hook" + 
pks_for_display = pks if pks else (["rowid"] if not is_view else []) columns = [col[0] for col in results.description] rendered_rows = [] for row in rows: @@ -1506,6 +1508,7 @@ async def table_view_data( value=value, column=column, table=table_name, + pks=pks_for_display, database=database_name, datasette=datasette, request=request, diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 468b0ade..068469a8 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -9,7 +9,7 @@ Each plugin can implement one or more hooks using the ``@hookimpl`` decorator ag When you implement a plugin hook you can accept any or all of the parameters that are documented as being passed to that hook. -For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(row, value, column, table, database, datasette)``: +For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(row, value, column, table, pks, database, datasette, request)``: .. code-block:: python @@ -474,8 +474,8 @@ Examples: `datasette-publish-fly Date: Tue, 17 Feb 2026 20:09:04 +0000 Subject: [PATCH 38/53] Fix test assertions broken by new fixture rows in 170f9de The render_cell pks parameter commit added rows to compound_primary_key (2->3 rows) and no_primary_key (201->202 rows) tables but did not update existing tests that had hardcoded row count expectations. 
https://claude.ai/code/session_01XfPSZfK57bzRRiEa7Kz5n1 --- tests/test_api.py | 4 ++-- tests/test_table_api.py | 17 +++++++++-------- tests/test_table_html.py | 6 ++++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index e3951df9..95958a72 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -182,7 +182,7 @@ async def test_database_page(ds_client): # -- compound primary keys compound_pk = tables_by_name["compound_primary_key"] assert compound_pk["primary_keys"] == ["pk1", "pk2"] - assert compound_pk["count"] == 2 + assert compound_pk["count"] == 3 compound_three = tables_by_name["compound_three_primary_keys"] assert compound_three["primary_keys"] == ["pk1", "pk2", "pk3"] @@ -196,7 +196,7 @@ async def test_database_page(ds_client): # -- no_primary_key: hidden table with generated data no_pk = tables_by_name["no_primary_key"] assert no_pk["hidden"] is True - assert no_pk["count"] == 201 + assert no_pk["count"] == 202 assert no_pk["primary_keys"] == [] # -- roadside attractions relationship chain diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 49df3ad5..943a1549 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -136,6 +136,7 @@ async def test_table_shape_object_compound_primary_key(ds_client): assert response.json() == { "a,b": {"pk1": "a", "pk2": "b", "content": "c"}, "a~2Fb,~2Ec-d": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, + "d,e": {"pk1": "d", "pk2": "e", "content": "RENDER_CELL_DEMO"}, } @@ -169,11 +170,11 @@ async def test_table_with_reserved_word_name(ds_client): @pytest.mark.parametrize( "path,expected_rows,expected_pages", [ - ("/fixtures/no_primary_key.json", 201, 5), - ("/fixtures/paginated_view.json", 201, 9), - ("/fixtures/no_primary_key.json?_size=25", 201, 9), - ("/fixtures/paginated_view.json?_size=50", 201, 5), - ("/fixtures/paginated_view.json?_size=max", 201, 3), + ("/fixtures/no_primary_key.json", 202, 5), + ("/fixtures/paginated_view.json", 
202, 9), + ("/fixtures/no_primary_key.json?_size=25", 202, 9), + ("/fixtures/paginated_view.json?_size=50", 202, 5), + ("/fixtures/paginated_view.json?_size=max", 202, 3), ("/fixtures/123_starts_with_digits.json", 0, 1), # Ensure faceting doesn't break pagination: ("/fixtures/compound_three_primary_keys.json?_facet=pk1", 1001, 21), @@ -232,7 +233,7 @@ async def test_page_size_zero(ds_client): ) assert response.status_code == 200 assert [] == response.json()["rows"] - assert 201 == response.json()["count"] + assert 202 == response.json()["count"] assert None is response.json()["next"] assert None is response.json()["next_url"] @@ -722,11 +723,11 @@ def test_page_size_matching_max_returned_rows( while path: response = app_client_returned_rows_matches_page_size.get(path) fetched.extend(response.json["rows"]) - assert len(response.json["rows"]) in (1, 50) + assert len(response.json["rows"]) in (2, 50) path = response.json["next_url"] if path: path = path.replace("http://localhost", "") - assert len(fetched) == 201 + assert len(fetched) == 202 @pytest.mark.asyncio diff --git a/tests/test_table_html.py b/tests/test_table_html.py index 90be591a..00cf9e19 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -597,6 +597,12 @@ async def test_table_html_compound_primary_key(ds_client): '.c-d', 'c', ], + [ + 'd,e', + 'd', + 'e', + '{"row": {"pk1": "d", "pk2": "e", "content": "RENDER_CELL_DEMO"}, "column": "content", "table": "compound_primary_key", "database": "fixtures", "pks": ["pk1", "pk2"], "config": {"depth": "database"}}', + ], ] assert [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") From 5c3137d14858c0750c93bb61ef593d807cadba43 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 Feb 2026 13:30:24 -0800 Subject: [PATCH 39/53] Black formatting --- datasette/app.py | 10 ++----- datasette/cli.py | 8 ++--- datasette/database.py | 36 +++++------------------ datasette/default_permissions/defaults.py | 1 - datasette/facets.py 
| 12 ++------ datasette/inspect.py | 17 +++-------- datasette/permissions.py | 1 - datasette/utils/__init__.py | 4 +-- datasette/utils/actions_sql.py | 18 ++++-------- datasette/utils/internal_db.py | 14 +++------ datasette/utils/permissions.py | 7 ++--- datasette/views/base.py | 8 ++--- datasette/views/database.py | 8 ++--- datasette/views/index.py | 1 - datasette/views/special.py | 1 - tests/conftest.py | 1 - tests/fixtures.py | 20 +++---------- tests/plugins/my_plugin.py | 8 ++--- tests/test_cli.py | 8 ++--- tests/test_cli_serve_get.py | 4 +-- tests/test_config_dir.py | 6 ++-- tests/test_csv.py | 22 ++++---------- tests/test_html.py | 7 ++--- tests/test_internals_database.py | 12 +++----- tests/test_plugins.py | 10 ++----- tests/test_publish_cloudrun.py | 14 +++------ tests/test_routes.py | 6 ++-- tests/test_table_api.py | 8 ++--- tests/test_utils.py | 22 ++++---------- tests/test_utils_permissions.py | 6 ++-- tests/test_write_wrapper.py | 4 ++- 31 files changed, 82 insertions(+), 222 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 75f6071e..6efaa430 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -633,9 +633,7 @@ class Datasette: """ INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) VALUES {} - """.format( - placeholders - ), + """.format(placeholders), values, ) await populate_schema_tables(internal_db, db) @@ -813,14 +811,12 @@ class Datasette: return orig async def get_instance_metadata(self): - rows = await self.get_internal_database().execute( - """ + rows = await self.get_internal_database().execute(""" SELECT key, value FROM metadata_instance - """ - ) + """) return dict(rows) async def get_database_metadata(self, database_name: str): diff --git a/datasette/cli.py b/datasette/cli.py index 1d0cb022..121911ab 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -109,15 +109,11 @@ def sqlite_extensions(fn): return fn(*args, **kwargs) except AttributeError as e: if "enable_load_extension" 
in str(e): - raise click.ClickException( - textwrap.dedent( - """ + raise click.ClickException(textwrap.dedent(""" Your Python installation does not have the ability to load SQLite extensions. More information: https://datasette.io/help/extensions - """ - ).strip() - ) + """).strip()) raise return wrapped diff --git a/datasette/database.py b/datasette/database.py index 1e6f9032..fcf69c7f 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -532,10 +532,7 @@ class Database: ] if sqlite_version()[1] >= 37: - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" with shadow_tables as ( select name from pragma_table_list @@ -554,14 +551,9 @@ class Database: select name from core_tables ) select name from combined order by 1 - """ - ) - ] + """)] else: - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" WITH base AS ( SELECT name FROM sqlite_master @@ -607,22 +599,15 @@ class Database: SELECT name FROM fts3_shadow_tables ) SELECT name FROM final ORDER BY 1 - """ - ) - ] + """)] # Also hide any FTS tables that have a content= argument - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" SELECT name FROM sqlite_master WHERE sql LIKE '%VIRTUAL TABLE%' AND sql LIKE '%USING FTS%' AND sql LIKE '%content=%' - """ - ) - ] + """)] has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: @@ -641,16 +626,11 @@ class Database: "KNN", "KNN2", ] + [ - r[0] - for r in ( - await self.execute( - """ + r[0] for r in (await self.execute(""" select name from sqlite_master where name like "idx_%" and type = "table" - """ - ) - ).rows + """)).rows ] return hidden_tables diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index f5a6a270..4c74219d 100644 --- a/datasette/default_permissions/defaults.py +++ 
b/datasette/default_permissions/defaults.py @@ -14,7 +14,6 @@ if TYPE_CHECKING: from datasette import hookimpl from datasette.permissions import PermissionSQL - # Actions that are allowed by default (unless --default-deny is used) DEFAULT_ALLOW_ACTIONS = frozenset( { diff --git a/datasette/facets.py b/datasette/facets.py index dd149424..bc4b6904 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -233,9 +233,7 @@ class ColumnFacet(Facet): ) where {col} is not null group by {col} order by count desc, value limit {limit} - """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 - ) + """.format(col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1) try: facet_rows_results = await self.ds.execute( self.database, @@ -482,9 +480,7 @@ class DateFacet(Facet): select date({column}) from ( select * from ({sql}) limit 100 ) where {column} glob "????-??-*" - """.format( - column=escape_sqlite(column), sql=self.sql - ) + """.format(column=escape_sqlite(column), sql=self.sql) try: results = await self.ds.execute( self.database, @@ -530,9 +526,7 @@ class DateFacet(Facet): ) where date({col}) is not null group by date({col}) order by count desc, value limit {limit} - """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 - ) + """.format(col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1) try: facet_rows_results = await self.ds.execute( self.database, diff --git a/datasette/inspect.py b/datasette/inspect.py index ede142d0..5e681e03 100644 --- a/datasette/inspect.py +++ b/datasette/inspect.py @@ -10,7 +10,6 @@ from .utils import ( sqlite3, ) - HASH_BLOCK_SIZE = 1024 * 1024 @@ -70,16 +69,11 @@ def inspect_tables(conn, database_metadata): tables[table]["foreign_keys"] = info # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r["name"] - for r in conn.execute( - """ + hidden_tables = [r["name"] for r in conn.execute(""" select name from sqlite_master where rootpage = 0 and sql like 
'%VIRTUAL TABLE%USING FTS%' - """ - ) - ] + """)] if detect_spatialite(conn): # Also hide Spatialite internal tables @@ -94,14 +88,11 @@ def inspect_tables(conn, database_metadata): "views_geometry_columns", "virts_geometry_columns", ] + [ - r["name"] - for r in conn.execute( - """ + r["name"] for r in conn.execute(""" select name from sqlite_master where name like "idx_%" and type = "table" - """ - ) + """) ] for t in tables.keys(): diff --git a/datasette/permissions.py b/datasette/permissions.py index c48293ac..b5e72b8e 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -3,7 +3,6 @@ from dataclasses import dataclass from typing import Any, NamedTuple import contextvars - # Context variable to track when permission checks should be skipped _skip_permission_checks = contextvars.ContextVar( "skip_permission_checks", default=False diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index d0d216eb..c6973d06 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -677,9 +677,7 @@ def detect_fts_sql(table): and sql like '%VIRTUAL TABLE%USING FTS%' ) ) - """.format( - table=table.replace("'", "''") - ) + """.format(table=table.replace("'", "''")) def detect_json1(conn=None): diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 9c2add0e..14383253 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -180,13 +180,11 @@ async def _build_single_action_sql( # Skip plugins that only provide restriction_sql (no permission rules) if permission_sql.sql is None: continue - rule_sqls.append( - f""" + rule_sqls.append(f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( {permission_sql.sql} ) - """.strip() - ) + """.strip()) # If no rules, return empty result (deny all) if not rule_sqls: @@ -405,14 +403,12 @@ async def _build_single_action_sql( # Add restriction filter if there are restrictions if restriction_sqls: - 
query_parts.append( - """ + query_parts.append(""" AND EXISTS ( SELECT 1 FROM restriction_list r WHERE (r.parent = decisions.parent OR r.parent IS NULL) AND (r.child = decisions.child OR r.child IS NULL) - )""" - ) + )""") # Add parent filter if specified if parent is not None: @@ -479,13 +475,11 @@ async def build_permission_rules_sql( if permission_sql.sql is None: continue - union_parts.append( - f""" + union_parts.append(f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( {permission_sql.sql} ) - """.strip() - ) + """.strip()) rules_union = " UNION ALL ".join(union_parts) return rules_union, all_params, restriction_sqls diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index a3afbab2..e4ebddde 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -3,8 +3,7 @@ from datasette.utils import table_column_details async def init_internal_db(db): - create_tables_sql = textwrap.dedent( - """ + create_tables_sql = textwrap.dedent(""" CREATE TABLE IF NOT EXISTS catalog_databases ( database_name TEXT PRIMARY KEY, path TEXT, @@ -68,16 +67,13 @@ async def init_internal_db(db): FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name), FOREIGN KEY (database_name, table_name) REFERENCES catalog_tables(database_name, table_name) ); - """ - ).strip() + """).strip() await db.execute_write_script(create_tables_sql) await initialize_metadata_tables(db) async def initialize_metadata_tables(db): - await db.execute_write_script( - textwrap.dedent( - """ + await db.execute_write_script(textwrap.dedent(""" CREATE TABLE IF NOT EXISTS metadata_instance ( key text, value text, @@ -107,9 +103,7 @@ async def initialize_metadata_tables(db): value text, unique(database_name, resource_name, column_name, key) ); - """ - ) - ) + """)) async def populate_schema_tables(internal_db, db): diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 6c30a12a..fd1e41a1 
100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -9,7 +9,6 @@ from datasette.permissions import PermissionSQL from datasette.plugins import pm from datasette.utils import await_me_maybe - # Sentinel object to indicate permission checks should be skipped SKIP_PERMISSION_CHECKS = object() @@ -116,13 +115,11 @@ def build_rules_union( if p.sql is None: continue - parts.append( - f""" + parts.append(f""" SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( {p.sql} ) - """.strip() - ) + """.strip()) if not parts: # Empty UNION that returns no rows diff --git a/datasette/views/base.py b/datasette/views/base.py index bdc9f742..e4c1c738 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -241,8 +241,7 @@ class DataView(BaseView): data, extra_template_data, templates = response_or_template_contexts except QueryInterrupted as ex: raise DatasetteError( - textwrap.dedent( - """ + textwrap.dedent("""

    SQL query took too long. The time limit is controlled by the sql_time_limit_ms configuration option.

    @@ -251,10 +250,7 @@ class DataView(BaseView): let ta = document.querySelector("textarea"); ta.style.height = ta.scrollHeight + "px"; - """.format( - escape(ex.sql) - ) - ).strip(), + """.format(escape(ex.sql))).strip(), title="SQL Interrupted", status=400, message_is_html=True, diff --git a/datasette/views/database.py b/datasette/views/database.py index a42ac758..93ad8eda 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -615,8 +615,7 @@ class QueryView(View): rows = results.rows except QueryInterrupted as ex: raise DatasetteError( - textwrap.dedent( - """ + textwrap.dedent("""

    SQL query took too long. The time limit is controlled by the sql_time_limit_ms configuration option.

    @@ -625,10 +624,7 @@ class QueryView(View): let ta = document.querySelector("textarea"); ta.style.height = ta.scrollHeight + "px"; - """.format( - markupsafe.escape(ex.sql) - ) - ).strip(), + """.format(markupsafe.escape(ex.sql))).strip(), title="SQL Interrupted", status=400, message_is_html=True, diff --git a/datasette/views/index.py b/datasette/views/index.py index a59c687c..6a9462ac 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -12,7 +12,6 @@ from datasette.version import __version__ from .base import BaseView - # Truncate table list on homepage at: TRUNCATE_AT = 5 diff --git a/datasette/views/special.py b/datasette/views/special.py index 57a3024d..640c82eb 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -13,7 +13,6 @@ from .base import BaseView, View import secrets import urllib - logger = logging.getLogger(__name__) diff --git a/tests/conftest.py b/tests/conftest.py index ad7243c1..efa02c0a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,6 @@ import time from dataclasses import dataclass from datasette import Event, hookimpl - try: import pysqlite3 as sqlite3 except ImportError: diff --git a/tests/fixtures.py b/tests/fixtures.py index 0c110a94..9f99519a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -13,7 +13,6 @@ import string import tempfile import textwrap - # This temp file is used by one of the plugin config tests TEMP_PLUGIN_SECRET_FILE = os.path.join(tempfile.gettempdir(), "plugin-secret") @@ -331,16 +330,14 @@ CONFIG = { "sql": "select :_header_user_agent as user_agent, :_now_datetime_utc as datetime", }, "neighborhood_search": { - "sql": textwrap.dedent( - """ + "sql": textwrap.dedent(""" select _neighborhood, facet_cities.name, state from facetable join facet_cities on facetable._city_id = facet_cities.id where _neighborhood like '%' || :text || '%' order by _neighborhood; - """ - ), + """), "title": "Search neighborhoods", "description_html": "Demonstrating 
simple like search", "fragment": "fragment-goes-here", @@ -710,19 +707,10 @@ CREATE VIEW searchable_view_configured_by_metadata AS for a, b, c, content in generate_compound_rows(1001) ] ) - + "\n".join( - [ - """INSERT INTO sortable VALUES ( + + "\n".join(["""INSERT INTO sortable VALUES ( "{pk1}", "{pk2}", "{content}", {sortable}, {sortable_with_nulls}, {sortable_with_nulls_2}, "{text}"); - """.format( - **row - ).replace( - "None", "null" - ) - for row in generate_sortable_rows(201) - ] - ) + """.format(**row).replace("None", "null") for row in generate_sortable_rows(201)]) ) TABLE_PARAMETERIZED_SQL = [ ("insert into binary_data (data) values (?);", [b"\x15\x1c\x02\xc7\xad\x05\xfe"]), diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index c8794fad..20e7d111 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -261,8 +261,7 @@ def register_routes(): response = Response.redirect("/") datasette.set_actor_cookie(response, {"id": "root"}) return response - return Response.html( - """ + return Response.html("""

    @@ -271,10 +270,7 @@ def register_routes(): style="font-size: 2em; padding: 0.1em 0.5em;">

    - """.format( - request.path, request.scope["csrftoken"]() - ) - ) + """.format(request.path, request.scope["csrftoken"]())) def asgi_scope(scope): return Response.json(scope, default=repr) diff --git a/tests/test_cli.py b/tests/test_cli.py index 6cdfd924..7673c3f3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -115,13 +115,9 @@ def test_plugins_cli(app_client): def test_metadata_yaml(): - yaml_file = io.StringIO( - textwrap.dedent( - """ + yaml_file = io.StringIO(textwrap.dedent(""" title: Hello from YAML - """ - ) - ) + """)) # Annoyingly we have to provide all default arguments here: ds = serve.callback( [], diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 5ad01bfa..dc852201 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -16,9 +16,7 @@ def test_serve_with_get(tmp_path_factory): def startup(datasette): with open("{}", "w") as fp: fp.write("hello") - """.format( - str(plugins_dir / "hello.txt") - ), + """.format(str(plugins_dir / "hello.txt")), ), "utf-8", ) diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index f9a90fbe..ae7fe500 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -51,8 +51,7 @@ def config_dir(tmp_path_factory): for dbname in ("demo.db", "immutable.db", "j.sqlite3", "k.sqlite"): db = sqlite3.connect(str(config_dir / dbname)) - db.executescript( - """ + db.executescript(""" CREATE TABLE cities ( id integer primary key, name text @@ -60,8 +59,7 @@ def config_dir(tmp_path_factory): INSERT INTO cities (id, name) VALUES (1, 'San Francisco') ; - """ - ) + """) # Mark "immutable.db" as immutable (config_dir / "inspect-data.json").write_text( diff --git a/tests/test_csv.py b/tests/test_csv.py index 5589bd97..a2f03776 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -9,16 +9,12 @@ EXPECTED_TABLE_CSV = """id,content 3, 4,RENDER_CELL_DEMO 5,RENDER_CELL_ASYNC -""".replace( - "\n", "\r\n" -) +""".replace("\n", "\r\n") EXPECTED_CUSTOM_CSV = 
"""content hello world -""".replace( - "\n", "\r\n" -) +""".replace("\n", "\r\n") EXPECTED_TABLE_WITH_LABELS_CSV = """ pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags,complex_array,distinct_some_null,n @@ -37,17 +33,13 @@ pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags, 13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[],, 14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[],, 15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[],, -""".lstrip().replace( - "\n", "\r\n" -) +""".lstrip().replace("\n", "\r\n") EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV = """ pk,foreign_key_with_label,foreign_key_with_label_label,foreign_key_with_blank_label,foreign_key_with_blank_label_label,foreign_key_with_no_label,foreign_key_with_no_label_label,foreign_key_compound_pk1,foreign_key_compound_pk2 1,1,hello,3,,1,1,a,b 2,,,,,,,, -""".lstrip().replace( - "\n", "\r\n" -) +""".lstrip().replace("\n", "\r\n") @pytest.mark.asyncio @@ -108,8 +100,7 @@ async def test_table_csv_with_invalid_labels(): ) await ds.invoke_startup() db = ds.add_memory_database("db_2214") - await db.execute_write_script( - """ + await db.execute_write_script(""" create table t1 (id integer primary key, name text); insert into t1 (id, name) values (1, 'one'); insert into t1 (id, name) values (2, 'two'); @@ -124,8 +115,7 @@ async def test_table_csv_with_invalid_labels(): insert into maintable (id, fk_integer, fk_text) values (1, 1, 'a'); insert into maintable (id, fk_integer, fk_text) values (2, 3, 'b'); -- invalid fk_integer insert into maintable (id, fk_integer, fk_text) values (3, 2, 'c'); -- invalid fk_text - """ - ) + """) response = await ds.client.get("/db_2214/maintable.csv?_labels=1") assert response.status_code == 200 assert response.text == ( diff --git a/tests/test_html.py b/tests/test_html.py index 8fad5764..757f3e6e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -620,14 +620,11 @@ async def 
test_urlify_custom_queries(ds_client): response = await ds_client.get(path) assert response.status_code == 200 soup = Soup(response.content, "html.parser") - assert ( - """ + assert """ https://twitter.com/simonw -""" - == soup.find("td", {"class": "col-user_url"}).prettify().strip() - ) +""" == soup.find("td", {"class": "col-user_url"}).prettify().strip() @pytest.mark.asyncio diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 02c67bfc..5e3459cd 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -747,19 +747,15 @@ async def test_replace_database(tmpdir): path1 = str(tmpdir / "data1.db") (tmpdir / "two").mkdir() path2 = str(tmpdir / "two" / "data1.db") - sqlite3.connect(path1).executescript( - """ + sqlite3.connect(path1).executescript(""" create table t (id integer primary key); insert into t (id) values (1); insert into t (id) values (2); - """ - ) - sqlite3.connect(path2).executescript( - """ + """) + sqlite3.connect(path2).executescript(""" create table t (id integer primary key); insert into t (id) values (1); - """ - ) + """) datasette = Datasette([path1]) db = datasette.get_database("data1") count = (await db.execute("select count(*) from t")).first()[0] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 190ef659..754b199c 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -233,9 +233,7 @@ async def test_hook_render_cell_pks_compound_pk(ds_client): @pytest.mark.asyncio async def test_hook_render_cell_pks_rowid_table(ds_client): """pks should be ["rowid"] for a table with no explicit primary key""" - response = await ds_client.get( - "/fixtures/no_primary_key?content=RENDER_CELL_DEMO" - ) + response = await ds_client.get("/fixtures/no_primary_key?content=RENDER_CELL_DEMO") soup = Soup(response.text, "html.parser") td = soup.find("td", {"class": "col-content"}) data = json.loads(td.string) @@ -457,14 +455,12 @@ def view_names_client(tmp_path_factory): ): 
(templates / template).write_text("view_name:{{ view_name }}", "utf-8") (plugins / "extra_vars.py").write_text( - textwrap.dedent( - """ + textwrap.dedent(""" from datasette import hookimpl @hookimpl def extra_template_vars(view_name): return {"view_name": view_name} - """ - ), + """), "utf-8", ) db_path = str(tmpdir / "fixtures.db") diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index f53e5059..6617bc77 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -231,16 +231,12 @@ def test_publish_cloudrun_plugin_secrets( with open("test.db", "w") as fp: fp.write("data") with open("metadata.yml", "w") as fp: - fp.write( - textwrap.dedent( - """ + fp.write(textwrap.dedent(""" title: Hello from metadata YAML plugins: datasette-auth-github: foo: bar - """ - ).strip() - ) + """).strip()) result = runner.invoke( cli.cli, [ @@ -333,8 +329,7 @@ def test_publish_cloudrun_apt_get_install( .split("\n====================\n")[0] .strip() ) - expected = textwrap.dedent( - r""" + expected = textwrap.dedent(r""" FROM python:3.11.0-slim-bullseye COPY . 
/app WORKDIR /app @@ -350,8 +345,7 @@ def test_publish_cloudrun_apt_get_install( ENV PORT 8001 EXPOSE 8001 CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --setting force_https_urls on --port $PORT - """ - ).strip() + """).strip() assert expected == dockerfile diff --git a/tests/test_routes.py b/tests/test_routes.py index 9866cc76..24c702fc 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -63,12 +63,10 @@ async def ds_with_route(): ds.remove_database("_memory") db = Database(ds, is_memory=True, memory_name="route-name-db") ds.add_database(db, name="original-name", route="custom-route-name") - await db.execute_write_script( - """ + await db.execute_write_script(""" create table if not exists t (id integer primary key); insert or replace into t (id) values (1); - """ - ) + """) return ds diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 943a1549..51e40ad1 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1243,9 +1243,7 @@ async def test_paginate_using_link_header(ds_client, qs): reason="generated columns were added in SQLite 3.31.0", ) def test_generated_columns_are_visible_in_datasette(): - with make_app_client( - extra_databases={ - "generated.db": """ + with make_app_client(extra_databases={"generated.db": """ CREATE TABLE generated_columns ( body TEXT, id INT GENERATED ALWAYS AS (json_extract(body, '$.number')) STORED, @@ -1253,9 +1251,7 @@ def test_generated_columns_are_visible_in_datasette(): ); INSERT INTO generated_columns (body) VALUES ( '{"number": 1, "string": "This is a string"}' - );""" - } - ) as client: + );"""}) as client: response = client.get("/generated/generated_columns.json?_shape=array") assert response.json == [ { diff --git a/tests/test_utils.py b/tests/test_utils.py index b8d047e9..85ab9e6b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -201,9 +201,7 @@ def test_detect_fts(open_quote, close_quote): CREATE VIEW Test_View AS SELECT * FROM 
Dumb_Table; CREATE VIRTUAL TABLE {open}Street_Tree_List_fts{close} USING FTS4 ("qAddress", "qCaretaker", "qSpecies", content={open}Street_Tree_List{close}); CREATE VIRTUAL TABLE r USING rtree(a, b, c); - """.format( - open=open_quote, close=close_quote - ) + """.format(open=open_quote, close=close_quote) conn = utils.sqlite3.connect(":memory:") conn.executescript(sql) assert None is utils.detect_fts(conn, "Dumb_Table") @@ -220,9 +218,7 @@ def test_detect_fts_different_table_names(table): "qSpecies" TEXT ); CREATE VIRTUAL TABLE [{table}_fts] USING FTS4 ("qSpecies", content="{table}"); - """.format( - table=table - ) + """.format(table=table) conn = utils.sqlite3.connect(":memory:") conn.executescript(sql) assert "{table}_fts".format(table=table) == utils.detect_fts(conn, table) @@ -347,27 +343,21 @@ def test_compound_keys_after_sql(): ((a > :p0) or (a = :p0 and b > :p1)) - """.strip() == utils.compound_keys_after_sql( - ["a", "b"] - ) + """.strip() == utils.compound_keys_after_sql(["a", "b"]) assert """ ((a > :p0) or (a = :p0 and b > :p1) or (a = :p0 and b = :p1 and c > :p2)) - """.strip() == utils.compound_keys_after_sql( - ["a", "b", "c"] - ) + """.strip() == utils.compound_keys_after_sql(["a", "b", "c"]) def test_table_columns(): conn = sqlite3.connect(":memory:") - conn.executescript( - """ + conn.executescript(""" create table places (id integer primary key, name text, bob integer) - """ - ) + """) assert ["id", "name", "bob"] == utils.table_columns(conn, "places") diff --git a/tests/test_utils_permissions.py b/tests/test_utils_permissions.py index b412de0f..bc3599c2 100644 --- a/tests/test_utils_permissions.py +++ b/tests/test_utils_permissions.py @@ -497,16 +497,14 @@ async def test_actor_actor_id_action_parameters_available(db): def plugin_using_all_parameters() -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ + return PermissionSQL(""" SELECT NULL AS parent, NULL AS child, 1 AS allow, 'Actor ID: ' 
|| COALESCE(:actor_id, 'null') || ', Actor JSON: ' || COALESCE(:actor, 'null') || ', Action: ' || :action AS reason WHERE :actor_id = 'test_user' AND :action = 'view-table' AND json_extract(:actor, '$.role') = 'admin' - """ - ) + """) return provider diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index 38e5c94e..cb320c06 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -471,7 +471,9 @@ async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny ), request=request, ) - result = await db.execute(f"select value from {table} order by rowid desc limit 1") + result = await db.execute( + f"select value from {table} order by rowid desc limit 1" + ) assert result.rows[0][0] == "test" finally: pm.unregister(name="test_set_authorizer") From 1c6c6d2e6897c1173ed6e209c8b7133688e75c58 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 Feb 2026 13:30:46 -0800 Subject: [PATCH 40/53] Fix test_write_wrapper_set_authorizer: use permissive callback instead of None conn.set_authorizer(None) does not clear the authorizer - SQLite treats None as an invalid callback. The denied state persists on the shared write connection, causing subsequent non-deny test cases to fail. Fixes test added in 8a315f3d. 
--- tests/test_write_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index cb320c06..55e0461e 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -445,7 +445,7 @@ async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny try: yield finally: - conn.set_authorizer(None) + conn.set_authorizer(lambda *args: sqlite3.SQLITE_OK) return wrapper From 7a66456615cad38d9e70267a14ca30dcc4bca701 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:19:19 -0800 Subject: [PATCH 41/53] black --version --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b1ba3232..a0f5477b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,7 +34,9 @@ jobs: # And the test that exceeds a localhost HTTPS server tests/test_datasette_https_server.sh - name: Black - run: black --check . + run: | + black --version + black --check . 
- name: Ruff run: ruff check datasette tests - name: Check if cog needs to be run From 2f0e64df681c7bf65e8ce3065380be36a4ccd266 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:24:52 -0800 Subject: [PATCH 42/53] black==26.1.0 I'm getting CI failures for Black, maybe this will help --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d9ef2a73..2ab2ce10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ dev = [ "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", - "black==25.11.0", + "black==26.1.0", "blacken-docs==1.20.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 6a2c27b15b300ba1b924ce00a61532943482392e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:28:39 -0800 Subject: [PATCH 43/53] blacken-docs --- docs/plugin_hooks.rst | 13 ++++--------- docs/spatialite.rst | 6 ++---- docs/testing_plugins.rst | 8 ++------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 068469a8..fa335368 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1074,11 +1074,9 @@ You can also return an async function, which will be awaited on startup. Use thi async def inner(): db = datasette.get_database() if "my_table" not in await db.table_names(): - await db.execute_write( - """ + await db.execute_write(""" create table my_table (mycol text) - """ - ) + """) return inner @@ -1561,7 +1559,6 @@ The resolver will automatically apply the most specific rule. 
from datasette import hookimpl from datasette.permissions import PermissionSQL - TRUSTED = {"alice", "bob"} @@ -2261,8 +2258,7 @@ This example logs events to a ``datasette_events`` table in a database called `` def startup(datasette): async def inner(): db = datasette.get_database("events") - await db.execute_write( - """ + await db.execute_write(""" create table if not exists datasette_events ( id integer primary key, event_type text, @@ -2270,8 +2266,7 @@ This example logs events to a ``datasette_events`` table in a database called `` actor text, properties text ) - """ - ) + """) return inner diff --git a/docs/spatialite.rst b/docs/spatialite.rst index fbe0d75f..c93c1e00 100644 --- a/docs/spatialite.rst +++ b/docs/spatialite.rst @@ -90,12 +90,10 @@ Here's a recipe for taking a table with existing latitude and longitude columns, "SELECT AddGeometryColumn('museums', 'point_geom', 4326, 'POINT', 2);" ) # Now update that geometry column with the lat/lon points - conn.execute( - """ + conn.execute(""" UPDATE museums SET point_geom = GeomFromText('POINT('||"longitude"||' '||"latitude"||')',4326); - """ - ) + """) # Now add a spatial index to that column conn.execute( 'select CreateSpatialIndex("museums", "point_geom");' diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index fc1aa6f6..b0713e7c 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -233,15 +233,11 @@ As an example, here's a very simple plugin which executes an HTTP response and r async def fetch_url(datasette, request): if request.method == "GET": - return Response.html( - """ + return Response.html("""
    -
    """.format( - request.scope["csrftoken"]() - ) - ) + """.format(request.scope["csrftoken"]())) vars = await request.post_vars() url = vars["url"] return Response.text(httpx.get(url).text) From c96dc5ce2656607b9e81743acf600f8fd5f6a795 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:32:45 -0800 Subject: [PATCH 44/53] register_token_handler() plugin hook for custom API token backends (#2650) Closes #2649 * Add register_token_handler plugin hook for pluggable token backends Adds a new register_token_handler hook that allows plugins to provide custom token creation and verification backends. This enables plugins like datasette-oauth to issue tokens without depending on specific backend plugins like datasette-auth-tokens. Key changes: - New datasette/tokens.py with TokenHandler base class and SignedTokenHandler (the default signed-token implementation moved here) - New register_token_handler hookspec in hookspecs.py - Datasette.create_token() is now async and delegates to token handlers - New Datasette.verify_token() method tries all handlers in sequence - handler= parameter on create_token() to select a specific backend - TokenHandler exported from datasette package for plugin use - Fixed actor_from_request loop to await all coroutines (avoids warnings) * Add documentation and hook test for register_token_handler Fixes CI failures: the new hook needs a section in docs/plugin_hooks.rst (checked by test_plugin_hooks_are_documented) and a test_hook_* function in test_plugins.py (checked by test_plugin_hooks_have_tests). * Register tokens module as separate default plugin Instead of re-exporting hookimpls from default_permissions/__init__.py, register datasette.default_permissions.tokens as its own DEFAULT_PLUGINS entry. Cleaner and avoids confusing import-for-side-effect patterns. 
* Replace restrict_x params with TokenRestrictions dataclass Consolidates the three separate restrict_all, restrict_database, and restrict_resource parameters into a single TokenRestrictions dataclass. Cleaner API surface for both Datasette.create_token() and TokenHandler.create_token(). Also clarifies docs re: default handler selection via pluggy ordering. * Add builder methods to TokenRestrictions Adds allow_all(), allow_database(), and allow_resource() methods that return self for chaining. Callers no longer need to manipulate nested dicts directly: restrictions = (TokenRestrictions() .allow_all("view-instance") .allow_database("mydb", "create-table") .allow_resource("mydb", "mytable", "insert-row")) * docs: add 1.0a25 upgrade guide section for create_token() signature change Ref: https://github.com/simonw/datasette/issues/2649#issuecomment-3962639393 * docs: note that create_token() is now async in upgrade guide * docs: update internals, plugin_hooks, authentication for new token API - internals.rst: new async create_token() signature with restrictions and handler params, add TokenRestrictions reference docs - plugin_hooks.rst: show full create_token signature in TokenHandler example, note list returns and error cases - authentication.rst: cross-reference TokenRestrictions from the restrictions section * style: apply black formatting to token handler files * docs: fix RST heading underline length in internals.rst * tests: add restrictions round-trip and expiration tests for token handler Covers allow_database/allow_resource builders, _r payload encoding, and token_expires in verified actors. Coverage 76% -> 90%. 
* tests: add test for signed tokens disabled * fix: add TokenRestrictions TYPE_CHECKING import to fix ruff F821 * docs: regenerate plugins.rst with cog * docs: reformat code blocks in plugin_hooks.rst with blacken-docs * docs: add await .verify_token() to internals.rst * tests: rewrite register_token_handler test to use real plugin handler Adds a HardcodedTokenHandler to the test plugins dir that creates tokens like dstok_hardcoded_token_1. The test now exercises creating tokens via the default handler (which is the plugin's hardcoded one), by explicitly naming the hardcoded handler, and by explicitly naming the signed handler -- then verifies each token round-trips correctly. * tests: clarify test_token_handler_via_http tests the default signed handler * fix: use handler="signed" explicitly where signed tokens are expected The HardcodedTokenHandler in my_plugin.py gets globally registered, so create_token() without a handler name picks it up as the default. Fix the create-token view, CLI, and tests to explicitly request the signed handler where they depend on signed token behavior. 
* fix: use handler="signed" in test_create_table_permissions https://claude.ai/code/session_013cQFiDQjYRrRBH2biFfKuS --- datasette/__init__.py | 1 + datasette/app.py | 102 +++++--- datasette/cli.py | 30 ++- datasette/default_permissions/__init__.py | 1 - datasette/default_permissions/tokens.py | 85 ++---- datasette/hookspecs.py | 5 + datasette/plugins.py | 1 + datasette/tokens.py | 180 +++++++++++++ datasette/views/special.py | 34 +-- docs/authentication.rst | 1 + docs/internals.rst | 81 ++++-- docs/plugin_hooks.rst | 59 +++++ docs/plugins.rst | 11 +- docs/upgrade_guide.md | 40 +++ tests/fixtures.py | 1 + tests/plugins/my_plugin.py | 27 ++ tests/test_api_write.py | 9 +- tests/test_permissions.py | 2 +- tests/test_plugins.py | 32 +++ tests/test_token_handler.py | 301 ++++++++++++++++++++++ 20 files changed, 839 insertions(+), 164 deletions(-) create mode 100644 datasette/tokens.py create mode 100644 tests/test_token_handler.py diff --git a/datasette/__init__.py b/datasette/__init__.py index 47d2b4f6..eb18e59e 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,6 +1,7 @@ from datasette.permissions import Permission # noqa from datasette.version import __version_info__, __version__ # noqa from datasette.events import Event # noqa +from datasette.tokens import TokenHandler, TokenRestrictions # noqa from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa from datasette.utils import actor_matches_allow # noqa from datasette.views import Context # noqa diff --git a/datasette/app.py b/datasette/app.py index 6efaa430..2df6e4e8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: from datasette.permissions import Resource + from datasette.tokens import TokenRestrictions import asgi_csrf import collections import dataclasses @@ -713,44 +714,70 @@ class Datasette: """ return _in_datasette_client.get() - def create_token( + def 
_token_handlers(self): + """Collect all registered token handlers from plugins.""" + from datasette.tokens import TokenHandler + + handlers = [] + for result in pm.hook.register_token_handler(datasette=self): + if isinstance(result, TokenHandler): + handlers.append(result) + elif isinstance(result, list): + handlers.extend(h for h in result if isinstance(h, TokenHandler)) + return handlers + + async def create_token( self, actor_id: str, *, expires_after: int | None = None, - restrict_all: Iterable[str] | None = None, - restrict_database: Dict[str, Iterable[str]] | None = None, - restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, - ): - token = {"a": actor_id, "t": int(time.time())} - if expires_after: - token["d"] = expires_after + restrictions: "TokenRestrictions | None" = None, + handler: str | None = None, + ) -> str: + """ + Create an API token for the given actor. - def abbreviate_action(action): - # rename to abbr if possible - action_obj = self.actions.get(action) - if not action_obj: - return action - return action_obj.abbr or action + Uses the first registered token handler by default, or a specific + handler if ``handler`` is provided (matched by handler name). - if expires_after: - token["d"] = expires_after - if restrict_all or restrict_database or restrict_resource: - token["_r"] = {} - if restrict_all: - token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] - if restrict_database: - token["_r"]["d"] = {} - for database, actions in restrict_database.items(): - token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] - if restrict_resource: - token["_r"]["r"] = {} - for database, resources in restrict_resource.items(): - for resource, actions in resources.items(): - token["_r"]["r"].setdefault(database, {})[resource] = [ - abbreviate_action(a) for a in actions - ] - return "dstok_{}".format(self.sign(token, namespace="token")) + Pass a :class:`TokenRestrictions` to limit which actions the token + can perform. 
+ """ + handlers = self._token_handlers() + if not handlers: + raise RuntimeError("No token handlers are registered") + + if handler is not None: + matched = [h for h in handlers if h.name == handler] + if not matched: + available = [h.name for h in handlers] + raise ValueError( + f"Token handler {handler!r} not found. " + f"Available handlers: {available}" + ) + chosen = matched[0] + else: + chosen = handlers[0] + + return await chosen.create_token( + self, + actor_id, + expires_after=expires_after, + restrictions=restrictions, + ) + + async def verify_token(self, token: str) -> dict | None: + """ + Verify an API token by trying all registered token handlers. + + Returns an actor dict from the first handler that recognizes the + token, or None if no handler accepts it. + """ + for token_handler in self._token_handlers(): + result = await token_handler.verify_token(self, token) + if result is not None: + return result + return None def get_database(self, name=None, route=None): if route is not None: @@ -2159,10 +2186,13 @@ class DatasetteRouter: # Handle authentication default_actor = scope.get("actor") or None actor = None - for actor in pm.hook.actor_from_request(datasette=self.ds, request=request): - actor = await await_me_maybe(actor) - if actor: - break + results = pm.hook.actor_from_request(datasette=self.ds, request=request) + for result in results: + result = await await_me_maybe(result) + if result and actor is None: + actor = result + # Don't break — we must await all coroutines to avoid + # "coroutine was never awaited" warnings scope_modifications["actor"] = actor or default_actor scope = dict(scope, **scope_modifications) diff --git a/datasette/cli.py b/datasette/cli.py index 121911ab..b473fbb7 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -832,21 +832,23 @@ def create_token( err=True, ) - restrict_database = {} - for database, action in databases: - restrict_database.setdefault(database, []).append(action) - restrict_resource = {} - for 
database, resource, action in resources: - restrict_resource.setdefault(database, {}).setdefault(resource, []).append( - action - ) + from datasette.tokens import TokenRestrictions - token = ds.create_token( - id, - expires_after=expires_after, - restrict_all=alls, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrictions = TokenRestrictions() + for action in alls: + restrictions.allow_all(action) + for database, action in databases: + restrictions.allow_database(database, action) + for database, resource, action in resources: + restrictions.allow_resource(database, resource, action) + + token = run_sync( + lambda: ds.create_token( + id, + expires_after=expires_after, + restrictions=restrictions, + handler="signed", + ) ) click.echo(token) if debug: diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 40373fa7..4ebe6147 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -37,7 +37,6 @@ from .defaults import ( default_action_permissions_sql as default_action_permissions_sql, DEFAULT_ALLOW_ACTIONS as DEFAULT_ALLOW_ACTIONS, ) -from .tokens import actor_from_signed_api_token as actor_from_signed_api_token @hookimpl diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py index 474b0c23..7a359dc6 100644 --- a/datasette/default_permissions/tokens.py +++ b/datasette/default_permissions/tokens.py @@ -1,44 +1,35 @@ """ Token authentication for Datasette. -Handles signed API tokens (dstok_ prefix). +Registers the default SignedTokenHandler and delegates token verification +to datasette.verify_token() so all registered handlers are tried. 
""" from __future__ import annotations -import time from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from datasette.app import Datasette -import itsdangerous - from datasette import hookimpl +from datasette.tokens import SignedTokenHandler + + +@hookimpl +def register_token_handler(datasette: "Datasette"): + """Register the default signed token handler.""" + return SignedTokenHandler() @hookimpl(specname="actor_from_request") -def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: +async def actor_from_signed_api_token( + datasette: "Datasette", request +) -> Optional[dict]: """ - Authenticate requests using signed API tokens (dstok_ prefix). - - Token structure (signed JSON): - { - "a": "actor_id", # Actor ID - "t": 1234567890, # Timestamp (Unix epoch) - "d": 3600, # Optional: Duration in seconds - "_r": {...} # Optional: Restrictions - } + Authenticate requests using API tokens by delegating to all registered + token handlers via datasette.verify_token(). 
""" - prefix = "dstok_" - - # Check if tokens are enabled - if not datasette.setting("allow_signed_tokens"): - return None - - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - - # Get authorization header authorization = request.headers.get("authorization") if not authorization: return None @@ -46,50 +37,4 @@ def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dic return None token = authorization[len("Bearer ") :] - if not token.startswith(prefix): - return None - - # Remove prefix and verify signature - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - - # Validate timestamp - if "t" not in decoded: - return None - created = decoded["t"] - if not isinstance(created, int): - return None - - # Handle duration/expiry - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - return None - - # Apply max TTL if configured - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - - # Check expiry - if duration: - if time.time() - created > duration: - return None - - # Build actor dict - actor = {"id": decoded["a"], "token": "dstok"} - - # Copy restrictions if present - if "_r" in decoded: - actor["_r"] = decoded["_r"] - - # Add expiry timestamp if applicable - if duration: - actor["token_expires"] = created + duration - - return actor + return await datasette.verify_token(token) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 89be6a65..64901900 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -222,6 +222,11 @@ def top_canned_query(datasette, request, database, query_name): """HTML to include at the top of the canned query page""" +@hookspec +def register_token_handler(datasette): + """Return a TokenHandler instance for token creation 
and verification""" + + @hookspec def write_wrapper(datasette, database, request, transaction): """Called when a write function is about to execute. diff --git a/datasette/plugins.py b/datasette/plugins.py index e9818885..992137bd 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -23,6 +23,7 @@ DEFAULT_PLUGINS = ( "datasette.sql_functions", "datasette.actor_auth_cookie", "datasette.default_permissions", + "datasette.default_permissions.tokens", "datasette.default_actions", "datasette.default_magic_parameters", "datasette.blob_renderer", diff --git a/datasette/tokens.py b/datasette/tokens.py new file mode 100644 index 00000000..5a12d8e0 --- /dev/null +++ b/datasette/tokens.py @@ -0,0 +1,180 @@ +""" +Token handler system for Datasette. + +Provides a base class for token handlers and the default signed token handler. +Plugins can implement register_token_handler to provide custom token backends +(e.g. database-backed tokens that can be revoked and audited). +""" + +from __future__ import annotations + +import dataclasses +import time +from typing import TYPE_CHECKING, Optional + +import itsdangerous + +if TYPE_CHECKING: + from datasette.app import Datasette + + +@dataclasses.dataclass +class TokenRestrictions: + """ + Restrictions to apply to a token, limiting which actions it can perform. 
+ + Use the builder methods to construct restrictions:: + + restrictions = (TokenRestrictions() + .allow_all("view-instance") + .allow_database("mydb", "create-table") + .allow_resource("mydb", "mytable", "insert-row")) + """ + + all: list[str] = dataclasses.field(default_factory=list) + database: dict[str, list[str]] = dataclasses.field(default_factory=dict) + resource: dict[str, dict[str, list[str]]] = dataclasses.field(default_factory=dict) + + def allow_all(self, action: str) -> "TokenRestrictions": + """Allow an action across all databases and resources.""" + self.all.append(action) + return self + + def allow_database(self, database: str, action: str) -> "TokenRestrictions": + """Allow an action on a specific database.""" + self.database.setdefault(database, []).append(action) + return self + + def allow_resource( + self, database: str, resource: str, action: str + ) -> "TokenRestrictions": + """Allow an action on a specific resource within a database.""" + self.resource.setdefault(database, {}).setdefault(resource, []).append(action) + return self + + +class TokenHandler: + """ + Base class for token handlers. + + Subclass this and implement create_token() and verify_token() to provide + a custom token backend. Return an instance from the register_token_handler hook. + """ + + name: str = "" + + async def create_token( + self, + datasette: "Datasette", + actor_id: str, + *, + expires_after: Optional[int] = None, + restrictions: Optional[TokenRestrictions] = None, + ) -> str: + """Create and return a token string for the given actor.""" + raise NotImplementedError + + async def verify_token(self, datasette: "Datasette", token: str) -> Optional[dict]: + """ + Verify a token and return an actor dict, or None if this handler + does not recognize the token. + """ + raise NotImplementedError + + +class SignedTokenHandler(TokenHandler): + """ + Default token handler using itsdangerous signed tokens (dstok_ prefix). 
+ """ + + name = "signed" + + async def create_token( + self, + datasette: "Datasette", + actor_id: str, + *, + expires_after: Optional[int] = None, + restrictions: Optional[TokenRestrictions] = None, + ) -> str: + if not datasette.setting("allow_signed_tokens"): + raise ValueError( + "Signed tokens are not enabled for this Datasette instance" + ) + + token = {"a": actor_id, "t": int(time.time())} + + def abbreviate_action(action): + action_obj = datasette.actions.get(action) + if not action_obj: + return action + return action_obj.abbr or action + + if expires_after: + token["d"] = expires_after + if restrictions and ( + restrictions.all or restrictions.database or restrictions.resource + ): + token["_r"] = {} + if restrictions.all: + token["_r"]["a"] = [abbreviate_action(a) for a in restrictions.all] + if restrictions.database: + token["_r"]["d"] = {} + for database, actions in restrictions.database.items(): + token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] + if restrictions.resource: + token["_r"]["r"] = {} + for database, resources in restrictions.resource.items(): + for resource, actions in resources.items(): + token["_r"]["r"].setdefault(database, {})[resource] = [ + abbreviate_action(a) for a in actions + ] + return "dstok_{}".format(datasette.sign(token, namespace="token")) + + async def verify_token(self, datasette: "Datasette", token: str) -> Optional[dict]: + prefix = "dstok_" + + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + if not token.startswith(prefix): + return None + + raw = token[len(prefix) :] + try: + decoded = datasette.unsign(raw, namespace="token") + except itsdangerous.BadSignature: + return None + + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + if 
(duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + if duration: + if time.time() - created > duration: + return None + + actor = {"id": decoded["a"], "token": "dstok"} + + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/datasette/views/special.py b/datasette/views/special.py index 640c82eb..dbe5eab1 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -710,42 +710,36 @@ class CreateTokenView(BaseView): errors.append("Invalid expire duration unit") # Are there any restrictions? - restrict_all = [] - restrict_database = {} - restrict_resource = {} + from datasette.tokens import TokenRestrictions + + restrictions = TokenRestrictions() for key in form: if key.startswith("all:") and key.count(":") == 1: - restrict_all.append(key.split(":")[1]) + restrictions.allow_all(key.split(":")[1]) elif key.startswith("database:") and key.count(":") == 2: bits = key.split(":") - database = tilde_decode(bits[1]) - action = bits[2] - restrict_database.setdefault(database, []).append(action) + restrictions.allow_database(tilde_decode(bits[1]), bits[2]) elif key.startswith("resource:") and key.count(":") == 3: bits = key.split(":") - database = tilde_decode(bits[1]) - resource = tilde_decode(bits[2]) - action = bits[3] - restrict_resource.setdefault(database, {}).setdefault( - resource, [] - ).append(action) + restrictions.allow_resource( + tilde_decode(bits[1]), tilde_decode(bits[2]), bits[3] + ) - token = self.ds.create_token( + token = await self.ds.create_token( request.actor["id"], expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrictions=restrictions, + handler="signed", ) token_bits = self.ds.unsign(token[len("dstok_") :], namespace="token") 
await self.ds.track_event( CreateTokenEvent( actor=request.actor, expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrict_all=restrictions.all, + restrict_database=restrictions.database, + restrict_resource=restrictions.resource, ) ) context = await self.shared(request) diff --git a/docs/authentication.rst b/docs/authentication.rst index 69a6f606..1b949f9a 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1072,6 +1072,7 @@ cannot grant new access. If the underlying actor is denied by ``allow`` rules in ``datasette.yaml`` or by a plugin, a token that lists that resource in its ``"_r"`` section will still be denied. +To create tokens with restrictions in Python code, use the :ref:`TokenRestrictions ` builder and pass it to :ref:`datasette.create_token() `. .. _permissions_plugins: diff --git a/docs/internals.rst b/docs/internals.rst index 0491c1f7..7d607bfe 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -673,8 +673,8 @@ This example checks if the user can access a specific table, and sets ``private` .. _datasette_create_token: -.create_token(actor_id, expires_after=None, restrict_all=None, restrict_database=None, restrict_resource=None) --------------------------------------------------------------------------------------------------------------- +await .create_token(actor_id, expires_after=None, restrictions=None, handler=None) +---------------------------------------------------------------------------------- ``actor_id`` - string The ID of the actor to create a token for. @@ -682,16 +682,13 @@ This example checks if the user can access a specific table, and sets ``private` ``expires_after`` - int, optional The number of seconds after which the token should expire. -``restrict_all`` - iterable, optional - A list of actions that this token should be restricted to across all databases and resources. 
+``restrictions`` - :ref:`TokenRestrictions `, optional + A :ref:`TokenRestrictions ` object limiting which actions the token can perform. -``restrict_database`` - dict, optional - For restricting actions within specific databases, e.g. ``{"mydb": ["view-table", "view-query"]}``. +``handler`` - string, optional + The name of a specific token handler to use. If omitted, the first registered handler is used. See :ref:`plugin_hook_register_token_handler`. -``restrict_resource`` - dict, optional - For restricting actions to specific resources (tables, SQL views and :ref:`canned_queries`) within a database. For example: ``{"mydb": {"mytable": ["insert-row", "update-row"]}}``. - -This method returns a signed :ref:`API token ` of the format ``dstok_...`` which can be used to authenticate requests to the Datasette API. +This is an ``async`` method that returns an :ref:`API token ` string which can be used to authenticate requests to the Datasette API. The default ``SignedTokenHandler`` returns tokens of the format ``dstok_...``. All tokens must have an ``actor_id`` string indicating the ID of the actor which the token will act on behalf of. @@ -699,28 +696,72 @@ Tokens default to lasting forever, but can be set to expire after a given number .. code-block:: python - token = datasette.create_token( + token = await datasette.create_token( actor_id="user1", expires_after=3600, ) -The three ``restrict_*`` arguments can be used to create a token that has additional restrictions beyond what the associated actor is allowed to do. +.. _TokenRestrictions: + +TokenRestrictions +~~~~~~~~~~~~~~~~~ + +The ``TokenRestrictions`` class uses a builder pattern to specify which actions a token is allowed to perform. Import it from ``datasette.tokens``: + +.. 
code-block:: python + + from datasette.tokens import TokenRestrictions + + restrictions = ( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ) + +The builder methods are: + +- ``allow_all(action)`` - allow an action across all databases and resources +- ``allow_database(database, action)`` - allow an action on a specific database +- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`canned query <canned_queries>`) within a database + +Each method returns the ``TokenRestrictions`` instance so calls can be chained. The following example creates a token that can access ``view-instance`` and ``view-table`` across everything, can additionally use ``view-query`` for anything in the ``docs`` database and is allowed to execute ``insert-row`` and ``update-row`` in the ``attachments`` table in that database: .. code-block:: python - token = datasette.create_token( + token = await datasette.create_token( actor_id="user1", - restrict_all=("view-instance", "view-table"), - restrict_database={"docs": ("view-query",)}, - restrict_resource={ - "docs": { - "attachments": ("insert-row", "update-row") - } - }, + restrictions=( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ), ) +.. _datasette_verify_token: + +await .verify_token(token) +-------------------------- + +``token`` - string + The token string to verify. + +This is an ``async`` method that verifies an API token by trying each registered token handler in order. Returns an actor dictionary from the first handler that recognizes the token, or ``None`` if no handler accepts it. + +..
code-block:: python + + actor = await datasette.verify_token(token) + if actor: + # Token was valid + print(actor["id"]) + .. _datasette_get_database: .get_database(name) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index fa335368..b9701f7c 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -2334,3 +2334,62 @@ The plugin can then call ``datasette.track_event(...)`` to send a ``ban-user`` e BanUserEvent(user={"id": 1, "username": "cleverbot"}) ) +.. _plugin_hook_register_token_handler: + +register_token_handler(datasette) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``datasette`` - :ref:`internals_datasette` + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. + +Return a ``TokenHandler`` instance to provide a custom token creation and verification backend. This hook can return a single ``TokenHandler`` or a list of them. + +The default ``SignedTokenHandler`` uses itsdangerous signed tokens (``dstok_`` prefix). Plugins can provide alternative backends such as database-backed tokens that support revocation and auditing. + +.. code-block:: python + + from datasette import hookimpl, TokenHandler + + + class DatabaseTokenHandler(TokenHandler): + name = "database" + + async def create_token( + self, + datasette, + actor_id, + *, + expires_after=None, + restrictions=None + ): + # Store token in database and return token string + ... + + async def verify_token(self, datasette, token): + # Look up token in database, return actor dict or None + ... + + + @hookimpl + def register_token_handler(datasette): + return DatabaseTokenHandler() + +The ``create_token`` method receives a ``restrictions`` argument which will be a :ref:`TokenRestrictions <TokenRestrictions>` instance or ``None``. + +Tokens can then be created and verified using :ref:`datasette.create_token() <datasette_create_token>` and ``datasette.verify_token()``, which delegate to the registered handlers.
If no ``handler`` is specified, the first handler is used according to `pluggy call-time ordering `_. Use the ``handler`` parameter to select a specific backend by name: + +.. code-block:: python + + # Uses first registered handler (default) + token = await datasette.create_token("user123") + + # Uses a specific handler by name + token = await datasette.create_token( + "user123", handler="database" + ) + + # Verification tries all handlers + actor = await datasette.verify_token(token) + +If no handlers are registered, ``create_token()`` raises ``RuntimeError``. If the requested ``handler`` name is not found, it raises ``ValueError``. + diff --git a/docs/plugins.rst b/docs/plugins.rst index d5a98923..60bdc111 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -231,12 +231,21 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "templates": false, "version": null, "hooks": [ - "actor_from_request", "canned_queries", "permission_resources_sql", "skip_csrf" ] }, + { + "name": "datasette.default_permissions.tokens", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "actor_from_request", + "register_token_handler" + ] + }, { "name": "datasette.events", "static": false, diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index a3c321a4..861a8795 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -114,3 +114,43 @@ Instead, one should use the following methods on a Datasette class: ```{include} upgrade-1.0a20.md :heading-offset: 1 ``` + +(upgrade_guide_v1_a25)= +### Datasette 1.0a25: `create_token()` signature change + +`datasette.create_token()` is now an `async` method (previously it was synchronous). The `restrict_all`, `restrict_database`, and `restrict_resource` keyword arguments have been replaced by a single `restrictions` parameter that accepts a {ref}`TokenRestrictions ` object. 
+ +Old code: + +```python +token = datasette.create_token( + actor_id="user1", + restrict_all=["view-instance", "view-table"], + restrict_database={"docs": ["view-query"]}, + restrict_resource={ + "docs": { + "attachments": ["insert-row", "update-row"] + } + }, +) +``` + +New code: + +```python +from datasette.tokens import TokenRestrictions + +token = await datasette.create_token( + actor_id="user1", + restrictions=( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ), +) +``` + +The `datasette create-token` CLI command is unchanged. diff --git a/tests/fixtures.py b/tests/fixtures.py index 9f99519a..1f6c491d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -51,6 +51,7 @@ EXPECTED_PLUGINS = [ "register_facet_classes", "register_magic_parameters", "register_routes", + "register_token_handler", "render_cell", "row_actions", "skip_csrf", diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 20e7d111..77079557 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -1,6 +1,7 @@ import asyncio from datasette import hookimpl from datasette.facets import Facet +from datasette.tokens import TokenHandler from datasette import tracer from datasette.permissions import Action from datasette.resources import DatabaseResource @@ -586,3 +587,29 @@ def permission_resources_sql(datasette, actor, action): return PermissionSQL.allow(reason=f"todomvc actor allowed for {action}") return None + + +class HardcodedTokenHandler(TokenHandler): + name = "hardcoded" + _counter = 0 + + async def create_token( + self, + datasette, + actor_id, + *, + expires_after=None, + restrictions=None, + ): + HardcodedTokenHandler._counter += 1 + return f"dstok_hardcoded_token_{HardcodedTokenHandler._counter}" + + async def verify_token(self, datasette, token): + if 
token.startswith("dstok_hardcoded_token_"): + return {"id": "hardcoded-actor", "token": "hardcoded"} + return None + + +@hookimpl +def register_token_handler(datasette): + return HardcodedTokenHandler() diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 05835e51..e59c4295 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -1362,7 +1362,14 @@ async def test_create_table( async def test_create_table_permissions( ds_write, permissions, body, expected_status, expected_errors ): - token = ds_write.create_token("root", restrict_all=["view-instance"] + permissions) + from datasette.tokens import TokenRestrictions + + restrictions = TokenRestrictions() + for action in ["view-instance"] + permissions: + restrictions.allow_all(action) + token = await ds_write.create_token( + "root", handler="signed", restrictions=restrictions + ) response = await ds_write.client.post( "/data/-/create", json=body, diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 96c0cf6f..42a19ca4 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1657,7 +1657,7 @@ async def test_permission_check_view_requires_debug_permission(): # Root user should have access (root has all permissions) ds_with_root = Datasette() ds_with_root.root_enabled = True - root_token = ds_with_root.create_token("root") + root_token = await ds_with_root.create_token("root", handler="signed") response = await ds_with_root.client.get( "/-/check.json?action=view-instance", headers={"Authorization": f"Bearer {root_token}"}, diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 754b199c..fa9d1a1f 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1566,6 +1566,38 @@ async def test_hook_register_events(): assert any(k.__name__ == "OneEvent" for k in datasette.event_classes) +@pytest.mark.asyncio +async def test_hook_register_token_handler(ds_client): + handlers = ds_client.ds._token_handlers() + handler_names = [h.name for h in 
handlers] + # Both the default signed handler and the test hardcoded handler + assert "signed" in handler_names + assert "hardcoded" in handler_names + + # Create a token using the hardcoded handler (first registered from plugins dir) + token = await ds_client.ds.create_token("test-user") + assert token.startswith("dstok_hardcoded_token_") + + # Verify it + actor = await ds_client.ds.verify_token(token) + assert actor["id"] == "hardcoded-actor" + assert actor["token"] == "hardcoded" + + # Create a token by explicitly requesting the hardcoded handler by name + token2 = await ds_client.ds.create_token("test-user", handler="hardcoded") + assert token2.startswith("dstok_hardcoded_token_") + actor2 = await ds_client.ds.verify_token(token2) + assert actor2["id"] == "hardcoded-actor" + + # Create a token by explicitly requesting the signed handler by name + signed_token = await ds_client.ds.create_token("test-user", handler="signed") + assert signed_token.startswith("dstok_") + assert not signed_token.startswith("dstok_hardcoded_token_") + signed_actor = await ds_client.ds.verify_token(signed_token) + assert signed_actor["id"] == "test-user" + assert signed_actor["token"] == "dstok" + + @pytest.mark.asyncio async def test_hook_write_wrapper(): datasette = Datasette(memory=True) diff --git a/tests/test_token_handler.py b/tests/test_token_handler.py new file mode 100644 index 00000000..83f09046 --- /dev/null +++ b/tests/test_token_handler.py @@ -0,0 +1,301 @@ +""" +Tests for the register_token_handler plugin hook. 
+""" + +from datasette.app import Datasette +from datasette.hookspecs import hookimpl +from datasette.plugins import pm +from datasette.tokens import TokenHandler, TokenRestrictions, SignedTokenHandler +import pytest + + +@pytest.fixture +def datasette(): + return Datasette() + + +@pytest.mark.asyncio +async def test_default_signed_handler_registered(datasette): + """The default SignedTokenHandler should be registered automatically.""" + handlers = datasette._token_handlers() + assert len(handlers) >= 1 + assert any(isinstance(h, SignedTokenHandler) for h in handlers) + assert any(h.name == "signed" for h in handlers) + + +@pytest.mark.asyncio +async def test_create_token_default(datasette): + """create_token() with handler='signed' should create a signed token.""" + token = await datasette.create_token("test_actor", handler="signed") + assert token.startswith("dstok_") + + +@pytest.mark.asyncio +async def test_create_token_with_restrictions(datasette): + """create_token() should handle restriction parameters.""" + token = await datasette.create_token( + "test_actor", + handler="signed", + expires_after=3600, + restrictions=TokenRestrictions().allow_all("view-instance"), + ) + assert token.startswith("dstok_") + # Verify the token contains the expected data + decoded = datasette.unsign(token[len("dstok_") :], namespace="token") + assert decoded["a"] == "test_actor" + assert decoded["d"] == 3600 + assert "_r" in decoded + assert "a" in decoded["_r"] + + +@pytest.mark.asyncio +async def test_verify_token_default(datasette): + """verify_token() should verify signed tokens.""" + token = await datasette.create_token("test_actor", handler="signed") + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert actor["token"] == "dstok" + + +@pytest.mark.asyncio +async def test_verify_token_unknown_returns_none(datasette): + """verify_token() should return None for unrecognized tokens.""" + result = await 
datasette.verify_token("unknown_token_format_xyz") + assert result is None + + +@pytest.mark.asyncio +async def test_verify_token_bad_signature_returns_none(datasette): + """verify_token() should return None for tokens with bad signatures.""" + result = await datasette.verify_token("dstok_tampered_data_here") + assert result is None + + +@pytest.mark.asyncio +async def test_create_token_with_named_handler(datasette): + """create_token(handler='signed') should select the signed handler.""" + token = await datasette.create_token("test_actor", handler="signed") + assert token.startswith("dstok_") + + +@pytest.mark.asyncio +async def test_create_token_unknown_handler_raises(datasette): + """create_token(handler='nonexistent') should raise ValueError.""" + with pytest.raises(ValueError, match="Token handler 'nonexistent' not found"): + await datasette.create_token("test_actor", handler="nonexistent") + + +@pytest.mark.asyncio +async def test_custom_token_handler(datasette): + """A custom token handler should be usable for both create and verify.""" + + class CustomHandler(TokenHandler): + name = "custom" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"custom_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("custom_"): + return {"id": token[len("custom_") :], "token": "custom"} + return None + + class Plugin: + __name__ = "CustomTokenPlugin" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return CustomHandler() + + pm.register(Plugin(), name="test_custom_handler") + try: + handlers = datasette._token_handlers() + assert any(h.name == "custom" for h in handlers) + + # Create with custom handler + token = await datasette.create_token("alice", handler="custom") + assert token == "custom_alice" + + # Verify custom token + actor = await datasette.verify_token("custom_alice") + assert actor is not None + assert actor["id"] == "alice" + assert actor["token"] == "custom" + + # Signed tokens 
should still work + signed_token = await datasette.create_token("bob", handler="signed") + assert signed_token.startswith("dstok_") + actor = await datasette.verify_token(signed_token) + assert actor["id"] == "bob" + finally: + pm.unregister(name="test_custom_handler") + + +@pytest.mark.asyncio +async def test_verify_token_tries_all_handlers(datasette): + """verify_token() should try each handler until one matches.""" + + class HandlerA(TokenHandler): + name = "handler_a" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"a_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("a_"): + return {"id": token[2:], "token": "handler_a"} + return None + + class HandlerB(TokenHandler): + name = "handler_b" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"b_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("b_"): + return {"id": token[2:], "token": "handler_b"} + return None + + class PluginA: + __name__ = "PluginA" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return HandlerA() + + class PluginB: + __name__ = "PluginB" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return HandlerB() + + pm.register(PluginA(), name="test_handler_a") + pm.register(PluginB(), name="test_handler_b") + try: + # Both handler tokens should verify + actor_a = await datasette.verify_token("a_alice") + assert actor_a is not None + assert actor_a["id"] == "alice" + assert actor_a["token"] == "handler_a" + + actor_b = await datasette.verify_token("b_bob") + assert actor_b is not None + assert actor_b["id"] == "bob" + assert actor_b["token"] == "handler_b" + + # Unknown token should return None + assert await datasette.verify_token("c_charlie") is None + finally: + pm.unregister(name="test_handler_a") + pm.unregister(name="test_handler_b") + + +@pytest.mark.asyncio +async def test_token_handler_via_http(datasette): + """Default 
signed tokens should work through HTTP auth.""" + token = await datasette.create_token("http_user", handler="signed") + response = await datasette.client.get( + "/-/actor.json", + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + actor = response.json()["actor"] + assert actor["id"] == "http_user" + assert actor["token"] == "dstok" + + +@pytest.mark.asyncio +async def test_custom_handler_via_http(datasette): + """Custom handler tokens should work through HTTP auth.""" + + class CustomHandler(TokenHandler): + name = "custom_http" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"chttp_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("chttp_"): + return {"id": token[len("chttp_") :], "token": "custom_http"} + return None + + class Plugin: + __name__ = "CustomHTTPPlugin" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return CustomHandler() + + pm.register(Plugin(), name="test_custom_http") + try: + token = await datasette.create_token("web_user", handler="custom_http") + response = await datasette.client.get( + "/-/actor.json", + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + actor = response.json()["actor"] + assert actor["id"] == "web_user" + assert actor["token"] == "custom_http" + finally: + pm.unregister(name="test_custom_http") + + +@pytest.mark.asyncio +async def test_token_handler_base_class_raises(): + """TokenHandler base class methods should raise NotImplementedError.""" + handler = TokenHandler() + ds = Datasette() + with pytest.raises(NotImplementedError): + await handler.create_token(ds, "test") + with pytest.raises(NotImplementedError): + await handler.verify_token(ds, "test") + + +@pytest.mark.asyncio +async def test_restrictions_round_trip(datasette): + """Tokens with database/resource restrictions should round-trip correctly.""" + restrictions = ( + TokenRestrictions() + 
.allow_all("view-instance") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + ) + token = await datasette.create_token( + "test_actor", handler="signed", restrictions=restrictions + ) + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert actor["_r"]["a"] == ["view-instance"] + assert actor["_r"]["d"] == {"docs": ["view-query"]} + assert actor["_r"]["r"] == {"docs": {"attachments": ["insert-row"]}} + + +@pytest.mark.asyncio +async def test_expires_after_round_trip(datasette): + """Tokens with expires_after should include token_expires in the actor.""" + token = await datasette.create_token( + "test_actor", handler="signed", expires_after=3600 + ) + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert "token_expires" in actor + + +@pytest.mark.asyncio +async def test_signed_tokens_disabled(): + """create_token and verify_token should fail/skip when signed tokens are disabled.""" + ds = Datasette(settings={"allow_signed_tokens": False}) + with pytest.raises(ValueError, match="Signed tokens are not enabled"): + await ds.create_token("test_actor", handler="signed") + # verify_token should return None rather than raising + assert await ds.verify_token("dstok_anything") is None From 24d801b7f799912cb4eb897a97e4f4a9fe76b966 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:33:27 -0800 Subject: [PATCH 45/53] Respect metadata-defined facet ordering in sorted_facet_results (#2648) * Preserve metadata-defined facet ordering on table pages When facets are explicitly defined in table metadata/config, they now appear in the order specified in the configuration rather than being sorted by result count. Request-added facets still appear after metadata-defined facets, sorted by count as before. 
* Document metadata-defined facet ordering behavior * Apply black formatting https://claude.ai/code/session_01PbSHtjsUpNk3Fx7xjvVqDb --- datasette/views/table.py | 34 ++++++++++++++++++++++++++----- docs/facets.rst | 2 ++ tests/test_facets.py | 44 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 594e925e..e1e5507f 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1580,11 +1580,35 @@ async def table_view_data( ] async def extra_sorted_facet_results(extra_facet_results): - return sorted( - extra_facet_results["results"].values(), - key=lambda f: (len(f["results"]), f["name"]), - reverse=True, - ) + facet_configs = table_metadata.get("facets", []) + if facet_configs: + # Build ordered list of facet names from metadata config + metadata_facet_names = [] + for fc in facet_configs: + if isinstance(fc, str): + metadata_facet_names.append(fc) + elif isinstance(fc, dict): + metadata_facet_names.append(list(fc.values())[0]) + metadata_order = {name: i for i, name in enumerate(metadata_facet_names)} + metadata_facets = [] + request_facets = [] + for f in extra_facet_results["results"].values(): + if f["name"] in metadata_order: + metadata_facets.append(f) + else: + request_facets.append(f) + metadata_facets.sort(key=lambda f: metadata_order[f["name"]]) + request_facets.sort( + key=lambda f: (len(f["results"]), f["name"]), + reverse=True, + ) + return metadata_facets + request_facets + else: + return sorted( + extra_facet_results["results"].values(), + key=lambda f: (len(f["results"]), f["name"]), + reverse=True, + ) async def extra_table_definition(): return await db.get_table_definition(table_name) diff --git a/docs/facets.rst b/docs/facets.rst index 15fe7227..2a135b69 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -153,6 +153,8 @@ Here's an example that turns on faceting by default for the ``qLegalStatus`` col Facets defined in this 
way will always be shown in the interface and returned in the API, regardless of the ``_facet`` arguments passed to the view. +Facets defined in metadata will be displayed in the order they are listed in the configuration. Any additional facets added via query string parameters (e.g. ``?_facet=column_name``) will appear after the metadata-defined facets, sorted by the number of unique values. + You can specify :ref:`array ` or :ref:`date ` facets in metadata using JSON objects with a single key of ``array`` or ``date`` and a value specifying the column, like this: .. [[[cog diff --git a/tests/test_facets.py b/tests/test_facets.py index a2b505ec..8c22ffce 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -623,12 +623,48 @@ def test_other_types_of_facet_in_metadata(): } ) as client: response = client.get("/fixtures/facetable") - for fragment in ( - "created (date)\n", - "tags (array)\n", + fragments = ( "state\n", - ): + "tags (array)\n", + "created (date)\n", + ) + for fragment in fragments: assert fragment in response.text + # Verify they appear in the metadata-defined order + positions = [response.text.index(f) for f in fragments] + assert positions == sorted( + positions + ), "Facets should appear in metadata-defined order" + + +def test_metadata_facet_ordering(): + with make_app_client( + metadata={ + "databases": { + "fixtures": { + "tables": { + "facetable": { + "facets": ["state", {"array": "tags"}, {"date": "created"}] + } + } + } + } + } + ) as client: + # JSON response should have facets in the metadata-defined order + response = client.get("/fixtures/facetable.json?_extra=sorted_facet_results") + data = response.json + facet_names = [f["name"] for f in data["sorted_facet_results"]] + assert facet_names == ["state", "tags", "created"] + + # With an additional request-based facet, metadata facets come first + # in their defined order, followed by request-based facets + response2 = client.get( + 
"/fixtures/facetable.json?_extra=sorted_facet_results&_facet=_city_id" + ) + data2 = response2.json + facet_names2 = [f["name"] for f in data2["sorted_facet_results"]] + assert facet_names2 == ["state", "tags", "created", "_city_id"] @pytest.mark.asyncio From 2bc1dd2275978e75622c5764729a4273ebac957e Mon Sep 17 00:00:00 2001 From: Daniel Bates Date: Wed, 25 Feb 2026 16:46:29 -0800 Subject: [PATCH 46/53] Fix --reload interpreting 'serve' command as a file argument (#2646) When hupper spawns the worker process, it calls the function specified by worker_path directly. Using "datasette.cli.serve" causes Click to parse sys.argv without going through the CLI group, so the literal word "serve" from the original command gets treated as a positional file argument. Change the worker path to "datasette.cli.cli" so the worker process goes through the Click group dispatcher, which properly recognizes "serve" as a subcommand and strips it from the argument list. Closes #2123 Co-authored-by: Claude Opus 4.6 Co-authored-by: Simon Willison --- datasette/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index b473fbb7..db777fe8 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -547,7 +547,7 @@ def serve( if reload: import hupper - reloader = hupper.start_reloader("datasette.cli.serve") + reloader = hupper.start_reloader("datasette.cli.cli") if immutable: reloader.watch_files(immutable) if config: From 1246c6576bb2f1ba9dc5c7d9811427d00d440976 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:49:14 -0800 Subject: [PATCH 47/53] Release 1.0a25 Refs #2636, #2641, #2646, #2647, #2650 --- docs/changelog.rst | 41 +++++++++++++++++++++++++++++++++++++++++ docs/contributing.rst | 1 + docs/upgrade-1.0a20.md | 1 - docs/upgrade_guide.md | 1 + 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 67ceeece..c0467793 100644 --- a/docs/changelog.rst +++ 
b/docs/changelog.rst @@ -4,6 +4,47 @@ Changelog ========= +.. _v1_0_a25: + +1.0a25 (2026-02-25) +------------------- + +``write_wrapper`` plugin hook for intercepting write operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new :ref:`write_wrapper() ` plugin hook allows plugins to intercept and wrap database write operations. (`#2636 `__) + +Plugins implement the hook as a generator-based context manager: + +.. code-block:: python + + @hookimpl + def write_wrapper(datasette, database, request): + def wrapper(conn): + # Setup code runs before the write + yield + # Cleanup code runs after the write + + return wrapper + +``register_token_handler()`` plugin hook for custom API token backends +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new :ref:`register_token_handler() ` plugin hook allows plugins to provide custom token backends for API authentication. (`#2650 `__) + +This includes a **backwards incompatible change**: the ``datasette.create_token()`` internal method is now an ``async`` method. Consult the :ref:`upgrade guide ` for details on how to update your code. + +``render_cell()`` now receives a ``pks`` parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :ref:`render_cell() ` plugin hook now receives a ``pks`` parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. (`#2641 `__) + +Other changes +~~~~~~~~~~~~~ + +- Facets defined in metadata now preserve their configured order, instead of being sorted by result count. Request-based facets added via the ``_facet`` parameter are still sorted by result count and appear after metadata-defined facets. (:issue:`2647`) +- Fixed ``--reload`` incorrectly interpreting the ``serve`` command as a file argument. Thanks, `Daniel Bates `__. (`#2646 `__) + .. 
_v1_0_a24: 1.0a24 (2026-01-29) diff --git a/docs/contributing.rst b/docs/contributing.rst index 3d41a125..635ca60e 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -90,6 +90,7 @@ If you want to change Datasette's Python code you can use the ``--reload`` optio You can also use the ``fixtures.py`` script to recreate the testing version of ``metadata.json`` used by the unit tests. To do that:: uv run python tests/fixtures.py fixtures.db fixtures-metadata.json + Or to output the plugins used by the tests, run this:: uv run python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 749d383c..fbc3f4a8 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,7 +2,6 @@ orphan: true --- -(upgrade_guide_v1_a20)= # Datasette 1.0a20 plugin upgrade guide Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use **any of the following**: diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index 861a8795..b67eb054 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -111,6 +111,7 @@ Instead, one should use the following methods on a Datasette class: - {ref}`get_resource_metadata() ` - {ref}`get_column_metadata() ` +(upgrade_guide_v1_a20)= ```{include} upgrade-1.0a20.md :heading-offset: 1 ``` From e4ff5e27d356ca5b3c807e821acedf8c71c37e47 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:54:51 -0800 Subject: [PATCH 48/53] Fix RST heading underline --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index c0467793..1e6a8e90 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,7 +35,7 @@ A new :ref:`register_token_handler() ` plugi This includes a **backwards incompatible change**: the ``datasette.create_token()`` internal method is now an ``async`` method.
Consult the :ref:`upgrade guide ` for details on how to update your code. ``render_cell()`` now receives a ``pks`` parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The :ref:`render_cell() ` plugin hook now receives a ``pks`` parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. (`#2641 `__) From 8f0d60236f844a6d12bd1439f57b1b3d65fcad36 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 17:01:03 -0800 Subject: [PATCH 49/53] Bump version for 1.0a25 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index de7585ca..2907e537 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a24" +__version__ = "1.0a25" __version_info__ = tuple(__version__.split(".")) From 1263380ea6b138ac63683edfd525323c6fe8eef9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 20:50:46 -0800 Subject: [PATCH 50/53] Better heading for write_wrapper() --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 1e6a8e90..2c9b7170 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,8 +9,8 @@ Changelog 1.0a25 (2026-02-25) ------------------- -``write_wrapper`` plugin hook for intercepting write operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``write_wrapper()`` plugin hook for intercepting write operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A new :ref:`write_wrapper() ` plugin hook allows plugins to intercept and wrap database write operations. 
(`#2636 `__) From 97201f067c4f64b00ccf7e02f787d65c767f9bc9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 6 Mar 2026 20:16:50 -0800 Subject: [PATCH 51/53] Row pages link to foreign keys from table display, closes #1592 https://gisthost.github.io/?40813f5b3e4d83c0efe1c09135f84290/index.html Also now shows primary key column first and in bold on that page. --- datasette/views/row.py | 64 ++++++++++++++++++++++++++++++++++++++++-- tests/test_html.py | 32 +++++++++++++++++---- 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 9c59cd3b..7cc46368 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -5,12 +5,14 @@ from datasette.resources import TableResource from .base import DataView, BaseView, _error from datasette.utils import ( await_me_maybe, + CustomRow, make_slot_function, to_css_class, escape_sqlite, ) from datasette.plugins import pm import json +import markupsafe import sqlite_utils from .table import display_columns_and_rows, _get_extras @@ -42,13 +44,62 @@ class RowView(DataView): if not rows: raise NotFound(f"Record not found: {pk_values}") + pks = resolved.pks + async def template_data(): + # Reorder columns so primary keys come first + pk_set = set(pks) + pk_cols = [d for d in results.description if d[0] in pk_set] + non_pk_cols = [d for d in results.description if d[0] not in pk_set] + reordered_description = pk_cols + non_pk_cols + reordered_columns = [d[0] for d in reordered_description] + + # Reorder row data to match + reordered_rows = [] + for row in rows: + new_row = CustomRow(reordered_columns) + for col in reordered_columns: + new_row[col] = row[col] + reordered_rows.append(new_row) + + # Expand foreign key columns into dicts so display_columns_and_rows + # renders them as hyperlinks, matching the table view behavior + expanded_rows = reordered_rows + for fk in await db.foreign_keys_for_table(table): + column = fk["column"] + if column not in 
reordered_columns: + continue + column_index = reordered_columns.index(column) + values = [row[column_index] for row in expanded_rows] + expanded_labels = await self.ds.expand_foreign_keys( + request.actor, database, table, column, values + ) + if expanded_labels: + new_rows = [] + for row in expanded_rows: + new_row = CustomRow(reordered_columns) + for col in reordered_columns: + value = row[col] + if ( + col == column + and (col, value) in expanded_labels + and value is not None + ): + new_row[col] = { + "value": value, + "label": expanded_labels[(col, value)], + } + else: + new_row[col] = value + new_rows.append(new_row) + expanded_rows = new_rows + display_columns, display_rows = await display_columns_and_rows( self.ds, database, table, - results.description, - rows, + reordered_description, + expanded_rows, link_column=False, truncate_cells=0, request=request, @@ -56,6 +107,14 @@ class RowView(DataView): for column in display_columns: column["sortable"] = False + # Bold primary key cell values + for row in display_rows: + for cell in row: + if cell["column"] in pk_set: + cell["value"] = markupsafe.Markup( + "{}".format(cell["value"]) + ) + row_actions = [] for hook in pm.hook.row_actions( datasette=self.ds, @@ -71,6 +130,7 @@ class RowView(DataView): return { "private": private, + "columns": reordered_columns, "foreign_key_tables": await self.foreign_key_tables( database, table, pk_values ), diff --git a/tests/test_html.py b/tests/test_html.py index 757f3e6e..64ae7b2d 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -347,7 +347,7 @@ async def test_row_html_simple_primary_key(ds_client): assert ["id", "content"] == [th.string.strip() for th in table.select("thead th")] assert [ [ - '1', + '1', 'hello', ] ] == [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] @@ -363,7 +363,7 @@ async def test_row_html_no_primary_key(ds_client): ] expected = [ [ - '1', + '1', '1', 'a1', 'b1', @@ -406,6 +406,26 @@ async def 
test_row_links_from_other_tables( assert link == expected_link +@pytest.mark.asyncio +async def test_row_foreign_key_links(ds_client): + # Row detail page should render foreign key values as hyperlinks + response = await ds_client.get("/fixtures/foreign_key_references/1") + assert response.status_code == 200 + soup = Soup(response.text, "html.parser") + # foreign_key_with_label=1 references simple_primary_key(id=1, content="hello") + td = soup.find("td", {"class": "col-foreign_key_with_label"}) + a = td.find("a") + assert a is not None, "Expected foreign key value to be a hyperlink" + assert a["href"] == "/fixtures/simple_primary_key/1" + assert a.text == "hello" + # Primary key column should be first and bold + table = soup.find("table") + headers = [th.text.strip() for th in table.select("thead th")] + assert headers[0] == "pk" + first_td = table.select("tbody tr td")[0] + assert first_td.find("strong") is not None, "PK value should be bold" + + @pytest.mark.asyncio @pytest.mark.parametrize( "path,expected", @@ -414,8 +434,8 @@ async def test_row_links_from_other_tables( "/fixtures/compound_primary_key/a,b", [ [ - 'a', - 'b', + 'a', + 'b', 'c', ] ], @@ -424,8 +444,8 @@ async def test_row_links_from_other_tables( "/fixtures/compound_primary_key/a~2Fb,~2Ec~2Dd", [ [ - 'a/b', - '.c-d', + 'a/b', + '.c-d', 'c', ] ], From e2c1e81ec9505f02566de840c1dba5ea7b0b121d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Mar 2026 17:45:24 -0700 Subject: [PATCH 52/53] UI for selecting and re-ordering columns on the table page (#2662) New Web Component on table/view page with a dialog for selecting and re-ordering columns. 
Closes #2661 Refs #1298 --- datasette/static/app.css | 19 + datasette/static/column-chooser.js | 698 ++++++++++++++++++++++++++ datasette/static/navigation-search.js | 13 +- datasette/static/table.js | 58 +++ datasette/templates/table.html | 9 + datasette/views/table.py | 6 + tests/test_html.py | 23 +- tests/test_table_html.py | 63 +++ 8 files changed, 882 insertions(+), 7 deletions(-) create mode 100644 datasette/static/column-chooser.js diff --git a/datasette/static/app.css b/datasette/static/app.css index a7fc7fa3..4183b58e 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -63,6 +63,14 @@ em { } /* end reset */ +/* Modal CSS variables (shared by web components via Shadow DOM) */ +:root { + --modal-backdrop-bg: rgba(0, 0, 0, 0.5); + --modal-backdrop-blur: blur(4px); + --modal-border-radius: 0.75rem; + --modal-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04); + --modal-animation-duration: 0.2s; +} body { margin: 0; @@ -795,6 +803,17 @@ p.zero-results { .filters input.filter-value { width: 140px; } + button.choose-columns-mobile { + display: inline-block; + padding: 0.5rem 1rem; + margin-bottom: 1em; + font-size: 0.9rem; + font-family: inherit; + background: white; + border: 1px solid #ccc; + border-radius: 5px; + cursor: pointer; + } } svg.dropdown-menu-icon { diff --git a/datasette/static/column-chooser.js b/datasette/static/column-chooser.js new file mode 100644 index 00000000..9680398c --- /dev/null +++ b/datasette/static/column-chooser.js @@ -0,0 +1,698 @@ +class ColumnChooser extends HTMLElement { + constructor() { + super(); + this.attachShadow({ mode: "open" }); + + // State + this._items = []; + this._checked = new Set(); + this._savedItems = null; + this._savedChecked = null; + this._onApply = null; + + // Drag state + this._ghost = null; + this._dragSrcIdx = null; + this._dropTargetIdx = null; + this._dropPosition = null; + this._ghostOffX = 0; + this._ghostOffY = 0; + this._autoScrollRAF = null; + 
this._lastPointerY = 0; + this._lastPointerX = 0; + this._SCROLL_ZONE = 72; + this._SCROLL_SPEED = 0.4; + + // Bound handlers + this._onMove = this._onMove.bind(this); + this._onUp = this._onUp.bind(this); + + this.shadowRoot.innerHTML = ` + + + + +
    + + +
    +
    +
    +
    +
      +
      + +
      + `; + + // DOM refs + this._dialog = this.shadowRoot.querySelector("dialog"); + this._listWrap = this.shadowRoot.getElementById("listWrap"); + this._dragList = this.shadowRoot.getElementById("dragList"); + this._pulseTop = this.shadowRoot.getElementById("pulseTop"); + this._pulseBot = this.shadowRoot.getElementById("pulseBot"); + this._selectAllBtn = this.shadowRoot.getElementById("selectAllBtn"); + this._deselectAllBtn = this.shadowRoot.getElementById("deselectAllBtn"); + this._cancelBtn = this.shadowRoot.getElementById("cancelBtn"); + this._applyBtn = this.shadowRoot.getElementById("applyBtn"); + this._countEl = this.shadowRoot.getElementById("selectedCount"); + this._footerEl = this.shadowRoot.getElementById("footerInfo"); + + // Event listeners + this._selectAllBtn.addEventListener("click", () => this._selectAll()); + this._deselectAllBtn.addEventListener("click", () => this._deselectAll()); + this._cancelBtn.addEventListener("click", () => this._close()); + this._applyBtn.addEventListener("click", () => this._apply()); + this._dialog.addEventListener("click", (e) => { + if (e.target === this._dialog) this._close(); + }); + this._dialog.addEventListener("cancel", (e) => { + e.preventDefault(); + this._close(); + }); + } + + /** + * Open the column chooser dialog. + * @param {Object} opts + * @param {string[]} opts.columns - All available column names, in display order. + * @param {string[]} opts.selected - Column names that should be pre-checked. + * @param {function(string[]): void} opts.onApply - Called with the selected columns in order when Apply is clicked. 
+ */ + open({ columns, selected = [], onApply }) { + this._items = [...columns]; + this._checked = new Set(selected); + this._onApply = onApply || null; + + // Save state for cancel/restore + this._savedItems = [...this._items]; + this._savedChecked = new Set(this._checked); + + this._render(); + this._dialog.showModal(); + } + + // ── Internal methods ── + + _close() { + this._items = this._savedItems ? [...this._savedItems] : this._items; + this._checked = this._savedChecked + ? new Set(this._savedChecked) + : this._checked; + this._dialog.close(); + } + + _selectAll() { + this._items.forEach((col) => this._checked.add(col)); + this._dragList.querySelectorAll('input[type="checkbox"]').forEach((cb) => { + cb.checked = true; + }); + this._updateCounts(); + } + + _deselectAll() { + this._checked.clear(); + this._dragList.querySelectorAll('input[type="checkbox"]').forEach((cb) => { + cb.checked = false; + }); + this._updateCounts(); + } + + _apply() { + const selected = this._items.filter((col) => this._checked.has(col)); + this._dialog.close(); + if (this._onApply) { + this._onApply(selected); + } + } + + _render() { + this._dragList.innerHTML = ""; + this._items.forEach((col, i) => { + const li = document.createElement("li"); + li.className = "drag-item"; + li.dataset.idx = i; + li.innerHTML = ` + + + + + + + + + + + +
      + `; + + li.querySelector("input").addEventListener("change", (e) => { + e.target.checked ? this._checked.add(col) : this._checked.delete(col); + this._updateCounts(); + }); + + li.querySelector(".drag-handle").addEventListener("pointerdown", (e) => + this._startDrag(e, i), + ); + this._dragList.appendChild(li); + }); + + this._updateCounts(); + } + + _updateCounts() { + const n = this._checked.size; + this._countEl.textContent = `${n} of ${this._items.length} selected`; + this._footerEl.textContent = `${this._items.length} columns`; + } + + // ── Drag engine ── + + _startDrag(e, idx) { + e.preventDefault(); + this._dragSrcIdx = idx; + + const srcEl = this._dragList.children[idx]; + const rect = srcEl.getBoundingClientRect(); + + this._ghostOffX = e.clientX - rect.left; + this._ghostOffY = e.clientY - rect.top; + + // Build ghost inside shadow DOM + this._ghost = document.createElement("div"); + this._ghost.className = "drag-ghost"; + this._ghost.style.width = rect.width + "px"; + this._ghost.style.height = rect.height + "px"; + this._ghost.innerHTML = srcEl.innerHTML; + this._ghost.querySelector(".drop-indicator")?.remove(); + const h = this._ghost.querySelector(".drag-handle"); + if (h) h.style.color = "var(--accent)"; + this.shadowRoot.appendChild(this._ghost); + + srcEl.classList.add("is-dragging"); + this._positionGhost(e.clientX, e.clientY); + + document.addEventListener("pointermove", this._onMove); + document.addEventListener("pointerup", this._onUp); + document.addEventListener("pointercancel", this._onUp); + } + + _positionGhost(cx, cy) { + this._ghost.style.left = cx - this._ghostOffX + "px"; + this._ghost.style.top = cy - this._ghostOffY + "px"; + } + + _onMove(e) { + this._lastPointerX = e.clientX; + this._lastPointerY = e.clientY; + this._positionGhost(e.clientX, e.clientY); + this._updateDropTarget(e.clientY); + this._updateAutoScroll(e.clientY); + } + + _onUp() { + document.removeEventListener("pointermove", this._onMove); + 
document.removeEventListener("pointerup", this._onUp); + document.removeEventListener("pointercancel", this._onUp); + + this._stopAutoScroll(); + + const noMove = + this._dropTargetIdx === null || this._dropTargetIdx === this._dragSrcIdx; + this._clearDropIndicators(); + + let dest = null; + if (!noMove) { + const moved = this._items.splice(this._dragSrcIdx, 1)[0]; + dest = this._dropTargetIdx; + if (this._dropPosition === "after") dest++; + if (dest > this._dragSrcIdx) dest--; + this._items.splice(dest, 0, moved); + } + + this._dragSrcIdx = null; + this._dropTargetIdx = null; + this._dropPosition = null; + + const g = this._ghost; + this._ghost = null; + + if (noMove) { + if (g) g.remove(); + this._render(); + return; + } + + this._render(); + + if (g && dest !== null) { + const landedEl = this._dragList.children[dest]; + if (landedEl) { + landedEl.style.opacity = "0"; + const r = landedEl.getBoundingClientRect(); + g.getBoundingClientRect(); + g.style.transition = + "left 0.15s cubic-bezier(0.22, 1, 0.36, 1), top 0.15s cubic-bezier(0.22, 1, 0.36, 1), box-shadow 0.15s, opacity 0.1s 0.1s"; + g.style.left = r.left + "px"; + g.style.top = r.top + "px"; + g.style.boxShadow = "0 1px 4px rgba(0,0,0,0.08)"; + g.style.opacity = "0"; + setTimeout(() => { + g.remove(); + if (landedEl) landedEl.style.opacity = ""; + }, 160); + } else { + g.remove(); + } + } else if (g) { + g.remove(); + } + } + + _updateDropTarget(clientY) { + this._clearDropIndicators(); + const listItems = [ + ...this._dragList.querySelectorAll(".drag-item:not(.is-dragging)"), + ]; + if (!listItems.length) return; + + let best = null, + bestDist = Infinity; + listItems.forEach((li) => { + const r = li.getBoundingClientRect(); + const mid = r.top + r.height / 2; + const dist = Math.abs(clientY - mid); + if (dist < bestDist) { + bestDist = dist; + best = li; + } + }); + + if (!best) return; + const r = best.getBoundingClientRect(); + const mid = r.top + r.height / 2; + const above = clientY < mid; + const 
indic = best.querySelector(".drop-indicator"); + + this._dropTargetIdx = parseInt(best.dataset.idx); + this._dropPosition = above ? "before" : "after"; + + if (indic) { + indic.className = "drop-indicator " + (above ? "top" : "bottom"); + } + } + + _clearDropIndicators() { + this._dragList.querySelectorAll(".drop-indicator").forEach((el) => { + el.className = "drop-indicator"; + }); + } + + _updateAutoScroll(clientY) { + const rect = this._listWrap.getBoundingClientRect(); + const relY = clientY - rect.top; + const distTop = relY; + const distBot = rect.height - relY; + + const inTop = distTop < this._SCROLL_ZONE && distTop >= 0; + const inBot = distBot < this._SCROLL_ZONE && distBot >= 0; + + this._pulseTop.classList.toggle("active", inTop); + this._pulseBot.classList.toggle("active", inBot); + + if ((inTop || inBot) && !this._autoScrollRAF) { + let lastTime = null; + const loop = (ts) => { + if (!this._ghost) { + this._stopAutoScroll(); + return; + } + if (lastTime !== null) { + const dt = ts - lastTime; + const rect2 = this._listWrap.getBoundingClientRect(); + const relY2 = this._lastPointerY - rect2.top; + const dTop = relY2; + const dBot = rect2.height - relY2; + + if (dTop < this._SCROLL_ZONE && dTop >= 0) { + const factor = 1 - dTop / this._SCROLL_ZONE; + this._listWrap.scrollTop -= this._SCROLL_SPEED * dt * factor * 2.5; + } else if (dBot < this._SCROLL_ZONE && dBot >= 0) { + const factor = 1 - dBot / this._SCROLL_ZONE; + this._listWrap.scrollTop += this._SCROLL_SPEED * dt * factor * 2.5; + } else { + this._stopAutoScroll(); + return; + } + this._updateDropTarget(this._lastPointerY); + } + lastTime = ts; + this._autoScrollRAF = requestAnimationFrame(loop); + }; + this._autoScrollRAF = requestAnimationFrame(loop); + } + + if (!inTop && !inBot) this._stopAutoScroll(); + } + + _stopAutoScroll() { + if (this._autoScrollRAF) { + cancelAnimationFrame(this._autoScrollRAF); + this._autoScrollRAF = null; + } + this._pulseTop.classList.remove("active"); + 
this._pulseBot.classList.remove("active"); + } +} + +customElements.define("column-chooser", ColumnChooser); diff --git a/datasette/static/navigation-search.js b/datasette/static/navigation-search.js index 48de5c4f..95e7dfc5 100644 --- a/datasette/static/navigation-search.js +++ b/datasette/static/navigation-search.js @@ -19,19 +19,20 @@ class NavigationSearch extends HTMLElement { dialog { border: none; - border-radius: 0.75rem; + border-radius: var(--modal-border-radius, 0.75rem); padding: 0; max-width: 90vw; width: 600px; max-height: 80vh; - box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04); - animation: slideIn 0.2s ease-out; + box-shadow: var(--modal-shadow, 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04)); + animation: slideIn var(--modal-animation-duration, 0.2s) ease-out; } dialog::backdrop { - background: rgba(0, 0, 0, 0.5); - backdrop-filter: blur(4px); - animation: fadeIn 0.2s ease-out; + background: var(--modal-backdrop-bg, rgba(0, 0, 0, 0.5)); + backdrop-filter: var(--modal-backdrop-blur, blur(4px)); + -webkit-backdrop-filter: var(--modal-backdrop-blur, blur(4px)); + animation: fadeIn var(--modal-animation-duration, 0.2s) ease-out; } @keyframes slideIn { diff --git a/datasette/static/table.js b/datasette/static/table.js index 0caeeb91..c26dda5a 100644 --- a/datasette/static/table.js +++ b/datasette/static/table.js @@ -4,6 +4,7 @@ var DROPDOWN_HTML = `
    • Sort ascending
    • Sort descending
    • Facet by this
    • +
    • Choose columns
    • Hide this column
    • Show all columns
    • Show not-blank rows
    • @@ -104,6 +105,7 @@ const initDatasetteTable = function (manager) { var notBlank = menu.querySelector("a.dropdown-not-blank"); var hideColumn = menu.querySelector("a.dropdown-hide-column"); var showAllColumns = menu.querySelector("a.dropdown-show-all-columns"); + var selectColumns = menu.querySelector("a.dropdown-choose-columns"); if (params.get("_sort") == column) { sort.parentNode.style.display = "none"; } else { @@ -129,6 +131,18 @@ const initDatasetteTable = function (manager) { } else { hideColumn.parentNode.style.display = "none"; } + /* Choose columns - show if web component exists */ + var columnChooser = document.querySelector("column-chooser"); + if (columnChooser && window._columnChooserData) { + selectColumns.parentNode.style.display = "block"; + selectColumns.addEventListener("click", function (ev) { + ev.preventDefault(); + closeMenu(); + openColumnChooser(); + }); + } else { + selectColumns.parentNode.style.display = "none"; + } /* Only show "Facet by this" if it's not the first column, not selected, not a single PK and the Datasette allow_facet setting is True */ var displayedFacets = Array.from( @@ -330,6 +344,49 @@ function initAutocompleteForFilterValues(manager) { }); } +/** Open the column-chooser web component */ +function openColumnChooser() { + var chooser = document.querySelector("column-chooser"); + var data = window._columnChooserData; + if (!chooser || !data) return; + + var nonPkColumns = data.allColumns.filter(function (col) { + return data.primaryKeys.indexOf(col) === -1; + }); + var selected = data.selectedColumns.filter(function (col) { + return data.primaryKeys.indexOf(col) === -1; + }); + + chooser.open({ + columns: nonPkColumns, + selected: selected, + onApply: function (cols) { + var params = new URLSearchParams(location.search); + params.delete("_col"); + params.delete("_nocol"); + params.delete("_next"); + + if (cols.length === nonPkColumns.length) { + // Check if order matches original - if so, no params needed + var 
orderMatches = cols.every(function (col, i) { + return col === nonPkColumns[i]; + }); + if (!orderMatches) { + cols.forEach(function (col) { + params.append("_col", col); + }); + } + } else { + cols.forEach(function (col) { + params.append("_col", col); + }); + } + var qs = params.toString(); + location.href = qs ? "?" + qs : location.pathname; + } + }); +} + // Ensures Table UI is initialized only after the Manager is ready. document.addEventListener("datasette_init", function (evt) { const { detail: manager } = evt; @@ -340,4 +397,5 @@ document.addEventListener("datasette_init", function (evt) { // Other UI functions with interactive JS needs addButtonsToFilterRows(manager); initAutocompleteForFilterValues(manager); + }); diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 25ff31ef..9c930918 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -4,6 +4,7 @@ {% block extra_head %} {{- super() -}} +