From 335814a7534d0791437e4a65ecde6a694a09f9a7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Oct 2025 11:07:05 -0700 Subject: [PATCH 001/299] Prototype of default deny modes, refs #2540 --- datasette/app.py | 4 + datasette/cli.py | 16 ++++ datasette/default_permissions.py | 65 +++++++++---- tests/test_config_permission_rules.py | 127 ++++++++++++++++++++++++++ 4 files changed, 192 insertions(+), 20 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index bfbf2360..980da6b8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -296,6 +296,8 @@ class Datasette: crossdb=False, nolock=False, internal=None, + private=False, + require_auth=False, ): self._startup_invoked = False assert config_dir is None or isinstance( @@ -340,6 +342,8 @@ class Datasette: raise self.crossdb = crossdb self.nolock = nolock + self.private = private + self.require_auth = require_auth if memory or crossdb or not self.files: self.add_database( Database(self, is_mutable=False, is_memory=True), name="_memory" diff --git a/datasette/cli.py b/datasette/cli.py index 24d87279..6a2c1623 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -490,6 +490,16 @@ def uninstall(packages, yes): type=click.Path(), help="Path to a persistent Datasette internal SQLite database", ) +@click.option( + "--private", + is_flag=True, + help="Default deny mode - all access blocked unless explicitly allowed", +) +@click.option( + "--require-auth", + is_flag=True, + help="Require authentication - only actors with an id can access", +) def serve( files, immutable, @@ -522,6 +532,8 @@ def serve( ssl_keyfile, ssl_certfile, internal, + private, + require_auth, return_instance=False, ): """Serve up specified SQLite database files with a web UI""" @@ -536,6 +548,8 @@ def serve( ) click.echo(formatter.getvalue()) sys.exit(0) + if private and require_auth: + raise click.UsageError("Cannot use both --private and --require-auth") if reload: import hupper @@ -588,6 +602,8 @@ def serve( crossdb=crossdb, 
nolock=nolock, internal=internal, + private=private, + require_auth=require_auth, ) # if files is a single directory, use that as config_dir= diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 32164260..ee477c6e 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -60,30 +60,55 @@ async def permission_resources_sql(datasette, actor, action): return rules # 5. Default allow actions (ONLY if no restrictions) + default_allow_actions = { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } # If actor has restrictions, they've already added their own deny/allow rules has_restrictions = actor and "_r" in actor if not has_restrictions: - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - sql = ( - "SELECT NULL AS parent, NULL AS child, 1 AS allow, " - f"'{reason}' AS reason" - ) - rules.append( - PermissionSQL( - source="default_permissions", - sql=sql, - params={}, + # Check for --private flag (complete default-deny mode) + if datasette.private: + # In private mode, don't grant any default allow permissions + pass + # Check for --require-auth flag (authenticated-only mode) + elif datasette.require_auth: + # Only grant default allow if actor has an id (is authenticated) + if actor and actor.get("id"): + if action in default_allow_actions: + reason = f"default allow for {action} (authenticated)".replace( + "'", "''" + ) + sql = ( + "SELECT NULL AS parent, NULL AS child, 1 AS allow, " + f"'{reason}' AS reason" + ) + rules.append( + PermissionSQL( + source="default_permissions", + sql=sql, + params={}, + ) + ) + else: + # Normal mode - grant default allow to everyone + if action in default_allow_actions: + reason = f"default allow for 
{action}".replace("'", "''") + sql = ( + "SELECT NULL AS parent, NULL AS child, 1 AS allow, " + f"'{reason}' AS reason" + ) + rules.append( + PermissionSQL( + source="default_permissions", + sql=sql, + params={}, + ) ) - ) if not rules: return None diff --git a/tests/test_config_permission_rules.py b/tests/test_config_permission_rules.py index 8327ecbf..f8b13ee9 100644 --- a/tests/test_config_permission_rules.py +++ b/tests/test_config_permission_rules.py @@ -161,3 +161,130 @@ async def test_view_instance_allow_block(): assert await ds.allowed(action="view-instance", actor={"id": "alice"}) assert not await ds.allowed(action="view-instance", actor={"id": "bob"}) + + +@pytest.mark.asyncio +async def test_private_mode_denies_all_by_default(): + """Test --private flag blocks all access unless explicitly allowed""" + ds = Datasette(memory=True, private=True) + ds.add_database(Database(ds, memory_name="test_memory"), name="test") + await ds.invoke_startup() + await ds.refresh_schemas() + + # Unauthenticated access should be denied for all default actions + assert not await ds.allowed(action="view-instance", actor=None) + assert not await ds.allowed( + action="view-database", resource=DatabaseResource(database="test"), actor=None + ) + assert not await ds.allowed( + action="view-table", + resource=TableResource(database="test", table="test"), + actor=None, + ) + + # Even authenticated users should be denied in private mode + assert not await ds.allowed(action="view-instance", actor={"id": "alice"}) + assert not await ds.allowed( + action="view-database", + resource=DatabaseResource(database="test"), + actor={"id": "alice"}, + ) + + +@pytest.mark.asyncio +async def test_private_mode_with_explicit_allow(): + """Test --private flag allows explicitly configured permissions""" + config = {"permissions": {"view-instance": {"id": "alice"}}} + ds = Datasette(memory=True, private=True, config=config) + ds.add_database(Database(ds, memory_name="test_memory"), name="test") + await 
ds.invoke_startup() + await ds.refresh_schemas() + + # Alice should be allowed due to explicit config + assert await ds.allowed(action="view-instance", actor={"id": "alice"}) + + # Bob should still be denied + assert not await ds.allowed(action="view-instance", actor={"id": "bob"}) + + # Unauthenticated should be denied + assert not await ds.allowed(action="view-instance", actor=None) + + +@pytest.mark.asyncio +async def test_require_auth_mode_allows_authenticated(): + """Test --require-auth flag allows actors with id""" + ds = Datasette(memory=True, require_auth=True) + ds.add_database(Database(ds, memory_name="test_memory"), name="test") + await ds.invoke_startup() + await ds.refresh_schemas() + + # Authenticated users should be allowed + assert await ds.allowed(action="view-instance", actor={"id": "alice"}) + assert await ds.allowed( + action="view-database", + resource=DatabaseResource(database="test"), + actor={"id": "bob"}, + ) + assert await ds.allowed( + action="view-table", + resource=TableResource(database="test", table="test"), + actor={"id": "charlie"}, + ) + + # Unauthenticated access should be denied + assert not await ds.allowed(action="view-instance", actor=None) + assert not await ds.allowed( + action="view-database", resource=DatabaseResource(database="test"), actor=None + ) + + # Actor without id should be denied + assert not await ds.allowed(action="view-instance", actor={"name": "anonymous"}) + + +@pytest.mark.asyncio +async def test_require_auth_mode_with_restrictions(): + """Test --require-auth mode works with actor restrictions""" + # Test with actor that has restrictions + ds = Datasette(memory=True, require_auth=True) + ds.add_database(Database(ds, memory_name="test_memory"), name="test") + await ds.invoke_startup() + await ds.refresh_schemas() + + # Actor with restrictions should have those restrictions applied + restricted_actor = {"id": "alice", "_r": {"a": ["view-table"]}} + # This actor has restrictions, so default allow won't apply + 
# Instead their restrictions define what they can do + assert await ds.allowed( + action="view-table", + resource=TableResource(database="test", table="test"), + actor=restricted_actor, + ) + + # Regular authenticated actor without restrictions should get default allow + normal_actor = {"id": "bob"} + assert await ds.allowed( + action="view-database", + resource=DatabaseResource(database="test"), + actor=normal_actor, + ) + + +@pytest.mark.asyncio +async def test_normal_mode_allows_all(): + """Test default behavior without --private or --require-auth""" + ds = Datasette(memory=True, private=False, require_auth=False) + ds.add_database(Database(ds, memory_name="test_memory"), name="test") + await ds.invoke_startup() + await ds.refresh_schemas() + + # Everyone should be allowed in normal mode + assert await ds.allowed(action="view-instance", actor=None) + assert await ds.allowed( + action="view-database", resource=DatabaseResource(database="test"), actor=None + ) + assert await ds.allowed(action="view-instance", actor={"id": "alice"}) + assert await ds.allowed( + action="view-database", + resource=DatabaseResource(database="test"), + actor={"id": "bob"}, + ) From 5247856bd42d95f21e389c127d1c574961ee16d6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Oct 2025 15:44:06 -0700 Subject: [PATCH 002/299] Filter out temp database from attached_databases() Refs https://github.com/simonw/datasette/issues/2557#issuecomment-3470510837 --- datasette/database.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datasette/database.py b/datasette/database.py index 13baa1d9..e5858128 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -410,7 +410,12 @@ class Database: # But SQLite prior to 3.16.0 doesn't support pragma functions results = await self.execute("PRAGMA database_list;") # {'seq': 0, 'name': 'main', 'file': ''} - return [AttachedDatabase(*row) for row in results.rows if row["seq"] > 0] + return [ + AttachedDatabase(*row) + 
for row in results.rows + # Filter out the SQLite internal "temp" database, refs #2557 + if row["seq"] > 0 and row["name"] != "temp" + ] async def table_exists(self, table): results = await self.execute( From 6a71bde37fdbf6823a0975125943e9e32882259f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Oct 2025 15:48:46 -0700 Subject: [PATCH 003/299] Permissions SQL API improvements (#2558) * Neater design for PermissionSQL class, refs #2556 - source is now automatically set to the source plugin - params is optional * PermissionSQL.allow() and PermissionSQL.deny() shortcuts Closes #2556 * Filter out temp database from attached_databases() Refs https://github.com/simonw/datasette/issues/2557#issuecomment-3470510837 --- datasette/default_permissions.py | 35 ++------ datasette/permissions.py | 25 +++++- datasette/templates/debug_rules.html | 2 + datasette/utils/actions_sql.py | 120 ++++++++++----------------- datasette/utils/permissions.py | 65 ++++++++++++++- datasette/views/special.py | 3 +- docs/plugin_hooks.rst | 42 +++++----- tests/plugins/my_plugin.py | 59 +++++++++---- tests/test_actions_sql.py | 31 +++---- tests/test_allowed_resources.py | 12 +-- tests/test_permission_endpoints.py | 4 - tests/test_plugins.py | 44 +++------- tests/test_table_api.py | 1 + tests/test_utils_permissions.py | 25 +----- 14 files changed, 241 insertions(+), 227 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 32164260..0f64cbc5 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -28,14 +28,7 @@ async def permission_resources_sql(datasette, actor, action): # Add a single global-level allow rule (NULL, NULL) for root # This allows root to access everything by default, but database-level # and table-level deny rules in config can still block specific resources - sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'root user' AS reason" - rules.append( - PermissionSQL( - 
source="root_permissions", - sql=sql, - params={}, - ) - ) + rules.append(PermissionSQL.allow(reason="root user")) # 3. Config-based permission rules config_rules = await _config_permission_rules(datasette, actor, action) @@ -44,14 +37,7 @@ async def permission_resources_sql(datasette, actor, action): # 4. Check default_allow_sql setting for execute-sql action if action == "execute-sql" and not datasette.setting("default_allow_sql"): # Return a deny rule for all databases - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, 'default_allow_sql is false' AS reason" - rules.append( - PermissionSQL( - source="default_allow_sql_setting", - sql=sql, - params={}, - ) - ) + rules.append(PermissionSQL.deny(reason="default_allow_sql is false")) # Early return - don't add default allow rule if not rules: return None @@ -73,17 +59,7 @@ async def permission_resources_sql(datasette, actor, action): } if action in default_allow_actions: reason = f"default allow for {action}".replace("'", "''") - sql = ( - "SELECT NULL AS parent, NULL AS child, 1 AS allow, " - f"'{reason}' AS reason" - ) - rules.append( - PermissionSQL( - source="default_permissions", - sql=sql, - params={}, - ) - ) + rules.append(PermissionSQL.allow(reason=reason)) if not rules: return None @@ -286,7 +262,7 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS params[f"{key}_reason"] = reason sql = "\nUNION ALL\n".join(parts) - return [PermissionSQL(source="config_permissions", sql=sql, params=params)] + return [PermissionSQL(sql=sql, params=params)] async def _restriction_permission_rules( @@ -343,7 +319,6 @@ async def _restriction_permission_rules( sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :deny_reason AS reason" return [ PermissionSQL( - source="actor_restrictions", sql=sql, params={ "deny_reason": f"actor restrictions: {action} not in allowlist" @@ -402,7 +377,7 @@ async def _restriction_permission_rules( sql = "\nUNION ALL\n".join(selects) - return 
[PermissionSQL(source="actor_restrictions", sql=sql, params=params)] + return [PermissionSQL(sql=sql, params=params)] def restrictions_allow_action( diff --git a/datasette/permissions.py b/datasette/permissions.py index 42811aa0..669df47e 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, Dict, NamedTuple +from typing import Any, NamedTuple class Resource(ABC): @@ -79,6 +79,9 @@ class Action: also_requires: str | None = None # Optional action name that must also be allowed +_reason_id = 1 + + @dataclass class PermissionSQL: """ @@ -89,9 +92,25 @@ class PermissionSQL: reason TEXT """ - source: str # identifier used for auditing (e.g., plugin name) sql: str # SQL that SELECTs the 4 columns above - params: Dict[str, Any] # bound params for the SQL (values only; no ':' prefix) + params: dict[str, Any] | None = ( + None # bound params for the SQL (values only; no ':' prefix) + ) + source: str | None = None # System will set this to the plugin name + + @classmethod + def allow(cls, reason: str, _allow: bool = True) -> "PermissionSQL": + global _reason_id + i = _reason_id + _reason_id += 1 + return cls( + sql=f"SELECT NULL AS parent, NULL AS child, {1 if _allow else 0} AS allow, :reason_{i} AS reason", + params={f"reason_{i}": reason}, + ) + + @classmethod + def deny(cls, reason: str) -> "PermissionSQL": + return cls.allow(reason=reason, _allow=False) # This is obsolete, replaced by Action and ResourceType diff --git a/datasette/templates/debug_rules.html b/datasette/templates/debug_rules.html index d1dd5562..9a290803 100644 --- a/datasette/templates/debug_rules.html +++ b/datasette/templates/debug_rules.html @@ -137,6 +137,7 @@ function displayResults(data) { html += 'Resource Path'; html += 'Parent'; html += 'Child'; + html += 'Source Plugin'; html += 'Reason'; html += ''; html += ''; @@ -152,6 +153,7 @@ function displayResults(data) { html += 
`${escapeHtml(item.resource || '/')}`; html += `${escapeHtml(item.parent || '—')}`; html += `${escapeHtml(item.child || '—')}`; + html += `${escapeHtml(item.source_plugin || '—')}`; html += `${escapeHtml(item.reason || '—')}`; html += ''; } diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index a167bd87..13594a2d 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -23,42 +23,12 @@ The core pattern is: from typing import TYPE_CHECKING -from datasette.plugins import pm -from datasette.utils import await_me_maybe -from datasette.permissions import PermissionSQL +from datasette.utils.permissions import gather_permission_sql_from_hooks if TYPE_CHECKING: from datasette.app import Datasette -def _process_permission_results(results) -> tuple[list[str], dict]: - """ - Process plugin permission results into SQL fragments and parameters. - - Args: - results: Results from permission_resources_sql hook (may be list or single PermissionSQL) - - Returns: - A tuple of (list of SQL strings, dict of parameters) - """ - rule_sqls = [] - all_params = {} - - if results is None: - return rule_sqls, all_params - - if isinstance(results, list): - for plugin_sql in results: - if isinstance(plugin_sql, PermissionSQL): - rule_sqls.append(plugin_sql.sql) - all_params.update(plugin_sql.params) - elif isinstance(results, PermissionSQL): - rule_sqls.append(results.sql) - all_params.update(results.params) - - return rule_sqls, all_params - - async def build_allowed_resources_sql( datasette: "Datasette", actor: dict | None, @@ -179,22 +149,24 @@ async def _build_single_action_sql( # Get base resources SQL from the resource class base_resources_sql = await action_obj.resource_class.resources_sql(datasette) - # Get all permission rule fragments from plugins via the hook - rule_results = pm.hook.permission_resources_sql( + permission_sqls = await gather_permission_sql_from_hooks( datasette=datasette, actor=actor, action=action, ) - # Combine 
rule fragments and collect parameters all_params = {} rule_sqls = [] - for result in rule_results: - result = await await_me_maybe(result) - sqls, params = _process_permission_results(result) - rule_sqls.extend(sqls) - all_params.update(params) + for permission_sql in permission_sqls: + rule_sqls.append( + f""" + SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( + {permission_sql.sql} + ) + """.strip() + ) + all_params.update(permission_sql.params or {}) # If no rules, return empty result (deny all) if not rule_sqls: @@ -219,28 +191,21 @@ async def _build_single_action_sql( # If include_is_private, we need to build anonymous permissions too if include_is_private: - # Get anonymous permission rules - anon_rule_results = pm.hook.permission_resources_sql( + anon_permission_sqls = await gather_permission_sql_from_hooks( datasette=datasette, actor=None, action=action, ) - anon_rule_sqls = [] - anon_params = {} - for result in anon_rule_results: - result = await await_me_maybe(result) - sqls, params = _process_permission_results(result) - anon_rule_sqls.extend(sqls) - # Namespace anonymous params to avoid conflicts - for key, value in params.items(): - anon_params[f"anon_{key}"] = value - - # Rewrite anonymous SQL to use namespaced params anon_sqls_rewritten = [] - for sql in anon_rule_sqls: - for key in params.keys(): - sql = sql.replace(f":{key}", f":anon_{key}") - anon_sqls_rewritten.append(sql) + anon_params = {} + + for permission_sql in anon_permission_sqls: + rewritten_sql = permission_sql.sql + for key, value in (permission_sql.params or {}).items(): + anon_key = f"anon_{key}" + anon_params[anon_key] = value + rewritten_sql = rewritten_sql.replace(f":{key}", f":{anon_key}") + anon_sqls_rewritten.append(rewritten_sql) all_params.update(anon_params) @@ -261,8 +226,8 @@ async def _build_single_action_sql( " SELECT b.parent, b.child,", " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", " MAX(CASE WHEN ar.allow = 1 THEN 
1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.reason END) AS allow_reasons", + " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", + " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", " FROM base b", " LEFT JOIN all_rules ar ON ar.parent = b.parent AND ar.child = b.child", " GROUP BY b.parent, b.child", @@ -271,8 +236,8 @@ async def _build_single_action_sql( " SELECT b.parent, b.child,", " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.reason END) AS allow_reasons", + " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", + " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", " FROM base b", " LEFT JOIN all_rules ar ON ar.parent = b.parent AND ar.child IS NULL", " GROUP BY b.parent, b.child", @@ -281,8 +246,8 @@ async def _build_single_action_sql( " SELECT b.parent, b.child,", " MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny,", " MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow,", - " json_group_array(CASE WHEN ar.allow = 0 THEN ar.reason END) AS deny_reasons,", - " json_group_array(CASE WHEN ar.allow = 1 THEN ar.reason END) AS allow_reasons", + " json_group_array(CASE WHEN ar.allow = 0 THEN ar.source_plugin || ': ' || ar.reason END) AS deny_reasons,", + " json_group_array(CASE WHEN ar.allow = 1 THEN ar.source_plugin || ': ' || ar.reason END) AS allow_reasons", " FROM base b", " LEFT JOIN all_rules ar ON ar.parent IS NULL AND ar.child IS NULL", " GROUP BY b.parent, b.child", @@ -430,32 +395,31 @@ 
async def build_permission_rules_sql( if not action_obj: raise ValueError(f"Unknown action: {action}") - # Get all permission rule fragments from plugins via the hook - rule_results = pm.hook.permission_resources_sql( + permission_sqls = await gather_permission_sql_from_hooks( datasette=datasette, actor=actor, action=action, ) - # Combine rule fragments and collect parameters - all_params = {} - rule_sqls = [] - - for result in rule_results: - result = await await_me_maybe(result) - sqls, params = _process_permission_results(result) - rule_sqls.extend(sqls) - all_params.update(params) - - # Build the UNION query - if not rule_sqls: - # Return empty result set + if not permission_sqls: return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", {}, ) - rules_union = " UNION ALL ".join(rule_sqls) + union_parts = [] + all_params = {} + for permission_sql in permission_sqls: + union_parts.append( + f""" + SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( + {permission_sql.sql} + ) + """.strip() + ) + all_params.update(permission_sql.params or {}) + + rules_union = " UNION ALL ".join(union_parts) return rules_union, all_params diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 863b2e70..75307abf 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -6,6 +6,69 @@ from typing import Any, Dict, Iterable, List, Sequence, Tuple import sqlite3 from datasette.permissions import PermissionSQL +from datasette.plugins import pm +from datasette.utils import await_me_maybe + + +async def gather_permission_sql_from_hooks( + *, datasette, actor: dict | None, action: str +) -> List[PermissionSQL]: + """Collect PermissionSQL objects from the permission_resources_sql hook. + + Ensures that each returned PermissionSQL has a populated ``source``. 
+ """ + + hook_caller = pm.hook.permission_resources_sql + hookimpls = hook_caller.get_hookimpls() + hook_results = list(hook_caller(datasette=datasette, actor=actor, action=action)) + + collected: List[PermissionSQL] = [] + actor_json = json.dumps(actor) if actor is not None else None + actor_id = actor.get("id") if isinstance(actor, dict) else None + + for index, result in enumerate(hook_results): + hookimpl = hookimpls[index] + resolved = await await_me_maybe(result) + default_source = _plugin_name_from_hookimpl(hookimpl) + for permission_sql in _iter_permission_sql_from_result(resolved, action=action): + if not permission_sql.source: + permission_sql.source = default_source + params = permission_sql.params or {} + params.setdefault("action", action) + params.setdefault("actor", actor_json) + params.setdefault("actor_id", actor_id) + collected.append(permission_sql) + + return collected + + +def _plugin_name_from_hookimpl(hookimpl) -> str: + if getattr(hookimpl, "plugin_name", None): + return hookimpl.plugin_name + plugin = getattr(hookimpl, "plugin", None) + if hasattr(plugin, "__name__"): + return plugin.__name__ + return repr(plugin) + + +def _iter_permission_sql_from_result( + result: Any, *, action: str +) -> Iterable[PermissionSQL]: + if result is None: + return [] + if isinstance(result, PermissionSQL): + return [result] + if isinstance(result, (list, tuple)): + collected: List[PermissionSQL] = [] + for item in result: + collected.extend(_iter_permission_sql_from_result(item, action=action)) + return collected + if callable(result): + permission_sql = result(action) # type: ignore[call-arg] + return _iter_permission_sql_from_result(permission_sql, action=action) + raise TypeError( + "Plugin providers must return PermissionSQL instances, sequences, or callables" + ) # ----------------------------- @@ -34,7 +97,7 @@ def build_rules_union( for p in plugins: # No namespacing - just use plugin params as-is - params.update(p.params) + params.update(p.params or 
{}) parts.append( f""" diff --git a/datasette/views/special.py b/datasette/views/special.py index c83ba33b..51af335f 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -444,7 +444,7 @@ class PermissionRulesView(BaseView): WITH rules AS ( {union_sql} ) - SELECT parent, child, allow, reason + SELECT parent, child, allow, reason, source_plugin FROM rules ORDER BY allow DESC, (parent IS NOT NULL), parent, child LIMIT :limit OFFSET :offset @@ -463,6 +463,7 @@ class PermissionRulesView(BaseView): "resource": _resource_path(parent, child), "allow": row["allow"], "reason": row["reason"], + "source_plugin": row["source_plugin"], } ) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index a06d3b4c..0dc4bd6e 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1457,17 +1457,28 @@ permission_resources_sql(datasette, actor, action) The permission action being evaluated. Examples include ``"view-table"`` or ``"insert-row"``. Return value - A :class:`datasette.utils.permissions.PluginSQL` object, ``None`` or an iterable of ``PluginSQL`` objects. + A :class:`datasette.permissions.PermissionSQL` object, ``None`` or an iterable of ``PermissionSQL`` objects. Datasette's action-based permission resolver calls this hook to gather SQL rows describing which resources an actor may access (``allow = 1``) or should be denied (``allow = 0``) for a specific action. Each SQL snippet should return ``parent``, ``child``, ``allow`` and ``reason`` columns. -**Parameter naming convention:** Plugin parameters in ``PluginSQL.params`` should use unique names +**Parameter naming convention:** Plugin parameters in ``PermissionSQL.params`` should use unique names to avoid conflicts with other plugins. The recommended convention is to prefix parameters with your plugin's source name (e.g., ``myplugin_user_id``). The system reserves these parameter names: ``:actor``, ``:actor_id``, ``:action``, and ``:filter_parent``. 
+You can also return ``PermissionSQL.allow(reason="reason goes here")`` or ``PermissionSQL.deny(reason="reason goes here")`` as shortcuts for simple root-level allow or deny rules. These will create SQL snippets that look like this: + +.. code-block:: sql + + SELECT + NULL AS parent, + NULL AS child, + 1 AS allow, + 'reason goes here' AS reason + +Or ``0 AS allow`` for denies. Permission plugin examples ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1475,7 +1486,7 @@ Permission plugin examples These snippets show how to use the new ``permission_resources_sql`` hook to contribute rows to the action-based permission resolver. Each hook receives the current actor dictionary (or ``None``) and must return ``None`` or an instance or list of -``datasette.utils.permissions.PluginSQL`` (or a coroutine that resolves to that). +``datasette.permissions.PermissionSQL`` (or a coroutine that resolves to that). Allow Alice to view a specific table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1486,7 +1497,7 @@ This plugin grants the actor with ``id == "alice"`` permission to perform the .. code-block:: python from datasette import hookimpl - from datasette.utils.permissions import PluginSQL + from datasette.permissions import PermissionSQL @hookimpl @@ -1496,8 +1507,7 @@ This plugin grants the actor with ``id == "alice"`` permission to perform the if not actor or actor.get("id") != "alice": return None - return PluginSQL( - source="alice_sales_allow", + return PermissionSQL( sql=""" SELECT 'accounting' AS parent, @@ -1505,7 +1515,6 @@ This plugin grants the actor with ``id == "alice"`` permission to perform the 1 AS allow, 'alice can view accounting/sales' AS reason """, - params={}, ) Restrict execute-sql to a database prefix @@ -1518,7 +1527,7 @@ will pass through to the SQL snippet. .. 
code-block:: python from datasette import hookimpl - from datasette.utils.permissions import PluginSQL + from datasette.permissions import PermissionSQL @hookimpl @@ -1526,8 +1535,7 @@ will pass through to the SQL snippet. if action != "execute-sql": return None - return PluginSQL( - source="analytics_execute_sql", + return PermissionSQL( sql=""" SELECT parent, @@ -1551,7 +1559,7 @@ with columns ``(actor_id, action, parent, child, allow, reason)``. .. code-block:: python from datasette import hookimpl - from datasette.utils.permissions import PluginSQL + from datasette.permissions import PermissionSQL @hookimpl @@ -1559,8 +1567,7 @@ with columns ``(actor_id, action, parent, child, allow, reason)``. if not actor: return None - return PluginSQL( - source="permission_grants_table", + return PermissionSQL( sql=""" SELECT parent, @@ -1586,7 +1593,7 @@ The resolver will automatically apply the most specific rule. .. code-block:: python from datasette import hookimpl - from datasette.utils.permissions import PluginSQL + from datasette.permissions import PermissionSQL TRUSTED = {"alice", "bob"} @@ -1600,17 +1607,14 @@ The resolver will automatically apply the most specific rule. 
actor_id = (actor or {}).get("id") if actor_id not in TRUSTED: - return PluginSQL( - source="view_table_root_deny", + return PermissionSQL( sql=""" SELECT NULL AS parent, NULL AS child, 0 AS allow, 'default deny view-table' AS reason """, - params={}, ) - return PluginSQL( - source="trusted_allow", + return PermissionSQL( sql=""" SELECT NULL AS parent, NULL AS child, 0 AS allow, 'default deny view-table' AS reason diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index cf3d6125..2cdd75b0 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -473,6 +473,39 @@ def register_actions(datasette): takes_child=False, resource_class=DatabaseResource, ), + # Test actions for test_hook_permission_allowed + Action( + name="this_is_allowed", + abbr=None, + description=None, + takes_parent=False, + takes_child=False, + resource_class=InstanceResource, + ), + Action( + name="this_is_denied", + abbr=None, + description=None, + takes_parent=False, + takes_child=False, + resource_class=InstanceResource, + ), + Action( + name="this_is_allowed_async", + abbr=None, + description=None, + takes_parent=False, + takes_child=False, + resource_class=InstanceResource, + ), + Action( + name="this_is_denied_async", + abbr=None, + description=None, + takes_parent=False, + takes_child=False, + resource_class=InstanceResource, + ), ] # Support old-style config for backwards compatibility @@ -526,30 +559,27 @@ def permission_resources_sql(datasette, actor, action): # Handle test actions used in test_hook_permission_allowed if action == "this_is_allowed": - sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'test plugin allows this_is_allowed' AS reason" - return PermissionSQL(source="my_plugin", sql=sql, params={}) + return PermissionSQL.allow(reason="test plugin allows this_is_allowed") elif action == "this_is_denied": - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, 'test plugin denies this_is_denied' AS reason" - return 
PermissionSQL(source="my_plugin", sql=sql, params={}) + return PermissionSQL.deny(reason="test plugin denies this_is_denied") elif action == "this_is_allowed_async": - sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'test plugin allows this_is_allowed_async' AS reason" - return PermissionSQL(source="my_plugin", sql=sql, params={}) + return PermissionSQL.allow(reason="test plugin allows this_is_allowed_async") elif action == "this_is_denied_async": - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, 'test plugin denies this_is_denied_async' AS reason" - return PermissionSQL(source="my_plugin", sql=sql, params={}) + return PermissionSQL.deny(reason="test plugin denies this_is_denied_async") elif action == "view-database-download": # Return rule based on actor's can_download permission if actor and actor.get("can_download"): - sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'actor has can_download' AS reason" + return PermissionSQL.allow(reason="actor has can_download") else: return None # No opinion - return PermissionSQL(source="my_plugin", sql=sql, params={}) elif action == "view-database": # Also grant view-database if actor has can_download (needed for download to work) if actor and actor.get("can_download"): - sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'actor has can_download, grants view-database' AS reason" - return PermissionSQL(source="my_plugin", sql=sql, params={}) - return None + return PermissionSQL.allow( + reason="actor has can_download, grants view-database" + ) + else: + return None elif action in ( "insert-row", "create-table", @@ -560,7 +590,6 @@ def permission_resources_sql(datasette, actor, action): # Special permissions for latest.datasette.io demos actor_id = actor.get("id") if actor else None if actor_id == "todomvc": - sql = f"SELECT NULL AS parent, NULL AS child, 1 AS allow, 'todomvc actor allowed for {action}' AS reason" - return PermissionSQL(source="my_plugin", sql=sql, params={}) + return 
PermissionSQL.allow(reason=f"todomvc actor allowed for {action}") return None diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 63f89bf5..adf26eeb 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -63,7 +63,7 @@ async def test_allowed_resources_global_allow(test_ds): def rules_callback(datasette, actor, action): if actor and actor.get("id") == "alice": sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'global: alice has access' AS reason" - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -101,7 +101,7 @@ async def test_allowed_specific_resource(test_ds): UNION ALL SELECT 'analytics' AS parent, NULL AS child, 1 AS allow, 'analyst access' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -145,7 +145,7 @@ async def test_allowed_resources_with_reasons(test_ds): SELECT 'analytics' AS parent, 'sensitive' AS child, 0 AS allow, 'child: sensitive data denied' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -186,7 +186,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): SELECT 'analytics' AS parent, 'sensitive' AS child, 0 AS allow, 'child: deny sensitive' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -234,7 +234,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): SELECT 'production' AS parent, 'orders' AS child, 1 AS allow, 'child: carol can see orders' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -283,7 +283,7 @@ async def 
test_sql_does_filtering_not_python(test_ds): SELECT 'analytics' AS parent, 'users' AS child, 1 AS allow, 'specific allow' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) plugin = PermissionRulesPlugin(rules_callback) pm.register(plugin, name="test_plugin") @@ -338,13 +338,15 @@ async def test_no_permission_rules_returns_correct_schema(): ) await ds._refresh_schemas() - # Temporarily block all permission_resources_sql hooks to simulate no rules - original_hook = pm.hook.permission_resources_sql + # Temporarily unregister all permission_resources_sql providers to simulate no rules + hook_caller = pm.hook.permission_resources_sql + hookimpls = hook_caller.get_hookimpls() + removed_plugins = [ + (impl.plugin_name, impl.plugin) for impl in hookimpls if impl.plugin is not None + ] - def empty_hook(*args, **kwargs): - return [] - - pm.hook.permission_resources_sql = empty_hook + for plugin_name, _ in removed_plugins: + pm.unregister(name=plugin_name) try: # Call build_allowed_resources_sql directly which will hit the no-rules code path @@ -366,5 +368,6 @@ async def test_no_permission_rules_returns_correct_schema(): assert len(result.rows) == 0 finally: - # Restore original hook - pm.hook.permission_resources_sql = original_hook + # Restore original plugins in the order they were removed + for plugin_name, plugin in removed_plugins: + pm.register(plugin, name=plugin_name) diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index 7e7a2691..56c5090d 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -58,7 +58,7 @@ async def test_tables_endpoint_global_access(test_ds): def rules_callback(datasette, actor, action): if actor and actor.get("id") == "alice": sql = "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'global: alice has access' AS reason" - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = 
PermissionRulesPlugin(rules_callback) @@ -98,7 +98,7 @@ async def test_tables_endpoint_database_restriction(test_ds): if actor and actor.get("role") == "analyst": # Allow only analytics database sql = "SELECT 'analytics' AS parent, NULL AS child, 1 AS allow, 'analyst access' AS reason" - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -145,7 +145,7 @@ async def test_tables_endpoint_table_exception(test_ds): UNION ALL SELECT 'analytics' AS parent, 'users' AS child, 1 AS allow, 'carol exception' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -187,7 +187,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): UNION ALL SELECT 'analytics' AS parent, 'sensitive' AS child, 0 AS allow, 'deny sensitive' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -253,7 +253,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): UNION ALL SELECT 'production' AS parent, 'orders' AS child, 1 AS allow, 'specific table 2' AS reason """ - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) @@ -291,7 +291,7 @@ async def test_tables_endpoint_empty_result(test_ds): if actor and actor.get("id") == "blocked": # Global deny sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, 'global deny' AS reason" - return PermissionSQL(source="test", sql=sql, params={}) + return PermissionSQL(sql=sql) return None plugin = PermissionRulesPlugin(rules_callback) diff --git a/tests/test_permission_endpoints.py b/tests/test_permission_endpoints.py index 65280a06..d7b7bf07 100644 --- a/tests/test_permission_endpoints.py +++ b/tests/test_permission_endpoints.py @@ 
-453,16 +453,12 @@ async def test_execute_sql_requires_view_database(): if action == "execute-sql": # Grant execute-sql on the "secret" database return PermissionSQL( - source="test_plugin", sql="SELECT 'secret' AS parent, NULL AS child, 1 AS allow, 'can execute sql' AS reason", - params={}, ) elif action == "view-database": # Deny view-database on the "secret" database return PermissionSQL( - source="test_plugin", sql="SELECT 'secret' AS parent, NULL AS child, 0 AS allow, 'cannot view db' AS reason", - params={}, ) return [] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 0460d9c8..f1731b40 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -325,7 +325,11 @@ async def test_plugin_config_file(ds_client): ) def test_hook_extra_body_script(app_client, path, expected_extra_body_script): r = re.compile(r"") - json_data = r.search(app_client.get(path).text).group(1) + response = app_client.get(path) + assert response.status_code == 200, response.text + match = r.search(response.text) + assert match is not None, "No extra_body_script found in HTML" + json_data = match.group(1) actual_data = json.loads(json_data) assert expected_extra_body_script == actual_data @@ -673,39 +677,11 @@ async def test_existing_scope_actor_respected(ds_client): ], ) async def test_hook_permission_allowed(action, expected): - from datasette.permissions import Action - from datasette.resources import InstanceResource - - class TestPlugin: - __name__ = "TestPlugin" - - @hookimpl - def register_actions(self): - return [ - Action( - name=name, - abbr=None, - description=None, - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, - ) - for name in ( - "this_is_allowed", - "this_is_denied", - "this_is_allowed_async", - "this_is_denied_async", - ) - ] - - pm.register(TestPlugin(), name="undo_register_extras") - try: - ds = Datasette(plugins_dir=PLUGINS_DIR) - await ds.invoke_startup() - actual = await ds.allowed(action=action, actor={"id": 
"actor"}) - assert expected == actual - finally: - pm.unregister(name="undo_register_extras") + # Test actions and permission logic are defined in tests/plugins/my_plugin.py + ds = Datasette(plugins_dir=PLUGINS_DIR) + await ds.invoke_startup() + actual = await ds.allowed(action=action, actor={"id": "actor"}) + assert expected == actual @pytest.mark.asyncio diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 0b722519..653679e4 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -383,6 +383,7 @@ async def test_sortable_columns_metadata(ds_client): @pytest.mark.asyncio +@pytest.mark.xfail @pytest.mark.parametrize( "path,expected_rows", [ diff --git a/tests/test_utils_permissions.py b/tests/test_utils_permissions.py index 7c6359c9..b412de0f 100644 --- a/tests/test_utils_permissions.py +++ b/tests/test_utils_permissions.py @@ -13,7 +13,6 @@ def db(): path = tempfile.mktemp(suffix="demo.db") db = ds.add_database(Database(ds, path=path)) - print(path) return db @@ -25,7 +24,6 @@ NO_RULES_SQL = ( def plugin_allow_all_for_user(user: str) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "allow_all", """ SELECT NULL AS parent, NULL AS child, 1 AS allow, 'global allow for ' || :allow_all_user || ' on ' || :allow_all_action AS reason @@ -42,7 +40,6 @@ def plugin_deny_specific_table( ) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "deny_specific_table", """ SELECT :deny_specific_table_parent AS parent, :deny_specific_table_child AS child, 0 AS allow, 'deny ' || :deny_specific_table_parent || '/' || :deny_specific_table_child || ' for ' || :deny_specific_table_user || ' on ' || :deny_specific_table_action AS reason @@ -62,7 +59,6 @@ def plugin_deny_specific_table( def plugin_org_policy_deny_parent(parent: str) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "org_policy_parent_deny", 
""" SELECT :org_policy_parent_deny_parent AS parent, NULL AS child, 0 AS allow, 'org policy: parent ' || :org_policy_parent_deny_parent || ' denied on ' || :org_policy_parent_deny_action AS reason @@ -81,7 +77,6 @@ def plugin_allow_parent_for_user( ) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "allow_parent", """ SELECT :allow_parent_parent AS parent, NULL AS child, 1 AS allow, 'allow full parent for ' || :allow_parent_user || ' on ' || :allow_parent_action AS reason @@ -102,7 +97,6 @@ def plugin_child_allow_for_user( ) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "allow_child", """ SELECT :allow_child_parent AS parent, :allow_child_child AS child, 1 AS allow, 'allow child for ' || :allow_child_user || ' on ' || :allow_child_action AS reason @@ -122,7 +116,6 @@ def plugin_child_allow_for_user( def plugin_root_deny_for_all() -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "root_deny", """ SELECT NULL AS parent, NULL AS child, 0 AS allow, 'root deny for all on ' || :root_deny_action AS reason """, @@ -137,7 +130,6 @@ def plugin_conflicting_same_child_rules( ) -> List[Callable[[str], PermissionSQL]]: def allow_provider(action: str) -> PermissionSQL: return PermissionSQL( - "conflict_child_allow", """ SELECT :conflict_child_allow_parent AS parent, :conflict_child_allow_child AS child, 1 AS allow, 'team grant at child for ' || :conflict_child_allow_user || ' on ' || :conflict_child_allow_action AS reason @@ -153,7 +145,6 @@ def plugin_conflicting_same_child_rules( def deny_provider(action: str) -> PermissionSQL: return PermissionSQL( - "conflict_child_deny", """ SELECT :conflict_child_deny_parent AS parent, :conflict_child_deny_child AS child, 0 AS allow, 'exception deny at child for ' || :conflict_child_deny_user || ' on ' || :conflict_child_deny_action AS reason @@ -175,16 +166,10 @@ def 
plugin_allow_all_for_action( ) -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: if action != allowed_action: - return PermissionSQL( - f"allow_all_{allowed_action}_noop", - NO_RULES_SQL, - {}, - ) - source_name = f"allow_all_{allowed_action}" + return PermissionSQL(NO_RULES_SQL) # Sanitize parameter names by replacing hyphens with underscores - param_prefix = source_name.replace("-", "_") + param_prefix = action.replace("-", "_") return PermissionSQL( - source_name, f""" SELECT NULL AS parent, NULL AS child, 1 AS allow, 'global allow for ' || :{param_prefix}_user || ' on ' || :{param_prefix}_action AS reason @@ -513,7 +498,6 @@ async def test_actor_actor_id_action_parameters_available(db): def plugin_using_all_parameters() -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: return PermissionSQL( - "test_all_params", """ SELECT NULL AS parent, NULL AS child, 1 AS allow, 'Actor ID: ' || COALESCE(:actor_id, 'null') || @@ -521,8 +505,7 @@ async def test_actor_actor_id_action_parameters_available(db): ', Action: ' || :action AS reason WHERE :actor_id = 'test_user' AND :action = 'view-table' AND json_extract(:actor, '$.role') = 'admin' - """, - {}, + """ ) return provider @@ -567,7 +550,6 @@ async def test_multiple_plugins_with_own_parameters(db): if action != "view-table": return PermissionSQL("plugin_one", "SELECT NULL WHERE 0", {}) return PermissionSQL( - "plugin_one", """ SELECT database_name AS parent, table_name AS child, 1 AS allow, 'Plugin one used param: ' || :plugin1_param AS reason @@ -586,7 +568,6 @@ async def test_multiple_plugins_with_own_parameters(db): if action != "view-table": return PermissionSQL("plugin_two", "SELECT NULL WHERE 0", {}) return PermissionSQL( - "plugin_two", """ SELECT database_name AS parent, table_name AS child, 1 AS allow, 'Plugin two used param: ' || :plugin2_param AS reason From 87aa7981481e74b9c5aa8e87d0903b2ef4d5f41d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 
30 Oct 2025 17:54:07 -0700 Subject: [PATCH 004/299] Permission tabs include allow debug page Closes #2559 --- datasette/templates/_permissions_debug_tabs.html | 1 + datasette/templates/allow_debug.html | 3 +++ datasette/views/special.py | 3 +++ 3 files changed, 7 insertions(+) diff --git a/datasette/templates/_permissions_debug_tabs.html b/datasette/templates/_permissions_debug_tabs.html index ab8be1fb..d7203c1e 100644 --- a/datasette/templates/_permissions_debug_tabs.html +++ b/datasette/templates/_permissions_debug_tabs.html @@ -49,5 +49,6 @@ Allowed Rules Actions + Allow debug {% endif %} diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html index 610417d2..1ecc92df 100644 --- a/datasette/templates/allow_debug.html +++ b/datasette/templates/allow_debug.html @@ -33,6 +33,9 @@ p.message-warning {

Debug allow rules

+{% set current_tab = "allow_debug" %} +{% include "_permissions_debug_tabs.html" %} +

Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.

diff --git a/datasette/views/special.py b/datasette/views/special.py index 51af335f..60e4b992 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -600,6 +600,9 @@ class AllowDebugView(BaseView): "error": "\n\n".join(errors) if errors else "", "actor_input": actor_input, "allow_input": allow_input, + "has_debug_permission": await self.ds.allowed( + action="permissions-debug", actor=request.actor + ), }, ) From e4be95b16c99dd8d49fc6d89a684764bf731f1d9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Oct 2025 17:59:54 -0700 Subject: [PATCH 005/299] Update permissions documentation for new action system (#2551) --- datasette/app.py | 8 +- docs/authentication.rst | 235 ++++++++++++++++++++-------------------- docs/changelog.rst | 8 +- docs/internals.rst | 76 +++++++------ docs/json_api.rst | 10 +- tests/conftest.py | 8 +- 6 files changed, 177 insertions(+), 168 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index bfbf2360..15cf3495 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1087,11 +1087,7 @@ class Datasette: # Validate that resource is a Resource object or None if resource is not None and not isinstance(resource, Resource): - raise TypeError( - f"resource must be a Resource object or None, not {type(resource).__name__}. " - f"Use DatabaseResource(database=...), TableResource(database=..., table=...), " - f"or QueryResource(database=..., query=...) instead." - ) + raise TypeError(f"resource must be a Resource subclass instance or None.") # Check if actor can see it if not await self.allowed(action=action, resource=resource, actor=actor): @@ -1122,7 +1118,7 @@ class Datasette: parent: Optional parent filter (e.g., database name) to limit results include_is_private: If True, include is_private column showing if anonymous cannot access - Returns a tuple of (query, params) that can be executed against the internal database. 
+ Returns a tuple of (query: str, params: dict) that can be executed against the internal database. The query returns rows with (parent, child, reason) columns, plus is_private if requested. Example: diff --git a/docs/authentication.rst b/docs/authentication.rst index e658e78b..28fb76bb 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -6,18 +6,18 @@ Datasette doesn't require authentication by default. Any visitor to a Datasette instance can explore the full data and execute read-only SQL queries. -Datasette's plugin system can be used to add many different styles of authentication, such as user accounts, single sign-on or API keys. +Datasette can be configured to only allow authenticated users, or to control which databases, tables, and queries can be accessed by the public or by specific users. Datasette's plugin system can be used to add many different styles of authentication, such as user accounts, single sign-on or API keys. .. _authentication_actor: Actors ====== -Through plugins, Datasette can support both authenticated users (with cookies) and authenticated API agents (via authentication tokens). The word "actor" is used to cover both of these cases. +Through plugins, Datasette can support both authenticated users (with cookies) and authenticated API clients (via authentication tokens). The word "actor" is used to cover both of these cases. -Every request to Datasette has an associated actor value, available in the code as ``request.actor``. This can be ``None`` for unauthenticated requests, or a JSON compatible Python dictionary for authenticated users or API agents. +Every request to Datasette has an associated actor value, available in the code as ``request.actor``. This can be ``None`` for unauthenticated requests, or a JSON compatible Python dictionary for authenticated users or API clients. -The actor dictionary can be any shape - the design of that data structure is left up to the plugins. 
A useful convention is to include an ``"id"`` string, as demonstrated by the "root" actor below. +The actor dictionary can be any shape - the design of that data structure is left up to the plugins. Actors should always include a unique ``"id"`` string, as demonstrated by the "root" actor below. Plugins can use the :ref:`plugin_hook_actor_from_request` hook to implement custom logic for authenticating an actor based on the incoming HTTP request. @@ -32,19 +32,21 @@ The one exception is the "root" account, which you can sign into while using Dat The ``--root`` flag is designed for local development and testing. When you start Datasette with ``--root``, the root user automatically receives every permission, including: -* All view permissions (view-instance, view-database, view-table, etc.) -* All write permissions (insert-row, update-row, delete-row, create-table, alter-table, drop-table) -* Debug permissions (permissions-debug, debug-menu) +* All view permissions (``view-instance``, ``view-database``, ``view-table``, etc.) +* All write permissions (``insert-row``, ``update-row``, ``delete-row``, ``create-table``, ``alter-table``, ``drop-table``) +* Debug permissions (``permissions-debug``, ``debug-menu``) * Any custom permissions defined by plugins -.. warning:: - The ``--root`` flag should only be used for local development. Never use it in production or on publicly accessible servers. +If you add explicit deny rules in ``datasette.yaml`` those can still block the +root actor from specific databases or tables. + +The ``--root`` flag sets an internal ``root_enabled`` switch—without it, a signed-in user with ``{"id": "root"}`` is treated like any other actor. 
To sign in as root, start Datasette using the ``--root`` command-line option, like this:: datasette --root -:: +Datasette will output a single-use-only login URL on startup:: http://127.0.0.1:8001/-/auth-token?token=786fc524e0199d70dc9a581d851f466244e114ca92f33aa3b42a139e9388daa7 INFO: Started server process [25801] @@ -52,7 +54,7 @@ To sign in as root, start Datasette using the ``--root`` command-line option, li INFO: Application startup complete. INFO: Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit) -The URL on the first line includes a one-use token which can be used to sign in as the "root" actor in your browser. Click on that link and then visit ``http://127.0.0.1:8001/-/actor`` to confirm that you are authenticated as an actor that looks like this: +Click on that link and then visit ``http://127.0.0.1:8001/-/actor`` to confirm that you are authenticated as an actor that looks like this: .. code-block:: json @@ -65,7 +67,7 @@ The URL on the first line includes a one-use token which can be used to sign in Permissions =========== -Datasette has an extensive permissions system built-in, which can be further extended and customized by plugins. +Datasette's permissions system is built around SQL queries. Datasette and its plugins construct SQL queries to resolve the list of resources that an actor can access. The key question the permissions system answers is this: @@ -73,37 +75,47 @@ The key question the permissions system answers is this: **Actors** are :ref:`described above `. -An **action** is a string describing the action the actor would like to perform. A full list is :ref:`provided below ` - examples include ``view-table`` and ``execute-sql``. +An **action** is a string describing the action the actor would like to perform. A full list is :ref:`provided below ` - examples include ``view-table`` and ``execute-sql``. A **resource** is the item the actor wishes to interact with - for example a specific database or table.
Some actions, such as ``permissions-debug``, are not associated with a particular resource. -Datasette's built-in view permissions (``view-database``, ``view-table`` etc) default to *allow* - unless you :ref:`configure additional permission rules ` unauthenticated users will be allowed to access content. +Datasette's built-in view actions (``view-database``, ``view-table`` etc) are allowed by Datasette's default configuration: unless you :ref:`configure additional permission rules ` unauthenticated users will be allowed to access content. -Permissions with potentially harmful effects should default to *deny*. Plugin authors should account for this when designing new plugins - for example, the `datasette-upload-csvs `__ plugin defaults to deny so that installations don't accidentally allow unauthenticated users to create new tables by uploading a CSV file. +Other actions, including those introduced by plugins, will default to *deny*. .. _authentication_permissions_explained: How permissions are resolved ---------------------------- -The :ref:`datasette.permission_allowed(actor, action, resource=None, default=...)` method is called to check if an actor is allowed to perform a specific action. +Datasette performs permission checks using the internal :ref:`datasette_allowed` method, which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. -This method asks every plugin that implements the :ref:`plugin_hook_permission_allowed` hook if the actor is allowed to perform the action. +``resource`` should be an instance of the appropriate ``Resource`` subclass from :mod:`datasette.resources`—for example ``InstanceResource()``, ``DatabaseResource(database="...")`` or ``TableResource(database="...", table="...")``. This defaults to ``InstanceResource()`` if not specified.
-Each plugin can return ``True`` to indicate that the actor is allowed to perform the action, ``False`` if they are not allowed and ``None`` if the plugin has no opinion on the matter. +When a check runs Datasette gathers allow/deny rules from multiple sources and +compiles them into a SQL query. The resulting query describes all of the +resources an actor may access for that action, together with the reasons those +resources were allowed or denied. The combined sources are: -``False`` acts as a veto - if any plugin returns ``False`` then the permission check is denied. Otherwise, if any plugin returns ``True`` then the permission check is allowed. +* ``allow`` blocks configured in :ref:`datasette.yaml `. +* :ref:`Actor restrictions ` encoded into the actor dictionary or API token. +* The "root" user shortcut when ``--root`` (or :attr:`Datasette.root_enabled `) is active, replying ``True`` to all permission checks unless configuration rules deny them at a more specific level. +* Any additional SQL provided by plugins implementing :ref:`plugin_hook_permission_resources_sql`. -The ``resource`` argument can be used to specify a specific resource that the action is being performed against. Some permissions, such as ``view-instance``, do not involve a resource. Others such as ``view-database`` have a resource that is a string naming the database. Permissions that take both a database name and the name of a table, view or canned query within that database use a resource that is a tuple of two strings, ``(database_name, resource_name)``. - -Plugins that implement the ``permission_allowed()`` hook can decide if they are going to consider the provided resource or not. +Datasette evaluates the SQL to determine if the requested ``resource`` is +included. Explicit deny rules returned by configuration or plugins will block +access even if other rules allowed it. ..
_authentication_permissions_allow: Defining permissions with "allow" blocks ---------------------------------------- -The standard way to define permissions in Datasette is to use an ``"allow"`` block :ref:`in the datasette.yaml file `. This is a JSON document describing which actors are allowed to perform a permission. +One way to define permissions in Datasette is to use an ``"allow"`` block :ref:`in the datasette.yaml file `. This is a JSON document describing which actors are allowed to perform an action against a specific resource. + +Each ``allow`` block is compiled into SQL and combined with any +:ref:`plugin-provided rules ` to produce +the cascading allow/deny decisions that power :ref:`datasette_allowed`. The most basic form of allow block is this (`allow demo `__, `deny demo `__): @@ -425,7 +437,7 @@ You can control the following: * Access to specific tables and views * Access to specific :ref:`canned_queries` -If a user cannot access a specific database, they will not be able to access tables, views or queries within that database. If a user cannot access the instance they will not be able to access any of the databases, tables, views or queries. +If a user has permission to view a table they will be able to view that table, regardless of whether they have permission to view the database or instance that the table exists within. .. _authentication_permissions_instance: @@ -663,7 +675,7 @@ Controlling the ability to execute arbitrary SQL Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page `__ or by appending a ``?_where=`` parameter to the table page `like this `__. -Access to this ability is controlled by the :ref:`permissions_execute_sql` permission. +Access to this ability is controlled by the :ref:`actions_execute_sql` permission.
@@ -1027,9 +1039,25 @@ This example outputs the following:: Checking permissions in plugins =============================== -Datasette plugins can check if an actor has permission to perform an action using the :ref:`datasette.permission_allowed(...)` method. +Datasette plugins can check if an actor has permission to perform an action using :ref:`datasette_allowed`—for example:: -Datasette core performs a number of permission checks, :ref:`documented below `. Plugins can implement the :ref:`plugin_hook_permission_allowed` plugin hook to participate in decisions about whether an actor should be able to perform a specified action. + from datasette.resources import TableResource + + can_edit = await datasette.allowed( + action="update-row", + resource=TableResource(database="fixtures", table="facetable"), + actor=request.actor, + ) + +Use :ref:`datasette_ensure_permission` when you need to enforce a permission and +raise a ``Forbidden`` error automatically. + +Plugins that define new operations should return :class:`~datasette.permissions.Action` +objects from :ref:`plugin_register_actions` and can supply additional allow/deny +rules by returning :class:`~datasette.permissions.PermissionSQL` objects from the +:ref:`plugin_hook_permission_resources_sql` hook. Those rules are merged with +configuration ``allow`` blocks and actor restrictions to determine the final +result for each check. .. _authentication_actor_matches_allow: @@ -1049,12 +1077,14 @@ The currently authenticated actor is made available to plugins as ``request.acto .. _PermissionsDebugView: -The permissions debug tool -========================== +Permissions debug tools +======================= -The debug tool at ``/-/permissions`` is only available to the :ref:`authenticated root user ` (or any actor granted the ``permissions-debug`` action). +The debug tool at ``/-/permissions`` is available to any actor with the ``permissions-debug`` permission. 
By default this is just the :ref:`authenticated root user ` but you can open it up to all users by starting Datasette like this:: -It shows the thirty most recent permission checks that have been carried out by the Datasette instance. + datasette -s permissions.permissions-debug true data.db + +The page shows the permission checks that have been carried out by the Datasette instance. It also provides an interface for running hypothetical permission checks against a hypothetical actor. This is a useful way of confirming that your configured permissions work in the way you expect. @@ -1063,37 +1093,20 @@ This is designed to help administrators and plugin authors understand exactly ho .. _AllowedResourcesView: Allowed resources view -====================== +---------------------- -The ``/-/allowed`` endpoint displays resources that the current actor can access for a supplied ``action`` query string argument. +The ``/-/allowed`` endpoint displays resources that the current actor can access for a specified ``action``. This endpoint provides an interactive HTML form interface. Add ``.json`` to the URL path (e.g. ``/-/allowed.json``) to get the raw JSON response instead. Pass ``?action=view-table`` (or another action) to select the action. Optional ``parent=`` and ``child=`` query parameters can narrow the results to a specific database/table pair. -This endpoint is publicly accessible to help users understand their own permissions. However, potentially sensitive fields (``reason`` and ``source_plugin``) are only included in responses for users with the ``permissions-debug`` permission. - -Datasette includes helper endpoints for exploring the action-based permission resolver: - -``/-/allowed`` - Returns a paginated list of resources that the current actor is allowed to access for a given action. Pass ``?action=view-table`` (or another action) to select the action, and optional ``parent=``/``child=`` query parameters to narrow the results to a specific database/table pair. 
- -``/-/rules`` - Lists the raw permission rules (both allow and deny) contributing to each resource for the supplied action. This includes configuration-derived and plugin-provided rules. **Requires the permissions-debug permission** (only available to the root user by default). - -``/-/check`` - Evaluates whether the current actor can perform ``action`` against an optional ``parent``/``child`` resource tuple, returning the winning rule and reason. - -These endpoints work in conjunction with :ref:`plugin_hook_permission_resources_sql` and make it easier to verify that configuration allow blocks and plugins are behaving as intended. - -All three endpoints support both HTML and JSON responses. Visit the endpoint directly for an interactive HTML form interface, or add ``.json`` to the URL for a raw JSON response. - -**Security note:** The ``/-/check`` and ``/-/allowed`` endpoints are publicly accessible to help users understand their own permissions. However, potentially sensitive fields (``reason`` and ``source_plugin``) are only included in responses for users with the ``permissions-debug`` permission. The ``/-/rules`` endpoint requires the ``permissions-debug`` permission for all access. +This endpoint is publicly accessible to help users understand their own permissions. The potentially sensitive ``reason`` field is only shown to users with the ``permissions-debug`` permission - it shows the plugins and explanatory reasons that were responsible for each decision. .. _PermissionRulesView: Permission rules view -===================== +--------------------- The ``/-/rules`` endpoint displays all permission rules (both allow and deny) for each candidate resource for the requested action. @@ -1101,12 +1114,12 @@ This endpoint provides an interactive HTML form interface. Add ``.json`` to the Pass ``?action=`` as a query parameter to specify which action to check. 
-**Requires the permissions-debug permission** - this endpoint returns a 403 Forbidden error for users without this permission. +This endpoint requires the ``permissions-debug`` permission. .. _PermissionCheckView: Permission check view -===================== +--------------------- The ``/-/check`` endpoint evaluates a single action/resource pair and returns information indicating whether the access was allowed along with diagnostic information. @@ -1114,8 +1127,6 @@ This endpoint provides an interactive HTML form interface. Add ``.json`` to the Pass ``?action=`` to specify the action to check, and optional ``?parent=`` and ``?child=`` parameters to specify the resource. -This endpoint is publicly accessible to help users understand their own permissions. However, potentially sensitive fields (``reason`` and ``source_plugin``) are only included in responses for users with the ``permissions-debug`` permission. - .. _authentication_ds_actor: The ds_actor cookie @@ -1181,168 +1192,156 @@ The /-/logout page The page at ``/-/logout`` provides the ability to log out of a ``ds_actor`` cookie authentication session. -.. _permissions: +.. _actions: -Built-in permissions -==================== +Built-in actions +================ This section lists all of the permission checks that are carried out by Datasette core, along with the ``resource`` if it was passed. -.. _permissions_view_instance: +.. _actions_view_instance: view-instance ------------- Top level permission - Actor is allowed to view any pages within this instance, starting at https://latest.datasette.io/ -Default *allow*. - -.. _permissions_view_database: +.. _actions_view_database: view-database ------------- Actor is allowed to view a database page, e.g. https://latest.datasette.io/fixtures -``resource`` - string - The name of the database +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) -Default *allow*. - -.. 
_permissions_view_database_download: +.. _actions_view_database_download: view-database-download ---------------------- Actor is allowed to download a database, e.g. https://latest.datasette.io/fixtures.db -``resource`` - string - The name of the database +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) -Default *allow*. - -.. _permissions_view_table: +.. _actions_view_table: view-table ---------- Actor is allowed to view a table (or view) page, e.g. https://latest.datasette.io/fixtures/complex_foreign_keys -``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *allow*. + ``table`` is the name of the table (string) -.. _permissions_view_query: +.. _actions_view_query: view-query ---------- Actor is allowed to view (and execute) a :ref:`canned query ` page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. -``resource`` - tuple: (string, string) - The name of the database, then the name of the canned query +``resource`` - ``datasette.resources.QueryResource(database, query)`` + ``database`` is the name of the database (string) + + ``query`` is the name of the canned query (string) -Default *allow*. - -.. _permissions_insert_row: +.. _actions_insert_row: insert-row ---------- Actor is allowed to insert rows into a table. -``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *deny*. + ``table`` is the name of the table (string) -.. _permissions_delete_row: +.. _actions_delete_row: delete-row ---------- Actor is allowed to delete rows from a table. 
-``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *deny*. + ``table`` is the name of the table (string) -.. _permissions_update_row: +.. _actions_update_row: update-row ---------- Actor is allowed to update rows in a table. -``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *deny*. + ``table`` is the name of the table (string) -.. _permissions_create_table: +.. _actions_create_table: create-table ------------ Actor is allowed to create a database table. -``resource`` - string - The name of the database +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) -Default *deny*. - -.. _permissions_alter_table: +.. _actions_alter_table: alter-table ----------- Actor is allowed to alter a database table. -``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *deny*. + ``table`` is the name of the table (string) -.. _permissions_drop_table: +.. _actions_drop_table: drop-table ---------- Actor is allowed to drop a database table. -``resource`` - tuple: (string, string) - The name of the database, then the name of the table +``resource`` - ``datasette.resources.TableResource(database, table)`` + ``database`` is the name of the database (string) -Default *deny*. + ``table`` is the name of the table (string) -.. _permissions_execute_sql: +.. _actions_execute_sql: execute-sql ----------- -Actor is allowed to run arbitrary SQL queries against a specific database, e.g. 
https://latest.datasette.io/fixtures?sql=select+100 +Actor is allowed to run arbitrary SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 -``resource`` - string - The name of the database +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) -Default *allow*. See also :ref:`the default_allow_sql setting `. +See also :ref:`the default_allow_sql setting `. -.. _permissions_permissions_debug: +.. _actions_permissions_debug: permissions-debug ----------------- -Actor is allowed to view the ``/-/permissions`` debug page. +Actor is allowed to view the ``/-/permissions`` debug tools. -Default *deny*. - -.. _permissions_debug_menu: +.. _actions_debug_menu: debug-menu ---------- Controls if the various debug pages are displayed in the navigation menu. - -Default *deny*. diff --git a/docs/changelog.rst b/docs/changelog.rst index 35b3c3ac..b9340492 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -188,7 +188,7 @@ This alpha release adds basic alter table support to the Datasette Write API and Alter table support for create, insert, upsert and update ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :ref:`JSON write API ` can now be used to apply simple alter table schema changes, provided the acting actor has the new :ref:`permissions_alter_table` permission. (:issue:`2101`) +The :ref:`JSON write API ` can now be used to apply simple alter table schema changes, provided the acting actor has the new :ref:`actions_alter_table` permission. (:issue:`2101`) The only alter operation supported so far is adding new columns to an existing table. 
@@ -203,12 +203,12 @@ Permissions fix for the upsert API The :ref:`/database/table/-/upsert API ` had a minor permissions bug, only affecting Datasette instances that had configured the ``insert-row`` and ``update-row`` permissions to apply to a specific table rather than the database or instance as a whole. Full details in issue :issue:`2262`. -To avoid similar mistakes in the future the :ref:`datasette.permission_allowed() ` method now specifies ``default=`` as a keyword-only argument. +To avoid similar mistakes in the future the ``datasette.permission_allowed()`` method now specifies ``default=`` as a keyword-only argument. Permission checks now consider opinions from every plugin ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :ref:`datasette.permission_allowed() ` method previously consulted every plugin that implemented the :ref:`permission_allowed() ` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) +The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the :ref:`permission_allowed() ` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) Datasette now consults every plugin and checks to see if any of them returned ``False`` (the veto rule), and if none of them did, it then checks to see if any of them returned ``True``. @@ -1403,7 +1403,7 @@ Smaller changes - New :ref:`datasette.get_database() ` method. - Added ``_`` prefix to many private, undocumented methods of the Datasette class. (:issue:`576`) - Removed the ``db.get_outbound_foreign_keys()`` method which duplicated the behaviour of ``db.foreign_keys_for_table()``. -- New :ref:`await datasette.permission_allowed() ` method. +- New ``await datasette.permission_allowed()`` method. - ``/-/actor`` debugging endpoint for viewing the currently authenticated actor. - New ``request.cookies`` property. 
- ``/-/plugins`` endpoint now shows a list of hooks implemented by each plugin, e.g. https://latest.datasette.io/-/plugins?all=1 diff --git a/docs/internals.rst b/docs/internals.rst index 3f94f361..a0e2e5c8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -342,33 +342,6 @@ If no plugins that implement that hook are installed, the default return value l "2": {"id": "2"} } -.. _datasette_permission_allowed: - -await .permission_allowed(actor, action, resource=None, default=...) --------------------------------------------------------------------- - -``actor`` - dictionary - The authenticated actor. This is usually ``request.actor``. - -``action`` - string - The name of the action that is being permission checked. - -``resource`` - string or tuple, optional - The resource, e.g. the name of the database, or a tuple of two strings containing the name of the database and the name of the table. Only some permissions apply to a resource. - -``default`` - optional: True, False or None - What value should be returned by default if nothing provides an opinion on this permission check. - Set to ``True`` for default allow or ``False`` for default deny. - If not specified the ``default`` from the ``Permission()`` tuple that was registered using :ref:`plugin_register_permissions` will be used. - -Check if the given actor has :ref:`permission ` to perform the given action on the given resource. - -Some permission checks are carried out against :ref:`rules defined in datasette.yaml `, while other custom permissions may be decided by plugins that implement the :ref:`plugin_hook_permission_allowed` plugin hook. - -If neither ``metadata.json`` nor any of the plugins provide an answer to the permission query the ``default`` argument will be returned. - -See :ref:`permissions` for a full list of permission actions included in Datasette core. - .. 
_datasette_allowed: await .allowed(\*, action, resource, actor=None) @@ -385,8 +358,6 @@ await .allowed(\*, action, resource, actor=None) This method checks if the given actor has permission to perform the given action on the given resource. All parameters must be passed as keyword arguments. -This is the modern resource-based permission checking method. It works with Resource objects that provide structured information about what is being accessed. - Example usage: .. code-block:: python @@ -414,7 +385,50 @@ Example usage: The method returns ``True`` if the permission is granted, ``False`` if denied. -For legacy string/tuple based permission checking, use :ref:`datasette_permission_allowed` instead. +.. _datasette_allowed_resources: + +await .allowed_resources(action, actor=None, \*, parent=None, include_is_private=False) +--------------------------------------------------------------------------------------- + +Returns a list of ``Resource`` objects that the actor can access for the +specified action. Each returned object is an instance of the action's +``resource_class`` and may include a ``.private`` attribute (when +``include_is_private=True``) to indicate that anonymous actors would be denied +access. + +Example:: + + tables = await datasette.allowed_resources( + "view-table", actor=request.actor, parent="fixtures" + ) + for table in tables: + print(table.parent, table.child) + +This method uses :ref:`datasette_allowed_resources_sql` under the hood and is an +efficient way to list the databases, tables or queries visible to a user. + +.. _datasette_allowed_resources_with_reasons: + +await .allowed_resources_with_reasons(action, actor=None) +--------------------------------------------------------- + +Returns a list of :class:`datasette.permissions.AllowedResource` tuples. Each tuple contains a ``Resource`` plus a list of strings describing the rules that granted access. 
This powers the debugging data shown by the ``/-/allowed`` endpoint and is helpful when building administrative tooling that needs to show why access was granted. + +.. _datasette_allowed_resources_sql: + +await .allowed_resources_sql(\*, action, actor=None, parent=None, include_is_private=False) +------------------------------------------------------------------------------------------- + +Builds the SQL query that Datasette uses to determine which resources an actor may access for a specific action. Returns a ``(sql: str, params: dict)`` tuple that can be executed against the internal ``catalog_*`` database tables. ``parent`` can be used to limit results to a specific database, and ``include_is_private`` adds a column indicating whether anonymous users would be denied access to that resource. + +Plugins that need to execute custom analysis over the raw allow/deny rules can use this helper to run the same query that powers the ``/-/allowed`` debugging interface. + +The SQL query built by this method will return the following columns: + +- ``parent``: The parent resource identifier (or NULL) +- ``child``: The child resource identifier (or NULL) +- ``reason``: The reason from the rule that granted access +- ``is_private``: (if ``include_is_private``) 1 if anonymous users cannot access, 0 otherwise .. _datasette_ensure_permission: @@ -422,7 +436,7 @@ await .ensure_permission(action, resource=None, actor=None) ----------------------------------------------------------- ``action`` - string - The action to check. See :ref:`permissions` for a list of available actions. + The action to check. See :ref:`actions` for a list of available actions. ``resource`` - Resource object (optional) The resource to check the permission against. Must be an instance of ``InstanceResource``, ``DatabaseResource``, or ``TableResource`` from the ``datasette.resources`` module. If omitted, defaults to ``InstanceResource()`` for instance-level permissions. 
diff --git a/docs/json_api.rst b/docs/json_api.rst index 3f696f39..3b9575de 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -623,7 +623,7 @@ Pass ``"ignore": true`` to ignore these errors and insert the other rows: Or you can pass ``"replace": true`` to replace any rows with conflicting primary keys with the new values. This requires the :ref:`permissions_update_row` permission. -Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`permissions_alter_table` permission. +Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. .. _TableUpsertView: @@ -735,7 +735,7 @@ When using upsert you must provide the primary key column (or columns if the tab If your table does not have an explicit primary key you should pass the SQLite ``rowid`` key instead. -Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`permissions_alter_table` permission. +Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. .. _RowUpdateView: @@ -792,7 +792,7 @@ The returned JSON will look like this: Any errors will return ``{"errors": ["... descriptive message ..."], "ok": false}``, and a ``400`` status code for a bad input or a ``403`` status code for an authentication or permission error. -Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`permissions_alter_table` permission. +Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. .. _RowDeleteView: @@ -860,7 +860,7 @@ The JSON here describes the table that will be created: * ``pks`` can be used instead of ``pk`` to create a compound primary key. It should be a JSON list of column names to use in that primary key. 
* ``ignore`` can be set to ``true`` to ignore existing rows by primary key if the table already exists. * ``replace`` can be set to ``true`` to replace existing rows by primary key if the table already exists. This requires the :ref:`permissions_update_row` permission. -* ``alter`` can be set to ``true`` if you want to automatically add any missing columns to the table. This requires the :ref:`permissions_alter_table` permission. +* ``alter`` can be set to ``true`` if you want to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. If the table is successfully created this will return a ``201`` status code and the following response: @@ -939,7 +939,7 @@ You can avoid this error by passing the same ``"ignore": true`` or ``"replace": To use the ``"replace": true`` option you will also need the :ref:`permissions_update_row` permission. -Pass ``"alter": true`` to automatically add any missing columns to the existing table that are present in the rows you are submitting. This requires the :ref:`permissions_alter_table` permission. +Pass ``"alter": true`` to automatically add any missing columns to the existing table that are present in the rows you are submitting. This requires the :ref:`actions_alter_table` permission. .. _TableDropView: diff --git a/tests/conftest.py b/tests/conftest.py index 31c45ed3..4797ab71 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -138,14 +138,14 @@ def restore_working_directory(tmpdir, request): @pytest.fixture(scope="session", autouse=True) -def check_permission_actions_are_documented(): +def check_actions_are_documented(): from datasette.plugins import pm content = ( pathlib.Path(__file__).parent.parent / "docs" / "authentication.rst" ).read_text() - permissions_re = re.compile(r"\.\. _permissions_([^\s:]+):") - documented_permission_actions = set(permissions_re.findall(content)).union( + permissions_re = re.compile(r"\.\. 
_actions_([^\s:]+):") + documented_actions = set(permissions_re.findall(content)).union( UNDOCUMENTED_PERMISSIONS ) @@ -160,7 +160,7 @@ def check_permission_actions_are_documented(): ) action = kwargs.get("action").replace("-", "_") assert ( - action in documented_permission_actions + action in documented_actions ), "Undocumented permission action: {}".format(action) pm.add_hookcall_monitoring( From ba654b5576a6b1fd309e266dee0b9ae773271372 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Oct 2025 21:39:55 -0700 Subject: [PATCH 006/299] Forbid same DB passed twice or via config_dir, closes #2561 --- datasette/cli.py | 45 +++++++++++++++++++++++++++----- tests/test_cli.py | 65 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 92 insertions(+), 18 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 24d87279..94af09a2 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -590,13 +590,20 @@ def serve( internal=internal, ) - # if files is a single directory, use that as config_dir= - if 1 == len(files) and os.path.isdir(files[0]): - kwargs["config_dir"] = pathlib.Path(files[0]) - files = [] + # Separate directories from files + directories = [f for f in files if os.path.isdir(f)] + file_paths = [f for f in files if not os.path.isdir(f)] + + # Handle config_dir - only one directory allowed + if len(directories) > 1: + raise click.ClickException( + "Cannot pass multiple directories. Pass a single directory as config_dir." 
+ ) + elif len(directories) == 1: + kwargs["config_dir"] = pathlib.Path(directories[0]) # Verify list of files, create if needed (and --create) - for file in files: + for file in file_paths: if not pathlib.Path(file).exists(): if create: sqlite3.connect(file).execute("vacuum") @@ -607,8 +614,32 @@ def serve( ) ) - # De-duplicate files so 'datasette db.db db.db' only attaches one /db - files = list(dict.fromkeys(files)) + # Check for duplicate files by resolving all paths to their absolute forms + # Collect all database files that will be loaded (explicit files + config_dir files) + all_db_files = [] + + # Add explicit files + for file in file_paths: + all_db_files.append((file, pathlib.Path(file).resolve())) + + # Add config_dir databases if config_dir is set + if "config_dir" in kwargs: + config_dir = kwargs["config_dir"] + for ext in ("db", "sqlite", "sqlite3"): + for db_file in config_dir.glob(f"*.{ext}"): + all_db_files.append((str(db_file), db_file.resolve())) + + # Check for duplicates + seen = {} + for original_path, resolved_path in all_db_files: + if resolved_path in seen: + raise click.ClickException( + f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' " + f"both refer to {resolved_path}" + ) + seen[resolved_path] = original_path + + files = file_paths try: ds = Datasette(files, **kwargs) diff --git a/tests/test_cli.py b/tests/test_cli.py index 537089ac..1c8f51ef 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -447,17 +447,6 @@ def test_serve_duplicate_database_names(tmpdir): assert {db["name"] for db in databases} == {"db", "db_2"} -def test_serve_deduplicate_same_database_path(tmpdir): - "'datasette db.db db.db' should only attach one database, /db" - runner = CliRunner() - db_path = str(tmpdir / "db.db") - sqlite3.connect(db_path).execute("vacuum") - result = runner.invoke(cli, [db_path, db_path, "--get", "/-/databases.json"]) - assert result.exit_code == 0, result.output - databases = json.loads(result.output) - assert 
{db["name"] for db in databases} == {"db"} - - @pytest.mark.parametrize( "filename", ["test-database (1).sqlite", "database (1).sqlite"] ) @@ -496,3 +485,57 @@ def test_internal_db(tmpdir): ) assert result.exit_code == 0 assert internal_path.exists() + + +def test_duplicate_database_files_error(tmpdir): + """Test that passing the same database file multiple times raises an error""" + runner = CliRunner() + db_path = str(tmpdir / "test.db") + sqlite3.connect(db_path).execute("vacuum") + + # Test with exact duplicate + result = runner.invoke(cli, ["serve", db_path, db_path, "--get", "/"]) + assert result.exit_code == 1 + assert "Duplicate database file" in result.output + assert "both refer to" in result.output + + # Test with different paths to same file (relative vs absolute) + result2 = runner.invoke( + cli, ["serve", db_path, str(pathlib.Path(db_path).resolve()), "--get", "/"] + ) + assert result2.exit_code == 1 + assert "Duplicate database file" in result2.output + + # Test that a file in the config_dir can't also be passed explicitly + config_dir = tmpdir / "config" + config_dir.mkdir() + config_db_path = str(config_dir / "data.db") + sqlite3.connect(config_db_path).execute("vacuum") + + result3 = runner.invoke( + cli, ["serve", config_db_path, str(config_dir), "--get", "/"] + ) + assert result3.exit_code == 1 + assert "Duplicate database file" in result3.output + assert "both refer to" in result3.output + + # Test that mixing a file NOT in the directory with a directory works fine + other_db_path = str(tmpdir / "other.db") + sqlite3.connect(other_db_path).execute("vacuum") + + result4 = runner.invoke( + cli, ["serve", other_db_path, str(config_dir), "--get", "/-/databases.json"] + ) + assert result4.exit_code == 0 + databases = json.loads(result4.output) + assert {db["name"] for db in databases} == {"other", "data"} + + # Test that multiple directories raise an error + config_dir2 = tmpdir / "config2" + config_dir2.mkdir() + + result5 = runner.invoke( + cli, 
["serve", str(config_dir), str(config_dir2), "--get", "/"] + ) + assert result5.exit_code == 1 + assert "Cannot pass multiple directories" in result5.output From b7ef968c6ff707f4c452f1da17c969b733d73dc8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 09:15:39 -0700 Subject: [PATCH 007/299] Fixed some rST labels I broke --- docs/changelog.rst | 2 +- docs/json_api.rst | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index b9340492..7b352ef6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1068,7 +1068,7 @@ Smaller changes ~~~~~~~~~~~~~~~ - Wide tables shown within Datasette now scroll horizontally (:issue:`998`). This is achieved using a new ``
`` element which may impact the implementation of some plugins (for example `this change to datasette-cluster-map `__). -- New :ref:`permissions_debug_menu` permission. (:issue:`1068`) +- New :ref:`actions_debug_menu` permission. (:issue:`1068`) - Removed ``--debug`` option, which didn't do anything. (:issue:`814`) - ``Link:`` HTTP header pagination. (:issue:`1014`) - ``x`` button for clearing filters. (:issue:`1016`) diff --git a/docs/json_api.rst b/docs/json_api.rst index 3b9575de..91a2bb15 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -347,7 +347,7 @@ Special table arguments though this could potentially result in errors if the wrong syntax is used. ``?_where=SQL-fragment`` - If the :ref:`permissions_execute_sql` permission is enabled, this parameter + If the :ref:`actions_execute_sql` permission is enabled, this parameter can be used to pass one or more additional SQL fragments to be used in the `WHERE` clause of the SQL used to query the table. @@ -510,7 +510,7 @@ Datasette provides a write API for JSON data. This is a POST-only API that requi Inserting rows ~~~~~~~~~~~~~~ -This requires the :ref:`permissions_insert_row` permission. +This requires the :ref:`actions_insert_row` permission. A single row can be inserted using the ``"row"`` key: @@ -621,7 +621,7 @@ Pass ``"ignore": true`` to ignore these errors and insert the other rows: "ignore": true } -Or you can pass ``"replace": true`` to replace any rows with conflicting primary keys with the new values. This requires the :ref:`permissions_update_row` permission. +Or you can pass ``"replace": true`` to replace any rows with conflicting primary keys with the new values. This requires the :ref:`actions_update_row` permission. Pass ``"alter: true`` to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. @@ -632,7 +632,7 @@ Upserting rows An upsert is an insert or update operation. 
If a row with a matching primary key already exists it will be updated - otherwise a new row will be inserted. -The upsert API is mostly the same shape as the :ref:`insert API <TableInsertView>`. It requires both the :ref:`permissions_insert_row` and :ref:`permissions_update_row` permissions. +The upsert API is mostly the same shape as the :ref:`insert API <TableInsertView>`. It requires both the :ref:`actions_insert_row` and :ref:`actions_update_row` permissions. :: @@ -742,7 +742,7 @@ Pass ``"alter: true`` to automatically add any missing columns to the table. Thi Updating a row ~~~~~~~~~~~~~~ -To update a row, make a ``POST`` to ``/<database>/<table>/<row-pks>/-/update``. This requires the :ref:`permissions_update_row` permission. +To update a row, make a ``POST`` to ``/<database>/<table>/<row-pks>/-/update``. This requires the :ref:`actions_update_row` permission. :: @@ -799,7 +799,7 @@ Pass ``"alter: true`` to automatically add any missing columns to the table. Thi Deleting a row ~~~~~~~~~~~~~~ -To delete a row, make a ``POST`` to ``/<database>/<table>/<row-pks>/-/delete``. This requires the :ref:`permissions_delete_row` permission. +To delete a row, make a ``POST`` to ``/<database>/<table>/<row-pks>/-/delete``. This requires the :ref:`actions_delete_row` permission. :: @@ -818,7 +818,7 @@ Any errors will return ``{"errors": ["... descriptive message ..."], "ok": false Creating a table ~~~~~~~~~~~~~~~~ -To create a table, make a ``POST`` to ``/<database>/-/create``. This requires the :ref:`permissions_create_table` permission. +To create a table, make a ``POST`` to ``/<database>/-/create``. This requires the :ref:`actions_create_table` permission. :: @@ -859,7 +859,7 @@ The JSON here describes the table that will be created: * ``pks`` can be used instead of ``pk`` to create a compound primary key. It should be a JSON list of column names to use in that primary key. * ``ignore`` can be set to ``true`` to ignore existing rows by primary key if the table already exists. -* ``replace`` can be set to ``true`` to replace existing rows by primary key if the table already exists. This requires the :ref:`permissions_update_row` permission. +* ``replace`` can be set to ``true`` to replace existing rows by primary key if the table already exists. This requires the :ref:`actions_update_row` permission. * ``alter`` can be set to ``true`` if you want to automatically add any missing columns to the table. This requires the :ref:`actions_alter_table` permission. If the table is successfully created this will return a ``201`` status code and the following response: @@ -906,7 +906,7 @@ Datasette will create a table with a schema that matches those rows and insert t "pk": "id" } -Doing this requires both the :ref:`permissions_create_table` and :ref:`permissions_insert_row` permissions. +Doing this requires both the :ref:`actions_create_table` and :ref:`actions_insert_row` permissions.
The ``201`` response here will be similar to the ``columns`` form, but will also include the number of rows that were inserted as ``row_count``: @@ -937,7 +937,7 @@ If you pass a row to the create endpoint with a primary key that already exists You can avoid this error by passing the same ``"ignore": true`` or ``"replace": true`` options to the create endpoint as you can to the :ref:`insert endpoint <TableInsertView>`. -To use the ``"replace": true`` option you will also need the :ref:`permissions_update_row` permission. +To use the ``"replace": true`` option you will also need the :ref:`actions_update_row` permission. Pass ``"alter": true`` to automatically add any missing columns to the existing table that are present in the rows you are submitting. This requires the :ref:`actions_alter_table` permission. @@ -946,7 +946,7 @@ Pass ``"alter": true`` to automatically add any missing columns to the existing Dropping tables ~~~~~~~~~~~~~~~ -To drop a table, make a ``POST`` to ``/<database>/<table>/-/drop``. This requires the :ref:`permissions_drop_table` permission. +To drop a table, make a ``POST`` to ``/<database>/<table>/-/drop``. This requires the :ref:`actions_drop_table` permission. :: From 400fa08e4ccabb55f65a0fc9f0e53b7f1bc68e32 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 14:50:46 -0700 Subject: [PATCH 008/299] Add keyset pagination to allowed_resources() (#2562) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add keyset pagination to allowed_resources() This replaces the unbounded list return with PaginatedResources, which supports efficient keyset pagination for handling thousands of resources. Closes #2560 Changes: - allowed_resources() now returns PaginatedResources instead of list - Added limit (1-1000, default 100) and next (keyset token) parameters - Added include_reasons parameter (replaces allowed_resources_with_reasons) - Removed allowed_resources_with_reasons() method entirely - PaginatedResources.all() async generator for automatic pagination - Uses tilde-encoding for tokens (matching table pagination) - Updated all callers to use .resources accessor - Updated documentation with new API and examples The PaginatedResources object has: - resources: List of Resource objects for current page - next: Token for next page (None if no more results) - all(): Async generator that yields all resources across pages Example usage: page = await ds.allowed_resources("view-table", actor, limit=100) for table in page.resources: print(table.child) # Iterate all pages automatically async for table in page.all(): print(table.child) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- datasette/app.py | 175 ++++++++++++++++++++------------ datasette/permissions.py | 4 + datasette/utils/__init__.py | 53 ++++++++++ datasette/views/database.py | 32 ++++-- datasette/views/index.py | 11 +- datasette/views/special.py | 92 +++++++---------- docs/internals.rst | 84 +++++++++++---- tests/test_actions_sql.py | 37 +++---- tests/test_allowed_resources.py | 73 ++++++-------
tests/test_permissions.py | 36 ++++--- 10 files changed, 370 insertions(+), 227 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 15cf3495..7b9fb67d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -71,6 +71,7 @@ from .url_builder import Urls from .database import Database, QueryInterrupted from .utils import ( + PaginatedResources, PrefixedUrlString, SPATIALITE_FUNCTIONS, StartupError, @@ -91,6 +92,7 @@ from .utils import ( resolve_env_secrets, resolve_routes, tilde_decode, + tilde_encode, to_css_class, urlsafe_components, redact_keys, @@ -1147,104 +1149,147 @@ class Datasette: *, parent: str | None = None, include_is_private: bool = False, - ) -> list["Resource"]: + include_reasons: bool = False, + limit: int = 100, + next: str | None = None, + ) -> PaginatedResources: """ - Return all resources the actor can access for the given action. + Return paginated resources the actor can access for the given action. - Uses SQL to filter resources based on cascading permission rules. - Returns instances of the appropriate Resource subclass. + Uses SQL with keyset pagination to efficiently filter resources. + Returns PaginatedResources with list of Resource instances and pagination metadata. 
Args: action: The action name (e.g., "view-table") actor: The actor dict (or None for unauthenticated) parent: Optional parent filter (e.g., database name) to limit results include_is_private: If True, adds a .private attribute to each Resource + include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons + limit: Maximum number of results to return (1-1000, default 100) + next: Keyset token from previous page for pagination + + Returns: + PaginatedResources with: + - resources: List of Resource objects for this page + - next: Token for next page (None if no more results) Example: - # Get all tables - tables = await datasette.allowed_resources("view-table", actor) - for table in tables: + # Get first page of tables + page = await datasette.allowed_resources("view-table", actor, limit=50) + for table in page.resources: print(f"{table.parent}/{table.child}") - # Get tables for specific database with private flag - tables = await datasette.allowed_resources( - "view-table", actor, parent="mydb", include_is_private=True + # Get next page + if page.next: + next_page = await datasette.allowed_resources( + "view-table", actor, limit=50, next=page.next + ) + + # With reasons for debugging + page = await datasette.allowed_resources( + "view-table", actor, include_reasons=True ) - for table in tables: - if table.private: - print(f"{table.child} is private") + for table in page.resources: + print(f"{table.child}: {table.reasons}") + + # Iterate through all results with async generator + page = await datasette.allowed_resources("view-table", actor) + async for table in page.all(): + print(table.child) """ action_obj = self.actions.get(action) if not action_obj: raise ValueError(f"Unknown action: {action}") + # Validate and cap limit + limit = min(max(1, limit), 1000) + + # Get base SQL query query, params = await self.allowed_resources_sql( action=action, actor=actor, parent=parent, include_is_private=include_is_private, ) - result = await 
self.get_internal_database().execute(query, params) - # Instantiate the appropriate Resource subclass for each row + # Add keyset pagination WHERE clause if next token provided + if next: + try: + components = urlsafe_components(next) + if len(components) >= 2: + last_parent, last_child = components[0], components[1] + # Keyset condition: (parent > last) OR (parent = last AND child > last) + keyset_where = """ + (parent > :keyset_parent OR + (parent = :keyset_parent AND child > :keyset_child)) + """ + # Wrap original query and add keyset filter + query = f"SELECT * FROM ({query}) WHERE {keyset_where}" + params["keyset_parent"] = last_parent + params["keyset_child"] = last_child + except (ValueError, KeyError): + # Invalid token - ignore and start from beginning + pass + + # Add LIMIT (fetch limit+1 to detect if there are more results) + # Note: query from allowed_resources_sql() already includes ORDER BY parent, child + query = f"{query} LIMIT :limit" + params["limit"] = limit + 1 + + # Execute query + result = await self.get_internal_database().execute(query, params) + rows = list(result.rows) + + # Check if truncated (got more than limit rows) + truncated = len(rows) > limit + if truncated: + rows = rows[:limit] # Remove the extra row + + # Build Resource objects with optional attributes resources = [] - for row in result.rows: - # row[0]=parent, row[1]=child, row[2]=reason (ignored), row[3]=is_private (if requested) + for row in rows: + # row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested) resource = self.resource_for_action(action, parent=row[0], child=row[1]) + + # Add reasons if requested + if include_reasons: + reason_json = row[2] + try: + reasons_array = ( + json.loads(reason_json) if isinstance(reason_json, str) else [] + ) + resource.reasons = [r for r in reasons_array if r is not None] + except (json.JSONDecodeError, TypeError): + resource.reasons = [reason_json] if reason_json else [] + + # Add private flag if requested if 
include_is_private: resource.private = bool(row[3]) + resources.append(resource) - return resources + # Generate next token if there are more results + next_token = None + if truncated and resources: + last_resource = resources[-1] + # Use tilde-encoding like table pagination + next_token = "{},{}".format( + tilde_encode(str(last_resource.parent)), + tilde_encode(str(last_resource.child)), + ) - async def allowed_resources_with_reasons( - self, - action: str, - actor: dict | None = None, - ) -> list["AllowedResource"]: - """ - Return allowed resources with permission reasons for debugging. - - Uses SQL to filter resources and includes the reason each was allowed. - Returns list of AllowedResource named tuples with (resource, reason). - - Example: - debug_info = await datasette.allowed_resources_with_reasons("view-table", actor) - for allowed in debug_info: - print(f"{allowed.resource}: {allowed.reason}") - """ - from datasette.permissions import AllowedResource - - action_obj = self.actions.get(action) - if not action_obj: - raise ValueError(f"Unknown action: {action}") - - query, params = await self.allowed_resources_sql(action=action, actor=actor) - result = await self.get_internal_database().execute(query, params) - - resources = [] - for row in result.rows: - resource = self.resource_for_action(action, parent=row[0], child=row[1]) - reason_json = row[2] - - # Parse JSON array of reasons and filter out nulls - try: - import json - - reasons_array = ( - json.loads(reason_json) if isinstance(reason_json, str) else [] - ) - reasons_filtered = [r for r in reasons_array if r is not None] - # Store as list for multiple reasons, or keep empty list - reason = reasons_filtered - except (json.JSONDecodeError, TypeError): - # Fallback for backward compatibility - reason = [reason_json] if reason_json else [] - - resources.append(AllowedResource(resource=resource, reason=reason)) - - return resources + return PaginatedResources( + resources=resources, + next=next_token, + 
_datasette=self, + _action=action, + _actor=actor, + _parent=parent, + _include_is_private=include_is_private, + _include_reasons=include_reasons, + _limit=limit, + ) async def allowed( self, diff --git a/datasette/permissions.py b/datasette/permissions.py index 669df47e..0943eced 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -16,6 +16,10 @@ class Resource(ABC): name: str = None # e.g., "table", "database", "model" parent_name: str | None = None # e.g., "database" for tables + # Instance-level optional extra attributes + reasons: list[str] | None = None + include_reasons: bool | None = None + def __init__(self, parent: str | None = None, child: str | None = None): """ Create a resource instance. diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 38a16b79..ac2c74da 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -4,6 +4,7 @@ import aiofiles import click from collections import OrderedDict, namedtuple, Counter import copy +import dataclasses import base64 import hashlib import inspect @@ -27,6 +28,58 @@ from .sqlite import sqlite3, supports_table_xinfo if typing.TYPE_CHECKING: from datasette.database import Database + from datasette.permissions import Resource + + +@dataclasses.dataclass +class PaginatedResources: + """Paginated results from allowed_resources query.""" + + resources: List["Resource"] + next: str | None # Keyset token for next page (None if no more results) + _datasette: typing.Any = dataclasses.field(default=None, repr=False) + _action: str = dataclasses.field(default=None, repr=False) + _actor: typing.Any = dataclasses.field(default=None, repr=False) + _parent: str | None = dataclasses.field(default=None, repr=False) + _include_is_private: bool = dataclasses.field(default=False, repr=False) + _include_reasons: bool = dataclasses.field(default=False, repr=False) + _limit: int = dataclasses.field(default=100, repr=False) + + async def all(self): + """ + Async generator 
that yields all resources across all pages. + + Automatically handles pagination under the hood. This is useful when you need + to iterate through all results without manually managing pagination tokens. + + Yields: + Resource objects one at a time + + Example: + page = await datasette.allowed_resources("view-table", actor) + async for table in page.all(): + print(f"{table.parent}/{table.child}") + """ + # Yield all resources from current page + for resource in self.resources: + yield resource + + # Continue fetching subsequent pages if there are more + next_token = self.next + while next_token: + page = await self._datasette.allowed_resources( + self._action, + self._actor, + parent=self._parent, + include_is_private=self._include_is_private, + include_reasons=self._include_reasons, + limit=self._limit, + next=next_token, + ) + for resource in page.resources: + yield resource + next_token = page.next + # From https://www.sqlite.org/lang_keywords.html reserved_words = set( diff --git a/datasette/views/database.py b/datasette/views/database.py index 41eb4c57..51c752a0 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -70,12 +70,15 @@ class DatabaseView(View): metadata = await datasette.get_database_metadata(database) # Get all tables/views this actor can see in bulk with private flag - - allowed_tables = await datasette.allowed_resources( - "view-table", request.actor, parent=database, include_is_private=True + allowed_tables_page = await datasette.allowed_resources( + "view-table", + request.actor, + parent=database, + include_is_private=True, + limit=1000, ) # Create lookup dict for quick access - allowed_dict = {r.child: r for r in allowed_tables} + allowed_dict = {r.child: r for r in allowed_tables_page.resources} # Filter to just views view_names_set = set(await db.view_names()) @@ -88,14 +91,18 @@ class DatabaseView(View): tables = await get_tables(datasette, request, db, allowed_dict) # Get allowed queries using the new permission 
system - allowed_query_resources = await datasette.allowed_resources( - "view-query", request.actor, parent=database, include_is_private=True + allowed_query_page = await datasette.allowed_resources( + "view-query", + request.actor, + parent=database, + include_is_private=True, + limit=1000, ) # Build canned_queries list by looking up each allowed query all_queries = await datasette.get_canned_queries(database, request.actor) canned_queries = [] - for query_resource in allowed_query_resources: + for query_resource in allowed_query_page.resources: query_name = query_resource.child if query_name in all_queries: canned_queries.append( @@ -509,12 +516,15 @@ class QueryView(View): database = db.name # Get all tables/views this actor can see in bulk with private flag - - allowed_tables = await datasette.allowed_resources( - "view-table", request.actor, parent=database, include_is_private=True + allowed_tables_page = await datasette.allowed_resources( + "view-table", + request.actor, + parent=database, + include_is_private=True, + limit=1000, ) # Create lookup dict for quick access - allowed_dict = {r.child: r for r in allowed_tables} + allowed_dict = {r.child: r for r in allowed_tables_page.resources} # Are we a canned query? 
canned_query = None diff --git a/datasette/views/index.py b/datasette/views/index.py index a6bfc4d9..a59c687c 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -28,17 +28,18 @@ class IndexView(BaseView): await self.ds.ensure_permission(action="view-instance", actor=request.actor) # Get all allowed databases and tables in bulk - allowed_databases = await self.ds.allowed_resources( + db_page = await self.ds.allowed_resources( "view-database", request.actor, include_is_private=True ) + allowed_databases = [r async for r in db_page.all()] allowed_db_dict = {r.parent: r for r in allowed_databases} - allowed_tables = await self.ds.allowed_resources( + # Group tables by database + tables_by_db = {} + table_page = await self.ds.allowed_resources( "view-table", request.actor, include_is_private=True ) - # Group by database - tables_by_db = {} - for t in allowed_tables: + async for t in table_page.all(): if t.parent not in tables_by_db: tables_by_db[t.parent] = {} tables_by_db[t.parent][t.child] = t diff --git a/datasette/views/special.py b/datasette/views/special.py index 60e4b992..8de83fae 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -268,19 +268,38 @@ class AllowedResourcesView(BaseView): offset = (page - 1) * page_size # Use the simplified allowed_resources method - # If user has debug permission, use the with_reasons variant + # Collect all resources with optional reasons for debugging try: - if has_debug_permission: - allowed_resources = await self.ds.allowed_resources_with_reasons( - action=action, - actor=actor, - ) - else: - allowed_resources = await self.ds.allowed_resources( - action=action, - actor=actor, - parent=parent_filter, - ) + allowed_rows = [] + result = await self.ds.allowed_resources( + action=action, + actor=actor, + parent=parent_filter, + include_reasons=has_debug_permission, + ) + async for resource in result.all(): + parent_val = resource.parent + child_val = resource.child + + # Build resource 
path + if parent_val is None: + resource_path = "/" + elif child_val is None: + resource_path = f"/{parent_val}" + else: + resource_path = f"/{parent_val}/{child_val}" + + row = { + "parent": parent_val, + "child": child_val, + "resource": resource_path, + } + + # Add reason if we have it (from include_reasons=True) + if has_debug_permission and hasattr(resource, "reasons"): + row["reason"] = resource.reasons + + allowed_rows.append(row) except Exception: # If catalog tables don't exist yet, return empty results return ( @@ -295,46 +314,6 @@ class AllowedResourcesView(BaseView): 200, ) - # Convert to list of dicts with resource path - allowed_rows = [] - for item in allowed_resources: - # Extract resource and reason depending on what we got back - if has_debug_permission: - # allowed_resources_with_reasons returns AllowedResource(resource, reason) - resource = item.resource - reason = item.reason - else: - # allowed_resources returns plain Resource objects - resource = item - reason = None - - parent_val = resource.parent - child_val = resource.child - - # Apply parent filter if needed (when using with_reasons, we need to filter manually) - if parent_filter is not None and parent_val != parent_filter: - continue - - # Build resource path - if parent_val is None: - resource_path = "/" - elif child_val is None: - resource_path = f"/{parent_val}" - else: - resource_path = f"/{parent_val}/{child_val}" - - row = { - "parent": parent_val, - "child": child_val, - "resource": resource_path, - } - - # Add reason if we have it (it's already a list from allowed_resources_with_reasons) - if reason is not None: - row["reason"] = reason - - allowed_rows.append(row) - # Apply child filter if specified if child_filter is not None: allowed_rows = [row for row in allowed_rows if row["child"] == child_filter] @@ -652,10 +631,11 @@ class CreateTokenView(BaseView): async def shared(self, request): self.check_permission(request) # Build list of databases and tables the user has 
permission to view - allowed_databases = await self.ds.allowed_resources( - "view-database", request.actor - ) - allowed_tables = await self.ds.allowed_resources("view-table", request.actor) + db_page = await self.ds.allowed_resources("view-database", request.actor) + allowed_databases = [r async for r in db_page.all()] + + table_page = await self.ds.allowed_resources("view-table", request.actor) + allowed_tables = [r async for r in table_page.all()] # Build database -> tables mapping database_with_tables = [] diff --git a/docs/internals.rst b/docs/internals.rst index a0e2e5c8..f0d3c99a 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -387,32 +387,80 @@ The method returns ``True`` if the permission is granted, ``False`` if denied. .. _datasette_allowed_resources: -await .allowed_resources(action, actor=None, \*, parent=None, include_is_private=False) ---------------------------------------------------------------------------------------- +await .allowed_resources(action, actor=None, \*, parent=None, include_is_private=False, include_reasons=False, limit=100, next=None) +------------------------------------------------------------------------------------------------------------------------------------ -Returns a list of ``Resource`` objects that the actor can access for the -specified action. Each returned object is an instance of the action's -``resource_class`` and may include a ``.private`` attribute (when -``include_is_private=True``) to indicate that anonymous actors would be denied -access. +Returns a ``PaginatedResources`` object containing resources that the actor can access for the specified action, with support for keyset pagination. -Example:: +``action`` - string + The action name (e.g., "view-table", "view-database") - tables = await datasette.allowed_resources( - "view-table", actor=request.actor, parent="fixtures" +``actor`` - dictionary, optional + The authenticated actor. Defaults to ``None`` for unauthenticated requests. 
+ +``parent`` - string, optional + Optional parent filter (e.g., database name) to limit results + +``include_is_private`` - boolean, optional + If True, adds a ``.private`` attribute to each Resource indicating whether anonymous users can access it + +``include_reasons`` - boolean, optional + If True, adds a ``.reasons`` attribute with a list of strings describing why access was granted (useful for debugging) + +``limit`` - integer, optional + Maximum number of results to return per page (1-1000, default 100) + +``next`` - string, optional + Keyset token from a previous page for pagination + +The method returns a ``PaginatedResources`` object (from ``datasette.utils``) with the following attributes: + +``resources`` - list + List of ``Resource`` objects for the current page + +``next`` - string or None + Token for the next page, or ``None`` if no more results exist + +Example usage: + +.. code-block:: python + + # Get first page of tables + page = await datasette.allowed_resources( + "view-table", + actor=request.actor, + parent="fixtures", + limit=50, ) - for table in tables: + + for table in page.resources: + print(table.parent, table.child) + if hasattr(table, "private"): + print(f" Private: {table.private}") + + # Get next page if available + if page.next: + next_page = await datasette.allowed_resources( + "view-table", actor=request.actor, next=page.next + ) + + # Iterate through all results automatically + page = await datasette.allowed_resources( + "view-table", actor=request.actor + ) + async for table in page.all(): print(table.parent, table.child) -This method uses :ref:`datasette_allowed_resources_sql` under the hood and is an -efficient way to list the databases, tables or queries visible to a user. + # With reasons for debugging + page = await datasette.allowed_resources( + "view-table", actor=request.actor, include_reasons=True + ) + for table in page.resources: + print(f"{table.child}: {table.reasons}") -.. 
_datasette_allowed_resources_with_reasons: +The ``page.all()`` async generator automatically handles pagination, fetching additional pages and yielding all resources one at a time. -await .allowed_resources_with_reasons(action, actor=None) ---------------------------------------------------------- - -Returns a list of :class:`datasette.permissions.AllowedResource` tuples. Each tuple contains a ``Resource`` plus a list of strings describing the rules that granted access. This powers the debugging data shown by the ``/-/allowed`` endpoint and is helpful when building administrative tooling that needs to show why access was granted. +This method uses :ref:`datasette_allowed_resources_sql` under the hood and is an efficient way to list the databases, tables or other resources that an actor can access for a specific action. .. _datasette_allowed_resources_sql: diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index adf26eeb..19d44528 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -2,9 +2,9 @@ Tests for the new Resource-based permission system. These tests verify: -1. The new Datasette.allowed_resources() method +1. The new Datasette.allowed_resources() method (with pagination) 2. The new Datasette.allowed() method -3. The new Datasette.allowed_resources_with_reasons() method +3. The include_reasons parameter for debugging 4. 
That SQL does the heavy lifting (no Python filtering) """ @@ -71,7 +71,8 @@ async def test_allowed_resources_global_allow(test_ds): try: # Use the new allowed_resources() method - tables = await test_ds.allowed_resources("view-table", {"id": "alice"}) + result = await test_ds.allowed_resources("view-table", {"id": "alice"}) + tables = result.resources # Alice should see all tables assert len(tables) == 5 @@ -133,9 +134,7 @@ async def test_allowed_specific_resource(test_ds): @pytest.mark.asyncio -async def test_allowed_resources_with_reasons(test_ds): - """Test allowed_resources_with_reasons() exposes debugging info""" - +async def test_allowed_resources_include_reasons(test_ds): def rules_callback(datasette, actor, action): if actor and actor.get("role") == "analyst": sql = """ @@ -152,21 +151,22 @@ async def test_allowed_resources_with_reasons(test_ds): pm.register(plugin, name="test_plugin") try: - # Use allowed_resources_with_reasons to get debugging info - allowed = await test_ds.allowed_resources_with_reasons( - "view-table", {"id": "bob", "role": "analyst"} + # Use allowed_resources with include_reasons to get debugging info + result = await test_ds.allowed_resources( + "view-table", {"id": "bob", "role": "analyst"}, include_reasons=True ) + allowed = result.resources # Should get analytics tables except sensitive assert len(allowed) >= 2 # At least users and events # Check we can access both resource and reason - for item in allowed: - assert isinstance(item.resource, TableResource) - assert isinstance(item.reason, list) - if item.resource.parent == "analytics": + for resource in allowed: + assert isinstance(resource, TableResource) + assert isinstance(resource.reasons, list) + if resource.parent == "analytics": # Should mention parent-level reason in at least one of the reasons - reasons_text = " ".join(item.reason).lower() + reasons_text = " ".join(resource.reasons).lower() assert "analyst access" in reasons_text finally: @@ -194,7 +194,8 @@ async def 
test_child_deny_overrides_parent_allow(test_ds): try: actor = {"id": "bob", "role": "analyst"} - tables = await test_ds.allowed_resources("view-table", actor) + result = await test_ds.allowed_resources("view-table", actor) + tables = result.resources # Should see analytics tables except sensitive analytics_tables = [t for t in tables if t.parent == "analytics"] @@ -242,7 +243,8 @@ async def test_child_allow_overrides_parent_deny(test_ds): try: actor = {"id": "carol"} - tables = await test_ds.allowed_resources("view-table", actor) + result = await test_ds.allowed_resources("view-table", actor) + tables = result.resources # Should only see production.orders production_tables = [t for t in tables if t.parent == "production"] @@ -305,7 +307,8 @@ async def test_sql_does_filtering_not_python(test_ds): ) # allowed_resources() should also use SQL filtering - tables = await test_ds.allowed_resources("view-table", actor) + result = await test_ds.allowed_resources("view-table", actor) + tables = result.resources assert len(tables) == 1 assert tables[0].parent == "analytics" assert tables[0].child == "users" diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index 56c5090d..cecffbe2 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -66,7 +66,7 @@ async def test_tables_endpoint_global_access(test_ds): try: # Use the allowed_resources API directly - tables = await test_ds.allowed_resources("view-table", {"id": "alice"}) + page = await test_ds.allowed_resources("view-table", {"id": "alice"}) # Convert to the format the endpoint returns result = [ @@ -74,7 +74,7 @@ async def test_tables_endpoint_global_access(test_ds): "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] # Alice should see all tables @@ -105,7 +105,7 @@ async def test_tables_endpoint_database_restriction(test_ds): pm.register(plugin, name="test_plugin") try: - tables = await 
test_ds.allowed_resources( + page = await test_ds.allowed_resources( "view-table", {"id": "bob", "role": "analyst"} ) result = [ @@ -113,7 +113,7 @@ async def test_tables_endpoint_database_restriction(test_ds): "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] # Bob should only see analytics tables @@ -152,13 +152,13 @@ async def test_tables_endpoint_table_exception(test_ds): pm.register(plugin, name="test_plugin") try: - tables = await test_ds.allowed_resources("view-table", {"id": "carol"}) + page = await test_ds.allowed_resources("view-table", {"id": "carol"}) result = [ { "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] # Carol should see analytics.users but not other analytics tables @@ -194,7 +194,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): pm.register(plugin, name="test_plugin") try: - tables = await test_ds.allowed_resources( + page = await test_ds.allowed_resources( "view-table", {"id": "bob", "role": "analyst"} ) result = [ @@ -202,7 +202,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] analytics_tables = [m for m in result if m["name"].startswith("analytics/")] @@ -230,10 +230,10 @@ async def test_tables_endpoint_no_permissions(): await ds._refresh_schemas() # Unknown actor with no custom permissions - tables = await ds.allowed_resources("view-table", {"id": "unknown"}) + page = await ds.allowed_resources("view-table", {"id": "unknown"}) result = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in tables + for t in page.resources ] # Should see tables (due to default_permissions.py providing default allow) @@ -260,13 +260,13 @@ async def test_tables_endpoint_specific_table_only(test_ds): 
pm.register(plugin, name="test_plugin") try: - tables = await test_ds.allowed_resources("view-table", {"id": "dave"}) + page = await test_ds.allowed_resources("view-table", {"id": "dave"}) result = [ { "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] # Should see only the two specifically allowed tables @@ -298,13 +298,13 @@ async def test_tables_endpoint_empty_result(test_ds): pm.register(plugin, name="test_plugin") try: - tables = await test_ds.allowed_resources("view-table", {"id": "blocked"}) + page = await test_ds.allowed_resources("view-table", {"id": "blocked"}) result = [ { "name": f"{t.parent}/{t.child}", "url": test_ds.urls.table(t.parent, t.child), } - for t in tables + for t in page.resources ] # Global deny should block access to all tables @@ -328,11 +328,11 @@ async def test_tables_endpoint_no_query_returns_all(): await ds._refresh_schemas() # Get all tables without query - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) # Should return all tables with truncated: false - assert len(all_tables) >= 3 - table_names = {f"{t.parent}/{t.child}" for t in all_tables} + assert len(page.resources) >= 3 + table_names = {f"{t.parent}/{t.child}" for t in page.resources} assert "test_db/users" in table_names assert "test_db/posts" in table_names assert "test_db/comments" in table_names @@ -350,12 +350,13 @@ async def test_tables_endpoint_truncation(): await db.execute_write(f"CREATE TABLE table_{i:03d} (id INTEGER)") await ds._refresh_schemas() - # Get all tables - should be truncated - all_tables = await ds.allowed_resources("view-table", None) - big_db_tables = [t for t in all_tables if t.parent == "big_db"] + # Get all tables - should be paginated with limit=100 by default + page = await ds.allowed_resources("view-table", None) + big_db_tables = [t for t in page.resources if t.parent == "big_db"] - # Should have 
exactly 105 tables in the database - assert len(big_db_tables) == 105 + # Should have exactly 100 tables in first page (default limit) + assert len(big_db_tables) == 100 + assert page.next is not None # More results available @pytest.mark.asyncio @@ -374,10 +375,10 @@ async def test_tables_endpoint_search_single_term(): await ds._refresh_schemas() # Get all tables in the new format - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) matches = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in all_tables + for t in page.resources ] # Filter for "user" (extract table name from "db/table") @@ -411,10 +412,10 @@ async def test_tables_endpoint_search_multiple_terms(): await ds._refresh_schemas() # Get all tables in the new format - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) matches = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in all_tables + for t in page.resources ] # Filter for "user profile" (two terms, extract table name from "db/table") @@ -453,10 +454,10 @@ async def test_tables_endpoint_search_ordering(): await ds._refresh_schemas() # Get all tables in the new format - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) matches = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in all_tables + for t in page.resources ] # Filter for "user" and sort by table name length @@ -490,10 +491,10 @@ async def test_tables_endpoint_search_case_insensitive(): await ds._refresh_schemas() # Get all tables in the new format - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) matches = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in all_tables + for t in 
page.resources ] # Filter for "user" (lowercase) should match all case variants @@ -525,10 +526,10 @@ async def test_tables_endpoint_search_no_matches(): await ds._refresh_schemas() # Get all tables in the new format - all_tables = await ds.allowed_resources("view-table", None) + page = await ds.allowed_resources("view-table", None) matches = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in all_tables + for t in page.resources ] # Filter for "zzz" which doesn't exist @@ -563,10 +564,10 @@ async def test_tables_endpoint_config_database_allow(): await ds._refresh_schemas() # Root user should see restricted_db tables - root_tables = await ds.allowed_resources("view-table", {"id": "root"}) + root_page = await ds.allowed_resources("view-table", {"id": "root"}) root_list = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in root_tables + for t in root_page.resources ] restricted_tables_root = [ m for m in root_list if m["name"].startswith("restricted_db/") @@ -577,10 +578,10 @@ async def test_tables_endpoint_config_database_allow(): assert "restricted_db/posts" in table_names # Alice should NOT see restricted_db tables - alice_tables = await ds.allowed_resources("view-table", {"id": "alice"}) + alice_page = await ds.allowed_resources("view-table", {"id": "alice"}) alice_list = [ {"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)} - for t in alice_tables + for t in alice_page.resources ] restricted_tables_alice = [ m for m in alice_list if m["name"].startswith("restricted_db/") diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 8f05b050..c5f547ea 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1327,14 +1327,14 @@ async def test_actor_restrictions_filters_allowed_resources(perms_ds): actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} # Should only return t1 - allowed_tables = await 
perms_ds.allowed_resources("view-table", actor) - assert len(allowed_tables) == 1 - assert allowed_tables[0].parent == "perms_ds_one" - assert allowed_tables[0].child == "t1" + page = await perms_ds.allowed_resources("view-table", actor) + assert len(page.resources) == 1 + assert page.resources[0].parent == "perms_ds_one" + assert page.resources[0].child == "t1" # Database listing should be empty (no view-database permission) - allowed_dbs = await perms_ds.allowed_resources("view-database", actor) - assert len(allowed_dbs) == 0 + db_page = await perms_ds.allowed_resources("view-database", actor) + assert len(db_page.resources) == 0 @pytest.mark.asyncio @@ -1343,12 +1343,10 @@ async def test_actor_restrictions_database_level(perms_ds): actor = {"id": "user", "_r": {"d": {"perms_ds_one": ["vt"]}}} - allowed_tables = await perms_ds.allowed_resources( - "view-table", actor, parent="perms_ds_one" - ) + page = await perms_ds.allowed_resources("view-table", actor, parent="perms_ds_one") # Should return all tables in perms_ds_one - table_names = {r.child for r in allowed_tables} + table_names = {r.child for r in page.resources} assert "t1" in table_names assert "t2" in table_names assert "v1" in table_names # views too @@ -1360,11 +1358,11 @@ async def test_actor_restrictions_global_level(perms_ds): actor = {"id": "user", "_r": {"a": ["vt"]}} - allowed_tables = await perms_ds.allowed_resources("view-table", actor) + page = await perms_ds.allowed_resources("view-table", actor) # Should return all tables in all databases - assert len(allowed_tables) > 0 - dbs = {r.parent for r in allowed_tables} + assert len(page.resources) > 0 + dbs = {r.parent for r in page.resources} assert "perms_ds_one" in dbs assert "perms_ds_two" in dbs @@ -1430,8 +1428,8 @@ async def test_actor_restrictions_view_instance_only(perms_ds): data = response.json() # The instance is visible but databases list should be empty or minimal # Actually, let's check via allowed_resources - allowed_dbs = await 
perms_ds.allowed_resources("view-database", actor) - assert len(allowed_dbs) == 0 + page = await perms_ds.allowed_resources("view-database", actor) + assert len(page.resources) == 0 @pytest.mark.asyncio @@ -1441,11 +1439,11 @@ async def test_actor_restrictions_empty_allowlist(perms_ds): actor = {"id": "user", "_r": {}} # No actions in allowlist, so everything should be denied - allowed_tables = await perms_ds.allowed_resources("view-table", actor) - assert len(allowed_tables) == 0 + page1 = await perms_ds.allowed_resources("view-table", actor) + assert len(page1.resources) == 0 - allowed_dbs = await perms_ds.allowed_resources("view-database", actor) - assert len(allowed_dbs) == 0 + page2 = await perms_ds.allowed_resources("view-database", actor) + assert len(page2.resources) == 0 result = await perms_ds.allowed(action="view-instance", actor=actor) assert result is False From e5f392ae7a3aad7f778e7d6be7e06b1ad0b84878 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 15:07:37 -0700 Subject: [PATCH 009/299] datasette.allowed_resources_sql() returns namedtuple --- datasette/app.py | 10 +++++++--- docs/internals.rst | 2 +- tests/test_internals_datasette.py | 13 ++++++++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 7b9fb67d..5a3d59eb 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -248,6 +248,9 @@ FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" DEFAULT_NOT_SET = object() +ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) + + async def favicon(request, send): await asgi_send_file( send, @@ -1110,7 +1113,7 @@ class Datasette: actor: dict | None = None, parent: str | None = None, include_is_private: bool = False, - ) -> tuple[str, dict]: + ) -> ResourcesSQL: """ Build SQL query to get all resources the actor can access for the given action. 
@@ -1120,7 +1123,7 @@ class Datasette: parent: Optional parent filter (e.g., database name) to limit results include_is_private: If True, include is_private column showing if anonymous cannot access - Returns a tuple of (query: str, params: dict) that can be executed against the internal database. + Returns a namedtuple of (query: str, params: dict) that can be executed against the internal database. The query returns rows with (parent, child, reason) columns, plus is_private if requested. Example: @@ -1138,9 +1141,10 @@ class Datasette: if not action_obj: raise ValueError(f"Unknown action: {action}") - return await build_allowed_resources_sql( + sql, params = await build_allowed_resources_sql( self, actor, action, parent=parent, include_is_private=include_is_private ) + return ResourcesSQL(sql, params) async def allowed_resources( self, diff --git a/docs/internals.rst b/docs/internals.rst index f0d3c99a..0132fddf 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -467,7 +467,7 @@ This method uses :ref:`datasette_allowed_resources_sql` under the hood and is an await .allowed_resources_sql(\*, action, actor=None, parent=None, include_is_private=False) ------------------------------------------------------------------------------------------- -Builds the SQL query that Datasette uses to determine which resources an actor may access for a specific action. Returns a ``(sql: str, params: dict)`` tuple that can be executed against the internal ``catalog_*`` database tables. ``parent`` can be used to limit results to a specific database, and ``include_is_private`` adds a column indicating whether anonymous users would be denied access to that resource. +Builds the SQL query that Datasette uses to determine which resources an actor may access for a specific action. Returns a ``(sql: str, params: dict)`` namedtuple that can be executed against the internal ``catalog_*`` database tables. 
``parent`` can be used to limit results to a specific database, and ``include_is_private`` adds a column indicating whether anonymous users would be denied access to that resource. Plugins that need to execute custom analysis over the raw allow/deny rules can use this helper to run the same query that powers the ``/-/allowed`` debugging interface. diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 60bcfe25..c64620a6 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -4,7 +4,7 @@ Tests for the datasette.app.Datasette class import dataclasses from datasette import Context -from datasette.app import Datasette, Database +from datasette.app import Datasette, Database, ResourcesSQL from datasette.resources import DatabaseResource from itsdangerous import BadSignature import pytest @@ -195,3 +195,14 @@ async def test_apply_metadata_json(): assert (await ds.client.get("/")).status_code == 200 value = (await ds.get_instance_metadata()).get("weird_instance_value") assert value == '{"nested": [1, 2, 3]}' + + +@pytest.mark.asyncio +async def test_allowed_resources_sql(datasette): + result = await datasette.allowed_resources_sql( + action="view-table", + actor=None, + ) + assert isinstance(result, ResourcesSQL) + assert "all_rules AS" in result.sql + assert result.params["action"] == "view-table" From 3184bfae54adcce05547cd8a156f358e8fbca8ab Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 15:37:30 -0700 Subject: [PATCH 010/299] Release notes for 1.0a20, refs #2550 --- docs/changelog.rst | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7b352ef6..a4a50add 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,60 @@ Changelog ========= +.. 
_v1_0_a20: + +1.0a20 (2025-10-31) +------------------- + +This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. See also `the annotated release notes `__. + +Permission system redesign +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Previously the permission system worked using ``datasette.permission_allowed()`` checks which consulted all available plugins in turn to determine whether a given actor was allowed to perform a given action on a given resource. + +This approach could become prohibitively expensive for large lists of items - for example to determine the list of tables that a user could view in a large Datasette instance, where the plugin hooks would be called N times for N tables. + +The new system instead uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action. + +Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will influence the construction of that query. + +Affected plugins should make the following changes: + +- Replace calls to ``datasette.permission_allowed()`` with calls to the new :ref:`datasette.allowed() ` method. The new method takes a ``resource=`` parameter which should be an instance of a ``Resource`` subclass, as described in the method documentation. +- The ``permission_allowed()`` plugin hook has been removed in favor of the new :ref:`permission_resources_sql() ` hook. +- The ``register_permissions()`` plugin hook has been removed in favor of :ref:`register_actions() `. + +Plugins can now make use of two new internal methods to help resolve permission checks: + +- :ref:`datasette.allowed_resources() ` returns a ``PaginatedResources`` object with a ``.resources`` list of ``Resource`` instances that an actor is allowed to access for a given action (and a ``.next`` token for pagination).
+- :ref:`datasette.allowed_resources_sql() ` returns the SQL and parameters that can be executed against the internal catalog tables to determine which resources an actor is allowed to access for a given action. This can be combined with further SQL to perform advanced custom filtering. + +Related changes: + +- The way ``datasette --root`` works has changed. Running Datasette with this flag now causes the root actor to pass *all* permission checks. (:issue:`2521`) + +- Permission debugging improvements: + + - The ``/-/allowed`` endpoint shows resources the user is allowed to interact with for different actions. + + - ``/-/rules`` shows the raw allow/deny rules that apply to different permission checks. + + - ``/-/actions`` lists every available action. + + - ``/-/check`` can be used to try out different permission checks for the current actor. + +Other changes +~~~~~~~~~~~~~ + +- The internal ``catalog_views`` table now tracks SQLite views alongside tables in the introspection database. (:issue:`2495`) + +- Hitting the ``/`` brings up a search interface for navigating to tables that the current user can view. A new ``/-/tables`` endpoint supports this functionality. (:issue:`2523`) + +- Datasette attempts to detect some configuration errors on startup. + +- Datasette now supports Python 3.14 and no longer tests against Python 3.9. + .. _v1_0_a19: 1.0a19 (2025-04-21) From 223dcc7c0ef3aed43348cc3b3ee8866bb5e2730c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 16:11:53 -0700 Subject: [PATCH 011/299] Remove unused link --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index a4a50add..a689e4bb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,7 +9,7 @@ Changelog 1.0a20 (2025-10-31) ------------------- -This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. 
See also `the annotated release notes `__. +This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. Permission system redesign ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 48982a0ff5d86e1a5c0f8313e82537dac92ccda0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 16:12:54 -0700 Subject: [PATCH 012/299] Mark 1.0a20 unreleased Refs #2550 --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index a689e4bb..db43634a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,8 +6,8 @@ Changelog .. _v1_0_a20: -1.0a20 (2025-10-31) -------------------- +UNRELEASED 1.0a20 (2025-??-??) +------------------------------ This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. From 47e40604694058b085224b457d85761a58f014b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 16:34:11 -0700 Subject: [PATCH 013/299] Enable MyST Markdown docs, port events.rst, refs #2565 --- Justfile | 2 +- docs/conf.py | 12 +++++++++++- docs/{events.rst => events.md} | 10 +++++----- pyproject.toml | 2 ++ 4 files changed, 19 insertions(+), 7 deletions(-) rename docs/{events.rst => events.md} (74%) diff --git a/Justfile b/Justfile index 8e4d6066..a9cdd94a 100644 --- a/Justfile +++ b/Justfile @@ -29,7 +29,7 @@ export DATASETTE_SECRET := "not_a_secret" # Serve live docs on localhost:8000 @docs: cog blacken-docs - cd docs && uv run make livehtml + uv sync --extra docs && cd docs && uv run make livehtml # Apply Black @black: diff --git a/docs/conf.py b/docs/conf.py index e13882b2..0879eeb9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,12 +36,19 @@ extensions = [ "sphinx.ext.extlinks", "sphinx.ext.autodoc", "sphinx_copybutton", + "myst_parser", + "sphinx_markdown_builder", ] if not os.environ.get("DISABLE_SPHINX_INLINE_TABS"): extensions += 
["sphinx_inline_tabs"] autodoc_member_order = "bysource" +myst_enable_extensions = ["colon_fence"] + +markdown_http_base = "https://docs.datasette.io/en/stable" +markdown_uri_doc_suffix = ".html" + extlinks = { "issue": ("https://github.com/simonw/datasette/issues/%s", "#%s"), } @@ -53,7 +60,10 @@ templates_path = ["_templates"] # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = ".rst" +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} # The master toctree document. master_doc = "index" diff --git a/docs/events.rst b/docs/events.md similarity index 74% rename from docs/events.rst rename to docs/events.md index b86c8025..399317e9 100644 --- a/docs/events.rst +++ b/docs/events.md @@ -1,14 +1,14 @@ -.. _events: - -Events -====== +(events)= +# Events Datasette includes a mechanism for tracking events that occur while the software is running. This is primarily intended to be used by plugins, which can both trigger events and listen for events. The core Datasette application triggers events when certain things happen. This page describes those events. -Plugins can listen for events using the :ref:`plugin_hook_track_event` plugin hook, which will be called with instances of the following classes - or additional classes :ref:`registered by other plugins `. +Plugins can listen for events using the {ref}`plugin_hook_track_event` plugin hook, which will be called with instances of the following classes - or additional classes {ref}`registered by other plugins `. +```{eval-rst} .. 
automodule:: datasette.events :members: :exclude-members: Event +``` diff --git a/pyproject.toml b/pyproject.toml index 1536c09b..fb9f0453 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,8 @@ docs = [ "blacken-docs", "sphinx-copybutton", "sphinx-inline-tabs", + "myst-parser", + "sphinx-markdown-builder", "ruamel.yaml", ] test = [ From 1f8995e7768ae033cda4009a6339025ced02c25f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 31 Oct 2025 19:13:41 -0700 Subject: [PATCH 014/299] upgrade-1.0a20.md, refs #2564 And another Markdown conversion, refs #2565 --- Justfile | 4 + docs/upgrade-1.0a20.md | 105 +++++++++++++++ docs/{upgrade_guide.rst => upgrade_guide.md} | 130 +++++++++---------- 3 files changed, 167 insertions(+), 72 deletions(-) create mode 100644 docs/upgrade-1.0a20.md rename docs/{upgrade_guide.rst => upgrade_guide.md} (52%) diff --git a/Justfile b/Justfile index a9cdd94a..adb8cf0d 100644 --- a/Justfile +++ b/Justfile @@ -31,6 +31,10 @@ export DATASETTE_SECRET := "not_a_secret" @docs: cog blacken-docs uv sync --extra docs && cd docs && uv run make livehtml +# Build docs as static HTML +@docs-build: cog blacken-docs + rm -rf docs/_build && cd docs && uv run make html + # Apply Black @black: uv run black . diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md new file mode 100644 index 00000000..fcb77062 --- /dev/null +++ b/docs/upgrade-1.0a20.md @@ -0,0 +1,105 @@ +--- +orphan: true +--- + +# Datasette 1.0a20 plugin upgrade guide + + + +Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use any of the following: + +- The `register_permissions()` plugin hook - this should be replaced with `register_actions` +- The `permission_allowed()` plugin hook - this should be upgraded to `permission_resources_sql()`. 
+- The `datasette.permission_allowed()` internal method - this should be replaced with `datasette.allowed()` +- Logic that grants access to the `"root"` actor can be removed. + +## Permissions are now actions + +The `register_permissions()` hook should be replaced with `register_actions()`. + +Old code: + +```python +@hookimpl +def register_permissions(datasette): + return [ + Permission( + name="datasette-pins-write", + abbr=None, + description="Can pin, unpin, and re-order pins for datasette-pins", + takes_database=False, + takes_resource=False, + default=False, + ), + Permission( + name="datasette-pins-read", + abbr=None, + description="Can read pinned items.", + takes_database=False, + takes_resource=False, + default=False, + ), + ] +``` +The new `Action` does not have a `default=` parameter, and `takes_database` and `takes_resource` have been renamed to `takes_parent` and `takes_child`. The new code would look like this: + +```python +from datasette.permissions import Action + +@hookimpl +def register_actions(datasette): + return [ + Action( + name="datasette-pins-write", + abbr=None, + description="Can pin, unpin, and re-order pins for datasette-pins", + takes_parent=False, + takes_child=False, + default=False, + ), + Action( + name="datasette-pins-read", + abbr=None, + description="Can read pinned items.", + takes_parent=False, + takes_child=False, + default=False, + ), + ] +``` + +## permission_allowed() hook is replaced by permission_resources_sql() + +The following old code: +```python +@hookimpl +def permission_allowed(action): + if action == "permissions-debug": + return True +``` +Can be replaced by: +```python +from datasette.permissions import PermissionSQL + +@hookimpl +def permission_resources_sql(action): + return PermissionSQL.allow(reason="datasette-allow-permissions-debug") +``` +A `.deny(reason="")` class method is also available.
+ +For more complex permission checks consult the documentation for that plugin hook: + + +## Fixing async with httpx.AsyncClient(app=app) + +Some older plugins may use the following pattern in their tests, which is no longer supported: +```python +app = Datasette([], memory=True).app() +async with httpx.AsyncClient(app=app) as client: + response = await client.get("http://localhost/path") +``` +The new pattern is to use `ds.client` like this: +```python +ds = Datasette([], memory=True) +response = await ds.client.get("/path") +``` diff --git a/docs/upgrade_guide.rst b/docs/upgrade_guide.md similarity index 52% rename from docs/upgrade_guide.rst rename to docs/upgrade_guide.md index f983fb2d..105d7281 100644 --- a/docs/upgrade_guide.rst +++ b/docs/upgrade_guide.md @@ -1,90 +1,76 @@ -.. _upgrade_guide: +(upgrade_guide)= +# Upgrade guide -=============== - Upgrade guide -=============== - -.. _upgrade_guide_v1: - -Datasette 0.X -> 1.0 -==================== +(upgrade_guide_v1)= +## Datasette 0.X -> 1.0 This section reviews breaking changes Datasette ``1.0`` has when upgrading from a ``0.XX`` version. For new features that ``1.0`` offers, see the :ref:`changelog`. -.. _upgrade_guide_v1_sql_queries: - -New URL for SQL queries ------------------------ +(upgrade_guide_v1_sql_queries)= +### New URL for SQL queries Prior to ``1.0a14`` the URL for executing a SQL query looked like this: -:: - - /databasename?sql=select+1 - # Or for JSON: - /databasename.json?sql=select+1 +```text +/databasename?sql=select+1 +# Or for JSON: +/databasename.json?sql=select+1 +``` This endpoint served two purposes: without a ``?sql=`` it would list the tables in the database, but with that option it would return results of a query instead.
-The URL for executing a SQL query now looks like this:: +The URL for executing a SQL query now looks like this: - /databasename/-/query?sql=select+1 - # Or for JSON: - /databasename/-/query.json?sql=select+1 +```text +/databasename/-/query?sql=select+1 +# Or for JSON: +/databasename/-/query.json?sql=select+1 +``` **This isn't a breaking change.** API calls to the older ``/databasename?sql=...`` endpoint will redirect to the new ``databasename/-/query?sql=...`` endpoint. Upgrading to the new URL is recommended to avoid the overhead of the additional redirect. -.. _upgrade_guide_v1_metadata: +(upgrade_guide_v1_metadata)= +### Metadata changes -Metadata changes ----------------- +Metadata was completely revamped for Datasette 1.0. There are a number of related breaking changes, from the ``metadata.yaml`` file to Python APIs, that you'll need to consider when upgrading. -Metadata was completely revamped for Datasette 1.0. There are a number of related breaking changes, from the ``metadata.yaml`` file to Python APIs, that you'll need to consider when upgrading. +(upgrade_guide_v1_metadata_split)= +#### ``metadata.yaml`` split into ``datasette.yaml`` -.. _upgrade_guide_v1_metadata_split: - -``metadata.yaml`` split into ``datasette.yaml`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink if a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metaata (ex title and descriptions of database and tables, licensing info, etc). Other settings have been moved to a ``datasette.yml`` configuration file, described in :ref:`configuration`. +Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink of a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metadata (ex title and descriptions of database and tables, licensing info, etc).
Other settings have been moved to a ``datasette.yml`` configuration file, described in :ref:`configuration`. To start Datasette with both metadata and configuration files, run it like this: -.. code-block:: bash +```bash +datasette --metadata metadata.yaml --config datasette.yaml +# Or the shortened version: +datasette -m metadata.yml -c datasette.yml +``` - datasette --metadata metadata.yaml --config datasette.yaml - # Or the shortened version: - datasette -m metadata.yml -c datasette.yml +(upgrade_guide_v1_metadata_upgrade)= +#### Upgrading an existing ``metadata.yaml`` file -.. _upgrade_guide_v1_metadata_upgrade: +The [datasette-upgrade plugin](https://github.com/datasette/datasette-upgrade) can be used to split a Datasette 0.x.x ``metadata.yaml`` (or ``.json``) file into separate ``metadata.yaml`` and ``datasette.yaml`` files. First, install the plugin: -Upgrading an existing ``metadata.yaml`` file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The `datasette-upgrade plugin `__ can be used to split a Datasette 0.x.x ``metadata.yaml`` (or ``.json``) file into separate ``metadata.yaml`` and ``datasette.yaml`` files. First, install the plugin: - -.. code-block:: bash - - datasette install datasette-upgrade +```bash +datasette install datasette-upgrade +``` Then run it like this to produce the two new files: -.. code-block:: bash +```bash +datasette upgrade metadata-to-config metadata.json -m metadata.yml -c datasette.yml +``` - datasette upgrade metadata-to-config metadata.json -m metadata.yml -c datasette.yml - -Metadata "fallback" has been removed -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#### Metadata "fallback" has been removed Certain keys in metadata like ``license`` used to "fallback" up the chain of ownership. For example, if you set an ``MIT`` to a database and a table within that database did not have a specified license, then that table would inherit an ``MIT`` license. This behavior has been removed in Datasette 1.0. 
Now license fields must be placed on all items, including individual databases and tables. -.. _upgrade_guide_v1_metadata_removed: - -The ``get_metadata()`` plugin hook has been removed -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(upgrade_guide_v1_metadata_removed)= +#### The ``get_metadata()`` plugin hook has been removed In Datasette ``0.x`` plugins could implement a ``get_metadata()`` plugin hook to customize how metadata was retrieved for different instances, databases and tables. @@ -92,33 +78,29 @@ This hook could be inefficient, since some pages might load metadata for many di As of Datasette ``1.0a14`` (2024-08-05), the ``get_metadata()`` hook has been deprecated: -.. code-block:: python - - # ❌ DEPRECATED in Datasette 1.0 - @hookimpl - def get_metadata(datasette, key, database, table): - pass +```python +# ❌ DEPRECATED in Datasette 1.0 +@hookimpl +def get_metadata(datasette, key, database, table): + pass +``` Instead, plugins are encouraged to interact directly with Datasette's in-memory metadata tables in SQLite using the following methods on the :ref:`internals_datasette`: -- :ref:`get_instance_metadata() ` and :ref:`set_instance_metadata() ` -- :ref:`get_database_metadata() ` and :ref:`set_database_metadata() ` -- :ref:`get_resource_metadata() ` and :ref:`set_resource_metadata() ` -- :ref:`get_column_metadata() ` and :ref:`set_column_metadata() ` +- :ref:`get_instance_metadata() ` and :ref:`set_instance_metadata() ` +- :ref:`get_database_metadata() ` and :ref:`set_database_metadata() ` +- :ref:`get_resource_metadata() ` and :ref:`set_resource_metadata() ` +- :ref:`get_column_metadata() ` and :ref:`set_column_metadata() ` A plugin that stores or calculates its own metadata can implement the :ref:`plugin_hook_startup` hook to populate those items on startup, and then call those methods while it is running to persist any new metadata changes. -.. 
_upgrade_guide_v1_metadata_json_removed: - -The ``/metadata.json`` endpoint has been removed -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(upgrade_guide_v1_metadata_json_removed)= +#### The ``/metadata.json`` endpoint has been removed As of Datasette ``1.0a14``, the root level ``/metadata.json`` endpoint has been removed. Metadata for tables will become available through currently in-development extras in a future alpha. -.. _upgrade_guide_v1_metadata_method_removed: - -The ``metadata()`` method on the Datasette class has been removed -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(upgrade_guide_v1_metadata_method_removed)= +#### The ``metadata()`` method on the Datasette class has been removed As of Datasette ``1.0a14``, the ``.metadata()`` method on the Datasette Python API has been removed. @@ -128,3 +110,7 @@ Instead, one should use the following methods on a Datasette class: - :ref:`get_database_metadata() ` - :ref:`get_resource_metadata() ` - :ref:`get_column_metadata() ` + +```{include} upgrade-1.0a20.md +:heading-offset: 1 +``` From 5705ce0d95ebcfc2a21b305b9f0f28744363f40f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 11:35:08 -0700 Subject: [PATCH 015/299] Move takes_child/takes_parent information from Action to Resource (#2567) Simplified Action by moving takes_child/takes_parent logic to Resource - Removed InstanceResource - global actions are now simply those with resource_class=None - Resource.parent_class - Replaced parent_name: str with parent_class: type[Resource] | None for direct class references - Simplified Action dataclass - No more redundant fields, everything is derived from the Resource class structure - Validation - The __init_subclass__ method now checks parent_class.parent_class to enforce the 2-level hierarchy Closes #2563 --- datasette/app.py | 24 ++-- datasette/default_actions.py | 84 ++++-------- datasette/permissions.py | 52 ++++++- datasette/resources.py | 20 +-- 
datasette/views/special.py | 12 +- docs/plugin_hooks.rst | 22 +-- docs/upgrade-1.0a20.md | 34 ++++- tests/conftest.py | 4 + tests/plugins/my_plugin.py | 96 +++++++------ tests/test_plugins.py | 257 ++++++++++++++++++++++++++++++++--- 10 files changed, 418 insertions(+), 187 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5a3d59eb..09936b3a 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1308,7 +1308,7 @@ class Datasette: Uses SQL to check permission for a single resource without fetching all resources. This is efficient - it does NOT call allowed_resources() and check membership. - If resource is not provided, defaults to InstanceResource() for instance-level actions. + For global actions, resource should be None (or omitted). Example: from datasette.resources import TableResource @@ -1318,14 +1318,12 @@ class Datasette: actor=actor ) - # For instance-level actions, resource can be omitted: + # For global actions, resource can be omitted: can_debug = await datasette.allowed(action="permissions-debug", actor=actor) """ from datasette.utils.actions_sql import check_permission_for_resource - from datasette.resources import InstanceResource - if resource is None: - resource = InstanceResource() + # For global actions, resource remains None # Check if this action has also_requires - if so, check that action first action_obj = self.actions.get(action) @@ -1338,12 +1336,16 @@ class Datasette: ): return False + # For global actions, resource is None + parent = resource.parent if resource else None + child = resource.child if resource else None + result = await check_permission_for_resource( datasette=self, actor=actor, action=action, - parent=resource.parent, - child=resource.child, + parent=parent, + child=child, ) # Log the permission check for debugging @@ -1352,8 +1354,8 @@ class Datasette: when=datetime.datetime.now(datetime.timezone.utc).isoformat(), actor=actor, action=action, - parent=resource.parent, - child=resource.child, + 
parent=parent, + child=child, result=result, ) ) @@ -1607,7 +1609,9 @@ class Datasette: "description": action.description, "takes_parent": action.takes_parent, "takes_child": action.takes_child, - "resource_class": action.resource_class.__name__, + "resource_class": ( + action.resource_class.__name__ if action.resource_class else None + ), "also_requires": action.also_requires, } for action in sorted(self.actions.values(), key=lambda a: a.name) diff --git a/datasette/default_actions.py b/datasette/default_actions.py index e06e906b..87d98fac 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -1,7 +1,6 @@ from datasette import hookimpl from datasette.permissions import Action from datasette.resources import ( - InstanceResource, DatabaseResource, TableResource, QueryResource, @@ -12,122 +11,91 @@ from datasette.resources import ( def register_actions(): """Register the core Datasette actions.""" return ( - # View actions + # Global actions (no resource_class) Action( name="view-instance", abbr="vi", description="View Datasette instance", - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, ), + Action( + name="permissions-debug", + abbr="pd", + description="Access permission debug tool", + ), + Action( + name="debug-menu", + abbr="dm", + description="View debug menu items", + ), + # Database-level actions (parent-level) Action( name="view-database", abbr="vd", description="View database", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, ), Action( name="view-database-download", abbr="vdd", description="Download database file", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, also_requires="view-database", ), - Action( - name="view-table", - abbr="vt", - description="View table", - takes_parent=True, - takes_child=True, - resource_class=TableResource, - ), - Action( - name="view-query", - abbr="vq", - description="View named query results", - takes_parent=True, - 
takes_child=True, - resource_class=QueryResource, - ), Action( name="execute-sql", abbr="es", description="Execute read-only SQL queries", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, also_requires="view-database", ), - # Debug actions Action( - name="permissions-debug", - abbr="pd", - description="Access permission debug tool", - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, + name="create-table", + abbr="ct", + description="Create tables", + resource_class=DatabaseResource, ), + # Table-level actions (child-level) Action( - name="debug-menu", - abbr="dm", - description="View debug menu items", - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, + name="view-table", + abbr="vt", + description="View table", + resource_class=TableResource, ), - # Write actions on tables Action( name="insert-row", abbr="ir", description="Insert rows", - takes_parent=True, - takes_child=True, resource_class=TableResource, ), Action( name="delete-row", abbr="dr", description="Delete rows", - takes_parent=True, - takes_child=True, resource_class=TableResource, ), Action( name="update-row", abbr="ur", description="Update rows", - takes_parent=True, - takes_child=True, resource_class=TableResource, ), Action( name="alter-table", abbr="at", description="Alter tables", - takes_parent=True, - takes_child=True, resource_class=TableResource, ), Action( name="drop-table", abbr="dt", description="Drop tables", - takes_parent=True, - takes_child=True, resource_class=TableResource, ), - # Schema actions on databases + # Query-level actions (child-level) Action( - name="create-table", - abbr="ct", - description="Create tables", - takes_parent=True, - takes_child=False, - resource_class=DatabaseResource, + name="view-query", + abbr="vq", + description="View named query results", + resource_class=QueryResource, ), ) diff --git a/datasette/permissions.py b/datasette/permissions.py index 0943eced..8e0d0fc1 100644 --- 
a/datasette/permissions.py +++ b/datasette/permissions.py @@ -14,7 +14,7 @@ class Resource(ABC): # Class-level metadata (subclasses must define these) name: str = None # e.g., "table", "database", "model" - parent_name: str | None = None # e.g., "database" for tables + parent_class: type["Resource"] | None = None # e.g., DatabaseResource for tables # Instance-level optional extra attributes reasons: list[str] | None = None @@ -54,6 +54,29 @@ class Resource(ABC): def private(self, value: bool): self._private = value + @classmethod + def __init_subclass__(cls): + """ + Validate resource hierarchy doesn't exceed 2 levels. + + Raises: + ValueError: If this resource would create a 3-level hierarchy + """ + super().__init_subclass__() + + if cls.parent_class is None: + return # Top of hierarchy, nothing to validate + + # Check if our parent has a parent - that would create 3 levels + if cls.parent_class.parent_class is not None: + # We have a parent, and that parent has a parent + # This creates a 3-level hierarchy, which is not allowed + raise ValueError( + f"Resource {cls.__name__} creates a 3-level hierarchy: " + f"{cls.parent_class.parent_class.__name__} -> {cls.parent_class.__name__} -> {cls.__name__}. " + f"Maximum 2 levels allowed (parent -> child)." + ) + @classmethod @abstractmethod def resources_sql(cls) -> str: @@ -77,11 +100,32 @@ class Action: name: str abbr: str | None description: str | None - takes_parent: bool - takes_child: bool - resource_class: type[Resource] + resource_class: type[Resource] | None = None also_requires: str | None = None # Optional action name that must also be allowed + @property + def takes_parent(self) -> bool: + """ + Whether this action requires a parent identifier when instantiating its resource. + + Returns False for global-only actions (no resource_class). + Returns True for all actions with a resource_class (all resources require a parent identifier). 
+ """ + return self.resource_class is not None + + @property + def takes_child(self) -> bool: + """ + Whether this action requires a child identifier when instantiating its resource. + + Returns False for global actions (no resource_class). + Returns False for parent-level resources (DatabaseResource - parent_class is None). + Returns True for child-level resources (TableResource, QueryResource - have a parent_class). + """ + if self.resource_class is None: + return False + return self.resource_class.parent_class is not None + _reason_id = 1 diff --git a/datasette/resources.py b/datasette/resources.py index 847f1686..641afb2f 100644 --- a/datasette/resources.py +++ b/datasette/resources.py @@ -3,25 +3,11 @@ from datasette.permissions import Resource -class InstanceResource(Resource): - """The Datasette instance itself.""" - - name = "instance" - parent_name = None - - def __init__(self): - super().__init__(parent=None, child=None) - - @classmethod - async def resources_sql(cls, datasette) -> str: - return "SELECT NULL AS parent, NULL AS child" - - class DatabaseResource(Resource): """A database in Datasette.""" name = "database" - parent_name = "instance" + parent_class = None # Top of the resource hierarchy def __init__(self, database: str): super().__init__(parent=database, child=None) @@ -38,7 +24,7 @@ class TableResource(Resource): """A table in a database.""" name = "table" - parent_name = "database" + parent_class = DatabaseResource def __init__(self, database: str, table: str): super().__init__(parent=database, child=table) @@ -58,7 +44,7 @@ class QueryResource(Resource): """A canned query in a database.""" name = "query" - parent_name = "database" + parent_class = DatabaseResource def __init__(self, database: str, query: str): super().__init__(parent=database, child=query) diff --git a/datasette/views/special.py b/datasette/views/special.py index 8de83fae..5a341911 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,7 +1,7 @@ 
import json import logging from datasette.events import LogoutEvent, LoginEvent, CreateTokenEvent -from datasette.resources import DatabaseResource, TableResource, InstanceResource +from datasette.resources import DatabaseResource, TableResource from datasette.utils.asgi import Response, Forbidden from datasette.utils import ( actor_matches_allow, @@ -491,12 +491,18 @@ async def _check_permission_for_actor(ds, action, parent, child, actor): if not action_obj: return {"error": f"Unknown action: {action}"}, 400 - if action_obj.takes_parent and action_obj.takes_child: + # Global actions (no resource_class) don't have a resource + if action_obj.resource_class is None: + resource_obj = None + elif action_obj.takes_parent and action_obj.takes_child: + # Child-level resource (e.g., TableResource, QueryResource) resource_obj = action_obj.resource_class(database=parent, table=child) elif action_obj.takes_parent: + # Parent-level resource (e.g., DatabaseResource) resource_obj = action_obj.resource_class(database=parent) else: - resource_obj = action_obj.resource_class() + # This shouldn't happen given validation in Action.__post_init__ + return {"error": f"Invalid action configuration: {action}"}, 500 allowed = await ds.allowed(action=action, resource=resource_obj, actor=actor) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 0dc4bd6e..859b0c84 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -883,24 +883,18 @@ Actions define what operations can be performed on resources (like viewing a tab name="list-documents", abbr="ld", description="List documents in a collection", - takes_parent=True, - takes_child=False, resource_class=DocumentCollectionResource, ), Action( name="view-document", abbr="vdoc", description="View document", - takes_parent=True, - takes_child=True, resource_class=DocumentResource, ), Action( name="edit-document", abbr="edoc", description="Edit document", - takes_parent=True, - takes_child=True, 
resource_class=DocumentResource, ), ] @@ -916,26 +910,20 @@ The fields of the ``Action`` dataclass are as follows: ``description`` - string or None A human-readable description of what the action allows you to do. -``takes_parent`` - boolean - ``True`` if this action requires a parent identifier (like a database name). - -``takes_child`` - boolean - ``True`` if this action requires a child identifier (like a table or document name). - -``resource_class`` - type[Resource] - The Resource subclass that defines what kind of resource this action applies to. Your Resource subclass must: +``resource_class`` - type[Resource] or None + The Resource subclass that defines what kind of resource this action applies to. Omit this (or set to ``None``) for global actions that apply only at the instance level with no associated resources (like ``debug-menu`` or ``permissions-debug``). Your Resource subclass must: - Define a ``name`` class attribute (e.g., ``"document"``) - - Optionally define a ``parent_name`` class attribute (e.g., ``"collection"``) + - Define a ``parent_class`` class attribute (``None`` for top-level resources like databases, or the parent ``Resource`` subclass for child resources) - Implement a ``resources_sql()`` classmethod that returns SQL returning all resources as ``(parent, child)`` columns - Have an ``__init__`` method that accepts appropriate parameters and calls ``super().__init__(parent=..., child=...)`` The ``resources_sql()`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``resources_sql()`` classmethod is crucial to Datasette's permission system. It returns a SQL query that lists all resources of that type that exist in the system. +The ``resources_sql()`` classmethod returns a SQL query that lists all resources of that type that exist in the system. -This SQL query is used by Datasette to efficiently check permissions across multiple resources at once. 
When a user requests a list of resources (like tables, documents, or other entities), Datasette uses this SQL to: +This query is used by Datasette to efficiently check permissions across multiple resources at once. When a user requests a list of resources (like tables, documents, or other entities), Datasette uses this SQL to: 1. Get all resources of this type from your data catalog 2. Combine it with permission rules from the ``permission_resources_sql`` hook diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index fcb77062..ec2b9a5a 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -41,7 +41,7 @@ def register_permissions(datasette): ), ] ``` -The new `Action` does not have a `default=` parameter, and `takes_database` and `takes_resource` have been renamed to `takes_parent` and `takes_child. The new code would look like this: +The new `Action` does not have a `default=` parameter. For global actions (those that don't apply to specific resources), omit `resource_class`: ```python from datasette.permissions import Action @@ -53,21 +53,41 @@ def register_actions(datasette): name="datasette-pins-write", abbr=None, description="Can pin, unpin, and re-order pins for datasette-pins", - takes_parent=False, - takes_child=False, - default=False, ), Action( name="datasette-pins-read", abbr=None, description="Can read pinned items.", - takes_parent=False, - takes_child=False, - default=False, ), ] ``` +For actions that apply to specific resources (like databases or tables), specify the `resource_class` instead of `takes_parent` and `takes_child`: + +```python +from datasette.permissions import Action +from datasette.resources import DatabaseResource, TableResource + +@hookimpl +def register_actions(datasette): + return [ + Action( + name="execute-sql", + abbr="es", + description="Execute SQL queries", + resource_class=DatabaseResource, # Parent-level resource + ), + Action( + name="insert-row", + abbr="ir", + description="Insert rows", + 
resource_class=TableResource, # Child-level resource + ), + ] +``` + +The hierarchy information (whether an action takes parent/child parameters) is now derived from the `Resource` class hierarchy. `Action` has `takes_parent` and `takes_child` properties that are computed based on the `resource_class` and its `parent_class` attribute. + ## permission_allowed() hook is replaced by permission_resources_sql() The following old code: diff --git a/tests/conftest.py b/tests/conftest.py index 4797ab71..4a8ef51d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,10 @@ UNDOCUMENTED_PERMISSIONS = { "this_is_allowed_async", "this_is_denied_async", "no_match", + # Test actions from test_hook_register_actions_with_custom_resources + "manage_documents", + "view_document_collection", + "view_document", } _ds_client = None diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 2cdd75b0..1435ce28 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -3,7 +3,7 @@ from datasette import hookimpl from datasette.facets import Facet from datasette import tracer from datasette.permissions import Action -from datasette.resources import DatabaseResource, InstanceResource +from datasette.resources import DatabaseResource from datasette.utils import path_with_added_args from datasette.utils.asgi import asgi_send_json, Response import base64 @@ -461,94 +461,90 @@ def register_actions(datasette): name="action-from-plugin", abbr="ap", description="New action added by a plugin", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, ), Action( name="view-collection", abbr="vc", description="View a collection", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, ), - # Test actions for test_hook_permission_allowed + # Test actions for test_hook_permission_allowed (global actions - no resource_class) Action( name="this_is_allowed", abbr=None, description=None, - takes_parent=False, - 
takes_child=False, - resource_class=InstanceResource, ), Action( name="this_is_denied", abbr=None, description=None, - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, ), Action( name="this_is_allowed_async", abbr=None, description=None, - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, ), Action( name="this_is_denied_async", abbr=None, description=None, - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, ), ] # Support old-style config for backwards compatibility if extras_old: for p in extras_old["permissions"]: - # Map old takes_database/takes_resource to new takes_parent/takes_child - actions.append( - Action( - name=p["name"], - abbr=p["abbr"], - description=p["description"], - takes_parent=p.get("takes_database", False), - takes_child=p.get("takes_resource", False), - resource_class=( - DatabaseResource - if p.get("takes_database") - else InstanceResource - ), + # Map old takes_database/takes_resource to new global/resource_class + if p.get("takes_database"): + # Has database -> DatabaseResource + actions.append( + Action( + name=p["name"], + abbr=p["abbr"], + description=p["description"], + resource_class=DatabaseResource, + ) + ) + else: + # No database -> global action (no resource_class) + actions.append( + Action( + name=p["name"], + abbr=p["abbr"], + description=p["description"], + ) ) - ) # Support new-style config if extras_new: for a in extras_new["actions"]: - # Map string resource_class to actual class - resource_class_map = { - "InstanceResource": InstanceResource, - "DatabaseResource": DatabaseResource, - } - resource_class = resource_class_map.get( - a.get("resource_class", "InstanceResource"), InstanceResource - ) - - actions.append( - Action( - name=a["name"], - abbr=a["abbr"], - description=a["description"], - takes_parent=a.get("takes_parent", False), - takes_child=a.get("takes_child", False), - resource_class=resource_class, + # Check if this is a global action 
(no resource_class specified) + if not a.get("resource_class"): + actions.append( + Action( + name=a["name"], + abbr=a["abbr"], + description=a["description"], + ) + ) + else: + # Map string resource_class to actual class + resource_class_map = { + "DatabaseResource": DatabaseResource, + } + resource_class = resource_class_map.get( + a.get("resource_class", "DatabaseResource"), DatabaseResource + ) + + actions.append( + Action( + name=a["name"], + abbr=a["abbr"], + description=a["description"], + resource_class=resource_class, + ) ) - ) return actions diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f1731b40..1c601b27 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -11,7 +11,8 @@ from datasette.app import Datasette from datasette import cli, hookimpl from datasette.filters import FilterArguments from datasette.plugins import get_plugins, DEFAULT_PLUGINS, pm -from datasette.permissions import PermissionSQL +from datasette.permissions import PermissionSQL, Action +from datasette.resources import DatabaseResource from datasette.utils.sqlite import sqlite3 from datasette.utils import StartupError, await_me_maybe from jinja2 import ChoiceLoader, FileSystemLoader @@ -1184,9 +1185,6 @@ async def test_hook_register_actions(extra_metadata): "name": "extra-from-metadata", "abbr": "efm", "description": "Extra from metadata", - "takes_parent": False, - "takes_child": False, - "resource_class": "InstanceResource", } ] } @@ -1202,8 +1200,6 @@ async def test_hook_register_actions(extra_metadata): name="action-from-plugin", abbr="ap", description="New action added by a plugin", - takes_parent=True, - takes_child=False, resource_class=DatabaseResource, ) if extra_metadata: @@ -1211,9 +1207,6 @@ async def test_hook_register_actions(extra_metadata): name="extra-from-metadata", abbr="efm", description="Extra from metadata", - takes_parent=False, - takes_child=False, - resource_class=InstanceResource, ) else: assert "extra-from-metadata" not in 
ds.actions @@ -1237,17 +1230,11 @@ async def test_hook_register_actions_no_duplicates(duplicate): "name": name1, "abbr": abbr1, "description": None, - "takes_parent": False, - "takes_child": False, - "resource_class": "InstanceResource", }, { "name": name2, "abbr": abbr2, "description": None, - "takes_parent": False, - "takes_child": False, - "resource_class": "InstanceResource", }, ] } @@ -1272,17 +1259,11 @@ async def test_hook_register_actions_allows_identical_duplicates(): "name": "name1", "abbr": "abbr1", "description": None, - "takes_parent": False, - "takes_child": False, - "resource_class": "InstanceResource", }, { "name": "name1", "abbr": "abbr1", "description": None, - "takes_parent": False, - "takes_child": False, - "resource_class": "InstanceResource", }, ] } @@ -1556,6 +1537,240 @@ async def test_hook_register_actions(): assert action.description == "View a collection" +@pytest.mark.asyncio +async def test_hook_register_actions_with_custom_resources(): + """ + Test registering actions with custom Resource classes: + - A global action (no resource) + - A parent-level action (DocumentCollectionResource) + - A child-level action (DocumentResource) + """ + from datasette.permissions import Resource, Action + + # Define custom Resource classes + class DocumentCollectionResource(Resource): + """A collection of documents.""" + + name = "document_collection" + parent_class = None # Top-level resource + + def __init__(self, collection: str): + super().__init__(parent=collection, child=None) + + @classmethod + async def resources_sql(cls, datasette) -> str: + return """ + SELECT 'collection1' AS parent, NULL AS child + UNION ALL + SELECT 'collection2' AS parent, NULL AS child + """ + + class DocumentResource(Resource): + """A document in a collection.""" + + name = "document" + parent_class = DocumentCollectionResource # Child of DocumentCollectionResource + + def __init__(self, collection: str, document: str): + super().__init__(parent=collection, 
child=document) + + @classmethod + async def resources_sql(cls, datasette) -> str: + return """ + SELECT 'collection1' AS parent, 'doc1' AS child + UNION ALL + SELECT 'collection1' AS parent, 'doc2' AS child + UNION ALL + SELECT 'collection2' AS parent, 'doc3' AS child + """ + + # Define a test plugin that registers these actions + class TestPlugin: + __name__ = "test_custom_resources_plugin" + + @hookimpl + def register_actions(self, datasette): + return [ + # Global action - no resource_class + Action( + name="manage-documents", + abbr="md", + description="Manage the document system", + ), + # Parent-level action - collection only + Action( + name="view-document-collection", + abbr="vdc", + description="View a document collection", + resource_class=DocumentCollectionResource, + ), + # Child-level action - collection + document + Action( + name="view-document", + abbr="vdoc", + description="View a document", + resource_class=DocumentResource, + ), + ] + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + from datasette.permissions import PermissionSQL + + # Grant user2 access to manage-documents globally + if actor and actor.get("id") == "user2" and action == "manage-documents": + return PermissionSQL.allow(reason="user2 granted manage-documents") + + # Grant user2 access to view-document-collection globally + if ( + actor + and actor.get("id") == "user2" + and action == "view-document-collection" + ): + return PermissionSQL.allow( + reason="user2 granted view-document-collection" + ) + + # Register the plugin temporarily + plugin = TestPlugin() + pm.register(plugin, name="test_custom_resources_plugin") + + try: + # Create datasette instance and invoke startup + datasette = Datasette(memory=True) + await datasette.invoke_startup() + + # Test global action + manage_docs = datasette.actions["manage-documents"] + assert manage_docs.name == "manage-documents" + assert manage_docs.abbr == "md" + assert manage_docs.resource_class is None + 
assert manage_docs.takes_parent is False + assert manage_docs.takes_child is False + + # Test parent-level action + view_collection = datasette.actions["view-document-collection"] + assert view_collection.name == "view-document-collection" + assert view_collection.abbr == "vdc" + assert view_collection.resource_class is DocumentCollectionResource + assert view_collection.takes_parent is True + assert view_collection.takes_child is False + + # Test child-level action + view_doc = datasette.actions["view-document"] + assert view_doc.name == "view-document" + assert view_doc.abbr == "vdoc" + assert view_doc.resource_class is DocumentResource + assert view_doc.takes_parent is True + assert view_doc.takes_child is True + + # Verify the resource classes have correct hierarchy + assert DocumentCollectionResource.parent_class is None + assert DocumentResource.parent_class is DocumentCollectionResource + + # Test that resources can be instantiated correctly + collection_resource = DocumentCollectionResource(collection="collection1") + assert collection_resource.parent == "collection1" + assert collection_resource.child is None + + doc_resource = DocumentResource(collection="collection1", document="doc1") + assert doc_resource.parent == "collection1" + assert doc_resource.child == "doc1" + + # Test permission checks with restricted actors + + # Test 1: Global action - no restrictions (custom actions default to deny) + unrestricted_actor = {"id": "user1"} + allowed = await datasette.allowed( + action="manage-documents", + actor=unrestricted_actor, + ) + assert allowed is False # Custom actions have no default allow + + # Test 2: Global action - user2 has explicit permission via plugin hook + restricted_global = {"id": "user2", "_r": {"a": ["md"]}} + allowed = await datasette.allowed( + action="manage-documents", + actor=restricted_global, + ) + assert allowed is True # Granted by plugin hook for user2 + + # Test 3: Global action - restricted but not in allowlist + 
restricted_no_access = {"id": "user3", "_r": {"a": ["vdc"]}} + allowed = await datasette.allowed( + action="manage-documents", + actor=restricted_no_access, + ) + assert allowed is False # Not in allowlist + + # Test 4: Collection-level action - allowed for specific collection + collection_resource = DocumentCollectionResource(collection="collection1") + restricted_collection = {"id": "user4", "_r": {"d": {"collection1": ["vdc"]}}} + allowed = await datasette.allowed( + action="view-document-collection", + resource=collection_resource, + actor=restricted_collection, + ) + assert allowed is True # Allowed for collection1 + + # Test 5: Collection-level action - denied for different collection + collection2_resource = DocumentCollectionResource(collection="collection2") + allowed = await datasette.allowed( + action="view-document-collection", + resource=collection2_resource, + actor=restricted_collection, + ) + assert allowed is False # Not allowed for collection2 + + # Test 6: Document-level action - allowed for specific document + doc1_resource = DocumentResource(collection="collection1", document="doc1") + restricted_document = { + "id": "user5", + "_r": {"r": {"collection1": {"doc1": ["vdoc"]}}}, + } + allowed = await datasette.allowed( + action="view-document", + resource=doc1_resource, + actor=restricted_document, + ) + assert allowed is True # Allowed for collection1/doc1 + + # Test 7: Document-level action - denied for different document + doc2_resource = DocumentResource(collection="collection1", document="doc2") + allowed = await datasette.allowed( + action="view-document", + resource=doc2_resource, + actor=restricted_document, + ) + assert allowed is False # Not allowed for collection1/doc2 + + # Test 8: Document-level action - globally allowed + doc_resource = DocumentResource(collection="collection2", document="doc3") + restricted_all_docs = {"id": "user6", "_r": {"a": ["vdoc"]}} + allowed = await datasette.allowed( + action="view-document", + 
resource=doc_resource, + actor=restricted_all_docs, + ) + assert allowed is True # Globally allowed for all documents + + # Test 9: Verify hierarchy - collection access doesn't grant document access + collection_only_actor = {"id": "user7", "_r": {"d": {"collection1": ["vdc"]}}} + doc_resource = DocumentResource(collection="collection1", document="doc1") + allowed = await datasette.allowed( + action="view-document", + resource=doc_resource, + actor=collection_only_actor, + ) + assert ( + allowed is False + ) # Collection permission doesn't grant document permission + + finally: + # Unregister the plugin + pm.unregister(plugin) + + @pytest.mark.skip(reason="TODO") @pytest.mark.parametrize( "metadata,config,expected_metadata,expected_config", From 2b962beaeb90e1966cd5dfe0d3d3ed8250367d2b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 11:51:22 -0700 Subject: [PATCH 016/299] Fix permissions_execute_sql warnings in documentation --- docs/pages.rst | 4 ++-- docs/plugin_hooks.rst | 2 +- docs/settings.rst | 2 +- docs/sql_queries.rst | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/pages.rst b/docs/pages.rst index 3ba20ea7..3d6530a3 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -28,7 +28,7 @@ The index page can also be accessed at ``/-/``, useful for if the default index Database ======== -Each database has a page listing the tables, views and canned queries available for that database. If the :ref:`permissions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. +Each database has a page listing the tables, views and canned queries available for that database. If the :ref:`actions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. 
Examples: @@ -60,7 +60,7 @@ The following tables are hidden by default: Queries ======= -The ``/database-name/-/query`` page can be used to execute an arbitrary SQL query against that database, if the :ref:`permissions_execute_sql` permission is enabled. This query is passed as the ``?sql=`` query string parameter. +The ``/database-name/-/query`` page can be used to execute an arbitrary SQL query against that database, if the :ref:`actions_execute_sql` permission is enabled. This query is passed as the ``?sql=`` query string parameter. This means you can link directly to a query by constructing the following URL: diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 859b0c84..3156aa7d 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1426,7 +1426,7 @@ Here's an example that allows users to view the ``admin_log`` table only if thei return inner -See :ref:`built-in permissions ` for a full list of permissions that are included in Datasette core. +See :ref:`built-in permissions ` for a full list of permissions that are included in Datasette core. Example: `datasette-permissions-sql `_ diff --git a/docs/settings.rst b/docs/settings.rst index 62810952..5cd49113 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -69,7 +69,7 @@ default_allow_sql Should users be able to execute arbitrary SQL queries by default? -Setting this to ``off`` causes permission checks for :ref:`permissions_execute_sql` to fail by default. +Setting this to ``off`` causes permission checks for :ref:`actions_execute_sql` to fail by default. :: diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index a95ccc87..7c3cd4ac 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -7,7 +7,7 @@ Datasette treats SQLite database files as read-only and immutable. This means it The easiest way to execute custom SQL against Datasette is through the web UI. The database index page includes a SQL editor that lets you run any SELECT query you like. 
You can also construct queries using the filter interface on the tables page, then click "View and edit SQL" to open that query in the custom SQL editor. -Note that this interface is only available if the :ref:`permissions_execute_sql` permission is allowed. See :ref:`authentication_permissions_execute_sql`. +Note that this interface is only available if the :ref:`actions_execute_sql` permission is allowed. See :ref:`authentication_permissions_execute_sql`. Any Datasette SQL query is reflected in the URL of the page, allowing you to bookmark them, share them with others and navigate through previous queries using your browser back button. From a528555e8491321ced5471540a84ec5b28a9cf83 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 18:38:29 -0700 Subject: [PATCH 017/299] Additional actor restriction should not grant access to additional actions (#2569) Closes #2568 --- datasette/default_permissions.py | 69 +++++++++++++++++++++++++++----- docs/authentication.rst | 6 +++ tests/test_permissions.py | 60 ++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 19 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 0f64cbc5..1fc85ebf 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -88,17 +88,33 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS has_restrictions = actor_dict and "_r" in actor_dict if actor_dict else False restrictions = actor_dict.get("_r", {}) if actor_dict else {} + action_obj = datasette.actions.get(action) + action_checks = {action} + if action_obj and action_obj.abbr: + action_checks.add(action_obj.abbr) + + restricted_databases: set[str] = set() + restricted_tables: set[tuple[str, str]] = set() + if has_restrictions: + restricted_databases = { + db_name + for db_name, db_actions in (restrictions.get("d") or {}).items() + if action_checks.intersection(db_actions) + } + restricted_tables = { + (db_name, 
table_name) + for db_name, tables in (restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + restricted_databases.update(db for db, _ in restricted_tables) + def is_in_restriction_allowlist(parent, child, action): """Check if a resource is in the actor's restriction allowlist for this action""" if not has_restrictions: return True # No restrictions, all resources allowed - # Check action with abbreviations - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - # Check global allowlist if action_checks.intersection(restrictions.get("a", [])): return True @@ -110,9 +126,25 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS return True # Check table-level allowlist - if parent and child: - table_actions = restrictions.get("r", {}).get(parent, {}).get(child, []) - if action_checks.intersection(table_actions): + if parent: + table_restrictions = (restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if action_checks.intersection(restrictions.get("a", [])): + return True + if restricted_databases: + return True + if restricted_tables: return True return False @@ -142,15 +174,32 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS return result = evaluate(allow_block) + bool_result = bool(result) # If result is None (no match) or False, treat as 
deny rows.append( ( parent, child, - bool(result), # None becomes False, False stays False, True stays True + bool_result, # None becomes False, False stays False, True stays True f"config {'allow' if result else 'deny'} {scope}", ) ) + if has_restrictions and not bool_result and child is None: + reason = f"config deny {scope} (restriction gate)" + if parent is None: + # Root-level deny: add more specific denies for restricted resources + if action_obj and action_obj.takes_parent: + for db_name in restricted_databases: + rows.append((db_name, None, 0, reason)) + if action_obj and action_obj.takes_child: + for db_name, table_name in restricted_tables: + rows.append((db_name, table_name, 0, reason)) + else: + # Database-level deny: add child-level denies for restricted tables + if action_obj and action_obj.takes_child: + for db_name, table_name in restricted_tables: + if db_name == parent: + rows.append((db_name, table_name, 0, reason)) root_perm = (config.get("permissions") or {}).get(action) add_row(None, None, evaluate(root_perm), f"permissions for {action}") diff --git a/docs/authentication.rst b/docs/authentication.rst index 28fb76bb..e69b0aa4 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1033,6 +1033,12 @@ This example outputs the following:: } } +Restrictions act as an allowlist layered on top of the actor's existing +permissions. They can only remove access the actor would otherwise have—they +cannot grant new access. If the underlying actor is denied by ``allow`` rules in +``datasette.yaml`` or by a plugin, a token that lists that resource in its +``"_r"`` section will still be denied. + .. 
_permissions_plugins: diff --git a/tests/test_permissions.py b/tests/test_permissions.py index c5f547ea..6def3840 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1117,16 +1117,29 @@ async def test_api_explorer_visibility( @pytest.mark.asyncio -async def test_view_table_token_can_access_table(perms_ds): - actor = { - "id": "restricted-token", - "token": "dstok", - # Restricted to just view-table on perms_ds_two/t1 - "_r": {"r": {"perms_ds_two": {"t1": ["vt"]}}}, +async def test_view_table_token_cannot_gain_access_without_base_permission(perms_ds): + # Only allow a different actor to view this table + previous_config = perms_ds.config + perms_ds.config = { + "databases": { + "perms_ds_two": { + # Only someone-else can see anything in this database + "allow": {"id": "someone-else"}, + } + } } - cookies = {"ds_actor": perms_ds.client.actor_cookie(actor)} - response = await perms_ds.client.get("/perms_ds_two/t1.json", cookies=cookies) - assert response.status_code == 200 + try: + actor = { + "id": "restricted-token", + "token": "dstok", + # Restricted token claims access to perms_ds_two/t1 only + "_r": {"r": {"perms_ds_two": {"t1": ["vt"]}}}, + } + cookies = {"ds_actor": perms_ds.client.actor_cookie(actor)} + response = await perms_ds.client.get("/perms_ds_two/t1.json", cookies=cookies) + assert response.status_code == 403 + finally: + perms_ds.config = previous_config @pytest.mark.asyncio @@ -1337,6 +1350,35 @@ async def test_actor_restrictions_filters_allowed_resources(perms_ds): assert len(db_page.resources) == 0 +@pytest.mark.asyncio +async def test_actor_restrictions_do_not_expand_allowed_resources(perms_ds): + """Restrictions cannot grant access not already allowed to the actor.""" + + previous_config = perms_ds.config + perms_ds.config = { + "databases": { + "perms_ds_one": { + "allow": {"id": "someone-else"}, + } + } + } + try: + actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}} + + # Base actor is not allowed to 
see t1, so restrictions should not change that + page = await perms_ds.allowed_resources("view-table", actor) + assert len(page.resources) == 0 + + # And explicit permission checks should still deny + response = await perms_ds.client.get( + "/perms_ds_one/t1.json", + cookies={"ds_actor": perms_ds.client.actor_cookie(actor)}, + ) + assert response.status_code == 403 + finally: + perms_ds.config = previous_config + + @pytest.mark.asyncio async def test_actor_restrictions_database_level(perms_ds): """Test database-level restrictions allow all tables in database - issue #2534""" From 5c16c6687d3a55fbb6d0876ce1a4ba623b452f08 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 18:36:06 -0700 Subject: [PATCH 018/299] Split permissions_resources_sql() into 5 for readability Also remove an obsolete test that caused trouble with the new split plugin hook. Closes #2570 --- datasette/default_permissions.py | 83 ++++++++++++++++---------------- tests/test_actions_sql.py | 59 ----------------------- 2 files changed, 42 insertions(+), 100 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 1fc85ebf..41e1ea7f 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -12,60 +12,61 @@ import itsdangerous import time -@hookimpl -async def permission_resources_sql(datasette, actor, action): - rules: list[PermissionSQL] = [] +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql(datasette, actor, action): + """Handle actor restriction-based permission rules (_r key).""" + if not actor: + return None + return await _restriction_permission_rules(datasette, actor, action) - # 1. FIRST: Actor restrictions (if present) - # These act as a gating filter - must pass through before other checks - restriction_rules = await _restriction_permission_rules(datasette, actor, action) - rules.extend(restriction_rules) - # 2. 
Root user permissions - # Root user with root_enabled gets all permissions at global level - # Config rules at more specific levels (database/table) can still override +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql(datasette, actor, action): + """Grant root user full permissions when enabled.""" if datasette.root_enabled and actor and actor.get("id") == "root": # Add a single global-level allow rule (NULL, NULL) for root # This allows root to access everything by default, but database-level # and table-level deny rules in config can still block specific resources - rules.append(PermissionSQL.allow(reason="root user")) + return PermissionSQL.allow(reason="root user") + return None - # 3. Config-based permission rules - config_rules = await _config_permission_rules(datasette, actor, action) - rules.extend(config_rules) - # 4. Check default_allow_sql setting for execute-sql action +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql(datasette, actor, action): + """Apply config-based permission rules from datasette.yaml.""" + return await _config_permission_rules(datasette, actor, action) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check(datasette, actor, action): + """Enforce default_allow_sql setting for execute-sql action.""" if action == "execute-sql" and not datasette.setting("default_allow_sql"): - # Return a deny rule for all databases - rules.append(PermissionSQL.deny(reason="default_allow_sql is false")) - # Early return - don't add default allow rule - if not rules: - return None - if len(rules) == 1: - return rules[0] - return rules + return PermissionSQL.deny(reason="default_allow_sql is false") + return None - # 5. 
Default allow actions (ONLY if no restrictions) + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql(datasette, actor, action): + """Apply default allow rules for standard view/execute actions.""" + # Only apply defaults if actor has no restrictions # If actor has restrictions, they've already added their own deny/allow rules has_restrictions = actor and "_r" in actor - if not has_restrictions: - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - rules.append(PermissionSQL.allow(reason=reason)) - - if not rules: + if has_restrictions: return None - if len(rules) == 1: - return rules[0] - return rules + + default_allow_actions = { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } + if action in default_allow_actions: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None async def _config_permission_rules(datasette, actor, action) -> list[PermissionSQL]: diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 19d44528..734a427d 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -315,62 +315,3 @@ async def test_sql_does_filtering_not_python(test_ds): finally: pm.unregister(plugin, name="test_plugin") - - -@pytest.mark.asyncio -async def test_no_permission_rules_returns_correct_schema(): - """ - Test that when no permission rules exist, the empty result has correct schema. - - This is a regression test for a bug where the empty result returned only - 2 columns (parent, child) instead of the documented 3 columns - (parent, child, reason), causing schema mismatches. 
- - See: https://github.com/simonw/datasette/pull/2515#discussion_r2457803901 - """ - from datasette.utils.actions_sql import build_allowed_resources_sql - - # Create a fresh datasette instance - ds = Datasette() - await ds.invoke_startup() - - # Add a test database - db = ds.add_memory_database("testdb") - await db.execute_write( - "CREATE TABLE IF NOT EXISTS test_table (id INTEGER PRIMARY KEY)" - ) - await ds._refresh_schemas() - - # Temporarily unregister all permission_resources_sql providers to simulate no rules - hook_caller = pm.hook.permission_resources_sql - hookimpls = hook_caller.get_hookimpls() - removed_plugins = [ - (impl.plugin_name, impl.plugin) for impl in hookimpls if impl.plugin is not None - ] - - for plugin_name, _ in removed_plugins: - pm.unregister(name=plugin_name) - - try: - # Call build_allowed_resources_sql directly which will hit the no-rules code path - sql, params = await build_allowed_resources_sql( - ds, actor={"id": "nobody"}, action="view-table" - ) - - # Execute the query to verify it has correct column structure - result = await ds.get_internal_database().execute(sql, params) - - # Should have 3 columns: parent, child, reason - # This assertion would fail if the empty result only had 2 columns - assert ( - len(result.columns) == 3 - ), f"Expected 3 columns, got {len(result.columns)}: {result.columns}" - assert result.columns == ["parent", "child", "reason"] - - # Should have no rows (no rules = no access) - assert len(result.rows) == 0 - - finally: - # Restore original plugins in the order they were removed - for plugin_name, plugin in removed_plugins: - pm.register(plugin, name=plugin_name) From b8cee8768ee298d618106a10a95eaa0c70b45d06 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 18:57:56 -0700 Subject: [PATCH 019/299] Completed upgrade guide, closes #2564 --- docs/upgrade-1.0a20.md | 143 ++++++++++++++++++++++++++++++----------- 1 file changed, 104 insertions(+), 39 deletions(-) diff --git 
a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index ec2b9a5a..339ab588 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -6,10 +6,10 @@ orphan: true -Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use any of the following: +Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use **any of the following**: - The `register_permissions()` plugin hook - this should be replaced with `register_actions` -- The `permission_allowed()` plugin hook - this should be upgraded to `permission_resources_sql()`. +- The `permission_allowed()` plugin hook - this should be upgraded to use `permission_resources_sql()`. - The `datasette.permission_allowed()` internal method - this should be replaced with `datasette.allowed()` - Logic that grants access to the `"root"` actor can be removed. @@ -24,47 +24,37 @@ Old code: def register_permissions(datasette): return [ Permission( - name="datasette-pins-write", + name="explain-sql", abbr=None, - description="Can pin, unpin, and re-order pins for datasette-pins", - takes_database=False, + description="Can explain SQL queries", + takes_database=True, takes_resource=False, default=False, ), Permission( - name="datasette-pins-read", + name="annotate-rows", abbr=None, - description="Can read pinned items.", + description="Can annotate rows", + takes_database=True, + takes_resource=True, + default=False, + ), + Permission( + name="view-debug-info", + abbr=None, + description="Can view debug information", takes_database=False, takes_resource=False, default=False, ), ] ``` -The new `Action` does not have a `default=` parameter. 
For global actions (those that don't apply to specific resources), omit `resource_class`: - -```python -from datasette.permissions import Action - -@hookimpl -def register_actions(datasette): - return [ - Action( - name="datasette-pins-write", - abbr=None, - description="Can pin, unpin, and re-order pins for datasette-pins", - ), - Action( - name="datasette-pins-read", - abbr=None, - description="Can read pinned items.", - ), - ] -``` - -For actions that apply to specific resources (like databases or tables), specify the `resource_class` instead of `takes_parent` and `takes_child`: +The new `Action` does not have a `default=` parameter. + +Here's the equivalent new code: ```python +from datasette import hookimpl from datasette.permissions import Action from datasette.resources import DatabaseResource, TableResource @@ -72,21 +62,26 @@ from datasette.resources import DatabaseResource, TableResource def register_actions(datasette): return [ Action( - name="execute-sql", - abbr="es", - description="Execute SQL queries", - resource_class=DatabaseResource, # Parent-level resource + name="explain-sql", + abbr=None, + description="Explain SQL queries", + resource_class=DatabaseResource, ), Action( - name="insert-row", - abbr="ir", - description="Insert rows", - resource_class=TableResource, # Child-level resource + name="annotate-rows", + abbr=None, + description="Annotate rows", + resource_class=TableResource, + ), + Action( + name="view-debug-info", + abbr=None, + description="View debug information", ), ] ``` -The hierarchy information (whether an action takes parent/child parameters) is now derived from the `Resource` class hierarchy. `Action` has `takes_parent` and `takes_child` properties that are computed based on the `resource_class` and its `parent_class` attribute. +For actions that apply to specific resources (like databases or tables), specify the `resource_class` instead of `takes_parent` and `takes_child`. 
Note that `view-debug-info` does not specify a `resource_class` because it applies globally. ## permission_allowed() hook is replaced by permission_resources_sql() @@ -110,6 +105,76 @@ A `.deny(reason="")` class method is also available. For more complex permission checks consult the documentation for that plugin hook: +## Using datasette.allowed() to check permissions instead of datasette.permission_allowed() + +The internal method `datasette.permission_allowed()` has been replaced by `datasette.allowed()`. + +The old method looked like this: +```python +can_debug = await datasette.permission_allowed( + request.actor, + "view-debug-info", +) +can_explain_sql = await datasette.permission_allowed( + request.actor, + "explain-sql", + resource="database_name", +) +can_annotate_rows = await datasette.permission_allowed( + request.actor, + "annotate-rows", + resource=(database_name, table_name), +) +``` +Note the confusing design here where `resource` could be either a string or a tuple depending on the permission being checked. 
+ +The new keyword-only design makes this a lot more clear: +```python +from datasette.resources import DatabaseResource, TableResource +can_debug = await datasette.allowed( + actor=request.actor, + action="view-debug-info", +) +can_explain_sql = await datasette.allowed( + actor=request.actor, + action="explain-sql", + resource=DatabaseResource(database_name), +) +can_annotate_rows = await datasette.allowed( + actor=request.actor, + action="annotate-rows", + resource=TableResource(database_name, table_name), +) +``` + +## Root user checks are no longer necessary + +Some plugins would introduce their own custom permission and then ensure the `"root"` actor had access to it using a pattern like this: + +```python +@hookimpl +def register_permissions(datasette): + return [ + Permission( + name="upload-dbs", + abbr=None, + description="Upload SQLite database files", + takes_database=False, + takes_resource=False, + default=False, + ) + ] + + +@hookimpl +def permission_allowed(actor, action): + if action == "upload-dbs" and actor and actor.get("id") == "root": + return True +``` +This is no longer necessary in Datasette 1.0a20 - the `"root"` actor automatically has all permissions when Datasette is started with the `datasette --root` option. + +The `permission_allowed()` hook in this example can be entirely removed. 
+ ## Fixing async with httpx.AsyncClient(app=app) Some older plugins may use the following pattern in their tests, which is no longer supported: @@ -121,5 +186,5 @@ async with httpx.AsyncClient(app=app) as client: The new pattern is to use `ds.client` like this: ```python ds = Datasette([], memory=True) -response = ds.client.get("/path") +response = await ds.client.get("/path") ``` From e37aa37edc116156595c25d879d8c37d7125e1ba Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 19:28:31 -0700 Subject: [PATCH 020/299] Further refactor to collapse some utility functions Refs #2570 --- datasette/default_permissions.py | 271 ++++++++++++++----------------- 1 file changed, 123 insertions(+), 148 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 41e1ea7f..23c96a23 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -17,7 +17,102 @@ async def actor_restrictions_sql(datasette, actor, action): """Handle actor restriction-based permission rules (_r key).""" if not actor: return None - return await _restriction_permission_rules(datasette, actor, action) + + restrictions = actor.get("_r") if isinstance(actor, dict) else None + if not restrictions: + return [] + + # Check if this action appears in restrictions (with abbreviations) + action_obj = datasette.actions.get(action) + action_checks = {action} + if action_obj and action_obj.abbr: + action_checks.add(action_obj.abbr) + + # Check if this action is in the allowlist anywhere in restrictions + is_in_allowlist = False + global_actions = restrictions.get("a", []) + if action_checks.intersection(global_actions): + is_in_allowlist = True + + if not is_in_allowlist: + for db_actions in restrictions.get("d", {}).values(): + if action_checks.intersection(db_actions): + is_in_allowlist = True + break + + if not is_in_allowlist: + for tables in restrictions.get("r", {}).values(): + for table_actions in tables.values(): + if 
action_checks.intersection(table_actions): + is_in_allowlist = True + break + if is_in_allowlist: + break + + # If action not in allowlist at all, add global deny and return + if not is_in_allowlist: + sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :actor_deny_reason AS reason" + return [ + PermissionSQL( + sql=sql, + params={ + "actor_deny_reason": f"actor restrictions: {action} not in allowlist" + }, + ) + ] + + # Action IS in allowlist - build deny + specific allows + selects = [] + params = {} + param_counter = 0 + + def add_row(parent, child, allow, reason): + """Helper to add a parameterized SELECT statement.""" + nonlocal param_counter + prefix = f"restr_{param_counter}" + param_counter += 1 + + selects.append( + f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child, " + f":{prefix}_allow AS allow, :{prefix}_reason AS reason" + ) + params[f"{prefix}_parent"] = parent + params[f"{prefix}_child"] = child + params[f"{prefix}_allow"] = 1 if allow else 0 + params[f"{prefix}_reason"] = reason + + # If NOT globally allowed, add global deny as gatekeeper + is_globally_allowed = action_checks.intersection(global_actions) + if not is_globally_allowed: + add_row(None, None, 0, f"actor restrictions: {action} denied by default") + else: + # Globally allowed - add global allow + add_row(None, None, 1, f"actor restrictions: global {action}") + + # Add database-level allows + db_restrictions = restrictions.get("d", {}) + for db_name, db_actions in db_restrictions.items(): + if action_checks.intersection(db_actions): + add_row(db_name, None, 1, f"actor restrictions: database {db_name}") + + # Add resource/table-level allows + resource_restrictions = restrictions.get("r", {}) + for db_name, tables in resource_restrictions.items(): + for table_name, table_actions in tables.items(): + if action_checks.intersection(table_actions): + add_row( + db_name, + table_name, + 1, + f"actor restrictions: {db_name}/{table_name}", + ) + + if not selects: + return [] + + 
sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(sql=sql, params=params)] @hookimpl(specname="permission_resources_sql") @@ -34,42 +129,6 @@ async def root_user_permissions_sql(datasette, actor, action): @hookimpl(specname="permission_resources_sql") async def config_permissions_sql(datasette, actor, action): """Apply config-based permission rules from datasette.yaml.""" - return await _config_permission_rules(datasette, actor, action) - - -@hookimpl(specname="permission_resources_sql") -async def default_allow_sql_check(datasette, actor, action): - """Enforce default_allow_sql setting for execute-sql action.""" - if action == "execute-sql" and not datasette.setting("default_allow_sql"): - return PermissionSQL.deny(reason="default_allow_sql is false") - return None - - -@hookimpl(specname="permission_resources_sql") -async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions.""" - # Only apply defaults if actor has no restrictions - # If actor has restrictions, they've already added their own deny/allow rules - has_restrictions = actor and "_r" in actor - if has_restrictions: - return None - - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - return PermissionSQL.allow(reason=reason) - - return None - - -async def _config_permission_rules(datasette, actor, action) -> list[PermissionSQL]: config = datasette.config or {} if actor is None: @@ -85,7 +144,6 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS return None return actor_matches_allow(actor_dict, allow_block) - # Check if actor has restrictions - if so, we'll filter config rules has_restrictions = actor_dict and "_r" in actor_dict if actor_dict else False restrictions = actor_dict.get("_r", {}) if 
actor_dict else {} @@ -111,7 +169,7 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS # Tables implicitly reference their parent databases restricted_databases.update(db for db, _ in restricted_tables) - def is_in_restriction_allowlist(parent, child, action): + def is_in_restriction_allowlist(parent, child, action_name): """Check if a resource is in the actor's restriction allowlist for this action""" if not has_restrictions: return True # No restrictions, all resources allowed @@ -315,119 +373,36 @@ async def _config_permission_rules(datasette, actor, action) -> list[PermissionS return [PermissionSQL(sql=sql, params=params)] -async def _restriction_permission_rules( - datasette, actor, action -) -> list[PermissionSQL]: - """ - Generate PermissionSQL rules from actor restrictions (_r key). +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check(datasette, actor, action): + """Enforce default_allow_sql setting for execute-sql action.""" + if action == "execute-sql" and not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + return None - Actor restrictions define an allowlist. We implement this via: - 1. Global DENY rule for the action (blocks everything by default) - 2. 
Specific ALLOW rules for each allowlisted resource - The cascading logic (child → parent → global) ensures that: - - Allowlisted resources at child/parent level override global deny - - Non-allowlisted resources are blocked by global deny +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql(datasette, actor, action): + """Apply default allow rules for standard view/execute actions.""" + # Only apply defaults if actor has no restrictions + # If actor has restrictions, they've already added their own deny/allow rules + has_restrictions = actor and "_r" in actor + if has_restrictions: + return None - This creates a gating filter that runs BEFORE normal permission checks. - Restrictions cannot be overridden by config - they gate what gets checked. - """ - if not actor or "_r" not in actor: - return [] + default_allow_actions = { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } + if action in default_allow_actions: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) - restrictions = actor["_r"] - - # Check if this action appears in restrictions (with abbreviations) - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - # Check if this action is in the allowlist anywhere in restrictions - is_in_allowlist = False - global_actions = restrictions.get("a", []) - if action_checks.intersection(global_actions): - is_in_allowlist = True - - if not is_in_allowlist: - for db_actions in restrictions.get("d", {}).values(): - if action_checks.intersection(db_actions): - is_in_allowlist = True - break - - if not is_in_allowlist: - for tables in restrictions.get("r", {}).values(): - for table_actions in tables.values(): - if action_checks.intersection(table_actions): - is_in_allowlist = True - break - if is_in_allowlist: - break - - # 
If action not in allowlist at all, add global deny and return - if not is_in_allowlist: - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :deny_reason AS reason" - return [ - PermissionSQL( - sql=sql, - params={ - "deny_reason": f"actor restrictions: {action} not in allowlist" - }, - ) - ] - - # Action IS in allowlist - build deny + specific allows - selects = [] - params = {} - param_counter = 0 - - def add_row(parent, child, allow, reason): - """Helper to add a parameterized SELECT statement""" - nonlocal param_counter - prefix = f"restr_{param_counter}" - param_counter += 1 - - selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child, " - f":{prefix}_allow AS allow, :{prefix}_reason AS reason" - ) - params[f"{prefix}_parent"] = parent - params[f"{prefix}_child"] = child - params[f"{prefix}_allow"] = 1 if allow else 0 - params[f"{prefix}_reason"] = reason - - # If NOT globally allowed, add global deny as gatekeeper - is_globally_allowed = action_checks.intersection(global_actions) - if not is_globally_allowed: - add_row(None, None, 0, f"actor restrictions: {action} denied by default") - else: - # Globally allowed - add global allow - add_row(None, None, 1, f"actor restrictions: global {action}") - - # Add database-level allows - db_restrictions = restrictions.get("d", {}) - for db_name, db_actions in db_restrictions.items(): - if action_checks.intersection(db_actions): - add_row(db_name, None, 1, f"actor restrictions: database {db_name}") - - # Add resource/table-level allows - resource_restrictions = restrictions.get("r", {}) - for db_name, tables in resource_restrictions.items(): - for table_name, table_actions in tables.items(): - if action_checks.intersection(table_actions): - add_row( - db_name, - table_name, - 1, - f"actor restrictions: {db_name}/{table_name}", - ) - - if not selects: - return [] - - sql = "\nUNION ALL\n".join(selects) - - return [PermissionSQL(sql=sql, params=params)] + return None def 
restrictions_allow_action( From 7e09e1bf1b928eb259fd95394aa202abb8a98cc2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 19:30:56 -0700 Subject: [PATCH 021/299] Removed obsolete actor ID v.s. actor dict code, refs #2570 --- datasette/default_permissions.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 23c96a23..9afb088e 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -131,21 +131,13 @@ async def config_permissions_sql(datasette, actor, action): """Apply config-based permission rules from datasette.yaml.""" config = datasette.config or {} - if actor is None: - actor_dict: dict | None = None - elif isinstance(actor, dict): - actor_dict = actor - else: - actor_lookup = await datasette.actors_from_ids([actor]) - actor_dict = actor_lookup.get(actor) or {"id": actor} - def evaluate(allow_block): if allow_block is None: return None - return actor_matches_allow(actor_dict, allow_block) + return actor_matches_allow(actor, allow_block) - has_restrictions = actor_dict and "_r" in actor_dict if actor_dict else False - restrictions = actor_dict.get("_r", {}) if actor_dict else {} + has_restrictions = actor and "_r" in actor if actor else False + restrictions = actor.get("_r", {}) if actor else {} action_obj = datasette.actions.get(action) action_checks = {action} From 063bf7a96f42a62df6253128b48f230946e5450f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 20:20:17 -0700 Subject: [PATCH 022/299] Action() is kw_only, abbr= is optional, closes #2571 --- datasette/permissions.py | 4 ++-- docs/plugin_hooks.rst | 2 +- docs/upgrade-1.0a20.md | 4 +--- tests/test_plugins.py | 9 ++++++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/datasette/permissions.py b/datasette/permissions.py index 8e0d0fc1..7b1fc90c 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ 
-95,11 +95,11 @@ class AllowedResource(NamedTuple): reason: str -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Action: name: str - abbr: str | None description: str | None + abbr: str | None = None resource_class: type[Resource] | None = None also_requires: str | None = None # Optional action name that must also be allowed diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 3156aa7d..51e4a69f 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -905,7 +905,7 @@ The fields of the ``Action`` dataclass are as follows: The name of the action, e.g. ``view-document``. This should be unique across all plugins. ``abbr`` - string or None - An abbreviation of the action, e.g. ``vdoc``. This is optional. Since this needs to be unique across all installed plugins it's best to choose carefully or use ``None``. + An abbreviation of the action, e.g. ``vdoc``. This is optional. Since this needs to be unique across all installed plugins it's best to choose carefully or omit it entirely (same as setting it to ``None``.) ``description`` - string or None A human-readable description of what the action allows you to do. diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 339ab588..0dbb9626 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -63,23 +63,21 @@ def register_actions(datasette): return [ Action( name="explain-sql", - abbr=None, description="Explain SQL queries", resource_class=DatabaseResource, ), Action( name="annotate-rows", - abbr=None, description="Annotate rows", resource_class=TableResource, ), Action( name="view-debug-info", - abbr=None, description="View debug information", ), ] ``` +The `abbr=` is now optional and defaults to `None`. For actions that apply to specific resources (like databases or tables), specify the `resource_class` instead of `takes_parent` and `takes_child`. Note that `view-debug-info` does not specify a `resource_class` because it applies globally. 
diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 1c601b27..5a530b25 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1600,7 +1600,6 @@ async def test_hook_register_actions_with_custom_resources(): # Parent-level action - collection only Action( name="view-document-collection", - abbr="vdc", description="View a document collection", resource_class=DocumentCollectionResource, ), @@ -1651,7 +1650,7 @@ async def test_hook_register_actions_with_custom_resources(): # Test parent-level action view_collection = datasette.actions["view-document-collection"] assert view_collection.name == "view-document-collection" - assert view_collection.abbr == "vdc" + assert view_collection.abbr is None assert view_collection.resource_class is DocumentCollectionResource assert view_collection.takes_parent is True assert view_collection.takes_child is False @@ -1705,7 +1704,11 @@ async def test_hook_register_actions_with_custom_resources(): # Test 4: Collection-level action - allowed for specific collection collection_resource = DocumentCollectionResource(collection="collection1") - restricted_collection = {"id": "user4", "_r": {"d": {"collection1": ["vdc"]}}} + # This one does not have an abbreviation: + restricted_collection = { + "id": "user4", + "_r": {"d": {"collection1": ["view-document-collection"]}}, + } allowed = await datasette.allowed( action="view-document-collection", resource=collection_resource, From 506ce5b0ac5f332d93eb4d83b3a4836fb9093478 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 20:23:37 -0700 Subject: [PATCH 023/299] Remove docs for obsolete register_permissions() hook, refs #2528 Also removed docs for datasette.get_permission() method which no longer exists. 
--- docs/changelog.rst | 4 ++-- docs/internals.rst | 20 +++++------------ docs/plugin_hooks.rst | 50 ------------------------------------------- 3 files changed, 7 insertions(+), 67 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index db43634a..f98ad8ac 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,7 +26,7 @@ Affected plugins should make the following changes: - Replace calls to ``datasette.permission_allowed()`` with calls to the new :ref:`datasette.allowed() ` method. The new method takes a ``resource=`` parameter which should be an instance of a ``Resource`` subclass, as described in the method documentation. - The ``permission_allowed()`` plugin hook has been removed in favor of the new :ref:`permission_resources_sql() ` hook. -- The ``register_permissions()`` plugni hook has been removed in favor of :ref:`register_actions() `. +- The ``register_permissions()`` plugin hook has been removed in favor of :ref:`register_actions() `. Plugins can now make use of two new internal methods to help resolve permission checks: @@ -522,7 +522,7 @@ The third Datasette 1.0 alpha release adds upsert support to the JSON API, plus See `Datasette 1.0a2: Upserts and finely grained permissions `__ for an extended, annotated version of these release notes. - New ``/db/table/-/upsert`` API, :ref:`documented here `. upsert is an update-or-insert: existing rows will have specified keys updated, but if no row matches the incoming primary key a brand new row will be inserted instead. (:issue:`1878`) -- New :ref:`plugin_register_permissions` plugin hook. Plugins can now register named permissions, which will then be listed in various interfaces that show available permissions. (:issue:`1940`) +- New ``register_permissions()`` plugin hook. Plugins can now register named permissions, which will then be listed in various interfaces that show available permissions. 
(:issue:`1940`) - The ``/db/-/create`` API for :ref:`creating a table ` now accepts ``"ignore": true`` and ``"replace": true`` options when called with the ``"rows"`` property that creates a new table based on an example set of rows. This means the API can be called multiple times with different rows, setting rules for what should happen if a primary key collides with an existing row. (:issue:`1927`) - Arbitrary permissions can now be configured at the instance, database and resource (table, SQL view or canned query) level in Datasette's :ref:`metadata` JSON and YAML files. The new ``"permissions"`` key can be used to specify which actors should have which permissions. See :ref:`authentication_permissions_other` for details. (:issue:`1636`) - The ``/-/create-token`` page can now be used to create API tokens which are restricted to just a subset of actions, including against specific databases or resources. See :ref:`CreateTokenView` for details. (:issue:`1947`) diff --git a/docs/internals.rst b/docs/internals.rst index 0132fddf..406bc9b3 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -272,14 +272,14 @@ The dictionary keys are the name of the database that is used in the URL - e.g. All databases are listed, irrespective of user permissions. -.. _datasette_permissions: +.. _datasette_actions: -.permissions ------------- +.actions +-------- -Property exposing a dictionary of permissions that have been registered using the :ref:`plugin_register_permissions` plugin hook. +Property exposing a dictionary of actions that have been registered using the :ref:`plugin_register_actions` plugin hook. -The dictionary keys are the permission names - e.g. ``view-instance`` - and the values are ``Permission()`` objects describing the permission. Here is a :ref:`description of that object `. +The dictionary keys are the action names - e.g. ``view-instance`` - and the values are ``Action()`` objects describing the permission. .. 
_datasette_plugin_config: @@ -594,16 +594,6 @@ The following example creates a token that can access ``view-instance`` and ``vi }, ) -.. _datasette_get_permission: - -.get_permission(name_or_abbr) ------------------------------ - -``name_or_abbr`` - string - The name or abbreviation of the permission to look up, e.g. ``view-table`` or ``vt``. - -Returns a :ref:`Permission object ` representing the permission, or raises a ``KeyError`` if one is not found. - .. _datasette_get_database: .get_database(name) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 51e4a69f..93f7f476 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -777,56 +777,6 @@ The plugin hook can then be used to register the new facet class like this: def register_facet_classes(): return [SpecialFacet] -.. _plugin_register_permissions: - -register_permissions(datasette) -------------------------------- - -.. note:: - This hook is deprecated. Use :ref:`plugin_register_actions` instead, which provides a more flexible resource-based permission system. - -If your plugin needs to register additional permissions unique to that plugin - ``upload-csvs`` for example - you can return a list of those permissions from this hook. - -.. code-block:: python - - from datasette import hookimpl, Permission - - - @hookimpl - def register_permissions(datasette): - return [ - Permission( - name="upload-csvs", - abbr=None, - description="Upload CSV files", - takes_database=True, - takes_resource=False, - default=False, - ) - ] - -The fields of the ``Permission`` class are as follows: - -``name`` - string - The name of the permission, e.g. ``upload-csvs``. This should be unique across all plugins that the user might have installed, so choose carefully. - -``abbr`` - string or None - An abbreviation of the permission, e.g. ``uc``. This is optional - you can set it to ``None`` if you do not want to pick an abbreviation. 
Since this needs to be unique across all installed plugins it's best not to specify an abbreviation at all. If an abbreviation is provided it will be used when creating restricted signed API tokens. - -``description`` - string or None - A human-readable description of what the permission lets you do. Should make sense as the second part of a sentence that starts "A user with this permission can ...". - -``takes_database`` - boolean - ``True`` if this permission can be granted on a per-database basis, ``False`` if it is only valid at the overall Datasette instance level. - -``takes_resource`` - boolean - ``True`` if this permission can be granted on a per-resource basis. A resource is a database table, SQL view or :ref:`canned query `. - -``default`` - boolean - The default value for this permission if it is not explicitly granted to a user. ``True`` means the permission is granted by default, ``False`` means it is not. - - This should only be ``True`` if you want anonymous users to be able to take this action. - .. _plugin_register_actions: register_actions(datasette) From 24592850528d188fae65162f38e4ed7f63479c20 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 1 Nov 2025 20:32:38 -0700 Subject: [PATCH 024/299] Additional upgrade notes by Codex CLI Refs https://github.com/simonw/datasette/issues/2549#issuecomment-3477398336 Refs #2564 --- docs/upgrade-1.0a20.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 0dbb9626..af57ca83 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -173,6 +173,14 @@ This is no longer necessary in Datasette 1.0a20 - the `"root"` actor automatical The `permission_allowed()` hook in this example can be entirely removed. +### Root-enabled instances during testing + +When writing tests that exercise root-only functionality, make sure to set `datasette.root_enabled = True` on the `Datasette` instance. 
Root permissions are only granted automatically when Datasette is started with `datasette --root` or when the flag is enabled directly in tests. + +## Target the new APIs exclusively + +Datasette 1.0a20’s permission system is substantially different from previous releases. Attempting to keep plugin code compatible with both the old `permission_allowed()` and the new `allowed()` interfaces leads to brittle workarounds. Prefer to adopt the 1.0a20 APIs (`register_actions`, `permission_resources_sql()`, and `datasette.allowed()`) outright and drop legacy fallbacks. + ## Fixing async with httpx.AsyncClient(app=app) Some older plugins may use the following pattern in their tests, which is no longer supported: From fa978ec1006297416e2cd87a2f0d3cac99283cf8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 2 Nov 2025 12:02:45 -0800 Subject: [PATCH 025/299] More upgrade tips, written by Claude Code Refs #2549 From the datasette-atom upgrade, https://gistpreview.github.io/?d5047e04bbd9c20c59437916e21754ae --- docs/upgrade-1.0a20.md | 94 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index af57ca83..6abcd23d 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -194,3 +194,97 @@ The new pattern is to use `ds.client` like this: ds = Datasette([], memory=True) response = await ds.client.get("/path") ``` + +## Migrating from metadata= to config= + +Datasette 1.0 separates metadata (titles, descriptions, licenses) from configuration (settings, plugins, queries, permissions). Plugin tests and code need to be updated accordingly. 
+ +### Update test constructors + +Old code: +```python +ds = Datasette( + memory=True, + metadata={ + "databases": { + "_memory": {"queries": {"my_query": {"sql": "select 1", "title": "My Query"}}} + }, + "plugins": { + "my-plugin": {"setting": "value"} + } + } +) +``` + +New code: +```python +ds = Datasette( + memory=True, + config={ + "databases": { + "_memory": {"queries": {"my_query": {"sql": "select 1", "title": "My Query"}}} + }, + "plugins": { + "my-plugin": {"setting": "value"} + } + } +) +``` + +### Update datasette.metadata() calls + +The `datasette.metadata()` method has been removed. Use these methods instead: + +Old code: +```python +try: + title = datasette.metadata(database=database)["queries"][query_name]["title"] +except (KeyError, TypeError): + pass +``` + +New code: +```python +try: + query_info = await datasette.get_canned_query(database, query_name, request.actor) + if query_info and "title" in query_info: + title = query_info["title"] +except (KeyError, TypeError): + pass +``` + +### Update render functions to async + +If your plugin's render function needs to call `datasette.get_canned_query()` or other async Datasette methods, it must be declared as async: + +Old code: +```python +def render_atom(datasette, request, sql, columns, rows, database, table, query_name, view_name, data): + # ... + if query_name: + title = datasette.metadata(database=database)["queries"][query_name]["title"] +``` + +New code: +```python +async def render_atom(datasette, request, sql, columns, rows, database, table, query_name, view_name, data): + # ... 
+ if query_name: + query_info = await datasette.get_canned_query(database, query_name, request.actor) + if query_info and "title" in query_info: + title = query_info["title"] +``` + +### Update query URLs in tests + +Datasette now redirects `?sql=` parameters from database pages to the query view: + +Old code: +```python +response = await ds.client.get("/_memory.atom?sql=select+1") +``` + +New code: +```python +response = await ds.client.get("/_memory/-/query.atom?sql=select+1") +``` From c76c3e6e6fcf541a10547b8647dbe288db269323 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 11:51:53 -0800 Subject: [PATCH 026/299] facet_suggest_time_limit_ms 200ms in tests, closes #2574 --- tests/conftest.py | 1 + tests/test_api.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4a8ef51d..ad7243c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,6 +62,7 @@ async def ds_client(): "default_page_size": 50, "max_returned_rows": 100, "sql_time_limit_ms": 200, + "facet_suggest_time_limit_ms": 200, # Up from 50 default # Default is 3 but this results in "too many open files" # errors when running the full test suite: "num_sql_threads": 1, diff --git a/tests/test_api.py b/tests/test_api.py index 2ac647c7..859c5809 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -875,7 +875,7 @@ async def test_settings_json(ds_client): "default_page_size": 50, "default_facet_size": 30, "default_allow_sql": True, - "facet_suggest_time_limit_ms": 50, + "facet_suggest_time_limit_ms": 200, "facet_time_limit_ms": 200, "max_returned_rows": 100, "max_insert_rows": 100, From 18fd373a8f95a30aa41bc059d18f21d9703bedd7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:17:51 -0800 Subject: [PATCH 027/299] New PermissionSQL.restriction_sql mechanism for actor restrictions Implement INTERSECT-based actor restrictions to prevent permission bypass Actor restrictions are now implemented as SQL 
filters using INTERSECT rather than as deny/allow permission rules. This ensures restrictions act as hard limits that cannot be overridden by other permission plugins or config blocks. Previously, actor restrictions (_r in actor dict) were implemented by generating permission rules with deny/allow logic. This approach had a critical flaw: database-level config allow blocks could bypass table-level restrictions, granting access to tables not in the actor's allowlist. The new approach separates concerns: - Permission rules determine what's allowed based on config and plugins - Restriction filters limit the result set to only allowlisted resources - Restrictions use INTERSECT to ensure all restriction criteria are met - Database-level restrictions (parent, NULL) properly match all child tables Implementation details: - Added restriction_sql field to PermissionSQL dataclass - Made PermissionSQL.sql optional to support restriction-only plugins - Updated actor_restrictions_sql() to return restriction filters instead of rules - Modified SQL builders to apply restrictions via INTERSECT and EXISTS clauses Closes #2572 --- datasette/default_permissions.py | 120 ++++------- datasette/permissions.py | 9 +- datasette/utils/actions_sql.py | 129 +++++++++--- datasette/utils/permissions.py | 153 +++++++++++++- datasette/views/special.py | 2 +- tests/test_actor_restriction_bug.py | 133 ++++++++++++ tests/test_plugins.py | 10 + tests/test_restriction_sql.py | 315 ++++++++++++++++++++++++++++ 8 files changed, 759 insertions(+), 112 deletions(-) create mode 100644 tests/test_actor_restriction_bug.py create mode 100644 tests/test_restriction_sql.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 9afb088e..5642cdfe 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -19,7 +19,7 @@ async def actor_restrictions_sql(datasette, actor, action): return None restrictions = actor.get("_r") if isinstance(actor, dict) else 
None - if not restrictions: + if restrictions is None: return [] # Check if this action appears in restrictions (with abbreviations) @@ -28,91 +28,63 @@ async def actor_restrictions_sql(datasette, actor, action): if action_obj and action_obj.abbr: action_checks.add(action_obj.abbr) - # Check if this action is in the allowlist anywhere in restrictions - is_in_allowlist = False + # Check if globally allowed in restrictions global_actions = restrictions.get("a", []) - if action_checks.intersection(global_actions): - is_in_allowlist = True + is_globally_allowed = action_checks.intersection(global_actions) - if not is_in_allowlist: - for db_actions in restrictions.get("d", {}).values(): - if action_checks.intersection(db_actions): - is_in_allowlist = True - break + if is_globally_allowed: + # Globally allowed - no restriction filtering needed + return [] - if not is_in_allowlist: - for tables in restrictions.get("r", {}).values(): - for table_actions in tables.values(): - if action_checks.intersection(table_actions): - is_in_allowlist = True - break - if is_in_allowlist: - break - - # If action not in allowlist at all, add global deny and return - if not is_in_allowlist: - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :actor_deny_reason AS reason" - return [ - PermissionSQL( - sql=sql, - params={ - "actor_deny_reason": f"actor restrictions: {action} not in allowlist" - }, - ) - ] - - # Action IS in allowlist - build deny + specific allows - selects = [] - params = {} + # Not globally allowed - build restriction_sql that lists allowlisted resources + restriction_selects = [] + restriction_params = {} param_counter = 0 - def add_row(parent, child, allow, reason): - """Helper to add a parameterized SELECT statement.""" - nonlocal param_counter - prefix = f"restr_{param_counter}" - param_counter += 1 - - selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child, " - f":{prefix}_allow AS allow, :{prefix}_reason AS reason" - ) - 
params[f"{prefix}_parent"] = parent - params[f"{prefix}_child"] = child - params[f"{prefix}_allow"] = 1 if allow else 0 - params[f"{prefix}_reason"] = reason - - # If NOT globally allowed, add global deny as gatekeeper - is_globally_allowed = action_checks.intersection(global_actions) - if not is_globally_allowed: - add_row(None, None, 0, f"actor restrictions: {action} denied by default") - else: - # Globally allowed - add global allow - add_row(None, None, 1, f"actor restrictions: global {action}") - - # Add database-level allows + # Add database-level allowlisted resources db_restrictions = restrictions.get("d", {}) for db_name, db_actions in db_restrictions.items(): if action_checks.intersection(db_actions): - add_row(db_name, None, 1, f"actor restrictions: database {db_name}") + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, NULL AS child" + ) + restriction_params[f"{prefix}_parent"] = db_name - # Add resource/table-level allows + # Add table-level allowlisted resources resource_restrictions = restrictions.get("r", {}) for db_name, tables in resource_restrictions.items(): for table_name, table_actions in tables.items(): if action_checks.intersection(table_actions): - add_row( - db_name, - table_name, - 1, - f"actor restrictions: {db_name}/{table_name}", + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" ) + restriction_params[f"{prefix}_parent"] = db_name + restriction_params[f"{prefix}_child"] = table_name - if not selects: - return [] + if not restriction_selects: + # Action not in allowlist - return empty restriction (INTERSECT will return no results) + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", # Empty set + ) + ] - sql = "\nUNION ALL\n".join(selects) + # Build 
restriction SQL that returns allowed (parent, child) pairs + restriction_sql = "\nUNION ALL\n".join(restriction_selects) - return [PermissionSQL(sql=sql, params=params)] + # Return restriction-only PermissionSQL (sql=None means no permission rules) + # The restriction_sql does the actual filtering via INTERSECT + return [ + PermissionSQL( + params=restriction_params, + restriction_sql=restriction_sql, + ) + ] @hookimpl(specname="permission_resources_sql") @@ -375,13 +347,11 @@ async def default_allow_sql_check(datasette, actor, action): @hookimpl(specname="permission_resources_sql") async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions.""" - # Only apply defaults if actor has no restrictions - # If actor has restrictions, they've already added their own deny/allow rules - has_restrictions = actor and "_r" in actor - if has_restrictions: - return None + """Apply default allow rules for standard view/execute actions. + With the INTERSECT-based restriction approach, these defaults are always generated + and then filtered by restriction_sql if the actor has restrictions. + """ default_allow_actions = { "view-instance", "view-database", diff --git a/datasette/permissions.py b/datasette/permissions.py index 7b1fc90c..c91385a0 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -138,13 +138,20 @@ class PermissionSQL: child TEXT NULL, allow INTEGER, -- 1 allow, 0 deny reason TEXT + + For restriction-only plugins, sql can be None and only restriction_sql is provided. 
""" - sql: str # SQL that SELECTs the 4 columns above + sql: str | None = ( + None # SQL that SELECTs the 4 columns above (can be None for restriction-only) + ) params: dict[str, Any] | None = ( None # bound params for the SQL (values only; no ':' prefix) ) source: str | None = None # System will set this to the plugin name + restriction_sql: str | None = ( + None # Optional SQL that returns (parent, child) for restriction filtering + ) @classmethod def allow(cls, reason: str, _allow: bool = True) -> "PermissionSQL": diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 13594a2d..7121e2d0 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -157,8 +157,19 @@ async def _build_single_action_sql( all_params = {} rule_sqls = [] + restriction_sqls = [] for permission_sql in permission_sqls: + # Always collect params (even from restriction-only plugins) + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rule_sqls.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -166,7 +177,6 @@ async def _build_single_action_sql( ) """.strip() ) - all_params.update(permission_sql.params or {}) # If no rules, return empty result (deny all) if not rule_sqls: @@ -200,6 +210,9 @@ async def _build_single_action_sql( anon_params = {} for permission_sql in anon_permission_sqls: + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rewritten_sql = permission_sql.sql for key, value in (permission_sql.params or {}).items(): anon_key = f"anon_{key}" @@ -360,6 +373,17 @@ async def _build_single_action_sql( query_parts.append(")") + # Add restriction list CTE if there are 
restrictions + if restriction_sqls: + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + query_parts.extend( + [",", "restriction_list AS (", f" {restriction_intersect}", ")"] + ) + # Final SELECT select_cols = "parent, child, reason" if include_is_private: @@ -369,6 +393,17 @@ async def _build_single_action_sql( query_parts.append("FROM decisions") query_parts.append("WHERE is_allowed = 1") + # Add restriction filter if there are restrictions + if restriction_sqls: + query_parts.append( + """ + AND EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = decisions.parent OR r.parent IS NULL) + AND (r.child = decisions.child OR r.child IS NULL) + )""" + ) + # Add parent filter if specified if parent is not None: query_parts.append(" AND parent = :filter_parent") @@ -405,11 +440,24 @@ async def build_permission_rules_sql( return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", {}, + [], ) union_parts = [] all_params = {} + restriction_sqls = [] + for permission_sql in permission_sqls: + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue + union_parts.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -417,10 +465,9 @@ async def build_permission_rules_sql( ) """.strip() ) - all_params.update(permission_sql.params or {}) rules_union = " UNION ALL ".join(union_parts) - return rules_union, all_params + return rules_union, all_params, restriction_sqls async def check_permission_for_resource( @@ -447,7 +494,9 @@ async def 
check_permission_for_resource( This builds the cascading permission query and checks if the specific resource is in the allowed set. """ - rules_union, all_params = await build_permission_rules_sql(datasette, actor, action) + rules_union, all_params, restriction_sqls = await build_permission_rules_sql( + datasette, actor, action + ) # If no rules (empty SQL), default deny if not rules_union: @@ -457,43 +506,57 @@ async def check_permission_for_resource( all_params["_check_parent"] = parent all_params["_check_child"] = child + # If there are restriction filters, check if the resource passes them first + if restriction_sqls: + # Check if resource is in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + restriction_check = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + restriction_query = f""" +WITH restriction_list AS ( + {restriction_check} +) +SELECT EXISTS ( + SELECT 1 FROM restriction_list + WHERE (parent = :_check_parent OR parent IS NULL) + AND (child = :_check_child OR child IS NULL) +) AS in_allowlist +""" + result = await datasette.get_internal_database().execute( + restriction_query, all_params + ) + if result.rows and not result.rows[0][0]: + # Resource not in restriction allowlist - deny + return False + query = f""" WITH all_rules AS ( {rules_union} ), -child_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow +matched_rules AS ( + SELECT ar.*, + CASE + WHEN ar.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN ar.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child = :_check_child + WHERE (ar.parent IS NULL OR ar.parent = :_check_parent) + AND (ar.child IS NULL OR ar.child = :_check_child) ), 
-parent_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child IS NULL -), -global_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent IS NULL AND ar.child IS NULL +winner AS ( + SELECT * + FROM matched_rules + ORDER BY + depth DESC, -- specificity first (higher depth wins) + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow + source_plugin -- stable tie-break + LIMIT 1 ) -SELECT - CASE - WHEN cl.any_deny = 1 THEN 0 - WHEN cl.any_allow = 1 THEN 1 - WHEN pl.any_deny = 1 THEN 0 - WHEN pl.any_allow = 1 THEN 1 - WHEN gl.any_deny = 1 THEN 0 - WHEN gl.any_allow = 1 THEN 1 - ELSE 0 - END AS is_allowed -FROM child_lvl cl, parent_lvl pl, global_lvl gl +SELECT COALESCE((SELECT allow FROM winner), 0) AS is_allowed """ # Execute the query against the internal database diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 75307abf..58be53a3 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -99,6 +99,10 @@ def build_rules_union( # No namespacing - just use plugin params as-is params.update(p.params or {}) + # Skip plugins that only provide restriction_sql (no permission rules) + if p.sql is None: + continue + parts.append( f""" SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( @@ -155,6 +159,8 @@ async def resolve_permissions_from_catalog( - resource (rendered "/parent/child" or "/parent" or "/") """ resolved_plugins: List[PermissionSQL] = [] + restriction_sqls: List[str] = [] + for plugin in plugins: if callable(plugin) and not isinstance(plugin, PermissionSQL): resolved = plugin(action) # type: ignore[arg-type] @@ -164,6 +170,10 @@ async def resolve_permissions_from_catalog( raise TypeError("Plugin 
providers must return PermissionSQL instances") resolved_plugins.append(resolved) + # Collect restriction SQL filters + if resolved.restriction_sql: + restriction_sqls.append(resolved.restriction_sql) + union_sql, rule_params = build_rules_union(actor, resolved_plugins) all_params = { **(candidate_params or {}), @@ -199,8 +209,8 @@ async def resolve_permissions_from_catalog( PARTITION BY parent, child ORDER BY depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- deny over allow at same depth - source_plugin -- stable tie-break + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break ) AS rn FROM matched ), @@ -228,6 +238,145 @@ async def resolve_permissions_from_catalog( ORDER BY c.parent, c.child """ + # If there are restriction filters, wrap the query with INTERSECT + # This ensures only resources in the restriction allowlist are returned + if restriction_sqls: + # Start with the main query, but select only parent/child for the INTERSECT + main_query_for_intersect = f""" + WITH + cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ), + permitted_resources AS ( + SELECT c.parent, c.child + FROM cands c + LEFT JOIN winner w + ON ((w.parent = 
c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + WHERE COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) = 1 + ) + SELECT parent, child FROM permitted_resources + """ + + # Build restriction list with INTERSECT (all must match) + # Then filter to resources that match hierarchically + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + + # Combine: resources allowed by permissions AND in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + filtered_resources = f""" + WITH restriction_list AS ( + {restriction_intersect} + ), + permitted AS ( + {main_query_for_intersect} + ), + filtered AS ( + SELECT p.parent, p.child + FROM permitted p + WHERE EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = p.parent OR r.parent IS NULL) + AND (r.child = p.child OR r.child IS NULL) + ) + ) + """ + + # Now join back to get full results for only the filtered resources + sql = f""" + {filtered_resources} + , cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + 
allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ) + SELECT + c.parent, c.child, + COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) AS allow, + COALESCE(w.reason, CASE WHEN :implicit_deny THEN 'implicit deny' ELSE NULL END) AS reason, + w.source_plugin, + COALESCE(w.depth, -1) AS depth, + :action AS action, + CASE + WHEN c.parent IS NULL THEN '/' + WHEN c.child IS NULL THEN '/' || c.parent + ELSE '/' || c.parent || '/' || c.child + END AS resource + FROM filtered c + LEFT JOIN winner w + ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + ORDER BY c.parent, c.child + """ + rows_iter: Iterable[sqlite3.Row] = await db.execute( sql, {**all_params, "implicit_deny": 1 if implicit_deny else 0}, diff --git a/datasette/views/special.py b/datasette/views/special.py index 5a341911..a1d736c5 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -403,7 +403,7 @@ class PermissionRulesView(BaseView): from datasette.utils.actions_sql import build_permission_rules_sql - union_sql, union_params = await build_permission_rules_sql( + union_sql, union_params, restriction_sqls = await build_permission_rules_sql( self.ds, actor, action ) await self.ds.refresh_schemas() diff --git a/tests/test_actor_restriction_bug.py b/tests/test_actor_restriction_bug.py new file mode 100644 index 00000000..0bfc9e1e --- /dev/null +++ b/tests/test_actor_restriction_bug.py @@ -0,0 +1,133 @@ +""" +Test for actor restrictions bug with database-level config. + +This test currently FAILS, demonstrating the bug where database-level +config allow blocks can bypass table-level restrictions. 
+""" + +import pytest +from datasette.app import Datasette +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_table_restrictions_not_bypassed_by_database_level_config(): + """ + Actor restrictions should act as hard limits that config cannot override. + + BUG: When an actor has table-level restrictions (e.g., only table2 and table3) + but config has a database-level allow block, the database-level config rule + currently allows ALL tables, not just those in the restriction allowlist. + + This test documents the expected behavior and will FAIL until the bug is fixed. + """ + # Config grants access at DATABASE level (not table level) + config = { + "databases": { + "test_db_rnbbdlc": { + "allow": { + "id": "user" + } # Database-level allow - grants access to all tables + } + } + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rnbbdlc") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + await db.execute_write("create table table3 (id integer primary key)") + await db.execute_write("create table table4 (id integer primary key)") + + # Actor restricted to ONLY table2 and table3 + # Even though config allows the whole database, restrictions should limit access + actor = { + "id": "user", + "_r": { + "r": { # Resource-level (table-level) restrictions + "test_db_rnbbdlc": { + "table2": ["vt"], # vt = view-table abbreviation + "table3": ["vt"], + } + } + }, + } + + # table2 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed - in restriction allowlist" + + # table3 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + 
resource=TableResource("test_db_rnbbdlc", "table3"), + actor=actor, + ) + assert result is True, "table3 should be allowed - in restriction allowlist" + + # table1 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table1"), + actor=actor, + ) + assert ( + result is False + ), "table1 should be DENIED - not in restriction allowlist, config cannot override" + + # table4 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table4"), + actor=actor, + ) + assert ( + result is False + ), "table4 should be DENIED - not in restriction allowlist, config cannot override" + + +@pytest.mark.asyncio +async def test_database_restrictions_with_database_level_config(): + """ + Verify that database-level restrictions work correctly with database-level config. + + This should pass - it's testing the case where restriction granularity + matches config granularity. 
+ """ + config = { + "databases": {"test_db_rwdl": {"allow": {"id": "user"}}} # Database-level allow + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rwdl") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + + # Actor has database-level restriction (all tables in test_db_rwdl) + actor = { + "id": "user", + "_r": {"d": {"test_db_rwdl": ["vt"]}}, # Database-level restrictions + } + + # Both tables should be allowed (database-level restriction matches database-level config) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table1"), + actor=actor, + ) + assert result is True, "table1 should be allowed" + + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed" diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 5a530b25..971b7e82 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1630,6 +1630,16 @@ async def test_hook_register_actions_with_custom_resources(): reason="user2 granted view-document-collection" ) + # Default allow for view-document-collection (like other view-* actions) + if action == "view-document-collection": + return PermissionSQL.allow( + reason="default allow for view-document-collection" + ) + + # Default allow for view-document (like other view-* actions) + if action == "view-document": + return PermissionSQL.allow(reason="default allow for view-document") + # Register the plugin temporarily plugin = TestPlugin() pm.register(plugin, name="test_custom_resources_plugin") diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py new file mode 100644 index 00000000..7d6d8a5a --- /dev/null +++ b/tests/test_restriction_sql.py @@ -0,0 +1,315 @@ +import pytest +from datasette.app import Datasette +from 
datasette.permissions import PermissionSQL +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_multiple_restriction_sources_intersect(): + """ + Test that when multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. + """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_intersect allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_intersect' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_intersect") + db2 = ds.add_memory_database("db2_multi_intersect") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1_multi_intersect + # INTERSECT means only db1_multi_intersect/t1 should pass + actor = { + "id": "user", + "_r": {"d": {"db1_multi_intersect": ["vt"], "db2_multi_intersect": ["vt"]}}, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1_multi_intersect/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_intersect", "t1") in resources + assert ("db2_multi_intersect", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") + + +@pytest.mark.asyncio +async def 
test_restriction_sql_with_overlapping_databases_and_tables(): + """ + Test actor with both database-level and table-level restrictions for same database. + + When actor has: + - Database-level: db1_overlapping allowed (all tables) + - Table-level: db1_overlapping/t1 allowed + + Both entries are UNION'd (OR'ed) within the actor's restrictions. + Database-level restriction allows ALL tables, so table-level is redundant. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_overlapping") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await db.execute_write("CREATE TABLE t2 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has BOTH database-level (db1_overlapping all tables) AND table-level (db1_overlapping/t1 only) + actor = { + "id": "user", + "_r": { + "d": { + "db1_overlapping": ["vt"] + }, # Database-level: all tables in db1_overlapping + "r": { + "db1_overlapping": {"t1": ["vt"]} + }, # Table-level: only t1 in db1_overlapping + }, + } + + # Within actor restrictions, entries are UNION'd (OR'ed): + # - Database level allows: (db1_overlapping, NULL) → matches all tables via hierarchical matching + # - Table level allows: (db1_overlapping, t1) → redundant, already covered by database level + # Result: Both tables are allowed + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + assert ("db1_overlapping", "t1") in resources + # Database-level restriction allows all tables, so t2 is also allowed + assert ("db1_overlapping", "t2") in resources + + +@pytest.mark.asyncio +async def test_restriction_sql_empty_allowlist_query(): + """ + Test the specific SQL query generated when action is not in allowlist. + + actor_restrictions_sql() returns "SELECT NULL AS parent, NULL AS child WHERE 0" + Verify this produces an empty result set. 
+ """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_empty_allowlist") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has restrictions but action not in allowlist + actor = {"id": "user", "_r": {"r": {"db1_empty_allowlist": {"t1": ["vt"]}}}} + + # Try to view-database (only view-table is in allowlist) + page = await ds.allowed_resources("view-database", actor) + + # Should be empty + assert len(page.resources) == 0 + + +@pytest.mark.asyncio +async def test_restriction_sql_with_pagination(): + """ + Test that restrictions work correctly with keyset pagination. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_pagination") + + # Create many tables + for i in range(10): + await db.execute_write(f"CREATE TABLE t{i:02d} (id INTEGER)") + await ds._refresh_schemas() + + # Actor restricted to only odd-numbered tables + restrictions = {"r": {"db1_pagination": {}}} + for i in range(10): + if i % 2 == 1: # Only odd tables + restrictions["r"]["db1_pagination"][f"t{i:02d}"] = ["vt"] + + actor = {"id": "user", "_r": restrictions} + + # Get first page with small limit + page1 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2 + ) + assert len(page1.resources) == 2 + assert page1.next is not None + + # Get second page using next token + page2 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2, next=page1.next + ) + assert len(page2.resources) == 2 + + # Should have no overlap + page1_ids = {r.child for r in page1.resources} + page2_ids = {r.child for r in page2.resources} + assert page1_ids.isdisjoint(page2_ids) + + # All should be odd-numbered tables + all_ids = page1_ids | page2_ids + for table_id in all_ids: + table_num = int(table_id[1:]) # Extract number from "t01", "t03", etc. 
+ assert table_num % 2 == 1, f"Table {table_id} should be odd-numbered" + + +@pytest.mark.asyncio +async def test_also_requires_with_restrictions(): + """ + Test that also_requires actions properly respect restrictions. + + execute-sql requires view-database. With restrictions, both must pass. + """ + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_also_requires") + db2 = ds.add_memory_database("db2_also_requires") + await ds._refresh_schemas() + + # Actor restricted to only db1_also_requires for view-database + # execute-sql requires view-database, so should only work on db1_also_requires + actor = { + "id": "user", + "_r": { + "d": { + "db1_also_requires": ["vd", "es"], + "db2_also_requires": [ + "es" + ], # They have execute-sql but not view-database + } + }, + } + + # db1_also_requires should allow execute-sql + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db1_also_requires", None), + actor=actor, + ) + assert result is True + + # db2_also_requires should not (they have execute-sql but not view-database) + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db2_also_requires", None), + actor=actor, + ) + assert result is False + + +@pytest.mark.asyncio +async def test_restriction_abbreviations_and_full_names(): + """ + Test that both abbreviations and full action names work in restrictions. 
+ """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_abbrev") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Test with abbreviation + actor_abbr = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["vt"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_abbr, + ) + assert result is True + + # Test with full name + actor_full = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["view-table"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_full, + ) + assert result is True + + # Test with mixed + actor_mixed = {"id": "user", "_r": {"d": {"db1_abbrev": ["view-database", "vt"]}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_mixed, + ) + assert result is True + + +@pytest.mark.asyncio +async def test_permission_resources_sql_multiple_restriction_sources_intersect(): + """ + Test that when multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. 
+ """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_restrictions allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_restrictions' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_restrictions") + db2 = ds.add_memory_database("db2_multi_restrictions") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1 + # INTERSECT means only db1/t1 should pass + actor = { + "id": "user", + "_r": { + "d": { + "db1_multi_restrictions": ["vt"], + "db2_multi_restrictions": ["vt"], + } + }, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_restrictions", "t1") in resources + assert ("db2_multi_restrictions", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") From b212895b9763068599e535fd7cd77fd5b8e2b14c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:26:20 -0800 Subject: [PATCH 028/299] Updated release notes for 1.0a20 Refs #2550 --- docs/changelog.rst | 22 ++++++++++++---------- docs/upgrade-1.0a20.md | 2 ++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index f98ad8ac..cc5e75af 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,27 +6,29 @@ 
Changelog .. _v1_0_a20: -UNRELEASED 1.0a20 (2025-??-??) ------------------------------- +1.0a20 (2025-11-03) +------------------- -This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. +This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning how Datasette's permission system works. Permission system redesign ~~~~~~~~~~~~~~~~~~~~~~~~~~ Previously the permission system worked using ``datasette.permission_allowed()`` checks which consulted all available plugins in turn to determine whether a given actor was allowed to perform a given action on a given resource. -This approach could become prohibitively expensive for large lists of items - for example to determine the list of tables that a user could view in a large Datasette instance, where the plugin hooks would be called N times for N tables. +This approach could become prohibitively expensive for large lists of items - for example to determine the list of tables that a user could view in a large Datasette instance each plugin implementation of that hook would be fired for every table. -The new system instead uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action. +The new design uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action. This turns an N x M problem (N resources, M plugins) into a single SQL query. -Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will influence the construction of that query. +Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will be used as part of that query. 
-Affected plugins should make the following changes: +Plugins that use any of the following features will need to be updated to work with this and following alphas (and Datasette 1.0 stable itself): -- Replace calls to ``datasette.permission_allowed()`` with calls to the new :ref:`datasette.allowed() ` method. The new method takes a ``resource=`` parameter which should be an instance of a ``Resource`` subclass, as described in the method documentation. -- The ``permission_allowed()`` plugin hook has been removed in favor of the new :ref:`permission_resources_sql() ` hook. -- The ``register_permissions()`` plugin hook has been removed in favor of :ref:`register_actions() `. +- Checking permissions with ``datasette.permission_allowed()`` - this method has been replaced with :ref:`datasette.allowed() `. +- Implementing the ``permission_allowed()`` plugin hook - this hook has been removed in favor of :ref:`permission_resources_sql() `. +- Using ``register_permissions()`` to register permissions - this hook has been removed in favor of :ref:`register_actions() `. + +Consult the :ref:`v1.0a20 upgrade guide ` for further details on how to upgrade affected plugins. 
Plugins can now make use of two new internal methods to help resolve permission checks: diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 6abcd23d..2aa782e0 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,6 +2,8 @@ orphan: true --- +(upgrade_guide_v1_a20)= + # Datasette 1.0a20 plugin upgrade guide From b3b8c5831be832f32e648dad3080317847778cdc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:34:29 -0800 Subject: [PATCH 029/299] Fixed some broken reference links on upgrade guide --- docs/upgrade-1.0a20.md | 4 ++-- docs/upgrade_guide.md | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 2aa782e0..fbdcac2b 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,10 +2,10 @@ orphan: true --- -(upgrade_guide_v1_a20)= - # Datasette 1.0a20 plugin upgrade guide +(upgrade_guide_v1_a20)= + Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use **any of the following**: diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index 105d7281..a3c321a4 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -4,7 +4,7 @@ (upgrade_guide_v1)= ## Datasette 0.X -> 1.0 -This section reviews breaking changes Datasette ``1.0`` has when upgrading from a ``0.XX`` version. For new features that ``1.0`` offers, see the :ref:`changelog`. +This section reviews breaking changes Datasette ``1.0`` has when upgrading from a ``0.XX`` version. For new features that ``1.0`` offers, see the {ref}`changelog`. (upgrade_guide_v1_sql_queries)= ### New URL for SQL queries @@ -37,7 +37,7 @@ Metadata was completely revamped for Datasette 1.0. 
There are a number of relate (upgrade_guide_v1_metadata_split)= #### ``metadata.yaml`` split into ``datasette.yaml`` -Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink if a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metadata (ex title and descriptions of database and tables, licensing info, etc). Other settings have been moved to a ``datasette.yml`` configuration file, described in :ref:`configuration`. +Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink if a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metadata (ex title and descriptions of database and tables, licensing info, etc). Other settings have been moved to a ``datasette.yml`` configuration file, described in {ref}`configuration`. To start Datasette with both metadata and configuration files, run it like this: @@ -85,14 +85,14 @@ def get_metadata(datasette, key, database, table): pass ``` -Instead, plugins are encouraged to interact directly with Datasette's in-memory metadata tables in SQLite using the following methods on the :ref:`internals_datasette`: +Instead, plugins are encouraged to interact directly with Datasette's in-memory metadata tables in SQLite using the following methods on the {ref}`internals_datasette`: -- :ref:`get_instance_metadata() ` and :ref:`set_instance_metadata() ` -- :ref:`get_database_metadata() ` and :ref:`set_database_metadata() ` -- :ref:`get_resource_metadata() ` and :ref:`set_resource_metadata() ` -- :ref:`get_column_metadata() ` and :ref:`set_column_metadata() ` +- {ref}`get_instance_metadata() ` and {ref}`set_instance_metadata() ` +- {ref}`get_database_metadata() ` and {ref}`set_database_metadata() ` +- {ref}`get_resource_metadata() ` and {ref}`set_resource_metadata() ` +- {ref}`get_column_metadata() ` and {ref}`set_column_metadata() ` -A plugin that stores or calculates its own metadata can implement the :ref:`plugin_hook_startup` hook to populate 
those items on startup, and then call those methods while it is running to persist any new metadata changes. +A plugin that stores or calculates its own metadata can implement the {ref}`plugin_hook_startup` hook to populate those items on startup, and then call those methods while it is running to persist any new metadata changes. (upgrade_guide_v1_metadata_json_removed)= #### The ``/metadata.json`` endpoint has been removed @@ -106,10 +106,10 @@ As of Datasette ``1.0a14``, the ``.metadata()`` method on the Datasette Python A Instead, one should use the following methods on a Datasette class: -- :ref:`get_instance_metadata() ` -- :ref:`get_database_metadata() ` -- :ref:`get_resource_metadata() ` -- :ref:`get_column_metadata() ` +- {ref}`get_instance_metadata() ` +- {ref}`get_database_metadata() ` +- {ref}`get_resource_metadata() ` +- {ref}`get_column_metadata() ` ```{include} upgrade-1.0a20.md :heading-offset: 1 From 5d4dfcec6b91a47ab0f2053aa3bc30c68036f879 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:38:57 -0800 Subject: [PATCH 030/299] Fix for link from changelog not working Annoyingly we now get a warning in the docs build about a duplicate label, but it seems harmless enough. --- docs/upgrade-1.0a20.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index fbdcac2b..749d383c 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,11 +2,8 @@ orphan: true --- -# Datasette 1.0a20 plugin upgrade guide - (upgrade_guide_v1_a20)= - - +# Datasette 1.0a20 plugin upgrade guide Datasette 1.0a20 makes some breaking changes to Datasette's permission system. 
Plugins need to be updated if they use **any of the following**: From dc3f9fe9e4cbcda7e7dd88195f65cf0dad21ce5c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:42:59 -0800 Subject: [PATCH 031/299] Python 3.10, not 3.8 --- docs/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 4771aa11..6be0247c 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -20,7 +20,7 @@ General guidelines Setting up a development environment ------------------------------------ -If you have Python 3.8 or higher installed on your computer (on OS X the quickest way to do this `is using homebrew `__) you can install an editable copy of Datasette using the following steps. +If you have Python 3.10 or higher installed on your computer (on OS X the quickest way to do this `is using homebrew `__) you can install an editable copy of Datasette using the following steps. If you want to use GitHub to publish your changes, first `create a fork of datasette `__ under your own GitHub account. 
From 95a1fef28000d0b44ceaa398421530f588c5c385 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:47:24 -0800 Subject: [PATCH 032/299] Release 1.0a20 Refs #2488, #2495, #2503, #2505, #2509, #2510, #2513, #2515, #2517, #2519, #2520, #2521, #2524, #2525, #2526, #2528, #2530, #2531, #2534, #2537, #2543, #2544, #2550, #2551, #2555, #2558, #2561, #2562, #2564, #2565, #2567, #2569, #2570, #2571, #2574 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index c1318c6f..20cb46c7 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a19" +__version__ = "1.0a20" __version_info__ = tuple(__version__.split(".")) From 295e4a2e87464fc1838d6278308bf151bc14ba73 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 15:05:17 -0800 Subject: [PATCH 033/299] Pin to httpx<1.0 Refs https://github.com/encode/httpx/issues/3635 Closes #2576 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fb9f0453..1395ce82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "click-default-group>=1.2.3", "Jinja2>=2.10.3", "hupper>=1.9", - "httpx>=0.20", + "httpx>=0.20,<1.0", "pluggy>=1.0", "uvicorn>=0.11", "aiofiles>=0.4", From f257ca6edb64848c3b04b54d41e347c54fe57c05 Mon Sep 17 00:00:00 2001 From: James Jefferies Date: Wed, 5 Nov 2025 01:04:12 +0000 Subject: [PATCH 034/299] Fix for open redirect - identified in Issue 2429 (#2500) * Issue 2429 indicates the possibility of an open redirect The 404 processing ends up redirecting a request with multiple path slashes to that site, i.e. https://my-site//shedcode.co.uk will redirect to https://shedcode.co.uk This commit uses a regular expression to remove the multiple leading slashes before redirecting. 
--- datasette/app.py | 5 +++++ tests/test_custom_pages.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index 09936b3a..be507241 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2150,6 +2150,11 @@ class DatasetteRouter: context = {} if path.endswith(b"/"): path = path.rstrip(b"/") + + # If you redirect with a // at the beginning, you end up with an open redirect, so + # https://my.site//foo/ - will redirect to https://foo + path = re.sub(rb"^/+", b"/", path) + if request.scope["query_string"]: path += b"?" + request.scope["query_string"] await asgi_send_redirect(send, path.decode("latin1")) diff --git a/tests/test_custom_pages.py b/tests/test_custom_pages.py index f2cfe394..ccc139ce 100644 --- a/tests/test_custom_pages.py +++ b/tests/test_custom_pages.py @@ -97,3 +97,9 @@ def test_custom_route_pattern_404(custom_pages_client): assert response.status == 404 assert "

Error 404

" in response.text assert ">Oh no Date: Tue, 4 Nov 2025 17:08:06 -0800 Subject: [PATCH 035/299] Move open redirect fix to asgi_send_redirect, refs #2429 See https://github.com/simonw/datasette/pull/2500#issuecomment-3488632278 --- datasette/app.py | 5 ----- datasette/utils/asgi.py | 4 ++++ tests/test_custom_pages.py | 5 +++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index be507241..09936b3a 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2150,11 +2150,6 @@ class DatasetteRouter: context = {} if path.endswith(b"/"): path = path.rstrip(b"/") - - # If you redirect with a // at the beginning, you end up with an open redirect, so - # https://my.site//foo/ - will redirect to https://foo - path = re.sub(rb"^/+", b"/", path) - if request.scope["query_string"]: path += b"?" + request.scope["query_string"] await asgi_send_redirect(send, path.decode("latin1")) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 40214cbe..7f3329a6 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -6,6 +6,7 @@ from pathlib import Path from http.cookies import SimpleCookie, Morsel import aiofiles import aiofiles.os +import re # Workaround for adding samesite support to pre 3.8 python Morsel._reserved["samesite"] = "SameSite" @@ -248,6 +249,9 @@ async def asgi_send_html(send, html, status=200, headers=None): async def asgi_send_redirect(send, location, status=302): + # Prevent open redirect vulnerability: strip multiple leading slashes + # //example.com would be interpreted as a protocol-relative URL (e.g., https://example.com/) + location = re.sub(r"^/+", "/", location) await asgi_send( send, "", diff --git a/tests/test_custom_pages.py b/tests/test_custom_pages.py index ccc139ce..39a4c06b 100644 --- a/tests/test_custom_pages.py +++ b/tests/test_custom_pages.py @@ -100,6 +100,7 @@ def test_custom_route_pattern_404(custom_pages_client): def 
test_custom_route_pattern_with_slash_slash_302(custom_pages_client): - response = custom_pages_client.get("//nastyOpenRedirect/") + # https://github.com/simonw/datasette/issues/2429 + response = custom_pages_client.get("//example.com/") assert response.status == 302 - assert response.headers["location"] == "/nastyOpenRedirect" + assert response.headers["location"] == "/example.com" From 9f74dc22a8587b6322c4aa2747894e408ab58a9c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:11:20 -0800 Subject: [PATCH 036/299] Run cog with --extra test Previously it kept on adding stuff to cli-reference.rst that came from other plugins installed for my global environment --- Justfile | 4 ++-- datasette/cli.py | 28 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Justfile b/Justfile index adb8cf0d..abb134a6 100644 --- a/Justfile +++ b/Justfile @@ -21,11 +21,11 @@ export DATASETTE_SECRET := "not_a_secret" @lint: codespell uv run black . --check uv run flake8 - uv run cog --check README.md docs/*.rst + uv run --extra test cog --check README.md docs/*.rst # Rebuild docs with cog @cog: - uv run cog -r README.md docs/*.rst + uv run --extra test cog -r README.md docs/*.rst # Serve live docs on localhost:8000 @docs: cog blacken-docs diff --git a/datasette/cli.py b/datasette/cli.py index 94af09a2..7c1c2d44 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -442,6 +442,11 @@ def uninstall(packages, yes): "--get", help="Run an HTTP GET request against this path, print results and exit", ) +@click.option( + "--headers", + is_flag=True, + help="Include HTTP headers in --get output (requires --get)", +) @click.option( "--token", help="API token to send with --get requests", @@ -510,6 +515,7 @@ def serve( secret, root, get, + headers, token, actor, version_note, @@ -658,19 +664,33 @@ def serve( # Run async soundness checks - but only if we're not under pytest run_sync(lambda: check_databases(ds)) + if headers and not get: + 
raise click.ClickException("--headers can only be used with --get") + if token and not get: raise click.ClickException("--token can only be used with --get") if get: client = TestClient(ds) - headers = {} + request_headers = {} if token: - headers["Authorization"] = "Bearer {}".format(token) + request_headers["Authorization"] = "Bearer {}".format(token) cookies = {} if actor: cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) - response = client.get(get, headers=headers, cookies=cookies) - click.echo(response.text) + response = client.get(get, headers=request_headers, cookies=cookies) + + if headers: + # Output HTTP status code, headers, two newlines, then the response body + click.echo(f"HTTP/1.1 {response.status}") + for key, value in response.headers.items(): + click.echo(f"{key}: {value}") + if response.text: + click.echo() + click.echo(response.text) + else: + click.echo(response.text) + exit_code = 0 if response.status == 200 else 1 sys.exit(exit_code) return From ce464da34b8e18617fa10579f1b4bb32b56bdc20 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:12:15 -0800 Subject: [PATCH 037/299] datasette --get --headers option, closes #2578 --- datasette/cli.py | 2 +- docs/cli-reference.rst | 1 + tests/test_cli_serve_get.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index 7c1c2d44..aaf1b244 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -445,7 +445,7 @@ def uninstall(packages, yes): @click.option( "--headers", is_flag=True, - help="Include HTTP headers in --get output (requires --get)", + help="Include HTTP headers in --get output", ) @click.option( "--token", diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 67e06254..0e224916 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -121,6 +121,7 @@ Once started you can access it at ``http://localhost:8001`` the root user --get TEXT Run an HTTP GET request against 
this path, print results and exit + --headers Include HTTP headers in --get output --token TEXT API token to send with --get requests --actor TEXT Actor to use for --get requests (JSON string) --version-note TEXT Additional note to show on /-/versions diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 513669a1..5cba5081 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -52,6 +52,27 @@ def test_serve_with_get(tmp_path_factory): pm.unregister(to_unregister) +def test_serve_with_get_headers(): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "serve", + "--memory", + "--get", + "/_memory/", + "--headers", + ], + ) + # exit_code is 1 because it wasn't a 200 response + assert result.exit_code == 1, result.output + assert result.output == ( + "HTTP/1.1 302\n" + "location: /_memory\n" + "content-type: text/html; charset=utf-8\n" + ) + + def test_serve_with_get_and_token(): runner = CliRunner() result1 = runner.invoke( From b4385a3ff7ccc96b53312428ba496770e68cc3f8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:39:25 -0800 Subject: [PATCH 038/299] Made test_serve_with_get_headers a bit more forgiving --- tests/test_cli_serve_get.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 5cba5081..5ad01bfa 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -66,11 +66,10 @@ def test_serve_with_get_headers(): ) # exit_code is 1 because it wasn't a 200 response assert result.exit_code == 1, result.output - assert result.output == ( - "HTTP/1.1 302\n" - "location: /_memory\n" - "content-type: text/html; charset=utf-8\n" - ) + lines = result.output.splitlines() + assert lines and lines[0] == "HTTP/1.1 302" + assert "location: /_memory" in lines + assert "content-type: text/html; charset=utf-8" in lines def test_serve_with_get_and_token(): From 12016342e716a4e779bddac3f3a1fc43408527f4 
Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:40:58 -0800 Subject: [PATCH 039/299] Fix test_metadata_yaml I broke in #2578 --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1c8f51ef..3bb360fb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -146,6 +146,7 @@ def test_metadata_yaml(): actor=None, version_note=None, get=None, + headers=False, help_settings=False, pdb=False, crossdb=False, From f12f6cc2aba0ab554bc0a985bc772741d7f36d7d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 09:28:41 -0800 Subject: [PATCH 040/299] Get publish cloudrun working with latest Cloud Run (#2581) Refs: - #2511 Filter out bad services, refs: - https://github.com/simonw/datasette/pull/2581#issuecomment-3492243400 --- .github/workflows/deploy-latest.yml | 11 +-- .github/workflows/publish.yml | 11 +-- datasette/publish/cloudrun.py | 100 ++++++++++++++++++++++++++-- docs/cli-reference.rst | 9 ++- tests/test_publish_cloudrun.py | 36 ++++++++-- 5 files changed, 146 insertions(+), 21 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 6907b438..8ffdbfd5 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -102,12 +102,13 @@ jobs: # jq '.plugins |= . 
+ {"datasette-ephemeral-tables": {"table_ttl": 900}}' \ # > metadata.json # cat metadata.json - - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@v0 + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 with: - version: '318.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy to Cloud Run env: LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 14bfaded..e94d0bdd 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -73,12 +73,13 @@ jobs: DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build sphinx-to-sqlite ../docs.db _build cd .. - - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@v0 + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 with: - version: '318.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy stable-docs.datasette.io to Cloud Run run: |- gcloud config set run/region us-central1 diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index 760ff0d1..63d22fe8 100644 --- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -3,7 +3,7 @@ import click import json import os import re -from subprocess import check_call, check_output +from subprocess import CalledProcessError, check_call, check_output from .common import ( add_common_publish_arguments_and_options, @@ -23,7 +23,9 @@ def publish_subcommand(publish): help="Application name to use when building", ) @click.option( - "--service", default="", help="Cloud Run 
service to deploy (or over-write)" + "--service", + default="", + help="Cloud Run service to deploy (or over-write)", ) @click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension") @click.option( @@ -55,13 +57,32 @@ def publish_subcommand(publish): @click.option( "--max-instances", type=int, - help="Maximum Cloud Run instances", + default=1, + show_default=True, + help="Maximum Cloud Run instances (use 0 to remove the limit)", ) @click.option( "--min-instances", type=int, help="Minimum Cloud Run instances", ) + @click.option( + "--artifact-repository", + default="datasette", + show_default=True, + help="Artifact Registry repository to store the image", + ) + @click.option( + "--artifact-region", + default="us", + show_default=True, + help="Artifact Registry location (region or multi-region)", + ) + @click.option( + "--artifact-project", + default=None, + help="Project ID for Artifact Registry (defaults to the active project)", + ) def cloudrun( files, metadata, @@ -91,6 +112,9 @@ def publish_subcommand(publish): apt_get_extras, max_instances, min_instances, + artifact_repository, + artifact_region, + artifact_project, ): "Publish databases to Datasette running on Cloud Run" fail_if_publish_binary_not_installed( @@ -100,6 +124,21 @@ def publish_subcommand(publish): "gcloud config get-value project", shell=True, universal_newlines=True ).strip() + artifact_project = artifact_project or project + + # Ensure Artifact Registry exists for the target image + _ensure_artifact_registry( + artifact_project=artifact_project, + artifact_region=artifact_region, + artifact_repository=artifact_repository, + ) + + artifact_host = ( + artifact_region + if artifact_region.endswith("-docker.pkg.dev") + else f"{artifact_region}-docker.pkg.dev" + ) + if not service: # Show the user their current services, then prompt for one click.echo("Please provide a service name for this deployment\n") @@ -117,6 +156,11 @@ def publish_subcommand(publish): click.echo("") service 
= click.prompt("Service name", type=str) + image_id = ( + f"{artifact_host}/{artifact_project}/" + f"{artifact_repository}/datasette-{service}" + ) + extra_metadata = { "title": title, "license": license, @@ -173,7 +217,6 @@ def publish_subcommand(publish): print(fp.read()) print("\n====================\n") - image_id = f"gcr.io/{project}/datasette-{service}" check_call( "gcloud builds submit --tag {}{}".format( image_id, " --timeout {}".format(timeout) if timeout else "" @@ -187,7 +230,7 @@ def publish_subcommand(publish): ("--max-instances", max_instances), ("--min-instances", min_instances), ): - if value: + if value is not None: extra_deploy_options.append("{} {}".format(option, value)) check_call( "gcloud run deploy --allow-unauthenticated --platform=managed --image {} {}{}".format( @@ -199,6 +242,52 @@ def publish_subcommand(publish): ) +def _ensure_artifact_registry(artifact_project, artifact_region, artifact_repository): + """Ensure Artifact Registry API is enabled and the repository exists.""" + + enable_cmd = ( + "gcloud services enable artifactregistry.googleapis.com " + f"--project {artifact_project} --quiet" + ) + try: + check_call(enable_cmd, shell=True) + except CalledProcessError as exc: + raise click.ClickException( + "Failed to enable artifactregistry.googleapis.com. " + "Please ensure you have permissions to manage services." 
+ ) from exc + + describe_cmd = ( + "gcloud artifacts repositories describe {repo} --project {project} " + "--location {location} --quiet" + ).format( + repo=artifact_repository, + project=artifact_project, + location=artifact_region, + ) + try: + check_call(describe_cmd, shell=True) + return + except CalledProcessError: + create_cmd = ( + "gcloud artifacts repositories create {repo} --repository-format=docker " + '--location {location} --project {project} --description "Datasette Cloud Run images" --quiet' + ).format( + repo=artifact_repository, + location=artifact_region, + project=artifact_project, + ) + try: + check_call(create_cmd, shell=True) + click.echo(f"Created Artifact Registry repository '{artifact_repository}'") + except CalledProcessError as exc: + raise click.ClickException( + "Failed to create Artifact Registry repository. " + "Use --artifact-repository/--artifact-region to point to an existing repo " + "or create one manually." + ) from exc + + def get_existing_services(): services = json.loads( check_output( @@ -214,6 +303,7 @@ def get_existing_services(): "url": service["status"]["address"]["url"], } for service in services + if "url" in service["status"] ] diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 0e224916..f002d05a 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -489,8 +489,15 @@ See :ref:`publish_cloud_run`. 
--cpu [1|2|4] Number of vCPUs to allocate in Cloud Run --timeout INTEGER Build timeout in seconds --apt-get-install TEXT Additional packages to apt-get install - --max-instances INTEGER Maximum Cloud Run instances + --max-instances INTEGER Maximum Cloud Run instances (use 0 to remove + the limit) [default: 1] --min-instances INTEGER Minimum Cloud Run instances + --artifact-repository TEXT Artifact Registry repository to store the + image [default: datasette] + --artifact-region TEXT Artifact Registry location (region or multi- + region) [default: us] + --artifact-project TEXT Project ID for Artifact Registry (defaults to + the active project) --help Show this message and exit. diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 818fa2d3..f53e5059 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -57,12 +57,20 @@ def test_publish_cloudrun_prompts_for_service( "Service name: input-service" ) == result.output.strip() assert 0 == result.exit_code - tag = "gcr.io/myproject/datasette-input-service" + tag = "us-docker.pkg.dev/myproject/datasette/datasette-input-service" mock_call.assert_has_calls( [ + mock.call( + "gcloud services enable artifactregistry.googleapis.com --project myproject --quiet", + shell=True, + ), + mock.call( + "gcloud artifacts repositories describe datasette --project myproject --location us --quiet", + shell=True, + ), mock.call(f"gcloud builds submit --tag {tag}", shell=True), mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service".format( + "gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service --max-instances 1".format( tag ), shell=True, @@ -86,12 +94,20 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory): cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"] ) assert 0 == result.exit_code - tag = f"gcr.io/{mock_output.return_value}/datasette-test" + tag = 
f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" mock_call.assert_has_calls( [ + mock.call( + f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", + shell=True, + ), + mock.call( + f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet", + shell=True, + ), mock.call(f"gcloud builds submit --tag {tag}", shell=True), mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} test".format( + "gcloud run deploy --allow-unauthenticated --platform=managed --image {} test --max-instances 1".format( tag ), shell=True, @@ -167,7 +183,7 @@ def test_publish_cloudrun_memory_cpu( assert 2 == result.exit_code return assert 0 == result.exit_code - tag = f"gcr.io/{mock_output.return_value}/datasette-test" + tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" expected_call = ( "gcloud run deploy --allow-unauthenticated --platform=managed" " --image {} test".format(tag) @@ -179,8 +195,18 @@ def test_publish_cloudrun_memory_cpu( expected_call += " --cpu {}".format(cpu) if timeout: expected_build_call += f" --timeout {timeout}" + # max_instances defaults to 1 + expected_call += " --max-instances 1" mock_call.assert_has_calls( [ + mock.call( + f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", + shell=True, + ), + mock.call( + f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet", + shell=True, + ), mock.call(expected_build_call, shell=True), mock.call( expected_call, From 3c2254463be78199678093a8db0fc1e6ad0c4cde Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 10:25:37 -0800 Subject: [PATCH 041/299] Release notes for 0.65.2 Adding those to main. 
Refs #2579 --- docs/changelog.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index cc5e75af..dbcff8cb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,16 @@ Changelog ========= +.. _v0_65_2: + +0.65.2 (2025-11-05) +------------------- + +* Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +* Upgraded for compatibility with Python 3.14. +* Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) +* Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. + .. _v1_0_a20: 1.0a20 (2025-11-03) From ec99bb46f8854ec1e17600fa0373461e66cdb26c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 10:51:46 -0800 Subject: [PATCH 042/299] stable-docs YAML workflow, refs #2582 --- .github/workflows/stable-docs.yml | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 .github/workflows/stable-docs.yml diff --git a/.github/workflows/stable-docs.yml b/.github/workflows/stable-docs.yml new file mode 100644 index 00000000..3119d617 --- /dev/null +++ b/.github/workflows/stable-docs.yml @@ -0,0 +1,76 @@ +name: Update Stable Docs + +on: + release: + types: [published] + push: + branches: + - main + +permissions: + contents: write + +jobs: + update_stable_docs: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + fetch-depth: 0 # We need all commits to find docs/ changes + - name: Set up Git user + run: | + git config user.name "Automated" + git config user.email "actions@users.noreply.github.com" + - name: Create stable branch if it does not yet exist + run: | + if ! 
git ls-remote --heads origin stable | grep -qE '\bstable\b'; then + # Make sure we have all tags locally + git fetch --tags --quiet + + # Latest tag that is just numbers and dots (optionally prefixed with 'v') + # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc. + LATEST_RELEASE=$( + git tag -l --sort=-v:refname \ + | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \ + | head -n1 + ) + + git checkout -b stable + + # If there are any stable releases, copy docs/ from the most recent + if [ -n "$LATEST_RELEASE" ]; then + rm -rf docs/ + git checkout "$LATEST_RELEASE" -- docs/ || true + fi + + git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes" + git push -u origin stable + fi + - name: Handle Release + if: github.event_name == 'release' && !github.event.release.prerelease + run: | + git fetch --all + git checkout stable + git reset --hard ${GITHUB_REF#refs/tags/} + git push origin stable --force + - name: Handle Commit to Main + if: contains(github.event.head_commit.message, '!stable-docs') + run: | + git fetch origin + git checkout -b stable origin/stable + # Get the list of modified files in docs/ from the current commit + FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/) + # Check if the list of files is non-empty + if [[ -n "$FILES" ]]; then + # Checkout those files to the stable branch to over-write with their contents + for FILE in $FILES; do + git checkout ${{ github.sha }} -- $FILE + done + git add docs/ + git commit -m "Doc changes from ${{ github.sha }}" + git push origin stable + else + echo "No changes to docs/ in this commit." 
+ exit 0 + fi From d814e81b32087c7c395faca9a9f9a7d3966ade76 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 13:38:01 -0800 Subject: [PATCH 043/299] datasette.client.get(..., skip_permission_checks=True) Closes #2580 --- datasette/app.py | 109 +++++++++++---- datasette/permissions.py | 27 ++++ datasette/utils/actions_sql.py | 21 +++ datasette/utils/permissions.py | 15 ++- docs/internals.rst | 30 +++++ tests/test_internals_datasette_client.py | 162 +++++++++++++++++++++++ 6 files changed, 335 insertions(+), 29 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 09936b3a..177debe2 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2363,6 +2363,12 @@ class NotFoundExplicit(NotFound): class DatasetteClient: + """Internal HTTP client for making requests to a Datasette instance. + + Used for testing and for internal operations that need to make HTTP requests + to the Datasette app without going through an actual HTTP server. + """ + def __init__(self, ds): self.ds = ds self.app = ds.app() @@ -2378,40 +2384,87 @@ class DatasetteClient: path = f"http://localhost{path}" return path - async def _request(self, method, path, **kwargs): - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) + async def _request(self, method, path, skip_permission_checks=False, **kwargs): + from datasette.permissions import SkipPermissions - async def get(self, path, **kwargs): - return await self._request("get", path, **kwargs) + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as 
client: + return await getattr(client, method)(self._fix(path), **kwargs) - async def options(self, path, **kwargs): - return await self._request("options", path, **kwargs) + async def get(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "get", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def head(self, path, **kwargs): - return await self._request("head", path, **kwargs) + async def options(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "options", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def post(self, path, **kwargs): - return await self._request("post", path, **kwargs) + async def head(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "head", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def put(self, path, **kwargs): - return await self._request("put", path, **kwargs) + async def post(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "post", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def patch(self, path, **kwargs): - return await self._request("patch", path, **kwargs) + async def put(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "put", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def delete(self, path, **kwargs): - return await self._request("delete", path, **kwargs) + async def patch(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "patch", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def delete(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "delete", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def request(self, method, path, skip_permission_checks=False, **kwargs): + """Make an HTTP request with 
the specified method. + + Args: + method: HTTP method (e.g., "GET", "POST", "PUT") + path: The path to request + skip_permission_checks: If True, bypass all permission checks for this request + **kwargs: Additional arguments to pass to httpx + + Returns: + httpx.Response: The response from the request + """ + from datasette.permissions import SkipPermissions - async def request(self, method, path, **kwargs): avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) diff --git a/datasette/permissions.py b/datasette/permissions.py index c91385a0..c48293ac 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -1,6 +1,33 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any, NamedTuple +import contextvars + + +# Context variable to track when permission checks should be skipped +_skip_permission_checks = contextvars.ContextVar( + "skip_permission_checks", default=False +) + + +class SkipPermissions: + """Context manager to temporarily skip permission checks. + + This is not a stable API and may change in future releases. 
+ + Usage: + with SkipPermissions(): + # Permission checks are skipped within this block + response = await datasette.client.get("/protected") + """ + + def __enter__(self): + self.token = _skip_permission_checks.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _skip_permission_checks.reset(self.token) + return False class Resource(ABC): diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 7121e2d0..9c2add0e 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -155,6 +155,16 @@ async def _build_single_action_sql( action=action, ) + # If permission_sqls is the sentinel, skip all permission checks + # Return SQL that allows all resources + from datasette.utils.permissions import SKIP_PERMISSION_CHECKS + + if permission_sqls is SKIP_PERMISSION_CHECKS: + cols = "parent, child, 'skip_permission_checks' AS reason" + if include_is_private: + cols += ", 0 AS is_private" + return f"SELECT {cols} FROM ({base_resources_sql})", {} + all_params = {} rule_sqls = [] restriction_sqls = [] @@ -436,6 +446,17 @@ async def build_permission_rules_sql( action=action, ) + # If permission_sqls is the sentinel, skip all permission checks + # Return SQL that allows everything + from datasette.utils.permissions import SKIP_PERMISSION_CHECKS + + if permission_sqls is SKIP_PERMISSION_CHECKS: + return ( + "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'skip_permission_checks' AS reason, 'skip' AS source_plugin", + {}, + [], + ) + if not permission_sqls: return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 58be53a3..6c30a12a 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -10,13 +10,26 @@ from datasette.plugins import pm from datasette.utils import await_me_maybe +# Sentinel object to indicate permission checks should be skipped 
+SKIP_PERMISSION_CHECKS = object() + + async def gather_permission_sql_from_hooks( *, datasette, actor: dict | None, action: str -) -> List[PermissionSQL]: +) -> List[PermissionSQL] | object: """Collect PermissionSQL objects from the permission_resources_sql hook. Ensures that each returned PermissionSQL has a populated ``source``. + + Returns SKIP_PERMISSION_CHECKS sentinel if skip_permission_checks context variable + is set, signaling that all permission checks should be bypassed. """ + from datasette.permissions import _skip_permission_checks + + # Check if we should skip permission checks BEFORE calling hooks + # This avoids creating unawaited coroutines + if _skip_permission_checks.get(): + return SKIP_PERMISSION_CHECKS hook_caller = pm.hook.permission_resources_sql hookimpls = hook_caller.get_hookimpls() diff --git a/docs/internals.rst b/docs/internals.rst index 406bc9b3..468b3f95 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1045,6 +1045,36 @@ These methods can be used with :ref:`internals_datasette_urls` - for example: For documentation on available ``**kwargs`` options and the shape of the HTTPX Response object refer to the `HTTPX Async documentation `__. +Bypassing permission checks +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All ``datasette.client`` methods accept an optional ``skip_permission_checks=True`` parameter. When set, all permission checks will be bypassed for that request, allowing access to any resource regardless of the configured permissions. + +This is useful for plugins and internal operations that need to access all resources without being subject to permission restrictions. + +Example usage: + +.. 
code-block:: python + + # Regular request - respects permissions + response = await datasette.client.get( + "/private-db/secret-table.json" + ) + # May return 403 Forbidden if access is denied + + # With skip_permission_checks - bypasses all permission checks + response = await datasette.client.get( + "/private-db/secret-table.json", + skip_permission_checks=True, + ) + # Will return 200 OK and the data, regardless of permissions + +This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch``, ``delete``, ``options``, ``head``) and the generic ``request`` method. + +.. warning:: + + Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. + .. _internals_datasette_urls: datasette.urls diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index afc9b335..55f7392f 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -1,6 +1,7 @@ import httpx import pytest import pytest_asyncio +from datasette.app import Datasette @pytest_asyncio.fixture @@ -9,6 +10,23 @@ async def datasette(ds_client): return ds_client.ds +@pytest_asyncio.fixture +async def datasette_with_permissions(): + """A datasette instance with permission restrictions for testing""" + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + db = ds.add_memory_database("test_db") + await db.execute_write( + "create table if not exists test_table (id integer primary key, name text)" + ) + await db.execute_write( + "insert or ignore into test_table (id, name) values (1, 'Alice')" + ) + # Trigger catalog refresh + await ds.client.get("/") + return ds + + @pytest.mark.asyncio @pytest.mark.parametrize( "method,path,expected_status", @@ -65,3 +83,147 @@ async def test_client_path(datasette, 
prefix, expected_path): assert path == expected_path finally: datasette._settings["base_url"] = original_base_url + + +@pytest.mark.asyncio +async def test_skip_permission_checks_allows_forbidden_access( + datasette_with_permissions, +): + """Test that skip_permission_checks=True bypasses permission checks""" + ds = datasette_with_permissions + + # Without skip_permission_checks, anonymous user should get 403 for protected database + response = await ds.client.get("/test_db.json") + assert response.status_code == 403 + + # With skip_permission_checks=True, should get 200 + response = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response.status_code == 200 + data = response.json() + assert data["database"] == "test_db" + + +@pytest.mark.asyncio +async def test_skip_permission_checks_on_table(datasette_with_permissions): + """Test skip_permission_checks works for table access""" + ds = datasette_with_permissions + + # Without skip_permission_checks, should get 403 + response = await ds.client.get("/test_db/test_table.json") + assert response.status_code == 403 + + # With skip_permission_checks=True, should get table data + response = await ds.client.get( + "/test_db/test_table.json", skip_permission_checks=True + ) + assert response.status_code == 200 + data = response.json() + assert data["rows"] == [{"id": 1, "name": "Alice"}] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "method", ["get", "post", "put", "patch", "delete", "options", "head"] +) +async def test_skip_permission_checks_all_methods(datasette_with_permissions, method): + """Test that skip_permission_checks works with all HTTP methods""" + ds = datasette_with_permissions + + # All methods should work with skip_permission_checks=True + client_method = getattr(ds.client, method) + response = await client_method("/test_db.json", skip_permission_checks=True) + # We don't check status code since some methods might not be allowed, + # but we verify the request doesn't 
fail due to permissions + assert isinstance(response, httpx.Response) + + +@pytest.mark.asyncio +async def test_skip_permission_checks_request_method(datasette_with_permissions): + """Test that skip_permission_checks works with client.request()""" + ds = datasette_with_permissions + + # Without skip_permission_checks + response = await ds.client.request("GET", "/test_db.json") + assert response.status_code == 403 + + # With skip_permission_checks=True + response = await ds.client.request( + "GET", "/test_db.json", skip_permission_checks=True + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_isolated_to_request(datasette_with_permissions): + """Test that skip_permission_checks doesn't affect other concurrent requests""" + ds = datasette_with_permissions + + # First request with skip_permission_checks=True should succeed + response1 = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response1.status_code == 200 + + # Subsequent request without it should still get 403 + response2 = await ds.client.get("/test_db.json") + assert response2.status_code == 403 + + # And another with skip should succeed again + response3 = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response3.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_with_admin_actor(datasette_with_permissions): + """Test that skip_permission_checks works even when actor is provided""" + ds = datasette_with_permissions + + # Admin actor should normally have access + admin_cookies = {"ds_actor": ds.client.actor_cookie({"id": "admin"})} + response = await ds.client.get("/test_db.json", cookies=admin_cookies) + assert response.status_code == 200 + + # Non-admin actor should get 403 + user_cookies = {"ds_actor": ds.client.actor_cookie({"id": "user"})} + response = await ds.client.get("/test_db.json", cookies=user_cookies) + assert response.status_code == 403 + + # Non-admin 
actor with skip_permission_checks=True should get 200 + response = await ds.client.get( + "/test_db.json", cookies=user_cookies, skip_permission_checks=True + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_shows_denied_tables(): + """Test that skip_permission_checks=True shows tables from denied databases in /-/tables.json""" + ds = Datasette( + config={ + "databases": { + "fixtures": {"allow": False} # Deny all access to this database + } + } + ) + await ds.invoke_startup() + db = ds.add_memory_database("fixtures") + await db.execute_write( + "CREATE TABLE test_table (id INTEGER PRIMARY KEY, name TEXT)" + ) + await db.execute_write("INSERT INTO test_table (id, name) VALUES (1, 'Alice')") + await ds._refresh_schemas() + + # Without skip_permission_checks, tables from denied database should not appear in /-/tables.json + response = await ds.client.get("/-/tables.json") + assert response.status_code == 200 + data = response.json() + table_names = [match["name"] for match in data["matches"]] + # Should not see any fixtures tables since access is denied + fixtures_tables = [name for name in table_names if name.startswith("fixtures:")] + assert len(fixtures_tables) == 0 + + # With skip_permission_checks=True, tables from denied database SHOULD appear + response = await ds.client.get("/-/tables.json", skip_permission_checks=True) + assert response.status_code == 200 + data = response.json() + table_names = [match["name"] for match in data["matches"]] + # Should see fixtures tables when permission checks are skipped + assert "fixtures: test_table" in table_names From 257e1c1b1b432e82b3d58e9579a2b5fd57f6a46a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 13:51:58 -0800 Subject: [PATCH 044/299] Release 1.0a21 Refs #2429, #2511, #2578, #2583 --- datasette/version.py | 2 +- docs/changelog.rst | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git 
a/datasette/version.py b/datasette/version.py index 20cb46c7..01f00fcd 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a20" +__version__ = "1.0a21" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index dbcff8cb..7696fd89 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,15 +4,25 @@ Changelog ========= +.. _v1_0_a21: + +1.0a21 (2025-11-05) +------------------- + +- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) +- New ``datasette --get /path --headers`` option for inspecting the headers returned by a path. (:issue:`2578`) +- New ``datasette.client.get(..., skip_permission_checks=True)`` parameter to bypass permission checks when making requests using the internal client. (:issue:`2583`) + .. _v0_65_2: 0.65.2 (2025-11-05) ------------------- -* Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) -* Upgraded for compatibility with Python 3.14. -* Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) -* Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. +- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +- Upgraded for compatibility with Python 3.14. +- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. 
(:issue:`2511`) +- Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. .. _v1_0_a20: @@ -52,22 +62,16 @@ Related changes: - Permission debugging improvements: - The ``/-/allowed`` endpoint shows resources the user is allowed to interact with for different actions. - - ``/-/rules`` shows the raw allow/deny rules that apply to different permission checks. - - ``/-/actions`` lists every available action. - - ``/-/check`` can be used to try out different permission checks for the current actor. Other changes ~~~~~~~~~~~~~ - The internal ``catalog_views`` table now tracks SQLite views alongside tables in the introspection database. (:issue:`2495`) - - Hitting the ``/`` brings up a search interface for navigating to tables that the current user can view. A new ``/-/tables`` endpoint supports this functionality. (:issue:`2523`) - - Datasette attempts to detect some configuration errors on startup. - - Datasette now supports Python 3.14 and no longer tests against Python 3.9. .. 
_v1_0_a19: From 1df4028d783f27a88bd304d47990ae4d0e7147bb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 15:18:17 -0800 Subject: [PATCH 045/299] add_memory_database(memory_name, name=None, route=None) --- datasette/app.py | 6 ++++-- docs/internals.rst | 8 +++++--- tests/test_internals_datasette_client.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 177debe2..45d34991 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -729,8 +729,10 @@ class Datasette: self.databases = new_databases return db - def add_memory_database(self, memory_name): - return self.add_database(Database(self, memory_name=memory_name)) + def add_memory_database(self, memory_name, name=None, route=None): + return self.add_database( + Database(self, memory_name=memory_name), name=name, route=route + ) def remove_database(self, name): self.get_database(name).close() diff --git a/docs/internals.rst b/docs/internals.rst index 468b3f95..2e01a8e8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -781,8 +781,8 @@ Use ``is_mutable=False`` to add an immutable database. .. _datasette_add_memory_database: -.add_memory_database(name) --------------------------- +.add_memory_database(memory_name, name=None, route=None) +-------------------------------------------------------- Adds a shared in-memory database with the specified name: @@ -800,7 +800,9 @@ This is a shortcut for the following: Database(datasette, memory_name="statistics") ) -Using either of these pattern will result in the in-memory database being served at ``/statistics``. +Using either of these patterns will result in the in-memory database being served at ``/statistics``. + +The ``name`` and ``route`` parameters are optional and work the same way as they do for :ref:`datasette_add_database`. .. 
_datasette_remove_database: diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index 55f7392f..a15d294f 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -15,7 +15,7 @@ async def datasette_with_permissions(): """A datasette instance with permission restrictions for testing""" ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) await ds.invoke_startup() - db = ds.add_memory_database("test_db") + db = ds.add_memory_database("test_datasette_with_permissions", name="test_db") await db.execute_write( "create table if not exists test_table (id integer primary key, name text)" ) From 8bc9b1ee03c3e9deb43f4df5fc257e3093e7f484 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 7 Nov 2025 12:01:23 -0800 Subject: [PATCH 046/299] /-/schema and /db/-/schema and /db/table/-/schema pages (plus .json/.md) * Add schema endpoints for databases, instances, and tables Closes: #2586 This commit adds new endpoints to view database schemas in multiple formats: - /-/schema - View schemas for all databases (HTML, JSON, MD) - /database/-/schema - View schema for a specific database (HTML, JSON, MD) - /database/table/-/schema - View schema for a specific table (JSON, MD) Features: - Supports HTML, JSON, and Markdown output formats - Respects view-database and view-table permissions - Uses group_concat(sql, ';' || CHAR(10)) from sqlite_master to retrieve schemas - Includes comprehensive tests covering all formats and permission checks The JSON endpoints return: - Instance level: {"schemas": [{"database": "name", "schema": "sql"}, ...]} - Database level: {"database": "name", "schema": "sql"} - Table level: {"database": "name", "table": "name", "schema": "sql"} Markdown format provides formatted output with headings and SQL code blocks. 
Co-Authored-By: Claude --- datasette/app.py | 15 ++ datasette/templates/database.html | 2 +- datasette/templates/schema.html | 41 +++++ datasette/views/special.py | 179 ++++++++++++++++++++- docs/pages.rst | 43 ++++++ tests/test_html.py | 2 +- tests/test_schema_endpoints.py | 248 ++++++++++++++++++++++++++++++ 7 files changed, 526 insertions(+), 4 deletions(-) create mode 100644 datasette/templates/schema.html create mode 100644 tests/test_schema_endpoints.py diff --git a/datasette/app.py b/datasette/app.py index 45d34991..60a20032 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -58,6 +58,9 @@ from .views.special import ( PermissionRulesView, PermissionCheckView, TablesView, + InstanceSchemaView, + DatabaseSchemaView, + TableSchemaView, ) from .views.table import ( TableInsertView, @@ -1910,6 +1913,10 @@ class Datasette: TablesView.as_view(self), r"/-/tables(\.(?Pjson))?$", ) + add_route( + InstanceSchemaView.as_view(self), + r"/-/schema(\.(?Pjson|md))?$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", @@ -1951,6 +1958,10 @@ class Datasette: r"/(?P[^\/\.]+)(\.(?P\w+))?$", ) add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") + add_route( + DatabaseSchemaView.as_view(self), + r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", + ) add_route( wrap_view(QueryView, self), r"/(?P[^\/\.]+)/-/query(\.(?P\w+))?$", @@ -1975,6 +1986,10 @@ class Datasette: TableDropView.as_view(self), r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/drop$", ) + add_route( + TableSchemaView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/schema(\.(?Pjson|md))?$", + ) add_route( RowDeleteView.as_view(self), r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)/-/delete$", diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 66f288dc..42b4ca0b 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -56,7 +56,7 @@ {% endif %} {% if tables %} -

Tables

+

Tables schema

{% endif %} {% for table in tables %} diff --git a/datasette/templates/schema.html b/datasette/templates/schema.html new file mode 100644 index 00000000..2fd8637e --- /dev/null +++ b/datasette/templates/schema.html @@ -0,0 +1,41 @@ +{% extends "base.html" %} + +{% block title %}{% if is_instance %}Schema for all databases{% elif table_name %}Schema for {{ schemas[0].database }}.{{ table_name }}{% else %}Schema for {{ schemas[0].database }}{% endif %}{% endblock %} + +{% block body_class %}schema{% endblock %} + +{% block crumbs %} +{% if is_instance %} +{{ crumbs.nav(request=request) }} +{% elif table_name %} +{{ crumbs.nav(request=request, database=schemas[0].database, table=table_name) }} +{% else %} +{{ crumbs.nav(request=request, database=schemas[0].database) }} +{% endif %} +{% endblock %} + +{% block content %} + + +{% for item in schemas %} + {% if is_instance %} +

{{ item.database }}

+ {% endif %} + + {% if item.schema %} +
{{ item.schema }}
+ {% else %} +

No schema available for this database.

+ {% endif %} + + {% if not loop.last %} +
+ {% endif %} +{% endfor %} + +{% if not schemas %} +

No databases with viewable schemas found.

+{% endif %} +{% endblock %} diff --git a/datasette/views/special.py b/datasette/views/special.py index a1d736c5..411363ec 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -761,8 +761,6 @@ class ApiExplorerView(BaseView): async def example_links(self, request): databases = [] for name, db in self.ds.databases.items(): - if name == "_internal": - continue database_visible, _ = await self.ds.check_visibility( request.actor, action="view-database", @@ -981,3 +979,180 @@ class TablesView(BaseView): ] return Response.json({"matches": matches, "truncated": truncated}) + + +class SchemaBaseView(BaseView): + """Base class for schema views with common response formatting.""" + + has_json_alternate = False + + async def get_database_schema(self, database_name): + """Get schema SQL for a database.""" + db = self.ds.databases[database_name] + result = await db.execute( + "select group_concat(sql, ';' || CHAR(10)) as schema from sqlite_master where sql is not null" + ) + row = result.first() + return row["schema"] if row and row["schema"] else "" + + def format_json_response(self, data): + """Format data as JSON response with CORS headers if needed.""" + headers = {} + if self.ds.cors: + add_cors_headers(headers) + return Response.json(data, headers=headers) + + def format_error_response(self, error_message, format_, status=404): + """Format error response based on requested format.""" + if format_ == "json": + headers = {} + if self.ds.cors: + add_cors_headers(headers) + return Response.json( + {"ok": False, "error": error_message}, status=status, headers=headers + ) + else: + return Response.text(error_message, status=status) + + def format_markdown_response(self, heading, schema): + """Format schema as Markdown response.""" + md_output = f"# {heading}\n\n```sql\n{schema}\n```\n" + return Response.text( + md_output, headers={"content-type": "text/markdown; charset=utf-8"} + ) + + async def format_html_response( + self, request, schemas, 
is_instance=False, table_name=None + ): + """Format schema as HTML response.""" + context = { + "schemas": schemas, + "is_instance": is_instance, + } + if table_name: + context["table_name"] = table_name + return await self.render(["schema.html"], request=request, context=context) + + +class InstanceSchemaView(SchemaBaseView): + """ + Displays schema for all databases in the instance. + Supports HTML, JSON, and Markdown formats. + """ + + name = "instance_schema" + + async def get(self, request): + format_ = request.url_vars.get("format") or "html" + + # Get all databases the actor can view + allowed_databases_page = await self.ds.allowed_resources( + "view-database", + request.actor, + ) + allowed_databases = [r.parent async for r in allowed_databases_page.all()] + + # Get schema for each database + schemas = [] + for database_name in allowed_databases: + schema = await self.get_database_schema(database_name) + schemas.append({"database": database_name, "schema": schema}) + + if format_ == "json": + return self.format_json_response({"schemas": schemas}) + elif format_ == "md": + md_parts = [ + f"# Schema for {item['database']}\n\n```sql\n{item['schema']}\n```" + for item in schemas + ] + return Response.text( + "\n\n".join(md_parts), + headers={"content-type": "text/markdown; charset=utf-8"}, + ) + else: + return await self.format_html_response(request, schemas, is_instance=True) + + +class DatabaseSchemaView(SchemaBaseView): + """ + Displays schema for a specific database. + Supports HTML, JSON, and Markdown formats. 
+ """ + + name = "database_schema" + + async def get(self, request): + database_name = request.url_vars["database"] + format_ = request.url_vars.get("format") or "html" + + # Check if database exists + if database_name not in self.ds.databases: + return self.format_error_response("Database not found", format_) + + # Check view-database permission + await self.ds.ensure_permission( + action="view-database", + resource=DatabaseResource(database=database_name), + actor=request.actor, + ) + + schema = await self.get_database_schema(database_name) + + if format_ == "json": + return self.format_json_response( + {"database": database_name, "schema": schema} + ) + elif format_ == "md": + return self.format_markdown_response(f"Schema for {database_name}", schema) + else: + schemas = [{"database": database_name, "schema": schema}] + return await self.format_html_response(request, schemas) + + +class TableSchemaView(SchemaBaseView): + """ + Displays schema for a specific table. + Supports HTML, JSON, and Markdown formats. + """ + + name = "table_schema" + + async def get(self, request): + database_name = request.url_vars["database"] + table_name = request.url_vars["table"] + format_ = request.url_vars.get("format") or "html" + + # Check view-table permission + await self.ds.ensure_permission( + action="view-table", + resource=TableResource(database=database_name, table=table_name), + actor=request.actor, + ) + + # Get schema for the table + db = self.ds.databases[database_name] + result = await db.execute( + "select sql from sqlite_master where name = ? 
and sql is not null", + [table_name], + ) + row = result.first() + + # Return 404 if table doesn't exist + if not row or not row["sql"]: + return self.format_error_response("Table not found", format_) + + schema = row["sql"] + + if format_ == "json": + return self.format_json_response( + {"database": database_name, "table": table_name, "schema": schema} + ) + elif format_ == "md": + return self.format_markdown_response( + f"Schema for {database_name}.{table_name}", schema + ) + else: + schemas = [{"database": database_name, "schema": schema}] + return await self.format_html_response( + request, schemas, table_name=table_name + ) diff --git a/docs/pages.rst b/docs/pages.rst index 3d6530a3..2e54ce2f 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -107,3 +107,47 @@ Note that this URL includes the encoded primary key of the record. Here's that same page as JSON: `../people/uk~2Eorg~2Epublicwhip~2Fperson~2F10001.json `_ + + +.. _pages_schemas: + +Schemas +======= + +Datasette offers ``/-/schema`` endpoints to expose the SQL schema for databases and tables. + +.. _InstanceSchemaView: + +Instance schema +--------------- + +Access ``/-/schema`` to see the complete schema for all attached databases in the Datasette instance. + +Use ``/-/schema.md`` to get the same information as Markdown. + +Use ``/-/schema.json`` to get the same information as JSON, which looks like this: + +.. code-block:: json + + { + "schemas": [ + { + "database": "content", + "schema": "create table posts ..." + } + ] + } + +.. _DatabaseSchemaView: + +Database schema +--------------- + +Use ``/database-name/-/schema`` to see the complete schema for a specific database. The ``.md`` and ``.json`` extensions work here too. The JSON returns an object with ``"database"`` and ``"schema"`` keys. + +.. _TableSchemaView: + +Table schema +------------ + +Use ``/database-name/table-name/-/schema`` to see the schema for a specific table. The ``.md`` and ``.json`` extensions work here too.
The JSON returns an object with ``"database"``, ``"table"``, and ``"schema"`` keys. diff --git a/tests/test_html.py b/tests/test_html.py index dbe993c4..9997279b 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -142,7 +142,7 @@ async def test_database_page(ds_client): # And a list of tables for fragment in ( - '

Tables

', + '

Tables', '

sortable

', "

pk, foreign_key_with_label, foreign_key_with_blank_label, ", ): diff --git a/tests/test_schema_endpoints.py b/tests/test_schema_endpoints.py new file mode 100644 index 00000000..5500a7b0 --- /dev/null +++ b/tests/test_schema_endpoints.py @@ -0,0 +1,248 @@ +import asyncio +import pytest +import pytest_asyncio +from datasette.app import Datasette + + +@pytest_asyncio.fixture(scope="module") +async def schema_ds(): + """Create a Datasette instance with test databases and permission config.""" + ds = Datasette( + config={ + "databases": { + "schema_private_db": {"allow": {"id": "root"}}, + } + } + ) + + # Create public database with multiple tables + public_db = ds.add_memory_database("schema_public_db") + await public_db.execute_write( + "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT)" + ) + await public_db.execute_write( + "CREATE TABLE IF NOT EXISTS posts (id INTEGER PRIMARY KEY, title TEXT)" + ) + await public_db.execute_write( + "CREATE VIEW IF NOT EXISTS recent_posts AS SELECT * FROM posts ORDER BY id DESC" + ) + + # Create a database with restricted access (requires root permission) + private_db = ds.add_memory_database("schema_private_db") + await private_db.execute_write( + "CREATE TABLE IF NOT EXISTS secret_data (id INTEGER PRIMARY KEY, value TEXT)" + ) + + # Create an empty database + ds.add_memory_database("schema_empty_db") + + return ds + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for", "CREATE TABLE"]), + ], +) +async def test_database_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /database/-/schema endpoint in different formats.""" + url = "/schema_public_db/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "database" in data + assert 
data["database"] == "schema_public_db" + assert "schema" in data + assert "CREATE TABLE users" in data["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for all databases"]), + ], +) +async def test_instance_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /-/schema endpoint in different formats.""" + url = "/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "schemas" in data + assert isinstance(data["schemas"], list) + db_names = [item["database"] for item in data["schemas"]] + # Should see schema_public_db and schema_empty_db, but not schema_private_db (anonymous user) + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" not in db_names + # Check schemas are present + for item in data["schemas"]: + if item["database"] == "schema_public_db": + assert "CREATE TABLE users" in item["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for users"]), + ], +) +async def test_table_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /database/table/-/schema endpoint in different formats.""" + url = "/schema_public_db/users/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "database" in data + assert data["database"] == "schema_public_db" + assert 
"table" in data + assert data["table"] == "users" + assert "schema" in data + assert "CREATE TABLE users" in data["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "url", + [ + "/schema_private_db/-/schema.json", + "/schema_private_db/secret_data/-/schema.json", + ], +) +async def test_schema_permission_enforcement(schema_ds, url): + """Test that permissions are enforced for schema endpoints.""" + # Anonymous user should get 403 + response = await schema_ds.client.get(url) + assert response.status_code == 403 + + # Authenticated user with permission should succeed + response = await schema_ds.client.get( + url, + cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_instance_schema_respects_database_permissions(schema_ds): + """Test that /-/schema only shows databases the user can view.""" + # Anonymous user should only see public databases + response = await schema_ds.client.get("/-/schema.json") + assert response.status_code == 200 + data = response.json() + db_names = [item["database"] for item in data["schemas"]] + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" not in db_names + + # Authenticated user should see all databases + response = await schema_ds.client.get( + "/-/schema.json", + cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, + ) + assert response.status_code == 200 + data = response.json() + db_names = [item["database"] for item in data["schemas"]] + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" in db_names + + +@pytest.mark.asyncio +async def test_database_schema_with_multiple_tables(schema_ds): + """Test schema with multiple tables in a database.""" + response = await 
schema_ds.client.get("/schema_public_db/-/schema.json") + assert response.status_code == 200 + data = response.json() + schema = data["schema"] + + # All objects should be in the schema + assert "CREATE TABLE users" in schema + assert "CREATE TABLE posts" in schema + assert "CREATE VIEW recent_posts" in schema + + +@pytest.mark.asyncio +async def test_empty_database_schema(schema_ds): + """Test schema for an empty database.""" + response = await schema_ds.client.get("/schema_empty_db/-/schema.json") + assert response.status_code == 200 + data = response.json() + assert data["database"] == "schema_empty_db" + assert data["schema"] == "" + + +@pytest.mark.asyncio +async def test_database_not_exists(schema_ds): + """Test schema for a non-existent database returns 404.""" + # Test JSON format + response = await schema_ds.client.get("/nonexistent_db/-/schema.json") + assert response.status_code == 404 + data = response.json() + assert data["ok"] is False + assert "not found" in data["error"].lower() + + # Test HTML format (returns text) + response = await schema_ds.client.get("/nonexistent_db/-/schema") + assert response.status_code == 404 + assert "not found" in response.text.lower() + + # Test Markdown format (returns text) + response = await schema_ds.client.get("/nonexistent_db/-/schema.md") + assert response.status_code == 404 + assert "not found" in response.text.lower() + + +@pytest.mark.asyncio +async def test_table_not_exists(schema_ds): + """Test schema for a non-existent table returns 404.""" + # Test JSON format + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.json") + assert response.status_code == 404 + data = response.json() + assert data["ok"] is False + assert "not found" in data["error"].lower() + + # Test HTML format (returns text) + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema") + assert response.status_code == 404 + assert "not found" in response.text.lower() + + # Test Markdown format 
(returns text) + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.md") + assert response.status_code == 404 + assert "not found" in response.text.lower() From a508fc4a8e63ec28d9c1516f60dce718cc10f330 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 7 Nov 2025 16:50:00 -0800 Subject: [PATCH 047/299] Remove permission_allowed hook docs, closes #2588 Refs #2528 --- docs/plugin_hooks.rst | 72 ++------------------------------------ tests/plugins/my_plugin.py | 4 +-- tests/test_html.py | 8 ++--- tests/test_plugins.py | 2 +- 4 files changed, 10 insertions(+), 76 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 93f7f476..118a6bde 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1314,72 +1314,6 @@ This example plugin causes 0 results to be returned if ``?_nothing=1`` is added Example: `datasette-leaflet-freedraw `_ -.. _plugin_hook_permission_allowed: - -permission_allowed(datasette, actor, action, resource) ------------------------------------------------------- - -``datasette`` - :ref:`internals_datasette` - You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. - -``actor`` - dictionary - The current actor, as decided by :ref:`plugin_hook_actor_from_request`. - -``action`` - string - The action to be performed, e.g. ``"edit-table"``. - -``resource`` - string or None - An identifier for the individual resource, e.g. the name of the table. - -Called to check that an actor has permission to perform an action on a resource. Can return ``True`` if the action is allowed, ``False`` if the action is not allowed or ``None`` if the plugin does not have an opinion one way or the other. - -Here's an example plugin which randomly selects if a permission should be allowed or denied, except for ``view-instance`` which always uses the default permission scheme instead. - -.. 
code-block:: python - - from datasette import hookimpl - import random - - - @hookimpl - def permission_allowed(action): - if action != "view-instance": - # Return True or False at random - return random.random() > 0.5 - # Returning None falls back to default permissions - -This function can alternatively return an awaitable function which itself returns ``True``, ``False`` or ``None``. You can use this option if you need to execute additional database queries using ``await datasette.execute(...)``. - -Here's an example that allows users to view the ``admin_log`` table only if their actor ``id`` is present in the ``admin_users`` table. It aso disallows arbitrary SQL queries for the ``staff.db`` database for all users. - -.. code-block:: python - - @hookimpl - def permission_allowed(datasette, actor, action, resource): - async def inner(): - if action == "execute-sql" and resource == "staff": - return False - if action == "view-table" and resource == ( - "staff", - "admin_log", - ): - if not actor: - return False - user_id = actor["id"] - result = await datasette.get_database( - "staff" - ).execute( - "select count(*) from admin_users where user_id = :user_id", - {"user_id": user_id}, - ) - return result.first()[0] > 0 - - return inner - -See :ref:`built-in permissions ` for a full list of permissions that are included in Datasette core. - -Example: `datasette-permissions-sql `_ - .. _plugin_hook_permission_resources_sql: permission_resources_sql(datasette, actor, action) @@ -1981,16 +1915,16 @@ This example adds a new database action for creating a table, if the user has th .. 
code-block:: python from datasette import hookimpl + from datasette.resources import DatabaseResource @hookimpl def database_actions(datasette, actor, database): async def inner(): - if not await datasette.permission_allowed( + if not await datasette.allowed( actor, "edit-schema", - resource=database, - default=False, + resource=DatabaseResource(database), ): return [] return [ diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 1435ce28..96a8b4d7 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -469,7 +469,7 @@ def register_actions(datasette): description="View a collection", resource_class=DatabaseResource, ), - # Test actions for test_hook_permission_allowed (global actions - no resource_class) + # Test actions for test_hook_custom_allowed (global actions - no resource_class) Action( name="this_is_allowed", abbr=None, @@ -553,7 +553,7 @@ def register_actions(datasette): def permission_resources_sql(datasette, actor, action): from datasette.permissions import PermissionSQL - # Handle test actions used in test_hook_permission_allowed + # Handle test actions used in test_hook_custom_allowed if action == "this_is_allowed": return PermissionSQL.allow(reason="test plugin allows this_is_allowed") elif action == "this_is_denied": diff --git a/tests/test_html.py b/tests/test_html.py index 9997279b..35b839ec 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -935,7 +935,7 @@ async def test_edit_sql_link_on_canned_queries(ds_client, path, expected): @pytest.mark.parametrize( - "permission_allowed", + "has_permission", [ pytest.param( True, @@ -943,15 +943,15 @@ async def test_edit_sql_link_on_canned_queries(ds_client, path, expected): False, ], ) -def test_edit_sql_link_not_shown_if_user_lacks_permission(permission_allowed): +def test_edit_sql_link_not_shown_if_user_lacks_permission(has_permission): with make_app_client( config={ - "allow_sql": None if permission_allowed else {"id": "not-you"}, + "allow_sql": 
None if has_permission else {"id": "not-you"}, "databases": {"fixtures": {"queries": {"simple": "select 1 + 1"}}}, } ) as client: response = client.get("/fixtures/simple") - if permission_allowed: + if has_permission: assert "Edit SQL" in response.text else: assert "Edit SQL" not in response.text diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 971b7e82..4a8c60d7 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -677,7 +677,7 @@ async def test_existing_scope_actor_respected(ds_client): ("this_is_denied_async", False), ], ) -async def test_hook_permission_allowed(action, expected): +async def test_hook_custom_allowed(action, expected): # Test actions and permission logic are defined in tests/plugins/my_plugin.py ds = Datasette(plugins_dir=PLUGINS_DIR) await ds.invoke_startup() From 354d7a28732b701d5ebee334fc32a6e6e74ce0b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 9 Nov 2025 15:42:11 -0800 Subject: [PATCH 048/299] Bump a few versions, deploy on push to main Refs: - #2511 --- .github/workflows/deploy-latest.yml | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 8ffdbfd5..9f53b01e 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -2,10 +2,10 @@ name: Deploy latest.datasette.io on: workflow_dispatch: - # push: - # branches: - # - main - # - 1.0-dev + push: + branches: + - main + # - 1.0-dev permissions: contents: read @@ -15,19 +15,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v3 + uses: actions/checkout@v5 - name: Set up Python uses: actions/setup-python@v6 - # Using Python 3.10 for gcloud compatibility: with: - python-version: "3.10" - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }} - restore-keys: | - ${{ 
runner.os }}-pip- + python-version: "3.13" + cache: pip - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -104,7 +97,7 @@ jobs: # cat metadata.json - id: auth name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 + uses: google-github-actions/auth@v3 with: credentials_json: ${{ secrets.GCP_SA_KEY }} - name: Set up Cloud SDK From 291f71ec6b52bb7d346f8cad74ca60122db392e3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 11 Nov 2025 21:59:26 -0800 Subject: [PATCH 049/299] Remove out-dated plugin_hook_permission_allowed references --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7696fd89..66d46bce 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -278,7 +278,7 @@ To avoid similar mistakes in the future the ``datasette.permission_allowed()`` m Permission checks now consider opinions from every plugin ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the :ref:`permission_allowed() ` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) +The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the ``permission_allowed()`` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) Datasette now consults every plugin and checks to see if any of them returned ``False`` (the veto rule), and if none of them did, it then checks to see if any of them returned ``True``. @@ -1397,7 +1397,7 @@ You can use the new ``"allow"`` block syntax in ``metadata.json`` (or ``metadata See :ref:`authentication_permissions_allow` for more details. -Plugins can implement their own custom permission checks using the new :ref:`plugin_hook_permission_allowed` hook. 
+Plugins can implement their own custom permission checks using the new ``permission_allowed()`` plugin hook. A new debug page at ``/-/permissions`` shows recent permission checks, to help administrators and plugin authors understand exactly what checks are being performed. This tool defaults to only being available to the root user, but can be exposed to other users by plugins that respond to the ``permissions-debug`` permission. (:issue:`788`) From 32a425868cd6b58c66d9e255fd59017be0cd34c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 06:07:16 -0800 Subject: [PATCH 050/299] Bump black from 25.9.0 to 25.11.0 in the python-packages group (#2590) Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 25.9.0 to 25.11.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.9.0...25.11.0) --- updated-dependencies: - dependency-name: black dependency-version: 25.11.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1395ce82..4f487458 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ test = [ "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", - "black==25.9.0", + "black==25.11.0", "blacken-docs==1.20.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 23a640d38bebd55d9cc3b13a83ef6bc89d717fab Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 12 Nov 2025 16:14:21 -0800 Subject: [PATCH 051/299] datasette serve --default-deny option (#2593) Closes #2592 --- datasette/app.py | 2 + datasette/cli.py | 7 ++ datasette/default_permissions.py | 4 + docs/authentication.rst | 33 ++++++++ docs/cli-reference.rst | 1 + tests/test_cli.py | 1 + tests/test_default_deny.py | 129 +++++++++++++++++++++++++++++++ 7 files changed, 177 insertions(+) create mode 100644 tests/test_default_deny.py diff --git a/datasette/app.py b/datasette/app.py index 60a20032..5f2a484e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -304,6 +304,7 @@ class Datasette: crossdb=False, nolock=False, internal=None, + default_deny=False, ): self._startup_invoked = False assert config_dir is None or isinstance( @@ -512,6 +513,7 @@ class Datasette: self._permission_checks = collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) self.root_enabled = False + self.default_deny = default_deny self.client = DatasetteClient(self) async def apply_metadata_json(self): diff --git a/datasette/cli.py b/datasette/cli.py index aaf1b244..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -438,6 +438,11 @@ def uninstall(packages, yes): help="Output URL that sets a cookie authenticating the root user", is_flag=True, ) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) @click.option( 
"--get", help="Run an HTTP GET request against this path, print results and exit", @@ -514,6 +519,7 @@ def serve( settings, secret, root, + default_deny, get, headers, token, @@ -594,6 +600,7 @@ def serve( crossdb=crossdb, nolock=nolock, internal=internal, + default_deny=default_deny, ) # Separate directories from files diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 5642cdfe..12e6c1ef 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -352,6 +352,10 @@ async def default_action_permissions_sql(datasette, actor, action): With the INTERSECT-based restriction approach, these defaults are always generated and then filtered by restriction_sql if the actor has restrictions. """ + # Skip default allow rules if default_deny is enabled + if datasette.default_deny: + return None + default_allow_actions = { "view-instance", "view-database", diff --git a/docs/authentication.rst b/docs/authentication.rst index e69b0aa4..69a6f606 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -83,6 +83,39 @@ Datasette's built-in view actions (``view-database``, ``view-table`` etc) are al Other actions, including those introduced by plugins, will default to *deny*. +.. _authentication_default_deny: + +Denying all permissions by default +---------------------------------- + +By default, Datasette allows unauthenticated access to view databases, tables, and execute SQL queries. + +You may want to run Datasette in a mode where **all** access is denied by default, and you explicitly grant permissions only to authenticated users, either using the :ref:`--root mechanism ` or through :ref:`configuration file rules ` or plugins. 
+ +Use the ``--default-deny`` command-line option to run Datasette in this mode:: + + datasette --default-deny data.db --root + +With ``--default-deny`` enabled: + +* Anonymous users are denied access to view the instance, databases, tables, and queries +* Authenticated users are also denied access unless they're explicitly granted permissions +* The root user (when using ``--root``) still has access to everything +* You can grant permissions using :ref:`configuration file rules ` or plugins + +For example, to allow only a specific user to access your instance:: + + datasette --default-deny data.db --config datasette.yaml + +Where ``datasette.yaml`` contains: + +.. code-block:: yaml + + allow: + id: alice + +This configuration will deny access to everyone except the user with ``id`` of ``alice``. + .. _authentication_permissions_explained: How permissions are resolved diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index f002d05a..7ca88c4e 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -119,6 +119,7 @@ Once started you can access it at ``http://localhost:8001`` signed cookies --root Output URL that sets a cookie authenticating the root user + --default-deny Deny all permissions by default --get TEXT Run an HTTP GET request against this path, print results and exit --headers Include HTTP headers in --get output diff --git a/tests/test_cli.py b/tests/test_cli.py index 3bb360fb..21b86569 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -142,6 +142,7 @@ def test_metadata_yaml(): settings=[], secret=None, root=False, + default_deny=False, token=None, actor=None, version_note=None, diff --git a/tests/test_default_deny.py b/tests/test_default_deny.py new file mode 100644 index 00000000..81e95b84 --- /dev/null +++ b/tests/test_default_deny.py @@ -0,0 +1,129 @@ +import pytest +from datasette.app import Datasette +from datasette.resources import DatabaseResource, TableResource + + +@pytest.mark.asyncio +async def 
test_default_deny_denies_default_permissions(): + """Test that default_deny=True denies default permissions""" + # Without default_deny, anonymous users can view instance/database/tables + ds_normal = Datasette() + await ds_normal.invoke_startup() + + # Add a test database + db = ds_normal.add_memory_database("test_db_normal") + await db.execute_write("create table test_table (id integer primary key)") + await ds_normal._refresh_schemas() # Trigger catalog refresh + + # Test default behavior - anonymous user should be able to view + response = await ds_normal.client.get("/") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal/test_table") + assert response.status_code == 200 + + # With default_deny=True, anonymous users should be denied + ds_deny = Datasette(default_deny=True) + await ds_deny.invoke_startup() + + # Add the same test database + db = ds_deny.add_memory_database("test_db_deny") + await db.execute_write("create table test_table (id integer primary key)") + await ds_deny._refresh_schemas() # Trigger catalog refresh + + # Anonymous user should be denied + response = await ds_deny.client.get("/") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny/test_table") + assert response.status_code == 403 + + +@pytest.mark.asyncio +async def test_default_deny_with_root_user(): + """Test that root user still has access when default_deny=True""" + ds = Datasette(default_deny=True) + ds.root_enabled = True + await ds.invoke_startup() + + root_actor = {"id": "root"} + + # Root user should have all permissions even with default_deny + assert await ds.allowed(action="view-instance", actor=root_actor) is True + assert ( + await ds.allowed( + action="view-database", + actor=root_actor, + 
resource=DatabaseResource("test_db"), + ) + is True + ) + assert ( + await ds.allowed( + action="view-table", + actor=root_actor, + resource=TableResource("test_db", "test_table"), + ) + is True + ) + assert ( + await ds.allowed( + action="execute-sql", actor=root_actor, resource=DatabaseResource("test_db") + ) + is True + ) + + +@pytest.mark.asyncio +async def test_default_deny_with_config_allow(): + """Test that config allow rules still work with default_deny=True""" + ds = Datasette(default_deny=True, config={"allow": {"id": "user1"}}) + await ds.invoke_startup() + + # Anonymous user should be denied + assert await ds.allowed(action="view-instance", actor=None) is False + + # Authenticated user with explicit permission should have access + assert await ds.allowed(action="view-instance", actor={"id": "user1"}) is True + + # Different user should be denied + assert await ds.allowed(action="view-instance", actor={"id": "user2"}) is False + + +@pytest.mark.asyncio +async def test_default_deny_basic_permissions(): + """Test that default_deny=True denies basic permissions""" + ds = Datasette(default_deny=True) + await ds.invoke_startup() + + # Anonymous user should be denied all default permissions + assert await ds.allowed(action="view-instance", actor=None) is False + assert ( + await ds.allowed( + action="view-database", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + assert ( + await ds.allowed( + action="view-table", + actor=None, + resource=TableResource("test_db", "test_table"), + ) + is False + ) + assert ( + await ds.allowed( + action="execute-sql", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + + # Authenticated user without explicit permission should also be denied + assert await ds.allowed(action="view-instance", actor={"id": "user"}) is False From 5125bef5735c0823b72b27088cb11a189502e323 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 09:56:06 -0800 Subject: [PATCH 052/299] 
datasette.in_client() method, closes #2594 --- datasette/app.py | 63 ++++++++++++----- docs/internals.rst | 22 ++++++ tests/test_internals_datasette_client.py | 86 ++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 18 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5f2a484e..a5efdad5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2,6 +2,7 @@ from __future__ import annotations from asgi_csrf import Errors import asyncio +import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: @@ -130,6 +131,22 @@ from .resources import DatabaseResource, TableResource app_root = Path(__file__).parent.parent +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) + + +class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" + + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False + + @dataclasses.dataclass class PermissionCheck: """Represents a logged permission check for debugging purposes.""" @@ -666,6 +683,14 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. 
+ """ + return _in_datasette_client.get() + def create_token( self, actor_id: str, @@ -2406,19 +2431,20 @@ class DatasetteClient: async def _request(self, method, path, skip_permission_checks=False, **kwargs): from datasette.permissions import SkipPermissions - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), ) as client: return await getattr(client, method)(self._fix(path), **kwargs) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) async def get(self, path, skip_permission_checks=False, **kwargs): return await self._request( @@ -2470,8 +2496,17 @@ class DatasetteClient: from datasette.permissions import SkipPermissions avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), @@ -2479,11 +2514,3 @@ class DatasetteClient: return await client.request( method, self._fix(path, avoid_path_rewrites), **kwargs ) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", 
None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) diff --git a/docs/internals.rst b/docs/internals.rst index 2e01a8e8..09fb7572 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,28 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +Detecting internal client requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``datasette.in_client()`` - returns bool + Returns ``True`` if the current code is executing within a ``datasette.client`` request, ``False`` otherwise. + +This method is useful for plugins that need to behave differently when called through ``datasette.client`` versus when handling external HTTP requests. + +Example usage: + +.. code-block:: python + + async def fetch_documents(datasette): + if not datasette.in_client(): + return Response.text( + "Only available via internal client requests", + status=403 + ) + ... + +Note that ``datasette.in_client()`` is independent of ``skip_permission_checks``. A request made through ``datasette.client`` will always have ``in_client()`` return ``True``, regardless of whether ``skip_permission_checks`` is set. + .. 
_internals_datasette_urls: datasette.urls diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index a15d294f..b254c5e4 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -227,3 +227,89 @@ async def test_skip_permission_checks_shows_denied_tables(): table_names = [match["name"] for match in data["matches"]] # Should see fixtures tables when permission checks are skipped assert "fixtures: test_table" in table_names + + +@pytest.mark.asyncio +async def test_in_client_returns_false_outside_request(datasette): + """Test that datasette.in_client() returns False outside of a client request""" + assert datasette.in_client() is False + + +@pytest.mark.asyncio +async def test_in_client_returns_true_inside_request(): + """Test that datasette.in_client() returns True inside a client request""" + from datasette import hookimpl, Response + from datasette.plugins import pm + + class TestPlugin: + __name__ = "test_in_client_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + # Assert in_client() returns True within the view + assert datasette.in_client() is True + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_plugin") + try: + ds = Datasette() + await ds.invoke_startup() + + # Outside of a client request, should be False + assert ds.in_client() is False + + # Make a request via datasette.client + response = await ds.client.get("/-/test-in-client") + assert response.status_code == 200 + assert response.json()["in_client"] is True + + # After the request, should be False again + assert ds.in_client() is False + finally: + pm.unregister(name="test_in_client_plugin") + + +@pytest.mark.asyncio +async def test_in_client_with_skip_permission_checks(): + """Test that in_client() works regardless of skip_permission_checks value""" + from 
datasette import hookimpl + from datasette.plugins import pm + from datasette.utils.asgi import Response + + in_client_values = [] + + class TestPlugin: + __name__ = "test_in_client_skip_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + in_client_values.append(datasette.in_client()) + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_skip_plugin") + try: + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + + # Request without skip_permission_checks + await ds.client.get("/-/test-in-client") + # Request with skip_permission_checks=True + await ds.client.get("/-/test-in-client", skip_permission_checks=True) + + # Both should have detected in_client as True + assert ( + len(in_client_values) == 2 + ), f"Expected 2 values, got {len(in_client_values)}" + assert all(in_client_values), f"Expected all True, got {in_client_values}" + finally: + pm.unregister(name="test_in_client_skip_plugin") From 4b4add4d311ce9c8b3e6b08b2f81db1bbd9cbf7e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:31:03 -0800 Subject: [PATCH 053/299] datasette.pm property, closes #2595 --- datasette/app.py | 16 +++++++++- datasette/plugins.py | 19 +++++++----- docs/internals.rst | 2 +- docs/testing_plugins.rst | 9 +++--- tests/test_actions_sql.py | 25 ++++++++-------- tests/test_allowed_resources.py | 25 ++++++++-------- tests/test_docs_plugins.py | 8 ++--- tests/test_internals_datasette_client.py | 18 +++++------ tests/test_permission_endpoints.py | 10 +++---- tests/test_plugins.py | 38 ++++++++++++------------ tests/test_restriction_sql.py | 20 ++++++------- 11 files changed, 101 insertions(+), 89 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index a5efdad5..2d8283a4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -631,6 +631,17 @@ class Datasette: 
def urls(self): return Urls(self) + @property + def pm(self): + """ + Return the global plugin manager instance. + + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. + """ + return pm + async def invoke_startup(self): # This must be called for Datasette to be in a usable state if self._startup_invoked: @@ -2415,7 +2426,10 @@ class DatasetteClient: def __init__(self, ds): self.ds = ds - self.app = ds.app() + + @property + def app(self): + return self.ds.app() def actor_cookie(self, actor): # Utility method, mainly for tests diff --git a/datasette/plugins.py b/datasette/plugins.py index 392ab60d..e9818885 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -94,21 +94,24 @@ def get_plugins(): for plugin in pm.get_plugins(): static_path = None templates_path = None - if plugin.__name__ not in DEFAULT_PLUGINS: + plugin_name = ( + plugin.__name__ + if hasattr(plugin, "__name__") + else plugin.__class__.__name__ + ) + if plugin_name not in DEFAULT_PLUGINS: try: - if (importlib_resources.files(plugin.__name__) / "static").is_dir(): - static_path = str( - importlib_resources.files(plugin.__name__) / "static" - ) - if (importlib_resources.files(plugin.__name__) / "templates").is_dir(): + if (importlib_resources.files(plugin_name) / "static").is_dir(): + static_path = str(importlib_resources.files(plugin_name) / "static") + if (importlib_resources.files(plugin_name) / "templates").is_dir(): templates_path = str( - importlib_resources.files(plugin.__name__) / "templates" + importlib_resources.files(plugin_name) / "templates" ) except (TypeError, ModuleNotFoundError): # Caused by --plugins_dir= plugins pass plugin_info = { - "name": plugin.__name__, + "name": plugin_name, "static_path": static_path, "templates_path": templates_path, "hooks": [h.name for h in pm.get_hookcallers(plugin)], diff --git a/docs/internals.rst b/docs/internals.rst index 
09fb7572..09d45c90 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1093,7 +1093,7 @@ Example usage: if not datasette.in_client(): return Response.text( "Only available via internal client requests", - status=403 + status=403, ) ... diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index e4fad500..fc1aa6f6 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -283,13 +283,12 @@ Here's a test for that plugin that mocks the HTTPX outbound request: Registering a plugin for the duration of a test ----------------------------------------------- -When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``pm.register()`` and ``pm.unregister()`` like this: +When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``datasette.pm.register()`` and ``datasette.pm.unregister()`` like this: .. 
code-block:: python from datasette import hookimpl from datasette.app import Datasette - from datasette.plugins import pm import pytest @@ -305,14 +304,14 @@ When writing tests for plugins you may find it useful to register a test plugin (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() try: # The test implementation goes here - datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") response = await datasette.client.get("/error") assert response.status_code == 500 finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") To reuse the same temporary plugin in multiple tests, you can register it inside a fixture in your ``conftest.py`` file like this: diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 734a427d..863d2529 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -11,7 +11,6 @@ These tests verify: import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette.resources import TableResource from datasette import hookimpl @@ -67,7 +66,7 @@ async def test_allowed_resources_global_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the new allowed_resources() method @@ -87,7 +86,7 @@ async def test_allowed_resources_global_allow(test_ds): assert ("production", "orders") in table_set finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -106,7 +105,7 @@ async def test_allowed_specific_resource(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -130,7 +129,7 @@ async def 
test_allowed_specific_resource(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -148,7 +147,7 @@ async def test_allowed_resources_include_reasons(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use allowed_resources with include_reasons to get debugging info @@ -170,7 +169,7 @@ async def test_allowed_resources_include_reasons(test_ds): assert "analyst access" in reasons_text finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -190,7 +189,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -219,7 +218,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -239,7 +238,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "carol"} @@ -264,7 +263,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -288,7 +287,7 @@ async def test_sql_does_filtering_not_python(test_ds): return PermissionSQL(sql=sql) plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "dave"} @@ -314,4 +313,4 @@ async def 
test_sql_does_filtering_not_python(test_ds): assert tables[0].child == "users" finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index cecffbe2..0cd48ea9 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -8,7 +8,6 @@ based on permission rules from plugins and configuration. import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette import hookimpl @@ -62,7 +61,7 @@ async def test_tables_endpoint_global_access(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the allowed_resources API directly @@ -87,7 +86,7 @@ async def test_tables_endpoint_global_access(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -102,7 +101,7 @@ async def test_tables_endpoint_database_restriction(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -130,7 +129,7 @@ async def test_tables_endpoint_database_restriction(test_ds): # Note: default_permissions.py provides default allows, so we just check analytics are present finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -149,7 +148,7 @@ async def test_tables_endpoint_table_exception(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await 
test_ds.allowed_resources("view-table", {"id": "carol"}) @@ -172,7 +171,7 @@ async def test_tables_endpoint_table_exception(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -191,7 +190,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -214,7 +213,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -257,7 +256,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "dave"}) @@ -280,7 +279,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -295,7 +294,7 @@ async def test_tables_endpoint_empty_result(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "blocked"}) @@ -311,7 +310,7 @@ async def test_tables_endpoint_empty_result(test_ds): assert len(result) == 0 finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio diff --git a/tests/test_docs_plugins.py b/tests/test_docs_plugins.py index 
92b4514c..c51858d3 100644 --- a/tests/test_docs_plugins.py +++ b/tests/test_docs_plugins.py @@ -2,7 +2,6 @@ # -- start datasette_with_plugin_fixture -- from datasette import hookimpl from datasette.app import Datasette -from datasette.plugins import pm import pytest import pytest_asyncio @@ -18,11 +17,12 @@ async def datasette_with_plugin(): (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") try: - yield Datasette() + yield datasette finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") # -- end datasette_with_plugin_fixture -- diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index b254c5e4..326fcdc0 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -239,7 +239,6 @@ async def test_in_client_returns_false_outside_request(datasette): async def test_in_client_returns_true_inside_request(): """Test that datasette.in_client() returns True inside a client request""" from datasette import hookimpl, Response - from datasette.plugins import pm class TestPlugin: __name__ = "test_in_client_plugin" @@ -255,10 +254,10 @@ async def test_in_client_returns_true_inside_request(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_plugin") + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_plugin") try: - ds = Datasette() - await ds.invoke_startup() # Outside of a client request, should be False assert ds.in_client() is False @@ -271,14 +270,13 @@ async def test_in_client_returns_true_inside_request(): # After the request, should be False again assert ds.in_client() is False finally: - pm.unregister(name="test_in_client_plugin") + ds.pm.unregister(name="test_in_client_plugin") @pytest.mark.asyncio async def test_in_client_with_skip_permission_checks(): """Test that in_client() 
works regardless of skip_permission_checks value""" from datasette import hookimpl - from datasette.plugins import pm from datasette.utils.asgi import Response in_client_values = [] @@ -296,10 +294,10 @@ async def test_in_client_with_skip_permission_checks(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_skip_plugin") + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_skip_plugin") try: - ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) - await ds.invoke_startup() # Request without skip_permission_checks await ds.client.get("/-/test-in-client") @@ -312,4 +310,4 @@ async def test_in_client_with_skip_permission_checks(): ), f"Expected 2 values, got {len(in_client_values)}" assert all(in_client_values), f"Expected all True, got {in_client_values}" finally: - pm.unregister(name="test_in_client_skip_plugin") + ds.pm.unregister(name="test_in_client_skip_plugin") diff --git a/tests/test_permission_endpoints.py b/tests/test_permission_endpoints.py index d7b7bf07..84f3370f 100644 --- a/tests/test_permission_endpoints.py +++ b/tests/test_permission_endpoints.py @@ -439,7 +439,6 @@ async def test_execute_sql_requires_view_database(): be able to execute SQL on that database. 
""" from datasette.permissions import PermissionSQL - from datasette.plugins import pm from datasette import hookimpl class TestPermissionPlugin: @@ -464,11 +463,12 @@ async def test_execute_sql_requires_view_database(): return [] plugin = TestPermissionPlugin() - pm.register(plugin, name="test_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="test_plugin") try: - ds = Datasette() - await ds.invoke_startup() ds.add_memory_database("secret") await ds.refresh_schemas() @@ -498,4 +498,4 @@ async def test_execute_sql_requires_view_database(): f"but got {response.status_code}" ) finally: - pm.unregister(plugin) + ds.pm.unregister(plugin) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 4a8c60d7..42995c0d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -691,7 +691,7 @@ async def test_hook_permission_resources_sql(): await ds.invoke_startup() collected = [] - for block in pm.hook.permission_resources_sql( + for block in ds.pm.hook.permission_resources_sql( datasette=ds, actor={"id": "alice"}, action="view-table", @@ -1161,12 +1161,12 @@ async def test_hook_filters_from_request(ds_client): if request.args.get("_nothing"): return FilterArguments(["1 = 0"], human_descriptions=["NOTHING"]) - pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") + ds_client.ds.pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") response = await ds_client.get("/fixtures/facetable?_nothing=1") assert "0 rows\n where NOTHING" in response.text json_response = await ds_client.get("/fixtures/facetable.json?_nothing=1") assert json_response.json()["rows"] == [] - pm.unregister(name="ReturnNothingPlugin") + ds_client.ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1327,7 +1327,7 @@ async def test_hook_actors_from_ids(): return inner try: - pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") + ds.pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") actors2 = await 
ds.actors_from_ids(["3", "5", "7"]) assert actors2 == { "3": {"id": "3", "name": "Cate Blanchett"}, @@ -1335,7 +1335,7 @@ async def test_hook_actors_from_ids(): "7": {"id": "7", "name": "Sarah Paulson"}, } finally: - pm.unregister(name="ReturnNothingPlugin") + ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1350,14 +1350,14 @@ async def test_plugin_is_installed(): return {} try: - pm.register(DummyPlugin(), name="DummyPlugin") + datasette.pm.register(DummyPlugin(), name="DummyPlugin") response = await datasette.client.get("/-/plugins.json") assert response.status_code == 200 installed_plugins = {p["name"] for p in response.json()} assert "DummyPlugin" in installed_plugins finally: - pm.unregister(name="DummyPlugin") + datasette.pm.unregister(name="DummyPlugin") @pytest.mark.asyncio @@ -1384,7 +1384,7 @@ async def test_hook_jinja2_environment_from_request(tmpdir): datasette = Datasette(memory=True) try: - pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") + datasette.pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") response = await datasette.client.get("/") assert response.status_code == 200 assert "Hello museums!" not in response.text @@ -1395,7 +1395,7 @@ async def test_hook_jinja2_environment_from_request(tmpdir): assert response2.status_code == 200 assert "Hello museums!" 
in response2.text finally: - pm.unregister(name="EnvironmentPlugin") + datasette.pm.unregister(name="EnvironmentPlugin") class SlotPlugin: @@ -1433,48 +1433,48 @@ class SlotPlugin: @pytest.mark.asyncio async def test_hook_top_homepage(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/?z=foo") assert response.status_code == 200 assert "Xtop_homepage:foo" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_database(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/_memory?z=bar") assert response.status_code == 200 assert "Xtop_database:_memory:bar" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_table(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facetable?z=baz") assert response.status_code == 200 assert "Xtop_table:fixtures:facetable:baz" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_row(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facet_cities/1?z=bax") assert response.status_code == 200 assert "Xtop_row:fixtures:facet_cities:San Francisco:bax" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio diff 
--git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py index 7d6d8a5a..f23eb839 100644 --- a/tests/test_restriction_sql.py +++ b/tests/test_restriction_sql.py @@ -13,7 +13,6 @@ async def test_multiple_restriction_sources_intersect(): provide restriction_sql - both must pass for access to be granted. """ from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -29,11 +28,12 @@ async def test_multiple_restriction_sources_intersect(): return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_intersect") db2 = ds.add_memory_database("db2_multi_intersect") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -55,7 +55,7 @@ async def test_multiple_restriction_sources_intersect(): assert ("db1_multi_intersect", "t1") in resources assert ("db2_multi_intersect", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") @pytest.mark.asyncio @@ -265,7 +265,6 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() provide restriction_sql - both must pass for access to be granted. 
""" from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -281,11 +280,12 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_restrictions") db2 = ds.add_memory_database("db2_multi_restrictions") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -312,4 +312,4 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() assert ("db1_multi_restrictions", "t1") in resources assert ("db2_multi_restrictions", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") From 93b455239a4063c80d52da795db700c6a88e4d16 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:40:24 -0800 Subject: [PATCH 054/299] Release notes for 1.0a22, closes #2596 --- docs/changelog.rst | 9 +++++++++ docs/internals.rst | 2 ++ 2 files changed, 11 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 66d46bce..feba9390 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,15 @@ Changelog ========= +.. _v1_0_a22: + +1.0a22 (2025-11-13) +------------------- + +- ``datasette serve --default-deny`` option for running Datasette configured to :ref:`deny all permissions by default `. (:issue:`2592`) +- ``datasette.is_client()`` method for detecting if code is :ref:`executing inside a datasette.client request `. (:issue:`2594`) +- ``datasette.pm`` property can now be used to :ref:`register and unregister plugins in tests `. (:issue:`2595`) + .. 
_v1_0_a21: 1.0a21 (2025-11-05) diff --git a/docs/internals.rst b/docs/internals.rst index 09d45c90..cfd78593 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,8 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +.. _internals_datasette_is_client: + Detecting internal client requests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 2125115cd9b609def872cd8051912ac80179f510 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:41:02 -0800 Subject: [PATCH 055/299] Release 1.0a22 Refs #2592, #2594, #2595, #2596 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 01f00fcd..d0ff6ab1 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a21" +__version__ = "1.0a22" __version_info__ = tuple(__version__.split(".")) From 68f1179bac991b5e37b99a5482c40134f317c04f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Nov 2025 17:12:52 -0800 Subject: [PATCH 056/299] Fix for text None shown on /-/actions, closes #2599 --- datasette/templates/debug_actions.html | 2 +- tests/test_html.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/datasette/templates/debug_actions.html b/datasette/templates/debug_actions.html index 6dd5ac0e..0ef7b329 100644 --- a/datasette/templates/debug_actions.html +++ b/datasette/templates/debug_actions.html @@ -31,7 +31,7 @@

- + diff --git a/tests/test_html.py b/tests/test_html.py index 35b839ec..7b667301 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1194,6 +1194,21 @@ async def test_actions_page(ds_client): ds_client.ds.root_enabled = original_root_enabled +@pytest.mark.asyncio +async def test_actions_page_does_not_display_none_string(ds_client): + """Ensure the Resource column doesn't display the string 'None' for null values.""" + # https://github.com/simonw/datasette/issues/2599 + original_root_enabled = ds_client.ds.root_enabled + try: + ds_client.ds.root_enabled = True + cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} + response = await ds_client.get("/-/actions", cookies=cookies) + assert response.status_code == 200 + assert "None" not in response.text + finally: + ds_client.ds.root_enabled = original_root_enabled + + @pytest.mark.asyncio async def test_permission_debug_tabs_with_query_string(ds_client): """Test that navigation tabs persist query strings across Check, Allowed, and Rules pages""" From c6c2a238c3e890384eef6bf9bca062fd784d9157 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 16:22:42 -0800 Subject: [PATCH 057/299] Fix for stale internal database bug, closes #2605 --- datasette/utils/internal_db.py | 3 +++ tests/test_internal_db.py | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index a3afbab2..587ea7b1 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,6 +116,9 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): + conn.execute( + "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] + ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) diff --git a/tests/test_internal_db.py b/tests/test_internal_db.py index 59516225..7a0d1630 100644 --- a/tests/test_internal_db.py +++ 
b/tests/test_internal_db.py @@ -91,3 +91,51 @@ async def test_internal_foreign_key_references(ds_client): ) await internal_db.execute_fn(inner) + + +@pytest.mark.asyncio +async def test_stale_catalog_entry_database_fix(tmp_path): + """ + Test for https://github.com/simonw/datasette/issues/2605 + + When the internal database persists across restarts and has entries in + catalog_databases for databases that no longer exist, accessing the + index page should not cause a 500 error (KeyError). + """ + from datasette.app import Datasette + + internal_db_path = str(tmp_path / "internal.db") + data_db_path = str(tmp_path / "data.db") + + # Create a data database file + import sqlite3 + + conn = sqlite3.connect(data_db_path) + conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY)") + conn.close() + + # First Datasette instance: with the data database and persistent internal db + ds1 = Datasette(files=[data_db_path], internal=internal_db_path) + await ds1.invoke_startup() + + # Access the index page to populate the internal catalog + response = await ds1.client.get("/") + assert "data" in ds1.databases + assert response.status_code == 200 + + # Second Datasette instance: reusing internal.db but WITHOUT the data database + # This simulates restarting Datasette after removing a database + ds2 = Datasette(internal=internal_db_path) + await ds2.invoke_startup() + + # The database is not in ds2.databases + assert "data" not in ds2.databases + + # Accessing the index page should NOT cause a 500 error + # This is the bug: it currently raises KeyError when trying to + # access ds.databases["data"] for the stale catalog entry + response = await ds2.client.get("/") + assert response.status_code == 200, ( + f"Index page should return 200, not {response.status_code}. " + "This fails due to stale catalog entries causing KeyError." 
+ ) From 170b3ff61c1c7bc49b999ecbe43853af9727f2f1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:00:13 -0800 Subject: [PATCH 058/299] Better fix for stale catalog_databases, closes #2606 Refs 2605 --- datasette/app.py | 9 +++++++++ datasette/utils/internal_db.py | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2d8283a4..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -606,6 +606,15 @@ class Datasette: "select database_name, schema_version from catalog_databases" ) } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) for database_name, db in self.databases.items(): schema_version = (await db.execute("PRAGMA schema_version")).first()[0] # Compare schema versions to see if we should skip it diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 587ea7b1..a3afbab2 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,9 +116,6 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): - conn.execute( - "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] - ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) From 0a924524be06a331f20d2e1314ec82370995630b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:11:31 -0800 Subject: [PATCH 059/299] Split default_permissions.py into a package (#2603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Split default_permissions.py into a package, refs #2602 * Remove unused is_resource_allowed() method, improve test coverage - Remove dead code: 
is_resource_allowed() method was never called - Change isinstance check to assertion with error message - Add test cases for table-level restrictions in restrictions_allow_action() - Coverage for restrictions.py improved from 79% to 99% 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * Additional permission test for gap spotted by coverage --- datasette/default_permissions.py | 494 ------------------ datasette/default_permissions/__init__.py | 59 +++ datasette/default_permissions/config.py | 442 ++++++++++++++++ datasette/default_permissions/defaults.py | 70 +++ datasette/default_permissions/helpers.py | 85 +++ datasette/default_permissions/restrictions.py | 195 +++++++ datasette/default_permissions/root.py | 29 + datasette/default_permissions/tokens.py | 95 ++++ tests/test_permissions.py | 59 +++ 9 files changed, 1034 insertions(+), 494 deletions(-) delete mode 100644 datasette/default_permissions.py create mode 100644 datasette/default_permissions/__init__.py create mode 100644 datasette/default_permissions/config.py create mode 100644 datasette/default_permissions/defaults.py create mode 100644 datasette/default_permissions/helpers.py create mode 100644 datasette/default_permissions/restrictions.py create mode 100644 datasette/default_permissions/root.py create mode 100644 datasette/default_permissions/tokens.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py deleted file mode 100644 index 12e6c1ef..00000000 --- a/datasette/default_permissions.py +++ /dev/null @@ -1,494 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL -from datasette.utils import actor_matches_allow -import itsdangerous -import time - - -@hookimpl(specname="permission_resources_sql") -async def actor_restrictions_sql(datasette, actor, action): - """Handle 
actor restriction-based permission rules (_r key).""" - if not actor: - return None - - restrictions = actor.get("_r") if isinstance(actor, dict) else None - if restrictions is None: - return [] - - # Check if this action appears in restrictions (with abbreviations) - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - # Check if globally allowed in restrictions - global_actions = restrictions.get("a", []) - is_globally_allowed = action_checks.intersection(global_actions) - - if is_globally_allowed: - # Globally allowed - no restriction filtering needed - return [] - - # Not globally allowed - build restriction_sql that lists allowlisted resources - restriction_selects = [] - restriction_params = {} - param_counter = 0 - - # Add database-level allowlisted resources - db_restrictions = restrictions.get("d", {}) - for db_name, db_actions in db_restrictions.items(): - if action_checks.intersection(db_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, NULL AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - - # Add table-level allowlisted resources - resource_restrictions = restrictions.get("r", {}) - for db_name, tables in resource_restrictions.items(): - for table_name, table_actions in tables.items(): - if action_checks.intersection(table_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - restriction_params[f"{prefix}_child"] = table_name - - if not restriction_selects: - # Action not in allowlist - return empty restriction (INTERSECT will return no results) - return [ - PermissionSQL( - params={"deny": f"actor restrictions: {action} not in allowlist"}, - restriction_sql="SELECT NULL AS parent, NULL AS child 
WHERE 0", # Empty set - ) - ] - - # Build restriction SQL that returns allowed (parent, child) pairs - restriction_sql = "\nUNION ALL\n".join(restriction_selects) - - # Return restriction-only PermissionSQL (sql=None means no permission rules) - # The restriction_sql does the actual filtering via INTERSECT - return [ - PermissionSQL( - params=restriction_params, - restriction_sql=restriction_sql, - ) - ] - - -@hookimpl(specname="permission_resources_sql") -async def root_user_permissions_sql(datasette, actor, action): - """Grant root user full permissions when enabled.""" - if datasette.root_enabled and actor and actor.get("id") == "root": - # Add a single global-level allow rule (NULL, NULL) for root - # This allows root to access everything by default, but database-level - # and table-level deny rules in config can still block specific resources - return PermissionSQL.allow(reason="root user") - return None - - -@hookimpl(specname="permission_resources_sql") -async def config_permissions_sql(datasette, actor, action): - """Apply config-based permission rules from datasette.yaml.""" - config = datasette.config or {} - - def evaluate(allow_block): - if allow_block is None: - return None - return actor_matches_allow(actor, allow_block) - - has_restrictions = actor and "_r" in actor if actor else False - restrictions = actor.get("_r", {}) if actor else {} - - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - restricted_databases: set[str] = set() - restricted_tables: set[tuple[str, str]] = set() - if has_restrictions: - restricted_databases = { - db_name - for db_name, db_actions in (restrictions.get("d") or {}).items() - if action_checks.intersection(db_actions) - } - restricted_tables = { - (db_name, table_name) - for db_name, tables in (restrictions.get("r") or {}).items() - for table_name, table_actions in tables.items() - if action_checks.intersection(table_actions) 
- } - # Tables implicitly reference their parent databases - restricted_databases.update(db for db, _ in restricted_tables) - - def is_in_restriction_allowlist(parent, child, action_name): - """Check if a resource is in the actor's restriction allowlist for this action""" - if not has_restrictions: - return True # No restrictions, all resources allowed - - # Check global allowlist - if action_checks.intersection(restrictions.get("a", [])): - return True - - # Check database-level allowlist - if parent and action_checks.intersection( - restrictions.get("d", {}).get(parent, []) - ): - return True - - # Check table-level allowlist - if parent: - table_restrictions = (restrictions.get("r", {}) or {}).get(parent, {}) - if child: - table_actions = table_restrictions.get(child, []) - if action_checks.intersection(table_actions): - return True - else: - # Parent query should proceed if any child in this database is allowlisted - for table_actions in table_restrictions.values(): - if action_checks.intersection(table_actions): - return True - - # Parent/child both None: include if any restrictions exist for this action - if parent is None and child is None: - if action_checks.intersection(restrictions.get("a", [])): - return True - if restricted_databases: - return True - if restricted_tables: - return True - - return False - - rows = [] - - def add_row(parent, child, result, scope): - if result is None: - return - rows.append( - ( - parent, - child, - bool(result), - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - - def add_row_allow_block(parent, child, allow_block, scope): - """For 'allow' blocks, always add a row if the block exists - deny if no match""" - if allow_block is None: - return - - # If actor has restrictions and this resource is NOT in allowlist, skip this config rule - # Restrictions act as a gating filter - config cannot grant access to restricted-out resources - if not is_in_restriction_allowlist(parent, child, action): - return - - result = 
evaluate(allow_block) - bool_result = bool(result) - # If result is None (no match) or False, treat as deny - rows.append( - ( - parent, - child, - bool_result, # None becomes False, False stays False, True stays True - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - if has_restrictions and not bool_result and child is None: - reason = f"config deny {scope} (restriction gate)" - if parent is None: - # Root-level deny: add more specific denies for restricted resources - if action_obj and action_obj.takes_parent: - for db_name in restricted_databases: - rows.append((db_name, None, 0, reason)) - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - rows.append((db_name, table_name, 0, reason)) - else: - # Database-level deny: add child-level denies for restricted tables - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - if db_name == parent: - rows.append((db_name, table_name, 0, reason)) - - root_perm = (config.get("permissions") or {}).get(action) - add_row(None, None, evaluate(root_perm), f"permissions for {action}") - - for db_name, db_config in (config.get("databases") or {}).items(): - db_perm = (db_config.get("permissions") or {}).get(action) - add_row( - db_name, None, evaluate(db_perm), f"permissions for {action} on {db_name}" - ) - - for table_name, table_config in (db_config.get("tables") or {}).items(): - table_perm = (table_config.get("permissions") or {}).get(action) - add_row( - db_name, - table_name, - evaluate(table_perm), - f"permissions for {action} on {db_name}/{table_name}", - ) - - if action == "view-table": - table_allow = (table_config or {}).get("allow") - add_row_allow_block( - db_name, - table_name, - table_allow, - f"allow for {action} on {db_name}/{table_name}", - ) - - for query_name, query_config in (db_config.get("queries") or {}).items(): - # query_config can be a string (just SQL) or a dict (with SQL and options) - if isinstance(query_config, 
dict): - query_perm = (query_config.get("permissions") or {}).get(action) - add_row( - db_name, - query_name, - evaluate(query_perm), - f"permissions for {action} on {db_name}/{query_name}", - ) - if action == "view-query": - query_allow = query_config.get("allow") - add_row_allow_block( - db_name, - query_name, - query_allow, - f"allow for {action} on {db_name}/{query_name}", - ) - - if action == "view-database": - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "execute-sql": - db_allow_sql = db_config.get("allow_sql") - add_row_allow_block(db_name, None, db_allow_sql, f"allow_sql for {db_name}") - - if action == "view-table": - # Database-level allow block affects all tables in that database - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "view-query": - # Database-level allow block affects all queries in that database - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - # Root-level allow block applies to all view-* actions - if action == "view-instance": - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-instance") - - if action == "view-database": - # Root-level allow block also applies to view-database - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-database") - - if action == "view-table": - # Root-level allow block also applies to view-table - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-table") - - if action == "view-query": - # Root-level allow block also applies to view-query - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-query") - - if action == "execute-sql": - allow_sql = 
config.get("allow_sql") - add_row_allow_block(None, None, allow_sql, "allow_sql") - - if not rows: - return [] - - parts = [] - params = {} - for idx, (parent, child, allow, reason) in enumerate(rows): - key = f"cfg_{idx}" - parts.append( - f"SELECT :{key}_parent AS parent, :{key}_child AS child, :{key}_allow AS allow, :{key}_reason AS reason" - ) - params[f"{key}_parent"] = parent - params[f"{key}_child"] = child - params[f"{key}_allow"] = 1 if allow else 0 - params[f"{key}_reason"] = reason - - sql = "\nUNION ALL\n".join(parts) - return [PermissionSQL(sql=sql, params=params)] - - -@hookimpl(specname="permission_resources_sql") -async def default_allow_sql_check(datasette, actor, action): - """Enforce default_allow_sql setting for execute-sql action.""" - if action == "execute-sql" and not datasette.setting("default_allow_sql"): - return PermissionSQL.deny(reason="default_allow_sql is false") - return None - - -@hookimpl(specname="permission_resources_sql") -async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions. - - With the INTERSECT-based restriction approach, these defaults are always generated - and then filtered by restriction_sql if the actor has restrictions. - """ - # Skip default allow rules if default_deny is enabled - if datasette.default_deny: - return None - - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - return PermissionSQL.allow(reason=reason) - - return None - - -def restrictions_allow_action( - datasette: "Datasette", - restrictions: dict, - action: str, - resource: str | tuple[str, str], -): - """ - Check if actor restrictions allow the requested action against the requested resource. 
- - Restrictions work on an exact-match basis: if an actor has view-table permission, - they can view tables, but NOT automatically view-instance or view-database. - Each permission is checked independently without implication logic. - """ - # Does this action have an abbreviation? - to_check = {action} - action_obj = datasette.actions.get(action) - if action_obj and action_obj.abbr: - to_check.add(action_obj.abbr) - - # Check if restrictions explicitly allow this action - # Restrictions can be at three levels: - # - "a": global (any resource) - # - "d": per-database - # - "r": per-table/resource - - # Check global level (any resource) - all_allowed = restrictions.get("a") - if all_allowed is not None: - assert isinstance(all_allowed, list) - if to_check.intersection(all_allowed): - return True - - # Check database level - if resource: - if isinstance(resource, str): - database_name = resource - else: - database_name = resource[0] - database_allowed = restrictions.get("d", {}).get(database_name) - if database_allowed is not None: - assert isinstance(database_allowed, list) - if to_check.intersection(database_allowed): - return True - - # Check table/resource level - if resource is not None and not isinstance(resource, str) and len(resource) == 2: - database, table = resource - table_allowed = restrictions.get("r", {}).get(database, {}).get(table) - if table_allowed is not None: - assert isinstance(table_allowed, list) - if to_check.intersection(table_allowed): - return True - - # This action is not explicitly allowed, so reject it - return False - - -@hookimpl -def actor_from_request(datasette, request): - prefix = "dstok_" - if not datasette.setting("allow_signed_tokens"): - return None - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - authorization = request.headers.get("authorization") - if not authorization: - return None - if not authorization.startswith("Bearer "): - return None - token = authorization[len("Bearer ") :] - if not 
token.startswith(prefix): - return None - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - if "t" not in decoded: - # Missing timestamp - return None - created = decoded["t"] - if not isinstance(created, int): - # Invalid timestamp - return None - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - # Invalid duration - return None - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - if duration: - if time.time() - created > duration: - # Expired - return None - actor = {"id": decoded["a"], "token": "dstok"} - if "_r" in decoded: - actor["_r"] = decoded["_r"] - if duration: - actor["token_expires"] = created + duration - return actor - - -@hookimpl -def skip_csrf(scope): - # Skip CSRF check for requests with content-type: application/json - if scope["type"] == "http": - headers = scope.get("headers") or {} - if dict(headers).get(b"content-type") == b"application/json": - return True - - -@hookimpl -def canned_queries(datasette, database, actor): - """Return canned queries from datasette configuration.""" - queries = ( - ((datasette.config or {}).get("databases") or {}).get(database) or {} - ).get("queries") or {} - return queries diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. 
Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. + + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if 
self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: 
Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: + """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. 
+ """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, 
table_config or {}) + + # Process queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + 
self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is set to false, execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. 
+ + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. 
If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. 
+ + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> bool: + """ + Check if restrictions allow the requested action on the requested resource. 
+ + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 6def3840..e2dd92b8 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1323,6 +1323,20 @@ async def test_actor_restrictions( ("dbname2", "tablename"), False, ), + # Table-level restriction 
allows access to that specific table + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "tablename"), + True, + ), + # But not to a different table in the same database + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "other_table"), + False, + ), ), ) async def test_restrictions_allow_action(restrictions, action, resource, expected): @@ -1653,3 +1667,48 @@ async def test_permission_check_view_requires_debug_permission(): data = response.json() assert data["action"] == "view-instance" assert data["allowed"] is True + + +@pytest.mark.asyncio +async def test_root_allow_block_with_table_restricted_actor(): + """ + Test that root-level allow: blocks are processed for actors with + table-level restrictions. + + This covers the case in config.py is_in_restriction_allowlist() where + parent=None, child=None and actor has table restrictions but not global. + """ + from datasette.resources import TableResource + + # Config with root-level allow block that denies non-admin users + ds = Datasette( + config={ + "allow": {"id": "admin"}, # Root-level allow block + } + ) + await ds.invoke_startup() + db = ds.add_memory_database("mydb") + await db.execute_write("create table t1 (id integer primary key)") + await ds.client.get("/") # Trigger catalog refresh + + # Actor with table-level restrictions only (not global) + actor = {"id": "user", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + + # The root-level allow: {id: admin} should be processed and deny this user + # because they're not "admin", even though they have table restrictions + result = await ds.allowed( + action="view-table", + resource=TableResource("mydb", "t1"), + actor=actor, + ) + # Should be False because root allow: {id: admin} denies non-admin users + assert result is False + + # But admin with same restrictions should be allowed + admin_actor = {"id": "admin", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + result = await ds.allowed( + 
action="view-table", + resource=TableResource("mydb", "t1"), + actor=admin_actor, + ) + assert result is True From 3eca3ad6d45c94da16a09b51a648052bbeeeaf2f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:16:39 -0800 Subject: [PATCH 060/299] Better recipe for 'just docs' --- Justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Justfile b/Justfile index abb134a6..a47662c3 100644 --- a/Justfile +++ b/Justfile @@ -29,7 +29,7 @@ export DATASETTE_SECRET := "not_a_secret" # Serve live docs on localhost:8000 @docs: cog blacken-docs - uv sync --extra docs && cd docs && uv run make livehtml + uv run --extra docs make -C docs livehtml # Build docs as static HTML @docs-build: cog blacken-docs From 03ab3592083c6677bde58f1bd20002963c980344 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:19:48 -0800 Subject: [PATCH 061/299] tool.uv.package = true --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4f487458..8ec1c6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,3 +93,6 @@ datasette = ["templates/*.html"] [tool.setuptools.dynamic] version = {attr = "datasette.version.__version__"} + +[tool.uv] +package = true From 2ca00b6c75b165c3318d06e6dc6eb228b9b60338 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:20:43 -0800 Subject: [PATCH 062/299] Release 1.0a23 Refs #2605, #2599 --- datasette/version.py | 2 +- docs/changelog.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index d0ff6ab1..fff37a72 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a22" +__version__ = "1.0a23" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index feba9390..feba7e86 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. 
_v1_0_a23: + +1.0a23 (2025-12-02) +------------------- + +- Fix for bug where a stale database entry in ``internal.db`` could cause a 500 error on the homepage. (:issue:`2605`) +- Cosmetic improvement to ``/-/actions`` page. (:issue:`2599`) + .. _v1_0_a22: 1.0a22 (2025-11-13) From 1d4448fc5603f479f11b37b9da0ee11c2b1a19e4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 4 Dec 2025 21:36:39 -0800 Subject: [PATCH 063/299] Use subtests in tests/test_docs.py (#2609) Closes #2608 --- pyproject.toml | 2 +- tests/test_docs.py | 53 +++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ec1c6b7..f3053447 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ docs = [ "ruamel.yaml", ] test = [ - "pytest>=5.2.2", + "pytest>=9", "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", diff --git a/tests/test_docs.py b/tests/test_docs.py index 953224dd..b94a6f23 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -28,9 +28,10 @@ def settings_headings(): return get_headings((docs_path / "settings.rst").read_text(), "~") -@pytest.mark.parametrize("setting", app.SETTINGS) -def test_settings_are_documented(settings_headings, setting): - assert setting.name in settings_headings +def test_settings_are_documented(settings_headings, subtests): + for setting in app.SETTINGS: + with subtests.test(setting=setting.name): + assert setting.name in settings_headings @pytest.fixture(scope="session") @@ -38,21 +39,21 @@ def plugin_hooks_content(): return (docs_path / "plugin_hooks.rst").read_text() -@pytest.mark.parametrize( - "plugin", [name for name in dir(app.pm.hook) if not name.startswith("_")] -) -def test_plugin_hooks_are_documented(plugin, plugin_hooks_content): +def test_plugin_hooks_are_documented(plugin_hooks_content, subtests): headings = set() headings.update(get_headings(plugin_hooks_content, "-")) 
headings.update(get_headings(plugin_hooks_content, "~")) - assert plugin in headings - hook_caller = getattr(app.pm.hook, plugin) - arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] - # Check for plugin_name(arg1, arg2, arg3) - expected = f"{plugin}({', '.join(arg_names)})" - assert ( - expected in plugin_hooks_content - ), f"Missing from plugin hook documentation: {expected}" + plugins = [name for name in dir(app.pm.hook) if not name.startswith("_")] + for plugin in plugins: + with subtests.test(plugin=plugin): + assert plugin in headings + hook_caller = getattr(app.pm.hook, plugin) + arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] + # Check for plugin_name(arg1, arg2, arg3) + expected = f"{plugin}({', '.join(arg_names)})" + assert ( + expected in plugin_hooks_content + ), f"Missing from plugin hook documentation: {expected}" @pytest.fixture(scope="session") @@ -68,9 +69,11 @@ def documented_views(): return view_labels -@pytest.mark.parametrize("view_class", [v for v in dir(app) if v.endswith("View")]) -def test_view_classes_are_documented(documented_views, view_class): - assert view_class in documented_views +def test_view_classes_are_documented(documented_views, subtests): + view_classes = [v for v in dir(app) if v.endswith("View")] + for view_class in view_classes: + with subtests.test(view_class=view_class): + assert view_class in documented_views @pytest.fixture(scope="session") @@ -85,9 +88,10 @@ def documented_table_filters(): } -@pytest.mark.parametrize("filter", [f.key for f in Filters._filters]) -def test_table_filters_are_documented(documented_table_filters, filter): - assert filter in documented_table_filters +def test_table_filters_are_documented(documented_table_filters, subtests): + for f in Filters._filters: + with subtests.test(filter=f.key): + assert f.key in documented_table_filters @pytest.fixture(scope="session") @@ -101,9 +105,10 @@ def documented_fns(): } -@pytest.mark.parametrize("fn", 
utils.functions_marked_as_documented) -def test_functions_marked_with_documented_are_documented(documented_fns, fn): - assert fn.__name__ in documented_fns +def test_functions_marked_with_documented_are_documented(documented_fns, subtests): + for fn in utils.functions_marked_as_documented: + with subtests.test(fn=fn.__name__): + assert fn.__name__ in documented_fns def test_rst_heading_underlines_match_title_length(): From 4cbdfcc07d36c36ac77243d586836b91f90be67c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 11 Dec 2025 17:32:58 -0800 Subject: [PATCH 064/299] dependency-groups and uv (#2611) * dependency-groups and uv, closes #2610 * New .readthedocs config for --group dev --- .github/workflows/deploy-latest.yml | 3 +- .github/workflows/publish.yml | 4 +- .github/workflows/spellcheck.yml | 2 +- .github/workflows/test-coverage.yml | 2 +- .github/workflows/test-sqlite-support.yml | 2 +- .github/workflows/test.yml | 5 +- .readthedocs.yaml | 25 ++++----- Justfile | 8 +-- docs/contributing.rst | 65 ++++++++++------------- pyproject.toml | 28 +++++----- 10 files changed, 67 insertions(+), 77 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 9f53b01e..7349a1ab 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -24,8 +24,7 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - python -m pip install -e .[test] - python -m pip install -e .[docs] + python -m pip install . --group dev python -m pip install sphinx-to-sqlite==0.1a1 - name: Run tests if: ${{ github.ref == 'refs/heads/main' }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e94d0bdd..2e8cea9c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -23,7 +23,7 @@ jobs: cache-dependency-path: pyproject.toml - name: Install dependencies run: | - pip install -e '.[test]' + pip install . 
--group dev - name: Run tests run: | pytest @@ -65,7 +65,7 @@ jobs: cache-dependency-path: pyproject.toml - name: Install dependencies run: | - python -m pip install -e .[docs] + python -m pip install . --group dev python -m pip install sphinx-to-sqlite==0.1a1 - name: Build docs.db run: |- diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 7c5370ce..d42ae96b 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -18,7 +18,7 @@ jobs: cache-dependency-path: '**/pyproject.toml' - name: Install dependencies run: | - pip install -e '.[docs]' + pip install . --group dev - name: Check spelling run: | codespell README.md --ignore-words docs/codespell-ignore-words.txt diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 8d73b64d..1b3d2f2c 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -25,7 +25,7 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - python -m pip install -e .[test] + python -m pip install . --group dev python -m pip install pytest-cov - name: Run tests run: |- diff --git a/.github/workflows/test-sqlite-support.yml b/.github/workflows/test-sqlite-support.yml index 76ea138a..c81a3c0b 100644 --- a/.github/workflows/test-sqlite-support.yml +++ b/.github/workflows/test-sqlite-support.yml @@ -45,7 +45,7 @@ jobs: (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | - pip install -e '.[test]' + pip install . --group dev pip freeze - name: Run tests run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1e5e03d2..3790c788 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,7 @@ jobs: (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | - pip install -e '.[test]' + pip install . 
--group dev pip freeze - name: Run tests run: | @@ -33,9 +33,6 @@ jobs: pytest -m "serial" # And the test that exceeds a localhost HTTPS server tests/test_datasette_https_server.sh - - name: Install docs dependencies - run: | - pip install -e '.[docs]' - name: Black run: black --check . - name: Check if cog needs to be run diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 5b30e75a..8b3e54aa 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,16 +1,17 @@ version: 2 -build: - os: ubuntu-20.04 - tools: - python: "3.11" - sphinx: - configuration: docs/conf.py + configuration: docs/conf.py -python: - install: - - method: pip - path: . - extra_requirements: - - docs +build: + os: ubuntu-24.04 + tools: + python: "3.13" + jobs: + install: + - pip install --upgrade pip + - pip install . --group dev + +formats: +- pdf +- epub diff --git a/Justfile b/Justfile index a47662c3..8c50e5ca 100644 --- a/Justfile +++ b/Justfile @@ -5,7 +5,7 @@ export DATASETTE_SECRET := "not_a_secret" # Setup project @init: - uv sync --extra test --extra docs + uv sync # Run pytest with supplied options @test *options: init @@ -21,15 +21,15 @@ export DATASETTE_SECRET := "not_a_secret" @lint: codespell uv run black . 
--check uv run flake8 - uv run --extra test cog --check README.md docs/*.rst + uv run cog --check README.md docs/*.rst # Rebuild docs with cog @cog: - uv run --extra test cog -r README.md docs/*.rst + uv run cog -r README.md docs/*.rst # Serve live docs on localhost:8000 @docs: cog blacken-docs - uv run --extra docs make -C docs livehtml + uv run make -C docs livehtml # Build docs as static HTML @docs-build: cog blacken-docs diff --git a/docs/contributing.rst b/docs/contributing.rst index 6be0247c..3d41a125 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -32,17 +32,18 @@ If you want to get started without creating your own fork, you can do this inste git clone git@github.com:simonw/datasette -The next step is to create a virtual environment for your project and use it to install Datasette's dependencies:: +The quickest way to set up a development environment is to use `uv `__. From the repository root you can run the tests directly:: cd datasette - # Create a virtual environment in ./venv - python3 -m venv ./venv - # Now activate the virtual environment, so pip can install into it - source venv/bin/activate - # Install Datasette and its testing dependencies - python3 -m pip install -e '.[test]' + uv run pytest -That last line does most of the work: ``pip install -e`` means "install this package in a way that allows me to edit the source code in place". The ``.[test]`` option means "install the optional testing dependencies as well". +This will create a local ``.venv/`` and install Datasette plus its development dependencies. + +If you prefer to manage your own virtual environment with pip, create and activate one and then install the development dependency group:: + + python3 -m venv ./venv + source venv/bin/activate + python3 -m pip install -e . --group dev .. 
_contributing_running_tests: @@ -51,15 +52,15 @@ Running the tests Once you have done this, you can run the Datasette unit tests from inside your ``datasette/`` directory using `pytest `__ like so:: - pytest + uv run pytest You can run the tests faster using multiple CPU cores with `pytest-xdist `__ like this:: - pytest -n auto -m "not serial" + uv run pytest -n auto -m "not serial" ``-n auto`` detects the number of available cores automatically. The ``-m "not serial"`` skips tests that don't work well in a parallel test environment. You can run those tests separately like so:: - pytest -m "serial" + uv run pytest -m "serial" .. _contributing_using_fixtures: @@ -72,11 +73,11 @@ You're going to need at least one SQLite database. A quick way to get started is You can create a copy of that database by running this command:: - python tests/fixtures.py fixtures.db + uv run python tests/fixtures.py fixtures.db Now you can run Datasette against the new fixtures database like so:: - datasette fixtures.db + uv run datasette fixtures.db This will start a server at ``http://127.0.0.1:8001/``. @@ -84,15 +85,14 @@ Any changes you make in the ``datasette/templates`` or ``datasette/static`` fold If you want to change Datasette's Python code you can use the ``--reload`` option to cause Datasette to automatically reload any time the underlying code changes:: - datasette --reload fixtures.db + uv run datasette --reload fixtures.db You can also use the ``fixtures.py`` script to recreate the testing version of ``metadata.json`` used by the unit tests. 
To do that:: - python tests/fixtures.py fixtures.db fixtures-metadata.json - + uv run python tests/fixtures.py fixtures.db fixtures-metadata.json Or to output the plugins used by the tests, run this:: - python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins + uv run python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins Test tables written to fixtures.db - metadata written to fixtures-metadata.json Wrote plugin: fixtures-plugins/register_output_renderer.py @@ -103,7 +103,7 @@ Or to output the plugins used by the tests, run this:: Then run Datasette like this:: - datasette fixtures.db -m fixtures-metadata.json --plugins-dir=fixtures-plugins/ + uv run datasette fixtures.db -m fixtures-metadata.json --plugins-dir=fixtures-plugins/ .. _contributing_debugging: @@ -114,11 +114,11 @@ Any errors that occur while Datasette is running while display a stack trace on You can tell Datasette to open an interactive ``pdb`` (or ``ipdb``, if present) debugger session if an error occurs using the ``--pdb`` option:: - datasette --pdb fixtures.db + uv run datasette --pdb fixtures.db For `ipdb `__, first run this:: - datasette install ipdb + uv run datasette install ipdb .. _contributing_formatting: @@ -145,9 +145,9 @@ Or run both at the same time:: Running Black ~~~~~~~~~~~~~ -Black will be installed when you run ``pip install -e '.[test]'``. To test that your code complies with Black, run the following in your root ``datasette`` repository checkout:: +Black is installed as part of the development dependency group. To test that your code complies with Black, run the following in your root ``datasette`` repository checkout:: - black . --check + uv run black . --check :: @@ -156,7 +156,7 @@ Black will be installed when you run ``pip install -e '.[test]'``. To test that If any of your code does not conform to Black you can run this to automatically fix those problems:: - black . + uv run black . 
:: @@ -171,7 +171,7 @@ blacken-docs The `blacken-docs `__ command applies Black formatting rules to code examples in the documentation. Run it like this:: - blacken-docs -l 60 docs/*.rst + uv run blacken-docs -l 60 docs/*.rst .. _contributing_formatting_prettier: @@ -208,17 +208,10 @@ Datasette's documentation lives in the ``docs/`` directory and is deployed autom The documentation is written using reStructuredText. You may find this article on `The subset of reStructuredText worth committing to memory `__ useful. -You can build it locally by installing ``sphinx`` and ``sphinx_rtd_theme`` in your Datasette development environment and then running ``make html`` directly in the ``docs/`` directory:: +You can build it locally once you have installed the development dependency group (which includes Sphinx and related tools) and then running ``make html`` directly in the ``docs/`` directory:: - # You may first need to activate your virtual environment: - source venv/bin/activate - - # Install the dependencies needed to build the docs - pip install -e .[docs] - - # Now build the docs cd docs/ - make html + uv run make html This will create the HTML version of the documentation in ``docs/_build/html``. You can open it in your browser like so:: @@ -228,9 +221,9 @@ Any time you make changes to a ``.rst`` file you can re-run ``make html`` to upd For added productivity, you can use use `sphinx-autobuild `__ to run Sphinx in auto-build mode. This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. -``sphinx-autobuild`` will have been installed when you ran ``pip install -e .[docs]``. In your ``docs/`` directory you can start the server by running the following:: +``sphinx-autobuild`` is included in the development dependency group. 
In your ``docs/`` directory you can start the server by running the following:: - make livehtml + uv run make livehtml Now browse to ``http://localhost:8000/`` to view the documentation. Any edits you make should be instantly reflected in your browser. @@ -243,7 +236,7 @@ Some pages of documentation (in particular the :ref:`cli_reference`) are automat To update these pages, run the following command:: - cog -r docs/*.rst + uv run cog -r docs/*.rst .. _contributing_continuous_deployment: diff --git a/pyproject.toml b/pyproject.toml index f3053447..87884341 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,20 +55,8 @@ CI = "https://github.com/simonw/datasette/actions?query=workflow%3ATest" [project.scripts] datasette = "datasette.cli:cli" -[project.optional-dependencies] -docs = [ - "Sphinx==7.4.7", - "furo==2025.9.25", - "sphinx-autobuild", - "codespell>=2.2.5", - "blacken-docs", - "sphinx-copybutton", - "sphinx-inline-tabs", - "myst-parser", - "sphinx-markdown-builder", - "ruamel.yaml", -] -test = [ +[dependency-groups] +dev = [ "pytest>=9", "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", @@ -78,7 +66,19 @@ test = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + # docs + "Sphinx==7.4.7", + "furo==2025.9.25", + "sphinx-autobuild", + "codespell>=2.2.5", + "sphinx-copybutton", + "sphinx-inline-tabs", + "myst-parser", + "sphinx-markdown-builder", + "ruamel.yaml", ] + +[project.optional-dependencies] rich = ["rich"] [build-system] From 3b4c7e1abed15c8343a46ff9bc0a8171511a3624 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 12 Dec 2025 21:43:00 -0800 Subject: [PATCH 065/299] {"ok": true} on row API, to be consistent with table --- datasette/views/row.py | 1 + tests/test_api.py | 1 + 2 files changed, 2 insertions(+) diff --git a/datasette/views/row.py b/datasette/views/row.py index c9b74b12..4f896632 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -95,6 +95,7 @@ class RowView(DataView): } data = { + "ok": True, "database": 
database, "table": table, "rows": rows, diff --git a/tests/test_api.py b/tests/test_api.py index 859c5809..16e1d8e6 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -710,6 +710,7 @@ async def test_invalid_custom_sql(ds_client): async def test_row(ds_client): response = await ds_client.get("/fixtures/simple_primary_key/1.json?_shape=objects") assert response.status_code == 200 + assert response.json()["ok"] is True assert response.json()["rows"] == [{"id": 1, "content": "hello"}] From 232a404743ad007285b02838c222845ee4d39cbd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 12 Dec 2025 22:18:35 -0800 Subject: [PATCH 066/299] Switch searchable_fts test table to FTS5, closes #2613 --- tests/fixtures.py | 5 +-- tests/test_api.py | 68 ++++++++++++-------------------- tests/test_internals_database.py | 6 +-- 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 8d600c9b..01c501f2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -536,9 +536,8 @@ INSERT INTO searchable_tags (searchable_id, tag) VALUES ; CREATE VIRTUAL TABLE "searchable_fts" - USING FTS4 (text1, text2, [name with . and spaces], content="searchable"); -INSERT INTO "searchable_fts" (rowid, text1, text2, [name with . and spaces]) - SELECT rowid, text1, text2, [name with . and spaces] FROM searchable; + USING FTS5 (text1, text2, [name with . and spaces], content="searchable", content_rowid="pk"); +INSERT INTO "searchable_fts" (searchable_fts) VALUES ('rebuild'); CREATE TABLE [select] ( [group] text, diff --git a/tests/test_api.py b/tests/test_api.py index 16e1d8e6..008fc42b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -515,22 +515,13 @@ async def test_database_page(ds_client): "private": False, }, { - "columns": Either( - [ - "text1", - "text2", - "name with . and spaces", - "searchable_fts", - "docid", - "__langid", - ], - # Get tests to pass on SQLite 3.25 as well - [ - "text1", - "text2", - "name with . 
and spaces", - ], - ), + "columns": [ + "text1", + "text2", + "name with . and spaces", + "searchable_fts", + "rank", + ], "count": 2, "foreign_keys": {"incoming": [], "outgoing": []}, "fts_table": "searchable_fts", @@ -540,26 +531,9 @@ async def test_database_page(ds_client): "private": False, }, { - "name": "searchable_fts_docsize", - "columns": ["docid", "size"], - "primary_keys": ["docid"], - "count": 2, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_segdir", - "columns": [ - "level", - "idx", - "start_block", - "leaves_end_block", - "end_block", - "root", - ], - "primary_keys": ["level", "idx"], + "name": "searchable_fts_config", + "columns": ["k", "v"], + "primary_keys": ["k"], "count": 1, "hidden": True, "fts_table": None, @@ -567,19 +541,29 @@ async def test_database_page(ds_client): "private": False, }, { - "name": "searchable_fts_segments", - "columns": ["blockid", "block"], - "primary_keys": ["blockid"], - "count": 0, + "name": "searchable_fts_data", + "columns": ["id", "block"], + "primary_keys": ["id"], + "count": 3, "hidden": True, "fts_table": None, "foreign_keys": {"incoming": [], "outgoing": []}, "private": False, }, { - "name": "searchable_fts_stat", - "columns": ["id", "value"], + "name": "searchable_fts_docsize", + "columns": ["id", "sz"], "primary_keys": ["id"], + "count": 2, + "hidden": True, + "fts_table": None, + "foreign_keys": {"incoming": [], "outgoing": []}, + "private": False, + }, + { + "name": "searchable_fts_idx", + "columns": ["segid", "term", "pgno"], + "primary_keys": ["segid", "term"], "count": 1, "hidden": True, "fts_table": None, diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 4a078f75..d2e06073 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -436,10 +436,10 @@ async def test_table_names(db): "searchable", "searchable_tags", "searchable_fts", - 
"searchable_fts_segments", - "searchable_fts_segdir", + "searchable_fts_data", + "searchable_fts_idx", "searchable_fts_docsize", - "searchable_fts_stat", + "searchable_fts_config", "select", "infinity", "facet_cities", From 97496d5a672c78271735dd77abde3248eea8b967 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 19:52:49 -0800 Subject: [PATCH 067/299] ?_extra=render_cells for tables, refs #2619 --- datasette/views/table.py | 31 ++++++++++++++++++++ tests/test_table_api.py | 62 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/datasette/views/table.py b/datasette/views/table.py index 007c0c85..c8f209d6 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1492,6 +1492,36 @@ async def table_view_data( async def extra_display_rows(run_display_columns_and_rows): return run_display_columns_and_rows["rows"] + async def extra_render_cells(): + "Rendered HTML for each cell using the render_cell plugin hook" + columns = [col[0] for col in results.description] + rendered_rows = [] + for row in rows: + rendered_row = {} + for value, column in zip(row, columns): + # Call render_cell plugin hook + plugin_display_value = None + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table_name, + database=database_name, + datasette=datasette, + request=request, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break + if plugin_display_value: + rendered_row[column] = str(plugin_display_value) + else: + # Default: convert value to string + rendered_row[column] = "" if value is None else str(value) + rendered_rows.append(rendered_row) + return rendered_rows + async def extra_query(): "Details of the underlying SQL query" return { @@ -1678,6 +1708,7 @@ async def table_view_data( run_display_columns_and_rows, extra_display_columns, extra_display_rows, + extra_render_cells, extra_debug, extra_request, 
extra_query, diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 653679e4..d5a8ca41 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1383,3 +1383,65 @@ async def test_table_extras(ds_client, extra, expected_json): ) assert response.status_code == 200 assert response.json() == expected_json + + +@pytest.mark.asyncio +async def test_extra_render_cells(): + """Test that _extra=render_cells returns rendered HTML from render_cell plugin hook""" + from datasette import hookimpl + from datasette.app import Datasette + + class TestRenderCellPlugin: + __name__ = "TestRenderCellPlugin" + + @hookimpl + def render_cell(self, value, column, table, database): + # Only modify cells in our test table + if table == "test_render" and column == "name": + return f"{value}" + return None + + ds = Datasette(memory=True) + await ds.invoke_startup() + db = ds.add_memory_database("test") + await db.execute_write( + "create table test_render (id integer primary key, name text)" + ) + await db.execute_write("insert into test_render values (1, 'Alice')") + await db.execute_write("insert into test_render values (2, 'Bob')") + + # Register our test plugin + ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") + + try: + # Request with _extra=render_cells + response = await ds.client.get("/test/test_render.json?_extra=render_cells") + assert response.status_code == 200 + data = response.json() + + # Verify the response structure + assert "render_cells" in data + assert "rows" in data + + # render_cells should be a list of rows, each row being a dict of column -> rendered HTML + render_cells = data["render_cells"] + assert len(render_cells) == 2 + + # First row: id=1, name='Alice' + # The 'name' column should be rendered by our plugin as Alice + assert render_cells[0]["name"] == "Alice" + # The 'id' column should use default rendering (just the value as string) + assert render_cells[0]["id"] == "1" + + # Second row: id=2, name='Bob' + assert 
render_cells[1]["name"] == "Bob" + assert render_cells[1]["id"] == "2" + + # The regular rows should still contain raw values + assert data["rows"] == [ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, + ] + + finally: + ds.pm.unregister(name="TestRenderCellPlugin") From eae94dc2c3db39ac2574a1f6394d67f1f07cc9fc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 20:03:10 -0800 Subject: [PATCH 068/299] Initial render_cell and foreign_key_tables extras for row Closes #2619, refs #2050 --- datasette/views/row.py | 39 +++++++++++++++++++++++++++- datasette/views/table.py | 4 +-- tests/test_api.py | 56 ++++++++++++++++++++++++++++++++++++++++ tests/test_table_api.py | 28 ++++++++++---------- 4 files changed, 111 insertions(+), 16 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 4f896632..077c33c2 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -12,7 +12,7 @@ from datasette.utils import ( from datasette.plugins import pm import json import sqlite_utils -from .table import display_columns_and_rows +from .table import display_columns_and_rows, _get_extras class RowView(DataView): @@ -104,11 +104,48 @@ class RowView(DataView): "primary_key_values": pk_values, } + # Handle _extra parameter (new style) + extras = _get_extras(request) + + # Also support legacy _extras parameter for backward compatibility if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): + extras.add("foreign_key_tables") + + # Process extras + if "foreign_key_tables" in extras: data["foreign_key_tables"] = await self.foreign_key_tables( database, table, pk_values ) + if "render_cell" in extras: + # Call render_cell plugin hook for each cell + rendered_rows = [] + for row in rows: + rendered_row = {} + for value, column in zip(row, columns): + # Call render_cell plugin hook + plugin_display_value = None + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table, + 
database=database, + datasette=self.ds, + request=request, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break + if plugin_display_value: + rendered_row[column] = str(plugin_display_value) + else: + # Default: convert value to string + rendered_row[column] = "" if value is None else str(value) + rendered_rows.append(rendered_row) + data["render_cell"] = rendered_rows + return ( data, template_data, diff --git a/datasette/views/table.py b/datasette/views/table.py index c8f209d6..9a3ae69f 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1492,7 +1492,7 @@ async def table_view_data( async def extra_display_rows(run_display_columns_and_rows): return run_display_columns_and_rows["rows"] - async def extra_render_cells(): + async def extra_render_cell(): "Rendered HTML for each cell using the render_cell plugin hook" columns = [col[0] for col in results.description] rendered_rows = [] @@ -1708,7 +1708,7 @@ async def table_view_data( run_display_columns_and_rows, extra_display_columns, extra_display_rows, - extra_render_cells, + extra_render_cell, extra_debug, extra_request, extra_query, diff --git a/tests/test_api.py b/tests/test_api.py index 008fc42b..1571fd5d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -752,6 +752,62 @@ async def test_row_foreign_key_tables(ds_client): ] +@pytest.mark.asyncio +async def test_row_extra_render_cell(): + """Test that _extra=render_cell returns rendered HTML from render_cell plugin hook on row pages""" + from datasette import hookimpl + from datasette.app import Datasette + + class TestRenderCellPlugin: + __name__ = "TestRenderCellPlugin" + + @hookimpl + def render_cell(self, value, column, table, database): + # Only modify cells in our test table + if table == "test_render" and column == "name": + return f"{value}" + return None + + ds = Datasette(memory=True) + await ds.invoke_startup() + db = 
ds.add_memory_database("test_row_render") + await db.execute_write( + "create table test_render (id integer primary key, name text)" + ) + await db.execute_write("insert into test_render values (1, 'Alice')") + + # Register our test plugin + ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") + + try: + # Request row with _extra=render_cell + response = await ds.client.get( + "/test_row_render/test_render/1.json?_extra=render_cell" + ) + assert response.status_code == 200 + data = response.json() + + # Verify the response structure + assert "render_cell" in data + assert "rows" in data + + # render_cell should be a list with one row (since this is a row page) + render_cell = data["render_cell"] + assert len(render_cell) == 1 + + # The row: id=1, name='Alice' + # The 'name' column should be rendered by our plugin as Alice + assert render_cell[0]["name"] == "Alice" + # The 'id' column should use default rendering (just the value as string) + assert render_cell[0]["id"] == "1" + + # The regular rows should still contain raw values + assert data["rows"] == [{"id": 1, "name": "Alice"}] + + finally: + ds.pm.unregister(name="TestRenderCellPlugin") + + def test_databases_json(app_client_two_attached_databases_one_immutable): response = app_client_two_attached_databases_one_immutable.get("/-/databases.json") databases = response.json diff --git a/tests/test_table_api.py b/tests/test_table_api.py index d5a8ca41..25419bb8 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1386,8 +1386,8 @@ async def test_table_extras(ds_client, extra, expected_json): @pytest.mark.asyncio -async def test_extra_render_cells(): - """Test that _extra=render_cells returns rendered HTML from render_cell plugin hook""" +async def test_extra_render_cell(): + """Test that _extra=render_cell returns rendered HTML from render_cell plugin hook""" from datasette import hookimpl from datasette.app import Datasette @@ -1403,7 +1403,7 @@ async def 
test_extra_render_cells(): ds = Datasette(memory=True) await ds.invoke_startup() - db = ds.add_memory_database("test") + db = ds.add_memory_database("test_table_render") await db.execute_write( "create table test_render (id integer primary key, name text)" ) @@ -1414,28 +1414,30 @@ async def test_extra_render_cells(): ds.pm.register(TestRenderCellPlugin(), name="TestRenderCellPlugin") try: - # Request with _extra=render_cells - response = await ds.client.get("/test/test_render.json?_extra=render_cells") + # Request with _extra=render_cell + response = await ds.client.get( + "/test_table_render/test_render.json?_extra=render_cell" + ) assert response.status_code == 200 data = response.json() # Verify the response structure - assert "render_cells" in data + assert "render_cell" in data assert "rows" in data - # render_cells should be a list of rows, each row being a dict of column -> rendered HTML - render_cells = data["render_cells"] - assert len(render_cells) == 2 + # render_cell should be a list of rows, each row being a dict of column -> rendered HTML + render_cell = data["render_cell"] + assert len(render_cell) == 2 # First row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice - assert render_cells[0]["name"] == "Alice" + assert render_cell[0]["name"] == "Alice" # The 'id' column should use default rendering (just the value as string) - assert render_cells[0]["id"] == "1" + assert render_cell[0]["id"] == "1" # Second row: id=2, name='Bob' - assert render_cells[1]["name"] == "Bob" - assert render_cells[1]["id"] == "2" + assert render_cell[1]["name"] == "Bob" + assert render_cell[1]["id"] == "2" # The regular rows should still contain raw values assert data["rows"] == [ From 6fede23a2ebb586c9f5dd6159907e259ff8f3082 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 21 Dec 2025 20:18:26 -0800 Subject: [PATCH 069/299] Only return render_coll columns that differ from default, refs #2619 --- datasette/views/row.py | 3 --- 
datasette/views/table.py | 3 --- tests/test_api.py | 5 +++-- tests/test_table_api.py | 7 ++++--- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 077c33c2..718ee00c 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -140,9 +140,6 @@ class RowView(DataView): break if plugin_display_value: rendered_row[column] = str(plugin_display_value) - else: - # Default: convert value to string - rendered_row[column] = "" if value is None else str(value) rendered_rows.append(rendered_row) data["render_cell"] = rendered_rows diff --git a/datasette/views/table.py b/datasette/views/table.py index 9a3ae69f..b07b62ae 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1516,9 +1516,6 @@ async def table_view_data( break if plugin_display_value: rendered_row[column] = str(plugin_display_value) - else: - # Default: convert value to string - rendered_row[column] = "" if value is None else str(value) rendered_rows.append(rendered_row) return rendered_rows diff --git a/tests/test_api.py b/tests/test_api.py index 1571fd5d..41bad84e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -792,14 +792,15 @@ async def test_row_extra_render_cell(): assert "rows" in data # render_cell should be a list with one row (since this is a row page) + # Only columns modified by plugins are included (sparse output) render_cell = data["render_cell"] assert len(render_cell) == 1 # The row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice assert render_cell[0]["name"] == "Alice" - # The 'id' column should use default rendering (just the value as string) - assert render_cell[0]["id"] == "1" + # The 'id' column is not included since no plugin modified it + assert "id" not in render_cell[0] # The regular rows should still contain raw values assert data["rows"] == [{"id": 1, "name": "Alice"}] diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 
25419bb8..527550fb 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1426,18 +1426,19 @@ async def test_extra_render_cell(): assert "rows" in data # render_cell should be a list of rows, each row being a dict of column -> rendered HTML + # Only columns modified by plugins are included (sparse output) render_cell = data["render_cell"] assert len(render_cell) == 2 # First row: id=1, name='Alice' # The 'name' column should be rendered by our plugin as Alice assert render_cell[0]["name"] == "Alice" - # The 'id' column should use default rendering (just the value as string) - assert render_cell[0]["id"] == "1" + # The 'id' column is not included since no plugin modified it + assert "id" not in render_cell[0] # Second row: id=2, name='Bob' assert render_cell[1]["name"] == "Bob" - assert render_cell[1]["id"] == "2" + assert "id" not in render_cell[1] # The regular rows should still contain raw values assert data["rows"] == [ From 757ce92bafb91bc40c74f41fffd9c3d3c6fffdec Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Jan 2026 07:58:18 -0800 Subject: [PATCH 070/299] datasette.utils.StartupError() now becomes a click exception, closes #2624 --- datasette/cli.py | 10 ++++++++-- docs/plugin_hooks.rst | 8 +++++--- tests/test_cli.py | 26 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 21420491..1d0cb022 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -666,7 +666,10 @@ def serve( return ds # Run the "startup" plugin hooks - run_sync(ds.invoke_startup) + try: + run_sync(ds.invoke_startup) + except StartupError as e: + raise click.ClickException(e.args[0]) # Run async soundness checks - but only if we're not under pytest run_sync(lambda: check_databases(ds)) @@ -815,7 +818,10 @@ def create_token( ds = Datasette(secret=secret, plugins_dir=plugins_dir) # Run ds.invoke_startup() in an event loop - run_sync(ds.invoke_startup) + try: + run_sync(ds.invoke_startup) 
+ except StartupError as e: + raise click.ClickException(e.args[0]) # Warn about any unknown actions actions = [] diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 118a6bde..da49811a 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -965,12 +965,13 @@ Here is an example that validates required plugin configuration. The server will .. code-block:: python + from datasette.utils import StartupError + @hookimpl def startup(datasette): config = datasette.plugin_config("my-plugin") or {} - assert ( - "required-setting" in config - ), "my-plugin requires setting required-setting" + if "required-setting" not in config: + raise StartupError("my-plugin requires setting required-setting") You can also return an async function, which will be awaited on startup. Use this option if you need to execute any database queries, for example this function which creates the ``my_table`` database table if it does not yet exist: @@ -994,6 +995,7 @@ Potential use-cases: * Run some initialization code for the plugin * Create database tables that a plugin needs on startup * Validate the configuration for a plugin on startup, and raise an error if it is invalid +* Raise a ``datasette.utils.StartupError("message")`` exception to prevent Datasette from starting and display that message to the user. .. 
note:: diff --git a/tests/test_cli.py b/tests/test_cli.py index 21b86569..36d90e82 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -304,6 +304,32 @@ def test_plugin_s_overwrite(): ) +def test_startup_error_from_plugin_is_click_exception(tmp_path): + plugins_dir = tmp_path / "plugins" + plugins_dir.mkdir() + (plugins_dir / "startup_error.py").write_text( + "from datasette import hookimpl\n" + "from datasette.utils import StartupError\n" + "\n" + "@hookimpl\n" + "def startup(datasette):\n" + ' raise StartupError("boom")\n', + "utf-8", + ) + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--plugins-dir", + str(plugins_dir), + "--get", + "/", + ], + ) + assert result.exit_code == 1 + assert "Error: boom" in result.output + + def test_setting_type_validation(): runner = CliRunner() result = runner.invoke(cli, ["--setting", "default_page_size", "dog"]) From b52655e85684c44690105e22a7028bad36ee5557 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Jan 2026 07:59:07 -0800 Subject: [PATCH 071/299] Ignore *.db in gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 70e6bbeb..ce256606 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ scratchpad uv.lock data.db +# test databases +*.db + # We don't use Pipfile, so ignore them Pipfile Pipfile.lock From b0436faa5e3c35977607da6a653425fc6bf43403 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 22 Jan 2026 07:03:05 -0800 Subject: [PATCH 072/299] Fix test isolation bug in test_startup_error_from_plugin_is_click_exception (#2627) * Fix test isolation bug in test_startup_error_from_plugin_is_click_exception The test creates a plugin that raises StartupError("boom") and registers it in the global plugin manager (pm). Without cleanup, this plugin leaks to subsequent tests, causing test_setting_boolean_validation_false_values to fail with "Error: boom" instead of "Forbidden". 
Add try/finally block to ensure the plugin is unregistered after the test completes, following the established cleanup pattern used elsewhere in the test suite. * Fix blacken-docs formatting in plugin_hooks.rst Apply blacken-docs formatting to code example that exceeded the 60 character line limit. --------- Co-authored-by: Claude --- docs/plugin_hooks.rst | 5 ++++- tests/test_cli.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index da49811a..ad4a70f8 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -967,11 +967,14 @@ Here is an example that validates required plugin configuration. The server will from datasette.utils import StartupError + @hookimpl def startup(datasette): config = datasette.plugin_config("my-plugin") or {} if "required-setting" not in config: - raise StartupError("my-plugin requires setting required-setting") + raise StartupError( + "my-plugin requires setting required-setting" + ) You can also return an async function, which will be awaited on startup. 
Use this option if you need to execute any database queries, for example this function which creates the ``my_table`` database table if it does not yet exist: diff --git a/tests/test_cli.py b/tests/test_cli.py index 36d90e82..6cdfd924 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,7 +4,7 @@ from .fixtures import ( EXPECTED_PLUGINS, ) from datasette.app import SETTINGS -from datasette.plugins import DEFAULT_PLUGINS +from datasette.plugins import DEFAULT_PLUGINS, pm from datasette.cli import cli, serve from datasette.version import __version__ from datasette.utils import tilde_encode @@ -326,8 +326,16 @@ def test_startup_error_from_plugin_is_click_exception(tmp_path): "/", ], ) - assert result.exit_code == 1 - assert "Error: boom" in result.output + try: + assert result.exit_code == 1 + assert "Error: boom" in result.output + finally: + # Cleanup: Unregister the plugin to avoid test isolation issues + to_unregister = [ + p for p in pm.get_plugins() if p.__name__ == "startup_error.py" + ] + if to_unregister: + pm.unregister(to_unregister[0]) def test_setting_type_validation(): From 66d2a033f8ad124e08cf4f0b488454c76dfdb63f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 23 Jan 2026 20:43:16 -0800 Subject: [PATCH 073/299] Switch to ruff and fix all lint errors, refs #2630 --- .github/workflows/test.yml | 2 ++ Justfile | 12 +++++++---- datasette/app.py | 4 ++-- datasette/default_permissions/__init__.py | 18 ++++++++-------- datasette/views/base.py | 1 - pyproject.toml | 5 +++++ setup.cfg | 3 --- tests/test_allowed_resources.py | 1 - tests/test_api.py | 26 ++++++----------------- tests/test_config_dir.py | 2 +- tests/test_crossdb.py | 2 +- tests/test_csv.py | 6 ------ tests/test_filters.py | 21 ------------------ tests/test_html.py | 9 +------- tests/test_internals_datasette.py | 2 +- tests/test_permissions.py | 3 +-- tests/test_plugins.py | 6 ++---- tests/test_restriction_sql.py | 4 ++-- tests/test_schema_endpoints.py | 1 - tests/test_table_api.py 
| 9 +------- tests/test_table_html.py | 8 ++----- 21 files changed, 44 insertions(+), 101 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3790c788..b1ba3232 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,8 @@ jobs: tests/test_datasette_https_server.sh - name: Black run: black --check . + - name: Ruff + run: ruff check datasette tests - name: Check if cog needs to be run run: | cog --check docs/*.rst diff --git a/Justfile b/Justfile index 8c50e5ca..657881be 100644 --- a/Justfile +++ b/Justfile @@ -17,12 +17,16 @@ export DATASETTE_SECRET := "not_a_secret" uv run codespell datasette -S datasette/static --ignore-words docs/codespell-ignore-words.txt uv run codespell tests --ignore-words docs/codespell-ignore-words.txt -# Run linters: black, flake8, mypy, cog +# Run linters: black, ruff, cog @lint: codespell - uv run black . --check - uv run flake8 + uv run black datasette tests --check + uv run ruff check datasette tests uv run cog --check README.md docs/*.rst +# Apply ruff fixes +@fix: + uv run ruff check --fix datasette tests + # Rebuild docs with cog @cog: uv run cog -r README.md docs/*.rst @@ -37,7 +41,7 @@ export DATASETTE_SECRET := "not_a_secret" # Apply Black @black: - uv run black . 
+ uv run black datasette tests # Apply blacken-docs @blacken-docs: diff --git a/datasette/app.py b/datasette/app.py index b9955925..a5cd75c5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -6,7 +6,7 @@ import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: - from datasette.permissions import AllowedResource, Resource + from datasette.permissions import Resource import asgi_csrf import collections import dataclasses @@ -1144,7 +1144,7 @@ class Datasette: # Validate that resource is a Resource object or None if resource is not None and not isinstance(resource, Resource): - raise TypeError(f"resource must be a Resource subclass instance or None.") + raise TypeError("resource must be a Resource subclass instance or None.") # Check if actor can see it if not await self.allowed(action=action, resource=resource, actor=actor): diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 4c82d705..40373fa7 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -26,18 +26,18 @@ from datasette import hookimpl # Re-export all hooks and public utilities from .restrictions import ( - actor_restrictions_sql, - restrictions_allow_action, - ActorRestrictions, + actor_restrictions_sql as actor_restrictions_sql, + restrictions_allow_action as restrictions_allow_action, + ActorRestrictions as ActorRestrictions, ) -from .root import root_user_permissions_sql -from .config import config_permissions_sql +from .root import root_user_permissions_sql as root_user_permissions_sql +from .config import config_permissions_sql as config_permissions_sql from .defaults import ( - default_allow_sql_check, - default_action_permissions_sql, - DEFAULT_ALLOW_ACTIONS, + default_allow_sql_check as default_allow_sql_check, + default_action_permissions_sql as default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS as DEFAULT_ALLOW_ACTIONS, ) -from .tokens import 
actor_from_signed_api_token +from .tokens import actor_from_signed_api_token as actor_from_signed_api_token @hookimpl diff --git a/datasette/views/base.py b/datasette/views/base.py index 5216924f..bdc9f742 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -1,7 +1,6 @@ import asyncio import csv import hashlib -import json import sys import textwrap import time diff --git a/pyproject.toml b/pyproject.toml index 87884341..6fca673d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dev = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + "ruff>=0.9", # docs "Sphinx==7.4.7", "furo==2025.9.25", @@ -94,5 +95,9 @@ datasette = ["templates/*.html"] [tool.setuptools.dynamic] version = {attr = "datasette.version.__version__"} +[tool.ruff] +line-length = 160 +select = ["E", "F", "W"] + [tool.uv] package = true diff --git a/setup.cfg b/setup.cfg index ebf43062..b7e47898 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,2 @@ [aliases] test=pytest - -[flake8] -max-line-length = 160 diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index 0cd48ea9..08adbe48 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -117,7 +117,6 @@ async def test_tables_endpoint_database_restriction(test_ds): # Bob should only see analytics tables analytics_tables = [m for m in result if m["name"].startswith("analytics/")] - production_tables = [m for m in result if m["name"].startswith("production/")] assert len(analytics_tables) == 3 table_names = {m["name"] for m in analytics_tables} diff --git a/tests/test_api.py b/tests/test_api.py index 41bad84e..907d7445 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,21 +1,7 @@ from datasette.app import Datasette from datasette.plugins import DEFAULT_PLUGINS from datasette.version import __version__ -from .fixtures import ( # noqa - app_client, - app_client_no_files, - app_client_with_dot, - app_client_shorter_time_limit, - 
app_client_two_attached_databases_one_immutable, - app_client_larger_cache_size, - app_client_with_cors, - app_client_two_attached_databases, - app_client_conflicting_database_names, - app_client_immutable_and_inspect_file, - make_app_client, - EXPECTED_PLUGINS, - METADATA, -) +from .fixtures import make_app_client, EXPECTED_PLUGINS import pathlib import pytest import sys @@ -815,14 +801,14 @@ def test_databases_json(app_client_two_attached_databases_one_immutable): assert 2 == len(databases) extra_database, fixtures_database = databases assert "extra database" == extra_database["name"] - assert None == extra_database["hash"] - assert True == extra_database["is_mutable"] - assert False == extra_database["is_memory"] + assert extra_database["hash"] is None + assert extra_database["is_mutable"] is True + assert extra_database["is_memory"] is False assert "fixtures" == fixtures_database["name"] assert fixtures_database["hash"] is not None - assert False == fixtures_database["is_mutable"] - assert False == fixtures_database["is_memory"] + assert fixtures_database["is_mutable"] is False + assert fixtures_database["is_memory"] is False @pytest.mark.asyncio diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index 0598a4a6..f9a90fbe 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -87,7 +87,7 @@ def test_invalid_settings(config_dir): ) try: with pytest.raises(StartupError) as ex: - ds = Datasette([], config_dir=config_dir) + Datasette([], config_dir=config_dir) assert ex.value.args[0] == "Invalid setting 'invalid' in config file" finally: (config_dir / "datasette.json").write_text(previous, "utf-8") diff --git a/tests/test_crossdb.py b/tests/test_crossdb.py index 1ec1a05c..7807cd5d 100644 --- a/tests/test_crossdb.py +++ b/tests/test_crossdb.py @@ -67,7 +67,7 @@ def test_crossdb_attached_database_list_display( ): app_client = app_client_two_attached_databases_crossdb_enabled response = app_client.get("/_memory") - response2 = 
app_client.get("/") + app_client.get("/") for fragment in ( "databases are attached to this connection", "
  • fixtures - ", diff --git a/tests/test_csv.py b/tests/test_csv.py index b4a71169..5589bd97 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -1,12 +1,6 @@ from datasette.app import Datasette from bs4 import BeautifulSoup as Soup import pytest -from .fixtures import ( # noqa - app_client, - app_client_csv_max_mb_one, - app_client_with_cors, - app_client_with_trace, -) import urllib.parse EXPECTED_TABLE_CSV = """id,content diff --git a/tests/test_filters.py b/tests/test_filters.py index a3fada98..eda9e9a1 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -103,27 +103,6 @@ async def test_through_filters_from_request(ds_client): assert filter_args.extra_context == {} -@pytest.mark.asyncio -async def test_through_filters_from_request(ds_client): - request = Request.fake( - '/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' - ) - filter_args = await through_filters( - request=request, - datasette=ds_client.ds, - table="roadside_attractions", - database="fixtures", - )() - assert filter_args.where_clauses == [ - "pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)" - ] - assert filter_args.params == {"p0": "1"} - assert filter_args.human_descriptions == [ - 'roadside_attraction_characteristics.characteristic_id = "1"' - ] - assert filter_args.extra_context == {} - - @pytest.mark.asyncio async def test_where_filters_from_request(ds_client): await ds_client.ds.invoke_startup() diff --git a/tests/test_html.py b/tests/test_html.py index 7b667301..8fad5764 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,14 +1,7 @@ from bs4 import BeautifulSoup as Soup from datasette.app import Datasette from datasette.utils import allowed_pragmas -from .fixtures import ( # noqa - app_client, - app_client_base_url_prefix, - app_client_shorter_time_limit, - app_client_two_attached_databases, - make_app_client, - METADATA, -) +from .fixtures import 
make_app_client from .utils import assert_footer_links, inner_html import copy import json diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index c64620a6..b378a158 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -158,7 +158,7 @@ def test_datasette_error_if_string_not_list(tmpdir): # https://github.com/simonw/datasette/issues/1985 db_path = str(tmpdir / "data.db") with pytest.raises(ValueError): - ds = Datasette(db_path) + Datasette(db_path) @pytest.mark.asyncio diff --git a/tests/test_permissions.py b/tests/test_permissions.py index e2dd92b8..96c0cf6f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -2,7 +2,7 @@ import collections from datasette.app import Datasette from datasette.cli import cli from datasette.default_permissions import restrictions_allow_action -from .fixtures import app_client, assert_permissions_checked, make_app_client +from .fixtures import assert_permissions_checked, make_app_client from click.testing import CliRunner from bs4 import BeautifulSoup as Soup import copy @@ -1481,7 +1481,6 @@ async def test_actor_restrictions_view_instance_only(perms_ds): assert response.status_code == 200 # But no databases should be visible (no view-database permission) - data = response.json() # The instance is visible but databases list should be empty or minimal # Actually, let's check via allowed_resources page = await perms_ds.allowed_resources("view-database", actor) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 42995c0d..6c23b3ef 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1172,8 +1172,6 @@ async def test_hook_filters_from_request(ds_client): @pytest.mark.asyncio @pytest.mark.parametrize("extra_metadata", (False, True)) async def test_hook_register_actions(extra_metadata): - from datasette.permissions import Action - from datasette.resources import DatabaseResource, InstanceResource ds = Datasette( config=( @@ 
-1527,7 +1525,7 @@ async def test_hook_register_events(): @pytest.mark.asyncio -async def test_hook_register_actions(): +async def test_hook_register_actions_view_collection(): datasette = Datasette(memory=True, plugins_dir=PLUGINS_DIR) await datasette.invoke_startup() # Check that the custom action from my_plugin.py is registered @@ -1545,7 +1543,7 @@ async def test_hook_register_actions_with_custom_resources(): - A parent-level action (DocumentCollectionResource) - A child-level action (DocumentResource) """ - from datasette.permissions import Resource, Action + from datasette.permissions import Resource # Define custom Resource classes class DocumentCollectionResource(Resource): diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py index f23eb839..df6abd29 100644 --- a/tests/test_restriction_sql.py +++ b/tests/test_restriction_sql.py @@ -182,8 +182,8 @@ async def test_also_requires_with_restrictions(): """ ds = Datasette() await ds.invoke_startup() - db1 = ds.add_memory_database("db1_also_requires") - db2 = ds.add_memory_database("db2_also_requires") + ds.add_memory_database("db1_also_requires") + ds.add_memory_database("db2_also_requires") await ds._refresh_schemas() # Actor restricted to only db1_also_requires for view-database diff --git a/tests/test_schema_endpoints.py b/tests/test_schema_endpoints.py index 5500a7b0..50742df2 100644 --- a/tests/test_schema_endpoints.py +++ b/tests/test_schema_endpoints.py @@ -1,4 +1,3 @@ -import asyncio import pytest import pytest_asyncio from datasette.app import Datasette diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 527550fb..49df3ad5 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1,13 +1,6 @@ from datasette.utils import detect_json1 from datasette.utils.sqlite import sqlite_version -from .fixtures import ( # noqa - app_client, - app_client_with_trace, - app_client_returned_rows_matches_page_size, - generate_compound_rows, - generate_sortable_rows, - 
make_app_client, -) +from .fixtures import generate_compound_rows, generate_sortable_rows, make_app_client import json import pytest import urllib diff --git a/tests/test_table_html.py b/tests/test_table_html.py index e3ddb4b0..90be591a 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -1,10 +1,6 @@ from datasette.app import Datasette from bs4 import BeautifulSoup as Soup -from .fixtures import ( # noqa - app_client, - make_app_client, - app_client_with_dot, -) +from .fixtures import make_app_client import pathlib import pytest import urllib.parse @@ -1263,7 +1259,7 @@ async def test_foreign_key_labels_obey_permissions(config): "insert or replace into b (id, name, a_id) values (1, 'world', 1)" ) # Anonymous user can see table b but not table a - blah = await ds.client.get("/foreign_key_labels.json") + await ds.client.get("/foreign_key_labels.json") anon_a = await ds.client.get("/foreign_key_labels/a.json?_labels=on") assert anon_a.status_code == 403 anon_b = await ds.client.get("/foreign_key_labels/b.json?_labels=on") From 7915c46ddd50e058cfc441c6b061cee177d6c562 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 23 Jan 2026 20:57:25 -0800 Subject: [PATCH 074/299] Fix flaky test_database_page test with deterministic ordering (#2628) * Fix flaky test_database_page test with deterministic ordering - Add ORDER BY to table_names() query in database.py - Sort foreign keys deterministically in get_all_foreign_keys() - Refactor test_database_page to use property-based assertions instead of 500+ lines of hardcoded expected data - Run blacken-docs on plugin_hooks.rst * Update test_row_foreign_key_tables for new deterministic FK ordering The foreign keys are now sorted by (other_table, column, other_column), so complex_foreign_keys comes before foreign_key_references alphabetically. * Update test_table_names for new alphabetical ordering The table_names() method now returns tables sorted alphabetically. 
* Fix for test that fails prior to SQLite 3.37 --------- Co-authored-by: Claude --- datasette/database.py | 2 +- datasette/utils/__init__.py | 14 +- tests/test_api.py | 725 +++++++++---------------------- tests/test_internals_database.py | 45 +- 4 files changed, 243 insertions(+), 543 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index e5858128..8e4ee2b6 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -431,7 +431,7 @@ class Database: async def table_names(self): results = await self.execute( - "select name from sqlite_master where type='table'" + "select name from sqlite_master where type='table' order by name" ) return [r[0] for r in results.rows] diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index ac2c74da..fb864077 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -612,7 +612,10 @@ def get_outbound_foreign_keys(conn, table): def get_all_foreign_keys(conn): tables = [ - r[0] for r in conn.execute('select name from sqlite_master where type="table"') + r[0] + for r in conn.execute( + 'select name from sqlite_master where type="table" order by name' + ) ] table_to_foreign_keys = {} for table in tables: @@ -634,6 +637,15 @@ def get_all_foreign_keys(conn): {"other_table": table_name, "column": from_, "other_column": to_} ) + # Sort foreign keys for deterministic ordering + for table in table_to_foreign_keys: + table_to_foreign_keys[table]["incoming"].sort( + key=lambda fk: (fk["other_table"], fk["column"], fk["other_column"]) + ) + table_to_foreign_keys[table]["outgoing"].sort( + key=lambda fk: (fk["other_table"], fk["column"], fk["other_column"]) + ) + return table_to_foreign_keys diff --git a/tests/test_api.py b/tests/test_api.py index 907d7445..e3951df9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,6 @@ from datasette.app import Datasette from datasette.plugins import DEFAULT_PLUGINS +from datasette.utils.sqlite import sqlite_version from 
datasette.version import __version__ from .fixtures import make_app_client, EXPECTED_PLUGINS import pathlib @@ -59,504 +60,189 @@ async def test_database_page(ds_client): assert response.status_code == 200 data = response.json() assert data["database"] == "fixtures" - assert data["tables"] == [ - { - "name": "123_starts_with_digits", - "columns": ["content"], - "primary_keys": [], - "count": 0, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "Table With Space In Name", - "columns": ["pk", "content"], - "primary_keys": ["pk"], - "count": 0, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "attraction_characteristic", - "columns": ["pk", "name"], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "roadside_attraction_characteristics", - "column": "pk", - "other_column": "characteristic_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "binary_data", - "columns": ["data"], - "primary_keys": [], - "count": 3, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "complex_foreign_keys", - "columns": ["pk", "f1", "f2", "f3"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "simple_primary_key", - "column": "f3", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "f2", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "f1", - "other_column": "id", - }, - ], - }, - "private": False, - }, - { - "name": "compound_primary_key", - "columns": ["pk1", "pk2", "content"], - "primary_keys": ["pk1", "pk2"], - "count": 2, - "hidden": False, - "fts_table": None, - 
"foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "compound_three_primary_keys", - "columns": ["pk1", "pk2", "pk3", "content"], - "primary_keys": ["pk1", "pk2", "pk3"], - "count": 1001, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "custom_foreign_key_label", - "columns": ["pk", "foreign_key_with_custom_label"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "primary_key_multiple_columns_explicit_label", - "column": "foreign_key_with_custom_label", - "other_column": "id", - } - ], - }, - "private": False, - }, - { - "name": "facet_cities", - "columns": ["id", "name"], - "primary_keys": ["id"], - "count": 4, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "facetable", - "column": "id", - "other_column": "_city_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "facetable", - "columns": [ - "pk", - "created", - "planet_int", - "on_earth", - "state", - "_city_id", - "_neighborhood", - "tags", - "complex_array", - "distinct_some_null", - "n", - ], - "primary_keys": ["pk"], - "count": 15, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "facet_cities", - "column": "_city_id", - "other_column": "id", - } - ], - }, - "private": False, - }, - { - "name": "foreign_key_references", - "columns": [ - "pk", - "foreign_key_with_label", - "foreign_key_with_blank_label", - "foreign_key_with_no_label", - "foreign_key_compound_pk1", - "foreign_key_compound_pk2", - ], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "primary_key_multiple_columns", - "column": "foreign_key_with_no_label", - "other_column": "id", - 
}, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_blank_label", - "other_column": "id", - }, - { - "other_table": "simple_primary_key", - "column": "foreign_key_with_label", - "other_column": "id", - }, - ], - }, - "private": False, - }, - ] + [ - { - "name": "infinity", - "columns": ["value"], - "primary_keys": [], - "count": 3, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "primary_key_multiple_columns", - "columns": ["id", "content", "content2"], - "primary_keys": ["id"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_no_label", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "primary_key_multiple_columns_explicit_label", - "columns": ["id", "content", "content2"], - "primary_keys": ["id"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "custom_foreign_key_label", - "column": "id", - "other_column": "foreign_key_with_custom_label", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "roadside_attraction_characteristics", - "columns": ["attraction_id", "characteristic_id"], - "primary_keys": [], - "count": 5, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - { - "other_table": "attraction_characteristic", - "column": "characteristic_id", - "other_column": "pk", - }, - { - "other_table": "roadside_attractions", - "column": "attraction_id", - "other_column": "pk", - }, - ], - }, - "private": False, - }, - { - "name": "roadside_attractions", - "columns": ["pk", "name", "address", "url", "latitude", "longitude"], - "primary_keys": ["pk"], - "count": 4, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": 
"roadside_attraction_characteristics", - "column": "pk", - "other_column": "attraction_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "searchable", - "columns": ["pk", "text1", "text2", "name with . and spaces"], - "primary_keys": ["pk"], - "count": 2, - "hidden": False, - "fts_table": "searchable_fts", - "foreign_keys": { - "incoming": [ - { - "other_table": "searchable_tags", - "column": "pk", - "other_column": "searchable_id", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "searchable_tags", - "columns": ["searchable_id", "tag"], - "primary_keys": ["searchable_id", "tag"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [], - "outgoing": [ - {"other_table": "tags", "column": "tag", "other_column": "tag"}, - { - "other_table": "searchable", - "column": "searchable_id", - "other_column": "pk", - }, - ], - }, - "private": False, - }, - { - "name": "select", - "columns": ["group", "having", "and", "json"], - "primary_keys": [], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "simple_primary_key", - "columns": ["id", "content"], - "primary_keys": ["id"], - "count": 5, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_blank_label", - }, - { - "other_table": "foreign_key_references", - "column": "id", - "other_column": "foreign_key_with_label", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f3", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f2", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f1", - }, - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "sortable", - "columns": [ - "pk1", - "pk2", - "content", - "sortable", - 
"sortable_with_nulls", - "sortable_with_nulls_2", - "text", - ], - "primary_keys": ["pk1", "pk2"], - "count": 201, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "table/with/slashes.csv", - "columns": ["pk", "content"], - "primary_keys": ["pk"], - "count": 1, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "tags", - "columns": ["tag"], - "primary_keys": ["tag"], - "count": 2, - "hidden": False, - "fts_table": None, - "foreign_keys": { - "incoming": [ - { - "other_table": "searchable_tags", - "column": "tag", - "other_column": "tag", - } - ], - "outgoing": [], - }, - "private": False, - }, - { - "name": "no_primary_key", - "columns": ["content", "a", "b", "c"], - "primary_keys": [], - "count": 201, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "columns": [ - "text1", - "text2", - "name with . 
and spaces", - "searchable_fts", - "rank", - ], - "count": 2, - "foreign_keys": {"incoming": [], "outgoing": []}, - "fts_table": "searchable_fts", - "hidden": True, - "name": "searchable_fts", - "primary_keys": [], - "private": False, - }, - { - "name": "searchable_fts_config", - "columns": ["k", "v"], - "primary_keys": ["k"], - "count": 1, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_data", - "columns": ["id", "block"], - "primary_keys": ["id"], - "count": 3, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_docsize", - "columns": ["id", "sz"], - "primary_keys": ["id"], - "count": 2, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - { - "name": "searchable_fts_idx", - "columns": ["segid", "term", "pgno"], - "primary_keys": ["segid", "term"], - "count": 1, - "hidden": True, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, - ] + + # Build lookup for easier assertions + tables = data["tables"] + tables_by_name = {t["name"]: t for t in tables} + + # Verify tables are sorted by (hidden, name) - visible first, then hidden + table_names = [t["name"] for t in tables] + expected_order = sorted(tables, key=lambda t: (t["hidden"], t["name"])) + assert table_names == [t["name"] for t in expected_order] + + # Expected visible tables (not hidden) + expected_visible_tables = { + "123_starts_with_digits", + "Table With Space In Name", + "attraction_characteristic", + "binary_data", + "complex_foreign_keys", + "compound_primary_key", + "compound_three_primary_keys", + "custom_foreign_key_label", + "facet_cities", + "facetable", + "foreign_key_references", + "infinity", + "primary_key_multiple_columns", + "primary_key_multiple_columns_explicit_label", + 
"roadside_attraction_characteristics", + "roadside_attractions", + "searchable", + "searchable_tags", + "select", + "simple_primary_key", + "sortable", + "table/with/slashes.csv", + "tags", + } + + # Expected hidden tables + expected_hidden_tables = { + "no_primary_key", + "searchable_fts", + "searchable_fts_config", + "searchable_fts_data", + "searchable_fts_docsize", + "searchable_fts_idx", + } + + # Verify all expected tables exist + assert expected_visible_tables.issubset(tables_by_name.keys()) + assert expected_hidden_tables.issubset(tables_by_name.keys()) + + # Verify hidden status + visible_tables = {t["name"] for t in tables if not t["hidden"]} + hidden_tables = {t["name"] for t in tables if t["hidden"]} + assert expected_visible_tables == visible_tables + assert expected_hidden_tables == hidden_tables + + # Helper to compare foreign keys (order-insensitive) + def fk_set(fks): + return {(fk["other_table"], fk["column"], fk["other_column"]) for fk in fks} + + # Test specific table properties + # -- facetable: has outgoing FK to facet_cities + facetable = tables_by_name["facetable"] + assert facetable["count"] == 15 + assert facetable["primary_keys"] == ["pk"] + assert facetable["fts_table"] is None + assert facetable["private"] is False + assert fk_set(facetable["foreign_keys"]["outgoing"]) == { + ("facet_cities", "_city_id", "id") + } + assert fk_set(facetable["foreign_keys"]["incoming"]) == set() + + # -- facet_cities: has incoming FK from facetable + facet_cities = tables_by_name["facet_cities"] + assert facet_cities["count"] == 4 + assert facet_cities["columns"] == ["id", "name"] + assert fk_set(facet_cities["foreign_keys"]["incoming"]) == { + ("facetable", "id", "_city_id") + } + + # -- simple_primary_key: has multiple incoming FKs + simple_pk = tables_by_name["simple_primary_key"] + assert simple_pk["count"] == 5 + assert simple_pk["columns"] == ["id", "content"] + assert simple_pk["primary_keys"] == ["id"] + # Should have incoming FKs from 
complex_foreign_keys (f1, f2, f3) and foreign_key_references + incoming = fk_set(simple_pk["foreign_keys"]["incoming"]) + assert ("complex_foreign_keys", "id", "f1") in incoming + assert ("complex_foreign_keys", "id", "f2") in incoming + assert ("complex_foreign_keys", "id", "f3") in incoming + assert ("foreign_key_references", "id", "foreign_key_with_label") in incoming + assert ("foreign_key_references", "id", "foreign_key_with_blank_label") in incoming + + # -- complex_foreign_keys: has multiple outgoing FKs to same table + complex_fk = tables_by_name["complex_foreign_keys"] + assert complex_fk["count"] == 1 + assert complex_fk["columns"] == ["pk", "f1", "f2", "f3"] + outgoing = fk_set(complex_fk["foreign_keys"]["outgoing"]) + assert outgoing == { + ("simple_primary_key", "f1", "id"), + ("simple_primary_key", "f2", "id"), + ("simple_primary_key", "f3", "id"), + } + + # -- searchable: has FTS table association + searchable = tables_by_name["searchable"] + assert searchable["count"] == 2 + assert searchable["fts_table"] == "searchable_fts" + assert searchable["columns"] == ["pk", "text1", "text2", "name with . 
and spaces"] + + # -- searchable_fts: is the FTS virtual table (hidden) + searchable_fts = tables_by_name["searchable_fts"] + assert searchable_fts["hidden"] is True + assert searchable_fts["fts_table"] == "searchable_fts" + # The "rank" column became visible in pragma_table_info in SQLite 3.37+ + if sqlite_version() >= (3, 37, 0): + assert "rank" in searchable_fts["columns"] + + # -- compound primary keys + compound_pk = tables_by_name["compound_primary_key"] + assert compound_pk["primary_keys"] == ["pk1", "pk2"] + assert compound_pk["count"] == 2 + + compound_three = tables_by_name["compound_three_primary_keys"] + assert compound_three["primary_keys"] == ["pk1", "pk2", "pk3"] + assert compound_three["count"] == 1001 + + # -- sortable: generated data + sortable = tables_by_name["sortable"] + assert sortable["count"] == 201 + assert sortable["primary_keys"] == ["pk1", "pk2"] + + # -- no_primary_key: hidden table with generated data + no_pk = tables_by_name["no_primary_key"] + assert no_pk["hidden"] is True + assert no_pk["count"] == 201 + assert no_pk["primary_keys"] == [] + + # -- roadside attractions relationship chain + attractions = tables_by_name["roadside_attractions"] + assert attractions["count"] == 4 + assert fk_set(attractions["foreign_keys"]["incoming"]) == { + ("roadside_attraction_characteristics", "pk", "attraction_id") + } + + characteristics = tables_by_name["attraction_characteristic"] + assert characteristics["count"] == 2 + assert fk_set(characteristics["foreign_keys"]["incoming"]) == { + ("roadside_attraction_characteristics", "pk", "characteristic_id") + } + + # -- searchable_tags: multiple outgoing FKs + searchable_tags = tables_by_name["searchable_tags"] + assert searchable_tags["primary_keys"] == ["searchable_id", "tag"] + outgoing = fk_set(searchable_tags["foreign_keys"]["outgoing"]) + assert outgoing == { + ("searchable", "searchable_id", "pk"), + ("tags", "tag", "tag"), + } + + # -- tables with special names + assert 
"123_starts_with_digits" in tables_by_name + assert "Table With Space In Name" in tables_by_name + assert "table/with/slashes.csv" in tables_by_name + assert "select" in tables_by_name # SQL reserved word + + # Verify select table has SQL reserved word columns + select_table = tables_by_name["select"] + assert set(select_table["columns"]) == {"group", "having", "and", "json"} + + # Verify all tables have required fields + for table in tables: + assert "name" in table + assert "columns" in table + assert "primary_keys" in table + assert "count" in table + assert "hidden" in table + assert "fts_table" in table + assert "foreign_keys" in table + assert "private" in table + assert "incoming" in table["foreign_keys"] + assert "outgoing" in table["foreign_keys"] def test_no_files_uses_memory_database(app_client_no_files): @@ -699,7 +385,29 @@ async def test_row_foreign_key_tables(ds_client): "/fixtures/simple_primary_key/1.json?_extras=foreign_key_tables" ) assert response.status_code == 200 + # Foreign keys are sorted by (other_table, column, other_column) assert response.json()["foreign_key_tables"] == [ + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f1", + "count": 1, + "link": "/fixtures/complex_foreign_keys?f1=1", + }, + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f2", + "count": 0, + "link": "/fixtures/complex_foreign_keys?f2=1", + }, + { + "other_table": "complex_foreign_keys", + "column": "id", + "other_column": "f3", + "count": 1, + "link": "/fixtures/complex_foreign_keys?f3=1", + }, { "other_table": "foreign_key_references", "column": "id", @@ -714,27 +422,6 @@ async def test_row_foreign_key_tables(ds_client): "count": 1, "link": "/fixtures/foreign_key_references?foreign_key_with_label=1", }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f3", - "count": 1, - "link": "/fixtures/complex_foreign_keys?f3=1", - }, - { - "other_table": "complex_foreign_keys", - 
"column": "id", - "other_column": "f2", - "count": 0, - "link": "/fixtures/complex_foreign_keys?f2=1", - }, - { - "other_table": "complex_foreign_keys", - "column": "id", - "other_column": "f1", - "count": 1, - "link": "/fixtures/complex_foreign_keys?f1=1", - }, ] diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index d2e06073..02c67bfc 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -418,36 +418,37 @@ async def test_get_all_foreign_keys(db): @pytest.mark.asyncio async def test_table_names(db): table_names = await db.table_names() + # Tables are sorted alphabetically by name assert table_names == [ - "simple_primary_key", - "primary_key_multiple_columns", - "primary_key_multiple_columns_explicit_label", - "compound_primary_key", - "compound_three_primary_keys", - "foreign_key_references", - "sortable", - "no_primary_key", "123_starts_with_digits", "Table With Space In Name", - "table/with/slashes.csv", + "attraction_characteristic", + "binary_data", "complex_foreign_keys", + "compound_primary_key", + "compound_three_primary_keys", "custom_foreign_key_label", - "tags", - "searchable", - "searchable_tags", - "searchable_fts", - "searchable_fts_data", - "searchable_fts_idx", - "searchable_fts_docsize", - "searchable_fts_config", - "select", - "infinity", "facet_cities", "facetable", - "binary_data", - "roadside_attractions", - "attraction_characteristic", + "foreign_key_references", + "infinity", + "no_primary_key", + "primary_key_multiple_columns", + "primary_key_multiple_columns_explicit_label", "roadside_attraction_characteristics", + "roadside_attractions", + "searchable", + "searchable_fts", + "searchable_fts_config", + "searchable_fts_data", + "searchable_fts_docsize", + "searchable_fts_idx", + "searchable_tags", + "select", + "simple_primary_key", + "sortable", + "table/with/slashes.csv", + "tags", ] From 7988a179fe317cdb3dfa5c13d879d192ae36898d Mon Sep 17 00:00:00 2001 From: Simon Willison 
Date: Fri, 23 Jan 2026 21:03:16 -0800 Subject: [PATCH 075/299] Throttle schema refreshes to at most once per second, refs #2629 --- datasette/app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index a5cd75c5..75f6071e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -589,6 +589,10 @@ class Datasette: return None async def refresh_schemas(self): + # Throttle schema refreshes to at most once per second + if time.monotonic() - getattr(self, "_last_schema_refresh", 0) < 1.0: + return + self._last_schema_refresh = time.monotonic() if self._refresh_schemas_lock.locked(): return async with self._refresh_schemas_lock: From 2f7b120177f3285a8d504d5810fb081711d1b979 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 24 Jan 2026 22:07:54 -0800 Subject: [PATCH 076/299] Minor speedup for remove_infinites, refs #2629 --- datasette/utils/__init__.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index fb864077..4aaed967 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -901,18 +901,26 @@ _infinities = {float("inf"), float("-inf")} def remove_infinites(row): - to_check = row + """ + Replace float('inf') and float('-inf') with None in a row. + + Returns the original row object unchanged if no infinities are found. 
+ """ if isinstance(row, dict): - to_check = row.values() - if not any((c in _infinities) if isinstance(c, float) else 0 for c in to_check): - return row - if isinstance(row, dict): - return { - k: (None if (isinstance(v, float) and v in _infinities) else v) - for k, v in row.items() - } + for v in row.values(): + if isinstance(v, float) and v in _infinities: + return { + k: (None if isinstance(v2, float) and v2 in _infinities else v2) + for k, v2 in row.items() + } else: - return [None if (isinstance(c, float) and c in _infinities) else c for c in row] + for v in row: + if isinstance(v, float) and v in _infinities: + return [ + None if isinstance(v2, float) and v2 in _infinities else v2 + for v2 in row + ] + return row class StaticMount(click.ParamType): From 3f8f97e92a2ec058d38dbc151eef40245cb234a3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 09:55:25 -0800 Subject: [PATCH 077/299] Close more connections in test suite To try and avoid too many open files on macOS --- tests/test_api_write.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 3a76e655..05835e51 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -20,7 +20,12 @@ def ds_write(tmp_path_factory): ds = Datasette([db_path], immutables=[db_path_immutable]) ds.root_enabled = True yield ds - db.close() + # Close both setup connections plus any Datasette-managed connections. 
+ db1.close() + db2.close() + for database in ds.databases.values(): + if not database.is_memory: + database.close() def write_token(ds, actor_id="root", permissions=None): From ffadb5f74cf4e649671be42d9f56d0c233d381fb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 18:34:00 -0800 Subject: [PATCH 078/299] Workaround for intermittent test failure on SQLite 3.25.3 Closes: - #2632 --- datasette/utils/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 4aaed967..d0d216eb 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -706,8 +706,11 @@ def table_column_details(conn, table): ).fetchall() ] else: - # Treat hidden as 0 for all columns + # First trigger a query against sqlite_master to fix an intermittent + # test failure, see https://github.com/simonw/datasette/issues/2632 + conn.execute("select 1 from sqlite_master limit 1").fetchall() return [ + # Treat hidden as 0 for all columns. Column(*(list(r) + [0])) for r in conn.execute( f"PRAGMA table_info({escape_sqlite(table)});" From 40a37307ded36311a07eb2577cb74c92a2639f9d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Jan 2026 18:41:03 -0800 Subject: [PATCH 079/299] Add request.form() for multipart form data and file uploads * Add request.form() for multipart form data and file uploads New Request.form() method that handles both application/x-www-form-urlencoded and multipart/form-data content types with streaming parsing. 
Features: - Streaming multipart parser that doesn't buffer entire body in memory - Files spill to disk above 1MB threshold via SpooledTemporaryFile - files=False (default) discards file content, files=True stores them - Security limits: max_request_size, max_file_size, max_fields, max_files - FormData container with dict-like access and getlist() for multiple values - UploadedFile class with async read(), seek(), filename, content_type, size - Support for RFC 5987 filename* encoding for international filenames Uses multipart-form-data-conformance test suite for validation. * Update views to use request.form() and document new API - Migrate PermissionsDebugView, MessagesDebugView, and CreateTokenView from post_vars() to form() - Add documentation for request.form(), FormData, and UploadedFile classes Centralize multipart defaults and expose stricter limits via Request.form(). Enforce header, part, file, and disk space limits even when files are discarded; detect truncated bodies and client disconnects; and move blocking work off the event loop. Add FormData close/aclose context managers, update internals docs, and expand multipart tests (including len semantics and stricter conformance expectations). 
--- datasette/utils/asgi.py | 81 +++ datasette/utils/multipart.py | 757 ++++++++++++++++++++++ datasette/views/special.py | 26 +- docs/internals.rst | 131 +++- pyproject.toml | 1 + tests/test_multipart.py | 1152 ++++++++++++++++++++++++++++++++++ 6 files changed, 2133 insertions(+), 15 deletions(-) create mode 100644 datasette/utils/multipart.py create mode 100644 tests/test_multipart.py diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 7f3329a6..35f243b6 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,5 +1,21 @@ import json +from typing import Optional from datasette.utils import MultiParams, calculate_etag +from datasette.utils.multipart import ( + parse_form_data, + MultipartParseError, + FormData, + DEFAULT_MAX_FILE_SIZE, + DEFAULT_MAX_REQUEST_SIZE, + DEFAULT_MAX_FIELDS, + DEFAULT_MAX_FILES, + DEFAULT_MAX_PARTS, + DEFAULT_MAX_FIELD_SIZE, + DEFAULT_MAX_MEMORY_FILE_SIZE, + DEFAULT_MAX_PART_HEADER_BYTES, + DEFAULT_MAX_PART_HEADER_LINES, + DEFAULT_MIN_FREE_DISK_BYTES, +) from mimetypes import guess_type from urllib.parse import parse_qs, urlunparse, parse_qsl from pathlib import Path @@ -139,6 +155,71 @@ class Request: body = await self.post_body() return dict(parse_qsl(body.decode("utf-8"), keep_blank_values=True)) + async def form( + self, + files: bool = False, + max_file_size: int = DEFAULT_MAX_FILE_SIZE, + max_request_size: int = DEFAULT_MAX_REQUEST_SIZE, + max_fields: int = DEFAULT_MAX_FIELDS, + max_files: int = DEFAULT_MAX_FILES, + max_parts: Optional[int] = DEFAULT_MAX_PARTS, + max_field_size: int = DEFAULT_MAX_FIELD_SIZE, + max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE, + max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES, + max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES, + min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES, + ) -> FormData: + """ + Parse form data from the request body. + + Supports both application/x-www-form-urlencoded and multipart/form-data. 
+ + Args: + files: If True, store file uploads; if False (default), discard them + max_file_size: Maximum size per file in bytes (default 50MB) + max_request_size: Maximum total request size in bytes (default 100MB) + max_fields: Maximum number of form fields (default 1000) + max_files: Maximum number of file uploads (default 100) + max_parts: Maximum number of multipart parts (default max_fields + max_files) + max_field_size: Maximum size of a text field value in bytes (default 100KB) + max_memory_file_size: Threshold before files spill to disk (default 1MB) + max_part_header_bytes: Maximum bytes allowed in part headers (default 16KB) + max_part_header_lines: Maximum header lines per part (default 100) + min_free_disk_bytes: Minimum free bytes required in temp dir (default 50MB) + + Returns: + FormData object with dict-like access to fields and files. + Use form["key"] for first value, form.getlist("key") for all values. + + Raises: + BadRequest: If content-type is missing, unsupported, or parsing fails + """ + content_type = self.headers.get("content-type", "") + if not content_type: + raise BadRequest( + "Missing Content-Type header; expected application/x-www-form-urlencoded " + "or multipart/form-data" + ) + + try: + return await parse_form_data( + receive=self.receive, + content_type=content_type, + files=files, + max_file_size=max_file_size, + max_request_size=max_request_size, + max_fields=max_fields, + max_files=max_files, + max_parts=max_parts, + max_field_size=max_field_size, + max_memory_file_size=max_memory_file_size, + max_part_header_bytes=max_part_header_bytes, + max_part_header_lines=max_part_header_lines, + min_free_disk_bytes=min_free_disk_bytes, + ) + except MultipartParseError as e: + raise BadRequest(str(e)) + @classmethod def fake(cls, path_with_query_string, method="GET", scheme="http", url_vars=None): """Useful for constructing Request objects for tests""" diff --git a/datasette/utils/multipart.py b/datasette/utils/multipart.py new file 
mode 100644 index 00000000..cfa77486 --- /dev/null +++ b/datasette/utils/multipart.py @@ -0,0 +1,757 @@ +""" +Streaming multipart/form-data parser for ASGI applications. + +Supports: +- Streaming parsing without buffering entire body in memory +- Files spill to disk above configurable threshold +- Security limits on request size, file size, field count +- Both multipart/form-data and application/x-www-form-urlencoded +""" + +import asyncio +import shutil +import tempfile +from dataclasses import dataclass, field +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) +from urllib.parse import parse_qsl + +# Centralized defaults for multipart/form-data parsing +DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +DEFAULT_MAX_REQUEST_SIZE = 100 * 1024 * 1024 # 100MB +DEFAULT_MAX_FIELDS = 1000 +DEFAULT_MAX_FILES = 100 +# If max_parts is not specified, it defaults to max_fields + max_files +DEFAULT_MAX_PARTS: Optional[int] = None +DEFAULT_MAX_FIELD_SIZE = 100 * 1024 # 100KB +DEFAULT_MAX_MEMORY_FILE_SIZE = 1024 * 1024 # 1MB +DEFAULT_MAX_PART_HEADER_BYTES = 16 * 1024 # 16KB +DEFAULT_MAX_PART_HEADER_LINES = 100 +DEFAULT_MIN_FREE_DISK_BYTES = 50 * 1024 * 1024 # 50MB + + +class MultipartParseError(Exception): + """Raised when multipart parsing fails.""" + + pass + + +@dataclass +class UploadedFile: + """ + Represents an uploaded file from a multipart form. 
+ + Attributes: + name: The form field name + filename: The original filename from the upload + content_type: The MIME type of the file + size: Size in bytes + """ + + name: str + filename: str + content_type: Optional[str] + size: int + _file: tempfile.SpooledTemporaryFile = field(repr=False) + + async def read(self, size: int = -1) -> bytes: + """Read file contents.""" + return await asyncio.to_thread(self._file.read, size) + + async def seek(self, offset: int, whence: int = 0) -> int: + """Seek to position in file.""" + return await asyncio.to_thread(self._file.seek, offset, whence) + + async def close(self) -> None: + """Close the underlying file.""" + await asyncio.to_thread(self._file.close) + + def close_sync(self) -> None: + """Close the underlying file synchronously.""" + self._file.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() + + def __del__(self): + try: + self._file.close() + except Exception: + pass + + +class FormData: + """ + Container for parsed form data, supporting both fields and files. + + Provides dict-like access with support for multiple values per key. 
+ """ + + def __init__(self): + self._data: List[Tuple[str, Union[str, UploadedFile]]] = [] + + def append(self, key: str, value: Union[str, UploadedFile]) -> None: + """Add a key-value pair.""" + self._data.append((key, value)) + + def __getitem__(self, key: str) -> Union[str, UploadedFile]: + """Get the first value for a key.""" + for k, v in self._data: + if k == key: + return v + raise KeyError(key) + + def get(self, key: str, default: Any = None) -> Optional[Union[str, UploadedFile]]: + """Get the first value for a key, or default if not found.""" + try: + return self[key] + except KeyError: + return default + + def getlist(self, key: str) -> List[Union[str, UploadedFile]]: + """Get all values for a key.""" + return [v for k, v in self._data if k == key] + + def __contains__(self, key: str) -> bool: + """Check if key exists.""" + return any(k == key for k, _ in self._data) + + def __len__(self) -> int: + """Return number of items.""" + return len(self._data) + + def __iter__(self): + """Iterate over unique keys.""" + seen = set() + for k, _ in self._data: + if k not in seen: + seen.add(k) + yield k + + def keys(self): + """Return unique keys.""" + return list(self) + + def items(self) -> List[Tuple[str, Union[str, UploadedFile]]]: + """Return all key-value pairs.""" + return list(self._data) + + def values(self) -> List[Union[str, UploadedFile]]: + """Return all values.""" + return [v for _, v in self._data] + + def _uploaded_files(self) -> List[UploadedFile]: + """Return UploadedFile instances contained in this form.""" + return [v for _, v in self._data if isinstance(v, UploadedFile)] + + def close(self) -> None: + """ + Close any uploaded files. + + This provides deterministic cleanup for spooled temp files. 
+ """ + for uploaded in self._uploaded_files(): + try: + uploaded.close_sync() + except Exception: + # Best-effort cleanup; ignore close errors + pass + + async def aclose(self) -> None: + """Asynchronously close any uploaded files.""" + for uploaded in self._uploaded_files(): + try: + await uploaded.close() + except Exception: + # Best-effort cleanup; ignore close errors + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + self.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.aclose() + + +def parse_content_disposition(header: str) -> Dict[str, Optional[str]]: + """ + Parse Content-Disposition header value. + + Returns dict with 'name', 'filename' keys (filename may be None). + """ + result: Dict[str, Optional[str]] = {"name": None, "filename": None} + + # Split on semicolons, handling quoted strings + parts = [] + current = "" + in_quotes = False + i = 0 + while i < len(header): + char = header[i] + if char == '"' and (i == 0 or header[i - 1] != "\\"): + in_quotes = not in_quotes + current += char + elif char == ";" and not in_quotes: + parts.append(current.strip()) + current = "" + else: + current += char + i += 1 + if current.strip(): + parts.append(current.strip()) + + for part in parts[1:]: # Skip the "form-data" part + if "=" not in part: + continue + + key, _, value = part.partition("=") + key = key.strip().lower() + value = value.strip() + + # Handle filename* (RFC 5987 encoding) + if key == "filename*": + # Format: utf-8''encoded_filename or charset'language'encoded_filename + if "'" in value: + parts_star = value.split("'", 2) + if len(parts_star) >= 3: + # charset = parts_star[0] + # language = parts_star[1] + encoded = parts_star[2] + # URL decode + try: + from urllib.parse import unquote + + result["filename"] = unquote(encoded, encoding="utf-8") + except Exception: + pass + continue + + # Remove quotes if present + if value.startswith('"') and 
value.endswith('"'): + value = value[1:-1] + # Unescape backslash sequences + value = value.replace('\\"', '"').replace("\\\\", "\\") + + if key == "name": + result["name"] = value + elif key == "filename": + # Only set if filename* hasn't already set it + if result["filename"] is None: + # Strip path components (security) + # Handle both Unix and Windows paths + value = value.replace("\\", "/") + if "/" in value: + value = value.rsplit("/", 1)[-1] + result["filename"] = value + + return result + + +def parse_content_type(header: str) -> Tuple[str, Dict[str, str]]: + """ + Parse Content-Type header value. + + Returns (media_type, parameters_dict). + """ + parts = header.split(";") + media_type = parts[0].strip().lower() + params = {} + + for part in parts[1:]: + part = part.strip() + if "=" in part: + key, _, value = part.partition("=") + key = key.strip().lower() + value = value.strip() + # Remove quotes if present + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + params[key] = value + + return media_type, params + + +class MultipartParser: + """ + Streaming multipart/form-data parser. + + Processes the body chunk by chunk without loading everything into memory. 
+ """ + + # Parser states + STATE_PREAMBLE = 0 + STATE_HEADER = 1 + STATE_BODY = 2 + STATE_DONE = 3 + + def __init__( + self, + boundary: bytes, + max_file_size: int = DEFAULT_MAX_FILE_SIZE, + max_request_size: int = DEFAULT_MAX_REQUEST_SIZE, + max_fields: int = DEFAULT_MAX_FIELDS, + max_files: int = DEFAULT_MAX_FILES, + max_parts: Optional[int] = DEFAULT_MAX_PARTS, + max_field_size: int = DEFAULT_MAX_FIELD_SIZE, + max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE, + max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES, + max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES, + min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES, + handle_files: bool = False, + ): + self.boundary = b"--" + boundary + self.end_boundary = self.boundary + b"--" + self.max_file_size = max_file_size + self.max_request_size = max_request_size + self.max_fields = max_fields + self.max_files = max_files + # If not specified, tie max_parts to the other cardinality limits + if max_parts is None: + max_parts = max_fields + max_files + self.max_parts = max_parts + self.max_field_size = max_field_size + self.max_memory_file_size = max_memory_file_size + self.max_part_header_bytes = max_part_header_bytes + self.max_part_header_lines = max_part_header_lines + self.min_free_disk_bytes = min_free_disk_bytes + self.handle_files = handle_files + + self.state = self.STATE_PREAMBLE + self.buffer = bytearray() + self.total_bytes = 0 + self.field_count = 0 + self.file_count = 0 + self.part_count = 0 + self.current_part_size = 0 + self.current_header_bytes = 0 + self.current_header_lines = 0 + + self.form_data = FormData() + self._disk_check_interval_bytes = 1024 * 1024 # 1MB between disk checks + self._bytes_since_disk_check = 0 + self._tempdir = tempfile.gettempdir() + + # Current part state + self.current_headers: Dict[str, str] = {} + self.current_file: Optional[tempfile.SpooledTemporaryFile] = None + self.current_body = bytearray() + self.current_name: Optional[str] = None + 
self.current_filename: Optional[str] = None + self.current_content_type: Optional[str] = None + + def feed(self, chunk: bytes) -> None: + """Feed a chunk of data to the parser.""" + self.total_bytes += len(chunk) + if self.total_bytes > self.max_request_size: + raise MultipartParseError("Request body too large") + + self.buffer.extend(chunk) + self._process() + + def _process(self) -> None: + """Process buffered data.""" + while True: + if self.state == self.STATE_PREAMBLE: + if not self._process_preamble(): + break + elif self.state == self.STATE_HEADER: + if not self._process_header(): + break + elif self.state == self.STATE_BODY: + if not self._process_body(): + break + elif self.state == self.STATE_DONE: + break + + def _process_preamble(self) -> bool: + """Skip preamble and find first boundary.""" + # Look for boundary (could be at start or after preamble) + # Try both \r\n prefixed and bare boundary at start + idx = self.buffer.find(self.boundary) + if idx == -1: + # Keep potential partial boundary at end + keep = len(self.boundary) - 1 + if len(self.buffer) > keep: + self.buffer = self.buffer[-keep:] + return False + + # Found boundary, skip to after it + after_boundary = idx + len(self.boundary) + + # Check for end boundary + if self.buffer[idx : idx + len(self.end_boundary)] == self.end_boundary: + self.state = self.STATE_DONE + return False + + # Skip CRLF or LF after boundary + if after_boundary < len(self.buffer): + if self.buffer[after_boundary : after_boundary + 2] == b"\r\n": + after_boundary += 2 + elif self.buffer[after_boundary : after_boundary + 1] == b"\n": + after_boundary += 1 + + self.buffer = self.buffer[after_boundary:] + self.state = self.STATE_HEADER + self.current_headers = {} + self.current_header_bytes = 0 + self.current_header_lines = 0 + return True + + def _process_header(self) -> bool: + """Parse part headers.""" + while True: + # Look for end of header line + crlf_idx = self.buffer.find(b"\r\n") + lf_idx = self.buffer.find(b"\n") 
+ + if crlf_idx == -1 and lf_idx == -1: + # Guard against unbounded header buffering if no newline is ever sent + if len(self.buffer) > self.max_part_header_bytes: + raise MultipartParseError("Part headers too large") + return False # Need more data + + # Use whichever comes first + if crlf_idx != -1 and (lf_idx == -1 or crlf_idx < lf_idx): + idx = crlf_idx + line_end_len = 2 + else: + idx = lf_idx + line_end_len = 1 + + line = self.buffer[:idx] + self.buffer = self.buffer[idx + line_end_len :] + + self.current_header_lines += 1 + self.current_header_bytes += idx + line_end_len + if ( + self.current_header_lines > self.max_part_header_lines + or self.current_header_bytes > self.max_part_header_bytes + ): + raise MultipartParseError("Part headers too large") + + if not line: + # Empty line = end of headers + self._start_body() + self.state = self.STATE_BODY + return True + + # Parse header + try: + line_str = line.decode("utf-8", errors="replace") + except Exception: + line_str = line.decode("latin-1") + + if ":" in line_str: + name, _, value = line_str.partition(":") + self.current_headers[name.strip().lower()] = value.strip() + + def _start_body(self) -> None: + """Initialize body parsing for current part.""" + self.part_count += 1 + if self.part_count > self.max_parts: + raise MultipartParseError("Too many parts") + + # Parse Content-Disposition + cd = self.current_headers.get("content-disposition", "") + parsed = parse_content_disposition(cd) + self.current_name = parsed.get("name") + self.current_filename = parsed.get("filename") + self.current_content_type = self.current_headers.get("content-type") + self.current_part_size = 0 + + if self.current_filename is not None: + # It's a file + self.file_count += 1 + if self.file_count > self.max_files: + raise MultipartParseError("Too many files") + if self.handle_files: + self.current_file = tempfile.SpooledTemporaryFile( + max_size=self.max_memory_file_size + ) + else: + # Will discard file content + 
self.current_file = None + else: + # It's a text field + self.field_count += 1 + if self.field_count > self.max_fields: + raise MultipartParseError("Too many fields") + self.current_body = bytearray() + self.current_file = None + + # Check disk space before allocating a spooled temp file + if self.current_filename is not None and self.handle_files: + self._ensure_disk_space() + + def _process_body(self) -> bool: + """Process body data for current part.""" + # Look for boundary in buffer + # Need to handle boundary potentially split across chunks + + # The boundary is preceded by \r\n (or \n for lenient parsing) + search_boundary = b"\r\n" + self.boundary + + idx = self.buffer.find(search_boundary) + if idx == -1: + # Try LF-only boundary (lenient) + search_boundary_lf = b"\n" + self.boundary + idx = self.buffer.find(search_boundary_lf) + if idx != -1: + search_boundary = search_boundary_lf + + if idx == -1: + # No boundary found yet + # Keep potential partial boundary at end of buffer + safe_len = len(self.buffer) - len(search_boundary) - 1 + if safe_len > 0: + safe_data = self.buffer[:safe_len] + self._write_body_data(bytes(safe_data)) + self.buffer = self.buffer[safe_len:] + return False + + # Found boundary - write remaining body data + body_data = self.buffer[:idx] + self._write_body_data(bytes(body_data)) + + # Move past the boundary + after_boundary = idx + len(search_boundary) + + # Check for end boundary + remaining = self.buffer[after_boundary:] + if remaining.startswith(b"--"): + # End boundary + self._finish_part() + self.state = self.STATE_DONE + return False + + # Skip CRLF or LF after boundary + if remaining.startswith(b"\r\n"): + after_boundary += 2 + elif remaining.startswith(b"\n"): + after_boundary += 1 + + self.buffer = self.buffer[after_boundary:] + self._finish_part() + self.state = self.STATE_HEADER + self.current_headers = {} + self.current_header_bytes = 0 + self.current_header_lines = 0 + return True + + def _write_body_data(self, data: 
bytes) -> None: + """Write data to current part body.""" + if not data: + return + + self.current_part_size += len(data) + + if self.current_filename is not None: + # File data + if self.current_part_size > self.max_file_size: + raise MultipartParseError("File too large") + if self.handle_files and self.current_file: + self._bytes_since_disk_check += len(data) + if self._bytes_since_disk_check >= self._disk_check_interval_bytes: + self._ensure_disk_space() + self._bytes_since_disk_check = 0 + self.current_file.write(data) + # else: discard file data + else: + # Field data + if self.current_part_size > self.max_field_size: + raise MultipartParseError("Field value too large") + self.current_body.extend(data) + + def _finish_part(self) -> None: + """Finalize current part and add to form data.""" + if self.current_name is None: + return + + if self.current_filename is not None: + # File + if self.handle_files and self.current_file: + self.current_file.seek(0) + uploaded = UploadedFile( + name=self.current_name, + filename=self.current_filename, + content_type=self.current_content_type, + size=self.current_part_size, + _file=self.current_file, + ) + self.form_data.append(self.current_name, uploaded) + # else: file was discarded + else: + # Text field + try: + value = bytes(self.current_body).decode("utf-8") + except UnicodeDecodeError: + value = bytes(self.current_body).decode("latin-1") + self.form_data.append(self.current_name, value) + + # Reset part state + self.current_file = None + self.current_body = bytearray() + self.current_name = None + self.current_filename = None + self.current_content_type = None + + def finalize(self) -> FormData: + """Finalize parsing and return form data.""" + # Process any remaining data + self._process() + if self.state != self.STATE_DONE: + raise MultipartParseError( + "Truncated multipart body (missing closing boundary)" + ) + return self.form_data + + def _ensure_disk_space(self) -> None: + """ + Ensure there is enough free space 
on the temp filesystem. + + This is a best-effort guard against filling the disk with uploads. + """ + if not self.handle_files: + return + if self.min_free_disk_bytes <= 0: + return + free_bytes = shutil.disk_usage(self._tempdir).free + if free_bytes < self.min_free_disk_bytes: + raise MultipartParseError("Insufficient disk space for uploads") + + +async def parse_form_data( + receive: Callable, + content_type: str, + files: bool = False, + max_file_size: int = DEFAULT_MAX_FILE_SIZE, + max_request_size: int = DEFAULT_MAX_REQUEST_SIZE, + max_fields: int = DEFAULT_MAX_FIELDS, + max_files: int = DEFAULT_MAX_FILES, + max_parts: Optional[int] = DEFAULT_MAX_PARTS, + max_field_size: int = DEFAULT_MAX_FIELD_SIZE, + max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE, + max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES, + max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES, + min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES, +) -> FormData: + """ + Parse form data from an ASGI receive callable. + + Supports both application/x-www-form-urlencoded and multipart/form-data. 
+ + Args: + receive: ASGI receive callable + content_type: Content-Type header value + files: If True, store file uploads; if False, discard them + max_file_size: Maximum size per file in bytes + max_request_size: Maximum total request size in bytes + max_fields: Maximum number of form fields + max_files: Maximum number of file uploads + max_field_size: Maximum size of a text field value + max_memory_file_size: File size threshold before spilling to disk + + Returns: + FormData object containing parsed fields and files + """ + media_type, params = parse_content_type(content_type) + + if media_type == "application/x-www-form-urlencoded": + # Read entire body for URL-encoded forms (they're typically small) + body = bytearray() + total = 0 + while True: + message = await receive() + message_type = message.get("type") + if message_type == "http.disconnect": + raise MultipartParseError("Client disconnected during request body") + if message_type is not None and message_type != "http.request": + continue + chunk = message.get("body", b"") + total += len(chunk) + if total > max_request_size: + raise MultipartParseError("Request body too large") + body.extend(chunk) + if not message.get("more_body", False): + break + + form_data = FormData() + try: + pairs = parse_qsl(bytes(body).decode("utf-8"), keep_blank_values=True) + except UnicodeDecodeError: + pairs = parse_qsl(bytes(body).decode("latin-1"), keep_blank_values=True) + + for key, value in pairs: + form_data.append(key, value) + + return form_data + + elif media_type == "multipart/form-data": + boundary = params.get("boundary") + if not boundary: + raise MultipartParseError("Missing boundary in Content-Type") + + parser = MultipartParser( + boundary=boundary.encode("utf-8"), + max_file_size=max_file_size, + max_request_size=max_request_size, + max_fields=max_fields, + max_files=max_files, + max_parts=max_parts, + max_field_size=max_field_size, + max_memory_file_size=max_memory_file_size, + 
max_part_header_bytes=max_part_header_bytes, + max_part_header_lines=max_part_header_lines, + min_free_disk_bytes=min_free_disk_bytes, + handle_files=files, + ) + + # Stream body through parser + batch_target = 64 * 1024 + batch = bytearray() + + async def flush_batch() -> None: + if batch: + data = bytes(batch) + batch.clear() + await asyncio.to_thread(parser.feed, data) + + while True: + message = await receive() + message_type = message.get("type") + if message_type == "http.disconnect": + raise MultipartParseError("Client disconnected during request body") + if message_type is not None and message_type != "http.request": + continue + chunk = message.get("body", b"") + if chunk: + batch.extend(chunk) + if len(batch) >= batch_target: + await flush_batch() + if not message.get("more_body", False): + break + + await flush_batch() + return await asyncio.to_thread(parser.finalize) + + else: + raise MultipartParseError( + f"Unsupported Content-Type: {media_type}. " + "Expected application/x-www-form-urlencoded or multipart/form-data" + ) diff --git a/datasette/views/special.py b/datasette/views/special.py index 411363ec..57a3024d 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -177,11 +177,11 @@ class PermissionsDebugView(BaseView): async def post(self, request): await self.ds.ensure_permission(action="view-instance", actor=request.actor) await self.ds.ensure_permission(action="permissions-debug", actor=request.actor) - vars = await request.post_vars() - actor = json.loads(vars["actor"]) - permission = vars["permission"] - parent = vars.get("resource_1") or None - child = vars.get("resource_2") or None + form = await request.form() + actor = json.loads(form["actor"]) + permission = form["permission"] + parent = form.get("resource_1") or None + child = form.get("resource_2") or None response, status = await _check_permission_for_actor( self.ds, permission, parent, child, actor @@ -602,9 +602,9 @@ class MessagesDebugView(BaseView): async def 
post(self, request): await self.ds.ensure_permission(action="view-instance", actor=request.actor) - post = await request.post_vars() - message = post.get("message", "") - message_type = post.get("message_type") or "INFO" + form = await request.form() + message = form.get("message", "") + message_type = form.get("message_type") or "INFO" assert message_type in ("INFO", "WARNING", "ERROR", "all") datasette = self.ds if message_type == "all": @@ -688,11 +688,11 @@ class CreateTokenView(BaseView): async def post(self, request): self.check_permission(request) - post = await request.post_vars() + form = await request.form() errors = [] expires_after = None - if post.get("expire_type"): - duration_string = post.get("expire_duration") + if form.get("expire_type"): + duration_string = form.get("expire_duration") if ( not duration_string or not duration_string.isdigit() @@ -700,7 +700,7 @@ class CreateTokenView(BaseView): ): errors.append("Invalid expire duration") else: - unit = post["expire_type"] + unit = form["expire_type"] if unit == "minutes": expires_after = int(duration_string) * 60 elif unit == "hours": @@ -715,7 +715,7 @@ class CreateTokenView(BaseView): restrict_database = {} restrict_resource = {} - for key in post: + for key in form: if key.startswith("all:") and key.count(":") == 1: restrict_all.append(key.split(":")[1]) elif key.startswith("database:") and key.count(":") == 2: diff --git a/docs/internals.rst b/docs/internals.rst index cfd78593..0491c1f7 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -52,10 +52,59 @@ The request object is passed to various plugin hooks. It represents an incoming ``.actor`` - dictionary (str -> Any) or None The currently authenticated actor (see :ref:`actors `), or ``None`` if the request is unauthenticated. -The object also has two awaitable methods: +The object also has the following awaitable methods: + +``await request.form(files=False, ...)`` - FormData + Parses form data from the request body. 
Supports both ``application/x-www-form-urlencoded`` and ``multipart/form-data`` content types. + + Returns a :ref:`internals_formdata` object with dict-like access to form fields and uploaded files. + + Requirements and errors: + + - A ``Content-Type`` header is required. Missing or unsupported content types raise ``BadRequest``. + - For ``multipart/form-data``, the ``boundary=...`` parameter is required. + + Parameters: + + - ``files`` (bool, default ``False``): If ``True``, uploaded files are stored and accessible. If ``False`` (default), file content is discarded but form fields are still available. + - ``max_file_size`` (int, default 50MB): Maximum size per uploaded file in bytes. + - ``max_request_size`` (int, default 100MB): Maximum total request body size in bytes. + - ``max_fields`` (int, default 1000): Maximum number of form fields. + - ``max_files`` (int, default 100): Maximum number of uploaded files. + - ``max_parts`` (int, default ``max_fields + max_files``): Maximum number of multipart parts in total. + - ``max_field_size`` (int, default 100KB): Maximum size of a text field value in bytes. + - ``max_memory_file_size`` (int, default 1MB): File size threshold before uploads spill to disk. + - ``max_part_header_bytes`` (int, default 16KB): Maximum total bytes allowed in part headers. + - ``max_part_header_lines`` (int, default 100): Maximum header lines per part. + - ``min_free_disk_bytes`` (int, default 50MB): Minimum free bytes required in the temp directory before accepting file uploads. + + Example usage: + + .. 
code-block:: python + + # Parse form fields only (files are discarded) + form = await request.form() + username = form["username"] + tags = form.getlist("tags") # For multiple values + + # Parse form fields AND files + form = await request.form(files=True) + uploaded = form["avatar"] + content = await uploaded.read() + print( + uploaded.filename, uploaded.content_type, uploaded.size + ) + + Cleanup note: + + When using ``files=True``, call ``await form.aclose()`` once you are done with the uploads + to ensure spooled temporary files are closed promptly. You can also use + ``async with form: ...`` for automatic cleanup. + + Don't forget to read about :ref:`internals_csrf`! ``await request.post_vars()`` - dictionary - Returns a dictionary of form variables that were submitted in the request body via ``POST``. Don't forget to read about :ref:`internals_csrf`! + Returns a dictionary of form variables that were submitted in the request body via ``POST`` using ``application/x-www-form-urlencoded`` encoding. For multipart forms or file uploads, use ``request.form()`` instead. ``await request.post_body()`` - bytes Returns the un-parsed body of a request submitted by ``POST`` - useful for things like incoming JSON data. @@ -117,6 +166,84 @@ Consider the query string ``?foo=1&foo=2&bar=3`` - with two values for ``foo`` a ``len(request.args)`` - integer Returns the number of keys. +.. _internals_formdata: + +The FormData class +================== + +``await request.form()`` returns a ``FormData`` object - a dictionary-like object which provides access to form fields and uploaded files. It has a similar interface to ``MultiParams``. + +``form[key]`` - string or UploadedFile + Returns the first value for that key, or raises a ``KeyError`` if the key is missing. + +``form.get(key)`` - string, UploadedFile, or None + Returns the first value for that key, or ``None`` if the key is missing. Pass a second argument to specify a different default. 
+ +``form.getlist(key)`` - list + Returns the list of values for that key. If the key is missing an empty list will be returned. + +``form.keys()`` - list of strings + Returns the list of available keys. + +``key in form`` - True or False + You can use ``if key in form`` to check if a key is present. + +``for key in form`` - iterator + This lets you loop through every available key. + +``len(form)`` - integer + Returns the total number of submitted values. + +.. _internals_uploadedfile: + +The UploadedFile class +====================== + +When parsing multipart form data with ``files=True``, file uploads are returned as ``UploadedFile`` objects with the following properties and methods: + +``uploaded_file.name`` - string + The form field name. + +``uploaded_file.filename`` - string + The original filename provided by the client. Note: This is sanitized to remove path components for security. + +``uploaded_file.content_type`` - string or None + The MIME type of the uploaded file, if provided by the client. + +``uploaded_file.size`` - integer + The size of the uploaded file in bytes. + +``await uploaded_file.read(size=-1)`` - bytes + Read and return up to ``size`` bytes from the file. If ``size`` is -1 (default), read the entire file. + +``await uploaded_file.seek(offset, whence=0)`` - integer + Seek to the given position in the file. Returns the new position. + +``await uploaded_file.close()`` + Close the underlying file. This is called automatically when the object is garbage collected. + +Files smaller than 1MB are stored in memory. Larger files are automatically spilled to temporary files on disk and cleaned up when the request completes. + +Example: + +.. 
code-block:: python + + form = await request.form(files=True) + uploaded = form["document"] + + # Check file metadata + print(f"Filename: {uploaded.filename}") + print(f"Content-Type: {uploaded.content_type}") + print(f"Size: {uploaded.size} bytes") + + # Read file content + content = await uploaded.read() + + # Or read in chunks + await uploaded.seek(0) + while chunk := await uploaded.read(8192): + process_chunk(chunk) + .. _internals_response: Response class diff --git a/pyproject.toml b/pyproject.toml index 6fca673d..d9ef2a73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dev = [ "pytest-timeout>=1.4.2", "trustme>=0.7", "cogapp>=3.3.0", + "multipart-form-data-conformance==0.1a0", "ruff>=0.9", # docs "Sphinx==7.4.7", diff --git a/tests/test_multipart.py b/tests/test_multipart.py new file mode 100644 index 00000000..0dc3ecd7 --- /dev/null +++ b/tests/test_multipart.py @@ -0,0 +1,1152 @@ +""" +Tests for request.form() multipart form data parsing. + +Uses TDD approach - these tests are written first, then implementation follows. 
+""" + +import base64 +import json +import pytest +from collections import namedtuple + +from multipart_form_data_conformance import get_tests_dir + +from datasette.utils.asgi import Request, BadRequest + + +def make_receive(body: bytes): + """Create an async receive callable that yields the whole body in one message.""" + consumed = False + + async def receive(): + nonlocal consumed + if consumed: + return {"type": "http.request", "body": b"", "more_body": False} + consumed = True + return {"type": "http.request", "body": body, "more_body": False} + + return receive + + +def make_chunked_receive(body: bytes, chunk_size: int = 64): + """Create an async receive callable that yields body in small chunks.""" + offset = 0 + + async def receive(): + nonlocal offset + chunk = body[offset : offset + chunk_size] + offset += chunk_size + more_body = offset < len(body) + return {"type": "http.request", "body": chunk, "more_body": more_body} + + return receive + + +def make_receive_with_noise(body: bytes): + """ + Create an async receive callable that includes an unexpected ASGI message. + + The parser should ignore the unknown message type and continue. + """ + messages = [ + {"type": "http.response.start", "status": 200, "headers": []}, + {"type": "http.request", "body": body, "more_body": False}, + ] + index = 0 + + async def receive(): + nonlocal index + if index >= len(messages): + return {"type": "http.request", "body": b"", "more_body": False} + message = messages[index] + index += 1 + return message + + return receive + + +def make_disconnect_receive(body: bytes, chunk_size: int = 64): + """ + Create an async receive callable that disconnects mid-request. + + The parser should raise on the disconnect. 
+ """ + offset = 0 + disconnected = False + + async def receive(): + nonlocal offset, disconnected + if disconnected: + return {"type": "http.disconnect"} + chunk = body[offset : offset + chunk_size] + offset += chunk_size + more_body = offset < len(body) + if more_body: + disconnected = True + return {"type": "http.request", "body": chunk, "more_body": more_body} + + return receive + + +class TestFormUrlEncoded: + """Test request.form() with application/x-www-form-urlencoded data.""" + + @pytest.mark.asyncio + async def test_basic_form_fields(self): + """Basic URL-encoded form should be parseable via request.form().""" + body = b"username=john&password=secret" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["username"] == "john" + assert form["password"] == "secret" + + @pytest.mark.asyncio + async def test_form_with_multiple_values(self): + """Multiple values for same key should be accessible via getlist().""" + body = b"tag=python&tag=web&tag=api" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["tag"] == "python" # First value + assert form.getlist("tag") == ["python", "web", "api"] + + @pytest.mark.asyncio + async def test_empty_form(self): + """Empty form should return empty FormData.""" + body = b"" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert len(form) == 0 + + @pytest.mark.asyncio + async def test_form_with_special_characters(self): + """URL-encoded special characters should be decoded properly.""" + body = 
b"message=hello%20world&emoji=%F0%9F%91%8B" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["message"] == "hello world" + assert form["emoji"] == "👋" + + +class TestMultipartBasic: + """Test request.form() with multipart/form-data (fields only, no files).""" + + @pytest.mark.asyncio + async def test_single_text_field(self): + """Single text field in multipart should be parseable.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="username"\r\n' + b"\r\n" + b"john_doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["username"] == "john_doe" + + @pytest.mark.asyncio + async def test_multiple_text_fields(self): + """Multiple text fields in multipart should all be accessible.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="first_name"\r\n' + b"\r\n" + b"John\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="last_name"\r\n' + b"\r\n" + b"Doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + + assert form["first_name"] == "John" + assert form["last_name"] == "Doe" + + @pytest.mark.asyncio + async def test_file_discarded_when_files_false(self): + """File content should be discarded when files=False (default).""" + boundary = "----TestBoundary123" + body = ( + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="title"\r\n' + b"\r\n" + b"My Document\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File content here\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="description"\r\n' + b"\r\n" + b"A sample document\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() # files=False is default + + # Text fields should be present + assert form["title"] == "My Document" + assert form["description"] == "A sample document" + # File should NOT be present + assert "file" not in form + + @pytest.mark.asyncio + async def test_chunked_body_parsing(self): + """Multipart should work when body arrives in small chunks.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="username"\r\n' + b"\r\n" + b"john_doe\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + # Use small chunks to test streaming parser + request = Request(scope, make_chunked_receive(body, chunk_size=16)) + + form = await request.form() + + assert form["username"] == "john_doe" + + +class TestMultipartWithFiles: + """Test request.form(files=True) for file uploads.""" + + @pytest.mark.asyncio + async def test_single_file_upload(self): + """Single file upload should create UploadedFile object.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="document"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello, 
World!\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["document"] + assert uploaded_file.filename == "test.txt" + assert uploaded_file.content_type == "text/plain" + assert await uploaded_file.read() == b"Hello, World!" + assert uploaded_file.size == 13 + + @pytest.mark.asyncio + async def test_mixed_fields_and_files(self): + """Mixed form fields and files should all be accessible.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="title"\r\n' + b"\r\n" + b"My Document\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Document content\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="description"\r\n' + b"\r\n" + b"A sample\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + # Text fields + assert form["title"] == "My Document" + assert form["description"] == "A sample" + # File + uploaded_file = form["file"] + assert uploaded_file.filename == "doc.txt" + assert await uploaded_file.read() == b"Document content" + + @pytest.mark.asyncio + async def test_multiple_files_same_name(self): + """Multiple files with same name should be accessible via getlist().""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="files"; filename="a.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File A\r\n" + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="files"; filename="b.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"File B\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + files = form.getlist("files") + assert len(files) == 2 + assert files[0].filename == "a.txt" + assert files[1].filename == "b.txt" + + @pytest.mark.asyncio + async def test_large_file_spills_to_disk(self): + """Files larger than threshold should spill to temp file.""" + boundary = "----TestBoundary123" + # Create a body larger than the in-memory threshold (1MB) + large_content = b"x" * (2 * 1024 * 1024) # 2MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="bigfile"; filename="large.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["bigfile"] + assert uploaded_file.size == len(large_content) + # Content should still be readable + content = await uploaded_file.read() + assert content == large_content + + @pytest.mark.asyncio + async def test_uploaded_file_seek_and_read(self): + """UploadedFile should support seek and multiple reads.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello, World!\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + 
(b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + uploaded_file = form["file"] + + # First read + content1 = await uploaded_file.read() + assert content1 == b"Hello, World!" + + # Seek back to start + await uploaded_file.seek(0) + + # Second read + content2 = await uploaded_file.read() + assert content2 == b"Hello, World!" + + +class TestMultipartCleanup: + """Test deterministic cleanup of uploaded files.""" + + @pytest.mark.asyncio + async def test_formdata_close_closes_uploaded_files(self): + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + form = await request.form(files=True) + uploaded_file = form["file"] + + form.close() + + with pytest.raises(ValueError): + await uploaded_file.read() + + @pytest.mark.asyncio + async def test_formdata_async_context_manager_closes_files(self): + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + form = await request.form(files=True) + uploaded_file = form["file"] + + async with form: + pass + + with pytest.raises(ValueError): + await uploaded_file.read() + + +class TestMultipartEdgeCases: + """Test edge cases in multipart parsing.""" + 
+ @pytest.mark.asyncio + async def test_empty_file_upload(self): + """Empty file (filename but no content) should be handled.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="empty.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + uploaded_file = form["file"] + assert uploaded_file.filename == "empty.txt" + assert uploaded_file.size == 0 + assert await uploaded_file.read() == b"" + + @pytest.mark.asyncio + async def test_filename_with_path(self): + """Filename containing path should extract just the filename.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="C:\\Users\\test\\doc.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"content\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form(files=True) + + # Should extract just the filename, not the full path + uploaded_file = form["file"] + assert uploaded_file.filename == "doc.txt" + + @pytest.mark.asyncio + async def test_missing_content_type_header(self): + """Missing content-type in request should raise BadRequest.""" + body = b"some body" + scope = { + "type": "http", + "method": "POST", + "headers": [], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + @pytest.mark.asyncio + async def test_invalid_content_type(self): + """Non-form content-type should raise 
BadRequest.""" + body = b'{"key": "value"}' + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/json"), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + @pytest.mark.asyncio + async def test_missing_boundary(self): + """Multipart without boundary should raise BadRequest.""" + body = b"some body" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"multipart/form-data"), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest): + await request.form() + + +class TestSecurityLimits: + """Test security limits on form parsing.""" + + @pytest.mark.asyncio + async def test_max_fields_limit(self): + """Should reject requests with too many fields.""" + boundary = "----TestBoundary123" + # Create body with many fields + parts = [] + for i in range(1001): # Default max is 1000 + parts.append( + f"------TestBoundary123\r\n" + f'Content-Disposition: form-data; name="field{i}"\r\n' + f"\r\n" + f"value{i}\r\n" + ) + parts.append("------TestBoundary123--\r\n") + body = "".join(parts).encode() + + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)too many"): + await request.form(max_fields=1000) + + @pytest.mark.asyncio + async def test_max_file_size_limit(self): + """Should reject files exceeding size limit.""" + boundary = "----TestBoundary123" + large_content = b"x" * (11 * 1024 * 1024) # 11MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", 
f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)file.*too large|too large"): + await request.form(files=True, max_file_size=10 * 1024 * 1024) + + @pytest.mark.asyncio + async def test_max_request_size_limit(self): + """Should reject requests exceeding total size limit.""" + boundary = "----TestBoundary123" + large_content = b"x" * (6 * 1024 * 1024) # 6MB + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + large_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="(?i)too large|request.*too large"): + await request.form(files=True, max_request_size=5 * 1024 * 1024) + + +class TestMultipartStrictnessAndLimits: + """Tests that enforce stricter ASGI and multipart behaviors.""" + + @pytest.mark.asyncio + async def test_multipart_truncated_body_is_error(self): + """Truncated multipart without closing boundary should raise.""" + boundary = "----TestBoundary123" + # Missing the final closing boundary line + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Truncated multipart body"): + await request.form() + + @pytest.mark.asyncio + async def test_disconnect_mid_body_is_error(self): + """Client disconnect during body streaming should raise.""" + boundary = "----TestBoundary123" + body = ( + 
b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_disconnect_receive(body, chunk_size=16)) + + with pytest.raises(BadRequest, match="disconnected"): + await request.form() + + @pytest.mark.asyncio + async def test_unknown_asgi_message_type_is_ignored(self): + """Unexpected ASGI message types should be ignored.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive_with_noise(body)) + + form = await request.form() + assert form["field"] == "value" + + @pytest.mark.asyncio + async def test_max_files_enforced_even_when_files_false(self): + """File count limits should apply even when file handling is disabled.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="f1"; filename="a.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"a\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="f2"; filename="b.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"b\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Too many files"): + await request.form(files=False, max_files=1) + + @pytest.mark.asyncio + async def test_max_parts_limit(self): + """Total part 
count should be bounded.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="a"\r\n' + b"\r\n" + b"1\r\n" + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="b"\r\n' + b"\r\n" + b"2\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="Too many parts"): + await request.form(max_parts=1) + + @pytest.mark.asyncio + async def test_max_file_size_enforced_even_when_files_false(self): + """File size limits should apply even when file handling is disabled.""" + boundary = "----TestBoundary123" + big_content = b"x" * 2048 + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="big.bin"\r\n' + b"Content-Type: application/octet-stream\r\n" + b"\r\n" + big_content + b"\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="File too large"): + await request.form(files=False, max_file_size=1024) + + @pytest.mark.asyncio + async def test_part_header_limits(self): + """Overly large part headers should be rejected.""" + boundary = "----TestBoundary123" + huge_header_value = "x" * 5000 + body = ( + b"------TestBoundary123\r\n" + + f'Content-Disposition: form-data; name="field"; foo="{huge_header_value}"\r\n'.encode() + + b"\r\n" + + b"value\r\n" + + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with 
pytest.raises(BadRequest, match="headers too large"): + await request.form(max_part_header_bytes=1024) + + @pytest.mark.asyncio + async def test_insufficient_disk_space_rejects_upload(self, monkeypatch): + """Uploads should be rejected when free disk is below the floor.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n" + b"\r\n" + b"Hello\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + + DiskUsage = namedtuple("DiskUsage", ("total", "used", "free")) + monkeypatch.setattr( + "datasette.utils.multipart.shutil.disk_usage", + lambda path: DiskUsage(total=100, used=95, free=5), + ) + + request = Request(scope, make_receive(body)) + with pytest.raises(BadRequest, match="Insufficient disk space"): + await request.form(files=True, min_free_disk_bytes=50) + + @pytest.mark.asyncio + async def test_low_disk_space_does_not_block_field_only_forms(self, monkeypatch): + """Low disk space should not reject multipart forms with no file parts.""" + boundary = "----TestBoundary123" + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"\r\n' + b"\r\n" + b"value\r\n" + b"------TestBoundary123--\r\n" + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + + DiskUsage = namedtuple("DiskUsage", ("total", "used", "free")) + monkeypatch.setattr( + "datasette.utils.multipart.shutil.disk_usage", + lambda path: DiskUsage(total=100, used=99, free=1), + ) + + request = Request(scope, make_receive(body)) + form = await request.form(files=True, min_free_disk_bytes=50) + assert form["field"] == "value" + + @pytest.mark.asyncio + async def 
test_headers_without_newline_hit_header_byte_limit(self): + """Headers that never terminate should still hit the header byte limit.""" + boundary = "----TestBoundary123" + huge = b"x" * 5000 + # No CRLF is included after the header line + body = ( + b"------TestBoundary123\r\n" + b'Content-Disposition: form-data; name="field"; foo="' + huge + b'"' + ) + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", f"multipart/form-data; boundary={boundary}".encode()), + ], + } + request = Request(scope, make_receive(body)) + + with pytest.raises(BadRequest, match="headers too large"): + await request.form(max_part_header_bytes=1024) + + +class TestFormDataLenSemantics: + """Test that FormData.__len__ reflects number of items, not unique keys.""" + + @pytest.mark.asyncio + async def test_len_counts_items(self): + body = b"tag=python&tag=web&tag=api" + scope = { + "type": "http", + "method": "POST", + "headers": [ + (b"content-type", b"application/x-www-form-urlencoded"), + ], + } + request = Request(scope, make_receive(body)) + + form = await request.form() + assert len(form) == 3 + + +# Conformance test suite using multipart-form-data-conformance + +# Tests where our parser intentionally differs from strict spec for security/practicality +# Our parser sanitizes filenames (strips paths) while the conformance suite expects raw +FILENAME_SANITIZATION_TESTS = { + "026-filename-with-backslash", # We preserve backslashes but the test expects raw + "029-filename-path-traversal", # We strip path components for security +} + +# Tests for optional/lenient features we don't implement +OPTIONAL_TESTS = { + "085-header-folding", # Obsolete header folding feature +} + +# Tests for malformed input where we're lenient instead of erroring +LENIENT_PARSING_TESTS = { + "203-missing-content-disposition", + "204-invalid-content-disposition", +} + + +def load_conformance_test_cases(): + """Load all test cases from multipart-form-data-conformance.""" + tests_dir = 
get_tests_dir() + test_cases = [] + + for category_dir in sorted(tests_dir.iterdir()): + if not category_dir.is_dir(): + continue + for test_dir in sorted(category_dir.iterdir()): + if not test_dir.is_dir(): + continue + test_json = test_dir / "test.json" + headers_json = test_dir / "headers.json" + input_raw = test_dir / "input.raw" + + if not all(f.exists() for f in [test_json, headers_json, input_raw]): + continue + + with open(test_json) as f: + test_spec = json.load(f) + with open(headers_json) as f: + headers = json.load(f) + with open(input_raw, "rb") as f: + body = f.read() + + test_id = test_spec["id"] + + # Add marks for tests we handle differently + marks = [] + if test_id in FILENAME_SANITIZATION_TESTS: + marks.append( + pytest.mark.xfail(reason="Parser sanitizes filenames for security") + ) + elif test_id in OPTIONAL_TESTS: + marks.append( + pytest.mark.xfail(reason="Optional feature not implemented") + ) + elif test_id in LENIENT_PARSING_TESTS: + marks.append( + pytest.mark.xfail(reason="Parser is lenient with malformed input") + ) + + test_cases.append( + pytest.param( + test_spec, + headers, + body, + id=test_id, + marks=marks, + ) + ) + + return test_cases + + +CONFORMANCE_TEST_CASES = load_conformance_test_cases() + + +@pytest.mark.parametrize("test_spec,headers,body", CONFORMANCE_TEST_CASES) +@pytest.mark.asyncio +async def test_conformance(test_spec, headers, body): + """ + Run conformance test cases from multipart-form-data-conformance. 
+ + Each test case specifies: + - headers: HTTP headers including Content-Type with boundary + - body: Raw multipart body bytes + - expected: Expected parse result (valid/invalid, parts list) + """ + scope = { + "type": "http", + "method": "POST", + "headers": [(k.encode(), v.encode()) for k, v in headers.items()], + } + request = Request(scope, make_receive(body)) + + expected = test_spec["expected"] + + if not expected["valid"]: + # Should raise an error for invalid input + with pytest.raises((BadRequest, ValueError)): + await request.form(files=True) + return + + # Parse form data + form = await request.form(files=True) + + # Verify each expected part + for i, expected_part in enumerate(expected["parts"]): + name = expected_part["name"] + + # Get value(s) for this name + values = form.getlist(name) + + # Find the value at the correct index for this name + # (handles multiple values with same name) + same_name_count = sum(1 for p in expected["parts"][:i] if p["name"] == name) + + if same_name_count >= len(values): + pytest.fail( + f"Expected part {name} at index {same_name_count} but only {len(values)} found" + ) + + value = values[same_name_count] + + # Determine expected content + if "body_base64" in expected_part: + expected_content = base64.b64decode(expected_part["body_base64"]) + elif "body_text" in expected_part: + expected_content = expected_part["body_text"].encode("utf-8") + else: + expected_content = None + + # Check for file vs field + # A part is a file if it has a filename OR filename_star + is_file = ( + expected_part.get("filename") is not None + or expected_part.get("filename_star") is not None + ) + + if is_file: + # It's a file + assert hasattr(value, "filename"), f"Expected file for {name}" + + # Check filename - use filename_star if present, else filename + expected_filename = expected_part.get("filename_star") or expected_part.get( + "filename" + ) + if expected_filename: + assert ( + value.filename == expected_filename + ), f"Filename 
mismatch: expected {expected_filename!r}, got {value.filename!r}" + + if expected_part.get("content_type"): + assert value.content_type == expected_part["content_type"] + + content = await value.read() + assert ( + len(content) == expected_part["body_size"] + ), f"Size mismatch: expected {expected_part['body_size']}, got {len(content)}" + if expected_content is not None: + assert content == expected_content + else: + # It's a text field + if hasattr(value, "filename"): + pytest.fail(f"Expected text field for {name}, got file") + + if expected_content is not None: + # For text fields, value is a string + try: + expected_text = expected_content.decode("utf-8") + except UnicodeDecodeError: + expected_text = expected_content.decode("latin-1") + assert ( + value == expected_text + ), f"Value mismatch: expected {expected_text!r}, got {value!r}" From b771e930bc16e128b48da80c9ccbba20cba177b5 Mon Sep 17 00:00:00 2001 From: Daniel Olasubomi Sobowale Date: Wed, 28 Jan 2026 20:41:58 -0600 Subject: [PATCH 080/299] Fix filter-input and search-input zoom on iOS Safari Closes #2346 --- .gitignore | 2 ++ datasette/static/app.css | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ce256606..12acd87e 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,5 @@ node_modules tests/*.dylib tests/*.so tests/*.dll + +.idea \ No newline at end of file diff --git a/datasette/static/app.css b/datasette/static/app.css index a3117152..a7fc7fa3 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -647,10 +647,14 @@ button.core[type=button] { border-radius: 3px; -webkit-appearance: none; padding: 9px 4px; - font-size: 1em; + font-size: 16px; font-family: Helvetica, sans-serif; } +#_search { + font-size: 16px; +} + From 5873578d49a894e358f8480fee27e17e37f6c97e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 29 Jan 2026 09:00:22 -0800 Subject: [PATCH 081/299] Release 1.0a24 Refs #2050, #2346, #2608, #2609, #2610, #2611, 
#2613, #2619, #2624, #2627, #2628, #2629, #2630, #2632 --- datasette/version.py | 2 +- docs/changelog.rst | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index fff37a72..de7585ca 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a23" +__version__ = "1.0a24" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index feba7e86..67ceeece 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,61 @@ Changelog ========= +.. _v1_0_a24: + +1.0a24 (2026-01-29) +------------------- + +``request.form()`` method for POST data and file uploads +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Datasette now includes a ``request.form()`` method for parsing form submissions, including handling file uploads. (`#2626 `__) + +This supports both ``application/x-www-form-urlencoded`` and ``multipart/form-data`` content types, and uses a new streaming multipart parser that processes uploads without buffering entire request bodies in memory. + +.. code-block:: python + + # Parse form fields (files are discarded by default) + form = await request.form() + username = form["username"] + + # Parse form fields AND file uploads + form = await request.form(files=True) + uploaded = form["avatar"] + content = await uploaded.read() + +The returned :ref:`FormData ` object provides dictionary-style access with support for multiple values per key via ``form.getlist("key")``. Uploaded files are represented as :ref:`UploadedFile ` objects with ``filename``, ``content_type``, ``size`` properties and async ``read()`` and ``seek()`` methods. + +Files smaller than 1MB are held in memory; larger files automatically spill to temporary files on disk. Configurable limits control maximum file size, request size, field counts and more. 
+ +Several internal views (permissions debug, messages debug, create token) now use ``request.form()`` instead of ``request.post_vars()``. + +``request.post_vars()`` remains available for backwards compatibility but is no longer the recommended API for handling POST data. + +``render_cell`` and ``foreign_key_tables`` extras for the JSON API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The table JSON API now supports ``?_extra=render_cell``, which returns the rendered HTML for each cell as produced by the :ref:`render_cell plugin hook `. Only columns whose rendered output differs from the default are included. (:issue:`2619`) + +The row JSON API also gains ``?_extra=render_cell`` and ``?_extra=foreign_key_tables`` extras, bringing it closer to parity with the table API. + +The row JSON API now returns ``"ok": true`` in its response, for consistency with the table API. + +``uv run pytest`` with a ``dev=`` dependency group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The recommended development environment for Datasette now uses `uv `__. You can now set up a development environment and run the test suite with just ``uv run pytest`` — no manual virtualenv or ``pip install`` step required. (:issue:`2611`) + +Other changes +~~~~~~~~~~~~~ + +- Plugins that raise ``datasette.utils.StartupError()`` during startup now display a clean error message instead of a full traceback. (:issue:`2624`) +- Schema refreshes are now throttled to at most once per second, providing a small performance increase. (:issue:`2629`) +- Minor performance improvement to ``remove_infinites`` — rows without infinity values now skip the list/dict reconstruction step. (:issue:`2629`) +- Filter inputs and the search input no longer trigger unwanted zoom on iOS Safari. Thanks, `Daniel Olasubomi Sobowale `__. (:issue:`2346`) +- ``table_names()`` and ``get_all_foreign_keys()`` now return results in deterministic sorted order. 
(:issue:`2628`) +- Switched linting to `ruff `__ and fixed all lint errors. (:issue:`2630`) + .. _v1_0_a23: 1.0a23 (2025-12-02) From 80b7f987cad59113896f28a29828ffe856218216 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Feb 2026 13:20:33 -0800 Subject: [PATCH 082/299] write_wrapper plugin hook for intercepting write operations (#2636) * Implement write_wrapper plugin hook for intercepting database writes Add a new `write_wrapper` plugin hook that lets plugins wrap write operations with before/after logic using a generator-based context manager pattern. The hook receives (datasette, database, request, transaction) and returns a generator function that takes a conn, yields once to let the write execute, and can run cleanup after. The write result is sent back via `generator.send()` and exceptions are thrown via `generator.throw()`, giving plugins full visibility. Also adds `request=None` parameter to execute_write, execute_write_fn, execute_write_script, and execute_write_many, and threads request through all view-layer call sites (insert, upsert, update, delete, drop, create table, canned queries). * Add documentation for wrap_write hook, fix lint issues Document the wrap_write plugin hook in plugin_hooks.rst with parameter descriptions and two examples: a simple logging wrapper and an advanced SQLite authorizer-based table protection pattern. Also fix black formatting and remove unused variable flagged by ruff. * Rename wrap_write hook to write_wrapper for consistency with asgi_wrapper * Move write_wrapper docs to just below prepare_connection * Refactor write_wrapper tests to use pytest.parametrize Consolidate duplicate test cases: merge before/after tests for execute_write_fn and execute_write into one parametrized test, and merge three parameter-passing tests into one parametrized test. 
Claude Code transcript: https://gisthost.github.io/?c4c12079434e69677e4aa8ac664b21b8/index.html --- datasette/database.py | 77 ++++++- datasette/hookspecs.py | 22 ++ datasette/views/database.py | 6 +- datasette/views/row.py | 4 +- datasette/views/table.py | 4 +- docs/plugin_hooks.rst | 87 ++++++++ tests/test_plugins.py | 30 +++ tests/test_write_wrapper.py | 387 ++++++++++++++++++++++++++++++++++++ 8 files changed, 604 insertions(+), 13 deletions(-) create mode 100644 tests/test_write_wrapper.py diff --git a/datasette/database.py b/datasette/database.py index 8e4ee2b6..1e6f9032 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -130,25 +130,25 @@ class Database: for connection in self._all_file_connections: connection.close() - async def execute_write(self, sql, params=None, block=True): + async def execute_write(self, sql, params=None, block=True, request=None): def _inner(conn): return conn.execute(sql, params or []) with trace("sql", database=self.name, sql=sql.strip(), params=params): - results = await self.execute_write_fn(_inner, block=block) + results = await self.execute_write_fn(_inner, block=block, request=request) return results - async def execute_write_script(self, sql, block=True): + async def execute_write_script(self, sql, block=True, request=None): def _inner(conn): return conn.executescript(sql) with trace("sql", database=self.name, sql=sql.strip(), executescript=True): results = await self.execute_write_fn( - _inner, block=block, transaction=False + _inner, block=block, transaction=False, request=request ) return results - async def execute_write_many(self, sql, params_seq, block=True): + async def execute_write_many(self, sql, params_seq, block=True, request=None): def _inner(conn): count = 0 @@ -163,7 +163,9 @@ class Database: with trace( "sql", database=self.name, sql=sql.strip(), executemany=True ) as kwargs: - results, count = await self.execute_write_fn(_inner, block=block) + results, count = await self.execute_write_fn( + 
_inner, block=block, request=request + ) kwargs["count"] = count return results @@ -187,7 +189,8 @@ class Database: # Threaded mode - send to write thread return await self._send_to_write_thread(fn, isolated_connection=True) - async def execute_write_fn(self, fn, block=True, transaction=True): + async def execute_write_fn(self, fn, block=True, transaction=True, request=None): + fn = self._wrap_fn_with_hooks(fn, request, transaction) if self.ds.executor is None: # non-threaded mode if self._write_connection is None: @@ -203,6 +206,25 @@ class Database: fn, block=block, transaction=transaction ) + def _wrap_fn_with_hooks(self, fn, request, transaction): + from .plugins import pm + + wrappers = pm.hook.write_wrapper( + datasette=self.ds, + database=self.name, + request=request, + transaction=transaction, + ) + wrappers = [w for w in wrappers if w is not None] + if not wrappers: + return fn + # Build the wrapped fn by nesting context manager generators. + # The first wrapper returned by pluggy is outermost. + original_fn = fn + for wrapper_factory in reversed(wrappers): + original_fn = _apply_write_wrapper(original_fn, wrapper_factory) + return original_fn + async def _send_to_write_thread( self, fn, block=True, isolated_connection=False, transaction=True ): @@ -680,6 +702,47 @@ class Database: return f"" +def _apply_write_wrapper(fn, wrapper_factory): + """Apply a single write_wrapper context manager around fn. + + ``wrapper_factory`` is a callable that takes ``(conn)`` and returns a + generator that yields exactly once. Code before the yield runs before + ``fn(conn)``, code after the yield runs after. The result of + ``fn(conn)`` is sent into the generator via ``.send()``, and any + exception raised by ``fn(conn)`` is thrown via ``.throw()``. 
+ """ + + def wrapped(conn): + gen = wrapper_factory(conn) + # Advance to the yield point (run "before" code) + try: + next(gen) + except StopIteration: + # Generator didn't yield — just run fn unchanged + return fn(conn) + + # Execute the actual write + try: + result = fn(conn) + except Exception: + # Throw exception into generator so it can handle it + try: + gen.throw(*sys.exc_info()) + except StopIteration: + pass + # Re-raise the original exception + raise + else: + # Send the result back through the yield + try: + gen.send(result) + except StopIteration: + pass + return result + + return wrapped + + class WriteTask: __slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction") diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 3f6a1425..b993fb61 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -220,3 +220,25 @@ def top_query(datasette, request, database, sql): @hookspec def top_canned_query(datasette, request, database, query_name): """HTML to include at the top of the canned query page""" + + +@hookspec +def write_wrapper(datasette, database, request, transaction): + """Called when a write function is about to execute. + + Return a generator function that accepts a ``conn`` argument. + The generator should ``yield`` exactly once: code before the + ``yield`` runs before the write, code after the ``yield`` runs + after the write completes. The result of the write is sent + back through the ``yield``, so you can capture it with + ``result = yield``. + + If the write raises an exception, it is thrown into the generator + so you can handle it with a try/except around the ``yield``. + + ``request`` may be ``None`` for writes not originating from an + HTTP request. ``transaction`` is ``True`` if the write will + be wrapped in a transaction. + + Return ``None`` to skip wrapping. 
+ """ diff --git a/datasette/views/database.py b/datasette/views/database.py index 51c752a0..e5f2cf16 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -466,7 +466,9 @@ class QueryView(View): ok = None redirect_url = None try: - cursor = await db.execute_write(canned_query["sql"], params_for_query) + cursor = await db.execute_write( + canned_query["sql"], params_for_query, request=request + ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO @@ -1119,7 +1121,7 @@ class TableCreateView(BaseView): return table.schema try: - schema = await db.execute_write_fn(create_table) + schema = await db.execute_write_fn(create_table, request=request) except Exception as e: return _error([str(e)]) diff --git a/datasette/views/row.py b/datasette/views/row.py index 718ee00c..ff0a3594 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -245,7 +245,7 @@ class RowDeleteView(BaseView): sqlite_utils.Database(conn)[resolved.table].delete(resolved.pk_values) try: - await resolved.db.execute_write_fn(delete_row) + await resolved.db.execute_write_fn(delete_row, request=request) except Exception as e: return _error([str(e)], 500) @@ -305,7 +305,7 @@ class RowUpdateView(BaseView): ) try: - await resolved.db.execute_write_fn(update_row) + await resolved.db.execute_write_fn(update_row, request=request) except Exception as e: return _error([str(e)], 400) diff --git a/datasette/views/table.py b/datasette/views/table.py index b07b62ae..d4dbc194 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -550,7 +550,7 @@ class TableInsertView(BaseView): method_all(rows, **kwargs) try: - rows = await db.execute_write_fn(insert_or_upsert_rows) + rows = await db.execute_write_fn(insert_or_upsert_rows, request=request) except Exception as e: return _error([str(e)]) result = {"ok": True} @@ -670,7 +670,7 @@ class TableDropView(BaseView): def drop_table(conn): 
sqlite_utils.Database(conn)[table_name].drop() - await db.execute_write_fn(drop_table) + await db.execute_write_fn(drop_table, request=request) await self.ds.track_event( DropTableEvent( actor=request.actor, database=database_name, table=table_name diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index ad4a70f8..468b0ade 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -61,6 +61,92 @@ arguments and can be called like this:: Examples: `datasette-jellyfish `__, `datasette-jq `__, `datasette-haversine `__, `datasette-rure `__ +.. _plugin_hook_write_wrapper: + +write_wrapper(datasette, database, request, transaction) +-------------------------------------------------------- + +``datasette`` - :ref:`internals_datasette` + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. + +``database`` - string + The name of the database being written to. + +``request`` - :ref:`internals_request` or ``None`` + The HTTP request that triggered this write, if available. This will be ``None`` for writes that do not originate from an HTTP request (e.g. writes triggered by plugins during startup). + +``transaction`` - bool + ``True`` if the write will be wrapped in a database transaction. + +Return a generator function that accepts a ``conn`` argument (a SQLite connection object). The generator should ``yield`` exactly once. Code before the ``yield`` runs before the write function executes; code after the ``yield`` runs after it completes. + +The result of the write function is sent back through the ``yield``, so you can capture it with ``result = yield``. + +If the write function raises an exception, it is thrown into the generator so you can handle it with a ``try`` / ``except`` around the ``yield``. + +Return ``None`` to skip wrapping for this particular write. + +This example logs every write operation: + +.. 
code-block:: python + + from datasette import hookimpl + + + @hookimpl + def write_wrapper(datasette, database, request): + def wrapper(conn): + print(f"Before write to {database}") + result = yield + print(f"After write to {database}") + + return wrapper + +This more advanced example uses the SQLite authorizer callback to block writes to a specific table for non-admin users: + +.. code-block:: python + + import sqlite3 + from datasette import hookimpl + + WRITE_ACTIONS = ( + sqlite3.SQLITE_INSERT, + sqlite3.SQLITE_UPDATE, + sqlite3.SQLITE_DELETE, + ) + + + @hookimpl + def write_wrapper(datasette, database, request): + actor = None + if request: + actor = request.actor + if actor and actor.get("id") == "admin": + return None + + def wrapper(conn): + def authorizer( + action, arg1, arg2, db_name, trigger + ): + if ( + action in WRITE_ACTIONS + and arg1 == "protected_table" + ): + return sqlite3.SQLITE_DENY + return sqlite3.SQLITE_OK + + conn.set_authorizer(authorizer) + try: + yield + finally: + conn.set_authorizer(None) + + return wrapper + +The ``conn`` object passed to the generator is the same connection that the write function will use. Because the generator and the write function execute together in a single call on the write thread, any state you set on the connection (authorizers, pragmas, temporary tables) is visible to the write and can be cleaned up afterwards. + +When multiple plugins implement ``write_wrapper``, they are nested following pluggy's default calling convention. + .. 
_plugin_hook_prepare_jinja2_environment: prepare_jinja2_environment(env, datasette) @@ -2249,3 +2335,4 @@ The plugin can then call ``datasette.track_event(...)`` to send a ``ban-user`` e await datasette.track_event( BanUserEvent(user={"id": 1, "username": "cleverbot"}) ) + diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 6c23b3ef..7c2180e8 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1524,6 +1524,36 @@ async def test_hook_register_events(): assert any(k.__name__ == "OneEvent" for k in datasette.event_classes) +@pytest.mark.asyncio +async def test_hook_write_wrapper(): + datasette = Datasette(memory=True) + log = [] + + class WrapWritePlugin: + __name__ = "WrapWritePlugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if database != "_memory": + return None + + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(WrapWritePlugin(), name="WrapWritePluginTest") + try: + db = datasette.get_database("_memory") + await db.execute_write("create table t (id integer primary key)") + assert log == ["before", "after"] + finally: + pm.unregister(name="WrapWritePluginTest") + + @pytest.mark.asyncio async def test_hook_register_actions_view_collection(): datasette = Datasette(memory=True, plugins_dir=PLUGINS_DIR) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py new file mode 100644 index 00000000..e05a2a9f --- /dev/null +++ b/tests/test_write_wrapper.py @@ -0,0 +1,387 @@ +""" +Tests for the write_wrapper plugin hook. 
+""" + +from datasette.app import Datasette +from datasette.hookspecs import hookimpl +from datasette.plugins import pm +import pytest +import time + + +@pytest.fixture +def datasette(tmp_path): + db_path = str(tmp_path / "test.db") + ds = Datasette([db_path]) + return ds + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "use_execute_write", + (False, True), + ids=["execute_write_fn", "execute_write"], +) +async def test_write_wrapper_before_and_after(datasette, use_execute_write): + """Test that code before and after yield both execute.""" + log = [] + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(Plugin(), name="test_before_after") + try: + db = datasette.get_database("test") + if use_execute_write: + await db.execute_write( + "create table if not exists t (id integer primary key)" + ) + else: + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t (id integer primary key)" + ) + ) + assert log == ["before", "after"] + finally: + pm.unregister(name="test_before_after") + + +@pytest.mark.asyncio +async def test_write_wrapper_receives_result_via_yield(datasette): + """Test that the result of fn(conn) is sent back through yield.""" + captured = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + result = yield + captured["result"] = result + + return wrapper + + pm.register(Plugin(), name="test_result") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t2 (id integer primary key)" + ) + ) + assert "result" in captured + # Should be a sqlite3 Cursor + assert captured["result"] is not None + finally: + pm.unregister(name="test_result") + + 
+@pytest.mark.asyncio +async def test_write_wrapper_exception_thrown_into_generator(datasette): + """Test that exceptions from fn(conn) are thrown into the generator.""" + caught = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + try: + yield + except Exception as e: + caught["error"] = e + + return wrapper + + pm.register(Plugin(), name="test_exception") + try: + db = datasette.get_database("test") + with pytest.raises(Exception, match="deliberate"): + await db.execute_write_fn( + lambda conn: (_ for _ in ()).throw(Exception("deliberate")) + ) + assert "error" in caught + assert str(caught["error"]) == "deliberate" + finally: + pm.unregister(name="test_exception") + + +@pytest.mark.asyncio +async def test_write_wrapper_conn_is_usable(datasette): + """Test that the conn passed to the wrapper can execute SQL.""" + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + conn.execute("create table if not exists hook_log (msg text)") + conn.execute("insert into hook_log values ('before')") + yield + conn.execute("insert into hook_log values ('after')") + + return wrapper + + pm.register(Plugin(), name="test_conn") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t3 (id integer primary key)" + ) + ) + result = await db.execute("select msg from hook_log order by rowid") + messages = [row[0] for row in result.rows] + assert messages == ["before", "after"] + finally: + pm.unregister(name="test_conn") + + +@pytest.mark.asyncio +async def test_write_wrapper_multiple_plugins_nest(datasette): + """Test that multiple write_wrapper plugins nest correctly.""" + log = [] + + class PluginA: + __name__ = "PluginA" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, 
transaction): + def wrapper(conn): + log.append("A-before") + yield + log.append("A-after") + + return wrapper + + class PluginB: + __name__ = "PluginB" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + def wrapper(conn): + log.append("B-before") + yield + log.append("B-after") + + return wrapper + + pm.register(PluginA(), name="PluginA") + pm.register(PluginB(), name="PluginB") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t4 (id integer primary key)" + ) + ) + assert set(log) == {"A-before", "A-after", "B-before", "B-after"} + # Verify proper nesting: each plugin's before/after should be + # symmetric around the write + a_before = log.index("A-before") + a_after = log.index("A-after") + b_before = log.index("B-before") + b_after = log.index("B-after") + if a_before < b_before: + assert a_after > b_after, "A is outer so A-after should come after B-after" + else: + assert b_after > a_after, "B is outer so B-after should come after A-after" + finally: + pm.unregister(name="PluginA") + pm.unregister(name="PluginB") + + +@pytest.mark.asyncio +async def test_write_wrapper_return_none_skips(datasette): + """Test that returning None from write_wrapper means no wrapping.""" + log = [] + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + log.append("hook-called") + return None + + pm.register(Plugin(), name="test_skip") + try: + db = datasette.get_database("test") + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t5 (id integer primary key)" + ) + ) + assert log == ["hook-called"] + finally: + pm.unregister(name="test_skip") + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "request_value,transaction_value,expected_request,expected_transaction", + ( + ("fake-request", True, "fake-request", True), + (None, True, None, 
True), + (None, False, None, False), + ), + ids=["with-request", "request-none-by-default", "transaction-false"], +) +async def test_write_wrapper_hook_parameters( + datasette, + request_value, + transaction_value, + expected_request, + expected_transaction, +): + """Test that request and transaction parameters are passed through.""" + captured = {} + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + captured["request"] = request + captured["database"] = database + captured["transaction"] = transaction + + pm.register(Plugin(), name="test_params") + try: + db = datasette.get_database("test") + kwargs = {"transaction": transaction_value} + if request_value is not None: + kwargs["request"] = request_value + await db.execute_write_fn( + lambda conn: conn.execute( + "create table if not exists t6 (id integer primary key)" + ), + **kwargs, + ) + assert captured["request"] == expected_request + assert captured["database"] == "test" + assert captured["transaction"] == expected_transaction + finally: + pm.unregister(name="test_params") + + +@pytest.mark.asyncio +async def test_write_wrapper_via_api(tmp_path): + """Test that write_wrapper fires for API write operations.""" + log = [] + + db_path = str(tmp_path / "test.db") + ds = Datasette([db_path], pdb=False) + ds.root_enabled = True + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if database != "test": + return None + + def wrapper(conn): + log.append("before") + yield + log.append("after") + + return wrapper + + pm.register(Plugin(), name="test_api") + try: + db = ds.get_database("test") + await db.execute_write( + "create table if not exists api_test (id integer primary key, name text)" + ) + log.clear() + + token = "dstok_{}".format( + ds.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, + namespace="token", + ) + ) + response = await 
ds.client.post( + "/test/api_test/-/insert", + json={"row": {"name": "test"}, "return": True}, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201, response.json() + assert log == ["before", "after"] + finally: + pm.unregister(name="test_api") + + +@pytest.mark.asyncio +async def test_write_wrapper_change_group_pattern(datasette): + """Test the motivating use case: activating a change group around a write.""" + db = datasette.get_database("test") + + await db.execute_write( + "create table if not exists groups (id integer primary key, current integer)" + ) + await db.execute_write( + "create table if not exists data (id integer primary key, value text)" + ) + await db.execute_write("insert into groups (id, current) values (1, null)") + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + if request and getattr(request, "group_id", None): + group_id = request.group_id + + def wrapper(conn): + conn.execute( + "update groups set current = 1 where id = ?", [group_id] + ) + yield + conn.execute("update groups set current = null where current = 1") + + return wrapper + + pm.register(Plugin(), name="test_change_group") + try: + + class FakeRequest: + group_id = 1 + + await db.execute_write_fn( + lambda conn: conn.execute("insert into data (value) values ('test')"), + request=FakeRequest(), + ) + + result = await db.execute("select current from groups where id = 1") + assert result.rows[0][0] is None + finally: + pm.unregister(name="test_change_group") From 8a315f3d7df8c668fdca216bbb55fe7ef44626dd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Feb 2026 13:27:23 -0800 Subject: [PATCH 083/299] Added a test to exercise the write_wrapper example This example in the docs is now duplicated in a test: 
https://github.com/simonw/datasette/blob/80b7f987cad59113896f28a29828ffe856218216/docs/plugin_hooks.rst#write-wrapper-datasette-database-request-transaction Refs #2637 --- tests/test_write_wrapper.py | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index e05a2a9f..38e5c94e 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -6,6 +6,7 @@ from datasette.app import Datasette from datasette.hookspecs import hookimpl from datasette.plugins import pm import pytest +import sqlite3 import time @@ -385,3 +386,92 @@ async def test_write_wrapper_change_group_pattern(datasette): assert result.rows[0][0] is None finally: pm.unregister(name="test_change_group") + + +WRITE_ACTIONS = ( + sqlite3.SQLITE_INSERT, + sqlite3.SQLITE_UPDATE, + sqlite3.SQLITE_DELETE, +) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "actor,table,should_deny", + ( + (None, "protected_table", True), + ({"id": "regular"}, "protected_table", True), + ({"id": "admin"}, "protected_table", False), + (None, "other_table", False), + ({"id": "regular"}, "other_table", False), + ), + ids=[ + "no-actor-protected", + "regular-user-protected", + "admin-protected", + "no-actor-other", + "regular-user-other", + ], +) +async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny): + """Test the docs example that uses set_authorizer to block writes to a protected table.""" + db = datasette.get_database("test") + await db.execute_write( + "create table if not exists protected_table (id integer primary key, value text)" + ) + await db.execute_write( + "create table if not exists other_table (id integer primary key, value text)" + ) + + class Plugin: + __name__ = "Plugin" + + @staticmethod + @hookimpl + def write_wrapper(datasette, database, request, transaction): + actor = None + if request: + actor = request.actor + if actor and actor.get("id") == "admin": + return None + + 
def wrapper(conn): + def authorizer(action, arg1, arg2, db_name, trigger): + if action in WRITE_ACTIONS and arg1 == "protected_table": + return sqlite3.SQLITE_DENY + return sqlite3.SQLITE_OK + + conn.set_authorizer(authorizer) + try: + yield + finally: + conn.set_authorizer(None) + + return wrapper + + class FakeRequest: + def __init__(self, actor): + self.actor = actor + + pm.register(Plugin(), name="test_set_authorizer") + try: + request = FakeRequest(actor) + if should_deny: + with pytest.raises(Exception): + await db.execute_write_fn( + lambda conn: conn.execute( + f"insert into {table} (value) values ('test')" + ), + request=request, + ) + else: + await db.execute_write_fn( + lambda conn: conn.execute( + f"insert into {table} (value) values ('test')" + ), + request=request, + ) + result = await db.execute(f"select value from {table} order by rowid desc limit 1") + assert result.rows[0][0] == "test" + finally: + pm.unregister(name="test_set_authorizer") From 170f9de774fd3d7487a40c9f67dc12a2c626e96e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:21:25 +0000 Subject: [PATCH 084/299] Add pks parameter to render_cell() plugin hook The render_cell() hook now receives a pks parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. For tables without an explicit primary key, pks is ["rowid"]. For custom SQL queries and views, pks is an empty list []. 
https://claude.ai/code/session_01HFYfevAziq4fSYTNRD9ZCh --- datasette/hookspecs.py | 2 +- datasette/views/database.py | 1 + datasette/views/row.py | 1 + datasette/views/table.py | 3 +++ docs/plugin_hooks.rst | 9 +++++--- tests/fixtures.py | 2 ++ tests/plugins/my_plugin.py | 3 ++- tests/test_plugins.py | 46 +++++++++++++++++++++++++++++++++++++ 8 files changed, 62 insertions(+), 5 deletions(-) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index b993fb61..89be6a65 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -55,7 +55,7 @@ def publish_subcommand(publish): @hookspec -def render_cell(row, value, column, table, database, datasette, request): +def render_cell(row, value, column, table, pks, database, datasette, request): """Customize rendering of HTML table cell values""" diff --git a/datasette/views/database.py b/datasette/views/database.py index e5f2cf16..a42ac758 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1205,6 +1205,7 @@ async def display_rows(datasette, database, request, rows, columns): value=value, column=column, table=None, + pks=[], database=database, datasette=datasette, request=request, diff --git a/datasette/views/row.py b/datasette/views/row.py index ff0a3594..9c59cd3b 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -130,6 +130,7 @@ class RowView(DataView): value=value, column=column, table=table, + pks=resolved.pks, database=database, datasette=self.ds, request=request, diff --git a/datasette/views/table.py b/datasette/views/table.py index d4dbc194..594e925e 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -235,6 +235,7 @@ async def display_columns_and_rows( value=value, column=column, table=table_name, + pks=pks_for_display, database=database_name, datasette=datasette, request=request, @@ -1494,6 +1495,7 @@ async def table_view_data( async def extra_render_cell(): "Rendered HTML for each cell using the render_cell plugin hook" + 
pks_for_display = pks if pks else (["rowid"] if not is_view else []) columns = [col[0] for col in results.description] rendered_rows = [] for row in rows: @@ -1506,6 +1508,7 @@ async def table_view_data( value=value, column=column, table=table_name, + pks=pks_for_display, database=database_name, datasette=datasette, request=request, diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 468b0ade..068469a8 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -9,7 +9,7 @@ Each plugin can implement one or more hooks using the ``@hookimpl`` decorator ag When you implement a plugin hook you can accept any or all of the parameters that are documented as being passed to that hook. -For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(row, value, column, table, database, datasette)``: +For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(row, value, column, table, pks, database, datasette, request)``: .. code-block:: python @@ -474,8 +474,8 @@ Examples: `datasette-publish-fly Date: Tue, 17 Feb 2026 20:09:04 +0000 Subject: [PATCH 085/299] Fix test assertions broken by new fixture rows in 170f9de The render_cell pks parameter commit added rows to compound_primary_key (2->3 rows) and no_primary_key (201->202 rows) tables but did not update existing tests that had hardcoded row count expectations. 
https://claude.ai/code/session_01XfPSZfK57bzRRiEa7Kz5n1 --- tests/test_api.py | 4 ++-- tests/test_table_api.py | 17 +++++++++-------- tests/test_table_html.py | 6 ++++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index e3951df9..95958a72 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -182,7 +182,7 @@ async def test_database_page(ds_client): # -- compound primary keys compound_pk = tables_by_name["compound_primary_key"] assert compound_pk["primary_keys"] == ["pk1", "pk2"] - assert compound_pk["count"] == 2 + assert compound_pk["count"] == 3 compound_three = tables_by_name["compound_three_primary_keys"] assert compound_three["primary_keys"] == ["pk1", "pk2", "pk3"] @@ -196,7 +196,7 @@ async def test_database_page(ds_client): # -- no_primary_key: hidden table with generated data no_pk = tables_by_name["no_primary_key"] assert no_pk["hidden"] is True - assert no_pk["count"] == 201 + assert no_pk["count"] == 202 assert no_pk["primary_keys"] == [] # -- roadside attractions relationship chain diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 49df3ad5..943a1549 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -136,6 +136,7 @@ async def test_table_shape_object_compound_primary_key(ds_client): assert response.json() == { "a,b": {"pk1": "a", "pk2": "b", "content": "c"}, "a~2Fb,~2Ec-d": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, + "d,e": {"pk1": "d", "pk2": "e", "content": "RENDER_CELL_DEMO"}, } @@ -169,11 +170,11 @@ async def test_table_with_reserved_word_name(ds_client): @pytest.mark.parametrize( "path,expected_rows,expected_pages", [ - ("/fixtures/no_primary_key.json", 201, 5), - ("/fixtures/paginated_view.json", 201, 9), - ("/fixtures/no_primary_key.json?_size=25", 201, 9), - ("/fixtures/paginated_view.json?_size=50", 201, 5), - ("/fixtures/paginated_view.json?_size=max", 201, 3), + ("/fixtures/no_primary_key.json", 202, 5), + ("/fixtures/paginated_view.json", 
202, 9), + ("/fixtures/no_primary_key.json?_size=25", 202, 9), + ("/fixtures/paginated_view.json?_size=50", 202, 5), + ("/fixtures/paginated_view.json?_size=max", 202, 3), ("/fixtures/123_starts_with_digits.json", 0, 1), # Ensure faceting doesn't break pagination: ("/fixtures/compound_three_primary_keys.json?_facet=pk1", 1001, 21), @@ -232,7 +233,7 @@ async def test_page_size_zero(ds_client): ) assert response.status_code == 200 assert [] == response.json()["rows"] - assert 201 == response.json()["count"] + assert 202 == response.json()["count"] assert None is response.json()["next"] assert None is response.json()["next_url"] @@ -722,11 +723,11 @@ def test_page_size_matching_max_returned_rows( while path: response = app_client_returned_rows_matches_page_size.get(path) fetched.extend(response.json["rows"]) - assert len(response.json["rows"]) in (1, 50) + assert len(response.json["rows"]) in (2, 50) path = response.json["next_url"] if path: path = path.replace("http://localhost", "") - assert len(fetched) == 201 + assert len(fetched) == 202 @pytest.mark.asyncio diff --git a/tests/test_table_html.py b/tests/test_table_html.py index 90be591a..00cf9e19 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -597,6 +597,12 @@ async def test_table_html_compound_primary_key(ds_client): '
  • ', '', ], + [ + '', + '', + '', + '', + ], ] assert [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") From 5c3137d14858c0750c93bb61ef593d807cadba43 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 Feb 2026 13:30:24 -0800 Subject: [PATCH 086/299] Black formatting --- datasette/app.py | 10 ++----- datasette/cli.py | 8 ++--- datasette/database.py | 36 +++++------------------ datasette/default_permissions/defaults.py | 1 - datasette/facets.py | 12 ++------ datasette/inspect.py | 17 +++-------- datasette/permissions.py | 1 - datasette/utils/__init__.py | 4 +-- datasette/utils/actions_sql.py | 18 ++++-------- datasette/utils/internal_db.py | 14 +++------ datasette/utils/permissions.py | 7 ++--- datasette/views/base.py | 8 ++--- datasette/views/database.py | 8 ++--- datasette/views/index.py | 1 - datasette/views/special.py | 1 - tests/conftest.py | 1 - tests/fixtures.py | 20 +++---------- tests/plugins/my_plugin.py | 8 ++--- tests/test_cli.py | 8 ++--- tests/test_cli_serve_get.py | 4 +-- tests/test_config_dir.py | 6 ++-- tests/test_csv.py | 22 ++++---------- tests/test_html.py | 7 ++--- tests/test_internals_database.py | 12 +++----- tests/test_plugins.py | 10 ++----- tests/test_publish_cloudrun.py | 14 +++------ tests/test_routes.py | 6 ++-- tests/test_table_api.py | 8 ++--- tests/test_utils.py | 22 ++++---------- tests/test_utils_permissions.py | 6 ++-- tests/test_write_wrapper.py | 4 ++- 31 files changed, 82 insertions(+), 222 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 75f6071e..6efaa430 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -633,9 +633,7 @@ class Datasette: """ INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) VALUES {} - """.format( - placeholders - ), + """.format(placeholders), values, ) await populate_schema_tables(internal_db, db) @@ -813,14 +811,12 @@ class Datasette: return orig async def get_instance_metadata(self): - rows = await 
self.get_internal_database().execute( - """ + rows = await self.get_internal_database().execute(""" SELECT key, value FROM metadata_instance - """ - ) + """) return dict(rows) async def get_database_metadata(self, database_name: str): diff --git a/datasette/cli.py b/datasette/cli.py index 1d0cb022..121911ab 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -109,15 +109,11 @@ def sqlite_extensions(fn): return fn(*args, **kwargs) except AttributeError as e: if "enable_load_extension" in str(e): - raise click.ClickException( - textwrap.dedent( - """ + raise click.ClickException(textwrap.dedent(""" Your Python installation does not have the ability to load SQLite extensions. More information: https://datasette.io/help/extensions - """ - ).strip() - ) + """).strip()) raise return wrapped diff --git a/datasette/database.py b/datasette/database.py index 1e6f9032..fcf69c7f 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -532,10 +532,7 @@ class Database: ] if sqlite_version()[1] >= 37: - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" with shadow_tables as ( select name from pragma_table_list @@ -554,14 +551,9 @@ class Database: select name from core_tables ) select name from combined order by 1 - """ - ) - ] + """)] else: - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" WITH base AS ( SELECT name FROM sqlite_master @@ -607,22 +599,15 @@ class Database: SELECT name FROM fts3_shadow_tables ) SELECT name FROM final ORDER BY 1 - """ - ) - ] + """)] # Also hide any FTS tables that have a content= argument - hidden_tables += [ - x[0] - for x in await self.execute( - """ + hidden_tables += [x[0] for x in await self.execute(""" SELECT name FROM sqlite_master WHERE sql LIKE '%VIRTUAL TABLE%' AND sql LIKE '%USING FTS%' AND sql LIKE '%content=%' - """ - ) - ] + """)] has_spatialite = await 
self.execute_fn(detect_spatialite) if has_spatialite: @@ -641,16 +626,11 @@ class Database: "KNN", "KNN2", ] + [ - r[0] - for r in ( - await self.execute( - """ + r[0] for r in (await self.execute(""" select name from sqlite_master where name like "idx_%" and type = "table" - """ - ) - ).rows + """)).rows ] return hidden_tables diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index f5a6a270..4c74219d 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -14,7 +14,6 @@ if TYPE_CHECKING: from datasette import hookimpl from datasette.permissions import PermissionSQL - # Actions that are allowed by default (unless --default-deny is used) DEFAULT_ALLOW_ACTIONS = frozenset( { diff --git a/datasette/facets.py b/datasette/facets.py index dd149424..bc4b6904 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -233,9 +233,7 @@ class ColumnFacet(Facet): ) where {col} is not null group by {col} order by count desc, value limit {limit} - """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 - ) + """.format(col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1) try: facet_rows_results = await self.ds.execute( self.database, @@ -482,9 +480,7 @@ class DateFacet(Facet): select date({column}) from ( select * from ({sql}) limit 100 ) where {column} glob "????-??-*" - """.format( - column=escape_sqlite(column), sql=self.sql - ) + """.format(column=escape_sqlite(column), sql=self.sql) try: results = await self.ds.execute( self.database, @@ -530,9 +526,7 @@ class DateFacet(Facet): ) where date({col}) is not null group by date({col}) order by count desc, value limit {limit} - """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 - ) + """.format(col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1) try: facet_rows_results = await self.ds.execute( self.database, diff --git a/datasette/inspect.py b/datasette/inspect.py 
index ede142d0..5e681e03 100644 --- a/datasette/inspect.py +++ b/datasette/inspect.py @@ -10,7 +10,6 @@ from .utils import ( sqlite3, ) - HASH_BLOCK_SIZE = 1024 * 1024 @@ -70,16 +69,11 @@ def inspect_tables(conn, database_metadata): tables[table]["foreign_keys"] = info # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r["name"] - for r in conn.execute( - """ + hidden_tables = [r["name"] for r in conn.execute(""" select name from sqlite_master where rootpage = 0 and sql like '%VIRTUAL TABLE%USING FTS%' - """ - ) - ] + """)] if detect_spatialite(conn): # Also hide Spatialite internal tables @@ -94,14 +88,11 @@ def inspect_tables(conn, database_metadata): "views_geometry_columns", "virts_geometry_columns", ] + [ - r["name"] - for r in conn.execute( - """ + r["name"] for r in conn.execute(""" select name from sqlite_master where name like "idx_%" and type = "table" - """ - ) + """) ] for t in tables.keys(): diff --git a/datasette/permissions.py b/datasette/permissions.py index c48293ac..b5e72b8e 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -3,7 +3,6 @@ from dataclasses import dataclass from typing import Any, NamedTuple import contextvars - # Context variable to track when permission checks should be skipped _skip_permission_checks = contextvars.ContextVar( "skip_permission_checks", default=False diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index d0d216eb..c6973d06 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -677,9 +677,7 @@ def detect_fts_sql(table): and sql like '%VIRTUAL TABLE%USING FTS%' ) ) - """.format( - table=table.replace("'", "''") - ) + """.format(table=table.replace("'", "''")) def detect_json1(conn=None): diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 9c2add0e..14383253 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -180,13 +180,11 @@ async def 
_build_single_action_sql( # Skip plugins that only provide restriction_sql (no permission rules) if permission_sql.sql is None: continue - rule_sqls.append( - f""" + rule_sqls.append(f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( {permission_sql.sql} ) - """.strip() - ) + """.strip()) # If no rules, return empty result (deny all) if not rule_sqls: @@ -405,14 +403,12 @@ async def _build_single_action_sql( # Add restriction filter if there are restrictions if restriction_sqls: - query_parts.append( - """ + query_parts.append(""" AND EXISTS ( SELECT 1 FROM restriction_list r WHERE (r.parent = decisions.parent OR r.parent IS NULL) AND (r.child = decisions.child OR r.child IS NULL) - )""" - ) + )""") # Add parent filter if specified if parent is not None: @@ -479,13 +475,11 @@ async def build_permission_rules_sql( if permission_sql.sql is None: continue - union_parts.append( - f""" + union_parts.append(f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( {permission_sql.sql} ) - """.strip() - ) + """.strip()) rules_union = " UNION ALL ".join(union_parts) return rules_union, all_params, restriction_sqls diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index a3afbab2..e4ebddde 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -3,8 +3,7 @@ from datasette.utils import table_column_details async def init_internal_db(db): - create_tables_sql = textwrap.dedent( - """ + create_tables_sql = textwrap.dedent(""" CREATE TABLE IF NOT EXISTS catalog_databases ( database_name TEXT PRIMARY KEY, path TEXT, @@ -68,16 +67,13 @@ async def init_internal_db(db): FOREIGN KEY (database_name) REFERENCES catalog_databases(database_name), FOREIGN KEY (database_name, table_name) REFERENCES catalog_tables(database_name, table_name) ); - """ - ).strip() + """).strip() await db.execute_write_script(create_tables_sql) await initialize_metadata_tables(db) 
async def initialize_metadata_tables(db): - await db.execute_write_script( - textwrap.dedent( - """ + await db.execute_write_script(textwrap.dedent(""" CREATE TABLE IF NOT EXISTS metadata_instance ( key text, value text, @@ -107,9 +103,7 @@ async def initialize_metadata_tables(db): value text, unique(database_name, resource_name, column_name, key) ); - """ - ) - ) + """)) async def populate_schema_tables(internal_db, db): diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 6c30a12a..fd1e41a1 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -9,7 +9,6 @@ from datasette.permissions import PermissionSQL from datasette.plugins import pm from datasette.utils import await_me_maybe - # Sentinel object to indicate permission checks should be skipped SKIP_PERMISSION_CHECKS = object() @@ -116,13 +115,11 @@ def build_rules_union( if p.sql is None: continue - parts.append( - f""" + parts.append(f""" SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( {p.sql} ) - """.strip() - ) + """.strip()) if not parts: # Empty UNION that returns no rows diff --git a/datasette/views/base.py b/datasette/views/base.py index bdc9f742..e4c1c738 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -241,8 +241,7 @@ class DataView(BaseView): data, extra_template_data, templates = response_or_template_contexts except QueryInterrupted as ex: raise DatasetteError( - textwrap.dedent( - """ + textwrap.dedent("""

    SQL query took too long. The time limit is controlled by the sql_time_limit_ms configuration option.

    @@ -251,10 +250,7 @@ class DataView(BaseView): let ta = document.querySelector("textarea"); ta.style.height = ta.scrollHeight + "px"; - """.format( - escape(ex.sql) - ) - ).strip(), + """.format(escape(ex.sql))).strip(), title="SQL Interrupted", status=400, message_is_html=True, diff --git a/datasette/views/database.py b/datasette/views/database.py index a42ac758..93ad8eda 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -615,8 +615,7 @@ class QueryView(View): rows = results.rows except QueryInterrupted as ex: raise DatasetteError( - textwrap.dedent( - """ + textwrap.dedent("""

    SQL query took too long. The time limit is controlled by the sql_time_limit_ms configuration option.

    @@ -625,10 +624,7 @@ class QueryView(View): let ta = document.querySelector("textarea"); ta.style.height = ta.scrollHeight + "px"; - """.format( - markupsafe.escape(ex.sql) - ) - ).strip(), + """.format(markupsafe.escape(ex.sql))).strip(), title="SQL Interrupted", status=400, message_is_html=True, diff --git a/datasette/views/index.py b/datasette/views/index.py index a59c687c..6a9462ac 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -12,7 +12,6 @@ from datasette.version import __version__ from .base import BaseView - # Truncate table list on homepage at: TRUNCATE_AT = 5 diff --git a/datasette/views/special.py b/datasette/views/special.py index 57a3024d..640c82eb 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -13,7 +13,6 @@ from .base import BaseView, View import secrets import urllib - logger = logging.getLogger(__name__) diff --git a/tests/conftest.py b/tests/conftest.py index ad7243c1..efa02c0a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,6 @@ import time from dataclasses import dataclass from datasette import Event, hookimpl - try: import pysqlite3 as sqlite3 except ImportError: diff --git a/tests/fixtures.py b/tests/fixtures.py index 0c110a94..9f99519a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -13,7 +13,6 @@ import string import tempfile import textwrap - # This temp file is used by one of the plugin config tests TEMP_PLUGIN_SECRET_FILE = os.path.join(tempfile.gettempdir(), "plugin-secret") @@ -331,16 +330,14 @@ CONFIG = { "sql": "select :_header_user_agent as user_agent, :_now_datetime_utc as datetime", }, "neighborhood_search": { - "sql": textwrap.dedent( - """ + "sql": textwrap.dedent(""" select _neighborhood, facet_cities.name, state from facetable join facet_cities on facetable._city_id = facet_cities.id where _neighborhood like '%' || :text || '%' order by _neighborhood; - """ - ), + """), "title": "Search neighborhoods", "description_html": "Demonstrating 
simple like search", "fragment": "fragment-goes-here", @@ -710,19 +707,10 @@ CREATE VIEW searchable_view_configured_by_metadata AS for a, b, c, content in generate_compound_rows(1001) ] ) - + "\n".join( - [ - """INSERT INTO sortable VALUES ( + + "\n".join(["""INSERT INTO sortable VALUES ( "{pk1}", "{pk2}", "{content}", {sortable}, {sortable_with_nulls}, {sortable_with_nulls_2}, "{text}"); - """.format( - **row - ).replace( - "None", "null" - ) - for row in generate_sortable_rows(201) - ] - ) + """.format(**row).replace("None", "null") for row in generate_sortable_rows(201)]) ) TABLE_PARAMETERIZED_SQL = [ ("insert into binary_data (data) values (?);", [b"\x15\x1c\x02\xc7\xad\x05\xfe"]), diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index c8794fad..20e7d111 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -261,8 +261,7 @@ def register_routes(): response = Response.redirect("/") datasette.set_actor_cookie(response, {"id": "root"}) return response - return Response.html( - """ + return Response.html("""

    @@ -271,10 +270,7 @@ def register_routes(): style="font-size: 2em; padding: 0.1em 0.5em;">

    - """.format( - request.path, request.scope["csrftoken"]() - ) - ) + """.format(request.path, request.scope["csrftoken"]())) def asgi_scope(scope): return Response.json(scope, default=repr) diff --git a/tests/test_cli.py b/tests/test_cli.py index 6cdfd924..7673c3f3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -115,13 +115,9 @@ def test_plugins_cli(app_client): def test_metadata_yaml(): - yaml_file = io.StringIO( - textwrap.dedent( - """ + yaml_file = io.StringIO(textwrap.dedent(""" title: Hello from YAML - """ - ) - ) + """)) # Annoyingly we have to provide all default arguments here: ds = serve.callback( [], diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 5ad01bfa..dc852201 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -16,9 +16,7 @@ def test_serve_with_get(tmp_path_factory): def startup(datasette): with open("{}", "w") as fp: fp.write("hello") - """.format( - str(plugins_dir / "hello.txt") - ), + """.format(str(plugins_dir / "hello.txt")), ), "utf-8", ) diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index f9a90fbe..ae7fe500 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -51,8 +51,7 @@ def config_dir(tmp_path_factory): for dbname in ("demo.db", "immutable.db", "j.sqlite3", "k.sqlite"): db = sqlite3.connect(str(config_dir / dbname)) - db.executescript( - """ + db.executescript(""" CREATE TABLE cities ( id integer primary key, name text @@ -60,8 +59,7 @@ def config_dir(tmp_path_factory): INSERT INTO cities (id, name) VALUES (1, 'San Francisco') ; - """ - ) + """) # Mark "immutable.db" as immutable (config_dir / "inspect-data.json").write_text( diff --git a/tests/test_csv.py b/tests/test_csv.py index 5589bd97..a2f03776 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -9,16 +9,12 @@ EXPECTED_TABLE_CSV = """id,content 3, 4,RENDER_CELL_DEMO 5,RENDER_CELL_ASYNC -""".replace( - "\n", "\r\n" -) +""".replace("\n", "\r\n") EXPECTED_CUSTOM_CSV = 
"""content hello world -""".replace( - "\n", "\r\n" -) +""".replace("\n", "\r\n") EXPECTED_TABLE_WITH_LABELS_CSV = """ pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags,complex_array,distinct_some_null,n @@ -37,17 +33,13 @@ pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags, 13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[],, 14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[],, 15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[],, -""".lstrip().replace( - "\n", "\r\n" -) +""".lstrip().replace("\n", "\r\n") EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV = """ pk,foreign_key_with_label,foreign_key_with_label_label,foreign_key_with_blank_label,foreign_key_with_blank_label_label,foreign_key_with_no_label,foreign_key_with_no_label_label,foreign_key_compound_pk1,foreign_key_compound_pk2 1,1,hello,3,,1,1,a,b 2,,,,,,,, -""".lstrip().replace( - "\n", "\r\n" -) +""".lstrip().replace("\n", "\r\n") @pytest.mark.asyncio @@ -108,8 +100,7 @@ async def test_table_csv_with_invalid_labels(): ) await ds.invoke_startup() db = ds.add_memory_database("db_2214") - await db.execute_write_script( - """ + await db.execute_write_script(""" create table t1 (id integer primary key, name text); insert into t1 (id, name) values (1, 'one'); insert into t1 (id, name) values (2, 'two'); @@ -124,8 +115,7 @@ async def test_table_csv_with_invalid_labels(): insert into maintable (id, fk_integer, fk_text) values (1, 1, 'a'); insert into maintable (id, fk_integer, fk_text) values (2, 3, 'b'); -- invalid fk_integer insert into maintable (id, fk_integer, fk_text) values (3, 2, 'c'); -- invalid fk_text - """ - ) + """) response = await ds.client.get("/db_2214/maintable.csv?_labels=1") assert response.status_code == 200 assert response.text == ( diff --git a/tests/test_html.py b/tests/test_html.py index 8fad5764..757f3e6e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -620,14 +620,11 @@ async def 
test_urlify_custom_queries(ds_client): response = await ds_client.get(path) assert response.status_code == 200 soup = Soup(response.content, "html.parser") - assert ( - """
    """ - == soup.find("td", {"class": "col-user_url"}).prettify().strip() - ) +""" == soup.find("td", {"class": "col-user_url"}).prettify().strip() @pytest.mark.asyncio diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 02c67bfc..5e3459cd 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -747,19 +747,15 @@ async def test_replace_database(tmpdir): path1 = str(tmpdir / "data1.db") (tmpdir / "two").mkdir() path2 = str(tmpdir / "two" / "data1.db") - sqlite3.connect(path1).executescript( - """ + sqlite3.connect(path1).executescript(""" create table t (id integer primary key); insert into t (id) values (1); insert into t (id) values (2); - """ - ) - sqlite3.connect(path2).executescript( - """ + """) + sqlite3.connect(path2).executescript(""" create table t (id integer primary key); insert into t (id) values (1); - """ - ) + """) datasette = Datasette([path1]) db = datasette.get_database("data1") count = (await db.execute("select count(*) from t")).first()[0] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 190ef659..754b199c 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -233,9 +233,7 @@ async def test_hook_render_cell_pks_compound_pk(ds_client): @pytest.mark.asyncio async def test_hook_render_cell_pks_rowid_table(ds_client): """pks should be ["rowid"] for a table with no explicit primary key""" - response = await ds_client.get( - "/fixtures/no_primary_key?content=RENDER_CELL_DEMO" - ) + response = await ds_client.get("/fixtures/no_primary_key?content=RENDER_CELL_DEMO") soup = Soup(response.text, "html.parser") td = soup.find("td", {"class": "col-content"}) data = json.loads(td.string) @@ -457,14 +455,12 @@ def view_names_client(tmp_path_factory): ): (templates / template).write_text("view_name:{{ view_name }}", "utf-8") (plugins / "extra_vars.py").write_text( - textwrap.dedent( - """ + textwrap.dedent(""" from datasette import hookimpl @hookimpl def 
extra_template_vars(view_name): return {"view_name": view_name} - """ - ), + """), "utf-8", ) db_path = str(tmpdir / "fixtures.db") diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index f53e5059..6617bc77 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -231,16 +231,12 @@ def test_publish_cloudrun_plugin_secrets( with open("test.db", "w") as fp: fp.write("data") with open("metadata.yml", "w") as fp: - fp.write( - textwrap.dedent( - """ + fp.write(textwrap.dedent(""" title: Hello from metadata YAML plugins: datasette-auth-github: foo: bar - """ - ).strip() - ) + """).strip()) result = runner.invoke( cli.cli, [ @@ -333,8 +329,7 @@ def test_publish_cloudrun_apt_get_install( .split("\n====================\n")[0] .strip() ) - expected = textwrap.dedent( - r""" + expected = textwrap.dedent(r""" FROM python:3.11.0-slim-bullseye COPY . /app WORKDIR /app @@ -350,8 +345,7 @@ def test_publish_cloudrun_apt_get_install( ENV PORT 8001 EXPOSE 8001 CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --setting force_https_urls on --port $PORT - """ - ).strip() + """).strip() assert expected == dockerfile diff --git a/tests/test_routes.py b/tests/test_routes.py index 9866cc76..24c702fc 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -63,12 +63,10 @@ async def ds_with_route(): ds.remove_database("_memory") db = Database(ds, is_memory=True, memory_name="route-name-db") ds.add_database(db, name="original-name", route="custom-route-name") - await db.execute_write_script( - """ + await db.execute_write_script(""" create table if not exists t (id integer primary key); insert or replace into t (id) values (1); - """ - ) + """) return ds diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 943a1549..51e40ad1 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -1243,9 +1243,7 @@ async def test_paginate_using_link_header(ds_client, qs): 
reason="generated columns were added in SQLite 3.31.0", ) def test_generated_columns_are_visible_in_datasette(): - with make_app_client( - extra_databases={ - "generated.db": """ + with make_app_client(extra_databases={"generated.db": """ CREATE TABLE generated_columns ( body TEXT, id INT GENERATED ALWAYS AS (json_extract(body, '$.number')) STORED, @@ -1253,9 +1251,7 @@ def test_generated_columns_are_visible_in_datasette(): ); INSERT INTO generated_columns (body) VALUES ( '{"number": 1, "string": "This is a string"}' - );""" - } - ) as client: + );"""}) as client: response = client.get("/generated/generated_columns.json?_shape=array") assert response.json == [ { diff --git a/tests/test_utils.py b/tests/test_utils.py index b8d047e9..85ab9e6b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -201,9 +201,7 @@ def test_detect_fts(open_quote, close_quote): CREATE VIEW Test_View AS SELECT * FROM Dumb_Table; CREATE VIRTUAL TABLE {open}Street_Tree_List_fts{close} USING FTS4 ("qAddress", "qCaretaker", "qSpecies", content={open}Street_Tree_List{close}); CREATE VIRTUAL TABLE r USING rtree(a, b, c); - """.format( - open=open_quote, close=close_quote - ) + """.format(open=open_quote, close=close_quote) conn = utils.sqlite3.connect(":memory:") conn.executescript(sql) assert None is utils.detect_fts(conn, "Dumb_Table") @@ -220,9 +218,7 @@ def test_detect_fts_different_table_names(table): "qSpecies" TEXT ); CREATE VIRTUAL TABLE [{table}_fts] USING FTS4 ("qSpecies", content="{table}"); - """.format( - table=table - ) + """.format(table=table) conn = utils.sqlite3.connect(":memory:") conn.executescript(sql) assert "{table}_fts".format(table=table) == utils.detect_fts(conn, table) @@ -347,27 +343,21 @@ def test_compound_keys_after_sql(): ((a > :p0) or (a = :p0 and b > :p1)) - """.strip() == utils.compound_keys_after_sql( - ["a", "b"] - ) + """.strip() == utils.compound_keys_after_sql(["a", "b"]) assert """ ((a > :p0) or (a = :p0 and b > :p1) or (a = :p0 and b = :p1 and c 
> :p2)) - """.strip() == utils.compound_keys_after_sql( - ["a", "b", "c"] - ) + """.strip() == utils.compound_keys_after_sql(["a", "b", "c"]) def test_table_columns(): conn = sqlite3.connect(":memory:") - conn.executescript( - """ + conn.executescript(""" create table places (id integer primary key, name text, bob integer) - """ - ) + """) assert ["id", "name", "bob"] == utils.table_columns(conn, "places") diff --git a/tests/test_utils_permissions.py b/tests/test_utils_permissions.py index b412de0f..bc3599c2 100644 --- a/tests/test_utils_permissions.py +++ b/tests/test_utils_permissions.py @@ -497,16 +497,14 @@ async def test_actor_actor_id_action_parameters_available(db): def plugin_using_all_parameters() -> Callable[[str], PermissionSQL]: def provider(action: str) -> PermissionSQL: - return PermissionSQL( - """ + return PermissionSQL(""" SELECT NULL AS parent, NULL AS child, 1 AS allow, 'Actor ID: ' || COALESCE(:actor_id, 'null') || ', Actor JSON: ' || COALESCE(:actor, 'null') || ', Action: ' || :action AS reason WHERE :actor_id = 'test_user' AND :action = 'view-table' AND json_extract(:actor, '$.role') = 'admin' - """ - ) + """) return provider diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index 38e5c94e..cb320c06 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -471,7 +471,9 @@ async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny ), request=request, ) - result = await db.execute(f"select value from {table} order by rowid desc limit 1") + result = await db.execute( + f"select value from {table} order by rowid desc limit 1" + ) assert result.rows[0][0] == "test" finally: pm.unregister(name="test_set_authorizer") From 1c6c6d2e6897c1173ed6e209c8b7133688e75c58 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 Feb 2026 13:30:46 -0800 Subject: [PATCH 087/299] Fix test_write_wrapper_set_authorizer: use permissive callback instead of None conn.set_authorizer(None) does not clear 
the authorizer - SQLite treats None as an invalid callback. The denied state persists on the shared write connection, causing subsequent non-deny test cases to fail. Fixes test added in 8a315f3d. --- tests/test_write_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index cb320c06..55e0461e 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -445,7 +445,7 @@ async def test_write_wrapper_set_authorizer(datasette, actor, table, should_deny try: yield finally: - conn.set_authorizer(None) + conn.set_authorizer(lambda *args: sqlite3.SQLITE_OK) return wrapper From 7a66456615cad38d9e70267a14ca30dcc4bca701 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:19:19 -0800 Subject: [PATCH 088/299] black --version --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b1ba3232..a0f5477b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,7 +34,9 @@ jobs: # And the test that exceeds a localhost HTTPS server tests/test_datasette_https_server.sh - name: Black - run: black --check . + run: | + black --version + black --check . 
- name: Ruff run: ruff check datasette tests - name: Check if cog needs to be run From 2f0e64df681c7bf65e8ce3065380be36a4ccd266 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:24:52 -0800 Subject: [PATCH 089/299] black==26.1.0 I'm getting CI failures for Black, maybe this will help --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d9ef2a73..2ab2ce10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ dev = [ "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", - "black==25.11.0", + "black==26.1.0", "blacken-docs==1.20.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 6a2c27b15b300ba1b924ce00a61532943482392e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 Feb 2026 11:28:39 -0800 Subject: [PATCH 090/299] blacken-docs --- docs/plugin_hooks.rst | 13 ++++--------- docs/spatialite.rst | 6 ++---- docs/testing_plugins.rst | 8 ++------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 068469a8..fa335368 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1074,11 +1074,9 @@ You can also return an async function, which will be awaited on startup. Use thi async def inner(): db = datasette.get_database() if "my_table" not in await db.table_names(): - await db.execute_write( - """ + await db.execute_write(""" create table my_table (mycol text) - """ - ) + """) return inner @@ -1561,7 +1559,6 @@ The resolver will automatically apply the most specific rule. 
from datasette import hookimpl from datasette.permissions import PermissionSQL - TRUSTED = {"alice", "bob"} @@ -2261,8 +2258,7 @@ This example logs events to a ``datasette_events`` table in a database called `` def startup(datasette): async def inner(): db = datasette.get_database("events") - await db.execute_write( - """ + await db.execute_write(""" create table if not exists datasette_events ( id integer primary key, event_type text, @@ -2270,8 +2266,7 @@ This example logs events to a ``datasette_events`` table in a database called `` actor text, properties text ) - """ - ) + """) return inner diff --git a/docs/spatialite.rst b/docs/spatialite.rst index fbe0d75f..c93c1e00 100644 --- a/docs/spatialite.rst +++ b/docs/spatialite.rst @@ -90,12 +90,10 @@ Here's a recipe for taking a table with existing latitude and longitude columns, "SELECT AddGeometryColumn('museums', 'point_geom', 4326, 'POINT', 2);" ) # Now update that geometry column with the lat/lon points - conn.execute( - """ + conn.execute(""" UPDATE museums SET point_geom = GeomFromText('POINT('||"longitude"||' '||"latitude"||')',4326); - """ - ) + """) # Now add a spatial index to that column conn.execute( 'select CreateSpatialIndex("museums", "point_geom");' diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index fc1aa6f6..b0713e7c 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -233,15 +233,11 @@ As an example, here's a very simple plugin which executes an HTTP response and r async def fetch_url(datasette, request): if request.method == "GET": - return Response.html( - """ + return Response.html("""
    - """.format( - request.scope["csrftoken"]() - ) - ) + """.format(request.scope["csrftoken"]())) vars = await request.post_vars() url = vars["url"] return Response.text(httpx.get(url).text) From c96dc5ce2656607b9e81743acf600f8fd5f6a795 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:32:45 -0800 Subject: [PATCH 091/299] register_token_handler() plugin hook for custom API token backends (#2650) Closes #2649 * Add register_token_handler plugin hook for pluggable token backends Adds a new register_token_handler hook that allows plugins to provide custom token creation and verification backends. This enables plugins like datasette-oauth to issue tokens without depending on specific backend plugins like datasette-auth-tokens. Key changes: - New datasette/tokens.py with TokenHandler base class and SignedTokenHandler (the default signed-token implementation moved here) - New register_token_handler hookspec in hookspecs.py - Datasette.create_token() is now async and delegates to token handlers - New Datasette.verify_token() method tries all handlers in sequence - handler= parameter on create_token() to select a specific backend - TokenHandler exported from datasette package for plugin use - Fixed actor_from_request loop to await all coroutines (avoids warnings) * Add documentation and hook test for register_token_handler Fixes CI failures: the new hook needs a section in docs/plugin_hooks.rst (checked by test_plugin_hooks_are_documented) and a test_hook_* function in test_plugins.py (checked by test_plugin_hooks_have_tests). * Register tokens module as separate default plugin Instead of re-exporting hookimpls from default_permissions/__init__.py, register datasette.default_permissions.tokens as its own DEFAULT_PLUGINS entry. Cleaner and avoids confusing import-for-side-effect patterns. 
* Replace restrict_x params with TokenRestrictions dataclass Consolidates the three separate restrict_all, restrict_database, and restrict_resource parameters into a single TokenRestrictions dataclass. Cleaner API surface for both Datasette.create_token() and TokenHandler.create_token(). Also clarifies docs re: default handler selection via pluggy ordering. * Add builder methods to TokenRestrictions Adds allow_all(), allow_database(), and allow_resource() methods that return self for chaining. Callers no longer need to manipulate nested dicts directly: restrictions = (TokenRestrictions() .allow_all("view-instance") .allow_database("mydb", "create-table") .allow_resource("mydb", "mytable", "insert-row")) * docs: add 1.0a25 upgrade guide section for create_token() signature change Ref: https://github.com/simonw/datasette/issues/2649#issuecomment-3962639393 * docs: note that create_token() is now async in upgrade guide * docs: update internals, plugin_hooks, authentication for new token API - internals.rst: new async create_token() signature with restrictions and handler params, add TokenRestrictions reference docs - plugin_hooks.rst: show full create_token signature in TokenHandler example, note list returns and error cases - authentication.rst: cross-reference TokenRestrictions from the restrictions section * style: apply black formatting to token handler files * docs: fix RST heading underline length in internals.rst * tests: add restrictions round-trip and expiration tests for token handler Covers allow_database/allow_resource builders, _r payload encoding, and token_expires in verified actors. Coverage 76% -> 90%. 
* tests: add test for signed tokens disabled * fix: add TokenRestrictions TYPE_CHECKING import to fix ruff F821 * docs: regenerate plugins.rst with cog * docs: reformat code blocks in plugin_hooks.rst with blacken-docs * docs: add await .verify_token() to internals.rst * tests: rewrite register_token_handler test to use real plugin handler Adds a HardcodedTokenHandler to the test plugins dir that creates tokens like dstok_hardcoded_token_1. The test now exercises creating tokens via the default handler (which is the plugin's hardcoded one), by explicitly naming the hardcoded handler, and by explicitly naming the signed handler -- then verifies each token round-trips correctly. * tests: clarify test_token_handler_via_http tests the default signed handler * fix: use handler="signed" explicitly where signed tokens are expected The HardcodedTokenHandler in my_plugin.py gets globally registered, so create_token() without a handler name picks it up as the default. Fix the create-token view, CLI, and tests to explicitly request the signed handler where they depend on signed token behavior. 
* fix: use handler="signed" in test_create_table_permissions https://claude.ai/code/session_013cQFiDQjYRrRBH2biFfKuS --- datasette/__init__.py | 1 + datasette/app.py | 102 +++++--- datasette/cli.py | 30 ++- datasette/default_permissions/__init__.py | 1 - datasette/default_permissions/tokens.py | 85 ++---- datasette/hookspecs.py | 5 + datasette/plugins.py | 1 + datasette/tokens.py | 180 +++++++++++++ datasette/views/special.py | 34 +-- docs/authentication.rst | 1 + docs/internals.rst | 81 ++++-- docs/plugin_hooks.rst | 59 +++++ docs/plugins.rst | 11 +- docs/upgrade_guide.md | 40 +++ tests/fixtures.py | 1 + tests/plugins/my_plugin.py | 27 ++ tests/test_api_write.py | 9 +- tests/test_permissions.py | 2 +- tests/test_plugins.py | 32 +++ tests/test_token_handler.py | 301 ++++++++++++++++++++++ 20 files changed, 839 insertions(+), 164 deletions(-) create mode 100644 datasette/tokens.py create mode 100644 tests/test_token_handler.py diff --git a/datasette/__init__.py b/datasette/__init__.py index 47d2b4f6..eb18e59e 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,6 +1,7 @@ from datasette.permissions import Permission # noqa from datasette.version import __version_info__, __version__ # noqa from datasette.events import Event # noqa +from datasette.tokens import TokenHandler, TokenRestrictions # noqa from datasette.utils.asgi import Forbidden, NotFound, Request, Response # noqa from datasette.utils import actor_matches_allow # noqa from datasette.views import Context # noqa diff --git a/datasette/app.py b/datasette/app.py index 6efaa430..2df6e4e8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: from datasette.permissions import Resource + from datasette.tokens import TokenRestrictions import asgi_csrf import collections import dataclasses @@ -713,44 +714,70 @@ class Datasette: """ return _in_datasette_client.get() - def create_token( + def 
_token_handlers(self): + """Collect all registered token handlers from plugins.""" + from datasette.tokens import TokenHandler + + handlers = [] + for result in pm.hook.register_token_handler(datasette=self): + if isinstance(result, TokenHandler): + handlers.append(result) + elif isinstance(result, list): + handlers.extend(h for h in result if isinstance(h, TokenHandler)) + return handlers + + async def create_token( self, actor_id: str, *, expires_after: int | None = None, - restrict_all: Iterable[str] | None = None, - restrict_database: Dict[str, Iterable[str]] | None = None, - restrict_resource: Dict[str, Dict[str, Iterable[str]]] | None = None, - ): - token = {"a": actor_id, "t": int(time.time())} - if expires_after: - token["d"] = expires_after + restrictions: "TokenRestrictions | None" = None, + handler: str | None = None, + ) -> str: + """ + Create an API token for the given actor. - def abbreviate_action(action): - # rename to abbr if possible - action_obj = self.actions.get(action) - if not action_obj: - return action - return action_obj.abbr or action + Uses the first registered token handler by default, or a specific + handler if ``handler`` is provided (matched by handler name). - if expires_after: - token["d"] = expires_after - if restrict_all or restrict_database or restrict_resource: - token["_r"] = {} - if restrict_all: - token["_r"]["a"] = [abbreviate_action(a) for a in restrict_all] - if restrict_database: - token["_r"]["d"] = {} - for database, actions in restrict_database.items(): - token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] - if restrict_resource: - token["_r"]["r"] = {} - for database, resources in restrict_resource.items(): - for resource, actions in resources.items(): - token["_r"]["r"].setdefault(database, {})[resource] = [ - abbreviate_action(a) for a in actions - ] - return "dstok_{}".format(self.sign(token, namespace="token")) + Pass a :class:`TokenRestrictions` to limit which actions the token + can perform. 
+ """ + handlers = self._token_handlers() + if not handlers: + raise RuntimeError("No token handlers are registered") + + if handler is not None: + matched = [h for h in handlers if h.name == handler] + if not matched: + available = [h.name for h in handlers] + raise ValueError( + f"Token handler {handler!r} not found. " + f"Available handlers: {available}" + ) + chosen = matched[0] + else: + chosen = handlers[0] + + return await chosen.create_token( + self, + actor_id, + expires_after=expires_after, + restrictions=restrictions, + ) + + async def verify_token(self, token: str) -> dict | None: + """ + Verify an API token by trying all registered token handlers. + + Returns an actor dict from the first handler that recognizes the + token, or None if no handler accepts it. + """ + for token_handler in self._token_handlers(): + result = await token_handler.verify_token(self, token) + if result is not None: + return result + return None def get_database(self, name=None, route=None): if route is not None: @@ -2159,10 +2186,13 @@ class DatasetteRouter: # Handle authentication default_actor = scope.get("actor") or None actor = None - for actor in pm.hook.actor_from_request(datasette=self.ds, request=request): - actor = await await_me_maybe(actor) - if actor: - break + results = pm.hook.actor_from_request(datasette=self.ds, request=request) + for result in results: + result = await await_me_maybe(result) + if result and actor is None: + actor = result + # Don't break — we must await all coroutines to avoid + # "coroutine was never awaited" warnings scope_modifications["actor"] = actor or default_actor scope = dict(scope, **scope_modifications) diff --git a/datasette/cli.py b/datasette/cli.py index 121911ab..b473fbb7 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -832,21 +832,23 @@ def create_token( err=True, ) - restrict_database = {} - for database, action in databases: - restrict_database.setdefault(database, []).append(action) - restrict_resource = {} - for 
database, resource, action in resources: - restrict_resource.setdefault(database, {}).setdefault(resource, []).append( - action - ) + from datasette.tokens import TokenRestrictions - token = ds.create_token( - id, - expires_after=expires_after, - restrict_all=alls, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrictions = TokenRestrictions() + for action in alls: + restrictions.allow_all(action) + for database, action in databases: + restrictions.allow_database(database, action) + for database, resource, action in resources: + restrictions.allow_resource(database, resource, action) + + token = run_sync( + lambda: ds.create_token( + id, + expires_after=expires_after, + restrictions=restrictions, + handler="signed", + ) ) click.echo(token) if debug: diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 40373fa7..4ebe6147 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -37,7 +37,6 @@ from .defaults import ( default_action_permissions_sql as default_action_permissions_sql, DEFAULT_ALLOW_ACTIONS as DEFAULT_ALLOW_ACTIONS, ) -from .tokens import actor_from_signed_api_token as actor_from_signed_api_token @hookimpl diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py index 474b0c23..7a359dc6 100644 --- a/datasette/default_permissions/tokens.py +++ b/datasette/default_permissions/tokens.py @@ -1,44 +1,35 @@ """ Token authentication for Datasette. -Handles signed API tokens (dstok_ prefix). +Registers the default SignedTokenHandler and delegates token verification +to datasette.verify_token() so all registered handlers are tried. 
""" from __future__ import annotations -import time from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from datasette.app import Datasette -import itsdangerous - from datasette import hookimpl +from datasette.tokens import SignedTokenHandler + + +@hookimpl +def register_token_handler(datasette: "Datasette"): + """Register the default signed token handler.""" + return SignedTokenHandler() @hookimpl(specname="actor_from_request") -def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: +async def actor_from_signed_api_token( + datasette: "Datasette", request +) -> Optional[dict]: """ - Authenticate requests using signed API tokens (dstok_ prefix). - - Token structure (signed JSON): - { - "a": "actor_id", # Actor ID - "t": 1234567890, # Timestamp (Unix epoch) - "d": 3600, # Optional: Duration in seconds - "_r": {...} # Optional: Restrictions - } + Authenticate requests using API tokens by delegating to all registered + token handlers via datasette.verify_token(). 
""" - prefix = "dstok_" - - # Check if tokens are enabled - if not datasette.setting("allow_signed_tokens"): - return None - - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - - # Get authorization header authorization = request.headers.get("authorization") if not authorization: return None @@ -46,50 +37,4 @@ def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dic return None token = authorization[len("Bearer ") :] - if not token.startswith(prefix): - return None - - # Remove prefix and verify signature - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - - # Validate timestamp - if "t" not in decoded: - return None - created = decoded["t"] - if not isinstance(created, int): - return None - - # Handle duration/expiry - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - return None - - # Apply max TTL if configured - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - - # Check expiry - if duration: - if time.time() - created > duration: - return None - - # Build actor dict - actor = {"id": decoded["a"], "token": "dstok"} - - # Copy restrictions if present - if "_r" in decoded: - actor["_r"] = decoded["_r"] - - # Add expiry timestamp if applicable - if duration: - actor["token_expires"] = created + duration - - return actor + return await datasette.verify_token(token) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 89be6a65..64901900 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -222,6 +222,11 @@ def top_canned_query(datasette, request, database, query_name): """HTML to include at the top of the canned query page""" +@hookspec +def register_token_handler(datasette): + """Return a TokenHandler instance for token creation 
and verification""" + + @hookspec def write_wrapper(datasette, database, request, transaction): """Called when a write function is about to execute. diff --git a/datasette/plugins.py b/datasette/plugins.py index e9818885..992137bd 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -23,6 +23,7 @@ DEFAULT_PLUGINS = ( "datasette.sql_functions", "datasette.actor_auth_cookie", "datasette.default_permissions", + "datasette.default_permissions.tokens", "datasette.default_actions", "datasette.default_magic_parameters", "datasette.blob_renderer", diff --git a/datasette/tokens.py b/datasette/tokens.py new file mode 100644 index 00000000..5a12d8e0 --- /dev/null +++ b/datasette/tokens.py @@ -0,0 +1,180 @@ +""" +Token handler system for Datasette. + +Provides a base class for token handlers and the default signed token handler. +Plugins can implement register_token_handler to provide custom token backends +(e.g. database-backed tokens that can be revoked and audited). +""" + +from __future__ import annotations + +import dataclasses +import time +from typing import TYPE_CHECKING, Optional + +import itsdangerous + +if TYPE_CHECKING: + from datasette.app import Datasette + + +@dataclasses.dataclass +class TokenRestrictions: + """ + Restrictions to apply to a token, limiting which actions it can perform. 
+ + Use the builder methods to construct restrictions:: + + restrictions = (TokenRestrictions() + .allow_all("view-instance") + .allow_database("mydb", "create-table") + .allow_resource("mydb", "mytable", "insert-row")) + """ + + all: list[str] = dataclasses.field(default_factory=list) + database: dict[str, list[str]] = dataclasses.field(default_factory=dict) + resource: dict[str, dict[str, list[str]]] = dataclasses.field(default_factory=dict) + + def allow_all(self, action: str) -> "TokenRestrictions": + """Allow an action across all databases and resources.""" + self.all.append(action) + return self + + def allow_database(self, database: str, action: str) -> "TokenRestrictions": + """Allow an action on a specific database.""" + self.database.setdefault(database, []).append(action) + return self + + def allow_resource( + self, database: str, resource: str, action: str + ) -> "TokenRestrictions": + """Allow an action on a specific resource within a database.""" + self.resource.setdefault(database, {}).setdefault(resource, []).append(action) + return self + + +class TokenHandler: + """ + Base class for token handlers. + + Subclass this and implement create_token() and verify_token() to provide + a custom token backend. Return an instance from the register_token_handler hook. + """ + + name: str = "" + + async def create_token( + self, + datasette: "Datasette", + actor_id: str, + *, + expires_after: Optional[int] = None, + restrictions: Optional[TokenRestrictions] = None, + ) -> str: + """Create and return a token string for the given actor.""" + raise NotImplementedError + + async def verify_token(self, datasette: "Datasette", token: str) -> Optional[dict]: + """ + Verify a token and return an actor dict, or None if this handler + does not recognize the token. + """ + raise NotImplementedError + + +class SignedTokenHandler(TokenHandler): + """ + Default token handler using itsdangerous signed tokens (dstok_ prefix). 
+ """ + + name = "signed" + + async def create_token( + self, + datasette: "Datasette", + actor_id: str, + *, + expires_after: Optional[int] = None, + restrictions: Optional[TokenRestrictions] = None, + ) -> str: + if not datasette.setting("allow_signed_tokens"): + raise ValueError( + "Signed tokens are not enabled for this Datasette instance" + ) + + token = {"a": actor_id, "t": int(time.time())} + + def abbreviate_action(action): + action_obj = datasette.actions.get(action) + if not action_obj: + return action + return action_obj.abbr or action + + if expires_after: + token["d"] = expires_after + if restrictions and ( + restrictions.all or restrictions.database or restrictions.resource + ): + token["_r"] = {} + if restrictions.all: + token["_r"]["a"] = [abbreviate_action(a) for a in restrictions.all] + if restrictions.database: + token["_r"]["d"] = {} + for database, actions in restrictions.database.items(): + token["_r"]["d"][database] = [abbreviate_action(a) for a in actions] + if restrictions.resource: + token["_r"]["r"] = {} + for database, resources in restrictions.resource.items(): + for resource, actions in resources.items(): + token["_r"]["r"].setdefault(database, {})[resource] = [ + abbreviate_action(a) for a in actions + ] + return "dstok_{}".format(datasette.sign(token, namespace="token")) + + async def verify_token(self, datasette: "Datasette", token: str) -> Optional[dict]: + prefix = "dstok_" + + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + if not token.startswith(prefix): + return None + + raw = token[len(prefix) :] + try: + decoded = datasette.unsign(raw, namespace="token") + except itsdangerous.BadSignature: + return None + + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + if 
(duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + if duration: + if time.time() - created > duration: + return None + + actor = {"id": decoded["a"], "token": "dstok"} + + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/datasette/views/special.py b/datasette/views/special.py index 640c82eb..dbe5eab1 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -710,42 +710,36 @@ class CreateTokenView(BaseView): errors.append("Invalid expire duration unit") # Are there any restrictions? - restrict_all = [] - restrict_database = {} - restrict_resource = {} + from datasette.tokens import TokenRestrictions + + restrictions = TokenRestrictions() for key in form: if key.startswith("all:") and key.count(":") == 1: - restrict_all.append(key.split(":")[1]) + restrictions.allow_all(key.split(":")[1]) elif key.startswith("database:") and key.count(":") == 2: bits = key.split(":") - database = tilde_decode(bits[1]) - action = bits[2] - restrict_database.setdefault(database, []).append(action) + restrictions.allow_database(tilde_decode(bits[1]), bits[2]) elif key.startswith("resource:") and key.count(":") == 3: bits = key.split(":") - database = tilde_decode(bits[1]) - resource = tilde_decode(bits[2]) - action = bits[3] - restrict_resource.setdefault(database, {}).setdefault( - resource, [] - ).append(action) + restrictions.allow_resource( + tilde_decode(bits[1]), tilde_decode(bits[2]), bits[3] + ) - token = self.ds.create_token( + token = await self.ds.create_token( request.actor["id"], expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrictions=restrictions, + handler="signed", ) token_bits = self.ds.unsign(token[len("dstok_") :], namespace="token") 
await self.ds.track_event( CreateTokenEvent( actor=request.actor, expires_after=expires_after, - restrict_all=restrict_all, - restrict_database=restrict_database, - restrict_resource=restrict_resource, + restrict_all=restrictions.all, + restrict_database=restrictions.database, + restrict_resource=restrictions.resource, ) ) context = await self.shared(request) diff --git a/docs/authentication.rst b/docs/authentication.rst index 69a6f606..1b949f9a 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1072,6 +1072,7 @@ cannot grant new access. If the underlying actor is denied by ``allow`` rules in ``datasette.yaml`` or by a plugin, a token that lists that resource in its ``"_r"`` section will still be denied. +To create tokens with restrictions in Python code, use the :ref:`TokenRestrictions <TokenRestrictions>` builder and pass it to :ref:`datasette.create_token() <datasette_create_token>`. .. _permissions_plugins: diff --git a/docs/internals.rst b/docs/internals.rst index 0491c1f7..7d607bfe 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -673,8 +673,8 @@ This example checks if the user can access a specific table, and sets ``private` .. _datasette_create_token: -.create_token(actor_id, expires_after=None, restrict_all=None, restrict_database=None, restrict_resource=None) --------------------------------------------------------------------------------------------------------------- +await .create_token(actor_id, expires_after=None, restrictions=None, handler=None) +---------------------------------------------------------------------------------- ``actor_id`` - string The ID of the actor to create a token for. @@ -682,16 +682,13 @@ This example checks if the user can access a specific table, and sets ``private` ``expires_after`` - int, optional The number of seconds after which the token should expire. -``restrict_all`` - iterable, optional - A list of actions that this token should be restricted to across all databases and resources.
+``restrictions`` - :ref:`TokenRestrictions <TokenRestrictions>`, optional + A :ref:`TokenRestrictions <TokenRestrictions>` object limiting which actions the token can perform. -``restrict_database`` - dict, optional - For restricting actions within specific databases, e.g. ``{"mydb": ["view-table", "view-query"]}``. +``handler`` - string, optional + The name of a specific token handler to use. If omitted, the first registered handler is used. See :ref:`plugin_hook_register_token_handler`. -``restrict_resource`` - dict, optional - For restricting actions to specific resources (tables, SQL views and :ref:`canned_queries`) within a database. For example: ``{"mydb": {"mytable": ["insert-row", "update-row"]}}``. - -This method returns a signed :ref:`API token ` of the format ``dstok_...`` which can be used to authenticate requests to the Datasette API. +This is an ``async`` method that returns an :ref:`API token ` string which can be used to authenticate requests to the Datasette API. The default ``SignedTokenHandler`` returns tokens of the format ``dstok_...``. All tokens must have an ``actor_id`` string indicating the ID of the actor which the token will act on behalf of. @@ -699,28 +696,72 @@ Tokens default to lasting forever, but can be set to expire after a given number .. code-block:: python - token = datasette.create_token( + token = await datasette.create_token( actor_id="user1", expires_after=3600, ) -The three ``restrict_*`` arguments can be used to create a token that has additional restrictions beyond what the associated actor is allowed to do. +.. _TokenRestrictions: + +TokenRestrictions +~~~~~~~~~~~~~~~~~ + +The ``TokenRestrictions`` class uses a builder pattern to specify which actions a token is allowed to perform. Import it from ``datasette.tokens``: + +..
code-block:: python + + from datasette.tokens import TokenRestrictions + + restrictions = ( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ) + +The builder methods are: + +- ``allow_all(action)`` - allow an action across all databases and resources +- ``allow_database(database, action)`` - allow an action on a specific database +- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`canned query `) within a database + +Each method returns the ``TokenRestrictions`` instance so calls can be chained. The following example creates a token that can access ``view-instance`` and ``view-table`` across everything, can additionally use ``view-query`` for anything in the ``docs`` database and is allowed to execute ``insert-row`` and ``update-row`` in the ``attachments`` table in that database: .. code-block:: python - token = datasette.create_token( + token = await datasette.create_token( actor_id="user1", - restrict_all=("view-instance", "view-table"), - restrict_database={"docs": ("view-query",)}, - restrict_resource={ - "docs": { - "attachments": ("insert-row", "update-row") - } - }, + restrictions=( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ), ) +.. _datasette_verify_token: + +await .verify_token(token) +-------------------------- + +``token`` - string + The token string to verify. + +This is an ``async`` method that verifies an API token by trying each registered token handler in order. Returns an actor dictionary from the first handler that recognizes the token, or ``None`` if no handler accepts it. + +.. 
code-block:: python + + actor = await datasette.verify_token(token) + if actor: + # Token was valid + print(actor["id"]) + .. _datasette_get_database: .get_database(name) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index fa335368..b9701f7c 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -2334,3 +2334,62 @@ The plugin can then call ``datasette.track_event(...)`` to send a ``ban-user`` e BanUserEvent(user={"id": 1, "username": "cleverbot"}) ) +.. _plugin_hook_register_token_handler: + +register_token_handler(datasette) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``datasette`` - :ref:`internals_datasette` + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. + +Return a ``TokenHandler`` instance to provide a custom token creation and verification backend. This hook can return a single ``TokenHandler`` or a list of them. + +The default ``SignedTokenHandler`` uses itsdangerous signed tokens (``dstok_`` prefix). Plugins can provide alternative backends such as database-backed tokens that support revocation and auditing. + +.. code-block:: python + + from datasette import hookimpl, TokenHandler + + + class DatabaseTokenHandler(TokenHandler): + name = "database" + + async def create_token( + self, + datasette, + actor_id, + *, + expires_after=None, + restrictions=None + ): + # Store token in database and return token string + ... + + async def verify_token(self, datasette, token): + # Look up token in database, return actor dict or None + ... + + + @hookimpl + def register_token_handler(datasette): + return DatabaseTokenHandler() + +The ``create_token`` method receives a ``restrictions`` argument which will be a :ref:`TokenRestrictions <TokenRestrictions>` instance or ``None``. + +Tokens can then be created and verified using :ref:`datasette.create_token() <datasette_create_token>` and ``datasette.verify_token()``, which delegate to the registered handlers.
If no ``handler`` is specified, the first handler is used according to `pluggy call-time ordering `_. Use the ``handler`` parameter to select a specific backend by name: + +.. code-block:: python + + # Uses first registered handler (default) + token = await datasette.create_token("user123") + + # Uses a specific handler by name + token = await datasette.create_token( + "user123", handler="database" + ) + + # Verification tries all handlers + actor = await datasette.verify_token(token) + +If no handlers are registered, ``create_token()`` raises ``RuntimeError``. If the requested ``handler`` name is not found, it raises ``ValueError``. + diff --git a/docs/plugins.rst b/docs/plugins.rst index d5a98923..60bdc111 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -231,12 +231,21 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "templates": false, "version": null, "hooks": [ - "actor_from_request", "canned_queries", "permission_resources_sql", "skip_csrf" ] }, + { + "name": "datasette.default_permissions.tokens", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "actor_from_request", + "register_token_handler" + ] + }, { "name": "datasette.events", "static": false, diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index a3c321a4..861a8795 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -114,3 +114,43 @@ Instead, one should use the following methods on a Datasette class: ```{include} upgrade-1.0a20.md :heading-offset: 1 ``` + +(upgrade_guide_v1_a25)= +### Datasette 1.0a25: `create_token()` signature change + +`datasette.create_token()` is now an `async` method (previously it was synchronous). The `restrict_all`, `restrict_database`, and `restrict_resource` keyword arguments have been replaced by a single `restrictions` parameter that accepts a {ref}`TokenRestrictions ` object. 
+ +Old code: + +```python +token = datasette.create_token( + actor_id="user1", + restrict_all=["view-instance", "view-table"], + restrict_database={"docs": ["view-query"]}, + restrict_resource={ + "docs": { + "attachments": ["insert-row", "update-row"] + } + }, +) +``` + +New code: + +```python +from datasette.tokens import TokenRestrictions + +token = await datasette.create_token( + actor_id="user1", + restrictions=( + TokenRestrictions() + .allow_all("view-instance") + .allow_all("view-table") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + .allow_resource("docs", "attachments", "update-row") + ), +) +``` + +The `datasette create-token` CLI command is unchanged. diff --git a/tests/fixtures.py b/tests/fixtures.py index 9f99519a..1f6c491d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -51,6 +51,7 @@ EXPECTED_PLUGINS = [ "register_facet_classes", "register_magic_parameters", "register_routes", + "register_token_handler", "render_cell", "row_actions", "skip_csrf", diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 20e7d111..77079557 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -1,6 +1,7 @@ import asyncio from datasette import hookimpl from datasette.facets import Facet +from datasette.tokens import TokenHandler from datasette import tracer from datasette.permissions import Action from datasette.resources import DatabaseResource @@ -586,3 +587,29 @@ def permission_resources_sql(datasette, actor, action): return PermissionSQL.allow(reason=f"todomvc actor allowed for {action}") return None + + +class HardcodedTokenHandler(TokenHandler): + name = "hardcoded" + _counter = 0 + + async def create_token( + self, + datasette, + actor_id, + *, + expires_after=None, + restrictions=None, + ): + HardcodedTokenHandler._counter += 1 + return f"dstok_hardcoded_token_{HardcodedTokenHandler._counter}" + + async def verify_token(self, datasette, token): + if 
token.startswith("dstok_hardcoded_token_"): + return {"id": "hardcoded-actor", "token": "hardcoded"} + return None + + +@hookimpl +def register_token_handler(datasette): + return HardcodedTokenHandler() diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 05835e51..e59c4295 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -1362,7 +1362,14 @@ async def test_create_table( async def test_create_table_permissions( ds_write, permissions, body, expected_status, expected_errors ): - token = ds_write.create_token("root", restrict_all=["view-instance"] + permissions) + from datasette.tokens import TokenRestrictions + + restrictions = TokenRestrictions() + for action in ["view-instance"] + permissions: + restrictions.allow_all(action) + token = await ds_write.create_token( + "root", handler="signed", restrictions=restrictions + ) response = await ds_write.client.post( "/data/-/create", json=body, diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 96c0cf6f..42a19ca4 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1657,7 +1657,7 @@ async def test_permission_check_view_requires_debug_permission(): # Root user should have access (root has all permissions) ds_with_root = Datasette() ds_with_root.root_enabled = True - root_token = ds_with_root.create_token("root") + root_token = await ds_with_root.create_token("root", handler="signed") response = await ds_with_root.client.get( "/-/check.json?action=view-instance", headers={"Authorization": f"Bearer {root_token}"}, diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 754b199c..fa9d1a1f 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1566,6 +1566,38 @@ async def test_hook_register_events(): assert any(k.__name__ == "OneEvent" for k in datasette.event_classes) +@pytest.mark.asyncio +async def test_hook_register_token_handler(ds_client): + handlers = ds_client.ds._token_handlers() + handler_names = [h.name for h in 
handlers] + # Both the default signed handler and the test hardcoded handler + assert "signed" in handler_names + assert "hardcoded" in handler_names + + # Create a token using the hardcoded handler (first registered from plugins dir) + token = await ds_client.ds.create_token("test-user") + assert token.startswith("dstok_hardcoded_token_") + + # Verify it + actor = await ds_client.ds.verify_token(token) + assert actor["id"] == "hardcoded-actor" + assert actor["token"] == "hardcoded" + + # Create a token by explicitly requesting the hardcoded handler by name + token2 = await ds_client.ds.create_token("test-user", handler="hardcoded") + assert token2.startswith("dstok_hardcoded_token_") + actor2 = await ds_client.ds.verify_token(token2) + assert actor2["id"] == "hardcoded-actor" + + # Create a token by explicitly requesting the signed handler by name + signed_token = await ds_client.ds.create_token("test-user", handler="signed") + assert signed_token.startswith("dstok_") + assert not signed_token.startswith("dstok_hardcoded_token_") + signed_actor = await ds_client.ds.verify_token(signed_token) + assert signed_actor["id"] == "test-user" + assert signed_actor["token"] == "dstok" + + @pytest.mark.asyncio async def test_hook_write_wrapper(): datasette = Datasette(memory=True) diff --git a/tests/test_token_handler.py b/tests/test_token_handler.py new file mode 100644 index 00000000..83f09046 --- /dev/null +++ b/tests/test_token_handler.py @@ -0,0 +1,301 @@ +""" +Tests for the register_token_handler plugin hook. 
+""" + +from datasette.app import Datasette +from datasette.hookspecs import hookimpl +from datasette.plugins import pm +from datasette.tokens import TokenHandler, TokenRestrictions, SignedTokenHandler +import pytest + + +@pytest.fixture +def datasette(): + return Datasette() + + +@pytest.mark.asyncio +async def test_default_signed_handler_registered(datasette): + """The default SignedTokenHandler should be registered automatically.""" + handlers = datasette._token_handlers() + assert len(handlers) >= 1 + assert any(isinstance(h, SignedTokenHandler) for h in handlers) + assert any(h.name == "signed" for h in handlers) + + +@pytest.mark.asyncio +async def test_create_token_default(datasette): + """create_token() with handler='signed' should create a signed token.""" + token = await datasette.create_token("test_actor", handler="signed") + assert token.startswith("dstok_") + + +@pytest.mark.asyncio +async def test_create_token_with_restrictions(datasette): + """create_token() should handle restriction parameters.""" + token = await datasette.create_token( + "test_actor", + handler="signed", + expires_after=3600, + restrictions=TokenRestrictions().allow_all("view-instance"), + ) + assert token.startswith("dstok_") + # Verify the token contains the expected data + decoded = datasette.unsign(token[len("dstok_") :], namespace="token") + assert decoded["a"] == "test_actor" + assert decoded["d"] == 3600 + assert "_r" in decoded + assert "a" in decoded["_r"] + + +@pytest.mark.asyncio +async def test_verify_token_default(datasette): + """verify_token() should verify signed tokens.""" + token = await datasette.create_token("test_actor", handler="signed") + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert actor["token"] == "dstok" + + +@pytest.mark.asyncio +async def test_verify_token_unknown_returns_none(datasette): + """verify_token() should return None for unrecognized tokens.""" + result = await 
datasette.verify_token("unknown_token_format_xyz") + assert result is None + + +@pytest.mark.asyncio +async def test_verify_token_bad_signature_returns_none(datasette): + """verify_token() should return None for tokens with bad signatures.""" + result = await datasette.verify_token("dstok_tampered_data_here") + assert result is None + + +@pytest.mark.asyncio +async def test_create_token_with_named_handler(datasette): + """create_token(handler='signed') should select the signed handler.""" + token = await datasette.create_token("test_actor", handler="signed") + assert token.startswith("dstok_") + + +@pytest.mark.asyncio +async def test_create_token_unknown_handler_raises(datasette): + """create_token(handler='nonexistent') should raise ValueError.""" + with pytest.raises(ValueError, match="Token handler 'nonexistent' not found"): + await datasette.create_token("test_actor", handler="nonexistent") + + +@pytest.mark.asyncio +async def test_custom_token_handler(datasette): + """A custom token handler should be usable for both create and verify.""" + + class CustomHandler(TokenHandler): + name = "custom" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"custom_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("custom_"): + return {"id": token[len("custom_") :], "token": "custom"} + return None + + class Plugin: + __name__ = "CustomTokenPlugin" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return CustomHandler() + + pm.register(Plugin(), name="test_custom_handler") + try: + handlers = datasette._token_handlers() + assert any(h.name == "custom" for h in handlers) + + # Create with custom handler + token = await datasette.create_token("alice", handler="custom") + assert token == "custom_alice" + + # Verify custom token + actor = await datasette.verify_token("custom_alice") + assert actor is not None + assert actor["id"] == "alice" + assert actor["token"] == "custom" + + # Signed tokens 
should still work + signed_token = await datasette.create_token("bob", handler="signed") + assert signed_token.startswith("dstok_") + actor = await datasette.verify_token(signed_token) + assert actor["id"] == "bob" + finally: + pm.unregister(name="test_custom_handler") + + +@pytest.mark.asyncio +async def test_verify_token_tries_all_handlers(datasette): + """verify_token() should try each handler until one matches.""" + + class HandlerA(TokenHandler): + name = "handler_a" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"a_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("a_"): + return {"id": token[2:], "token": "handler_a"} + return None + + class HandlerB(TokenHandler): + name = "handler_b" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"b_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("b_"): + return {"id": token[2:], "token": "handler_b"} + return None + + class PluginA: + __name__ = "PluginA" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return HandlerA() + + class PluginB: + __name__ = "PluginB" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return HandlerB() + + pm.register(PluginA(), name="test_handler_a") + pm.register(PluginB(), name="test_handler_b") + try: + # Both handler tokens should verify + actor_a = await datasette.verify_token("a_alice") + assert actor_a is not None + assert actor_a["id"] == "alice" + assert actor_a["token"] == "handler_a" + + actor_b = await datasette.verify_token("b_bob") + assert actor_b is not None + assert actor_b["id"] == "bob" + assert actor_b["token"] == "handler_b" + + # Unknown token should return None + assert await datasette.verify_token("c_charlie") is None + finally: + pm.unregister(name="test_handler_a") + pm.unregister(name="test_handler_b") + + +@pytest.mark.asyncio +async def test_token_handler_via_http(datasette): + """Default 
signed tokens should work through HTTP auth.""" + token = await datasette.create_token("http_user", handler="signed") + response = await datasette.client.get( + "/-/actor.json", + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + actor = response.json()["actor"] + assert actor["id"] == "http_user" + assert actor["token"] == "dstok" + + +@pytest.mark.asyncio +async def test_custom_handler_via_http(datasette): + """Custom handler tokens should work through HTTP auth.""" + + class CustomHandler(TokenHandler): + name = "custom_http" + + async def create_token(self, datasette, actor_id, **kwargs): + return f"chttp_{actor_id}" + + async def verify_token(self, datasette, token): + if token.startswith("chttp_"): + return {"id": token[len("chttp_") :], "token": "custom_http"} + return None + + class Plugin: + __name__ = "CustomHTTPPlugin" + + @staticmethod + @hookimpl + def register_token_handler(datasette): + return CustomHandler() + + pm.register(Plugin(), name="test_custom_http") + try: + token = await datasette.create_token("web_user", handler="custom_http") + response = await datasette.client.get( + "/-/actor.json", + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + actor = response.json()["actor"] + assert actor["id"] == "web_user" + assert actor["token"] == "custom_http" + finally: + pm.unregister(name="test_custom_http") + + +@pytest.mark.asyncio +async def test_token_handler_base_class_raises(): + """TokenHandler base class methods should raise NotImplementedError.""" + handler = TokenHandler() + ds = Datasette() + with pytest.raises(NotImplementedError): + await handler.create_token(ds, "test") + with pytest.raises(NotImplementedError): + await handler.verify_token(ds, "test") + + +@pytest.mark.asyncio +async def test_restrictions_round_trip(datasette): + """Tokens with database/resource restrictions should round-trip correctly.""" + restrictions = ( + TokenRestrictions() + 
.allow_all("view-instance") + .allow_database("docs", "view-query") + .allow_resource("docs", "attachments", "insert-row") + ) + token = await datasette.create_token( + "test_actor", handler="signed", restrictions=restrictions + ) + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert actor["_r"]["a"] == ["view-instance"] + assert actor["_r"]["d"] == {"docs": ["view-query"]} + assert actor["_r"]["r"] == {"docs": {"attachments": ["insert-row"]}} + + +@pytest.mark.asyncio +async def test_expires_after_round_trip(datasette): + """Tokens with expires_after should include token_expires in the actor.""" + token = await datasette.create_token( + "test_actor", handler="signed", expires_after=3600 + ) + actor = await datasette.verify_token(token) + assert actor is not None + assert actor["id"] == "test_actor" + assert "token_expires" in actor + + +@pytest.mark.asyncio +async def test_signed_tokens_disabled(): + """create_token and verify_token should fail/skip when signed tokens are disabled.""" + ds = Datasette(settings={"allow_signed_tokens": False}) + with pytest.raises(ValueError, match="Signed tokens are not enabled"): + await ds.create_token("test_actor", handler="signed") + # verify_token should return None rather than raising + assert await ds.verify_token("dstok_anything") is None From 24d801b7f799912cb4eb897a97e4f4a9fe76b966 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:33:27 -0800 Subject: [PATCH 092/299] Respect metadata-defined facet ordering in sorted_facet_results (#2648) * Preserve metadata-defined facet ordering on table pages When facets are explicitly defined in table metadata/config, they now appear in the order specified in the configuration rather than being sorted by result count. Request-added facets still appear after metadata-defined facets, sorted by count as before. 
* Document metadata-defined facet ordering behavior * Apply black formatting https://claude.ai/code/session_01PbSHtjsUpNk3Fx7xjvVqDb --- datasette/views/table.py | 34 ++++++++++++++++++++++++++----- docs/facets.rst | 2 ++ tests/test_facets.py | 44 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 594e925e..e1e5507f 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1580,11 +1580,35 @@ async def table_view_data( ] async def extra_sorted_facet_results(extra_facet_results): - return sorted( - extra_facet_results["results"].values(), - key=lambda f: (len(f["results"]), f["name"]), - reverse=True, - ) + facet_configs = table_metadata.get("facets", []) + if facet_configs: + # Build ordered list of facet names from metadata config + metadata_facet_names = [] + for fc in facet_configs: + if isinstance(fc, str): + metadata_facet_names.append(fc) + elif isinstance(fc, dict): + metadata_facet_names.append(list(fc.values())[0]) + metadata_order = {name: i for i, name in enumerate(metadata_facet_names)} + metadata_facets = [] + request_facets = [] + for f in extra_facet_results["results"].values(): + if f["name"] in metadata_order: + metadata_facets.append(f) + else: + request_facets.append(f) + metadata_facets.sort(key=lambda f: metadata_order[f["name"]]) + request_facets.sort( + key=lambda f: (len(f["results"]), f["name"]), + reverse=True, + ) + return metadata_facets + request_facets + else: + return sorted( + extra_facet_results["results"].values(), + key=lambda f: (len(f["results"]), f["name"]), + reverse=True, + ) async def extra_table_definition(): return await db.get_table_definition(table_name) diff --git a/docs/facets.rst b/docs/facets.rst index 15fe7227..2a135b69 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -153,6 +153,8 @@ Here's an example that turns on faceting by default for the ``qLegalStatus`` col Facets defined in this 
way will always be shown in the interface and returned in the API, regardless of the ``_facet`` arguments passed to the view. +Facets defined in metadata will be displayed in the order they are listed in the configuration. Any additional facets added via query string parameters (e.g. ``?_facet=column_name``) will appear after the metadata-defined facets, sorted by the number of unique values. + You can specify :ref:`array ` or :ref:`date ` facets in metadata using JSON objects with a single key of ``array`` or ``date`` and a value specifying the column, like this: .. [[[cog diff --git a/tests/test_facets.py b/tests/test_facets.py index a2b505ec..8c22ffce 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -623,12 +623,48 @@ def test_other_types_of_facet_in_metadata(): } ) as client: response = client.get("/fixtures/facetable") - for fragment in ( - "created (date)\n", - "tags (array)\n", + fragments = ( "state\n", - ): + "tags (array)\n", + "created (date)\n", + ) + for fragment in fragments: assert fragment in response.text + # Verify they appear in the metadata-defined order + positions = [response.text.index(f) for f in fragments] + assert positions == sorted( + positions + ), "Facets should appear in metadata-defined order" + + +def test_metadata_facet_ordering(): + with make_app_client( + metadata={ + "databases": { + "fixtures": { + "tables": { + "facetable": { + "facets": ["state", {"array": "tags"}, {"date": "created"}] + } + } + } + } + } + ) as client: + # JSON response should have facets in the metadata-defined order + response = client.get("/fixtures/facetable.json?_extra=sorted_facet_results") + data = response.json + facet_names = [f["name"] for f in data["sorted_facet_results"]] + assert facet_names == ["state", "tags", "created"] + + # With an additional request-based facet, metadata facets come first + # in their defined order, followed by request-based facets + response2 = client.get( + 
"/fixtures/facetable.json?_extra=sorted_facet_results&_facet=_city_id" + ) + data2 = response2.json + facet_names2 = [f["name"] for f in data2["sorted_facet_results"]] + assert facet_names2 == ["state", "tags", "created", "_city_id"] @pytest.mark.asyncio From 2bc1dd2275978e75622c5764729a4273ebac957e Mon Sep 17 00:00:00 2001 From: Daniel Bates Date: Wed, 25 Feb 2026 16:46:29 -0800 Subject: [PATCH 093/299] Fix --reload interpreting 'serve' command as a file argument (#2646) When hupper spawns the worker process, it calls the function specified by worker_path directly. Using "datasette.cli.serve" causes Click to parse sys.argv without going through the CLI group, so the literal word "serve" from the original command gets treated as a positional file argument. Change the worker path to "datasette.cli.cli" so the worker process goes through the Click group dispatcher, which properly recognizes "serve" as a subcommand and strips it from the argument list. Closes #2123 Co-authored-by: Claude Opus 4.6 Co-authored-by: Simon Willison --- datasette/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index b473fbb7..db777fe8 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -547,7 +547,7 @@ def serve( if reload: import hupper - reloader = hupper.start_reloader("datasette.cli.serve") + reloader = hupper.start_reloader("datasette.cli.cli") if immutable: reloader.watch_files(immutable) if config: From 1246c6576bb2f1ba9dc5c7d9811427d00d440976 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:49:14 -0800 Subject: [PATCH 094/299] Release 1.0a25 Refs #2636, #2641, #2646, #2647, #2650 --- docs/changelog.rst | 41 +++++++++++++++++++++++++++++++++++++++++ docs/contributing.rst | 1 + docs/upgrade-1.0a20.md | 1 - docs/upgrade_guide.md | 1 + 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 67ceeece..c0467793 100644 --- a/docs/changelog.rst +++ 
b/docs/changelog.rst @@ -4,6 +4,47 @@ Changelog ========= +.. _v1_0_a25: + +1.0a25 (2026-02-25) +------------------- + +``write_wrapper`` plugin hook for intercepting write operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new :ref:`write_wrapper() ` plugin hook allows plugins to intercept and wrap database write operations. (`#2636 `__) + +Plugins implement the hook as a generator-based context manager: + +.. code-block:: python + + @hookimpl + def write_wrapper(datasette, database, request): + def wrapper(conn): + # Setup code runs before the write + yield + # Cleanup code runs after the write + + return wrapper + +``register_token_handler()`` plugin hook for custom API token backends +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new :ref:`register_token_handler() ` plugin hook allows plugins to provide custom token backends for API authentication. (`#2650 `__) + +This includes a **backwards incompatible change**: the ``datasette.create_token()`` internal method is now an ``async`` method. Consult the :ref:`upgrade guide ` for details on how to update your code. + +``render_cell()`` now receives a ``pks`` parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :ref:`render_cell() ` plugin hook now receives a ``pks`` parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. (`#2641 `__) + +Other changes +~~~~~~~~~~~~~ + +- Facets defined in metadata now preserve their configured order, instead of being sorted by result count. Request-based facets added via the ``_facet`` parameter are still sorted by result count and appear after metadata-defined facets. (:issue:`2647`) +- Fixed ``--reload`` incorrectly interpreting the ``serve`` command as a file argument. Thanks, `Daniel Bates `__. (`#2646 `__) + .. 
_v1_0_a24: 1.0a24 (2026-01-29) diff --git a/docs/contributing.rst b/docs/contributing.rst index 3d41a125..635ca60e 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -90,6 +90,7 @@ If you want to change Datasette's Python code you can use the ``--reload`` optio You can also use the ``fixtures.py`` script to recreate the testing version of ``metadata.json`` used by the unit tests. To do that:: uv run python tests/fixtures.py fixtures.db fixtures-metadata.json + Or to output the plugins used by the tests, run this:: uv run python tests/fixtures.py fixtures.db fixtures-metadata.json fixtures-plugins diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index 749d383c..fbc3f4a8 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,7 +2,6 @@ orphan: true --- -(upgrade_guide_v1_a20)= # Datasette 1.0a20 plugin upgrade guide Datasette 1.0a20 makes some breaking changes to Datasette's permission system. Plugins need to be updated if they use **any of the following**: diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index 861a8795..b67eb054 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -111,6 +111,7 @@ Instead, one should use the following methods on a Datasette class: - {ref}`get_resource_metadata() ` - {ref}`get_column_metadata() ` +(upgrade_guide_v1_a20)= ```{include} upgrade-1.0a20.md :heading-offset: 1 ``` From e4ff5e27d356ca5b3c807e821acedf8c71c37e47 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 16:54:51 -0800 Subject: [PATCH 095/299] Fix RST heading underlin --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index c0467793..1e6a8e90 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,7 +35,7 @@ A new :ref:`register_token_handler() ` plugi This includes a **backwards incompatible change**: the ``datasette.create_token()`` internal method is now an ``async`` method. 
Consult the :ref:`upgrade guide ` for details on how to update your code. ``render_cell()`` now receives a ``pks`` parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The :ref:`render_cell() ` plugin hook now receives a ``pks`` parameter containing the list of primary key column names for the table being rendered. This avoids plugins needing to make redundant async calls to look up primary keys. (`#2641 `__) From 8f0d60236f844a6d12bd1439f57b1b3d65fcad36 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 17:01:03 -0800 Subject: [PATCH 096/299] Bump version for 1.0a25 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index de7585ca..2907e537 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a24" +__version__ = "1.0a25" __version_info__ = tuple(__version__.split(".")) From 1263380ea6b138ac63683edfd525323c6fe8eef9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 25 Feb 2026 20:50:46 -0800 Subject: [PATCH 097/299] Better heading for write_wrapper() --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 1e6a8e90..2c9b7170 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,8 +9,8 @@ Changelog 1.0a25 (2026-02-25) ------------------- -``write_wrapper`` plugin hook for intercepting write operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``write_wrapper()`` plugin hook for intercepting write operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A new :ref:`write_wrapper() ` plugin hook allows plugins to intercept and wrap database write operations. 
(`#2636 `__) From 97201f067c4f64b00ccf7e02f787d65c767f9bc9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 6 Mar 2026 20:16:50 -0800 Subject: [PATCH 098/299] Row pages link to foreign keys from table display, closes #1592 https://gisthost.github.io/?40813f5b3e4d83c0efe1c09135f84290/index.html Also now shows primary key column first and in bold on that page. --- datasette/views/row.py | 64 ++++++++++++++++++++++++++++++++++++++++-- tests/test_html.py | 32 +++++++++++++++++---- 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/datasette/views/row.py b/datasette/views/row.py index 9c59cd3b..7cc46368 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -5,12 +5,14 @@ from datasette.resources import TableResource from .base import DataView, BaseView, _error from datasette.utils import ( await_me_maybe, + CustomRow, make_slot_function, to_css_class, escape_sqlite, ) from datasette.plugins import pm import json +import markupsafe import sqlite_utils from .table import display_columns_and_rows, _get_extras @@ -42,13 +44,62 @@ class RowView(DataView): if not rows: raise NotFound(f"Record not found: {pk_values}") + pks = resolved.pks + async def template_data(): + # Reorder columns so primary keys come first + pk_set = set(pks) + pk_cols = [d for d in results.description if d[0] in pk_set] + non_pk_cols = [d for d in results.description if d[0] not in pk_set] + reordered_description = pk_cols + non_pk_cols + reordered_columns = [d[0] for d in reordered_description] + + # Reorder row data to match + reordered_rows = [] + for row in rows: + new_row = CustomRow(reordered_columns) + for col in reordered_columns: + new_row[col] = row[col] + reordered_rows.append(new_row) + + # Expand foreign key columns into dicts so display_columns_and_rows + # renders them as hyperlinks, matching the table view behavior + expanded_rows = reordered_rows + for fk in await db.foreign_keys_for_table(table): + column = fk["column"] + if column not in 
reordered_columns: + continue + column_index = reordered_columns.index(column) + values = [row[column_index] for row in expanded_rows] + expanded_labels = await self.ds.expand_foreign_keys( + request.actor, database, table, column, values + ) + if expanded_labels: + new_rows = [] + for row in expanded_rows: + new_row = CustomRow(reordered_columns) + for col in reordered_columns: + value = row[col] + if ( + col == column + and (col, value) in expanded_labels + and value is not None + ): + new_row[col] = { + "value": value, + "label": expanded_labels[(col, value)], + } + else: + new_row[col] = value + new_rows.append(new_row) + expanded_rows = new_rows + display_columns, display_rows = await display_columns_and_rows( self.ds, database, table, - results.description, - rows, + reordered_description, + expanded_rows, link_column=False, truncate_cells=0, request=request, @@ -56,6 +107,14 @@ class RowView(DataView): for column in display_columns: column["sortable"] = False + # Bold primary key cell values + for row in display_rows: + for cell in row: + if cell["column"] in pk_set: + cell["value"] = markupsafe.Markup( + "{}".format(cell["value"]) + ) + row_actions = [] for hook in pm.hook.row_actions( datasette=self.ds, @@ -71,6 +130,7 @@ class RowView(DataView): return { "private": private, + "columns": reordered_columns, "foreign_key_tables": await self.foreign_key_tables( database, table, pk_values ), diff --git a/tests/test_html.py b/tests/test_html.py index 757f3e6e..64ae7b2d 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -347,7 +347,7 @@ async def test_row_html_simple_primary_key(ds_client): assert ["id", "content"] == [th.string.strip() for th in table.select("thead th")] assert [ [ - '
    ', + '', '', ] ] == [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")] @@ -363,7 +363,7 @@ async def test_row_html_no_primary_key(ds_client): ] expected = [ [ - '', + '', '', '', '', @@ -406,6 +406,26 @@ async def test_row_links_from_other_tables( assert link == expected_link +@pytest.mark.asyncio +async def test_row_foreign_key_links(ds_client): + # Row detail page should render foreign key values as hyperlinks + response = await ds_client.get("/fixtures/foreign_key_references/1") + assert response.status_code == 200 + soup = Soup(response.text, "html.parser") + # foreign_key_with_label=1 references simple_primary_key(id=1, content="hello") + td = soup.find("td", {"class": "col-foreign_key_with_label"}) + a = td.find("a") + assert a is not None, "Expected foreign key value to be a hyperlink" + assert a["href"] == "/fixtures/simple_primary_key/1" + assert a.text == "hello" + # Primary key column should be first and bold + table = soup.find("table") + headers = [th.text.strip() for th in table.select("thead th")] + assert headers[0] == "pk" + first_td = table.select("tbody tr td")[0] + assert first_td.find("strong") is not None, "PK value should be bold" + + @pytest.mark.asyncio @pytest.mark.parametrize( "path,expected", @@ -414,8 +434,8 @@ async def test_row_links_from_other_tables( "/fixtures/compound_primary_key/a,b", [ [ - '', - '', + '', + '', '', ] ], @@ -424,8 +444,8 @@ async def test_row_links_from_other_tables( "/fixtures/compound_primary_key/a~2Fb,~2Ec~2Dd", [ [ - '', - '', + '', + '', '', ] ], From e2c1e81ec9505f02566de840c1dba5ea7b0b121d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Mar 2026 17:45:24 -0700 Subject: [PATCH 099/299] UI for selecting and re-ordering columns on the table page (#2662) New Web Component on table/view page with a dialog for selecting and re-ordering columns. 
Closes #2661 Refs #1298 --- datasette/static/app.css | 19 + datasette/static/column-chooser.js | 698 ++++++++++++++++++++++++++ datasette/static/navigation-search.js | 13 +- datasette/static/table.js | 58 +++ datasette/templates/table.html | 9 + datasette/views/table.py | 6 + tests/test_html.py | 23 +- tests/test_table_html.py | 63 +++ 8 files changed, 882 insertions(+), 7 deletions(-) create mode 100644 datasette/static/column-chooser.js diff --git a/datasette/static/app.css b/datasette/static/app.css index a7fc7fa3..4183b58e 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -63,6 +63,14 @@ em { } /* end reset */ +/* Modal CSS variables (shared by web components via Shadow DOM) */ +:root { + --modal-backdrop-bg: rgba(0, 0, 0, 0.5); + --modal-backdrop-blur: blur(4px); + --modal-border-radius: 0.75rem; + --modal-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04); + --modal-animation-duration: 0.2s; +} body { margin: 0; @@ -795,6 +803,17 @@ p.zero-results { .filters input.filter-value { width: 140px; } + button.choose-columns-mobile { + display: inline-block; + padding: 0.5rem 1rem; + margin-bottom: 1em; + font-size: 0.9rem; + font-family: inherit; + background: white; + border: 1px solid #ccc; + border-radius: 5px; + cursor: pointer; + } } svg.dropdown-menu-icon { diff --git a/datasette/static/column-chooser.js b/datasette/static/column-chooser.js new file mode 100644 index 00000000..9680398c --- /dev/null +++ b/datasette/static/column-chooser.js @@ -0,0 +1,698 @@ +class ColumnChooser extends HTMLElement { + constructor() { + super(); + this.attachShadow({ mode: "open" }); + + // State + this._items = []; + this._checked = new Set(); + this._savedItems = null; + this._savedChecked = null; + this._onApply = null; + + // Drag state + this._ghost = null; + this._dragSrcIdx = null; + this._dropTargetIdx = null; + this._dropPosition = null; + this._ghostOffX = 0; + this._ghostOffY = 0; + this._autoScrollRAF = null; + 
this._lastPointerY = 0; + this._lastPointerX = 0; + this._SCROLL_ZONE = 72; + this._SCROLL_SPEED = 0.4; + + // Bound handlers + this._onMove = this._onMove.bind(this); + this._onUp = this._onUp.bind(this); + + this.shadowRoot.innerHTML = ` + + + + +
    + + +
    +
    +
    +
    +
      +
      + +
      + `; + + // DOM refs + this._dialog = this.shadowRoot.querySelector("dialog"); + this._listWrap = this.shadowRoot.getElementById("listWrap"); + this._dragList = this.shadowRoot.getElementById("dragList"); + this._pulseTop = this.shadowRoot.getElementById("pulseTop"); + this._pulseBot = this.shadowRoot.getElementById("pulseBot"); + this._selectAllBtn = this.shadowRoot.getElementById("selectAllBtn"); + this._deselectAllBtn = this.shadowRoot.getElementById("deselectAllBtn"); + this._cancelBtn = this.shadowRoot.getElementById("cancelBtn"); + this._applyBtn = this.shadowRoot.getElementById("applyBtn"); + this._countEl = this.shadowRoot.getElementById("selectedCount"); + this._footerEl = this.shadowRoot.getElementById("footerInfo"); + + // Event listeners + this._selectAllBtn.addEventListener("click", () => this._selectAll()); + this._deselectAllBtn.addEventListener("click", () => this._deselectAll()); + this._cancelBtn.addEventListener("click", () => this._close()); + this._applyBtn.addEventListener("click", () => this._apply()); + this._dialog.addEventListener("click", (e) => { + if (e.target === this._dialog) this._close(); + }); + this._dialog.addEventListener("cancel", (e) => { + e.preventDefault(); + this._close(); + }); + } + + /** + * Open the column chooser dialog. + * @param {Object} opts + * @param {string[]} opts.columns - All available column names, in display order. + * @param {string[]} opts.selected - Column names that should be pre-checked. + * @param {function(string[]): void} opts.onApply - Called with the selected columns in order when Apply is clicked. 
+ */ + open({ columns, selected = [], onApply }) { + this._items = [...columns]; + this._checked = new Set(selected); + this._onApply = onApply || null; + + // Save state for cancel/restore + this._savedItems = [...this._items]; + this._savedChecked = new Set(this._checked); + + this._render(); + this._dialog.showModal(); + } + + // ── Internal methods ── + + _close() { + this._items = this._savedItems ? [...this._savedItems] : this._items; + this._checked = this._savedChecked + ? new Set(this._savedChecked) + : this._checked; + this._dialog.close(); + } + + _selectAll() { + this._items.forEach((col) => this._checked.add(col)); + this._dragList.querySelectorAll('input[type="checkbox"]').forEach((cb) => { + cb.checked = true; + }); + this._updateCounts(); + } + + _deselectAll() { + this._checked.clear(); + this._dragList.querySelectorAll('input[type="checkbox"]').forEach((cb) => { + cb.checked = false; + }); + this._updateCounts(); + } + + _apply() { + const selected = this._items.filter((col) => this._checked.has(col)); + this._dialog.close(); + if (this._onApply) { + this._onApply(selected); + } + } + + _render() { + this._dragList.innerHTML = ""; + this._items.forEach((col, i) => { + const li = document.createElement("li"); + li.className = "drag-item"; + li.dataset.idx = i; + li.innerHTML = ` + + + + + + + + + + + +
      + `; + + li.querySelector("input").addEventListener("change", (e) => { + e.target.checked ? this._checked.add(col) : this._checked.delete(col); + this._updateCounts(); + }); + + li.querySelector(".drag-handle").addEventListener("pointerdown", (e) => + this._startDrag(e, i), + ); + this._dragList.appendChild(li); + }); + + this._updateCounts(); + } + + _updateCounts() { + const n = this._checked.size; + this._countEl.textContent = `${n} of ${this._items.length} selected`; + this._footerEl.textContent = `${this._items.length} columns`; + } + + // ── Drag engine ── + + _startDrag(e, idx) { + e.preventDefault(); + this._dragSrcIdx = idx; + + const srcEl = this._dragList.children[idx]; + const rect = srcEl.getBoundingClientRect(); + + this._ghostOffX = e.clientX - rect.left; + this._ghostOffY = e.clientY - rect.top; + + // Build ghost inside shadow DOM + this._ghost = document.createElement("div"); + this._ghost.className = "drag-ghost"; + this._ghost.style.width = rect.width + "px"; + this._ghost.style.height = rect.height + "px"; + this._ghost.innerHTML = srcEl.innerHTML; + this._ghost.querySelector(".drop-indicator")?.remove(); + const h = this._ghost.querySelector(".drag-handle"); + if (h) h.style.color = "var(--accent)"; + this.shadowRoot.appendChild(this._ghost); + + srcEl.classList.add("is-dragging"); + this._positionGhost(e.clientX, e.clientY); + + document.addEventListener("pointermove", this._onMove); + document.addEventListener("pointerup", this._onUp); + document.addEventListener("pointercancel", this._onUp); + } + + _positionGhost(cx, cy) { + this._ghost.style.left = cx - this._ghostOffX + "px"; + this._ghost.style.top = cy - this._ghostOffY + "px"; + } + + _onMove(e) { + this._lastPointerX = e.clientX; + this._lastPointerY = e.clientY; + this._positionGhost(e.clientX, e.clientY); + this._updateDropTarget(e.clientY); + this._updateAutoScroll(e.clientY); + } + + _onUp() { + document.removeEventListener("pointermove", this._onMove); + 
document.removeEventListener("pointerup", this._onUp); + document.removeEventListener("pointercancel", this._onUp); + + this._stopAutoScroll(); + + const noMove = + this._dropTargetIdx === null || this._dropTargetIdx === this._dragSrcIdx; + this._clearDropIndicators(); + + let dest = null; + if (!noMove) { + const moved = this._items.splice(this._dragSrcIdx, 1)[0]; + dest = this._dropTargetIdx; + if (this._dropPosition === "after") dest++; + if (dest > this._dragSrcIdx) dest--; + this._items.splice(dest, 0, moved); + } + + this._dragSrcIdx = null; + this._dropTargetIdx = null; + this._dropPosition = null; + + const g = this._ghost; + this._ghost = null; + + if (noMove) { + if (g) g.remove(); + this._render(); + return; + } + + this._render(); + + if (g && dest !== null) { + const landedEl = this._dragList.children[dest]; + if (landedEl) { + landedEl.style.opacity = "0"; + const r = landedEl.getBoundingClientRect(); + g.getBoundingClientRect(); + g.style.transition = + "left 0.15s cubic-bezier(0.22, 1, 0.36, 1), top 0.15s cubic-bezier(0.22, 1, 0.36, 1), box-shadow 0.15s, opacity 0.1s 0.1s"; + g.style.left = r.left + "px"; + g.style.top = r.top + "px"; + g.style.boxShadow = "0 1px 4px rgba(0,0,0,0.08)"; + g.style.opacity = "0"; + setTimeout(() => { + g.remove(); + if (landedEl) landedEl.style.opacity = ""; + }, 160); + } else { + g.remove(); + } + } else if (g) { + g.remove(); + } + } + + _updateDropTarget(clientY) { + this._clearDropIndicators(); + const listItems = [ + ...this._dragList.querySelectorAll(".drag-item:not(.is-dragging)"), + ]; + if (!listItems.length) return; + + let best = null, + bestDist = Infinity; + listItems.forEach((li) => { + const r = li.getBoundingClientRect(); + const mid = r.top + r.height / 2; + const dist = Math.abs(clientY - mid); + if (dist < bestDist) { + bestDist = dist; + best = li; + } + }); + + if (!best) return; + const r = best.getBoundingClientRect(); + const mid = r.top + r.height / 2; + const above = clientY < mid; + const 
indic = best.querySelector(".drop-indicator"); + + this._dropTargetIdx = parseInt(best.dataset.idx); + this._dropPosition = above ? "before" : "after"; + + if (indic) { + indic.className = "drop-indicator " + (above ? "top" : "bottom"); + } + } + + _clearDropIndicators() { + this._dragList.querySelectorAll(".drop-indicator").forEach((el) => { + el.className = "drop-indicator"; + }); + } + + _updateAutoScroll(clientY) { + const rect = this._listWrap.getBoundingClientRect(); + const relY = clientY - rect.top; + const distTop = relY; + const distBot = rect.height - relY; + + const inTop = distTop < this._SCROLL_ZONE && distTop >= 0; + const inBot = distBot < this._SCROLL_ZONE && distBot >= 0; + + this._pulseTop.classList.toggle("active", inTop); + this._pulseBot.classList.toggle("active", inBot); + + if ((inTop || inBot) && !this._autoScrollRAF) { + let lastTime = null; + const loop = (ts) => { + if (!this._ghost) { + this._stopAutoScroll(); + return; + } + if (lastTime !== null) { + const dt = ts - lastTime; + const rect2 = this._listWrap.getBoundingClientRect(); + const relY2 = this._lastPointerY - rect2.top; + const dTop = relY2; + const dBot = rect2.height - relY2; + + if (dTop < this._SCROLL_ZONE && dTop >= 0) { + const factor = 1 - dTop / this._SCROLL_ZONE; + this._listWrap.scrollTop -= this._SCROLL_SPEED * dt * factor * 2.5; + } else if (dBot < this._SCROLL_ZONE && dBot >= 0) { + const factor = 1 - dBot / this._SCROLL_ZONE; + this._listWrap.scrollTop += this._SCROLL_SPEED * dt * factor * 2.5; + } else { + this._stopAutoScroll(); + return; + } + this._updateDropTarget(this._lastPointerY); + } + lastTime = ts; + this._autoScrollRAF = requestAnimationFrame(loop); + }; + this._autoScrollRAF = requestAnimationFrame(loop); + } + + if (!inTop && !inBot) this._stopAutoScroll(); + } + + _stopAutoScroll() { + if (this._autoScrollRAF) { + cancelAnimationFrame(this._autoScrollRAF); + this._autoScrollRAF = null; + } + this._pulseTop.classList.remove("active"); + 
this._pulseBot.classList.remove("active"); + } +} + +customElements.define("column-chooser", ColumnChooser); diff --git a/datasette/static/navigation-search.js b/datasette/static/navigation-search.js index 48de5c4f..95e7dfc5 100644 --- a/datasette/static/navigation-search.js +++ b/datasette/static/navigation-search.js @@ -19,19 +19,20 @@ class NavigationSearch extends HTMLElement { dialog { border: none; - border-radius: 0.75rem; + border-radius: var(--modal-border-radius, 0.75rem); padding: 0; max-width: 90vw; width: 600px; max-height: 80vh; - box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04); - animation: slideIn 0.2s ease-out; + box-shadow: var(--modal-shadow, 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04)); + animation: slideIn var(--modal-animation-duration, 0.2s) ease-out; } dialog::backdrop { - background: rgba(0, 0, 0, 0.5); - backdrop-filter: blur(4px); - animation: fadeIn 0.2s ease-out; + background: var(--modal-backdrop-bg, rgba(0, 0, 0, 0.5)); + backdrop-filter: var(--modal-backdrop-blur, blur(4px)); + -webkit-backdrop-filter: var(--modal-backdrop-blur, blur(4px)); + animation: fadeIn var(--modal-animation-duration, 0.2s) ease-out; } @keyframes slideIn { diff --git a/datasette/static/table.js b/datasette/static/table.js index 0caeeb91..c26dda5a 100644 --- a/datasette/static/table.js +++ b/datasette/static/table.js @@ -4,6 +4,7 @@ var DROPDOWN_HTML = `
      {{ action.name }} {% if action.abbr %}{{ action.abbr }}{% endif %} {{ action.description or "" }}{{ action.resource_class }}{% if action.resource_class %}{{ action.resource_class }}{% endif %} {% if action.takes_parent %}✓{% endif %} {% if action.takes_child %}✓{% endif %} {% if action.also_requires %}{{ action.also_requires }}{% endif %}.c-dcde{"row": {"pk1": "d", "pk2": "e", "content": "RENDER_CELL_DEMO"}, "column": "content", "table": "compound_primary_key", "database": "fixtures", "pks": ["pk1", "pk2"], "config": {"depth": "database"}} + assert """ https://twitter.com/simonw -11hello111a1b1ababca/b.c-da/b.c-dc
      [^\/\.]+)(\.(?P\w+))?$", diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 8b405da5..7c251e2c 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -67,6 +67,7 @@ {% if not hide_sql %}{% endif %} {{ show_hide_hidden }} + {% if save_query_url %}Save query{% endif %} {% if canned_query and edit_sql_url %}Edit SQL{% endif %}

      diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html new file mode 100644 index 00000000..0e6a7b37 --- /dev/null +++ b/datasette/templates/query_create.html @@ -0,0 +1,71 @@ +{% extends "base.html" %} + +{% block title %}Create query{% endblock %} + +{% block extra_head %} +{{- super() -}} +{% include "_codemirror.html" %} +{% endblock %} + +{% block body_class %}query-create db-{{ database|to_css_class }}{% endblock %} + +{% block crumbs %} +{{ crumbs.nav(request=request, database=database) }} +{% endblock %} + +{% block content %} + +

      Create query

      + +
      +

      +

      +


      +

      + + +

      +

      +

      + {% if can_publish %} +

      + {% endif %} + +

      Analysis

      + {% if analysis_error %} +

      {{ analysis_error }}

      + {% elif analysis_rows %} +
      + + + + + + + + + + + + {% for row in analysis_rows %} + + + + + + + + + {% endfor %} + +
      OperationDatabaseTablerequired permissionAllowedSource
      {{ row.operation }}{{ row.database }}{{ row.table }}{{ row.required_permission }}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}{{ row.source or "" }}
      + {% else %} +

      Analysis will show each affected table and required permission.

      + {% endif %} + +

      + + +{% include "_codemirror_foot.html" %} + +{% endblock %} diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index e679ae76..891ee913 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -241,6 +241,14 @@ async def _build_single_action_sql( "),", ] ) + else: + query_parts.extend( + [ + "anon_rules AS (", + " SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason WHERE 0", + "),", + ] + ) # Continue with the cascading logic query_parts.extend( diff --git a/datasette/views/database.py b/datasette/views/database.py index 0cf93832..f40c434c 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -12,7 +12,7 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted -from datasette.resources import DatabaseResource, QueryResource +from datasette.resources import DatabaseResource, QueryResource, TableResource from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -302,6 +302,9 @@ class QueryContext(Context): allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) + save_query_url: str = field( + metadata={"help": "URL to save the current arbitrary SQL as a query"} + ) tables: list = field(metadata={"help": "List of table objects in the database"}) named_parameter_values: dict = field( metadata={"help": "Dictionary of parameter names/values"} @@ -417,6 +420,510 @@ async def database_download(request, datasette): ) +_query_name_re = re.compile(r"^[^/\.\n]+$") + +_query_fields = { + "sql", + "title", + "description", + "description_html", + "hide_sql", + "fragment", + "parameters", + "params", + "published", + "on_success_message", + "on_success_message_sql", + "on_success_redirect", + "on_error_message", + "on_error_redirect", +} + +_query_create_fields = _query_fields | {"name", "mode", "csrftoken"} +_query_update_fields = 
_query_fields +_query_write_fields = { + "on_success_message", + "on_success_message_sql", + "on_success_redirect", + "on_error_message", + "on_error_redirect", +} + + +class QueryValidationError(Exception): + def __init__(self, message, status=400): + self.message = message + self.status = status + + +def _actor_id(actor): + if isinstance(actor, dict): + return actor.get("id") + return None + + +def _as_bool(value): + if isinstance(value, bool): + return value + if value is None: + return False + if isinstance(value, int): + return bool(value) + if isinstance(value, str): + return value.lower() in {"1", "true", "t", "yes", "on"} + return bool(value) + + +def _derived_query_parameters(sql): + parameters = [] + seen = set() + for parameter in derive_named_parameters(sql): + if parameter.startswith("_"): + raise QueryValidationError("Magic parameters are not allowed") + if parameter not in seen: + parameters.append(parameter) + seen.add(parameter) + return parameters + + +def _coerce_query_parameters(value, derived): + if value is None: + return derived + if isinstance(value, str): + parameters = [ + parameter.strip() + for parameter in re.split(r"[\s,]+", value) + if parameter.strip() + ] + elif isinstance(value, list): + parameters = value + else: + raise QueryValidationError("parameters must be a list of strings") + if not all(isinstance(parameter, str) for parameter in parameters): + raise QueryValidationError("parameters must be a list of strings") + if any(parameter.startswith("_") for parameter in parameters): + raise QueryValidationError("Magic parameters are not allowed") + if set(parameters) != set(derived): + raise QueryValidationError("parameters must match SQL named parameters") + return parameters + + +async def _json_or_form_payload(request): + content_type = request.headers.get("content-type", "") + if content_type.startswith("application/json"): + body = await request.post_body() + try: + return json.loads(body or b"{}"), True + except 
json.JSONDecodeError as e: + raise QueryValidationError("Invalid JSON: {}".format(e)) + return await request.post_vars(), False + + +async def _check_query_name(db, name, *, existing=False): + if not name or not isinstance(name, str): + raise QueryValidationError("Query name is required") + if not _query_name_re.match(name): + raise QueryValidationError("Invalid query name") + if not existing and (await db.table_exists(name) or await db.view_exists(name)): + raise QueryValidationError("Query name conflicts with a table or view") + + +async def _analyze_user_query(datasette, db, sql, *, actor, published): + if not sql or not isinstance(sql, str): + raise QueryValidationError("SQL is required") + derived = _derived_query_parameters(sql) + params = {parameter: "" for parameter in derived} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError as ex: + raise QueryValidationError("Could not analyze query: {}".format(ex)) from ex + + is_write = any( + access.operation in {"insert", "update", "delete"} + for access in analysis.table_accesses + ) + if is_write: + if published: + raise QueryValidationError("Writable queries cannot be published") + try: + await datasette.ensure_query_write_permissions(db.name, sql, actor=actor) + except Forbidden as ex: + raise QueryValidationError(str(ex), status=403) from ex + else: + try: + validate_sql_select(sql) + except InvalidSql as ex: + raise QueryValidationError(str(ex)) from ex + return is_write, derived, analysis + + +def _analysis_rows(analysis): + write_actions = { + "insert": "insert-row", + "update": "update-row", + "delete": "delete-row", + } + return [ + { + "operation": access.operation, + "database": access.database, + "table": access.table, + "required_permission": write_actions.get(access.operation, ""), + "source": access.source, + } + for access in analysis.table_accesses + ] + + +def _apply_query_data_types(data): + typed = dict(data) + for key in ("hide_sql", "published"): + if key in 
typed: + typed[key] = _as_bool(typed[key]) + return typed + + +async def _prepare_query_create(datasette, request, db, data): + invalid_keys = set(data) - _query_create_fields + if invalid_keys: + raise QueryValidationError("Invalid keys: {}".format(", ".join(invalid_keys))) + + data = _apply_query_data_types(data) + name = data.get("name") + await _check_query_name(db, name) + if await datasette.get_query(db.name, name) is not None: + raise QueryValidationError("Query already exists") + + published = _as_bool(data.get("published")) + is_write, derived, analysis = await _analyze_user_query( + datasette, + db, + data.get("sql"), + actor=request.actor, + published=published, + ) + if published and not await datasette.allowed( + action="publish-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + raise QueryValidationError("Permission denied: need publish-query", status=403) + if not is_write and any(data.get(field) for field in _query_write_fields): + raise QueryValidationError("Writable query fields require writable SQL") + + parameters = _coerce_query_parameters( + data.get("parameters", data.get("params")), + derived, + ) + return { + "name": name, + "sql": data["sql"], + "title": data.get("title"), + "description": data.get("description"), + "description_html": data.get("description_html"), + "hide_sql": _as_bool(data.get("hide_sql")), + "fragment": data.get("fragment"), + "parameters": parameters, + "is_write": is_write, + "published": published, + "source": "user", + "owner_id": _actor_id(request.actor), + "on_success_message": data.get("on_success_message"), + "on_success_message_sql": data.get("on_success_message_sql"), + "on_success_redirect": data.get("on_success_redirect"), + "on_error_message": data.get("on_error_message"), + "on_error_redirect": data.get("on_error_redirect"), + "analysis": analysis, + } + + +async def _prepare_query_update(datasette, request, db, existing, update): + invalid_keys = set(update) - 
_query_update_fields + if invalid_keys: + raise QueryValidationError("Invalid keys: {}".format(", ".join(invalid_keys))) + + update = _apply_query_data_types(update) + sql = update.get("sql", existing["sql"]) + published = update.get("published", existing["published"]) + query_is_write = existing["is_write"] + derived = _derived_query_parameters(sql) + parameters = None + + if "sql" in update: + query_is_write, derived, _ = await _analyze_user_query( + datasette, + db, + sql, + actor=request.actor, + published=published, + ) + elif published and query_is_write: + raise QueryValidationError("Writable queries cannot be published") + if published and not existing["published"]: + if not await datasette.allowed( + action="publish-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + raise QueryValidationError( + "Permission denied: need publish-query", status=403 + ) + + if "parameters" in update or "params" in update: + parameters = _coerce_query_parameters( + update.get("parameters", update.get("params")), + derived, + ) + elif "sql" in update: + parameters = derived + + if not query_is_write and any(update.get(field) for field in _query_write_fields): + raise QueryValidationError("Writable query fields require writable SQL") + + field_values = { + "sql": sql, + "title": update.get("title"), + "description": update.get("description"), + "description_html": update.get("description_html"), + "hide_sql": update.get("hide_sql"), + "fragment": update.get("fragment"), + "parameters": parameters, + "is_write": query_is_write, + "published": published, + "on_success_message": update.get("on_success_message"), + "on_success_message_sql": update.get("on_success_message_sql"), + "on_success_redirect": update.get("on_success_redirect"), + "on_error_message": update.get("on_error_message"), + "on_error_redirect": update.get("on_error_redirect"), + } + update_kwargs = {} + for field, value in field_values.items(): + if field in update: + update_kwargs[field] = 
value + if parameters is not None: + update_kwargs["parameters"] = parameters + if "sql" in update: + update_kwargs["is_write"] = query_is_write + return update_kwargs + + +class QueryListView(BaseView): + name = "query-list" + + async def get(self, request): + db = await self.ds.resolve_database(request) + page = await self.ds.allowed_resources( + "view-query", + request.actor, + parent=db.name, + limit=1000, + ) + all_queries = await self.ds.get_queries(db.name) + queries = [ + all_queries[resource.child] + for resource in page.resources + if resource.child in all_queries + ] + return Response.json({"ok": True, "database": db.name, "queries": queries}) + + +class QueryCreateView(BaseView): + name = "query-create" + has_json_alternate = False + + async def get(self, request): + db = await self.ds.resolve_database(request) + await self.ds.ensure_permission( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ) + await self.ds.ensure_permission( + action="insert-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ) + + sql = request.args.get("sql") or "" + analysis_error = None + analysis_rows = [] + parameter_names = [] + if sql: + try: + parameter_names = _derived_query_parameters(sql) + params = {parameter: "" for parameter in parameter_names} + analysis = await db.analyze_sql(sql, params) + rows = _analysis_rows(analysis) + for row in rows: + permission = row["required_permission"] + if permission: + row["allowed"] = await self.ds.allowed( + action=permission, + resource=TableResource(row["database"], row["table"]), + actor=request.actor, + ) + else: + row["allowed"] = None + analysis_rows = rows + except (QueryValidationError, sqlite3.DatabaseError) as ex: + analysis_error = getattr(ex, "message", str(ex)) + + return await self.render( + ["query_create.html"], + request, + { + "database": db.name, + "database_color": db.color, + "sql": sql, + "parameter_names": parameter_names, + "can_publish": await 
self.ds.allowed( + action="publish-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ), + "analysis_error": analysis_error, + "analysis_rows": analysis_rows, + "save_disabled": bool( + analysis_error + or any(row["allowed"] is False for row in analysis_rows) + ), + }, + ) + + +class QueryInsertView(BaseView): + name = "query-insert" + + async def post(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _error(["Permission denied: need execute-sql"], 403) + if not await self.ds.allowed( + action="insert-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _error(["Permission denied: need insert-query"], 403) + + try: + data, is_json = await _json_or_form_payload(request) + if not isinstance(data, dict): + raise QueryValidationError("JSON must be a dictionary") + query_data = data.get("query") if is_json else data + if not isinstance(query_data, dict): + raise QueryValidationError("JSON must contain a query dictionary") + prepared = await _prepare_query_create(self.ds, request, db, query_data) + except QueryValidationError as ex: + return _error([ex.message], ex.status) + + prepared.pop("analysis") + name = prepared.pop("name") + try: + await self.ds.add_query(db.name, name, replace=False, **prepared) + except sqlite3.IntegrityError as ex: + return _error([str(ex)], 400) + + query = await self.ds.get_query(db.name, name) + if is_json: + return Response.json({"ok": True, "query": query}, status=201) + self.ds.add_message(request, "Query saved", self.ds.INFO) + return Response.redirect(self.ds.urls.path(self.ds.urls.table(db.name, name))) + + +class QueryDefinitionView(BaseView): + name = "query-definition" + + async def get(self, request): + db = await self.ds.resolve_database(request) + query_name = tilde_decode(request.url_vars["query"]) + query = await 
self.ds.get_query(db.name, query_name) + if query is None: + return _error(["Query not found: {}".format(query_name)], 404) + if not await self.ds.allowed( + action="view-query", + resource=QueryResource(db.name, query_name), + actor=request.actor, + ): + return _error(["Permission denied"], 403) + return Response.json({"ok": True, "query": query}) + + +class QueryUpdateView(BaseView): + name = "query-update" + + async def post(self, request): + db = await self.ds.resolve_database(request) + query_name = tilde_decode(request.url_vars["query"]) + existing = await self.ds.get_query(db.name, query_name) + if existing is None: + return _error(["Query not found: {}".format(query_name)], 404) + if not await self.ds.allowed( + action="update-query", + resource=QueryResource(db.name, query_name), + actor=request.actor, + ): + return _error(["Permission denied: need update-query"], 403) + + try: + data, _ = await _json_or_form_payload(request) + if not isinstance(data, dict): + raise QueryValidationError("JSON must be a dictionary") + invalid_keys = set(data) - {"update", "return"} + if invalid_keys: + raise QueryValidationError( + "Invalid keys: {}".format(", ".join(invalid_keys)) + ) + update = data.get("update") + if not isinstance(update, dict): + raise QueryValidationError("JSON must contain an update dictionary") + if "sql" in update and not await self.ds.allowed( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + raise QueryValidationError( + "Permission denied: need execute-sql", status=403 + ) + update_kwargs = await _prepare_query_update( + self.ds, request, db, existing, update + ) + except QueryValidationError as ex: + return _error([ex.message], ex.status) + + await self.ds.update_query(db.name, query_name, **update_kwargs) + if data.get("return"): + return Response.json( + { + "ok": True, + "query": await self.ds.get_query(db.name, query_name), + } + ) + return Response.json({"ok": True}) + + +class 
QueryDeleteView(BaseView): + name = "query-delete" + + async def post(self, request): + db = await self.ds.resolve_database(request) + query_name = tilde_decode(request.url_vars["query"]) + existing = await self.ds.get_query(db.name, query_name) + if existing is None: + return _error(["Query not found: {}".format(query_name)], 404) + if not await self.ds.allowed( + action="delete-query", + resource=QueryResource(db.name, query_name), + actor=request.actor, + ): + return _error(["Permission denied: need delete-query"], 403) + await self.ds.remove_query(db.name, query_name) + return Response.json({"ok": True}) + + class QueryView(View): async def post(self, request, datasette): from datasette.app import TableNotFound @@ -741,6 +1248,11 @@ class QueryView(View): resource=DatabaseResource(database=database), actor=request.actor, ) + allow_insert_query = await datasette.allowed( + action="insert-query", + resource=DatabaseResource(database=database), + actor=request.actor, + ) show_hide_hidden = "" if canned_query and canned_query.get("hide_sql"): @@ -790,6 +1302,19 @@ class QueryView(View): } ) ) + save_query_url = None + if ( + not canned_query + and allow_execute_sql + and allow_insert_query + and is_validated_sql + and ":_" not in sql + ): + save_query_url = ( + datasette.urls.database(database) + + "/-/queries/-/create?" + + urlencode({"sql": sql}) + ) async def query_actions(): query_actions = [] @@ -827,6 +1352,7 @@ class QueryView(View): show_hide_text=show_hide_text, editable=not canned_query, allow_execute_sql=allow_execute_sql, + save_query_url=save_query_url, tables=await get_tables(datasette, request, db, allowed_dict), named_parameter_values=named_parameter_values, edit_sql_url=edit_sql_url, diff --git a/docs/authentication.rst b/docs/authentication.rst index 7daefab7..543f069b 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1285,12 +1285,56 @@ Actor is allowed to view a table (or view) page, e.g. 
https://latest.datasette.i view-query ---------- -Actor is allowed to view (and execute) a :ref:`canned query ` page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. +Actor is allowed to view (and execute) a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) - - ``query`` is the name of the canned query (string) + + ``query`` is the name of the query (string) + +.. _actions_insert_query: + +insert-query +------------ + +Actor is allowed to create saved queries in a database. + +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) + +.. _actions_publish_query: + +publish-query +------------- + +Actor is allowed to publish a saved read-only query so actors without ``execute-sql`` can run it. + +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) + +.. _actions_update_query: + +update-query +------------ + +Actor is allowed to update a saved query. + +``resource`` - ``datasette.resources.QueryResource(database, query)`` + ``database`` is the name of the database (string) + + ``query`` is the name of the query (string) + +.. _actions_delete_query: + +delete-query +------------ + +Actor is allowed to delete a saved query. + +``resource`` - ``datasette.resources.QueryResource(database, query)`` + ``database`` is the name of the database (string) + + ``query`` is the name of the query (string) .. 
_actions_insert_row: diff --git a/tests/test_queries.py b/tests/test_queries.py index 01174a18..8e802b75 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -179,9 +179,7 @@ async def test_query_resources_come_from_internal_table(): page = await ds.allowed_resources("view-query", actor=None) - assert [(r.parent, r.child) for r in page.resources] == [ - ("data", "internal_query") - ] + assert [(r.parent, r.child) for r in page.resources] == [("data", "internal_query")] @pytest.mark.asyncio @@ -279,3 +277,216 @@ async def test_analyze_write_query_rejects_writes_to_attached_databases(): "insert into extra.cats (id) values (1)", actor={"id": "writer"}, ) + + +@pytest.mark.asyncio +async def test_query_insert_api_creates_read_only_query(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_insert_api", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/queries/-/insert", + actor={"id": "root"}, + json={ + "query": { + "name": "by_name", + "sql": "select * from dogs where name = :name", + "title": "By name", + } + }, + ) + + assert response.status_code == 201 + data = response.json() + assert data["ok"] is True + assert data["query"]["name"] == "by_name" + assert data["query"]["parameters"] == ["name"] + assert data["query"]["is_write"] is False + assert data["query"]["source"] == "user" + assert data["query"]["owner_id"] == "root" + + +@pytest.mark.asyncio +async def test_query_list_and_definition_api(): + ds = Datasette(memory=True) + ds.root_enabled = True + ds.add_memory_database("query_list_api", name="data") + await ds.invoke_startup() + await ds.add_query("data", "listed", "select 1", title="Listed", published=True) + + list_response = await ds.client.get( + "/data/-/queries", + actor={"id": "root"}, + ) + definition_response = await ds.client.get( + 
"/data/listed/-/definition", + actor={"id": "root"}, + ) + + assert list_response.status_code == 200 + assert list_response.json()["queries"][0]["name"] == "listed" + assert definition_response.status_code == 200 + assert definition_response.json()["query"]["title"] == "Listed" + + +@pytest.mark.asyncio +async def test_query_insert_api_publish_requires_publish_query(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-sql": {"id": "writer"}, + "insert-query": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("query_publish_api", name="data") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/queries/-/insert", + actor={"id": "writer"}, + json={"query": {"name": "public", "sql": "select 1", "published": True}}, + ) + + assert response.status_code == 403 + assert response.json()["errors"] == ["Permission denied: need publish-query"] + + +@pytest.mark.asyncio +async def test_query_insert_api_creates_writable_query(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_write_api", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/queries/-/insert", + actor={"id": "root"}, + json={ + "query": { + "name": "insert_dog", + "sql": "insert into dogs (name) values (:name)", + } + }, + ) + + assert response.status_code == 201 + query = response.json()["query"] + assert query["is_write"] is True + assert query["published"] is False + assert query["parameters"] == ["name"] + + bad_response = await ds.client.post( + "/data/-/queries/-/insert", + actor={"id": "root"}, + json={ + "query": { + "name": "published_insert", + "sql": "insert into dogs (name) values (:name)", + "published": True, + } + }, + ) + + assert bad_response.status_code == 400 + assert 
bad_response.json()["errors"] == ["Writable queries cannot be published"] + + +@pytest.mark.asyncio +async def test_query_update_and_delete_api(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + ds.add_memory_database("query_update_api", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "editable", + "select 1", + title="Original", + source="user", + owner_id="root", + ) + + update_response = await ds.client.post( + "/data/editable/-/update", + actor={"id": "root"}, + json={ + "update": { + "title": "Updated", + "description": "Fresh", + "on_success_redirect": None, + }, + "return": True, + }, + ) + + assert update_response.status_code == 200 + updated = update_response.json()["query"] + assert updated["title"] == "Updated" + assert updated["description"] == "Fresh" + assert updated["on_success_redirect"] is None + + delete_response = await ds.client.post( + "/data/editable/-/delete", + actor={"id": "root"}, + json={}, + ) + + assert delete_response.status_code == 200 + assert delete_response.json() == {"ok": True} + assert await ds.get_query("data", "editable") is None + + +@pytest.mark.asyncio +async def test_query_insert_api_rejects_magic_parameters(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + ds.add_memory_database("query_magic_api", name="data") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/queries/-/insert", + actor={"id": "root"}, + json={"query": {"name": "magic", "sql": "select :_actor_id"}}, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == ["Magic parameters are not allowed"] + + +@pytest.mark.asyncio +async def test_create_query_ui_and_arbitrary_sql_save_link(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_create_ui", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + 
create_response = await ds.client.get( + "/data/-/queries/-/create?sql=select+*+from+dogs", + actor={"id": "root"}, + ) + query_response = await ds.client.get( + "/data/-/query?sql=select+*+from+dogs", + actor={"id": "root"}, + ) + + assert create_response.status_code == 200 + assert "Create query" in create_response.text + assert "Read-only" in create_response.text + assert "Writable" in create_response.text + assert "required permission" in create_response.text + assert query_response.status_code == 200 + assert "Save query" in query_response.text + assert "/data/-/queries/-/create?sql=select+%2A+from+dogs" in query_response.text From 040e42ddca047a2e616d412b733fd36de233d2b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 24 May 2026 22:58:50 -0700 Subject: [PATCH 214/299] Enforce query ownership and remove canned query hook Refs #2735 --- datasette/default_permissions/__init__.py | 16 ----- datasette/default_permissions/defaults.py | 30 ++++++++ datasette/hookspecs.py | 5 -- datasette/views/database.py | 12 ++++ tests/fixtures.py | 2 - tests/plugins/my_plugin.py | 5 -- tests/plugins/my_plugin_2.py | 26 +++---- tests/test_canned_queries.py | 34 +++++---- tests/test_permissions.py | 40 ++++------- tests/test_plugins.py | 27 ++------ tests/test_queries.py | 84 +++++++++++++++++++++++ 11 files changed, 182 insertions(+), 99 deletions(-) diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 5a53dbe7..a9f2d8bd 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -17,13 +17,6 @@ UNION/INTERSECT operations. 
The order of evaluation is: from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl - # Re-export all hooks and public utilities from .restrictions import ( actor_restrictions_sql as actor_restrictions_sql, @@ -38,12 +31,3 @@ from .defaults import ( default_query_permissions_sql as default_query_permissions_sql, DEFAULT_ALLOW_ACTIONS as DEFAULT_ALLOW_ACTIONS, ) - - -@hookimpl -def canned_queries(datasette: "Datasette", database: str, actor) -> dict: - """Return canned queries defined in datasette.yaml configuration.""" - queries = ( - ((datasette.config or {}).get("databases") or {}).get(database) or {} - ).get("queries") or {} - return queries diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 2613c4f4..9737de96 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -74,6 +74,22 @@ async def default_query_permissions_sql( actor: Optional[dict], action: str, ) -> Optional[PermissionSQL]: + actor_id = actor.get("id") if isinstance(actor, dict) else None + + if action in {"update-query", "delete-query"}: + if actor_id is None: + return None + return PermissionSQL( + sql=""" + SELECT database_name AS parent, name AS child, 1 AS allow, + 'query owner' AS reason + FROM queries + WHERE source = 'user' + AND owner_id = :query_owner_id + """, + params={"query_owner_id": actor_id}, + ) + if action != "view-query": return None @@ -98,6 +114,19 @@ async def default_query_permissions_sql( AND source IN ('config', 'plugin') """ + user_writable_sql = "" + if actor_id is not None: + params["query_owner_id"] = actor_id + user_writable_sql = """ + UNION ALL + SELECT database_name AS parent, name AS child, 1 AS allow, + 'query owner' AS reason + FROM queries + WHERE is_write = 1 + AND source = 'user' + AND owner_id = :query_owner_id + """ + return PermissionSQL( sql=f""" WITH 
execute_sql_allowed AS ( @@ -118,6 +147,7 @@ async def default_query_permissions_sql( WHERE q.is_write = 0 AND q.published = 0 {trusted_writable_sql} + {user_writable_sql} """, params=params, ) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index cf95abcb..a4067eaa 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -137,11 +137,6 @@ def permission_resources_sql(datasette, actor, action): """ -@hookspec -def canned_queries(datasette, database, actor): - """Return a dictionary of canned query definitions or an awaitable function that returns them""" - - @hookspec def register_magic_parameters(datasette): """Return a list of (name, function) magic parameter functions""" diff --git a/datasette/views/database.py b/datasette/views/database.py index f40c434c..2cdaab9f 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -945,6 +945,18 @@ class QueryView(View): # That should not have happened raise DatasetteError("Unexpected table found on POST", status=404) + if not await datasette.allowed( + action="view-query", + resource=QueryResource(database=db.name, query=canned_query["name"]), + actor=request.actor, + ): + raise Forbidden("You do not have permission to view this query") + + if canned_query.get("write") and canned_query.get("source") == "user": + await datasette.ensure_query_write_permissions( + db.name, canned_query["sql"], actor=request.actor + ) + # If database is immutable, return an error if not db.is_mutable: raise Forbidden("Database is immutable") diff --git a/tests/fixtures.py b/tests/fixtures.py index 71884294..8ab3633f 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -35,7 +35,6 @@ EXPECTED_PLUGINS = [ "hooks": [ "actor_from_request", "asgi_wrapper", - "canned_queries", "database_actions", "extra_body_script", "extra_css_urls", @@ -68,7 +67,6 @@ EXPECTED_PLUGINS = [ "hooks": [ "actor_from_request", "asgi_wrapper", - "canned_queries", "extra_js_urls", "extra_template_vars", 
"handle_exception", diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 4e401c07..1dd9ed3e 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -314,11 +314,6 @@ def startup(datasette): _ = (Response, Forbidden, NotFound, hookimpl, actor_matches_allow) -@hookimpl -def canned_queries(datasette, database, actor): - return {"from_hook": f"select 1, '{actor['id'] if actor else 'null'}' as actor_id"} - - @hookimpl def register_magic_parameters(): from uuid import uuid4 diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index 9e8d9b2b..e3d3e760 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -139,20 +139,20 @@ def startup(datasette): datasette._startup_catalog_databases = [ row["database_name"] for row in catalog_rows ] - - return inner - - -@hookimpl -def canned_queries(datasette, database): - async def inner(): - return { - "from_async_hook": "select {}".format( - ( - await datasette.get_database(database).execute("select 1 + 1") - ).first()[0] + for database in datasette.databases: + await datasette.add_query( + database, + "from_hook", + "select 1, 'null' as actor_id", + source="plugin", + ) + result = await datasette.get_database(database).execute("select 1 + 1") + await datasette.add_query( + database, + "from_async_hook", + "select {}".format(result.first()[0]), + source="plugin", ) - } return inner diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index 5e36a87a..e06ad189 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -1,10 +1,16 @@ from bs4 import BeautifulSoup as Soup +from asgiref.sync import async_to_sync import json import pytest import re from .fixtures import make_app_client +def update_query(client, name, **kwargs): + async_to_sync(client.ds.invoke_startup)() + async_to_sync(client.ds.update_query)("data", name, **kwargs) + + @pytest.fixture def canned_write_client(tmpdir): template_dir = tmpdir / 
"canned_write_templates" @@ -153,9 +159,7 @@ def test_insert_error(canned_write_client): ) assert [["UNIQUE constraint failed: names.rowid", 3]] == messages # How about with a custom error message? - canned_write_client.ds.config["databases"]["data"]["queries"][ - "add_name_specify_id" - ]["on_error_message"] = "ERROR" + update_query(canned_write_client, "add_name_specify_id", on_error_message="ERROR") response = canned_write_client.post( "/data/add_name_specify_id", {"rowid": 1, "name": "Should fail"}, @@ -327,12 +331,16 @@ def magic_parameters_client(): ], ) def test_magic_parameters(magic_parameters_client, magic_parameter, expected_re): - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_post"][ - "sql" - ] = f"insert into logs (line) values (:{magic_parameter})" - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_get"][ - "sql" - ] = f"select :{magic_parameter} as result" + update_query( + magic_parameters_client, + "runme_post", + sql=f"insert into logs (line) values (:{magic_parameter})", + ) + update_query( + magic_parameters_client, + "runme_get", + sql=f"select :{magic_parameter} as result", + ) cookies = { "ds_actor": magic_parameters_client.actor_cookie({"id": "root"}), "foo": "bar", @@ -366,9 +374,11 @@ def test_magic_parameters(magic_parameters_client, magic_parameter, expected_re) @pytest.mark.parametrize("use_csrf", [True, False]) @pytest.mark.parametrize("return_json", [True, False]) def test_magic_parameters_csrf_json(magic_parameters_client, use_csrf, return_json): - magic_parameters_client.ds.config["databases"]["data"]["queries"]["runme_post"][ - "sql" - ] = "insert into logs (line) values (:_header_host)" + update_query( + magic_parameters_client, + "runme_post", + sql="insert into logs (line) values (:_header_host)", + ) qs = "" if return_json: qs = "?_json=1" diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 8166532f..04800ed3 100644 --- a/tests/test_permissions.py +++ 
b/tests/test_permissions.py @@ -1,4 +1,5 @@ import collections +from asgiref.sync import async_to_sync from datasette.app import Datasette from datasette.cli import cli from datasette.default_permissions import restrictions_allow_action @@ -609,6 +610,10 @@ def test_padlocks_on_database_page(cascade_app_client): previous_config = cascade_app_client.ds.config try: cascade_app_client.ds.config = config + async_to_sync(cascade_app_client.ds.invoke_startup)() + async_to_sync(cascade_app_client.ds.add_query)( + "fixtures", "query_two", "select 2", source="config" + ) response = cascade_app_client.get( "/fixtures", cookies={"ds_actor": cascade_app_client.actor_cookie({"id": "test"})}, @@ -624,6 +629,7 @@ def test_padlocks_on_database_page(cascade_app_client): assert ">simple_view" in response.text finally: cascade_app_client.ds.config = previous_config + async_to_sync(cascade_app_client.ds.remove_query)("fixtures", "query_two") @pytest.mark.asyncio @@ -954,39 +960,20 @@ async def test_permissions_in_config( @pytest.mark.asyncio -async def test_allowed_resources_view_query_includes_actor_specific_canned_queries(): - """ - Actor-specific canned queries should be listed by allowed_resources("view-query"). - - This test is intentionally explicit about the previous bug: - - the canned query only exists for actor "alice" - - the permission rule only allows actor "alice" to view it - - allowed() succeeds for that specific query resource - - allowed_resources("view-query", actor) must include the same query - - Before the fix, QueryResource.resources_sql() called canned_queries(..., actor=None), - so the query was omitted from resource enumeration and allowed_resources() returned - an empty list even though allowed() returned True. 
- """ +async def test_allowed_resources_view_query_includes_actor_specific_query_permissions(): from datasette import hookimpl from datasette.permissions import PermissionSQL from datasette.resources import QueryResource - class ActorSpecificQueryPlugin: - __name__ = "ActorSpecificQueryPlugin" - - @hookimpl - def canned_queries(self, datasette, database, actor): - if database == "testdb" and actor and actor.get("id") == "alice": - return {"user_only": {"sql": "select 1 as n"}} - return {} + class ActorSpecificQueryPermissionPlugin: + __name__ = "ActorSpecificQueryPermissionPlugin" @hookimpl def permission_resources_sql(self, datasette, actor, action): if action == "view-query" and actor and actor.get("id") == "alice": return PermissionSQL(sql=""" SELECT 'testdb' AS parent, 'user_only' AS child, 1 AS allow, - 'alice can view her actor-specific canned query' AS reason + 'alice can view this query' AS reason """) return None @@ -994,9 +981,10 @@ async def test_allowed_resources_view_query_includes_actor_specific_canned_queri await ds.invoke_startup() ds.add_memory_database("testdb") await ds._refresh_schemas() + await ds.add_query("testdb", "user_only", "select 1 as n") - plugin = ActorSpecificQueryPlugin() - ds.pm.register(plugin, name="actor_specific_query_plugin") + plugin = ActorSpecificQueryPermissionPlugin() + ds.pm.register(plugin, name="actor_specific_query_permission_plugin") try: actor = {"id": "alice"} @@ -1012,7 +1000,7 @@ async def test_allowed_resources_view_query_includes_actor_specific_canned_queri ("testdb", "user_only") ] finally: - ds.pm.unregister(name="actor_specific_query_plugin") + ds.pm.unregister(name="actor_specific_query_permission_plugin") @pytest.mark.asyncio diff --git a/tests/test_plugins.py b/tests/test_plugins.py index c5b9aef0..b5a13ae5 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -885,40 +885,27 @@ async def test_hook_startup_catalog_populated(ds_client): @pytest.mark.asyncio -async def 
test_hook_canned_queries(ds_client): +async def test_plugin_startup_queries(ds_client): queries = (await ds_client.get("/fixtures.json")).json()["queries"] queries_by_name = {q["name"]: q for q in queries} - assert { - "sql": "select 2", - "name": "from_async_hook", - "private": False, - } == queries_by_name["from_async_hook"] - assert { - "sql": "select 1, 'null' as actor_id", - "name": "from_hook", - "private": False, - } == queries_by_name["from_hook"] + assert queries_by_name["from_async_hook"]["sql"] == "select 2" + assert queries_by_name["from_async_hook"]["private"] is False + assert queries_by_name["from_hook"]["sql"] == "select 1, 'null' as actor_id" + assert queries_by_name["from_hook"]["private"] is False @pytest.mark.asyncio -async def test_hook_canned_queries_non_async(ds_client): +async def test_plugin_startup_query_from_hook(ds_client): response = await ds_client.get("/fixtures/from_hook.json?_shape=array") assert [{"1": 1, "actor_id": "null"}] == response.json() @pytest.mark.asyncio -async def test_hook_canned_queries_async(ds_client): +async def test_plugin_startup_query_from_async_hook(ds_client): response = await ds_client.get("/fixtures/from_async_hook.json?_shape=array") assert [{"2": 2}] == response.json() -@pytest.mark.asyncio -async def test_hook_canned_queries_actor(ds_client): - assert ( - await ds_client.get("/fixtures/from_hook.json?_bot=1&_shape=array") - ).json() == [{"1": 1, "actor_id": "bot"}] - - def test_hook_register_magic_parameters(restore_working_directory): with make_app_client( extra_databases={"data.db": "create table logs (line text)"}, diff --git a/tests/test_queries.py b/tests/test_queries.py index 8e802b75..c6685d6c 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -490,3 +490,87 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert query_response.status_code == 200 assert "Save query" in query_response.text assert "/data/-/queries/-/create?sql=select+%2A+from+dogs" in 
query_response.text + + +@pytest.mark.asyncio +async def test_query_owner_gets_update_delete_and_writable_view_defaults(): + ds = Datasette(memory=True, default_deny=True) + ds.add_memory_database("query_owner_defaults", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "insert_dog", + "insert into dogs (name) values (:name)", + is_write=True, + source="user", + owner_id="alice", + ) + + for action in ("view-query", "update-query", "delete-query"): + assert await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "bob"}, + ) + + +@pytest.mark.asyncio +async def test_user_writable_query_execution_rechecks_table_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "alice"}, + } + } + } + } + } + }, + ) + db = ds.add_memory_database("query_write_execution", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + await ds.add_query( + "data", + "insert_dog", + "insert into dogs (name) values (:name)", + is_write=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "insert_cat", + "insert into dogs (name) values (:name)", + is_write=True, + source="user", + owner_id="bob", + ) + + allowed_response = await ds.client.post( + "/data/insert_dog?_json=1", + actor={"id": "alice"}, + data={"name": "Cleo"}, + ) + denied_response = await ds.client.post( + "/data/insert_cat?_json=1", + actor={"id": "bob"}, + data={"name": "Milo"}, + ) + + assert allowed_response.status_code == 200 + assert allowed_response.json()["ok"] is True + assert denied_response.status_code == 403 + rows = (await db.execute("select name from dogs")).dicts() + assert rows == [{"name": "Cleo"}] From 
3b26b7aff03ed78fae6a17a5a65edc5b83415dee Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 24 May 2026 23:00:00 -0700 Subject: [PATCH 215/299] Document canned query hook removal Refs #2735 --- docs/plugin_hooks.rst | 73 ++----------------------------------------- docs/plugins.rst | 1 - 2 files changed, 2 insertions(+), 72 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 2b8f5eb2..b2676b3e 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1212,77 +1212,8 @@ Examples: `datasette-saved-queries `. - -Use this hook to return a dictionary of additional :ref:`canned query ` definitions for the specified database. The return value should be the same shape as the JSON described in the :ref:`canned query ` documentation. - -.. code-block:: python - - from datasette import hookimpl - - - @hookimpl - def canned_queries(datasette, database): - if database == "mydb": - return { - "my_query": { - "sql": "select * from my_table where id > :min_id" - } - } - -The hook can alternatively return an awaitable function that returns a list. Here's an example that returns queries that have been stored in the ``saved_queries`` database table, if one exists: - -.. code-block:: python - - from datasette import hookimpl - - - @hookimpl - def canned_queries(datasette, database): - async def inner(): - db = datasette.get_database(database) - if await db.table_exists("saved_queries"): - results = await db.execute( - "select name, sql from saved_queries" - ) - return { - result["name"]: {"sql": result["sql"]} - for result in results - } - - return inner - -The actor parameter can be used to include the currently authenticated actor in your decision. Here's an example that returns saved queries that were saved by that actor: - -.. 
code-block:: python - - from datasette import hookimpl - - - @hookimpl - def canned_queries(datasette, database, actor): - async def inner(): - db = datasette.get_database(database) - if actor is not None and await db.table_exists( - "saved_queries" - ): - results = await db.execute( - "select name, sql from saved_queries where actor_id = :id", - {"id": actor["id"]}, - ) - return { - result["name"]: {"sql": result["sql"]} - for result in results - } - - return inner +This hook has been removed. Plugins that need to add saved queries should use +the :ref:`plugin_hook_startup` hook and call ``await datasette.add_query(...)``. Example: `datasette-saved-queries `__ diff --git a/docs/plugins.rst b/docs/plugins.rst index 77958205..8fa49d6d 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -249,7 +249,6 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "templates": false, "version": null, "hooks": [ - "canned_queries", "permission_resources_sql" ] }, From 2d77e3334b48417c5e27355bb4016c7c76acf30e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 24 May 2026 23:06:01 -0700 Subject: [PATCH 216/299] Clean up query management test coverage Refs #2735 --- datasette/views/database.py | 6 ++-- docs/json_api.rst | 42 +++++++++++++++++++++++ tests/plugins/my_plugin_2.py | 14 -------- tests/test_canned_queries.py | 4 --- tests/test_html.py | 2 -- tests/test_permissions.py | 1 - tests/test_plugins.py | 65 ++++++++++++++++++++++++++++-------- 7 files changed, 96 insertions(+), 38 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index 2cdaab9f..d521f7ad 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -697,9 +697,9 @@ async def _prepare_query_update(datasette, request, db, existing, update): "on_error_redirect": update.get("on_error_redirect"), } update_kwargs = {} - for field, value in field_values.items(): - if field in update: - update_kwargs[field] = value + for 
field_name, value in field_values.items(): + if field_name in update: + update_kwargs[field_name] = value if parameters is not None: update_kwargs["parameters"] = parameters if "sql" in update: diff --git a/docs/json_api.rst b/docs/json_api.rst index 48c70af6..d5cd231c 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -505,6 +505,48 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. The token will need to have the specified :ref:`authentication_permissions`. +.. _QueryListView: + +Listing saved queries +~~~~~~~~~~~~~~~~~~~~~ + +``GET //-/queries`` returns saved query definitions the actor can view. + +.. _QueryCreateView: + +Creating saved queries in the UI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``GET //-/queries/-/create`` provides a form for creating saved queries. + +.. _QueryInsertView: + +Creating saved queries +~~~~~~~~~~~~~~~~~~~~~~ + +``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. + +.. _QueryDefinitionView: + +Getting a saved query definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``GET ///-/definition`` returns a saved query definition without executing it. + +.. _QueryUpdateView: + +Updating saved queries +~~~~~~~~~~~~~~~~~~~~~~ + +``POST ///-/update`` updates a saved query using a JSON body with an ``"update"`` object. + +.. _QueryDeleteView: + +Deleting saved queries +~~~~~~~~~~~~~~~~~~~~~~ + +``POST ///-/delete`` deletes a saved query. + .. 
_TableInsertView: Inserting rows diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index e3d3e760..864637a6 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -139,20 +139,6 @@ def startup(datasette): datasette._startup_catalog_databases = [ row["database_name"] for row in catalog_rows ] - for database in datasette.databases: - await datasette.add_query( - database, - "from_hook", - "select 1, 'null' as actor_id", - source="plugin", - ) - result = await datasette.get_database(database).execute("select 1 + 1") - await datasette.add_query( - database, - "from_async_hook", - "select {}".format(result.first()[0]), - source="plugin", - ) return inner diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index e06ad189..c46fd86f 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -254,10 +254,8 @@ def test_canned_query_permissions_on_database_page(canned_write_client): } assert query_names == { "add_name_specify_id_with_error_in_on_success_message_sql", - "from_hook", "update_name", "add_name_specify_id", - "from_async_hook", "canned_read", "add_name", } @@ -284,8 +282,6 @@ def test_canned_query_permissions_on_database_page(canned_write_client): }, {"name": "canned_read", "private": False}, {"name": "delete_name", "private": True}, - {"name": "from_async_hook", "private": False}, - {"name": "from_hook", "private": False}, {"name": "update_name", "private": False}, ] diff --git a/tests/test_html.py b/tests/test_html.py index efc1040d..e5f00e17 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -158,8 +158,6 @@ async def test_database_page(ds_client): queries_ul = soup.find("h2", string="Queries").find_next_sibling("ul") assert queries_ul is not None assert [ - ("/fixtures/from_async_hook", "from_async_hook"), - ("/fixtures/from_hook", "from_hook"), ("/fixtures/magic_parameters", "magic_parameters"), ("/fixtures/neighborhood_search#fragment-goes-here", "Search 
neighborhoods"), ("/fixtures/pragma_cache_size", "pragma_cache_size"), diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 04800ed3..22f294bb 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -622,7 +622,6 @@ def test_padlocks_on_database_page(cascade_app_client): assert ">123_starts_with_digits" in response.text assert ">Table With Space In Name 🔒" in response.text # Queries - assert ">from_async_hook 🔒" in response.text assert ">query_two" in response.text # Views assert ">paginated_view 🔒" in response.text diff --git a/tests/test_plugins.py b/tests/test_plugins.py index b5a13ae5..f7adbd66 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -885,24 +885,61 @@ async def test_hook_startup_catalog_populated(ds_client): @pytest.mark.asyncio -async def test_plugin_startup_queries(ds_client): - queries = (await ds_client.get("/fixtures.json")).json()["queries"] +async def test_plugin_startup_can_add_queries(): + ds = Datasette(memory=True) + ds.add_memory_database("plugin_startup_queries", name="data") + + class AddQueriesPlugin: + __name__ = "AddQueriesPlugin" + + @hookimpl + def startup(self, datasette): + async def inner(): + result = await datasette.get_database("data").execute("select 1 + 1") + await datasette.add_query( + "data", + "from_startup", + "select {}".format(result.first()[0]), + source="plugin", + ) + + return inner + + ds.pm.register(AddQueriesPlugin(), name="add_queries_plugin") + try: + response = await ds.client.get("/data.json") + finally: + ds.pm.unregister(name="add_queries_plugin") + + queries = response.json()["queries"] queries_by_name = {q["name"]: q for q in queries} - assert queries_by_name["from_async_hook"]["sql"] == "select 2" - assert queries_by_name["from_async_hook"]["private"] is False - assert queries_by_name["from_hook"]["sql"] == "select 1, 'null' as actor_id" - assert queries_by_name["from_hook"]["private"] is False + assert queries_by_name["from_startup"]["sql"] == 
"select 2" + assert queries_by_name["from_startup"]["private"] is False @pytest.mark.asyncio -async def test_plugin_startup_query_from_hook(ds_client): - response = await ds_client.get("/fixtures/from_hook.json?_shape=array") - assert [{"1": 1, "actor_id": "null"}] == response.json() +async def test_plugin_startup_query_can_execute(): + ds = Datasette(memory=True) + ds.add_memory_database("plugin_startup_query_execute", name="data") + class AddQueryPlugin: + __name__ = "AddQueryPlugin" + + @hookimpl + def startup(self, datasette): + async def inner(): + await datasette.add_query( + "data", "from_startup", "select 2", source="plugin" + ) + + return inner + + ds.pm.register(AddQueryPlugin(), name="add_query_plugin") + try: + response = await ds.client.get("/data/from_startup.json?_shape=array") + finally: + ds.pm.unregister(name="add_query_plugin") -@pytest.mark.asyncio -async def test_plugin_startup_query_from_async_hook(ds_client): - response = await ds_client.get("/fixtures/from_async_hook.json?_shape=array") assert [{"2": 2}] == response.json() @@ -1514,9 +1551,9 @@ async def test_hook_top_query(ds_client): async def test_hook_top_canned_query(ds_client): try: pm.register(SlotPlugin(), name="SlotPlugin") - response = await ds_client.get("/fixtures/from_hook?z=xyz") + response = await ds_client.get("/fixtures/magic_parameters?z=xyz") assert response.status_code == 200 - assert "Xtop_query:fixtures:from_hook:xyz" in response.text + assert "Xtop_query:fixtures:magic_parameters:xyz" in response.text finally: pm.unregister(name="SlotPlugin") From ef43c103880fe819206f4e0dd12fa62add1c927c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 08:30:49 -0700 Subject: [PATCH 217/299] Add arbitrary write SQL execution page Refs #2735 --- datasette/app.py | 21 +- datasette/default_actions.py | 7 + datasette/templates/execute_write.html | 71 +++++++ datasette/templates/query_create.html | 3 + datasette/views/database.py | 266 +++++++++++++++++++++++-- 
docs/authentication.rst | 12 +- docs/json_api.rst | 9 + tests/test_queries.py | 122 ++++++++++++ 8 files changed, 487 insertions(+), 24 deletions(-) create mode 100644 datasette/templates/execute_write.html diff --git a/datasette/app.py b/datasette/app.py index ce85f447..409aed23 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -46,6 +46,7 @@ from .views import Context from .views.database import ( database_download, DatabaseView, + ExecuteWriteView, TableCreateView, QueryView, QueryCreateView, @@ -1249,18 +1250,22 @@ class Datasette: ) return {row["name"]: self._query_row_to_dict(row) for row in rows} - async def ensure_query_write_permissions(self, database, sql, *, actor=None): + async def ensure_query_write_permissions( + self, database, sql, *, actor=None, params=None, analysis=None + ): write_actions = { "insert": "insert-row", "update": "update-row", "delete": "delete-row", } db = self.get_database(database) - params = {name: "" for name in named_parameters(sql)} - try: - analysis = await db.analyze_sql(sql, params) - except sqlite3.DatabaseError as ex: - raise Forbidden(f"Could not analyze query: {ex}") from ex + if analysis is None: + if params is None: + params = {name: "" for name in named_parameters(sql)} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError as ex: + raise Forbidden(f"Could not analyze query: {ex}") from ex for access in analysis.table_accesses: action = write_actions.get(access.operation) @@ -2547,6 +2552,10 @@ class Datasette: QueryInsertView.as_view(self), r"/(?P[^\/\.]+)/-/queries/-/insert$", ) + add_route( + ExecuteWriteView.as_view(self), + r"/(?P[^\/\.]+)/-/execute-write$", + ) add_route( DatabaseSchemaView.as_view(self), r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", diff --git a/datasette/default_actions.py b/datasette/default_actions.py index e0e0aee5..6787b80e 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -48,6 +48,13 @@ def register_actions(): 
resource_class=DatabaseResource, also_requires="view-database", ), + Action( + name="execute-write-sql", + abbr="ews", + description="Execute writable SQL queries", + resource_class=DatabaseResource, + also_requires="view-database", + ), Action( name="create-table", abbr="ct", diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html new file mode 100644 index 00000000..5b4f30d9 --- /dev/null +++ b/datasette/templates/execute_write.html @@ -0,0 +1,71 @@ +{% extends "base.html" %} + +{% block title %}Execute write SQL{% endblock %} + +{% block extra_head %} +{{- super() -}} +{% include "_codemirror.html" %} +{% endblock %} + +{% block body_class %}execute-write db-{{ database|to_css_class }}{% endblock %} + +{% block crumbs %} +{{ crumbs.nav(request=request, database=database) }} +{% endblock %} + +{% block content %} + +

      Execute write SQL

      + +{% if execution_message %} +

      {{ execution_message }}

      +{% endif %} + +
      +

      + + {% if parameter_names %} +

      Parameters

      + {% for parameter in parameter_names %} +

      + {% endfor %} + {% endif %} + +

      Analysis

      + {% if analysis_error %} +

      {{ analysis_error }}

      + {% elif analysis_rows %} +
      + + + + + + + + + + + + {% for row in analysis_rows %} + + + + + + + + + {% endfor %} + +
      OperationDatabaseTablerequired permissionAllowedSource
      {{ row.operation }}{{ row.database }}{{ row.table }}{{ row.required_permission }}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}{{ row.source or "" }}
      + {% else %} +

      Analysis will show each affected table and required permission.

      + {% endif %} + +

      +
      + +{% include "_codemirror_foot.html" %} + +{% endblock %} diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 0e6a7b37..1b3d30a8 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -30,6 +30,9 @@ {% if can_publish %}

      {% endif %} + {% if sql and analysis_is_write %} +

      Execute write SQL

      + {% endif %}

      Analysis

      {% if analysis_error %} diff --git a/datasette/views/database.py b/datasette/views/database.py index d521f7ad..a90d889e 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -508,6 +508,27 @@ def _coerce_query_parameters(value, derived): return parameters +def _analysis_is_write(analysis): + return any( + access.operation in {"insert", "update", "delete"} + for access in analysis.table_accesses + ) + + +def _block_framing(response): + response.headers["Content-Security-Policy"] = "frame-ancestors 'none'" + response.headers["X-Frame-Options"] = "DENY" + return response + + +def _wants_json(request, is_json, data): + return ( + is_json + or request.headers.get("accept") == "application/json" + or (isinstance(data, dict) and data.get("_json")) + ) + + async def _json_or_form_payload(request): content_type = request.headers.get("content-type", "") if content_type.startswith("application/json"): @@ -538,15 +559,14 @@ async def _analyze_user_query(datasette, db, sql, *, actor, published): except sqlite3.DatabaseError as ex: raise QueryValidationError("Could not analyze query: {}".format(ex)) from ex - is_write = any( - access.operation in {"insert", "update", "delete"} - for access in analysis.table_accesses - ) + is_write = _analysis_is_write(analysis) if is_write: if published: raise QueryValidationError("Writable queries cannot be published") try: - await datasette.ensure_query_write_permissions(db.name, sql, actor=actor) + await datasette.ensure_query_write_permissions( + db.name, sql, actor=actor, analysis=analysis + ) except Forbidden as ex: raise QueryValidationError(str(ex), status=403) from ex else: @@ -575,6 +595,69 @@ def _analysis_rows(analysis): ] +async def _analysis_rows_with_permissions(datasette, analysis, actor): + rows = _analysis_rows(analysis) + for row in rows: + permission = row["required_permission"] + if permission: + row["allowed"] = await datasette.allowed( + action=permission, + 
resource=TableResource(row["database"], row["table"]), + actor=actor, + ) + else: + row["allowed"] = None + return rows + + +def _coerce_execute_write_payload(data, is_json): + if not isinstance(data, dict): + raise QueryValidationError("JSON must be a dictionary") + if is_json: + invalid_keys = set(data) - {"sql", "params"} + if invalid_keys: + raise QueryValidationError( + "Invalid keys: {}".format(", ".join(sorted(invalid_keys))) + ) + params = data.get("params") or {} + else: + params = { + key: value + for key, value in data.items() + if key not in {"sql", "csrftoken", "_json"} + } + if not isinstance(params, dict): + raise QueryValidationError("params must be a dictionary") + return data.get("sql"), params + + +async def _prepare_execute_write(datasette, db, sql, params, actor): + if not sql or not isinstance(sql, str): + raise QueryValidationError("SQL is required") + parameter_names = _derived_query_parameters(sql) + extra_params = set(params) - set(parameter_names) + if extra_params: + raise QueryValidationError( + "Unknown parameters: {}".format(", ".join(sorted(extra_params))) + ) + params = {name: params.get(name, "") for name in parameter_names} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError as ex: + raise QueryValidationError("Could not analyze query: {}".format(ex)) from ex + if not _analysis_is_write(analysis): + raise QueryValidationError( + "Use /-/query for read-only SQL; this endpoint only executes writes" + ) + try: + await datasette.ensure_query_write_permissions( + db.name, sql, actor=actor, analysis=analysis + ) + except Forbidden as ex: + raise QueryValidationError(str(ex), status=403) from ex + return parameter_names, params, analysis + + def _apply_query_data_types(data): typed = dict(data) for key in ("hide_sql", "published"): @@ -707,6 +790,160 @@ async def _prepare_query_update(datasette, request, db, existing, update): return update_kwargs +class ExecuteWriteView(BaseView): + name = 
"execute-write" + has_json_alternate = False + + async def _render_form( + self, + request, + db, + *, + sql="", + parameter_values=None, + analysis=None, + analysis_error=None, + execution_message=None, + execution_ok=None, + status=200, + ): + parameter_values = parameter_values or {} + parameter_names = [] + analysis_rows = [] + if sql and analysis_error is None: + try: + parameter_names = _derived_query_parameters(sql) + if analysis is None: + params = {parameter: "" for parameter in parameter_names} + analysis = await db.analyze_sql(sql, params) + if _analysis_is_write(analysis): + analysis_rows = await _analysis_rows_with_permissions( + self.ds, analysis, request.actor + ) + else: + analysis_error = ( + "Use /-/query for read-only SQL; " + "this endpoint only executes writes" + ) + except (QueryValidationError, sqlite3.DatabaseError) as ex: + analysis_error = getattr(ex, "message", str(ex)) + + response = await self.render( + ["execute_write.html"], + request, + { + "database": db.name, + "database_color": db.color, + "sql": sql, + "parameter_names": parameter_names, + "parameter_values": parameter_values, + "analysis_error": analysis_error, + "analysis_rows": analysis_rows, + "execution_message": execution_message, + "execution_ok": execution_ok, + "execute_disabled": bool( + (not sql) + or analysis_error + or any(row["allowed"] is False for row in analysis_rows) + ), + }, + ) + response.status = status + return _block_framing(response) + + async def get(self, request): + db = await self.ds.resolve_database(request) + await self.ds.ensure_permission( + action="execute-write-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ) + return await self._render_form( + request, + db, + sql=request.args.get("sql") or "", + ) + + async def post(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-write-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return 
_block_framing( + _error(["Permission denied: need execute-write-sql"], 403) + ) + if not db.is_mutable: + return _block_framing(_error(["Database is immutable"], 403)) + + data = {} + is_json = request.headers.get("content-type", "").startswith("application/json") + sql = "" + provided_params = {} + try: + data, is_json = await _json_or_form_payload(request) + sql, provided_params = _coerce_execute_write_payload(data, is_json) + parameter_names, params, analysis = await _prepare_execute_write( + self.ds, db, sql, provided_params, request.actor + ) + except QueryValidationError as ex: + if _wants_json(request, is_json, data): + return _block_framing(_error([ex.message], ex.status)) + return await self._render_form( + request, + db, + sql=sql or "", + parameter_values=provided_params, + analysis_error=ex.message, + execution_message=ex.message, + execution_ok=False, + status=ex.status, + ) + + try: + cursor = await db.execute_write(sql, params, request=request) + except sqlite3.DatabaseError as ex: + message = str(ex) + if _wants_json(request, is_json, data): + return _block_framing(_error([message], 400)) + return await self._render_form( + request, + db, + sql=sql, + parameter_values=params, + analysis=analysis, + execution_message=message, + execution_ok=False, + status=400, + ) + + message = "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) + if _wants_json(request, is_json, data): + return _block_framing( + Response.json( + { + "ok": True, + "message": message, + "rowcount": cursor.rowcount, + "analysis": _analysis_rows(analysis), + } + ) + ) + + return await self._render_form( + request, + db, + sql=sql, + parameter_values={name: params.get(name, "") for name in parameter_names}, + analysis=analysis, + execution_message=message, + execution_ok=True, + ) + + class QueryListView(BaseView): name = "query-list" @@ -753,18 +990,9 @@ class QueryCreateView(BaseView): parameter_names = _derived_query_parameters(sql) 
params = {parameter: "" for parameter in parameter_names} analysis = await db.analyze_sql(sql, params) - rows = _analysis_rows(analysis) - for row in rows: - permission = row["required_permission"] - if permission: - row["allowed"] = await self.ds.allowed( - action=permission, - resource=TableResource(row["database"], row["table"]), - actor=request.actor, - ) - else: - row["allowed"] = None - analysis_rows = rows + analysis_rows = await _analysis_rows_with_permissions( + self.ds, analysis, request.actor + ) except (QueryValidationError, sqlite3.DatabaseError) as ex: analysis_error = getattr(ex, "message", str(ex)) @@ -783,6 +1011,10 @@ class QueryCreateView(BaseView): ), "analysis_error": analysis_error, "analysis_rows": analysis_rows, + "analysis_is_write": bool( + analysis_rows + and any(row["required_permission"] for row in analysis_rows) + ), "save_disabled": bool( analysis_error or any(row["allowed"] is False for row in analysis_rows) diff --git a/docs/authentication.rst b/docs/authentication.rst index 543f069b..b6a4cb7e 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1423,13 +1423,23 @@ Actor is allowed to drop a database table. execute-sql ----------- -Actor is allowed to run arbitrary SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 +Actor is allowed to run arbitrary read-only SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) See also :ref:`the default_allow_sql setting `. +.. _actions_execute_write_sql: + +execute-write-sql +----------------- + +Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. 
+ +``resource`` - ``datasette.resources.DatabaseResource(database)`` + ``database`` is the name of the database (string) + .. _actions_permissions_debug: permissions-debug diff --git a/docs/json_api.rst b/docs/json_api.rst index d5cd231c..e4c9e86e 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -526,6 +526,15 @@ Creating saved queries ``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +.. _ExecuteWriteView: + +Executing write SQL +~~~~~~~~~~~~~~~~~~~ + +``GET //-/execute-write`` displays a form for executing writable SQL. A ``?sql=`` query string pre-populates the form without executing it. + +``POST //-/execute-write`` executes writable SQL. This requires ``execute-write-sql`` for the database plus the relevant table-level write permissions. + .. _QueryDefinitionView: Getting a saved query definition diff --git a/tests/test_queries.py b/tests/test_queries.py index c6685d6c..05bc5ee1 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -212,6 +212,7 @@ async def test_query_actions_are_registered(): ds = Datasette() await ds.invoke_startup() + assert ds.get_action("execute-write-sql").resource_class is DatabaseResource assert ds.get_action("insert-query").resource_class is DatabaseResource assert ds.get_action("publish-query").resource_class is DatabaseResource assert ds.get_action("update-query").resource_class is QueryResource @@ -492,6 +493,127 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert "/data/-/queries/-/create?sql=select+%2A+from+dogs" in query_response.text +@pytest.mark.asyncio +async def test_execute_write_get_prepopulates_without_executing(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_get", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.get( + 
"/data/-/execute-write?sql=insert+into+dogs+(name)+values+('Cleo')", + actor={"id": "root"}, + ) + + assert response.status_code == 200 + assert response.headers["content-security-policy"] == "frame-ancestors 'none'" + assert response.headers["x-frame-options"] == "DENY" + assert "Execute write SQL" in response.text + assert 'action="/data/-/execute-write"' in response.text + assert "insert into dogs (name) values ('Cleo')" in response.text + assert (await db.execute("select count(*) from dogs")).first()[0] == 0 + + +@pytest.mark.asyncio +async def test_execute_write_post_requires_database_and_table_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + db = ds.add_memory_database("execute_write_permissions", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + no_database_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "outsider"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + no_table_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert no_database_permission.status_code == 403 + assert no_database_permission.json()["errors"] == [ + "Permission denied: need execute-write-sql" + ] + assert no_table_permission.status_code == 403 + assert no_table_permission.json()["errors"] == [ + "Permission denied: need insert-row on data/dogs" + ] + + ds.config = { + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "writer"}, + } + } + }, + } + } + } + allowed = 
await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert allowed.status_code == 200 + assert allowed.json()["ok"] is True + assert allowed.json()["rowcount"] == 1 + assert allowed.json()["analysis"][0]["operation"] == "insert" + assert (await db.execute("select name from dogs")).first()[0] == "Cleo" + + +@pytest.mark.asyncio +async def test_execute_write_post_rejects_read_only_sql(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_read_only", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "select * from dogs"}, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == [ + "Use /-/query for read-only SQL; this endpoint only executes writes" + ] + + @pytest.mark.asyncio async def test_query_owner_gets_update_delete_and_writable_view_defaults(): ds = Datasette(memory=True, default_deny=True) From b7505a9fc22fd96f0c6aad60c8b149bc1978d7b0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 08:49:18 -0700 Subject: [PATCH 218/299] Add execute write SQL database action Refs #2735 --- datasette/default_database_actions.py | 22 +++++++++++++++++ datasette/plugins.py | 1 + tests/test_queries.py | 34 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 datasette/default_database_actions.py diff --git a/datasette/default_database_actions.py b/datasette/default_database_actions.py new file mode 100644 index 00000000..78055392 --- /dev/null +++ b/datasette/default_database_actions.py @@ -0,0 +1,22 @@ +from datasette import hookimpl +from datasette.resources import DatabaseResource + + +@hookimpl +def database_actions(datasette, actor, 
database, request): + async def inner(): + if not await datasette.allowed( + action="execute-write-sql", + resource=DatabaseResource(database), + actor=actor, + ): + return [] + return [ + { + "href": datasette.urls.database(database) + "/-/execute-write", + "label": "Execute write SQL", + "description": "Run writable SQL with table permission checks.", + } + ] + + return inner diff --git a/datasette/plugins.py b/datasette/plugins.py index f532ac60..5a31cdad 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -30,6 +30,7 @@ DEFAULT_PLUGINS = ( "datasette.blob_renderer", "datasette.default_debug_menu", "datasette.default_jump_items", + "datasette.default_database_actions", "datasette.handle_exception", "datasette.forbidden", "datasette.events", diff --git a/tests/test_queries.py b/tests/test_queries.py index 05bc5ee1..1c9175cc 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -515,6 +515,40 @@ async def test_execute_write_get_prepopulates_without_executing(): assert (await db.execute("select count(*) from dogs")).first()[0] == 0 +@pytest.mark.asyncio +async def test_database_action_menu_links_to_execute_write_for_permitted_actor(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": { + "id": ["writer", "viewer"], + }, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("execute_write_menu", name="data") + await ds.invoke_startup() + + anonymous_response = await ds.client.get("/data") + viewer_response = await ds.client.get("/data", actor={"id": "viewer"}) + writer_response = await ds.client.get("/data", actor={"id": "writer"}) + + assert anonymous_response.status_code == 403 + assert viewer_response.status_code == 200 + assert "Execute write SQL" not in viewer_response.text + assert writer_response.status_code == 200 + assert "Database actions" in writer_response.text + assert 'href="/data/-/execute-write"' in 
writer_response.text + assert "Execute write SQL" in writer_response.text + + @pytest.mark.asyncio async def test_execute_write_post_requires_database_and_table_permissions(): ds = Datasette( From e0d39ba69f677be1af1cf580beb83dbc56c8ef87 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 09:41:32 -0700 Subject: [PATCH 219/299] Store query options as JSON Refs #2735 --- datasette/app.py | 105 ++++++++++++++++++++++++--------- datasette/utils/internal_db.py | 8 +-- docs/internals.rst | 20 +++++++ queries-plan.md | 19 +++--- tests/test_queries.py | 45 +++++++++++--- 5 files changed, 143 insertions(+), 54 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 409aed23..023568dd 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -283,6 +283,16 @@ FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" DEFAULT_NOT_SET = object() UNCHANGED = object() +QUERY_OPTION_FIELDS = ( + "hide_sql", + "fragment", + "on_success_message", + "on_success_message_sql", + "on_success_redirect", + "on_error_message", + "on_error_redirect", +) + ResourcesSQL = collections.namedtuple("ResourcesSQL", ("sql", "params")) @@ -1056,6 +1066,7 @@ class Datasette: if row is None: return None parameters = json.loads(row["parameters"] or "[]") + options = json.loads(row["options"] or "{}") is_write = bool(row["is_write"]) return { "database": row["database_name"], @@ -1064,8 +1075,8 @@ class Datasette: "title": row["title"], "description": row["description"], "description_html": row["description_html"], - "hide_sql": bool(row["hide_sql"]), - "fragment": row["fragment"], + "hide_sql": bool(options.get("hide_sql")), + "fragment": options.get("fragment"), "params": parameters, "parameters": parameters, "is_write": is_write, @@ -1073,13 +1084,25 @@ class Datasette: "published": bool(row["published"]), "source": row["source"], "owner_id": row["owner_id"], - "on_success_message": row["on_success_message"], - "on_success_message_sql": 
row["on_success_message_sql"], - "on_success_redirect": row["on_success_redirect"], - "on_error_message": row["on_error_message"], - "on_error_redirect": row["on_error_redirect"], + "on_success_message": options.get("on_success_message"), + "on_success_message_sql": options.get("on_success_message_sql"), + "on_success_redirect": options.get("on_success_redirect"), + "on_error_message": options.get("on_error_message"), + "on_error_redirect": options.get("on_error_redirect"), } + @staticmethod + def _query_options_json(options): + options_dict = {} + for field in QUERY_OPTION_FIELDS: + value = options.get(field) + if field == "hide_sql": + if value: + options_dict[field] = True + elif value is not None: + options_dict[field] = value + return json.dumps(options_dict, sort_keys=True) + async def add_query( self, database, @@ -1104,13 +1127,22 @@ class Datasette: replace=True, ): parameters_json = json.dumps(list(parameters or [])) + options_json = self._query_options_json( + { + "hide_sql": hide_sql, + "fragment": fragment, + "on_success_message": on_success_message, + "on_success_message_sql": on_success_message_sql, + "on_success_redirect": on_success_redirect, + "on_error_message": on_error_message, + "on_error_redirect": on_error_redirect, + } + ) sql_statement = """ INSERT INTO queries ( database_name, name, sql, title, description, description_html, - hide_sql, fragment, parameters, is_write, published, source, - owner_id, on_success_message, on_success_message_sql, - on_success_redirect, on_error_message, on_error_redirect - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + options, parameters, is_write, published, source, owner_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""" if replace: sql_statement += """ @@ -1119,18 +1151,12 @@ class Datasette: title = excluded.title, description = excluded.description, description_html = excluded.description_html, - hide_sql = excluded.hide_sql, - fragment = excluded.fragment, + options = excluded.options, parameters = excluded.parameters, is_write = excluded.is_write, published = excluded.published, source = excluded.source, owner_id = excluded.owner_id, - on_success_message = excluded.on_success_message, - on_success_message_sql = excluded.on_success_message_sql, - on_success_redirect = excluded.on_success_redirect, - on_error_message = excluded.on_error_message, - on_error_redirect = excluded.on_error_redirect, updated_at = CURRENT_TIMESTAMP """ await self.get_internal_database().execute_write( @@ -1142,18 +1168,12 @@ class Datasette: title, description, description_html, - int(bool(hide_sql)), - fragment, + options_json, parameters_json, int(bool(is_write)), int(bool(published)), source, owner_id, - on_success_message, - on_success_message_sql, - on_success_redirect, - on_error_message, - on_error_redirect, ], ) @@ -1184,13 +1204,15 @@ class Datasette: "title": title, "description": description, "description_html": description_html, - "hide_sql": hide_sql, - "fragment": fragment, "parameters": parameters, "is_write": is_write, "published": published, "source": source, "owner_id": owner_id, + } + option_fields = { + "hide_sql": hide_sql, + "fragment": fragment, "on_success_message": on_success_message, "on_success_message_sql": on_success_message_sql, "on_success_redirect": on_success_redirect, @@ -1202,12 +1224,39 @@ class Datasette: for field, value in fields.items(): if value is UNCHANGED: continue - if field in {"hide_sql", "is_write", "published"}: + if field in {"is_write", "published"}: value = int(bool(value)) elif field == "parameters": value = json.dumps(list(value or [])) updates.append(f"{field} = ?") params.append(value) + changed_options = { + field: value + for field, value in 
option_fields.items() + if value is not UNCHANGED + } + if changed_options: + rows = await self.get_internal_database().execute( + """ + SELECT options FROM queries + WHERE database_name = ? AND name = ? + """, + [database, name], + ) + row = rows.first() + options = json.loads(row["options"] or "{}") if row is not None else {} + for field, value in changed_options.items(): + if field == "hide_sql": + if value: + options[field] = True + else: + options.pop(field, None) + elif value is None: + options.pop(field, None) + else: + options[field] = value + updates.append("options = ?") + params.append(json.dumps(options, sort_keys=True)) if not updates: return updates.append("updated_at = CURRENT_TIMESTAMP") diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 9008c083..854e8784 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -120,18 +120,12 @@ async def initialize_metadata_tables(db): title TEXT, description TEXT, description_html TEXT, - hide_sql INTEGER NOT NULL DEFAULT 0 CHECK (hide_sql IN (0, 1)), - fragment TEXT, + options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, - on_success_message TEXT, - on_success_message_sql TEXT, - on_success_redirect TEXT, - on_error_message TEXT, - on_error_redirect TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (database_name, name), diff --git a/docs/internals.rst b/docs/internals.rst index e0123a7b..a0845ade 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2148,6 +2148,26 @@ The internal database schema is as follows: config TEXT, PRIMARY KEY (database_name, resource_name, column_name) ); + CREATE TABLE queries ( + database_name TEXT NOT NULL, + name TEXT NOT NULL, + sql TEXT NOT 
NULL, + title TEXT, + description TEXT, + description_html TEXT, + options TEXT NOT NULL DEFAULT '{}', + parameters TEXT NOT NULL DEFAULT '[]', + is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), + published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), + source TEXT NOT NULL DEFAULT 'user', + owner_id TEXT, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (database_name, name), + CHECK (is_write = 0 OR published = 0) + ); + CREATE INDEX queries_owner_idx + ON queries(owner_id); .. [[[end]]] diff --git a/queries-plan.md b/queries-plan.md index 283ca866..dbc46101 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -42,18 +42,12 @@ CREATE TABLE IF NOT EXISTS queries ( title TEXT, description TEXT, description_html TEXT, - hide_sql INTEGER NOT NULL DEFAULT 0 CHECK (hide_sql IN (0, 1)), - fragment TEXT, + options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, - on_success_message TEXT, - on_success_message_sql TEXT, - on_success_redirect TEXT, - on_error_message TEXT, - on_error_redirect TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (database_name, name), @@ -67,9 +61,10 @@ CREATE INDEX IF NOT EXISTS queries_owner_idx Column notes: - `database_name`, `name`, and `sql` are the routing and execution core. -- Display fields become columns: `title`, `description`, `description_html`, `hide_sql`, and `fragment`. +- Display fields become columns: `title`, `description`, and `description_html`. +- Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. 
That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. - `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. -- Existing writable query behavior gets columns too: `is_write`, success/error messages, success/error redirects, and `on_success_message_sql`. +- Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. - `published` only applies to read-only queries. A writable query can still be public through explicit `view-query` permissions, but the "publish for users without execute-sql" shortcut should be read-only. - `source` distinguishes `user`, `config`, and `plugin` rows. - `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. @@ -372,11 +367,11 @@ await datasette.update_query( ) ``` -That call should set `on_success_redirect` to SQL `NULL`; omitting `on_success_redirect` should leave the existing value unchanged. +For column-backed fields, `None` should write SQL `NULL`. For option fields, `None` should remove that key from the JSON object so `get_query()` returns `None`; omitting the field should leave the existing option unchanged. Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. 
+The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. ## Query page save UI @@ -430,7 +425,7 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - Query update uses `POST /{database}/{query}/-/update` with an `{"update": {...}}` body. - Query delete uses `POST /{database}/{query}/-/delete`. - There are no `PATCH` or HTTP `DELETE` routes for query management. -- `datasette.update_query(..., field=None)` writes `NULL`, while omitted fields are left unchanged. +- `datasette.update_query(..., field=None)` writes `NULL` for column-backed fields and removes JSON keys for option fields, while omitted fields are left unchanged. - Owner gets default `update-query` and `delete-query` for their own user-created rows. - Admin can manage other users' queries with `update-query` and `delete-query`. - User API rejects magic parameters. 
diff --git a/tests/test_queries.py b/tests/test_queries.py index 1c9175cc..edb9484a 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1,3 +1,5 @@ +import json + import pytest from datasette.app import Datasette @@ -25,18 +27,12 @@ async def test_queries_internal_table_schema(): "title", "description", "description_html", - "hide_sql", - "fragment", + "options", "parameters", "is_write", "published", "source", "owner_id", - "on_success_message", - "on_success_message_sql", - "on_success_redirect", - "on_error_message", - "on_error_redirect", "created_at", "updated_at", ] @@ -62,6 +58,20 @@ async def test_add_get_and_remove_query(): owner_id="alice", ) + options_row = ( + await ds.get_internal_database().execute( + """ + SELECT options FROM queries + WHERE database_name = ? AND name = ? + """, + ["data", "top_customers"], + ) + ).first() + assert json.loads(options_row["options"]) == { + "fragment": "chart", + "hide_sql": True, + } + query = await ds.get_query("data", "top_customers") assert query == { "database": "data", @@ -108,6 +118,17 @@ async def test_update_query_only_updates_provided_fields(): parameters=["one"], ) + options_row = ( + await ds.get_internal_database().execute( + """ + SELECT options FROM queries + WHERE database_name = ? AND name = ? + """, + ["data", "redirect"], + ) + ).first() + assert json.loads(options_row["options"]) == {"on_success_redirect": "/original"} + await ds.update_query( "data", "redirect", @@ -123,6 +144,16 @@ async def test_update_query_only_updates_provided_fields(): assert query["on_success_redirect"] is None assert query["sql"] == "select 1" assert query["published"] is False + options_row = ( + await ds.get_internal_database().execute( + """ + SELECT options FROM queries + WHERE database_name = ? AND name = ? 
+ """, + ["data", "redirect"], + ) + ).first() + assert json.loads(options_row["options"]) == {} @pytest.mark.asyncio From e62a5ea3378095832b0388ac5c6014c23127a577 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 09:46:39 -0700 Subject: [PATCH 220/299] Rename query publication flag Refs #2735 --- datasette/app.py | 18 ++++----- datasette/default_permissions/defaults.py | 4 +- datasette/templates/query_create.html | 2 +- datasette/utils/internal_db.py | 4 +- datasette/views/database.py | 26 ++++++------- docs/internals.rst | 4 +- queries-plan.md | 46 +++++++++++------------ tests/test_queries.py | 22 +++++------ 8 files changed, 63 insertions(+), 63 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 023568dd..40877802 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -615,7 +615,7 @@ class Datasette: fragment=query_config.get("fragment"), parameters=query_config.get("params"), is_write=bool(query_config.get("write")), - published=bool(query_config.get("published")), + is_published=bool(query_config.get("is_published")), source="config", on_success_message=query_config.get("on_success_message"), on_success_message_sql=query_config.get("on_success_message_sql"), @@ -1081,7 +1081,7 @@ class Datasette: "parameters": parameters, "is_write": is_write, "write": is_write, - "published": bool(row["published"]), + "is_published": bool(row["is_published"]), "source": row["source"], "owner_id": row["owner_id"], "on_success_message": options.get("on_success_message"), @@ -1116,7 +1116,7 @@ class Datasette: fragment=None, parameters=None, is_write=False, - published=False, + is_published=False, source="plugin", owner_id=None, on_success_message=None, @@ -1141,7 +1141,7 @@ class Datasette: sql_statement = """ INSERT INTO queries ( database_name, name, sql, title, description, description_html, - options, parameters, is_write, published, source, owner_id + options, parameters, is_write, is_published, source, owner_id ) VALUES (?, ?, ?, ?, 
?, ?, ?, ?, ?, ?, ?, ?) """ if replace: @@ -1154,7 +1154,7 @@ class Datasette: options = excluded.options, parameters = excluded.parameters, is_write = excluded.is_write, - published = excluded.published, + is_published = excluded.is_published, source = excluded.source, owner_id = excluded.owner_id, updated_at = CURRENT_TIMESTAMP @@ -1171,7 +1171,7 @@ class Datasette: options_json, parameters_json, int(bool(is_write)), - int(bool(published)), + int(bool(is_published)), source, owner_id, ], @@ -1190,7 +1190,7 @@ class Datasette: fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - published=UNCHANGED, + is_published=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -1206,7 +1206,7 @@ class Datasette: "description_html": description_html, "parameters": parameters, "is_write": is_write, - "published": published, + "is_published": is_published, "source": source, "owner_id": owner_id, } @@ -1224,7 +1224,7 @@ class Datasette: for field, value in fields.items(): if value is UNCHANGED: continue - if field in {"is_write", "published"}: + if field in {"is_write", "is_published"}: value = int(bool(value)) elif field == "parameters": value = json.dumps(list(value or [])) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 9737de96..58deea01 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -136,7 +136,7 @@ async def default_query_permissions_sql( 'published query' AS reason FROM queries WHERE is_write = 0 - AND published = 1 + AND is_published = 1 UNION ALL SELECT q.database_name AS parent, q.name AS child, 1 AS allow, 'execute-sql allows query' AS reason @@ -145,7 +145,7 @@ async def default_query_permissions_sql( ON es.parent = q.database_name AND es.child IS NULL WHERE q.is_write = 0 - AND q.published = 0 + AND q.is_published = 0 {trusted_writable_sql} {user_writable_sql} """, diff --git 
a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 1b3d30a8..fb2599d2 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -28,7 +28,7 @@

      {% if can_publish %} -

      +

      {% endif %} {% if sql and analysis_is_write %}

      Execute write SQL

      diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 854e8784..0f84e886 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -123,13 +123,13 @@ async def initialize_metadata_tables(db): options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), + is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR published = 0) + CHECK (is_write = 0 OR is_published = 0) ); CREATE INDEX IF NOT EXISTS queries_owner_idx diff --git a/datasette/views/database.py b/datasette/views/database.py index a90d889e..ed38189b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -431,7 +431,7 @@ _query_fields = { "fragment", "parameters", "params", - "published", + "is_published", "on_success_message", "on_success_message_sql", "on_success_redirect", @@ -549,7 +549,7 @@ async def _check_query_name(db, name, *, existing=False): raise QueryValidationError("Query name conflicts with a table or view") -async def _analyze_user_query(datasette, db, sql, *, actor, published): +async def _analyze_user_query(datasette, db, sql, *, actor, is_published): if not sql or not isinstance(sql, str): raise QueryValidationError("SQL is required") derived = _derived_query_parameters(sql) @@ -561,7 +561,7 @@ async def _analyze_user_query(datasette, db, sql, *, actor, published): is_write = _analysis_is_write(analysis) if is_write: - if published: + if is_published: raise QueryValidationError("Writable queries cannot be published") try: await datasette.ensure_query_write_permissions( @@ -660,7 +660,7 @@ async def _prepare_execute_write(datasette, db, 
sql, params, actor): def _apply_query_data_types(data): typed = dict(data) - for key in ("hide_sql", "published"): + for key in ("hide_sql", "is_published"): if key in typed: typed[key] = _as_bool(typed[key]) return typed @@ -677,15 +677,15 @@ async def _prepare_query_create(datasette, request, db, data): if await datasette.get_query(db.name, name) is not None: raise QueryValidationError("Query already exists") - published = _as_bool(data.get("published")) + is_published = _as_bool(data.get("is_published")) is_write, derived, analysis = await _analyze_user_query( datasette, db, data.get("sql"), actor=request.actor, - published=published, + is_published=is_published, ) - if published and not await datasette.allowed( + if is_published and not await datasette.allowed( action="publish-query", resource=DatabaseResource(db.name), actor=request.actor, @@ -708,7 +708,7 @@ async def _prepare_query_create(datasette, request, db, data): "fragment": data.get("fragment"), "parameters": parameters, "is_write": is_write, - "published": published, + "is_published": is_published, "source": "user", "owner_id": _actor_id(request.actor), "on_success_message": data.get("on_success_message"), @@ -727,7 +727,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): update = _apply_query_data_types(update) sql = update.get("sql", existing["sql"]) - published = update.get("published", existing["published"]) + is_published = update.get("is_published", existing["is_published"]) query_is_write = existing["is_write"] derived = _derived_query_parameters(sql) parameters = None @@ -738,11 +738,11 @@ async def _prepare_query_update(datasette, request, db, existing, update): db, sql, actor=request.actor, - published=published, + is_published=is_published, ) - elif published and query_is_write: + elif is_published and query_is_write: raise QueryValidationError("Writable queries cannot be published") - if published and not existing["published"]: + if is_published and not 
existing["is_published"]: if not await datasette.allowed( action="publish-query", resource=DatabaseResource(db.name), @@ -772,7 +772,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): "fragment": update.get("fragment"), "parameters": parameters, "is_write": query_is_write, - "published": published, + "is_published": is_published, "on_success_message": update.get("on_success_message"), "on_success_message_sql": update.get("on_success_message_sql"), "on_success_redirect": update.get("on_success_redirect"), diff --git a/docs/internals.rst b/docs/internals.rst index a0845ade..892cf64c 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2158,13 +2158,13 @@ The internal database schema is as follows: options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), + is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR published = 0) + CHECK (is_write = 0 OR is_published = 0) ); CREATE INDEX queries_owner_idx ON queries(owner_id); diff --git a/queries-plan.md b/queries-plan.md index dbc46101..0fbddecd 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -13,7 +13,7 @@ Terminology change: these are now "queries", not "canned queries". Legacy code a - Internal table name: `queries`. - Query definitions should use real columns, not a JSON blob for all options. - Query parameter names live in a `parameters` text column as a JSON array. No default values for parameters in this pass. -- No `queries_database_published_idx` index. +- No `queries_database_is_published_idx` index. - User-created queries require `execute-sql` and `insert-query` on the database. 
Writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. - `publish-query` is the permission for creating or updating a query so users without `execute-sql` can execute it. - Add `update-query` and `delete-query`, so administrators can manage queries created by other users. @@ -45,13 +45,13 @@ CREATE TABLE IF NOT EXISTS queries ( options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - published INTEGER NOT NULL DEFAULT 0 CHECK (published IN (0, 1)), + is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR published = 0) + CHECK (is_write = 0 OR is_published = 0) ); CREATE INDEX IF NOT EXISTS queries_owner_idx @@ -65,11 +65,11 @@ Column notes: - Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. - `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. - Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. -- `published` only applies to read-only queries. A writable query can still be public through explicit `view-query` permissions, but the "publish for users without execute-sql" shortcut should be read-only. +- `is_published` only applies to read-only queries. 
A writable query can still be public through explicit `view-query` permissions, but the "publish for users without execute-sql" shortcut should be read-only. - `source` distinguishes `user`, `config`, and `plugin` rows. - `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. -No separate index is needed on `(database_name, name)` because the primary key already creates one. Do not add a `queries_database_published_idx` index for now. +No separate index is needed on `(database_name, name)` because the primary key already creates one. Do not add a `queries_database_is_published_idx` index for now. `QueryResource.resources_sql()` can become: @@ -115,7 +115,7 @@ User-created query creation requires: - `insert-query` on `DatabaseResource(database)` - If analysis shows the query is writable, the table-level write permissions described in the writable query section. -Setting `published=1` requires: +Setting `is_published=1` requires: - `publish-query` on `DatabaseResource(database)` - The query must be read-only according to `Database.analyze_sql()`. @@ -125,7 +125,7 @@ Updating an existing query requires: - `update-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. - If the SQL changes, also require `execute-sql` on the database. - If the changed SQL is writable, also require the table-level write permissions described in the writable query section. -- If `published` changes from `0` to `1`, also require `publish-query` on the database. +- If `is_published` changes from `0` to `1`, also require `publish-query` on the database. Deleting an existing query requires: @@ -140,12 +140,12 @@ Default owner permissions: Default execution rule for read-only queries: -- If `published=0`, the actor needs `execute-sql` on the database. -- If `published=1`, the actor can execute the query without `execute-sql`. +- If `is_published=0`, the actor needs `execute-sql` on the database. 
+- If `is_published=1`, the actor can execute the query without `execute-sql`. Default execution rule for user-created writable queries: -- `published` must be `0`. +- `is_published` must be `0`. - The actor must have `view-query`. - The actor must currently have every write permission required by fresh `Database.analyze_sql()` results for the query SQL. @@ -153,8 +153,8 @@ Implementation: - Remove `view-query` from the broad `DEFAULT_ALLOW_ACTIONS` set. - Replace it with query-aware default `view-query` permission SQL. -- For `published=1 AND is_write=0`, emit a child-level `view-query` allow. -- For `published=0 AND is_write=0`, emit child-level `view-query` allows for queries whose parent database is in the actor's `execute-sql` allowed resources. +- For `is_published=1 AND is_write=0`, emit a child-level `view-query` allow. +- For `is_published=0 AND is_write=0`, emit child-level `view-query` allows for queries whose parent database is in the actor's `execute-sql` allowed resources. - For `is_write=1 AND source='user'`, emit `view-query` only for the owner or actors with explicit `view-query` permission, then have `QueryView` perform the fresh analysis/table-permission check before execution. - For trusted writable queries, preserve current behavior by emitting child-level `view-query` allows for `is_write=1 AND source IN ('config', 'plugin')` when Datasette is not running with `--default-deny`. @@ -181,7 +181,7 @@ Validation flow for user-created queries: 1. Derive named parameters from the SQL and pass harmless placeholder values into `db.analyze_sql()` so SQLite can prepare statements with bindings. 2. If analysis raises a SQLite error, reject the query. 3. If every table access is `read`, treat the query as read-only and require `execute-sql` plus `insert-query`/`update-query` as described above. -4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `published=0`. +4. 
If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_published=0`. 5. Reject writable user-created queries that access a database other than the database they are being saved against, until `analyze_sql()` can reliably map attached SQLite schemas back to Datasette database names. 6. For every write access returned by analysis, require the corresponding permission on `TableResource(access.database, access.table)`: - `insert` -> `insert-row` @@ -201,7 +201,7 @@ Fail closed cases for user-created writable queries: - Analysis reports any write operation that cannot be mapped to a Datasette table resource. - Analysis reports writes outside the target database. - The actor lacks any required table write permission. -- `published=1` is requested. +- `is_published=1` is requested. This gives us writable user-created queries without letting `execute-sql` alone become a path to create arbitrary write endpoints. @@ -226,7 +226,7 @@ Create request: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "published": false, + "is_published": false, "parameters": ["region"] } } @@ -243,7 +243,7 @@ Successful create returns `201` and the created query definition: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "published": false, + "is_published": false, "parameters": ["region"] } } @@ -255,7 +255,7 @@ Update request, imitating `RowUpdateView`: { "update": { "title": "Top customers by revenue", - "published": true + "is_published": true }, "return": true } @@ -271,7 +271,7 @@ Successful update returns `{"ok": true}` by default. 
With `"return": true`, retu "name": "top_customers", "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers by revenue", - "published": true + "is_published": true } } ``` @@ -318,7 +318,7 @@ await datasette.add_query( fragment=None, parameters=None, is_write=False, - published=False, + is_published=False, source="plugin", owner_id=None, on_success_message=None, @@ -341,7 +341,7 @@ await datasette.update_query( fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - published=UNCHANGED, + is_published=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -371,13 +371,13 @@ For column-backed fields, `None` should write SQL `NULL`. For option fields, `No Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. +The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. 
## Query page save UI On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. -The save form should call `POST /{database}/-/queries/-/insert` and default to `published=false`. +The save form should call `POST /{database}/-/queries/-/insert` and default to `is_published=false`. If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. @@ -416,7 +416,7 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - `view-query` is no longer globally default-allowed; default query permissions come from the query-aware hook. - Unpublished read-only query requires `execute-sql` to execute. - Published read-only query can be executed without `execute-sql`. -- Setting `published=true` requires `publish-query`. +- Setting `is_published=true` requires `publish-query`. - User-created query requires both `execute-sql` and `insert-query`. - User-created writable query creation uses `Database.analyze_sql()` and requires matching `insert-row`, `update-row`, and/or `delete-row` permissions for every reported write access. - `/{database}/-/queries/-/create` provides the writable-query authoring UI with an analysis panel and disabled save until all required write permissions pass. 
diff --git a/tests/test_queries.py b/tests/test_queries.py index edb9484a..df4131b9 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -30,7 +30,7 @@ async def test_queries_internal_table_schema(): "options", "parameters", "is_write", - "published", + "is_published", "source", "owner_id", "created_at", @@ -53,7 +53,7 @@ async def test_add_get_and_remove_query(): hide_sql=True, fragment="chart", parameters=["region"], - published=True, + is_published=True, source="user", owner_id="alice", ) @@ -86,7 +86,7 @@ async def test_add_get_and_remove_query(): "parameters": ["region"], "is_write": False, "write": False, - "published": True, + "is_published": True, "source": "user", "owner_id": "alice", "on_success_message": None, @@ -143,7 +143,7 @@ async def test_update_query_only_updates_provided_fields(): assert query["params"] == [] assert query["on_success_redirect"] is None assert query["sql"] == "select 1" - assert query["published"] is False + assert query["is_published"] is False options_row = ( await ds.get_internal_database().execute( """ @@ -190,7 +190,7 @@ async def test_config_queries_imported_to_internal_table(): "parameters": ["name"], "is_write": False, "write": False, - "published": False, + "is_published": False, "source": "config", "owner_id": None, "on_success_message": None, @@ -218,8 +218,8 @@ async def test_unpublished_query_requires_execute_sql_but_published_does_not(): ds = Datasette(memory=True, settings={"default_allow_sql": False}) ds.add_memory_database("query_permissions", name="data") await ds.invoke_startup() - await ds.add_query("data", "unpublished", "select 1", published=False) - await ds.add_query("data", "published", "select 1", published=True) + await ds.add_query("data", "unpublished", "select 1", is_published=False) + await ds.add_query("data", "published", "select 1", is_published=True) assert not await ds.allowed( action="execute-sql", @@ -347,7 +347,7 @@ async def test_query_list_and_definition_api(): ds.root_enabled 
= True ds.add_memory_database("query_list_api", name="data") await ds.invoke_startup() - await ds.add_query("data", "listed", "select 1", title="Listed", published=True) + await ds.add_query("data", "listed", "select 1", title="Listed", is_published=True) list_response = await ds.client.get( "/data/-/queries", @@ -387,7 +387,7 @@ async def test_query_insert_api_publish_requires_publish_query(): response = await ds.client.post( "/data/-/queries/-/insert", actor={"id": "writer"}, - json={"query": {"name": "public", "sql": "select 1", "published": True}}, + json={"query": {"name": "public", "sql": "select 1", "is_published": True}}, ) assert response.status_code == 403 @@ -416,7 +416,7 @@ async def test_query_insert_api_creates_writable_query(): assert response.status_code == 201 query = response.json()["query"] assert query["is_write"] is True - assert query["published"] is False + assert query["is_published"] is False assert query["parameters"] == ["name"] bad_response = await ds.client.post( @@ -426,7 +426,7 @@ async def test_query_insert_api_creates_writable_query(): "query": { "name": "published_insert", "sql": "insert into dogs (name) values (:name)", - "published": True, + "is_published": True, } }, ) From 2d07c3b99e654b54c604df4af601ebe27f52b017 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 09:47:12 -0700 Subject: [PATCH 221/299] Ran cog --- datasette/utils/internal_db.py | 3 +-- docs/plugins.rst | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 0f84e886..9c693b0a 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -128,8 +128,7 @@ async def initialize_metadata_tables(db): owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR is_published = 0) + PRIMARY KEY (database_name, name) ); 
CREATE INDEX IF NOT EXISTS queries_owner_idx diff --git a/docs/plugins.rst b/docs/plugins.rst index 8fa49d6d..d578e9e2 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -216,6 +216,15 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "register_column_types" ] }, + { + "name": "datasette.default_database_actions", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "database_actions" + ] + }, { "name": "datasette.default_debug_menu", "static": false, From 539ff9ddfcdec0283758138987ddb362485e6ad7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 09:49:21 -0700 Subject: [PATCH 222/299] Drop query publication check from docs Refs #2735 --- docs/internals.rst | 3 +-- queries-plan.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 892cf64c..b5da7cbf 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2163,8 +2163,7 @@ The internal database schema is as follows: owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR is_published = 0) + PRIMARY KEY (database_name, name) ); CREATE INDEX queries_owner_idx ON queries(owner_id); diff --git a/queries-plan.md b/queries-plan.md index 0fbddecd..a58ace70 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -50,8 +50,7 @@ CREATE TABLE IF NOT EXISTS queries ( owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (database_name, name), - CHECK (is_write = 0 OR is_published = 0) + PRIMARY KEY (database_name, name) ); CREATE INDEX IF NOT EXISTS queries_owner_idx From 4a70b893559897034625bd797c8fccc80116844a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 10:11:46 -0700 Subject: [PATCH 223/299] Add cursor-paginated query browser Refs #2735 --- 
datasette/app.py | 129 +++++++++++++++++++++++++--- datasette/templates/database.html | 3 + datasette/templates/query_list.html | 55 ++++++++++++ datasette/views/database.py | 125 ++++++++++++++++++++------- docs/json_api.rst | 2 +- queries-plan.md | 18 +++- tests/test_queries.py | 107 +++++++++++++++++++++-- 7 files changed, 389 insertions(+), 50 deletions(-) create mode 100644 datasette/templates/query_list.html diff --git a/datasette/app.py b/datasette/app.py index 40877802..bdbf9389 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1288,16 +1288,122 @@ class Datasette: ) return self._query_row_to_dict(rows.first()) - async def get_queries(self, database): - rows = await self.get_internal_database().execute( - """ - SELECT * FROM queries - WHERE database_name = ? - ORDER BY name - """, - [database], + async def list_queries( + self, + database, + *, + actor=None, + limit=50, + cursor=None, + q=None, + is_write=None, + is_published=None, + source=None, + owner_id=None, + include_private=False, + ): + limit = min(max(1, int(limit)), 1000) + allowed_sql, allowed_params = await self.allowed_resources_sql( + action="view-query", + actor=actor, + parent=database, + include_is_private=include_private, ) - return {row["name"]: self._query_row_to_dict(row) for row in rows} + params = dict(allowed_params) + params.update({"query_database": database, "limit": limit + 1}) + sort_key_sql = "lower(coalesce(nullif(q.title, ''), q.name))" + where_clauses = ["q.database_name = :query_database"] + + if cursor: + try: + components = urlsafe_components(cursor) + except ValueError: + components = [] + if len(components) == 2: + where_clauses.append(""" + ( + {sort_key_sql} > :cursor_sort_key + OR ( + {sort_key_sql} = :cursor_sort_key + AND q.name > :cursor_name + ) + ) + """.format(sort_key_sql=sort_key_sql)) + params["cursor_sort_key"] = components[0] + params["cursor_name"] = components[1] + + if q: + where_clauses.append(""" + ( + q.name LIKE :query_search + OR q.title 
LIKE :query_search + OR q.description LIKE :query_search + OR q.sql LIKE :query_search + ) + """) + params["query_search"] = "%{}%".format(q) + if is_write is not None: + where_clauses.append("q.is_write = :query_is_write") + params["query_is_write"] = int(bool(is_write)) + if is_published is not None: + where_clauses.append("q.is_published = :query_is_published") + params["query_is_published"] = int(bool(is_published)) + if source is not None: + where_clauses.append("q.source = :query_source") + params["query_source"] = source + if owner_id is not None: + where_clauses.append("q.owner_id = :query_owner_id") + params["query_owner_id"] = owner_id + + private_select = ", allowed.is_private AS private" if include_private else "" + rows = list( + ( + await self.get_internal_database().execute( + """ + SELECT q.*, {sort_key_sql} AS sort_key{private_select} + FROM queries q + JOIN ( + {allowed_sql} + ) allowed + ON allowed.parent = q.database_name + AND allowed.child = q.name + WHERE {where} + ORDER BY sort_key, q.name + LIMIT :limit + """.format( + allowed_sql=allowed_sql, + private_select=private_select, + sort_key_sql=sort_key_sql, + where=" AND ".join(where_clauses), + ), + params, + ) + ).rows + ) + has_more = len(rows) > limit + if has_more: + rows = rows[:limit] + + queries = [] + for row in rows: + query = self._query_row_to_dict(row) + if include_private: + query["private"] = bool(row["private"]) + queries.append(query) + + next_token = None + if has_more and rows: + last_row = rows[-1] + next_token = "{},{}".format( + tilde_encode(last_row["sort_key"]), + tilde_encode(last_row["name"]), + ) + return { + "queries": queries, + "next": next_token, + "has_more": has_more, + "limit": limit, + } async def ensure_query_write_permissions( self, database, sql, *, actor=None, params=None, analysis=None @@ -1564,7 +1670,8 @@ class Datasette: return self.static_hash("app.css") async def get_canned_queries(self, database_name, actor): - return await 
self.get_queries(database_name) + page = await self.list_queries(database_name, actor=actor, limit=1000) + return {query["name"]: query for query in page["queries"]} async def get_canned_query(self, database_name, query_name, actor): return await self.get_query(database_name, query_name) @@ -2591,7 +2698,7 @@ class Datasette: add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") add_route( QueryListView.as_view(self), - r"/(?P[^\/\.]+)/-/queries$", + r"/(?P[^\/\.]+)/-/queries(\.(?Pjson))?$", ) add_route( QueryCreateView.as_view(self), diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 42b4ca0b..a39d6ad7 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -53,6 +53,9 @@
    • {{ query.title or query.name }}{% if query.private %} 🔒{% endif %}
    • {% endfor %} + {% if queries_more %} +

      View all queries

      + {% endif %} {% endif %} {% if tables %} diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html new file mode 100644 index 00000000..ef5da0d5 --- /dev/null +++ b/datasette/templates/query_list.html @@ -0,0 +1,55 @@ +{% extends "base.html" %} + +{% block title %}{{ database }}: queries{% endblock %} + +{% block body_class %}query-list db-{{ database|to_css_class }}{% endblock %} + +{% block crumbs %} +{{ crumbs.nav(request=request, database=database) }} +{% endblock %} + +{% block content %} + +

      Queries

      + +
      +

      + + + +

      +

      + + + + +

      +
      + +{% if queries %} +
        + {% for query in queries %} +
      • + {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} + {% if query.is_write %}Writable{% endif %} + {% if query.is_published %}Published{% endif %} +
      • + {% endfor %} +
      +{% else %} +

      No queries found.

      +{% endif %} + +{% if next_url %} +

      Next page

      +{% endif %} + +{% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index ed38189b..edbc315e 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -92,24 +92,14 @@ class DatabaseView(View): tables = await get_tables(datasette, request, db, allowed_dict) - # Get allowed queries using the new permission system - allowed_query_page = await datasette.allowed_resources( - "view-query", - request.actor, - parent=database, - include_is_private=True, - limit=1000, + queries_page = await datasette.list_queries( + database, + actor=request.actor, + limit=20, + include_private=True, ) - - # Build canned_queries list by looking up each allowed query - all_queries = await datasette.get_canned_queries(database, request.actor) - canned_queries = [] - for query_resource in allowed_query_page.resources: - query_name = query_resource.child - if query_name in all_queries: - canned_queries.append( - dict(all_queries[query_name], private=query_resource.private) - ) + canned_queries = queries_page["queries"] + queries_more = queries_page["has_more"] async def database_actions(): links = [] @@ -141,6 +131,7 @@ class DatabaseView(View): "hidden_count": len([t for t in tables if t["hidden"]]), "views": sql_views, "queries": canned_queries, + "queries_more": queries_more, "allow_execute_sql": allow_execute_sql, "table_columns": ( await _table_columns(datasette, database) if allow_execute_sql else {} @@ -174,6 +165,7 @@ class DatabaseView(View): hidden_count=len([t for t in tables if t["hidden"]]), views=sql_views, queries=canned_queries, + queries_more=queries_more, allow_execute_sql=allow_execute_sql, table_columns=( await _table_columns(datasette, database) @@ -222,6 +214,9 @@ class DatabaseContext(Context): hidden_count: int = field(metadata={"help": "Count of hidden tables"}) views: list = field(metadata={"help": "List of view objects in the database"}) queries: list = field(metadata={"help": "List of canned query 
objects"}) + queries_more: bool = field( + metadata={"help": "Boolean indicating if more saved queries are available"} + ) allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) @@ -474,6 +469,31 @@ def _as_bool(value): return bool(value) +def _as_optional_bool(value, name): + if value is None or value == "": + return None + if isinstance(value, bool): + return value + if isinstance(value, int): + return bool(value) + if isinstance(value, str): + lowered = value.lower() + if lowered in {"1", "true", "t", "yes", "on"}: + return True + if lowered in {"0", "false", "f", "no", "off"}: + return False + raise QueryValidationError("{} must be 0 or 1".format(name)) + + +def _query_list_limit(value): + if value in (None, ""): + return 50 + try: + return min(max(1, int(value)), 1000) + except ValueError as ex: + raise QueryValidationError("_size must be an integer") from ex + + def _derived_query_parameters(sql): parameters = [] seen = set() @@ -949,19 +969,66 @@ class QueryListView(BaseView): async def get(self, request): db = await self.ds.resolve_database(request) - page = await self.ds.allowed_resources( - "view-query", - request.actor, - parent=db.name, - limit=1000, + format_ = request.url_vars.get("format") or "html" + try: + limit = _query_list_limit(request.args.get("_size")) + is_write = _as_optional_bool(request.args.get("is_write"), "is_write") + is_published = _as_optional_bool( + request.args.get("is_published"), "is_published" + ) + except QueryValidationError as ex: + return _error([ex.message], ex.status) + + page = await self.ds.list_queries( + db.name, + actor=request.actor, + limit=limit, + cursor=request.args.get("_next"), + q=request.args.get("q") or None, + is_write=is_write, + is_published=is_published, + source=request.args.get("source") or None, + owner_id=request.args.get("owner_id") or None, + include_private=True, + ) + next_url = None + if page["next"]: + pairs = [ + (key, value) + for key, value 
in parse_qsl( + request.query_string, keep_blank_values=True + ) + if key != "_next" + ] + pairs.append(("_next", page["next"])) + next_url = "{}?{}".format( + self.ds.urls.database(db.name) + "/-/queries", + urlencode(pairs), + ) + + data = { + "ok": True, + "database": db.name, + "queries": page["queries"], + "next": page["next"], + "next_url": next_url, + "has_more": page["has_more"], + "limit": page["limit"], + "filters": { + "q": request.args.get("q") or "", + "is_write": request.args.get("is_write") or "", + "is_published": request.args.get("is_published") or "", + "source": request.args.get("source") or "", + "owner_id": request.args.get("owner_id") or "", + }, + } + if format_ == "json": + return Response.json(data) + return await self.render( + ["query_list.html"], + request, + data, ) - all_queries = await self.ds.get_queries(db.name) - queries = [ - all_queries[resource.child] - for resource in page.resources - if resource.child in all_queries - ] - return Response.json({"ok": True, "database": db.name, "queries": queries}) class QueryCreateView(BaseView): diff --git a/docs/json_api.rst b/docs/json_api.rst index e4c9e86e..ece430c2 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -510,7 +510,7 @@ Datasette provides a write API for JSON data. This is a POST-only API that requi Listing saved queries ~~~~~~~~~~~~~~~~~~~~~ -``GET //-/queries`` returns saved query definitions the actor can view. +``GET //-/queries.json`` returns saved query definitions the actor can view. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. .. _QueryCreateView: diff --git a/queries-plan.md b/queries-plan.md index a58ace70..671fc29c 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -210,7 +210,7 @@ JSON endpoints should follow Datasette's existing write API style: use `POST` pl Endpoints: -- `GET /{database}/-/queries` lists query definitions the actor can view or manage, probably paginated. 
+- `GET /{database}/-/queries` shows a searchable HTML query browser. `GET /{database}/-/queries.json` returns query definitions the actor can view, using cursor pagination with `_next` and `_size`. - `POST /{database}/-/queries/-/insert` creates a query. - `GET /{database}/{query}/-/definition` returns one query definition without executing it. - `POST /{database}/{query}/-/update` updates one query. @@ -353,9 +353,21 @@ await datasette.update_query( await datasette.remove_query(database, name, source=None) await datasette.get_query(database, name) -await datasette.get_queries(database) +await datasette.list_queries( + database, + actor=None, + limit=50, + cursor=None, + q=None, + is_write=None, + is_published=None, + source=None, + owner_id=None, +) ``` +`list_queries()` should return a bounded page shaped like `{"queries": [...], "next": "...", "has_more": true, "limit": 50}`. The `next` value is an opaque cursor token, not an offset. + `update_query()` should use an internal sentinel default such as `UNCHANGED = object()` so callers can distinguish "leave this column alone" from "set this column to `NULL`": ```python @@ -380,6 +392,8 @@ The save form should call `POST /{database}/-/queries/-/insert` and default to ` If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. +On `/{database}`, show a preview of the first 20 visible queries using `list_queries(..., limit=20)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. + ## Dedicated create query UI Add `/{database}/-/queries/-/create` for the fuller query authoring flow, including writable queries. 
diff --git a/tests/test_queries.py b/tests/test_queries.py index df4131b9..dd906faf 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -7,6 +7,20 @@ from datasette.resources import DatabaseResource, QueryResource from datasette.utils.asgi import Forbidden +async def add_numbered_queries(ds, database, count): + for i in range(1, count + 1): + await ds.add_query( + database, + "demo_query_{:02d}".format(i), + "select {} as query_number".format(i), + title="Demo query {:02d}".format(i), + description="Seeded demo query number {:02d}".format(i), + is_published=True, + source="user", + owner_id="root", + ) + + @pytest.mark.asyncio async def test_queries_internal_table_schema(): ds = Datasette(memory=True) @@ -96,11 +110,15 @@ async def test_add_get_and_remove_query(): "on_error_redirect": None, } - assert await ds.get_queries("data") == {"top_customers": query} + queries_page = await ds.list_queries("data", actor=None) + assert queries_page["queries"] == [query] + assert queries_page["next"] is None await ds.remove_query("data", "top_customers") assert await ds.get_query("data", "top_customers") is None - assert await ds.get_queries("data") == {} + queries_page = await ds.list_queries("data", actor=None) + assert queries_page["queries"] == [] + assert queries_page["next"] is None @pytest.mark.asyncio @@ -238,6 +256,24 @@ async def test_unpublished_query_requires_execute_sql_but_published_does_not(): ) +@pytest.mark.asyncio +async def test_database_page_query_preview_is_limited(): + ds = Datasette(memory=True) + ds.add_memory_database("query_preview", name="data") + await ds.invoke_startup() + await add_numbered_queries(ds, "data", 25) + + html_response = await ds.client.get("/data") + json_response = await ds.client.get("/data.json") + + assert html_response.status_code == 200 + assert "Demo query 20" in html_response.text + assert "Demo query 21" not in html_response.text + assert 'href="/data/-/queries"' in html_response.text + assert 
len(json_response.json()["queries"]) == 20 + assert json_response.json()["queries_more"] is True + + @pytest.mark.asyncio async def test_query_actions_are_registered(): ds = Datasette() @@ -347,21 +383,78 @@ async def test_query_list_and_definition_api(): ds.root_enabled = True ds.add_memory_database("query_list_api", name="data") await ds.invoke_startup() - await ds.add_query("data", "listed", "select 1", title="Listed", is_published=True) + await add_numbered_queries(ds, "data", 12) list_response = await ds.client.get( - "/data/-/queries", + "/data/-/queries.json?_size=5", + actor={"id": "root"}, + ) + next_response = await ds.client.get( + "/data/-/queries.json?_size=5&_next={}".format(list_response.json()["next"]), actor={"id": "root"}, ) definition_response = await ds.client.get( - "/data/listed/-/definition", + "/data/demo_query_01/-/definition", actor={"id": "root"}, ) assert list_response.status_code == 200 - assert list_response.json()["queries"][0]["name"] == "listed" + assert [query["name"] for query in list_response.json()["queries"]] == [ + "demo_query_01", + "demo_query_02", + "demo_query_03", + "demo_query_04", + "demo_query_05", + ] + assert list_response.json()["next"] + assert [query["name"] for query in next_response.json()["queries"]] == [ + "demo_query_06", + "demo_query_07", + "demo_query_08", + "demo_query_09", + "demo_query_10", + ] assert definition_response.status_code == 200 - assert definition_response.json()["query"]["title"] == "Listed" + assert definition_response.json()["query"]["title"] == "Demo query 01" + + +@pytest.mark.asyncio +async def test_query_list_search_filter_and_html(): + ds = Datasette(memory=True) + ds.root_enabled = True + ds.add_memory_database("query_list_html", name="data") + await ds.invoke_startup() + await add_numbered_queries(ds, "data", 3) + await ds.add_query( + "data", + "private_query", + "select 'private'", + title="Private query", + is_published=False, + source="user", + owner_id="root", + ) + + 
html_response = await ds.client.get( + "/data/-/queries?q=02", + actor={"id": "root"}, + ) + json_response = await ds.client.get( + "/data/-/queries.json?q=02", + actor={"id": "root"}, + ) + filtered_response = await ds.client.get( + "/data/-/queries.json?is_published=0", + actor={"id": "root"}, + ) + + assert html_response.status_code == 200 + assert "Demo query 02" in html_response.text + assert "Demo query 01" not in html_response.text + assert json_response.json()["queries"][0]["name"] == "demo_query_02" + assert [query["name"] for query in filtered_response.json()["queries"]] == [ + "private_query" + ] @pytest.mark.asyncio From 310c36ae94c54d4b859925d4977554c2a2618534 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 10:18:36 -0700 Subject: [PATCH 224/299] Limit database query preview to five Refs #2735 --- datasette/views/database.py | 2 +- queries-plan.md | 2 +- tests/test_canned_queries.py | 35 ++++++++++++++++++++++++++++++----- tests/test_queries.py | 6 +++--- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index edbc315e..353cfcf2 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -95,7 +95,7 @@ class DatabaseView(View): queries_page = await datasette.list_queries( database, actor=request.actor, - limit=20, + limit=5, include_private=True, ) canned_queries = queries_page["queries"] diff --git a/queries-plan.md b/queries-plan.md index 671fc29c..82ef3260 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -392,7 +392,7 @@ The save form should call `POST /{database}/-/queries/-/insert` and default to ` If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. -On `/{database}`, show a preview of the first 20 visible queries using `list_queries(..., limit=20)`. 
If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. +On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. ## Dedicated create query UI diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index c46fd86f..a9d22036 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -248,10 +248,9 @@ def test_json_response(canned_write_client, headers, body, querystring): def test_canned_query_permissions_on_database_page(canned_write_client): - # Without auth only shows three queries - query_names = { - q["name"] for q in canned_write_client.get("/data.json").json["queries"] - } + # Without auth shows the five public queries + anon_response = canned_write_client.get("/data.json") + query_names = {q["name"] for q in anon_response.json["queries"]} assert query_names == { "add_name_specify_id_with_error_in_on_success_message_sql", "update_name", @@ -259,8 +258,9 @@ def test_canned_query_permissions_on_database_page(canned_write_client): "canned_read", "add_name", } + assert anon_response.json["queries_more"] is False - # With auth shows four + # With auth the database page preview shows the first five queries response = canned_write_client.get( "/data.json", cookies={"ds_actor": canned_write_client.actor_cookie({"id": "root"})}, @@ -273,6 +273,31 @@ def test_canned_query_permissions_on_database_page(canned_write_client): ], key=lambda q: q["name"], ) + assert query_names_and_private == [ + {"name": "add_name", "private": False}, + {"name": "add_name_specify_id", "private": False}, + { + "name": 
"add_name_specify_id_with_error_in_on_success_message_sql", + "private": False, + }, + {"name": "canned_read", "private": False}, + {"name": "delete_name", "private": True}, + ] + assert response.json["queries_more"] is True + + # The full query list endpoint includes the remaining query + response = canned_write_client.get( + "/data/-/queries.json?_size=10", + cookies={"ds_actor": canned_write_client.actor_cookie({"id": "root"})}, + ) + assert response.status == 200 + query_names_and_private = sorted( + [ + {"name": q["name"], "private": q["private"]} + for q in response.json["queries"] + ], + key=lambda q: q["name"], + ) assert query_names_and_private == [ {"name": "add_name", "private": False}, {"name": "add_name_specify_id", "private": False}, diff --git a/tests/test_queries.py b/tests/test_queries.py index dd906faf..2b46e00f 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -267,10 +267,10 @@ async def test_database_page_query_preview_is_limited(): json_response = await ds.client.get("/data.json") assert html_response.status_code == 200 - assert "Demo query 20" in html_response.text - assert "Demo query 21" not in html_response.text + assert "Demo query 05" in html_response.text + assert "Demo query 06" not in html_response.text assert 'href="/data/-/queries"' in html_response.text - assert len(json_response.json()["queries"]) == 20 + assert len(json_response.json()["queries"]) == 5 assert json_response.json()["queries_more"] is True From 6eee6c81e8c21737e2391af55baf24866429038d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 10:24:42 -0700 Subject: [PATCH 225/299] Add global query browser Refs #2735 --- datasette/app.py | 57 +++++++++++++++++++----- datasette/templates/query_list.html | 11 +++-- datasette/views/database.py | 27 ++++++++++-- docs/json_api.rst | 3 +- queries-plan.md | 6 +-- tests/test_queries.py | 67 +++++++++++++++++++++++++++++ 6 files changed, 149 insertions(+), 22 deletions(-) diff --git 
a/datasette/app.py b/datasette/app.py index bdbf9389..c047fde9 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -52,6 +52,7 @@ from .views.database import ( QueryCreateView, QueryDeleteView, QueryDefinitionView, + GlobalQueryListView, QueryInsertView, QueryListView, QueryUpdateView, @@ -1290,7 +1291,7 @@ class Datasette: async def list_queries( self, - database, + database=None, *, actor=None, limit=50, @@ -1310,16 +1311,40 @@ class Datasette: include_is_private=include_private, ) params = dict(allowed_params) - params.update({"query_database": database, "limit": limit + 1}) + params.update({"limit": limit + 1}) sort_key_sql = "lower(coalesce(nullif(q.title, ''), q.name))" - where_clauses = ["q.database_name = :query_database"] + where_clauses = [] + order_by = "q.database_name, sort_key, q.name" + if database is not None: + params["query_database"] = database + where_clauses.append("q.database_name = :query_database") + order_by = "sort_key, q.name" if cursor: try: components = urlsafe_components(cursor) except ValueError: components = [] - if len(components) == 2: + if database is None and len(components) == 3: + where_clauses.append(""" + ( + q.database_name > :cursor_database + OR ( + q.database_name = :cursor_database + AND ( + {sort_key_sql} > :cursor_sort_key + OR ( + {sort_key_sql} = :cursor_sort_key + AND q.name > :cursor_name + ) + ) + ) + ) + """.format(sort_key_sql=sort_key_sql)) + params["cursor_database"] = components[0] + params["cursor_sort_key"] = components[1] + params["cursor_name"] = components[2] + elif database is not None and len(components) == 2: where_clauses.append(""" ( {sort_key_sql} > :cursor_sort_key @@ -1368,13 +1393,14 @@ class Datasette: ON allowed.parent = q.database_name AND allowed.child = q.name WHERE {where} - ORDER BY sort_key, q.name + ORDER BY {order_by} LIMIT :limit """.format( allowed_sql=allowed_sql, private_select=private_select, sort_key_sql=sort_key_sql, - where=" AND ".join(where_clauses), + where=" AND 
".join(where_clauses) or "1 = 1", + order_by=order_by, ), params, ) @@ -1394,10 +1420,17 @@ class Datasette: next_token = None if has_more and rows: last_row = rows[-1] - next_token = "{},{}".format( - tilde_encode(last_row["sort_key"]), - tilde_encode(last_row["name"]), - ) + if database is None: + next_token = "{},{},{}".format( + tilde_encode(last_row["database_name"]), + tilde_encode(last_row["sort_key"]), + tilde_encode(last_row["name"]), + ) + else: + next_token = "{},{}".format( + tilde_encode(last_row["sort_key"]), + tilde_encode(last_row["name"]), + ) return { "queries": queries, "next": next_token, @@ -2651,6 +2684,10 @@ class Datasette: JumpView.as_view(self), r"/-/jump(\.(?Pjson))?$", ) + add_route( + GlobalQueryListView.as_view(self), + r"/-/queries(\.(?Pjson))?$", + ) add_route( InstanceSchemaView.as_view(self), r"/-/schema(\.(?Pjson|md))?$", diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index ef5da0d5..af974550 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -1,8 +1,8 @@ {% extends "base.html" %} -{% block title %}{{ database }}: queries{% endblock %} +{% block title %}{% if database %}{{ database }}: {% endif %}queries{% endblock %} -{% block body_class %}query-list db-{{ database|to_css_class }}{% endblock %} +{% block body_class %}query-list{% if database %} db-{{ database|to_css_class }}{% endif %}{% endblock %} {% block crumbs %} {{ crumbs.nav(request=request, database=database) }} @@ -12,7 +12,7 @@

      Queries

      -
      +

      @@ -38,7 +38,10 @@

        {% for query in queries %}
      • - {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} + {% if show_database %} + {{ query.database }}: + {% endif %} + {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} {% if query.is_write %}Writable{% endif %} {% if query.is_published %}Published{% endif %}
      • diff --git a/datasette/views/database.py b/datasette/views/database.py index 353cfcf2..1576b6a9 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -967,8 +967,14 @@ class ExecuteWriteView(BaseView): class QueryListView(BaseView): name = "query-list" + async def database_name(self, request): + return (await self.ds.resolve_database(request)).name + + def query_list_path(self, database): + return self.ds.urls.database(database) + "/-/queries" + async def get(self, request): - db = await self.ds.resolve_database(request) + database = await self.database_name(request) format_ = request.url_vars.get("format") or "html" try: limit = _query_list_limit(request.args.get("_size")) @@ -980,7 +986,7 @@ class QueryListView(BaseView): return _error([ex.message], ex.status) page = await self.ds.list_queries( - db.name, + database, actor=request.actor, limit=limit, cursor=request.args.get("_next"), @@ -991,6 +997,7 @@ class QueryListView(BaseView): owner_id=request.args.get("owner_id") or None, include_private=True, ) + query_list_path = self.query_list_path(database) next_url = None if page["next"]: pairs = [ @@ -1002,18 +1009,20 @@ class QueryListView(BaseView): ] pairs.append(("_next", page["next"])) next_url = "{}?{}".format( - self.ds.urls.database(db.name) + "/-/queries", + query_list_path, urlencode(pairs), ) data = { "ok": True, - "database": db.name, + "database": database, "queries": page["queries"], "next": page["next"], "next_url": next_url, "has_more": page["has_more"], "limit": page["limit"], + "query_list_path": query_list_path, + "show_database": database is None, "filters": { "q": request.args.get("q") or "", "is_write": request.args.get("is_write") or "", @@ -1031,6 +1040,16 @@ class QueryListView(BaseView): ) +class GlobalQueryListView(QueryListView): + name = "global-query-list" + + async def database_name(self, request): + return None + + def query_list_path(self, database): + return self.ds.urls.path("/-/queries") + + class 
QueryCreateView(BaseView): name = "query-create" has_json_alternate = False diff --git a/docs/json_api.rst b/docs/json_api.rst index ece430c2..f44a39fe 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -505,12 +505,13 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. The token will need to have the specified :ref:`authentication_permissions`. +.. _GlobalQueryListView: .. _QueryListView: Listing saved queries ~~~~~~~~~~~~~~~~~~~~~ -``GET //-/queries.json`` returns saved query definitions the actor can view. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. +``GET /-/queries.json`` returns saved query definitions across every database that the actor can view. ``GET //-/queries.json`` returns saved query definitions for a specific database. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. .. _QueryCreateView: diff --git a/queries-plan.md b/queries-plan.md index 82ef3260..a708e887 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -210,7 +210,7 @@ JSON endpoints should follow Datasette's existing write API style: use `POST` pl Endpoints: -- `GET /{database}/-/queries` shows a searchable HTML query browser. `GET /{database}/-/queries.json` returns query definitions the actor can view, using cursor pagination with `_next` and `_size`. +- `GET /-/queries` and `GET /{database}/-/queries` show searchable HTML query browsers. `GET /-/queries.json` lists query definitions across every database the actor can view; `GET /{database}/-/queries.json` scopes that list to one database. Both JSON endpoints use cursor pagination with `_next` and `_size`. - `POST /{database}/-/queries/-/insert` creates a query. - `GET /{database}/{query}/-/definition` returns one query definition without executing it. 
- `POST /{database}/{query}/-/update` updates one query. @@ -366,7 +366,7 @@ await datasette.list_queries( ) ``` -`list_queries()` should return a bounded page shaped like `{"queries": [...], "next": "...", "has_more": true, "limit": 50}`. The `next` value is an opaque cursor token, not an offset. +`list_queries()` should return a bounded page shaped like `{"queries": [...], "next": "...", "has_more": true, "limit": 50}`. The `next` value is an opaque cursor token, not an offset. Passing `database=None` lists visible queries across all live databases, still filtered through `view-query` permission SQL. `update_query()` should use an internal sentinel default such as `UNCHANGED = object()` so callers can distinguish "leave this column alone" from "set this column to `NULL`": @@ -392,7 +392,7 @@ The save form should call `POST /{database}/-/queries/-/insert` and default to ` If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. -On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. +On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. The global `/-/queries` page reuses the same interface and shows the database for each query. 
## Dedicated create query UI diff --git a/tests/test_queries.py b/tests/test_queries.py index 2b46e00f..bc04bb51 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -457,6 +457,73 @@ async def test_query_list_search_filter_and_html(): ] +@pytest.mark.asyncio +async def test_global_query_list_api_and_html(): + ds = Datasette(memory=True) + ds.root_enabled = True + ds.add_memory_database("query_list_global_alpha", name="alpha") + ds.add_memory_database("query_list_global_beta", name="beta") + await ds.invoke_startup() + await ds.add_query( + "alpha", + "alpha_first", + "select 1", + title="Alpha first", + is_published=True, + source="user", + owner_id="root", + ) + await ds.add_query( + "alpha", + "alpha_second", + "select 2", + title="Alpha second", + is_published=True, + source="user", + owner_id="root", + ) + await ds.add_query( + "beta", + "beta_first", + "select 3", + title="Beta first", + is_published=True, + source="user", + owner_id="root", + ) + + list_response = await ds.client.get( + "/-/queries.json?_size=2", + actor={"id": "root"}, + ) + next_response = await ds.client.get( + "/-/queries.json?_size=2&_next={}".format(list_response.json()["next"]), + actor={"id": "root"}, + ) + html_response = await ds.client.get( + "/-/queries?q=Beta", + actor={"id": "root"}, + ) + + assert list_response.status_code == 200 + assert [ + (query["database"], query["name"]) for query in list_response.json()["queries"] + ] == [ + ("alpha", "alpha_first"), + ("alpha", "alpha_second"), + ] + assert list_response.json()["next"] + assert [ + (query["database"], query["name"]) for query in next_response.json()["queries"] + ] == [ + ("beta", "beta_first"), + ] + assert html_response.status_code == 200 + assert 'href="/beta">beta:' in html_response.text + assert "Beta first" in html_response.text + assert "Alpha first" not in html_response.text + + @pytest.mark.asyncio async def test_query_insert_api_publish_requires_publish_query(): ds = Datasette( From 
f0b59971f7c8c0f4435a18b4f4e9c8053c2683fe Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 10:39:56 -0700 Subject: [PATCH 226/299] Delete unnecessary test --- tests/test_utils_sql_analysis.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index c82fb04f..5730cd0d 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -169,13 +169,6 @@ def test_analyze_attached_database_tables(conn): } -def test_analyze_invalid_sql_cleans_up_authorizer(conn): - with pytest.raises(sqlite3.OperationalError): - analyze_sql_tables(conn, "insert into missing_table values (1)") - - conn.execute("select name from dogs").fetchall() - - def test_analyze_clears_authorizer_on_error(): class FakeConnection: def __init__(self): From 2b5b4ed66b86bae0080e9d8f4881cad8e57bbdb3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 11:11:08 -0700 Subject: [PATCH 227/299] Much improved "Write to this database" UI - Start with a template option, letting you pick table and operation - SQL textarea defaults to 4 empty lines at start - Query operations table is simpler and looks nicer Refs #2742 --- datasette/templates/execute_write.html | 240 +++++++++++++++++++++++-- datasette/views/database.py | 13 +- tests/test_queries.py | 32 +++- 3 files changed, 271 insertions(+), 14 deletions(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 5b4f30d9..90845910 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -1,10 +1,80 @@ {% extends "base.html" %} -{% block title %}Execute write SQL{% endblock %} +{% block title %}Write to this database{% endblock %} {% block extra_head %} {{- super() -}} {% include "_codemirror.html" %} + {% endblock %} {% block body_class %}execute-write db-{{ database|to_css_class }}{% endblock %} @@ -15,13 +85,34 @@ {% block content %} -

        Execute write SQL

        +

        Write to this database

        + +

        Execute SQL to insert, update or delete rows in this database.

        {% if execution_message %}

        {{ execution_message }}

        {% endif %} + {% if write_template_tables %} +
        +
        + Start with a template +

        + + + + + +

        +
        +
        + {% endif %} +

        {% if parameter_names %} @@ -31,30 +122,28 @@ {% endfor %} {% endif %} -

        Analysis

        +

        Query operations

        {% if analysis_error %}

        {{ analysis_error }}

        {% elif analysis_rows %} -
        +
        - + - {% for row in analysis_rows %} - - - - - - + + + + + {% endfor %} @@ -66,6 +155,133 @@

        + + {% include "_codemirror_foot.html" %} +{% if write_template_tables %} + +{% endif %} + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 1576b6a9..fb3bdfdb 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -830,6 +830,13 @@ class ExecuteWriteView(BaseView): parameter_values = parameter_values or {} parameter_names = [] analysis_rows = [] + table_columns = await _table_columns(self.ds, db.name) + hidden_table_names = set(await db.hidden_table_names()) + write_template_tables = { + table: columns + for table, columns in table_columns.items() + if columns and table not in hidden_table_names + } if sql and analysis_error is None: try: parameter_names = _derived_query_parameters(sql) @@ -858,7 +865,9 @@ class ExecuteWriteView(BaseView): "parameter_names": parameter_names, "parameter_values": parameter_values, "analysis_error": analysis_error, - "analysis_rows": analysis_rows, + "analysis_rows": [ + row for row in analysis_rows if row["operation"] != "read" + ], "execution_message": execution_message, "execution_ok": execution_ok, "execute_disabled": bool( @@ -866,6 +875,8 @@ class ExecuteWriteView(BaseView): or analysis_error or any(row["allowed"] is False for row in analysis_rows) ), + "table_columns": table_columns, + "write_template_tables": write_template_tables, }, ) response.status = status diff --git a/tests/test_queries.py b/tests/test_queries.py index bc04bb51..684454fc 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -690,6 +690,14 @@ async def test_execute_write_get_prepopulates_without_executing(): ds.root_enabled = True db = ds.add_memory_database("execute_write_get", name="data") await db.execute_write("create table dogs (id integer primary key, name text)") + await db.execute_write("create table cats (id integer primary key, name text)") + await db.execute_write("create table log (message text)") + await db.execute_write(""" + create trigger 
dogs_after_insert after insert on dogs begin + update cats set name = new.name where id = new.id; + insert into log (message) values (new.name); + end + """) await ds.invoke_startup() response = await ds.client.get( @@ -700,11 +708,33 @@ async def test_execute_write_get_prepopulates_without_executing(): assert response.status_code == 200 assert response.headers["content-security-policy"] == "frame-ancestors 'none'" assert response.headers["x-frame-options"] == "DENY" - assert "Execute write SQL" in response.text + assert "Write to this database" in response.text + assert ( + "Execute SQL to insert, update or delete rows in this database." + in response.text + ) + assert "

        Query operations

        " in response.text + assert "Start with a template" in response.text + assert '' in response.text + assert 'data-sql-template="insert"' in response.text + assert 'data-sql-template="update"' in response.text + assert 'data-sql-template="delete"' in response.text + assert '
        Operation Database Tablerequired permissionRequired permission AllowedSource
        {{ row.operation }}{{ row.database }}{{ row.table }}{{ row.required_permission }}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}{{ row.source or "" }}{{ row.operation }}{{ row.database }}{{ row.table }}{% if row.required_permission %}{{ row.required_permission }}{% endif %}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}
        ' in response.text + assert '' in response.text + assert "" in response.text + assert "" in response.text + assert "" not in response.text assert 'action="/data/-/execute-write"' in response.text assert "insert into dogs (name) values ('Cleo')" in response.text assert (await db.execute("select count(*) from dogs")).first()[0] == 0 + empty_response = await ds.client.get( + "/data/-/execute-write", + actor={"id": "root"}, + ) + assert '' in empty_response.text + assert 'executeWriteSqlInput.value = "\\n\\n\\n";' in empty_response.text + @pytest.mark.asyncio async def test_database_action_menu_links_to_execute_write_for_permitted_actor(): From 1bce34a33869709e1dea21b6182327a105895285 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 11:22:24 -0700 Subject: [PATCH 228/299] If just a single insert, link to row page Refs #2742 --- datasette/templates/execute_write.html | 2 +- datasette/views/database.py | 49 ++++++++++++++++++++++++++ tests/test_queries.py | 42 ++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 90845910..705181d8 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -90,7 +90,7 @@

        Execute SQL to insert, update or delete rows in this database.

        {% if execution_message %} -

        {{ execution_message }}

        +

        {{ execution_message }}{% for link in execution_links %} {{ link.label }}{% endfor %}

        {% endif %}
        diff --git a/datasette/views/database.py b/datasette/views/database.py index fb3bdfdb..2b3920f7 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -18,8 +18,10 @@ from datasette.utils import ( await_me_maybe, call_with_supported_arguments, named_parameters as derive_named_parameters, + escape_sqlite, format_bytes, make_slot_function, + path_from_row_pks, tilde_decode, to_css_class, validate_sql_select, @@ -678,6 +680,43 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis +async def _inserted_row_url(datasette, db, analysis, cursor): + if cursor.rowcount != 1: + return None + lastrowid = getattr(cursor, "lastrowid", None) + if lastrowid is None: + return None + direct_inserts = [ + access + for access in analysis.table_accesses + if access.operation == "insert" + and access.source is None + and access.database == db.name + ] + if len(direct_inserts) != 1: + return None + table = direct_inserts[0].table + pks = await db.primary_keys(table) + use_rowid = not pks + select = ( + "rowid" + if use_rowid + else ", ".join(escape_sqlite(primary_key) for primary_key in pks) + ) + try: + result = await db.execute( + "select {} from {} where rowid = ?".format(select, escape_sqlite(table)), + [lastrowid], + ) + except sqlite3.DatabaseError: + return None + row = result.first() + if row is None: + return None + row_path = path_from_row_pks(row, pks, use_rowid) + return datasette.urls.row(db.name, table, row_path) + + def _apply_query_data_types(data): typed = dict(data) for key in ("hide_sql", "is_published"): @@ -824,10 +863,12 @@ class ExecuteWriteView(BaseView): analysis=None, analysis_error=None, execution_message=None, + execution_links=None, execution_ok=None, status=200, ): parameter_values = parameter_values or {} + execution_links = execution_links or [] parameter_names = [] analysis_rows = [] table_columns = await _table_columns(self.ds, db.name) @@ -869,6 +910,7 @@ class 
ExecuteWriteView(BaseView): row for row in analysis_rows if row["operation"] != "read" ], "execution_message": execution_message, + "execution_links": execution_links, "execution_ok": execution_ok, "execute_disabled": bool( (not sql) @@ -964,6 +1006,12 @@ class ExecuteWriteView(BaseView): ) ) + inserted_row_url = await _inserted_row_url(self.ds, db, analysis, cursor) + execution_links = ( + [{"href": inserted_row_url, "label": "View row"}] + if inserted_row_url + else [] + ) return await self._render_form( request, db, @@ -971,6 +1019,7 @@ class ExecuteWriteView(BaseView): parameter_values={name: params.get(name, "") for name in parameter_names}, analysis=analysis, execution_message=message, + execution_links=execution_links, execution_ok=True, ) diff --git a/tests/test_queries.py b/tests/test_queries.py index 684454fc..ed981ee7 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -849,6 +849,48 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_insert_links_to_inserted_row(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_insert_link", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await db.execute_write("create table log (id integer primary key, message text)") + await db.execute_write("insert into log (message) values ('existing')") + await db.execute_write(""" + create trigger dogs_after_insert after insert on dogs begin + insert into log (message) values (new.name); + end + """) + await ds.invoke_startup() + + insert_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + data={ + "sql": "insert into dogs (name) values (:name)", + "name": "Cleo", + }, + ) + update_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + 
data={ + "sql": "update dogs set name = :name where id = :id", + "name": "Cleo 2", + "id": "1", + }, + ) + + assert insert_response.status_code == 200 + assert "Query executed, 1 row affected" in insert_response.text + assert 'View row' in insert_response.text + assert "/data/log/2" not in insert_response.text + assert update_response.status_code == 200 + assert "Query executed, 1 row affected" in update_response.text + assert "View row" not in update_response.text + + @pytest.mark.asyncio async def test_execute_write_post_rejects_read_only_sql(): ds = Datasette(memory=True, default_deny=True) From 66bbbbc947bd4d7305761a627dc2f1949949c0a5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 11:35:09 -0700 Subject: [PATCH 229/299] Support multi-line parameters on /db/-/execute-write Refs https://github.com/simonw/datasette/issues/2742#issuecomment-4536317049 Each parameter input now has an expand/collapse button toggle to turn into a textarea. If you paste text that includes at least one newline it toggles automatically.
--- datasette/templates/execute_write.html | 94 +++++++++++++++++++++++++- tests/test_queries.py | 1 + 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 705181d8..a560e920 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -74,6 +74,25 @@ color: #b00020; font-weight: 700; } +form.sql .execute-write-parameter-row textarea[data-parameter-control] { + border: 1px solid #ccc; + border-radius: 3px; + box-sizing: content-box; + display: inline-block; + font-family: Helvetica, sans-serif; + font-size: 1em; + min-height: 7rem; + padding: 9px 4px; + vertical-align: top; + width: 60%; +} +form.sql.core button.execute-write-parameter-toggle[type=button] { + font-size: 0.72rem; + height: 1.8rem; + line-height: 1; + margin-left: 0.35rem; + padding: 0.25rem 0.45rem; +} {% endblock %} @@ -118,7 +137,7 @@ {% if parameter_names %}

        Parameters

        {% for parameter in parameter_names %} -

        +

        {% endfor %} {% endif %} @@ -164,6 +183,79 @@ if (executeWriteSqlInput && !executeWriteSqlInput.value) { {% include "_codemirror_foot.html" %} + + {% if write_template_tables %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 2b3920f7..e4eaee30 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -680,6 +680,39 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis +async def _execute_write_analysis_data(datasette, db, sql, actor): + parameter_names = [] + analysis_rows = [] + analysis_error = None + if sql: + try: + parameter_names = _derived_query_parameters(sql) + params = {parameter: "" for parameter in parameter_names} + analysis = await db.analyze_sql(sql, params) + if _analysis_is_write(analysis): + analysis_rows = await _analysis_rows_with_permissions( + datasette, analysis, actor + ) + else: + analysis_error = ( + "Use /-/query for read-only SQL; " + "this endpoint only executes writes" + ) + except (QueryValidationError, sqlite3.DatabaseError) as ex: + analysis_error = getattr(ex, "message", str(ex)) + return { + "ok": analysis_error is None, + "parameters": parameter_names, + "analysis_error": analysis_error, + "analysis_rows": [row for row in analysis_rows if row["operation"] != "read"], + "execute_disabled": bool( + (not sql) + or analysis_error + or any(row["allowed"] is False for row in analysis_rows) + ), + } + + async def _inserted_row_url(datasette, db, analysis, cursor): if cursor.rowcount != 1: return None @@ -1024,6 +1057,45 @@ class ExecuteWriteView(BaseView): ) +class ExecuteWriteAnalyzeView(BaseView): + name = "execute-write-analyze" + has_json_alternate = False + + async def post(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-write-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing( + _error(["Permission denied: 
need execute-write-sql"], 403) + ) + + try: + data, _ = await _json_or_form_payload(request) + except QueryValidationError as ex: + return _block_framing(_error([ex.message], ex.status)) + if not isinstance(data, dict): + return _block_framing(_error(["JSON must be a dictionary"], 400)) + invalid_keys = set(data) - {"sql"} + if invalid_keys: + return _block_framing( + _error( + ["Invalid keys: {}".format(", ".join(sorted(invalid_keys)))], + 400, + ) + ) + sql = data.get("sql") or "" + if not isinstance(sql, str): + return _block_framing(_error(["sql must be a string"], 400)) + return _block_framing( + Response.json( + await _execute_write_analysis_data(self.ds, db, sql, request.actor) + ) + ) + + class QueryListView(BaseView): name = "query-list" diff --git a/docs/json_api.rst b/docs/json_api.rst index f44a39fe..2f581661 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -528,6 +528,7 @@ Creating saved queries ``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. .. _ExecuteWriteView: +.. _ExecuteWriteAnalyzeView: Executing write SQL ~~~~~~~~~~~~~~~~~~~ @@ -536,6 +537,8 @@ Executing write SQL ``POST //-/execute-write`` executes writable SQL. This requires ``execute-write-sql`` for the database plus the relevant table-level write permissions. +``POST //-/execute-write/-/analyze`` accepts ``{"sql": "..."}`` and returns the derived parameters plus the write operations that SQL would need in order to execute. + .. 
_QueryDefinitionView: Getting a saved query definition diff --git a/tests/test_queries.py b/tests/test_queries.py index a6080958..6d2c0b25 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -719,7 +719,9 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'data-sql-template="insert"' in response.text assert 'data-sql-template="update"' in response.text assert 'data-sql-template="delete"' in response.text + assert 'data-analyze-url="/data/-/execute-write/-/analyze"' in response.text assert 'addEventListener("paste"' in response.text + assert "refreshExecuteWriteAnalysis" in response.text assert '
        Required permissioninsertupdateread
        ' in response.text assert '' in response.text assert "" in response.text @@ -737,6 +739,53 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'executeWriteSqlInput.value = "\\n\\n\\n";' in empty_response.text +@pytest.mark.asyncio +async def test_execute_write_analyze_endpoint_uses_sql_only(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_analyze", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.post( + "/data/-/execute-write/-/analyze", + actor={"id": "root"}, + json={"sql": "insert into dogs (name) values (:name)"}, + ) + read_only_response = await ds.client.post( + "/data/-/execute-write/-/analyze", + actor={"id": "root"}, + json={"sql": "select * from dogs where name = :name"}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["ok"] is True + assert data["parameters"] == ["name"] + assert data["analysis_error"] is None + assert data["execute_disabled"] is False + assert data["analysis_rows"] == [ + { + "operation": "insert", + "database": "data", + "table": "dogs", + "required_permission": "insert-row", + "source": None, + "allowed": True, + } + ] + assert "params" not in data + + assert read_only_response.status_code == 200 + read_only_data = read_only_response.json() + assert read_only_data["ok"] is False + assert read_only_data["parameters"] == ["name"] + assert read_only_data["analysis_error"] == ( + "Use /-/query for read-only SQL; this endpoint only executes writes" + ) + assert read_only_data["execute_disabled"] is True + + @pytest.mark.asyncio async def test_database_action_menu_links_to_execute_write_for_permitted_actor(): ds = Datasette( From de55a76d402a6326c60a5f4cd1a03c7476613f0b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 12:33:57 -0700 Subject: [PATCH 232/299] Fix 500 error when 
accessing query page without ?sql= parameter (#2744) Closes #2743 --- datasette/templates/query.html | 4 ++-- datasette/views/database.py | 43 ++++++++++++++++++---------------- docs/changelog.rst | 7 ++++++ tests/plugins/my_plugin.py | 4 ++-- tests/test_html.py | 16 +++++++++++++ 5 files changed, 50 insertions(+), 24 deletions(-) diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 8b405da5..5f85ac6b 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -46,14 +46,14 @@ {% if not hide_sql %} {% if editable and allow_execute_sql %}

        + >{% if query and query.sql %}{{ query.sql }}{% elif tables %}select * from {{ tables[0].name|escape_sqlite }}{% endif %}

        {% else %}
        {% if query %}{{ query.sql }}{% endif %}
        {% endif %} {% else %} {% if not canned_query %} {% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 0cf93832..8e4ea85a 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -577,7 +577,7 @@ class QueryView(View): named_parameters = [] if canned_query and canned_query.get("params"): named_parameters = canned_query["params"] - if not named_parameters: + if not named_parameters and sql: named_parameters = derive_named_parameters(sql) named_parameter_values = { named_parameter: params.get(named_parameter) or "" @@ -602,7 +602,7 @@ class QueryView(View): params_for_query = params - if not canned_query_write: + if sql and not canned_query_write: try: if not canned_query: # For regular queries we only allow SELECT, plus other rules @@ -646,6 +646,8 @@ class QueryView(View): # Handle formats from plugins if format_ == "csv": + if not sql: + raise DatasetteError("?sql= is required", status=400) async def fetch_data_for_csv(request, _next=None): results = await db.execute(sql, params, truncate=True) @@ -771,25 +773,26 @@ class QueryView(View): # - No magic parameters, so no :_ in the SQL string edit_sql_url = None is_validated_sql = False - try: - validate_sql_select(sql) - is_validated_sql = True - except InvalidSql: - pass - if allow_execute_sql and is_validated_sql and ":_" not in sql: - edit_sql_url = ( - datasette.urls.database(database) - + "/-/query" - + "?" - + urlencode( - { - **{ - "sql": sql, - }, - **named_parameter_values, - } + if sql: + try: + validate_sql_select(sql) + is_validated_sql = True + except InvalidSql: + pass + if allow_execute_sql and is_validated_sql and ":_" not in sql: + edit_sql_url = ( + datasette.urls.database(database) + + "/-/query" + + "?" 
+ + urlencode( + { + **{ + "sql": sql, + }, + **named_parameter_values, + } + ) ) - ) async def query_actions(): query_actions = [] diff --git a/docs/changelog.rst b/docs/changelog.rst index 329b4769..dfb2a736 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,13 @@ Changelog ========= +.. _v1_0_unreleased: + +Unreleased +---------- + +- Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) + .. _v1_0_a30: 1.0a30 (2026-05-24) diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 4e401c07..f682e8b9 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -387,8 +387,8 @@ def view_actions(datasette, database, view, actor): @hookimpl def query_actions(datasette, database, query_name, sql): - # Don't explain an explain - if sql.lower().startswith("explain"): + # Don't explain an explain (or a missing query) + if not sql or sql.lower().startswith("explain"): return return [ { diff --git a/tests/test_html.py b/tests/test_html.py index efc1040d..d20796c9 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -241,6 +241,22 @@ def test_query_page_truncates(): ] +@pytest.mark.asyncio +async def test_query_page_with_no_sql(ds_client): + # https://github.com/simonw/datasette/issues/2743 + response = await ds_client.get("/fixtures/-/query") + assert response.status_code == 200 + assert '

        +

        + {% set parameter_names = [] %} + {% set parameter_values = {} %} + {% set sql_parameters_allow_expand = false %} + {% include "_sql_parameters.html" %}

        @@ -90,5 +95,11 @@ {% endif %} {% include "_codemirror_foot.html" %} +{% include "_sql_parameter_scripts.html" %} + {% endblock %} diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 5037d006..9b522f66 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -75,61 +75,8 @@ color: #b00020; font-weight: 700; } -form.sql .execute-write-parameter-row textarea[data-parameter-control] { - border: 1px solid #ccc; - border-radius: 3px; - box-sizing: border-box; - display: block; - font-family: Helvetica, sans-serif; - font-size: 1em; - min-height: 7rem; - padding: 9px 4px; - width: 100%; -} -form.sql .execute-write-parameter-row { - align-items: start; - column-gap: 0.6rem; - display: grid; - grid-template-columns: minmax(8rem, 11rem) minmax(16rem, 1fr) auto; - margin: 0 0 0.65rem; - max-width: 52rem; -} -form.sql .execute-write-parameter-row label { - overflow-wrap: anywhere; - padding-top: 0.55rem; - width: auto; -} -form.sql .execute-write-parameter-row input[data-parameter-control] { - box-sizing: border-box; - width: 100%; -} -form.sql.core button.execute-write-parameter-toggle[type=button] { - font-size: 0.72rem; - height: 1.8rem; - line-height: 1; - margin: 0.25rem 0 0; - padding: 0.25rem 0.45rem; -} -@media (max-width: 480px) { - form.sql .execute-write-parameter-row { - grid-template-columns: 1fr; - row-gap: 0.25rem; - } - form.sql .execute-write-parameter-row label { - padding-top: 0; - } - form.sql.core button.execute-write-parameter-toggle[type=button] { - justify-self: start; - margin-top: 0; - } -} -form.sql .execute-write-editor { - max-width: 52rem; -} -form.sql .execute-write-editor textarea#sql-editor { - width: 100%; -} +{% include "_sql_parameter_styles.html" %} {% endblock %} {% block body_class %}execute-write db-{{ database|to_css_class }}{% endblock %} @@ -168,16 +115,11 @@ form.sql .execute-write-editor textarea#sql-editor { {% endif %} -

        +

        -
        - {% if parameter_names %} -

        Parameters

        - {% for parameter in parameter_names %} -

        - {% endfor %} - {% endif %} -
        + {% set sql_parameters_section_id = "execute-write-parameters-section" %} + {% set sql_parameters_allow_expand = true %} + {% include "_sql_parameters.html" %}

        Query operations

        @@ -222,128 +164,15 @@ if (executeWriteSqlInput && !executeWriteSqlInput.value) { {% include "_codemirror_foot.html" %} +{% include "_sql_parameter_scripts.html" %} diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 7c251e2c..3bcc7178 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -14,6 +14,7 @@ {% endif %} {% include "_codemirror.html" %} +{% include "_sql_parameter_styles.html" %} {% endblock %} {% block body_class %}query db-{{ database|to_css_class }}{% if canned_query %} query-{{ canned_query|to_css_class }}{% endif %}{% endblock %} @@ -36,7 +37,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

        Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

        @@ -45,7 +46,7 @@ {% endif %} {% if not hide_sql %} {% if editable and allow_execute_sql %} -

        {% else %}
        {% if query %}{{ query.sql }}{% endif %}
        @@ -57,12 +58,10 @@ > {% endif %} {% endif %} - {% if named_parameter_values %} -

        Query parameters

        - {% for name, value in named_parameter_values.items() %} -

        - {% endfor %} - {% endif %} + {% set parameter_names = named_parameter_values.keys()|list %} + {% set parameter_values = named_parameter_values %} + {% set sql_parameters_allow_expand = false %} + {% include "_sql_parameters.html" %}

        {% if not hide_sql %}{% endif %} @@ -97,5 +96,11 @@ {% endif %} {% include "_codemirror_foot.html" %} +{% include "_sql_parameter_scripts.html" %} + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index e4eaee30..278f7e8c 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1061,7 +1061,7 @@ class ExecuteWriteAnalyzeView(BaseView): name = "execute-write-analyze" has_json_alternate = False - async def post(self, request): + async def get(self, request): db = await self.ds.resolve_database(request) if not await self.ds.allowed( action="execute-write-sql", @@ -1072,13 +1072,7 @@ class ExecuteWriteAnalyzeView(BaseView): _error(["Permission denied: need execute-write-sql"], 403) ) - try: - data, _ = await _json_or_form_payload(request) - except QueryValidationError as ex: - return _block_framing(_error([ex.message], ex.status)) - if not isinstance(data, dict): - return _block_framing(_error(["JSON must be a dictionary"], 400)) - invalid_keys = set(data) - {"sql"} + invalid_keys = set(request.args) - {"sql"} if invalid_keys: return _block_framing( _error( @@ -1086,9 +1080,7 @@ class ExecuteWriteAnalyzeView(BaseView): 400, ) ) - sql = data.get("sql") or "" - if not isinstance(sql, str): - return _block_framing(_error(["sql must be a string"], 400)) + sql = request.args.get("sql") or "" return _block_framing( Response.json( await _execute_write_analysis_data(self.ds, db, sql, request.actor) @@ -1096,6 +1088,34 @@ class ExecuteWriteAnalyzeView(BaseView): ) +class QueryParametersView(BaseView): + name = "query-parameters" + has_json_alternate = False + + async def get(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing(_error(["Permission denied: need execute-sql"], 403)) + + invalid_keys = set(request.args) - {"sql"} + if invalid_keys: + return 
_block_framing( + _error( + ["Invalid keys: {}".format(", ".join(sorted(invalid_keys)))], + 400, + ) + ) + try: + parameters = _derived_query_parameters(request.args.get("sql") or "") + except QueryValidationError as ex: + return _block_framing(_error([ex.message], ex.status)) + return _block_framing(Response.json({"ok": True, "parameters": parameters})) + + class QueryListView(BaseView): name = "query-list" diff --git a/docs/json_api.rst b/docs/json_api.rst index 2f581661..91ed5306 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -527,17 +527,20 @@ Creating saved queries ``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +.. _QueryParametersView: .. _ExecuteWriteView: .. _ExecuteWriteAnalyzeView: Executing write SQL ~~~~~~~~~~~~~~~~~~~ +``GET //-/query/-/parameters?sql=...`` returns the named parameters used by a SQL query. This requires ``execute-sql`` for the database. + ``GET //-/execute-write`` displays a form for executing writable SQL. A ``?sql=`` query string pre-populates the form without executing it. ``POST //-/execute-write`` executes writable SQL. This requires ``execute-write-sql`` for the database plus the relevant table-level write permissions. -``POST //-/execute-write/-/analyze`` accepts ``{"sql": "..."}`` and returns the derived parameters plus the write operations that SQL would need in order to execute. +``GET //-/execute-write/-/analyze?sql=...`` returns the derived parameters plus the write operations that SQL would need in order to execute. .. 
_QueryDefinitionView: diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index a9d22036..ae2c74e0 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -200,7 +200,10 @@ def test_error_in_on_success_message_sql(canned_write_client): def test_custom_params(canned_write_client): response = canned_write_client.get("/data/update_name?extra=foo") - assert '' in response.text + assert ( + '' + in response.text + ) def test_canned_query_pages_no_vary_header(canned_write_client): diff --git a/tests/test_html.py b/tests/test_html.py index e5f00e17..b49391a6 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -326,17 +326,29 @@ async def test_query_parameter_form_fields(ds_client): response = await ds_client.get("/fixtures/-/query?sql=select+:name") assert response.status_code == 200 assert ( - ' ' + ' ' in response.text ) + assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'id="sql-parameters-section"' in response.text + assert "setupSqlParameterRefresh" in response.text response2 = await ds_client.get("/fixtures/-/query?sql=select+:name&name=hello") assert response2.status_code == 200 assert ( - ' ' + ' ' in response2.text ) +@pytest.mark.asyncio +async def test_database_page_sql_parameter_refresh_markup(ds_client): + response = await ds_client.get("/fixtures") + assert response.status_code == 200 + assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'id="sql-parameters-section"' in response.text + assert "setupSqlParameterRefresh" in response.text + + @pytest.mark.asyncio async def test_row_html_simple_primary_key(ds_client): response = await ds_client.get("/fixtures/simple_primary_key/1") diff --git a/tests/test_queries.py b/tests/test_queries.py index 6d2c0b25..23820cf3 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -721,7 +721,7 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 
'data-sql-template="delete"' in response.text assert 'data-analyze-url="/data/-/execute-write/-/analyze"' in response.text assert 'addEventListener("paste"' in response.text - assert "refreshExecuteWriteAnalysis" in response.text + assert "setupSqlParameterRefresh" in response.text assert '

        Required permissioninsert
        ' in response.text assert '' in response.text assert "" in response.text @@ -747,15 +747,15 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): await db.execute_write("create table dogs (id integer primary key, name text)") await ds.invoke_startup() - response = await ds.client.post( + response = await ds.client.get( "/data/-/execute-write/-/analyze", actor={"id": "root"}, - json={"sql": "insert into dogs (name) values (:name)"}, + params={"sql": "insert into dogs (name) values (:name)"}, ) - read_only_response = await ds.client.post( + read_only_response = await ds.client.get( "/data/-/execute-write/-/analyze", actor={"id": "root"}, - json={"sql": "select * from dogs where name = :name"}, + params={"sql": "select * from dogs where name = :name"}, ) assert response.status_code == 200 @@ -786,6 +786,44 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): assert read_only_data["execute_disabled"] is True +@pytest.mark.asyncio +async def test_query_parameters_endpoint_uses_get_sql_only(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_parameters", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + response = await ds.client.get( + "/data/-/query/-/parameters", + actor={"id": "root"}, + params={ + "sql": "select * from dogs where name = :name and id = :id", + }, + ) + permission_denied_response = await ds.client.get( + "/data/-/query/-/parameters", + actor={"id": "not-root"}, + params={"sql": "select * from dogs where name = :name"}, + ) + magic_parameter_response = await ds.client.get( + "/data/-/query/-/parameters", + actor={"id": "root"}, + params={"sql": "select :_actor_id"}, + ) + + assert response.status_code == 200 + assert response.json() == {"ok": True, "parameters": ["name", "id"]} + assert permission_denied_response.status_code == 403 + assert permission_denied_response.json()["errors"] 
== [ + "Permission denied: need execute-sql" + ] + assert magic_parameter_response.status_code == 400 + assert magic_parameter_response.json()["errors"] == [ + "Magic parameters are not allowed" + ] + + @pytest.mark.asyncio async def test_database_action_menu_links_to_execute_write_for_permitted_actor(): ds = Datasette( From 4208ded249b28f8b0918ce80d289bfc88f9e8921 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 12:46:21 -0700 Subject: [PATCH 234/299] No execute-write on immutable databases Refs https://github.com/simonw/datasette/issues/2742#issuecomment-4536690161 --- datasette/default_database_actions.py | 2 ++ datasette/views/database.py | 7 ++++ tests/test_queries.py | 46 +++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/datasette/default_database_actions.py b/datasette/default_database_actions.py index 78055392..e0cb3cdf 100644 --- a/datasette/default_database_actions.py +++ b/datasette/default_database_actions.py @@ -5,6 +5,8 @@ from datasette.resources import DatabaseResource @hookimpl def database_actions(datasette, actor, database, request): async def inner(): + if not datasette.get_database(database).is_mutable: + return [] if not await datasette.allowed( action="execute-write-sql", resource=DatabaseResource(database), diff --git a/datasette/views/database.py b/datasette/views/database.py index 278f7e8c..de02cd0f 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -964,6 +964,13 @@ class ExecuteWriteView(BaseView): resource=DatabaseResource(db.name), actor=request.actor, ) + if not db.is_mutable: + return _block_framing( + _error( + ["Cannot execute write SQL because this database is immutable."], + 403, + ) + ) return await self._render_form( request, db, diff --git a/tests/test_queries.py b/tests/test_queries.py index 23820cf3..c31d7205 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -858,6 +858,52 @@ async def 
test_database_action_menu_links_to_execute_write_for_permitted_actor() assert "Execute write SQL" in writer_response.text +@pytest.mark.asyncio +async def test_database_action_menu_hides_execute_write_for_immutable_database(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + db = ds.add_memory_database("execute_write_menu_immutable", name="data") + db.is_mutable = False + await ds.invoke_startup() + + response = await ds.client.get("/data", actor={"id": "writer"}) + + assert response.status_code == 200 + assert "Execute write SQL" not in response.text + assert 'href="/data/-/execute-write"' not in response.text + + +@pytest.mark.asyncio +async def test_execute_write_get_rejects_immutable_database(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_get_immutable", name="data") + db.is_mutable = False + await ds.invoke_startup() + + response = await ds.client.get( + "/data/-/execute-write?sql=insert+into+dogs+(name)+values+('Cleo')", + actor={"id": "root"}, + ) + + assert response.status_code == 403 + assert response.json()["errors"] == [ + "Cannot execute write SQL because this database is immutable." 
+ ] + + @pytest.mark.asyncio async def test_execute_write_post_requires_database_and_table_permissions(): ds = Datasette( From 8ab8999ba97e0ec1d113ee8d3954d6431f39fa28 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 12:55:36 -0700 Subject: [PATCH 235/299] Big visual improvement to /-/queries pages Including /db/-/queries Refs https://github.com/simonw/datasette/issues/2735#issuecomment-4536860239 --- datasette/templates/query_list.html | 226 ++++++++++++++++++++++++---- datasette/views/database.py | 12 +- tests/test_queries.py | 25 ++- 3 files changed, 229 insertions(+), 34 deletions(-) diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index af974550..dbd607ab 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -2,6 +2,155 @@ {% block title %}{% if database %}{{ database }}: {% endif %}queries{% endblock %} +{% block extra_head %} +{{- super() -}} + +{% endblock %} + {% block body_class %}query-list{% if database %} db-{{ database|to_css_class }}{% endif %}{% endblock %} {% block crumbs %} @@ -10,49 +159,66 @@ {% block content %} -

        Queries

        +
        - -

        +

        Queries

        + + + -

        - - - - -

        +
        +
        + Mode + + + +
        +
        + Publication + + + +
        +
        {% if queries %} -
          - {% for query in queries %} -
        • - {% if show_database %} - {{ query.database }}: - {% endif %} - {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} - {% if query.is_write %}Writable{% endif %} - {% if query.is_published %}Published{% endif %} -
        • - {% endfor %} -
        +
        Required permissioninsert
        + + + {% if show_database %}{% endif %} + + + + + + + {% for query in queries %} + + {% if show_database %} + + {% endif %} + + + + + {% endfor %} + +
        DatabaseQueryModePublication
        {{ query.database }} + {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} + {% if query.description %}

        {{ query.description }}

        {% endif %} +
        {% if query.is_write %}Writable{% else %}Read-only{% endif %}{% if query.is_published %}Published{% else %}Unpublished{% endif %}
        {% else %}

        No queries found.

        {% endif %} {% if next_url %} -

        Next page

        + {% endif %} + + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index de02cd0f..3c660bc7 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -487,9 +487,9 @@ def _as_optional_bool(value, name): raise QueryValidationError("{} must be 0 or 1".format(name)) -def _query_list_limit(value): +def _query_list_limit(value, default=50): if value in (None, ""): - return 50 + return default try: return min(max(1, int(value)), 1000) except ValueError as ex: @@ -1136,7 +1136,10 @@ class QueryListView(BaseView): database = await self.database_name(request) format_ = request.url_vars.get("format") or "html" try: - limit = _query_list_limit(request.args.get("_size")) + limit = _query_list_limit( + request.args.get("_size"), + default=20 if format_ == "html" else 50, + ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") is_published = _as_optional_bool( request.args.get("is_published"), "is_published" @@ -1175,6 +1178,9 @@ class QueryListView(BaseView): data = { "ok": True, "database": database, + "database_color": ( + self.ds.get_database(database).color if database is not None else None + ), "queries": page["queries"], "next": page["next"], "next_url": next_url, diff --git a/tests/test_queries.py b/tests/test_queries.py index c31d7205..b7416ac7 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -451,12 +451,34 @@ async def test_query_list_search_filter_and_html(): assert html_response.status_code == 200 assert "Demo query 02" in html_response.text assert "Demo query 01" not in html_response.text + assert 'class="query-list-results"' in html_response.text + assert "Mode" in html_response.text + assert 'type="radio" name="is_published" value="1"' in html_response.text assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" ] +@pytest.mark.asyncio +async def 
test_query_list_html_defaults_to_twenty_and_shows_pagination(): + ds = Datasette(memory=True) + ds.root_enabled = True + ds.add_memory_database("query_list_html_pagination", name="data") + await ds.invoke_startup() + await add_numbered_queries(ds, "data", 25) + + response = await ds.client.get("/data/-/queries", actor={"id": "root"}) + json_response = await ds.client.get("/data/-/queries.json", actor={"id": "root"}) + + assert response.status_code == 200 + assert response.text.count('aria-label="Query pagination"') == 1 + assert "Demo query 20" in response.text + assert "Demo query 21" not in response.text + assert 'href="/data/-/queries?_next=' in response.text + assert len(json_response.json()["queries"]) == 25 + + @pytest.mark.asyncio async def test_global_query_list_api_and_html(): ds = Datasette(memory=True) @@ -519,7 +541,8 @@ async def test_global_query_list_api_and_html(): ("beta", "beta_first"), ] assert html_response.status_code == 200 - assert 'href="/beta">beta:' in html_response.text + assert 'Database' in html_response.text + assert 'class="query-list-database" href="/beta">beta' in html_response.text assert "Beta first" in html_response.text assert "Alpha first" not in html_response.text From f1dd86ebfb01644fead19f9f007b9b76f863d72e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 14:05:26 -0700 Subject: [PATCH 236/299] Tweak URL designs of new endpoints --- datasette/app.py | 6 +++--- datasette/templates/database.html | 2 +- datasette/templates/execute_write.html | 2 +- datasette/templates/query.html | 2 +- datasette/templates/query_create.html | 2 +- docs/json_api.rst | 6 +++--- queries-plan.md | 4 ++-- tests/test_html.py | 4 ++-- tests/test_queries.py | 22 +++++++++++----------- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 90e41521..232aa0cf 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2745,11 +2745,11 @@ class Datasette: ) add_route( 
QueryInsertView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/-/insert$", + r"/(?P[^\/\.]+)/-/queries/insert$", ) add_route( ExecuteWriteAnalyzeView.as_view(self), - r"/(?P[^\/\.]+)/-/execute-write/-/analyze$", + r"/(?P[^\/\.]+)/-/execute-write/analyze$", ) add_route( ExecuteWriteView.as_view(self), @@ -2761,7 +2761,7 @@ class Datasette: ) add_route( QueryParametersView.as_view(self), - r"/(?P[^\/\.]+)/-/query/-/parameters$", + r"/(?P[^\/\.]+)/-/query/parameters$", ) add_route( wrap_view(QueryView, self), diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 0c9ec94c..62f9c620 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -26,7 +26,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} {% if allow_execute_sql %} -
        +

        Custom SQL query

        {% set parameter_names = [] %} diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 9b522f66..46f58c3b 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -95,7 +95,7 @@

        {{ execution_message }}{% for link in execution_links %} {{ link.label }}{% endfor %}

        {% endif %} - + {% if write_template_tables %}
        diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 3bcc7178..f74d21f1 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -37,7 +37,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

        Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

        diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index fb2599d2..3c027def 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -17,7 +17,7 @@

        Create query

        - +


        diff --git a/docs/json_api.rst b/docs/json_api.rst index 91ed5306..dd54c459 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -525,7 +525,7 @@ Creating saved queries in the UI Creating saved queries ~~~~~~~~~~~~~~~~~~~~~~ -``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +``POST //-/queries/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. .. _QueryParametersView: .. _ExecuteWriteView: @@ -534,13 +534,13 @@ Creating saved queries Executing write SQL ~~~~~~~~~~~~~~~~~~~ -``GET //-/query/-/parameters?sql=...`` returns the named parameters used by a SQL query. This requires ``execute-sql`` for the database. +``GET //-/query/parameters?sql=...`` returns the named parameters used by a SQL query. This requires ``execute-sql`` for the database. ``GET //-/execute-write`` displays a form for executing writable SQL. A ``?sql=`` query string pre-populates the form without executing it. ``POST //-/execute-write`` executes writable SQL. This requires ``execute-write-sql`` for the database plus the relevant table-level write permissions. -``GET //-/execute-write/-/analyze?sql=...`` returns the derived parameters plus the write operations that SQL would need in order to execute. +``GET //-/execute-write/analyze?sql=...`` returns the derived parameters plus the write operations that SQL would need in order to execute. .. _QueryDefinitionView: diff --git a/queries-plan.md b/queries-plan.md index a708e887..72427df2 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -211,7 +211,7 @@ JSON endpoints should follow Datasette's existing write API style: use `POST` pl Endpoints: - `GET /-/queries` and `GET /{database}/-/queries` show searchable HTML query browsers. `GET /-/queries.json` lists query definitions across every database the actor can view; `GET /{database}/-/queries.json` scopes that list to one database. 
Both JSON endpoints use cursor pagination with `_next` and `_size`. -- `POST /{database}/-/queries/-/insert` creates a query. +- `POST /{database}/-/queries/insert` creates a query. - `GET /{database}/{query}/-/definition` returns one query definition without executing it. - `POST /{database}/{query}/-/update` updates one query. - `POST /{database}/{query}/-/delete` deletes one query. @@ -388,7 +388,7 @@ The read methods should reconstruct the existing dictionary shape used by query On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. -The save form should call `POST /{database}/-/queries/-/insert` and default to `is_published=false`. +The save form should call `POST /{database}/-/queries/insert` and default to `is_published=false`. If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. 
diff --git a/tests/test_html.py b/tests/test_html.py index b49391a6..8cda6dba 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -329,7 +329,7 @@ async def test_query_parameter_form_fields(ds_client): ' ' in response.text ) - assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'data-parameters-url="/fixtures/-/query/parameters"' in response.text assert 'id="sql-parameters-section"' in response.text assert "setupSqlParameterRefresh" in response.text response2 = await ds_client.get("/fixtures/-/query?sql=select+:name&name=hello") @@ -344,7 +344,7 @@ async def test_query_parameter_form_fields(ds_client): async def test_database_page_sql_parameter_refresh_markup(ds_client): response = await ds_client.get("/fixtures") assert response.status_code == 200 - assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'data-parameters-url="/fixtures/-/query/parameters"' in response.text assert 'id="sql-parameters-section"' in response.text assert "setupSqlParameterRefresh" in response.text diff --git a/tests/test_queries.py b/tests/test_queries.py index b7416ac7..57920584 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -356,7 +356,7 @@ async def test_query_insert_api_creates_read_only_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ -568,7 +568,7 @@ async def test_query_insert_api_publish_requires_publish_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "writer"}, json={"query": {"name": "public", "sql": "select 1", "is_published": True}}, ) @@ -586,7 +586,7 @@ async def test_query_insert_api_creates_writable_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ 
-603,7 +603,7 @@ async def test_query_insert_api_creates_writable_query(): assert query["parameters"] == ["name"] bad_response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ -671,7 +671,7 @@ async def test_query_insert_api_rejects_magic_parameters(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={"query": {"name": "magic", "sql": "select :_actor_id"}}, ) @@ -742,7 +742,7 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'data-sql-template="insert"' in response.text assert 'data-sql-template="update"' in response.text assert 'data-sql-template="delete"' in response.text - assert 'data-analyze-url="/data/-/execute-write/-/analyze"' in response.text + assert 'data-analyze-url="/data/-/execute-write/analyze"' in response.text assert 'addEventListener("paste"' in response.text assert "setupSqlParameterRefresh" in response.text assert '' in response.text @@ -771,12 +771,12 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): await ds.invoke_startup() response = await ds.client.get( - "/data/-/execute-write/-/analyze", + "/data/-/execute-write/analyze", actor={"id": "root"}, params={"sql": "insert into dogs (name) values (:name)"}, ) read_only_response = await ds.client.get( - "/data/-/execute-write/-/analyze", + "/data/-/execute-write/analyze", actor={"id": "root"}, params={"sql": "select * from dogs where name = :name"}, ) @@ -818,19 +818,19 @@ async def test_query_parameters_endpoint_uses_get_sql_only(): await ds.invoke_startup() response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "root"}, params={ "sql": "select * from dogs where name = :name and id = :id", }, ) permission_denied_response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "not-root"}, 
params={"sql": "select * from dogs where name = :name"}, ) magic_parameter_response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "root"}, params={"sql": "select :_actor_id"}, ) From 4a1a4d7807fb99203b9053b6d270b265df61f0af Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 11:59:49 -0700 Subject: [PATCH 237/299] Query is_trusted and is_private properties Refs https://github.com/simonw/datasette/issues/2735#issuecomment-4547270516 Diff explanation: https://gist.github.com/simonw/1e4de6c4b041a51968eb273ee96dec1f --- datasette/app.py | 39 ++-- datasette/default_actions.py | 7 - datasette/default_permissions/defaults.py | 100 +++++---- datasette/templates/query_create.html | 4 +- datasette/templates/query_list.html | 65 +++++- datasette/utils/internal_db.py | 3 +- datasette/views/database.py | 79 ++++--- docs/authentication.rst | 10 - docs/internals.rst | 3 +- queries-plan.md | 84 ++++---- tests/test_queries.py | 245 ++++++++++++++++++---- 11 files changed, 421 insertions(+), 218 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 232aa0cf..3329ee7e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -618,7 +618,8 @@ class Datasette: fragment=query_config.get("fragment"), parameters=query_config.get("params"), is_write=bool(query_config.get("write")), - is_published=bool(query_config.get("is_published")), + is_private=bool(query_config.get("is_private")), + is_trusted=bool(query_config.get("is_trusted", True)), source="config", on_success_message=query_config.get("on_success_message"), on_success_message_sql=query_config.get("on_success_message_sql"), @@ -1084,7 +1085,8 @@ class Datasette: "parameters": parameters, "is_write": is_write, "write": is_write, - "is_published": bool(row["is_published"]), + "is_private": bool(row["is_private"]), + "is_trusted": bool(row["is_trusted"]), "source": row["source"], "owner_id": row["owner_id"], "on_success_message": 
options.get("on_success_message"), @@ -1119,7 +1121,8 @@ class Datasette: fragment=None, parameters=None, is_write=False, - is_published=False, + is_private=False, + is_trusted=False, source="plugin", owner_id=None, on_success_message=None, @@ -1144,8 +1147,8 @@ class Datasette: sql_statement = """ INSERT INTO queries ( database_name, name, sql, title, description, description_html, - options, parameters, is_write, is_published, source, owner_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + options, parameters, is_write, is_private, is_trusted, source, owner_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ if replace: sql_statement += """ @@ -1157,7 +1160,8 @@ class Datasette: options = excluded.options, parameters = excluded.parameters, is_write = excluded.is_write, - is_published = excluded.is_published, + is_private = excluded.is_private, + is_trusted = excluded.is_trusted, source = excluded.source, owner_id = excluded.owner_id, updated_at = CURRENT_TIMESTAMP @@ -1174,7 +1178,8 @@ class Datasette: options_json, parameters_json, int(bool(is_write)), - int(bool(is_published)), + int(bool(is_private)), + int(bool(is_trusted)), source, owner_id, ], @@ -1193,7 +1198,8 @@ class Datasette: fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - is_published=UNCHANGED, + is_private=UNCHANGED, + is_trusted=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -1209,7 +1215,8 @@ class Datasette: "description_html": description_html, "parameters": parameters, "is_write": is_write, - "is_published": is_published, + "is_private": is_private, + "is_trusted": is_trusted, "source": source, "owner_id": owner_id, } @@ -1227,7 +1234,7 @@ class Datasette: for field, value in fields.items(): if value is UNCHANGED: continue - if field in {"is_write", "is_published"}: + if field in {"is_write", "is_private", "is_trusted"}: value = int(bool(value)) elif field == "parameters": value = json.dumps(list(value or [])) @@ -1300,7 +1307,8 @@ 
class Datasette: cursor=None, q=None, is_write=None, - is_published=None, + is_private=None, + is_trusted=None, source=None, owner_id=None, include_private=False, @@ -1372,9 +1380,12 @@ class Datasette: if is_write is not None: where_clauses.append("q.is_write = :query_is_write") params["query_is_write"] = int(bool(is_write)) - if is_published is not None: - where_clauses.append("q.is_published = :query_is_published") - params["query_is_published"] = int(bool(is_published)) + if is_private is not None: + where_clauses.append("q.is_private = :query_is_private") + params["query_is_private"] = int(bool(is_private)) + if is_trusted is not None: + where_clauses.append("q.is_trusted = :query_is_trusted") + params["query_is_trusted"] = int(bool(is_trusted)) if source is not None: where_clauses.append("q.source = :query_source") params["query_source"] = source diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 6787b80e..6a1f77b8 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -68,13 +68,6 @@ def register_actions(): resource_class=DatabaseResource, also_requires="execute-sql", ), - Action( - name="publish-query", - abbr="pq", - description="Publish saved queries for actors without execute-sql", - resource_class=DatabaseResource, - also_requires="insert-query", - ), # Table-level actions (child-level) Action( name="view-table", diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 58deea01..dfd8d3e9 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -26,6 +26,32 @@ DEFAULT_ALLOW_ACTIONS = frozenset( ) +def _configured_query_restriction_selects(datasette: "Datasette") -> tuple[list[str], dict]: + selects = [] + params = {} + for index, (database_name, db_config) in enumerate( + ((datasette.config or {}).get("databases") or {}).items() + ): + for query_name, query_config in (db_config.get("queries") or 
{}).items(): + if isinstance(query_config, dict) and query_config.get("is_private"): + continue + parent_param = f"query_config_parent_{index}_{len(selects)}" + child_param = f"query_config_child_{index}_{len(selects)}" + selects.append( + f""" + SELECT :{parent_param} AS parent, :{child_param} AS child + WHERE NOT EXISTS ( + SELECT 1 FROM queries + WHERE database_name = :{parent_param} + AND name = :{child_param} + ) + """ + ) + params[parent_param] = database_name + params[child_param] = query_name + return selects, params + + @hookimpl(specname="permission_resources_sql") async def default_allow_sql_check( datasette: "Datasette", @@ -93,61 +119,45 @@ async def default_query_permissions_sql( if action != "view-query": return None - execute_sql = await datasette.allowed_resources_sql( - action="execute-sql", actor=actor - ) - sql = execute_sql.sql - params = {} - for key, value in execute_sql.params.items(): - new_key = f"query_execute_sql_{key}" - sql = sql.replace(f":{key}", f":{new_key}") - params[new_key] = value - - trusted_writable_sql = "" + params = {"query_owner_id": actor_id} + rule_sqls = [] if not datasette.default_deny: - trusted_writable_sql = """ - UNION ALL + rule_sqls.append( + """ SELECT database_name AS parent, name AS child, 1 AS allow, - 'trusted writable query' AS reason + 'non-private query' AS reason FROM queries - WHERE is_write = 1 - AND source IN ('config', 'plugin') - """ + WHERE is_private = 0 + """ + ) - user_writable_sql = "" if actor_id is not None: - params["query_owner_id"] = actor_id - user_writable_sql = """ - UNION ALL + rule_sqls.append( + """ SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries - WHERE is_write = 1 - AND source = 'user' - AND owner_id = :query_owner_id + WHERE owner_id = :query_owner_id + """ + ) + + config_restriction_selects, config_restriction_params = ( + _configured_query_restriction_selects(datasette) + ) + + restriction_sqls = [ """ + SELECT database_name AS 
parent, name AS child + FROM queries + WHERE is_private = 0 + OR owner_id = :query_owner_id + """ + ] + restriction_sqls.extend(config_restriction_selects) + params.update(config_restriction_params) return PermissionSQL( - sql=f""" - WITH execute_sql_allowed AS ( - {sql} - ) - SELECT database_name AS parent, name AS child, 1 AS allow, - 'published query' AS reason - FROM queries - WHERE is_write = 0 - AND is_published = 1 - UNION ALL - SELECT q.database_name AS parent, q.name AS child, 1 AS allow, - 'execute-sql allows query' AS reason - FROM queries q - JOIN execute_sql_allowed es - ON es.parent = q.database_name - AND es.child IS NULL - WHERE q.is_write = 0 - AND q.is_published = 0 - {trusted_writable_sql} - {user_writable_sql} - """, + sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, + restriction_sql="\nUNION ALL\n".join(restriction_sqls), params=params, ) diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 3c027def..686d971e 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -27,9 +27,7 @@

        - {% if can_publish %} -

        - {% endif %} +

        {% if sql and analysis_is_write %}

        Execute write SQL

        {% endif %} diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index dbd607ab..25259b3d 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -73,7 +73,7 @@ border-collapse: collapse; font-size: 0.9rem; margin: 0.25rem 0 1rem; - min-width: 36rem; + min-width: 42rem; width: 100%; } .query-list-results th, @@ -100,6 +100,16 @@ font-size: 0.78rem; margin: 0.15rem 0 0; } +.query-list-owner { + color: #39445a; + font-family: var(--font-monospace, monospace); + white-space: nowrap; +} +.query-list-flags { + display: flex; + flex-wrap: wrap; + gap: 0.3rem; +} .query-list-pill { background-color: #eef1f5; border: 1px solid #d7dde5; @@ -116,15 +126,36 @@ background-color: #fff4db; border-color: #e2b64e; } -.query-list-pill-published { +.query-list-pill-public { background-color: #e7f5ec; border-color: #9ecfab; color: #267a3e; } -.query-list-pill-unpublished { +.query-list-pill-private { background-color: #f7edf0; border-color: #dbb8c1; } +.query-list-pill-trusted { + background-color: #e7f5ec; + border-color: #9ecfab; + color: #267a3e; +} +.query-list-empty { + color: #6b7280; +} +.query-list-footnotes { + border-top: 1px solid #d7dde5; + color: #4f5b6d; + font-size: 0.82rem; + margin: 0.35rem 0 1rem; + padding-top: 0.55rem; +} +.query-list-footnotes p { + margin: 0.25rem 0; +} +.query-list-footnotes .query-list-pill { + margin-right: 0.35rem; +} .query-list-pagination a { border: 1px solid #007bff; border-radius: 0.25rem; @@ -177,10 +208,10 @@
        - Publication - - - + Visibility + + +
        @@ -191,8 +222,8 @@
        {% if show_database %}{% endif %} - - + + @@ -205,12 +236,24 @@ {{ query.title or query.name }}{% if query.private %} 🔒{% endif %} {% if query.description %}

        {{ query.description }}

        {% endif %} - - + + {% endfor %}
        DatabaseQueryModePublicationOwnerFlags
        {% if query.is_write %}Writable{% else %}Read-only{% endif %}{% if query.is_published %}Published{% else %}Unpublished{% endif %}{% if query.owner_id is not none %}{{ query.owner_id }}{% else %}-{% endif %} + + {% if query.is_write %}Writable{% else %}Read-only{% endif %} + {% if query.is_private %}Private{% endif %} + {% if query.is_trusted %}Trusted{% endif %} + +
        + {% if show_private_note or show_trusted_note %} +
        + {% if show_private_note %}

        Private: Only the owning actor can view this query.

        {% endif %} + {% if show_trusted_note %}

        Trusted: Execution skips the usual SQL and write permission checks after view-query allows access.

        {% endif %} +
        + {% endif %} {% else %}

        No queries found.

        {% endif %} diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 9c693b0a..bf172667 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -123,7 +123,8 @@ async def initialize_metadata_tables(db): options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, diff --git a/datasette/views/database.py b/datasette/views/database.py index 3c660bc7..91e9c350 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -428,7 +428,7 @@ _query_fields = { "fragment", "parameters", "params", - "is_published", + "is_private", "on_success_message", "on_success_message_sql", "on_success_redirect", @@ -571,7 +571,7 @@ async def _check_query_name(db, name, *, existing=False): raise QueryValidationError("Query name conflicts with a table or view") -async def _analyze_user_query(datasette, db, sql, *, actor, is_published): +async def _analyze_user_query(datasette, db, sql, *, actor): if not sql or not isinstance(sql, str): raise QueryValidationError("SQL is required") derived = _derived_query_parameters(sql) @@ -583,8 +583,6 @@ async def _analyze_user_query(datasette, db, sql, *, actor, is_published): is_write = _analysis_is_write(analysis) if is_write: - if is_published: - raise QueryValidationError("Writable queries cannot be published") try: await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis @@ -680,6 +678,26 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis +async def 
_ensure_stored_query_execution_permissions(datasette, db, query, actor): + if query.get("is_trusted"): + return + if query.get("write"): + await datasette.ensure_permission( + action="execute-write-sql", + resource=DatabaseResource(db.name), + actor=actor, + ) + await datasette.ensure_query_write_permissions( + db.name, query["sql"], actor=actor + ) + else: + await datasette.ensure_permission( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=actor, + ) + + async def _execute_write_analysis_data(datasette, db, sql, actor): parameter_names = [] analysis_rows = [] @@ -752,7 +770,7 @@ async def _inserted_row_url(datasette, db, analysis, cursor): def _apply_query_data_types(data): typed = dict(data) - for key in ("hide_sql", "is_published"): + for key in ("hide_sql", "is_private"): if key in typed: typed[key] = _as_bool(typed[key]) return typed @@ -769,20 +787,12 @@ async def _prepare_query_create(datasette, request, db, data): if await datasette.get_query(db.name, name) is not None: raise QueryValidationError("Query already exists") - is_published = _as_bool(data.get("is_published")) is_write, derived, analysis = await _analyze_user_query( datasette, db, data.get("sql"), actor=request.actor, - is_published=is_published, ) - if is_published and not await datasette.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ): - raise QueryValidationError("Permission denied: need publish-query", status=403) if not is_write and any(data.get(field) for field in _query_write_fields): raise QueryValidationError("Writable query fields require writable SQL") @@ -800,7 +810,8 @@ async def _prepare_query_create(datasette, request, db, data): "fragment": data.get("fragment"), "parameters": parameters, "is_write": is_write, - "is_published": is_published, + "is_private": _as_bool(data.get("is_private", True)), + "is_trusted": False, "source": "user", "owner_id": _actor_id(request.actor), "on_success_message": 
data.get("on_success_message"), @@ -819,7 +830,6 @@ async def _prepare_query_update(datasette, request, db, existing, update): update = _apply_query_data_types(update) sql = update.get("sql", existing["sql"]) - is_published = update.get("is_published", existing["is_published"]) query_is_write = existing["is_write"] derived = _derived_query_parameters(sql) parameters = None @@ -830,19 +840,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): db, sql, actor=request.actor, - is_published=is_published, ) - elif is_published and query_is_write: - raise QueryValidationError("Writable queries cannot be published") - if is_published and not existing["is_published"]: - if not await datasette.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ): - raise QueryValidationError( - "Permission denied: need publish-query", status=403 - ) if "parameters" in update or "params" in update: parameters = _coerce_query_parameters( @@ -864,7 +862,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): "fragment": update.get("fragment"), "parameters": parameters, "is_write": query_is_write, - "is_published": is_published, + "is_private": update.get("is_private"), "on_success_message": update.get("on_success_message"), "on_success_message_sql": update.get("on_success_message_sql"), "on_success_redirect": update.get("on_success_redirect"), @@ -1141,8 +1139,8 @@ class QueryListView(BaseView): default=20 if format_ == "html" else 50, ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") - is_published = _as_optional_bool( - request.args.get("is_published"), "is_published" + is_private = _as_optional_bool( + request.args.get("is_private"), "is_private" ) except QueryValidationError as ex: return _error([ex.message], ex.status) @@ -1154,7 +1152,7 @@ class QueryListView(BaseView): cursor=request.args.get("_next"), q=request.args.get("q") or None, is_write=is_write, - 
is_published=is_published, + is_private=is_private, source=request.args.get("source") or None, owner_id=request.args.get("owner_id") or None, include_private=True, @@ -1186,12 +1184,14 @@ class QueryListView(BaseView): "next_url": next_url, "has_more": page["has_more"], "limit": page["limit"], + "show_private_note": any(query["is_private"] for query in page["queries"]), + "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), "query_list_path": query_list_path, "show_database": database is None, "filters": { "q": request.args.get("q") or "", "is_write": request.args.get("is_write") or "", - "is_published": request.args.get("is_published") or "", + "is_private": request.args.get("is_private") or "", "source": request.args.get("source") or "", "owner_id": request.args.get("owner_id") or "", }, @@ -1255,11 +1255,6 @@ class QueryCreateView(BaseView): "database_color": db.color, "sql": sql, "parameter_names": parameter_names, - "can_publish": await self.ds.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ), "analysis_error": analysis_error, "analysis_rows": analysis_rows, "analysis_is_write": bool( @@ -1435,9 +1430,9 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - if canned_query.get("write") and canned_query.get("source") == "user": - await datasette.ensure_query_write_permissions( - db.name, canned_query["sql"], actor=request.actor + if canned_query.get("write"): + await _ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor ) # If database is immutable, return an error @@ -1558,6 +1553,10 @@ class QueryView(View): ) if not visible: raise Forbidden("You do not have permission to view this query") + if not canned_query_write: + await _ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor + ) else: await datasette.ensure_permission( diff --git a/docs/authentication.rst 
b/docs/authentication.rst index b6a4cb7e..6e835c8d 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1299,16 +1299,6 @@ insert-query Actor is allowed to create saved queries in a database. -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -.. _actions_publish_query: - -publish-query -------------- - -Actor is allowed to publish a saved read-only query so actors without ``execute-sql`` can run it. - ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/internals.rst b/docs/internals.rst index b5da7cbf..c76de487 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2158,7 +2158,8 @@ The internal database schema is as follows: options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, diff --git a/queries-plan.md b/queries-plan.md index 72427df2..f4b8049c 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -13,9 +13,9 @@ Terminology change: these are now "queries", not "canned queries". Legacy code a - Internal table name: `queries`. - Query definitions should use real columns, not a JSON blob for all options. - Query parameter names live in a `parameters` text column as a JSON array. No default values for parameters in this pass. -- No `queries_database_is_published_idx` index. -- User-created queries require `execute-sql` and `insert-query` on the database. Writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. 
-- `publish-query` is the permission for creating or updating a query so users without `execute-sql` can execute it. +- No separate index is needed for the privacy/trust flags yet. +- User-created queries require `execute-sql` and `insert-query` on the database. They default to private, and writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. +- Configured queries default to trusted, which means actors who can view them can execute them without also holding `execute-sql` or the relevant write permissions. Config can opt out with `is_trusted: false`. - Add `update-query` and `delete-query`, so administrators can manage queries created by other users. - Remove the old `canned_queries()` hook from core. If we want compatibility later, build a separate `datasette-old-canned-queries` plugin. - Writable user-created queries can be supported using `Database.analyze_sql()`, provided we fail closed when analysis cannot prove the required permissions. @@ -45,7 +45,8 @@ CREATE TABLE IF NOT EXISTS queries ( options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, @@ -64,11 +65,12 @@ Column notes: - Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. - `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. 
- Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. -- `is_published` only applies to read-only queries. A writable query can still be public through explicit `view-query` permissions, but the "publish for users without execute-sql" shortcut should be read-only. +- `is_private` means the query is only visible to its owning actor. This is enforced as a permission restriction, so broader `view-query` grants do not expose private rows. +- `is_trusted` means execution skips the usual `execute-sql` or write-permission checks after `view-query` has allowed access. - `source` distinguishes `user`, `config`, and `plugin` rows. - `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. -No separate index is needed on `(database_name, name)` because the primary key already creates one. Do not add a `queries_database_is_published_idx` index for now. +No separate index is needed on `(database_name, name)` because the primary key already creates one. `QueryResource.resources_sql()` can become: @@ -104,7 +106,6 @@ Remove the old `canned_queries()` hookspec and all core calls to it. If compatib Add core actions: - `insert-query`, database-level, for creating queries in a database. -- `publish-query`, database-level, for marking read-only queries as executable by actors who lack `execute-sql`. - `update-query`, query-level, for modifying existing query definitions. - `delete-query`, query-level, for deleting existing query definitions. @@ -114,17 +115,11 @@ User-created query creation requires: - `insert-query` on `DatabaseResource(database)` - If analysis shows the query is writable, the table-level write permissions described in the writable query section. -Setting `is_published=1` requires: - -- `publish-query` on `DatabaseResource(database)` -- The query must be read-only according to `Database.analyze_sql()`. 
- Updating an existing query requires: - `update-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. - If the SQL changes, also require `execute-sql` on the database. - If the changed SQL is writable, also require the table-level write permissions described in the writable query section. -- If `is_published` changes from `0` to `1`, also require `publish-query` on the database. Deleting an existing query requires: @@ -133,18 +128,18 @@ Deleting an existing query requires: Default owner permissions: - For `source='user' AND owner_id = actor.id`, grant `update-query` and `delete-query`. -- Do not automatically grant execution if the user no longer has the execution permission described below. +- For `source='user' AND owner_id = actor.id`, grant `view-query`. If the query is private, restriction SQL ensures no other actor sees it through a broader grant. ## Executing queries Default execution rule for read-only queries: -- If `is_published=0`, the actor needs `execute-sql` on the database. -- If `is_published=1`, the actor can execute the query without `execute-sql`. +- If `is_trusted=0`, the actor needs `execute-sql` on the database. +- If `is_trusted=1`, the actor can execute the query without `execute-sql`, provided `view-query` allows access. Default execution rule for user-created writable queries: -- `is_published` must be `0`. +- `is_trusted` must be `0`. - The actor must have `view-query`. - The actor must currently have every write permission required by fresh `Database.analyze_sql()` results for the query SQL. @@ -152,14 +147,14 @@ Implementation: - Remove `view-query` from the broad `DEFAULT_ALLOW_ACTIONS` set. - Replace it with query-aware default `view-query` permission SQL. -- For `is_published=1 AND is_write=0`, emit a child-level `view-query` allow. -- For `is_published=0 AND is_write=0`, emit child-level `view-query` allows for queries whose parent database is in the actor's `execute-sql` allowed resources. 
-- For `is_write=1 AND source='user'`, emit `view-query` only for the owner or actors with explicit `view-query` permission, then have `QueryView` perform the fresh analysis/table-permission check before execution. -- For trusted writable queries, preserve current behavior by emitting child-level `view-query` allows for `is_write=1 AND source IN ('config', 'plugin')` when Datasette is not running with `--default-deny`. +- Emit default `view-query` allows for non-private rows when Datasette is not running with `--default-deny`. +- Emit default `view-query` allows for the owning actor. +- Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. +- Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. -For read-only queries this keeps `QueryView` simple: it checks `view-query` for the query resource, and the default permission hook encodes the relationship with `execute-sql`. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. +For read-only queries this keeps `QueryView` explicit: it checks `view-query` for the query resource, then checks `execute-sql` unless the row is trusted. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. -Explicit deny rules should still be able to block a published query. +Explicit deny rules should still be able to block a query, and `--default-deny` still blocks trusted queries unless something grants `view-query`. ## Writable queries @@ -180,7 +175,7 @@ Validation flow for user-created queries: 1. Derive named parameters from the SQL and pass harmless placeholder values into `db.analyze_sql()` so SQLite can prepare statements with bindings. 2. If analysis raises a SQLite error, reject the query. 3. 
If every table access is `read`, treat the query as read-only and require `execute-sql` plus `insert-query`/`update-query` as described above. -4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_published=0`. +4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_trusted=0`. 5. Reject writable user-created queries that access a database other than the database they are being saved against, until `analyze_sql()` can reliably map attached SQLite schemas back to Datasette database names. 6. For every write access returned by analysis, require the corresponding permission on `TableResource(access.database, access.table)`: - `insert` -> `insert-row` @@ -200,7 +195,7 @@ Fail closed cases for user-created writable queries: - Analysis reports any write operation that cannot be mapped to a Datasette table resource. - Analysis reports writes outside the target database. - The actor lacks any required table write permission. -- `is_published=1` is requested. +- `is_trusted=1` is requested through the user-facing API. This gives us writable user-created queries without letting `execute-sql` alone become a path to create arbitrary write endpoints. 
@@ -225,7 +220,7 @@ Create request: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "is_published": false, + "is_private": true, "parameters": ["region"] } } @@ -242,7 +237,8 @@ Successful create returns `201` and the created query definition: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "is_published": false, + "is_private": true, + "is_trusted": false, "parameters": ["region"] } } @@ -254,7 +250,7 @@ Update request, imitating `RowUpdateView`: { "update": { "title": "Top customers by revenue", - "is_published": true + "is_private": false }, "return": true } @@ -270,7 +266,8 @@ Successful update returns `{"ok": true}` by default. With `"return": true`, retu "name": "top_customers", "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers by revenue", - "is_published": true + "is_private": false, + "is_trusted": false } } ``` @@ -317,7 +314,8 @@ await datasette.add_query( fragment=None, parameters=None, is_write=False, - is_published=False, + is_private=False, + is_trusted=False, source="plugin", owner_id=None, on_success_message=None, @@ -340,7 +338,8 @@ await datasette.update_query( fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - is_published=UNCHANGED, + is_private=UNCHANGED, + is_trusted=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -360,7 +359,8 @@ await datasette.list_queries( cursor=None, q=None, is_write=None, - is_published=None, + is_private=None, + is_trusted=None, source=None, owner_id=None, ) @@ -382,15 +382,13 @@ For column-backed fields, `None` should write SQL `NULL`. 
For option fields, `No Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. +The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_private`, `is_trusted`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. ## Query page save UI On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. -The save form should call `POST /{database}/-/queries/insert` and default to `is_published=false`. - -If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. +The save form should call `POST /{database}/-/queries/insert` and default to `is_private=true`. 
On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. The global `/-/queries` page reuses the same interface and shows the database for each query. @@ -403,7 +401,7 @@ This page should require `execute-sql` and `insert-query` to access. It should p - Read-only - Writable -Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and optional published status if the actor has `publish-query`. +Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and privacy status. Writable mode should always run `Database.analyze_sql()` and show an analysis panel before saving: @@ -413,7 +411,7 @@ Writable mode should always run `Database.analyze_sql()` and show an analysis pa - whether the actor has that permission - source, when the operation comes from a trigger or view -The Save button should be disabled until analysis succeeds and every required table write permission is allowed. Writable mode should not show a publish control, because user-created writable queries cannot be published. +The Save button should be disabled until analysis succeeds and every required table write permission is allowed. The existing edit-SQL flow from query pages can continue to point back to arbitrary SQL. A later enhancement can add "update this query" when the actor owns it or has `update-query`. @@ -427,14 +425,16 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - `QueryResource.resources_sql()` returns rows from `queries`. - Database page and `/-/jump` list queries from the internal DB. 
- `view-query` is no longer globally default-allowed; default query permissions come from the query-aware hook. -- Unpublished read-only query requires `execute-sql` to execute. -- Published read-only query can be executed without `execute-sql`. -- Setting `is_published=true` requires `publish-query`. +- Private query is only visible to its owner, even when a broader `view-query` rule applies. +- Non-trusted read-only query requires `execute-sql` to execute. +- Trusted read-only query can be executed without `execute-sql` after `view-query` passes. +- Config queries default to trusted and can opt out with `is_trusted: false`. +- User API rejects client-supplied `is_trusted`. - User-created query requires both `execute-sql` and `insert-query`. - User-created writable query creation uses `Database.analyze_sql()` and requires matching `insert-row`, `update-row`, and/or `delete-row` permissions for every reported write access. - `/{database}/-/queries/-/create` provides the writable-query authoring UI with an analysis panel and disabled save until all required write permissions pass. - User-created writable query execution re-runs `Database.analyze_sql()` and re-checks table write permissions. -- User-created writable query cannot be published. +- User-created writable query cannot be trusted through the user API. - Query update uses `POST /{database}/{query}/-/update` with an `{"update": {...}}` body. - Query delete uses `POST /{database}/{query}/-/delete`. - There are no `PATCH` or HTTP `DELETE` routes for query management. 
diff --git a/tests/test_queries.py b/tests/test_queries.py index 57920584..c97b5733 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -15,7 +15,6 @@ async def add_numbered_queries(ds, database, count): "select {} as query_number".format(i), title="Demo query {:02d}".format(i), description="Seeded demo query number {:02d}".format(i), - is_published=True, source="user", owner_id="root", ) @@ -44,7 +43,8 @@ async def test_queries_internal_table_schema(): "options", "parameters", "is_write", - "is_published", + "is_private", + "is_trusted", "source", "owner_id", "created_at", @@ -67,7 +67,7 @@ async def test_add_get_and_remove_query(): hide_sql=True, fragment="chart", parameters=["region"], - is_published=True, + is_trusted=True, source="user", owner_id="alice", ) @@ -100,7 +100,8 @@ async def test_add_get_and_remove_query(): "parameters": ["region"], "is_write": False, "write": False, - "is_published": True, + "is_private": False, + "is_trusted": True, "source": "user", "owner_id": "alice", "on_success_message": None, @@ -161,7 +162,8 @@ async def test_update_query_only_updates_provided_fields(): assert query["params"] == [] assert query["on_success_redirect"] is None assert query["sql"] == "select 1" - assert query["is_published"] is False + assert query["is_private"] is False + assert query["is_trusted"] is False options_row = ( await ds.get_internal_database().execute( """ @@ -208,7 +210,8 @@ async def test_config_queries_imported_to_internal_table(): "parameters": ["name"], "is_write": False, "write": False, - "is_published": False, + "is_private": False, + "is_trusted": True, "source": "config", "owner_id": None, "on_success_message": None, @@ -232,30 +235,171 @@ async def test_query_resources_come_from_internal_table(): @pytest.mark.asyncio -async def test_unpublished_query_requires_execute_sql_but_published_does_not(): - ds = Datasette(memory=True, settings={"default_allow_sql": False}) +async def 
test_default_deny_blocks_view_query_even_for_trusted_query(): + ds = Datasette(memory=True, default_deny=True) ds.add_memory_database("query_permissions", name="data") await ds.invoke_startup() - await ds.add_query("data", "unpublished", "select 1", is_published=False) - await ds.add_query("data", "published", "select 1", is_published=True) + await ds.add_query("data", "trusted", "select 1", is_trusted=True) assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource("data"), + action="view-query", + resource=QueryResource("data", "trusted"), actor=None, ) + + +@pytest.mark.asyncio +async def test_private_query_restriction_blocks_broad_view_query_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-query": {"id": "*"}, + } + } + } + }, + ) + ds.add_memory_database("private_query_permissions", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "shared_report", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "alice"}, + ) assert not await ds.allowed( action="view-query", - resource=QueryResource("data", "unpublished"), - actor=None, + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, ) assert await ds.allowed( action="view-query", - resource=QueryResource("data", "published"), - actor=None, + resource=QueryResource("data", "shared_report"), + actor={"id": "bob"}, ) +@pytest.mark.asyncio +async def test_config_query_restriction_does_not_override_private_internal_query(): + ds = Datasette(memory=True, default_deny=True) + ds.add_memory_database("private_query_with_config_name", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + 
"private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + ds.config = { + "databases": { + "data": { + "permissions": {"view-query": {"id": "*"}}, + "queries": {"private_report": {"sql": "select 2"}}, + } + } + } + + assert not await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, + ) + + +@pytest.mark.asyncio +async def test_untrusted_shared_query_execution_requires_execute_sql(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "viewer"}, + "view-query": {"id": "viewer"}, + } + } + } + }, + ) + ds.add_memory_database("untrusted_query_execution", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "shared_report", + "select 1 as one", + is_private=False, + is_trusted=False, + source="user", + owner_id="alice", + ) + + denied = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) + assert denied.status_code == 403 + + ds.config["databases"]["data"]["permissions"]["execute-sql"] = {"id": "viewer"} + allowed = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) + assert allowed.status_code == 200 + assert allowed.json()["rows"] == [{"one": 1}] + + +@pytest.mark.asyncio +async def test_config_queries_are_trusted_by_default_but_can_opt_out(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-query": {"id": "viewer"}, + }, + "queries": { + "trusted_report": {"sql": "select 1 as one"}, + "untrusted_report": { + "sql": "select 2 as two", + "is_trusted": False, + }, + }, + } + } + }, + ) + ds.add_memory_database("trusted_query_config", name="data") + await ds.invoke_startup() + + trusted = await ds.client.get("/data/trusted_report.json", actor={"id": "viewer"}) + untrusted = await ds.client.get( + "/data/untrusted_report.json", actor={"id": "viewer"} + ) + + 
assert trusted.status_code == 200 + assert trusted.json()["rows"] == [{"one": 1}] + assert untrusted.status_code == 403 + + @pytest.mark.asyncio async def test_database_page_query_preview_is_limited(): ds = Datasette(memory=True) @@ -281,7 +425,6 @@ async def test_query_actions_are_registered(): assert ds.get_action("execute-write-sql").resource_class is DatabaseResource assert ds.get_action("insert-query").resource_class is DatabaseResource - assert ds.get_action("publish-query").resource_class is DatabaseResource assert ds.get_action("update-query").resource_class is QueryResource assert ds.get_action("delete-query").resource_class is QueryResource @@ -430,21 +573,33 @@ async def test_query_list_search_filter_and_html(): "private_query", "select 'private'", title="Private query", - is_published=False, + is_private=True, source="user", owner_id="root", ) + await ds.add_query( + "data", + "trusted_query", + "select 'trusted'", + title="Trusted query", + is_trusted=True, + source="config", + ) html_response = await ds.client.get( "/data/-/queries?q=02", actor={"id": "root"}, ) + flags_response = await ds.client.get( + "/data/-/queries", + actor={"id": "root"}, + ) json_response = await ds.client.get( "/data/-/queries.json?q=02", actor={"id": "root"}, ) filtered_response = await ds.client.get( - "/data/-/queries.json?is_published=0", + "/data/-/queries.json?is_private=1", actor={"id": "root"}, ) @@ -453,7 +608,22 @@ async def test_query_list_search_filter_and_html(): assert "Demo query 01" not in html_response.text assert 'class="query-list-results"' in html_response.text assert "Mode" in html_response.text - assert 'type="radio" name="is_published" value="1"' in html_response.text + assert 'type="radio" name="is_private" value="1"' in html_response.text + assert "Only the owning actor can view this query." 
not in html_response.text + assert ( + "Execution skips the usual SQL and write permission checks" + not in html_response.text + ) + assert flags_response.status_code == 200 + assert 'Owner' in flags_response.text + assert 'Flags' in flags_response.text + assert 'Mode' not in flags_response.text + assert 'class="query-list-owner">root' in flags_response.text + assert 'class="query-list-pill">Read-only' in flags_response.text + assert 'class="query-list-pill query-list-pill-private">Private' in flags_response.text + assert 'class="query-list-pill query-list-pill-trusted">Trusted' in flags_response.text + assert "Only the owning actor can view this query." in flags_response.text + assert "Execution skips the usual SQL and write permission checks" in flags_response.text assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" @@ -491,7 +661,6 @@ async def test_global_query_list_api_and_html(): "alpha_first", "select 1", title="Alpha first", - is_published=True, source="user", owner_id="root", ) @@ -500,7 +669,6 @@ async def test_global_query_list_api_and_html(): "alpha_second", "select 2", title="Alpha second", - is_published=True, source="user", owner_id="root", ) @@ -509,7 +677,6 @@ async def test_global_query_list_api_and_html(): "beta_first", "select 3", title="Beta first", - is_published=True, source="user", owner_id="root", ) @@ -548,7 +715,7 @@ async def test_global_query_list_api_and_html(): @pytest.mark.asyncio -async def test_query_insert_api_publish_requires_publish_query(): +async def test_query_insert_api_rejects_is_trusted(): ds = Datasette( memory=True, default_deny=True, @@ -564,17 +731,17 @@ async def test_query_insert_api_publish_requires_publish_query(): } }, ) - ds.add_memory_database("query_publish_api", name="data") + ds.add_memory_database("query_trusted_api", name="data") await ds.invoke_startup() response = await ds.client.post( 
"/data/-/queries/insert", actor={"id": "writer"}, - json={"query": {"name": "public", "sql": "select 1", "is_published": True}}, + json={"query": {"name": "trusted", "sql": "select 1", "is_trusted": True}}, ) - assert response.status_code == 403 - assert response.json()["errors"] == ["Permission denied: need publish-query"] + assert response.status_code == 400 + assert response.json()["errors"] == ["Invalid keys: is_trusted"] @pytest.mark.asyncio @@ -599,24 +766,10 @@ async def test_query_insert_api_creates_writable_query(): assert response.status_code == 201 query = response.json()["query"] assert query["is_write"] is True - assert query["is_published"] is False + assert query["is_private"] is True + assert query["is_trusted"] is False assert query["parameters"] == ["name"] - bad_response = await ds.client.post( - "/data/-/queries/insert", - actor={"id": "root"}, - json={ - "query": { - "name": "published_insert", - "sql": "insert into dogs (name) values (:name)", - "is_published": True, - } - }, - ) - - assert bad_response.status_code == 400 - assert bad_response.json()["errors"] == ["Writable queries cannot be published"] - @pytest.mark.asyncio async def test_query_update_and_delete_api(): @@ -1103,6 +1256,10 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): config={ "databases": { "data": { + "permissions": { + "view-database": {"id": ["alice", "bob"]}, + "execute-write-sql": {"id": ["alice", "bob"]}, + }, "tables": { "dogs": { "permissions": { From 1cd162e9da48b924c289ec9343e9d801b51a89f9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:07:30 -0700 Subject: [PATCH 238/299] Removed some no-longer-necessary code, simplified view-query is back in the default allow actions now. We have other mechanisms that work for controlling visibility, and the fact that queries default to running with the permissions of the actor makes this safe. 
--- datasette/default_permissions/defaults.py | 55 +++-------------------- tests/test_permissions.py | 9 +++- tests/test_queries.py | 39 ++++++++++++++++ 3 files changed, 51 insertions(+), 52 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index dfd8d3e9..ed0a6d66 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -21,37 +21,12 @@ DEFAULT_ALLOW_ACTIONS = frozenset( "view-database", "view-database-download", "view-table", + "view-query", "execute-sql", } ) -def _configured_query_restriction_selects(datasette: "Datasette") -> tuple[list[str], dict]: - selects = [] - params = {} - for index, (database_name, db_config) in enumerate( - ((datasette.config or {}).get("databases") or {}).items() - ): - for query_name, query_config in (db_config.get("queries") or {}).items(): - if isinstance(query_config, dict) and query_config.get("is_private"): - continue - parent_param = f"query_config_parent_{index}_{len(selects)}" - child_param = f"query_config_child_{index}_{len(selects)}" - selects.append( - f""" - SELECT :{parent_param} AS parent, :{child_param} AS child - WHERE NOT EXISTS ( - SELECT 1 FROM queries - WHERE database_name = :{parent_param} - AND name = :{child_param} - ) - """ - ) - params[parent_param] = database_name - params[child_param] = query_name - return selects, params - - @hookimpl(specname="permission_resources_sql") async def default_allow_sql_check( datasette: "Datasette", @@ -121,16 +96,6 @@ async def default_query_permissions_sql( params = {"query_owner_id": actor_id} rule_sqls = [] - if not datasette.default_deny: - rule_sqls.append( - """ - SELECT database_name AS parent, name AS child, 1 AS allow, - 'non-private query' AS reason - FROM queries - WHERE is_private = 0 - """ - ) - if actor_id is not None: rule_sqls.append( """ @@ -141,23 +106,13 @@ async def default_query_permissions_sql( """ ) - config_restriction_selects, 
config_restriction_params = ( - _configured_query_restriction_selects(datasette) - ) - - restriction_sqls = [ - """ + return PermissionSQL( + sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, + restriction_sql=""" SELECT database_name AS parent, name AS child FROM queries WHERE is_private = 0 OR owner_id = :query_owner_id - """ - ] - restriction_sqls.extend(config_restriction_selects) - params.update(config_restriction_params) - - return PermissionSQL( - sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, - restriction_sql="\nUNION ALL\n".join(restriction_sqls), + """, params=params, ) diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 22f294bb..4f342d8f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -937,16 +937,20 @@ async def test_permissions_in_config( updated_config = copy.deepcopy(previous_config) updated_config.update(config) perms_ds.config = updated_config + await perms_ds.apply_queries_config() try: # Convert old-style resource to Resource object - from datasette.resources import DatabaseResource, TableResource + from datasette.resources import DatabaseResource, QueryResource, TableResource resource_obj = None if resource: if isinstance(resource, str): resource_obj = DatabaseResource(database=resource) elif isinstance(resource, tuple) and len(resource) == 2: - resource_obj = TableResource(database=resource[0], table=resource[1]) + if action == "view-query": + resource_obj = QueryResource(database=resource[0], query=resource[1]) + else: + resource_obj = TableResource(database=resource[0], table=resource[1]) result = await perms_ds.allowed( action=action, resource=resource_obj, actor=actor @@ -956,6 +960,7 @@ async def test_permissions_in_config( assert result == expected_result finally: perms_ds.config = previous_config + await perms_ds.apply_queries_config() @pytest.mark.asyncio diff --git a/tests/test_queries.py b/tests/test_queries.py index c97b5733..dde57dea 100644 --- 
a/tests/test_queries.py +++ b/tests/test_queries.py @@ -248,6 +248,45 @@ async def test_default_deny_blocks_view_query_even_for_trusted_query(): ) +@pytest.mark.asyncio +async def test_view_query_default_allow_still_respects_private_restriction(): + ds = Datasette(memory=True) + ds.add_memory_database("default_view_query_permissions", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "shared_report", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "shared_report"), + actor=None, + ) + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, + ) + + @pytest.mark.asyncio async def test_private_query_restriction_blocks_broad_view_query_permission(): ds = Datasette( From 1ac4265ffd295ea62008b13b3e37af96f5450be4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:12:59 -0700 Subject: [PATCH 239/299] Require permissions for untrusted stored query execution, refs #2735 --- datasette/views/database.py | 7 +++---- docs/authentication.rst | 2 +- queries-plan.md | 8 +++----- tests/test_queries.py | 12 ++++++++++-- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index 91e9c350..bd939d87 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1430,10 +1430,9 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - if canned_query.get("write"): - await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor - ) + await 
_ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor + ) # If database is immutable, return an error if not db.is_mutable: diff --git a/docs/authentication.rst b/docs/authentication.rst index 6e835c8d..453aaa19 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1285,7 +1285,7 @@ Actor is allowed to view a table (or view) page, e.g. https://latest.datasette.i view-query ---------- -Actor is allowed to view (and execute) a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. +Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted saved query also requires ``execute-sql`` or the relevant write permissions; trusted saved queries can execute with ``view-query`` alone. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) diff --git a/queries-plan.md b/queries-plan.md index f4b8049c..da6b7c92 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -25,7 +25,7 @@ Terminology change: these are now "queries", not "canned queries". Legacy code a - Query definitions currently come from `datasette.yaml` or the `canned_queries()` plugin hook. - `Datasette.get_canned_queries(database_name, actor)` calls that hook every time it needs query definitions. - `QueryResource.resources_sql()` currently enumerates databases and calls the hook for each one, because permissions and `/-/jump` need query resources. -- Query pages execute if the actor has `view-query` for `QueryResource(database, query)`. +- Query pages are visible if the actor has `view-query` for `QueryResource(database, query)`. Executing an untrusted stored query also checks `execute-sql` or the relevant write permissions. - Arbitrary SQL executes if the actor has `execute-sql` for `DatabaseResource(database)`. 
The main performance and architecture win is making query resource enumeration a direct SQL query against the internal database. @@ -145,9 +145,7 @@ Default execution rule for user-created writable queries: Implementation: -- Remove `view-query` from the broad `DEFAULT_ALLOW_ACTIONS` set. -- Replace it with query-aware default `view-query` permission SQL. -- Emit default `view-query` allows for non-private rows when Datasette is not running with `--default-deny`. +- Keep `view-query` in the broad `DEFAULT_ALLOW_ACTIONS` set, so saved queries remain visible by default in all-public Datasette. - Emit default `view-query` allows for the owning actor. - Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. - Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. @@ -424,7 +422,7 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - The old `canned_queries()` hook is no longer called by core. - `QueryResource.resources_sql()` returns rows from `queries`. - Database page and `/-/jump` list queries from the internal DB. -- `view-query` is no longer globally default-allowed; default query permissions come from the query-aware hook. +- `view-query` remains globally default-allowed, with `restriction_sql` narrowing private queries to their owner. - Private query is only visible to its owner, even when a broader `view-query` rule applies. - Non-trusted read-only query requires `execute-sql` to execute. - Trusted read-only query can be executed without `execute-sql` after `view-query` passes. 
diff --git a/tests/test_queries.py b/tests/test_queries.py index dde57dea..997f8b39 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -395,8 +395,16 @@ async def test_untrusted_shared_query_execution_requires_execute_sql(): owner_id="alice", ) - denied = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) - assert denied.status_code == 403 + denied_get = await ds.client.get( + "/data/shared_report.json", actor={"id": "viewer"} + ) + denied_post = await ds.client.post( + "/data/shared_report", + actor={"id": "viewer"}, + data={}, + ) + assert denied_get.status_code == 403 + assert denied_post.status_code == 403 ds.config["databases"]["data"]["permissions"]["execute-sql"] = {"id": "viewer"} allowed = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) From 866852eff603c219b8bf7d13f2a69b5ff032fa67 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:46:18 -0700 Subject: [PATCH 240/299] Clarifying comments --- datasette/default_permissions/defaults.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index ed0a6d66..32ad4ef1 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -80,6 +80,7 @@ async def default_query_permissions_sql( if action in {"update-query", "delete-query"}: if actor_id is None: return None + # Query owner can update/delete query return PermissionSQL( sql=""" SELECT database_name AS parent, name AS child, 1 AS allow, @@ -97,15 +98,15 @@ async def default_query_permissions_sql( params = {"query_owner_id": actor_id} rule_sqls = [] if actor_id is not None: - rule_sqls.append( - """ + # Query owner can view-query + rule_sqls.append(""" SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries WHERE owner_id = :query_owner_id - """ - ) + """) + # restriction_sql enforces private 
queries ONLY visible to owner return PermissionSQL( sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, restriction_sql=""" From 71c76e38534378cbce8576771238a788feccf3ad Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:08:19 -0700 Subject: [PATCH 241/299] Better faceting on /-/queries Ref https://github.com/simonw/datasette/pull/2741#issuecomment-4548321815 --- datasette/app.py | 69 +++++++++++++++++ datasette/templates/query_list.html | 94 +++++++++++++---------- datasette/views/database.py | 99 +++++++++++++++++++++++- tests/test_permissions.py | 8 +- tests/test_queries.py | 115 +++++++++++++++++++++++++--- 5 files changed, 330 insertions(+), 55 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 3329ee7e..1acdfcd8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1298,6 +1298,75 @@ class Datasette: ) return self._query_row_to_dict(rows.first()) + async def count_queries( + self, + database=None, + *, + actor=None, + q=None, + is_write=None, + is_private=None, + is_trusted=None, + source=None, + owner_id=None, + ): + allowed_sql, allowed_params = await self.allowed_resources_sql( + action="view-query", + actor=actor, + parent=database, + ) + params = dict(allowed_params) + where_clauses = [] + if database is not None: + params["query_database"] = database + where_clauses.append("q.database_name = :query_database") + + if q: + where_clauses.append(""" + ( + q.name LIKE :query_search + OR q.title LIKE :query_search + OR q.description LIKE :query_search + OR q.sql LIKE :query_search + ) + """) + params["query_search"] = "%{}%".format(q) + if is_write is not None: + where_clauses.append("q.is_write = :query_is_write") + params["query_is_write"] = int(bool(is_write)) + if is_private is not None: + where_clauses.append("q.is_private = :query_is_private") + params["query_is_private"] = int(bool(is_private)) + if is_trusted is not None: + where_clauses.append("q.is_trusted = :query_is_trusted") + 
params["query_is_trusted"] = int(bool(is_trusted)) + if source is not None: + where_clauses.append("q.source = :query_source") + params["query_source"] = source + if owner_id is not None: + where_clauses.append("q.owner_id = :query_owner_id") + params["query_owner_id"] = owner_id + + row = ( + await self.get_internal_database().execute( + """ + SELECT count(*) AS count + FROM queries q + JOIN ( + {allowed_sql} + ) allowed + ON allowed.parent = q.database_name + AND allowed.child = q.name + WHERE {where} + """.format( + allowed_sql=allowed_sql, + where=" AND ".join(where_clauses) or "1 = 1", + ), + params, + ) + ).first() + return row["count"] + async def list_queries( self, database=None, diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index 25259b3d..fa4859b1 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -9,7 +9,7 @@ max-width: 64rem; } .query-list-filters { - margin: 0.5rem 0 1rem; + margin: 0.5rem 0 0.75rem; } .query-list-search { align-items: center; @@ -32,43 +32,63 @@ line-height: 1.1; padding: 0.35rem 0.65rem; } -.query-list-filter-groups { +.query-list-facets { align-items: flex-start; display: flex; flex-wrap: wrap; - gap: 0.8rem 1.4rem; + gap: 1rem 1.6rem; + margin: 0 0 1rem; } -.query-list-filter-group { - border: 0; +.query-list-facet { + margin: 0; +} +.query-list-facet h2 { + font-size: 0.9rem; + line-height: 1.2; + margin: 0 0 0.35rem; +} +.query-list-facet ul { display: flex; flex-wrap: wrap; gap: 0.35rem; margin: 0; - min-width: 0; padding: 0; + list-style: none; } -.query-list-filter-group legend { - font-weight: 700; - margin: 0 0.45rem 0 0; - padding: 0; -} -.query-list-filter-group label { +.query-list-facet-link, +.query-list-facet-link:link, +.query-list-facet-link:visited, +.query-list-facet-link:hover, +.query-list-facet-link:focus, +.query-list-facet-link:active { align-items: center; border: 1px solid #c8d1dc; border-radius: 0.25rem; - cursor: pointer; 
+ color: #39445a; display: inline-flex; font-size: 0.82rem; - gap: 0.3rem; + gap: 0.4rem; line-height: 1.1; padding: 0.35rem 0.55rem; + text-decoration: none; } -.query-list-filter-group input { - margin: 0; +.query-list-facet-link:hover { + border-color: #7ca5c8; + color: #1f5d85; } -.query-list-filter-group input:checked + span { +.query-list-facet-link-active { + background-color: #edf6fb; + border-color: #6d9fc0; font-weight: 700; } +.query-list-facet-disabled { + color: #7b8794; + cursor: default; +} +.query-list-facet-count { + color: #4f5b6d; + font-variant-numeric: tabular-nums; +} .query-list-results { border-collapse: collapse; font-size: 0.9rem; @@ -169,15 +189,6 @@ .query-list-search input[type=search] { max-width: none; } - .query-list-filter-group { - display: block; - } - .query-list-filter-group legend { - margin-bottom: 0.3rem; - } - .query-list-filter-group label { - margin: 0 0.25rem 0.35rem 0; - } } {% endblock %} @@ -198,24 +209,27 @@ -
        -
        - Mode - - - -
        -
        - Visibility - - - -
        -
        + + {% if queries %}
        diff --git a/datasette/views/database.py b/datasette/views/database.py index bd939d87..2e77d36b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1121,6 +1121,21 @@ class QueryParametersView(BaseView): return _block_framing(Response.json({"ok": True, "parameters": parameters})) +def _query_list_url(path, query_string, *, set_args=None, remove_args=None): + set_args = set_args or {} + remove_args = set(remove_args or ()) + skip = set(set_args) | remove_args | {"_next"} + pairs = [ + (key, value) + for key, value in parse_qsl(query_string, keep_blank_values=True) + if key not in skip + ] + for key, value in set_args.items(): + if value not in (None, ""): + pairs.append((key, value)) + return path + (("?" + urlencode(pairs)) if pairs else "") + + class QueryListView(BaseView): name = "query-list" @@ -1139,9 +1154,7 @@ class QueryListView(BaseView): default=20 if format_ == "html" else 50, ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") - is_private = _as_optional_bool( - request.args.get("is_private"), "is_private" - ) + is_private = _as_optional_bool(request.args.get("is_private"), "is_private") except QueryValidationError as ex: return _error([ex.message], ex.status) @@ -1173,6 +1186,80 @@ class QueryListView(BaseView): urlencode(pairs), ) + current_filters = { + "actor": request.actor, + "q": request.args.get("q") or None, + "is_write": is_write, + "is_private": is_private, + "source": request.args.get("source") or None, + "owner_id": request.args.get("owner_id") or None, + } + + async def facet_count(field, value): + if current_filters[field] is not None and current_filters[field] != value: + return 0 + filters = dict(current_filters) + filters[field] = value + return await self.ds.count_queries(database, **filters) + + def facet_href(field, value): + if current_filters[field] == value: + return _query_list_url( + query_list_path, + request.query_string, + remove_args=[field], + ) + if 
current_filters[field] is not None: + return None + return _query_list_url( + query_list_path, + request.query_string, + set_args={field: str(int(value))}, + ) + + async def facet_item(label, field, value): + count = await facet_count(field, value) + active = current_filters[field] == value + if not active and not count: + return None + return { + "label": label, + "count": count, + "href": facet_href(field, value) if active or count else None, + "active": active, + } + + async def facet_items(items): + return [ + item + for item in [ + await facet_item(label, field, value) + for label, field, value in items + ] + if item is not None + ] + + facets = [ + { + "title": "Mode", + "items": await facet_items( + [ + ("Read-only", "is_write", False), + ("Writable", "is_write", True), + ] + ), + }, + { + "title": "Visibility", + "items": await facet_items( + [ + ("Not private", "is_private", False), + ("Private", "is_private", True), + ] + ), + }, + ] + data = { "ok": True, "database": database, @@ -1188,6 +1275,7 @@ class QueryListView(BaseView): "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), "query_list_path": query_list_path, "show_database": database is None, + "facets": facets, "filters": { "q": request.args.get("q") or "", "is_write": request.args.get("is_write") or "", @@ -1715,6 +1803,9 @@ class QueryView(View): } ) metadata = await datasette.get_database_metadata(database) + if canned_query: + metadata = dict(canned_query) + metadata.pop("source", None) renderers = {} for key, (_, can_render) in datasette.renderers.items(): @@ -1865,7 +1956,7 @@ class QueryView(View): ) ), show_hide_hidden=markupsafe.Markup(show_hide_hidden), - metadata=canned_query or metadata, + metadata=metadata, alternate_url_json=alternate_url_json, select_templates=[ f"{'*' if template_name == template.name else ''}{template_name}" diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 4f342d8f..eb6cee9f 100644 --- a/tests/test_permissions.py 
+++ b/tests/test_permissions.py @@ -948,9 +948,13 @@ async def test_permissions_in_config( resource_obj = DatabaseResource(database=resource) elif isinstance(resource, tuple) and len(resource) == 2: if action == "view-query": - resource_obj = QueryResource(database=resource[0], query=resource[1]) + resource_obj = QueryResource( + database=resource[0], query=resource[1] + ) else: - resource_obj = TableResource(database=resource[0], table=resource[1]) + resource_obj = TableResource( + database=resource[0], table=resource[1] + ) result = await perms_ds.allowed( action=action, resource=resource_obj, actor=actor diff --git a/tests/test_queries.py b/tests/test_queries.py index 997f8b39..36f7107a 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -395,9 +395,7 @@ async def test_untrusted_shared_query_execution_requires_execute_sql(): owner_id="alice", ) - denied_get = await ds.client.get( - "/data/shared_report.json", actor={"id": "viewer"} - ) + denied_get = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) denied_post = await ds.client.post( "/data/shared_report", actor={"id": "viewer"}, @@ -608,6 +606,27 @@ async def test_query_list_and_definition_api(): assert definition_response.json()["query"]["title"] == "Demo query 01" +@pytest.mark.asyncio +async def test_query_page_does_not_show_internal_source(): + ds = Datasette(memory=True) + ds.add_memory_database("query_page_source", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "stored_report", + "select 1 as one", + title="Stored report", + source="user", + owner_id="root", + ) + + response = await ds.client.get("/data/stored_report", actor={"id": "root"}) + + assert response.status_code == 200 + assert "Stored report" in response.text + assert "Data source:" not in response.text + + @pytest.mark.asyncio async def test_query_list_search_filter_and_html(): ds = Datasette(memory=True) @@ -632,6 +651,15 @@ async def test_query_list_search_filter_and_html(): 
is_trusted=True, source="config", ) + await ds.add_query( + "data", + "writable_query", + "insert into dogs (name) values (:name)", + title="Writable query", + is_write=True, + source="user", + owner_id="root", + ) html_response = await ds.client.get( "/data/-/queries?q=02", @@ -649,13 +677,21 @@ async def test_query_list_search_filter_and_html(): "/data/-/queries.json?is_private=1", actor={"id": "root"}, ) + filtered_write_response = await ds.client.get( + "/data/-/queries?is_write=1", + actor={"id": "root"}, + ) + filtered_private_response = await ds.client.get( + "/data/-/queries?is_private=1", + actor={"id": "root"}, + ) assert html_response.status_code == 200 assert "Demo query 02" in html_response.text assert "Demo query 01" not in html_response.text assert 'class="query-list-results"' in html_response.text - assert "Mode" in html_response.text - assert 'type="radio" name="is_private" value="1"' in html_response.text + assert 'class="query-list-facets"' in html_response.text + assert 'type="radio"' not in html_response.text assert "Only the owning actor can view this query." 
not in html_response.text assert ( "Execution skips the usual SQL and write permission checks" @@ -667,14 +703,75 @@ async def test_query_list_search_filter_and_html(): assert '' not in flags_response.text assert 'class="query-list-owner">root' in flags_response.text assert 'class="query-list-pill">Read-only' in flags_response.text - assert 'class="query-list-pill query-list-pill-private">Private' in flags_response.text - assert 'class="query-list-pill query-list-pill-trusted">Trusted' in flags_response.text + assert ( + 'class="query-list-pill query-list-pill-write">Writable' + in flags_response.text + ) + assert ( + 'class="query-list-pill query-list-pill-private">Private' + in flags_response.text + ) + assert ( + 'class="query-list-pill query-list-pill-trusted">Trusted' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_write=0">Read-only5' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_write=1">Writable1' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_private=0">Not private5' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_private=1">Private1' + in flags_response.text + ) assert "Only the owning actor can view this query." 
in flags_response.text - assert "Execution skips the usual SQL and write permission checks" in flags_response.text + assert ( + "Execution skips the usual SQL and write permission checks" + in flags_response.text + ) assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" ] + assert "Writable query" in filtered_write_response.text + assert "Demo query 01" not in filtered_write_response.text + assert ( + 'query-list-facet-link query-list-facet-link-active" href="/data/-/queries"' + in filtered_write_response.text + ) + assert ( + 'Read-only0' + not in filtered_write_response.text + ) + assert ( + 'href="/data/-/queries?is_write=1&is_private=0">Not private1' + in filtered_write_response.text + ) + assert ( + 'Private0' + not in filtered_write_response.text + ) + assert "Private query" in filtered_private_response.text + assert "Demo query 01" not in filtered_private_response.text + assert ( + 'href="/data/-/queries?is_private=1&is_write=0">Read-only1' + in filtered_private_response.text + ) + assert ( + 'Writable0' + not in filtered_private_response.text + ) + assert ( + 'Not private0' + not in filtered_private_response.text + ) @pytest.mark.asyncio @@ -1313,7 +1410,7 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): "insert-row": {"id": "alice"}, } } - } + }, } } }, From 0fcaa5792ba73143661515af0088d7e5d968e96c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:12:07 -0700 Subject: [PATCH 242/299] Style query operations on create query Made it consistent with the SQL write page. 
--- .../_execute_write_analysis_styles.html | 37 +++++++++++++++++++ datasette/templates/execute_write.html | 36 +----------------- datasette/templates/query_create.html | 19 +++++----- tests/test_queries.py | 6 ++- 4 files changed, 52 insertions(+), 46 deletions(-) create mode 100644 datasette/templates/_execute_write_analysis_styles.html diff --git a/datasette/templates/_execute_write_analysis_styles.html b/datasette/templates/_execute_write_analysis_styles.html new file mode 100644 index 00000000..f20e67b2 --- /dev/null +++ b/datasette/templates/_execute_write_analysis_styles.html @@ -0,0 +1,37 @@ + diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 46f58c3b..414d4af7 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -40,42 +40,8 @@ border-radius: 0.25rem; min-width: 13rem; } -.execute-write-analysis { - border-collapse: collapse; - font-size: 0.9rem; - margin: 0.25rem 0 1rem; - min-width: 44rem; -} -.execute-write-analysis th, -.execute-write-analysis td { - border-bottom: 1px solid #d7dde5; - padding: 0.45rem 0.7rem; - text-align: left; - vertical-align: top; -} -.execute-write-analysis th { - background-color: #edf6fb; - border-top: 1px solid #d7dde5; - color: #39445a; - font-weight: 700; -} -.execute-write-analysis tbody tr:nth-child(even) { - background-color: rgba(39, 104, 144, 0.05); -} -.execute-write-analysis code { - background: transparent; - font-size: 0.9em; - white-space: nowrap; -} -.execute-write-analysis-allowed { - color: #267a3e; - font-weight: 700; -} -.execute-write-analysis-denied { - color: #b00020; - font-weight: 700; -} +{% include "_execute_write_analysis_styles.html" %} {% include "_sql_parameter_styles.html" %} {% endblock %} diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 686d971e..2d8a9122 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ 
-5,6 +5,7 @@ {% block extra_head %} {{- super() -}} {% include "_codemirror.html" %} +{% include "_execute_write_analysis_styles.html" %} {% endblock %} {% block body_class %}query-create db-{{ database|to_css_class }}{% endblock %} @@ -32,30 +33,28 @@

        Execute write SQL

        {% endif %} -

        Analysis

        +

        Query operations

        {% if analysis_error %}

        {{ analysis_error }}

        {% elif analysis_rows %} -
        Mode
        +
        - + - {% for row in analysis_rows %} - - - - - - + + + + + {% endfor %} diff --git a/tests/test_queries.py b/tests/test_queries.py index 36f7107a..c27c23da 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -998,7 +998,11 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert "Create query" in create_response.text assert "Read-only" in create_response.text assert "Writable" in create_response.text - assert "required permission" in create_response.text + assert "

        Query operations

        " in create_response.text + assert '
        Operation Database Tablerequired permissionRequired permission AllowedSource
        {{ row.operation }}{{ row.database }}{{ row.table }}{{ row.required_permission }}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}{{ row.source or "" }}{{ row.operation }}{{ row.database }}{{ row.table }}{% if row.required_permission %}{{ row.required_permission }}{% endif %}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}
        ' in create_response.text + assert '' in create_response.text + assert '' not in create_response.text + assert "" in create_response.text assert query_response.status_code == 200 assert "Save query" in query_response.text assert "/data/-/queries/-/create?sql=select+%2A+from+dogs" in query_response.text From 70b23ff4a55528083512fab96aa50725f415cbe4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:47:24 -0700 Subject: [PATCH 243/299] Tweaked save query link --- datasette/templates/query.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/templates/query.html b/datasette/templates/query.html index f74d21f1..1900bd31 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -66,7 +66,7 @@ {% if not hide_sql %}{% endif %} {{ show_hide_hidden }} - {% if save_query_url %}Save query{% endif %} + {% if save_query_url %}Save this query{% endif %} {% if canned_query and edit_sql_url %}Edit SQL{% endif %}

        From eb7c25c57cf914629c08eaa477d0709b0f41efeb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:48:40 -0700 Subject: [PATCH 244/299] Major redesign of create saved query UI https://github.com/simonw/datasette/pull/2741#issuecomment-4548707129 --- datasette/app.py | 6 +- datasette/static/app.css | 4 + .../_execute_write_analysis_scripts.html | 111 +++++++ .../_execute_write_analysis_styles.html | 4 + .../templates/_sql_parameter_scripts.html | 17 +- datasette/templates/execute_write.html | 88 +----- datasette/templates/query_create.html | 296 +++++++++++++++--- datasette/views/database.py | 181 ++++++++--- tests/test_queries.py | 170 +++++++++- 9 files changed, 705 insertions(+), 172 deletions(-) create mode 100644 datasette/templates/_execute_write_analysis_scripts.html diff --git a/datasette/app.py b/datasette/app.py index 1acdfcd8..8936b099 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -50,7 +50,7 @@ from .views.database import ( ExecuteWriteView, TableCreateView, QueryView, - QueryCreateView, + QueryCreateAnalyzeView, QueryDeleteView, QueryDefinitionView, GlobalQueryListView, @@ -2820,8 +2820,8 @@ class Datasette: r"/(?P[^\/\.]+)/-/queries(\.(?Pjson))?$", ) add_route( - QueryCreateView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/-/create$", + QueryCreateAnalyzeView.as_view(self), + r"/(?P[^\/\.]+)/-/queries/analyze$", ) add_route( QueryInsertView.as_view(self), diff --git a/datasette/static/app.css b/datasette/static/app.css index c21d0dc4..4f4db133 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -1414,6 +1414,10 @@ svg.dropdown-menu-icon { position: relative; top: 1px; } +.save-query { + display: inline-block; + margin-left: 0.45em; +} .blob-download { display: block; diff --git a/datasette/templates/_execute_write_analysis_scripts.html b/datasette/templates/_execute_write_analysis_scripts.html new file mode 100644 index 00000000..a19bae13 --- /dev/null +++ 
b/datasette/templates/_execute_write_analysis_scripts.html @@ -0,0 +1,111 @@ + diff --git a/datasette/templates/_execute_write_analysis_styles.html b/datasette/templates/_execute_write_analysis_styles.html index f20e67b2..165cfe9f 100644 --- a/datasette/templates/_execute_write_analysis_styles.html +++ b/datasette/templates/_execute_write_analysis_styles.html @@ -34,4 +34,8 @@ color: #b00020; font-weight: 700; } +.execute-write-analysis-na { + color: #687386; + font-style: italic; +} diff --git a/datasette/templates/_sql_parameter_scripts.html b/datasette/templates/_sql_parameter_scripts.html index 68e46069..159a141c 100644 --- a/datasette/templates/_sql_parameter_scripts.html +++ b/datasette/templates/_sql_parameter_scripts.html @@ -215,9 +215,10 @@ window.datasetteSqlParameters = (() => { if (!form) { return null; } + const shouldRenderParameters = options.renderParameters !== false; const section = options.section || form.querySelector("[data-sql-parameters-section]"); - if (!section) { + if (shouldRenderParameters && !section) { return null; } const manager = { @@ -225,12 +226,16 @@ window.datasetteSqlParameters = (() => { section, allowExpand: options.allowExpand === undefined - ? section.dataset.allowExpand === "1" + ? section + ? 
section.dataset.allowExpand === "1" + : false : options.allowExpand, parameterState: new Map(), }; - bindParameterControls(manager); - syncParameterState(manager); + if (section) { + bindParameterControls(manager); + syncParameterState(manager); + } const url = options.url || form.dataset.parametersUrl; let refreshTimer = null; @@ -254,7 +259,9 @@ window.datasetteSqlParameters = (() => { if (!response.ok) { throw new Error((data.errors || [response.statusText]).join("; ")); } - renderParameters(manager, data.parameters || []); + if (shouldRenderParameters) { + renderParameters(manager, data.parameters || []); + } if (options.onData) { options.onData(data, manager); } diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 414d4af7..7a627a7a 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -131,6 +131,7 @@ if (executeWriteSqlInput && !executeWriteSqlInput.value) { {% include "_codemirror_foot.html" %} {% include "_sql_parameter_scripts.html" %} +{% include "_execute_write_analysis_scripts.html" %} + + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 2e77d36b..aafcf40b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -551,6 +551,17 @@ def _wants_json(request, is_json, data): ) +def _query_create_form_error_message(message): + return { + "Query name is required": "URL is required", + "Invalid query name": "Invalid URL", + "Query name conflicts with a table or view": ( + "URL conflicts with an existing table or view" + ), + "Query already exists": "A query already exists at that URL", + }.get(message, message) + + async def _json_or_form_payload(request): content_type = request.headers.get("content-type", "") if content_type.startswith("application/json"): @@ -731,6 +742,54 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): } +async def _query_create_analysis_data(datasette, db, sql, 
actor): + has_sql = bool(sql and sql.strip()) + parameter_names = [] + analysis_rows = [] + analysis_error = None + if has_sql: + try: + parameter_names = _derived_query_parameters(sql) + params = {parameter: "" for parameter in parameter_names} + analysis = await db.analyze_sql(sql, params) + analysis_rows = await _analysis_rows_with_permissions( + datasette, analysis, actor + ) + except (QueryValidationError, sqlite3.DatabaseError) as ex: + analysis_error = getattr(ex, "message", str(ex)) + return { + "ok": analysis_error is None, + "parameters": parameter_names, + "analysis_error": analysis_error, + "analysis_rows": analysis_rows, + "has_sql": has_sql, + "analysis_is_write": bool( + analysis_rows and any(row["required_permission"] for row in analysis_rows) + ), + "save_disabled": bool( + (not has_sql) + or analysis_error + or any(row["allowed"] is False for row in analysis_rows) + ), + } + + +async def _query_create_form_context( + datasette, request, db, *, sql="", name="", title="", description="", is_private=True +): + analysis_data = await _query_create_analysis_data(datasette, db, sql, request.actor) + return { + "database": db.name, + "database_color": db.color, + "sql": sql, + "name": name, + "title": title, + "description": description, + "is_private": is_private, + **analysis_data, + } + + async def _inserted_row_url(datasette, db, analysis, cursor): if cursor.rowcount != 1: return None @@ -1307,6 +1366,35 @@ class QueryCreateView(BaseView): name = "query-create" has_json_alternate = False + async def _render_form( + self, + request, + db, + *, + sql="", + name="", + title="", + description="", + is_private=True, + status=200, + ): + response = await self.render( + ["query_create.html"], + request, + await _query_create_form_context( + self.ds, + request, + db, + sql=sql, + name=name, + title=title, + description=description, + is_private=is_private, + ), + ) + response.status = status + return response + async def get(self, request): db = await 
self.ds.resolve_database(request) await self.ds.ensure_permission( @@ -1320,46 +1408,61 @@ class QueryCreateView(BaseView): actor=request.actor, ) - sql = request.args.get("sql") or "" - analysis_error = None - analysis_rows = [] - parameter_names = [] - if sql: - try: - parameter_names = _derived_query_parameters(sql) - params = {parameter: "" for parameter in parameter_names} - analysis = await db.analyze_sql(sql, params) - analysis_rows = await _analysis_rows_with_permissions( - self.ds, analysis, request.actor - ) - except (QueryValidationError, sqlite3.DatabaseError) as ex: - analysis_error = getattr(ex, "message", str(ex)) + return await self._render_form(request, db, sql=request.args.get("sql") or "") - return await self.render( - ["query_create.html"], - request, - { - "database": db.name, - "database_color": db.color, - "sql": sql, - "parameter_names": parameter_names, - "analysis_error": analysis_error, - "analysis_rows": analysis_rows, - "analysis_is_write": bool( - analysis_rows - and any(row["required_permission"] for row in analysis_rows) - ), - "save_disabled": bool( - analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), - }, + +class QueryCreateAnalyzeView(BaseView): + name = "query-create-analyze" + has_json_alternate = False + + async def get(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing(_error(["Permission denied: need execute-sql"], 403)) + if not await self.ds.allowed( + action="insert-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing(_error(["Permission denied: need insert-query"], 403)) + + invalid_keys = set(request.args) - {"sql"} + if invalid_keys: + return _block_framing( + _error( + ["Invalid keys: {}".format(", ".join(sorted(invalid_keys)))], + 400, + ) + ) + sql = request.args.get("sql") or "" + return 
_block_framing( + Response.json( + await _query_create_analysis_data(self.ds, db, sql, request.actor) + ) ) -class QueryInsertView(BaseView): +class QueryInsertView(QueryCreateView): name = "query-insert" + async def _error_response(self, request, db, query_data, message, status): + message = _query_create_form_error_message(message) + self.ds.add_message(request, message, self.ds.ERROR) + return await self._render_form( + request, + db, + sql=query_data.get("sql") or "", + name=query_data.get("name") or "", + title=query_data.get("title") or "", + description=query_data.get("description") or "", + is_private=_as_bool(query_data.get("is_private", True)), + status=status, + ) + async def post(self, request): db = await self.ds.resolve_database(request) if not await self.ds.allowed( @@ -1375,6 +1478,8 @@ class QueryInsertView(BaseView): ): return _error(["Permission denied: need insert-query"], 403) + is_json = False + query_data = {} try: data, is_json = await _json_or_form_payload(request) if not isinstance(data, dict): @@ -1384,6 +1489,10 @@ class QueryInsertView(BaseView): raise QueryValidationError("JSON must contain a query dictionary") prepared = await _prepare_query_create(self.ds, request, db, query_data) except QueryValidationError as ex: + if not is_json and isinstance(query_data, dict): + return await self._error_response( + request, db, query_data, ex.message, ex.status + ) return _error([ex.message], ex.status) prepared.pop("analysis") @@ -1391,6 +1500,8 @@ class QueryInsertView(BaseView): try: await self.ds.add_query(db.name, name, replace=False, **prepared) except sqlite3.IntegrityError as ex: + if not is_json and isinstance(query_data, dict): + return await self._error_response(request, db, query_data, str(ex), 400) return _error([str(ex)], 400) query = await self.ds.get_query(db.name, name) @@ -1896,7 +2007,7 @@ class QueryView(View): ): save_query_url = ( datasette.urls.database(database) - + "/-/queries/-/create?" + + "/-/queries/insert?" 
+ urlencode({"sql": sql}) ) diff --git a/tests/test_queries.py b/tests/test_queries.py index c27c23da..32cdfae3 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -986,6 +986,14 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): await ds.invoke_startup() create_response = await ds.client.get( + "/data/-/queries/insert?sql=select+*+from+dogs", + actor={"id": "root"}, + ) + blank_create_response = await ds.client.get( + "/data/-/queries/insert", + actor={"id": "root"}, + ) + old_create_response = await ds.client.get( "/data/-/queries/-/create?sql=select+*+from+dogs", actor={"id": "root"}, ) @@ -996,16 +1004,171 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert create_response.status_code == 200 assert "Create query" in create_response.text - assert "Read-only" in create_response.text assert "Writable" in create_response.text + assert 'type="radio"' not in create_response.text + assert 'name="parameters"' not in create_response.text + assert 'id="query-parameters"' not in create_response.text + assert 'class="query-create-field"' in create_response.text + assert '' not in create_response.text + assert '' in create_response.text + assert '' in create_response.text + assert '/data/' in create_response.text + assert ( + '' + in create_response.text + ) + assert 'function slugify(value)' in create_response.text + assert 'data-analyze-url="/data/-/queries/analyze"' in create_response.text + assert "setupSqlParameterRefresh" in create_response.text + assert "renderParameters: false" in create_response.text + assert "datasetteSqlAnalysis.renderAnalysis" in create_response.text + assert "data-query-create-submit" in create_response.text + assert "data-query-create-writable" in create_response.text + assert ( + "Queries marked private can only be seen by you, their creator." + in create_response.text + ) assert "

        Query operations

        " in create_response.text assert '
        Required permissionSourceread
        ' in create_response.text assert '' in create_response.text assert '' not in create_response.text assert "" in create_response.text + assert ( + create_response.text.count( + '' + ) + == 2 + ) + assert create_response.text.index('value="Save query"') < create_response.text.index( + "

        Query operations

        " + ) + assert blank_create_response.status_code == 200 + assert ( + '
        Required permissionSourcereadn/a
        ' in response.text assert '' in response.text assert "" in response.text From 5dca2dc9beea96c52e6a9c806df66c9a1f2f7874 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:54:47 -0700 Subject: [PATCH 245/299] Show query count on database page --- datasette/templates/database.html | 2 +- datasette/views/database.py | 18 +++++++++++++++++- tests/test_queries.py | 11 ++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 62f9c620..371f6a22 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -59,7 +59,7 @@ {% endfor %} {% if queries_more %} -

        View all queries

        +

        View {{ "{:,}".format(queries_count) }} quer{% if queries_count == 1 %}y{% else %}ies{% endif %}

        {% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index feb38619..d40d69d1 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -102,6 +102,11 @@ class DatabaseView(View): ) canned_queries = queries_page["queries"] queries_more = queries_page["has_more"] + queries_count = ( + await datasette.count_queries(database, actor=request.actor) + if queries_more + else len(canned_queries) + ) async def database_actions(): links = [] @@ -134,6 +139,7 @@ class DatabaseView(View): "views": sql_views, "queries": canned_queries, "queries_more": queries_more, + "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, "table_columns": ( await _table_columns(datasette, database) if allow_execute_sql else {} @@ -168,6 +174,7 @@ class DatabaseView(View): views=sql_views, queries=canned_queries, queries_more=queries_more, + queries_count=queries_count, allow_execute_sql=allow_execute_sql, table_columns=( await _table_columns(datasette, database) @@ -219,6 +226,7 @@ class DatabaseContext(Context): queries_more: bool = field( metadata={"help": "Boolean indicating if more saved queries are available"} ) + queries_count: int = field(metadata={"help": "Count of visible saved queries"}) allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) @@ -775,7 +783,15 @@ async def _query_create_analysis_data(datasette, db, sql, actor): async def _query_create_form_context( - datasette, request, db, *, sql="", name="", title="", description="", is_private=True + datasette, + request, + db, + *, + sql="", + name="", + title="", + description="", + is_private=True, ): analysis_data = await _query_create_analysis_data(datasette, db, sql, request.actor) return { diff --git a/tests/test_queries.py b/tests/test_queries.py index 32cdfae3..09b41645 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -458,9 +458,10 @@ async def 
test_database_page_query_preview_is_limited(): assert html_response.status_code == 200 assert "Demo query 05" in html_response.text assert "Demo query 06" not in html_response.text - assert 'href="/data/-/queries"' in html_response.text + assert 'View 25 queries' in html_response.text assert len(json_response.json()["queries"]) == 5 assert json_response.json()["queries_more"] is True + assert json_response.json()["queries_count"] == 25 @pytest.mark.asyncio @@ -1017,7 +1018,7 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): '' in create_response.text ) - assert 'function slugify(value)' in create_response.text + assert "function slugify(value)" in create_response.text assert 'data-analyze-url="/data/-/queries/analyze"' in create_response.text assert "setupSqlParameterRefresh" in create_response.text assert "renderParameters: false" in create_response.text @@ -1039,9 +1040,9 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): ) == 2 ) - assert create_response.text.index('value="Save query"') < create_response.text.index( - "

        Query operations

        " - ) + assert create_response.text.index( + 'value="Save query"' + ) < create_response.text.index("

        Query operations

        ") assert blank_create_response.status_code == 200 assert ( '
        Required permissioninsert
        ' in create_response.text assert '' in create_response.text @@ -1053,6 +1067,12 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): "

        Analysis will show each affected table and required permission.

        " not in blank_create_response.text ) + assert "Enter SQL to analyze this query." in blank_create_response.text + assert write_create_response.status_code == 200 + assert ( + 'This query updates data in the database.' + in write_create_response.text + ) assert query_response.status_code == 200 assert "Save this query" in query_response.text assert "/data/-/queries/insert?sql=select+%2A+from+dogs" in query_response.text From 024b9117725bbed17396a5a4b3f48663c23337f5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:09:53 -0700 Subject: [PATCH 247/299] Clarifying comment https://github.com/simonw/datasette/pull/2741/changes#r3306856046 --- datasette/default_permissions/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index a9f2d8bd..6cd46f04 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -26,6 +26,7 @@ from .restrictions import ( from .root import root_user_permissions_sql as root_user_permissions_sql from .config import config_permissions_sql as config_permissions_sql from .defaults import ( + # Avoid "datasette.default_permissions" does not explicitly export attribute default_allow_sql_check as default_allow_sql_check, default_action_permissions_sql as default_action_permissions_sql, default_query_permissions_sql as default_query_permissions_sql, From ac6ee097dd06050188d44c6d4b17a98a12c7b481 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:10:48 -0700 Subject: [PATCH 248/299] Disallow update/delete of private queries If a user does not own a private query they cannot update or delete it either, even if they have global update-query. 
https://github.com/simonw/datasette/pull/2741/changes#r3306417463 --- datasette/default_permissions/defaults.py | 33 ++++----- tests/test_queries.py | 81 +++++++++++++++++++++++ 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 32ad4ef1..5bc74425 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -77,36 +77,31 @@ async def default_query_permissions_sql( ) -> Optional[PermissionSQL]: actor_id = actor.get("id") if isinstance(actor, dict) else None - if action in {"update-query", "delete-query"}: - if actor_id is None: - return None - # Query owner can update/delete query - return PermissionSQL( - sql=""" - SELECT database_name AS parent, name AS child, 1 AS allow, - 'query owner' AS reason - FROM queries - WHERE source = 'user' - AND owner_id = :query_owner_id - """, - params={"query_owner_id": actor_id}, - ) - - if action != "view-query": + if action not in {"view-query", "update-query", "delete-query"}: return None params = {"query_owner_id": actor_id} rule_sqls = [] if actor_id is not None: - # Query owner can view-query - rule_sqls.append(""" + if action in {"update-query", "delete-query"}: + # Query owner can update/delete query + rule_sqls.append(""" + SELECT database_name AS parent, name AS child, 1 AS allow, + 'query owner' AS reason + FROM queries + WHERE source = 'user' + AND owner_id = :query_owner_id + """) + else: + # Query owner can view-query + rule_sqls.append(""" SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries WHERE owner_id = :query_owner_id """) - # restriction_sql enforces private queries ONLY visible to owner + # restriction_sql enforces private queries ONLY visible/mutable by owner return PermissionSQL( sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, restriction_sql=""" diff --git a/tests/test_queries.py b/tests/test_queries.py 
index f888dda0..26a0748c 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1581,6 +1581,87 @@ async def test_query_owner_gets_update_delete_and_writable_view_defaults(): ) +@pytest.mark.asyncio +async def test_private_query_restricts_broad_update_delete_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "update-query": {"id": "bob"}, + "delete-query": {"id": "bob"}, + }, + }, + }, + }, + ) + ds.add_memory_database("query_broad_update_delete", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "alice_private", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "alice_public", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + for action in ("update-query", "delete-query"): + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "bob"}, + ) + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_public"), + actor={"id": "bob"}, + ) + + private_update_response = await ds.client.post( + "/data/alice_private/-/update", + actor={"id": "bob"}, + json={"update": {"title": "Nope"}}, + ) + private_delete_response = await ds.client.post( + "/data/alice_private/-/delete", + actor={"id": "bob"}, + json={}, + ) + public_update_response = await ds.client.post( + "/data/alice_public/-/update", + actor={"id": "bob"}, + json={"update": {"title": "Bob can edit public queries"}}, + ) + public_delete_response = await ds.client.post( + "/data/alice_public/-/delete", + actor={"id": "bob"}, + json={}, + ) + + assert private_update_response.status_code == 403 + assert private_delete_response.status_code == 403 + assert public_update_response.status_code == 200 + assert 
public_delete_response.status_code == 200 + assert await ds.get_query("data", "alice_private") is not None + assert await ds.get_query("data", "alice_public") is None + + @pytest.mark.asyncio async def test_user_writable_query_execution_rechecks_table_permissions(): ds = Datasette( From 180a6a86fd77ac43f6cf3bfb7d7f9150003da419 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:16:10 -0700 Subject: [PATCH 249/299] Remove queries-plan.md We do not need this any more. It can live forever in Git history. --- queries-plan.md | 446 ------------------------------------------------ 1 file changed, 446 deletions(-) delete mode 100644 queries-plan.md diff --git a/queries-plan.md b/queries-plan.md deleted file mode 100644 index da6b7c92..00000000 --- a/queries-plan.md +++ /dev/null @@ -1,446 +0,0 @@ -# Queries in the internal database - -Plan for . - -## Goal - -Move named query definitions into Datasette's internal database, so hundreds or thousands of queries can be listed, searched, permission-filtered, managed, and executed efficiently. - -Terminology change: these are now "queries", not "canned queries". Legacy code and documentation can mention the old name only when describing compatibility or migration. - -## Decisions so far - -- Internal table name: `queries`. -- Query definitions should use real columns, not a JSON blob for all options. -- Query parameter names live in a `parameters` text column as a JSON array. No default values for parameters in this pass. -- No separate index is needed for the privacy/trust flags yet. -- User-created queries require `execute-sql` and `insert-query` on the database. They default to private, and writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. -- Configured queries default to trusted, which means actors who can view them can execute them without also holding `execute-sql` or the relevant write permissions. 
Config can opt out with `is_trusted: false`. -- Add `update-query` and `delete-query`, so administrators can manage queries created by other users. -- Remove the old `canned_queries()` hook from core. If we want compatibility later, build a separate `datasette-old-canned-queries` plugin. -- Writable user-created queries can be supported using `Database.analyze_sql()`, provided we fail closed when analysis cannot prove the required permissions. - -## Current shape - -- Query definitions currently come from `datasette.yaml` or the `canned_queries()` plugin hook. -- `Datasette.get_canned_queries(database_name, actor)` calls that hook every time it needs query definitions. -- `QueryResource.resources_sql()` currently enumerates databases and calls the hook for each one, because permissions and `/-/jump` need query resources. -- Query pages are visible if the actor has `view-query` for `QueryResource(database, query)`. Executing an untrusted stored query also checks `execute-sql` or the relevant write permissions. -- Arbitrary SQL executes if the actor has `execute-sql` for `DatabaseResource(database)`. - -The main performance and architecture win is making query resource enumeration a direct SQL query against the internal database. - -## Proposed internal schema - -Start with one `queries` table. 
- -```sql -CREATE TABLE IF NOT EXISTS queries ( - database_name TEXT NOT NULL, - name TEXT NOT NULL, - sql TEXT NOT NULL, - title TEXT, - description TEXT, - description_html TEXT, - options TEXT NOT NULL DEFAULT '{}', - parameters TEXT NOT NULL DEFAULT '[]', - is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), - is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), - source TEXT NOT NULL DEFAULT 'user', - owner_id TEXT, - created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (database_name, name) -); - -CREATE INDEX IF NOT EXISTS queries_owner_idx - ON queries(owner_id); -``` - -Column notes: - -- `database_name`, `name`, and `sql` are the routing and execution core. -- Display fields become columns: `title`, `description`, and `description_html`. -- Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. -- `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. -- Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. -- `is_private` means the query is only visible to its owning actor. This is enforced as a permission restriction, so broader `view-query` grants do not expose private rows. -- `is_trusted` means execution skips the usual `execute-sql` or write-permission checks after `view-query` has allowed access. -- `source` distinguishes `user`, `config`, and `plugin` rows. -- `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. 
- -No separate index is needed on `(database_name, name)` because the primary key already creates one. - -`QueryResource.resources_sql()` can become: - -```sql -SELECT q.database_name AS parent, q.name AS child -FROM queries q -JOIN catalog_databases cd ON cd.database_name = q.database_name -``` - -The join keeps persisted queries for detached databases from appearing as live resources. - -## Config and plugin migration - -`datasette.yaml` can continue to support `databases: {db}: queries:` blocks, but core should import them directly into the internal `queries` tables at startup: - -1. Ensure the internal schema exists. -2. Delete previous `source='config'` rows. -3. Read configured query blocks for each live database. -4. Normalize string definitions to `{"sql": ...}`. -5. Insert rows into `queries`, storing explicit `params` as JSON in `parameters`. - -Plugins should move to: - -```python -await datasette.add_query(...) -await datasette.remove_query(...) -``` - -Remove the old `canned_queries()` hookspec and all core calls to it. If compatibility is needed, build `datasette-old-canned-queries` later as a plugin that restores the hook and imports old hook results using `datasette.add_query()`. - -## Permission model - -Add core actions: - -- `insert-query`, database-level, for creating queries in a database. -- `update-query`, query-level, for modifying existing query definitions. -- `delete-query`, query-level, for deleting existing query definitions. - -User-created query creation requires: - -- `execute-sql` on `DatabaseResource(database)` -- `insert-query` on `DatabaseResource(database)` -- If analysis shows the query is writable, the table-level write permissions described in the writable query section. - -Updating an existing query requires: - -- `update-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. -- If the SQL changes, also require `execute-sql` on the database. 
-- If the changed SQL is writable, also require the table-level write permissions described in the writable query section. - -Deleting an existing query requires: - -- `delete-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. - -Default owner permissions: - -- For `source='user' AND owner_id = actor.id`, grant `update-query` and `delete-query`. -- For `source='user' AND owner_id = actor.id`, grant `view-query`. If the query is private, restriction SQL ensures no other actor sees it through a broader grant. - -## Executing queries - -Default execution rule for read-only queries: - -- If `is_trusted=0`, the actor needs `execute-sql` on the database. -- If `is_trusted=1`, the actor can execute the query without `execute-sql`, provided `view-query` allows access. - -Default execution rule for user-created writable queries: - -- `is_trusted` must be `0`. -- The actor must have `view-query`. -- The actor must currently have every write permission required by fresh `Database.analyze_sql()` results for the query SQL. - -Implementation: - -- Keep `view-query` in the broad `DEFAULT_ALLOW_ACTIONS` set, so saved queries remain visible by default in all-public Datasette. -- Emit default `view-query` allows for the owning actor. -- Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. -- Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. - -For read-only queries this keeps `QueryView` explicit: it checks `view-query` for the query resource, then checks `execute-sql` unless the row is trusted. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. - -Explicit deny rules should still be able to block a query, and `--default-deny` still blocks trusted queries unless something grants `view-query`. 
- -## Writable queries - -Writable user-created queries should be in scope, guarded by `Database.analyze_sql()`. - -The secure rule: a user can create, update, or execute a writable user-created query only if they currently have the corresponding write permissions for every table the SQL can affect. - -`Database.analyze_sql(sql, params=None)` runs the SQL through SQLite's authorizer on an isolated connection and returns a `SQLAnalysis` object containing `SQLTableAccess` rows: - -- `operation`: `read`, `insert`, `update`, or `delete` -- `database`: Datasette database name for `main`, or SQLite schema name where no Datasette mapping exists -- `table`: affected table or view -- `columns`: read/updated columns where SQLite reports them -- `source`: trigger/view/CTE source when SQLite reports one - -Validation flow for user-created queries: - -1. Derive named parameters from the SQL and pass harmless placeholder values into `db.analyze_sql()` so SQLite can prepare statements with bindings. -2. If analysis raises a SQLite error, reject the query. -3. If every table access is `read`, treat the query as read-only and require `execute-sql` plus `insert-query`/`update-query` as described above. -4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_trusted=0`. -5. Reject writable user-created queries that access a database other than the database they are being saved against, until `analyze_sql()` can reliably map attached SQLite schemas back to Datasette database names. -6. For every write access returned by analysis, require the corresponding permission on `TableResource(access.database, access.table)`: - - `insert` -> `insert-row` - - `update` -> `update-row` - - `delete` -> `delete-row` -7. Include write accesses reported from triggers and views, since those are real side effects. -8. Re-run the same analysis and permission checks when SQL changes through `update_query()` or `POST .../-/update`. -9. 
Re-run analysis before executing user-created writable queries, so schema or trigger changes cannot leave a previously saved query with stale permission assumptions. - -The user-facing API should not trust a submitted `is_write` value. It should derive `is_write` from analysis. - -Trusted configuration and plugin code can still call `datasette.add_query(..., is_write=True, ...)`. Those are treated as deployment/admin-authored queries. They keep the existing execution model: they require `view-query`, and the default `view-query` hook should preserve current default-open behavior for trusted writable queries while still respecting `--default-deny`. - -Fail closed cases for user-created writable queries: - -- Analysis fails. -- Analysis reports any write operation that cannot be mapped to a Datasette table resource. -- Analysis reports writes outside the target database. -- The actor lacks any required table write permission. -- `is_trusted=1` is requested through the user-facing API. - -This gives us writable user-created queries without letting `execute-sql` alone become a path to create arbitrary write endpoints. - -## HTTP API sketch - -JSON endpoints should follow Datasette's existing write API style: use `POST` plus action paths such as `/-/insert`, `/-/update`, and `/-/delete`, not HTTP `PATCH` or `DELETE`. - -Endpoints: - -- `GET /-/queries` and `GET /{database}/-/queries` show searchable HTML query browsers. `GET /-/queries.json` lists query definitions across every database the actor can view; `GET /{database}/-/queries.json` scopes that list to one database. Both JSON endpoints use cursor pagination with `_next` and `_size`. -- `POST /{database}/-/queries/insert` creates a query. -- `GET /{database}/{query}/-/definition` returns one query definition without executing it. -- `POST /{database}/{query}/-/update` updates one query. -- `POST /{database}/{query}/-/delete` deletes one query. 
- -Create request: - -```json -{ - "query": { - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers", - "description": "Highest revenue customers", - "is_private": true, - "parameters": ["region"] - } -} -``` - -Successful create returns `201` and the created query definition: - -```json -{ - "ok": true, - "query": { - "database": "fixtures", - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers", - "description": "Highest revenue customers", - "is_private": true, - "is_trusted": false, - "parameters": ["region"] - } -} -``` - -Update request, imitating `RowUpdateView`: - -```json -{ - "update": { - "title": "Top customers by revenue", - "is_private": false - }, - "return": true -} -``` - -Successful update returns `{"ok": true}` by default. With `"return": true`, return the updated query definition: - -```json -{ - "ok": true, - "query": { - "database": "fixtures", - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers by revenue", - "is_private": false, - "is_trusted": false - } -} -``` - -Delete request: - -```http -POST /{database}/{query}/-/delete -Content-Type: application/json -``` - -Successful delete returns: - -```json -{ - "ok": true -} -``` - -Validation: - -- Update bodies must be dictionaries containing an `update` dictionary, with optional `return`; invalid keys return `{"ok": false, "errors": [...]}`. -- Validate route-safe query names. -- Reject names that collide with a table or view in the same database, since table routes currently win over query routes. -- Analyze user-created SQL with `Database.analyze_sql()`. -- Use `validate_sql_select(sql)` as the read-only fast path when analysis shows only reads, but do not require it for writable queries that pass analysis and permission checks. 
-- Reject magic parameters such as `:_actor_id`, `:_cookie_*`, and `:_header_*` for user-created queries. -- Reject client-supplied `is_write`; derive it from analysis. -- Reject writable-only success/error fields for read-only queries. - -## Python API sketch - -Add methods on `Datasette`: - -```python -await datasette.add_query( - database, - name, - sql, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, -) - -await datasette.update_query( - database, - name, - *, - sql=UNCHANGED, - title=UNCHANGED, - description=UNCHANGED, - description_html=UNCHANGED, - hide_sql=UNCHANGED, - fragment=UNCHANGED, - parameters=UNCHANGED, - is_write=UNCHANGED, - is_private=UNCHANGED, - is_trusted=UNCHANGED, - source=UNCHANGED, - owner_id=UNCHANGED, - on_success_message=UNCHANGED, - on_success_message_sql=UNCHANGED, - on_success_redirect=UNCHANGED, - on_error_message=UNCHANGED, - on_error_redirect=UNCHANGED, -) - -await datasette.remove_query(database, name, source=None) - -await datasette.get_query(database, name) -await datasette.list_queries( - database, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, -) -``` - -`list_queries()` should return a bounded page shaped like `{"queries": [...], "next": "...", "has_more": true, "limit": 50}`. The `next` value is an opaque cursor token, not an offset. Passing `database=None` lists visible queries across all live databases, still filtered through `view-query` permission SQL. 
- -`update_query()` should use an internal sentinel default such as `UNCHANGED = object()` so callers can distinguish "leave this column alone" from "set this column to `NULL`": - -```python -await datasette.update_query( - "fixtures", - "top_customers", - on_success_redirect=None, -) -``` - -For column-backed fields, `None` should write SQL `NULL`. For option fields, `None` should remove that key from the JSON object so `get_query()` returns `None`; omitting the field should leave the existing option unchanged. - -Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. - -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_private`, `is_trusted`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. - -## Query page save UI - -On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. - -The save form should call `POST /{database}/-/queries/insert` and default to `is_private=true`. - -On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. 
The full `/{database}/-/queries` page provides search, filters, and cursor pagination. The global `/-/queries` page reuses the same interface and shows the database for each query. - -## Dedicated create query UI - -Add `/{database}/-/queries/-/create` for the fuller query authoring flow, including writable queries. - -This page should require `execute-sql` and `insert-query` to access. It should provide a SQL editor and a mode control: - -- Read-only -- Writable - -Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and privacy status. - -Writable mode should always run `Database.analyze_sql()` and show an analysis panel before saving: - -- detected operation -- database and table -- required permission -- whether the actor has that permission -- source, when the operation comes from a trigger or view - -The Save button should be disabled until analysis succeeds and every required table write permission is allowed. - -The existing edit-SQL flow from query pages can continue to point back to arbitrary SQL. A later enhancement can add "update this query" when the actor owns it or has `update-query`. - -## Test plan - -- Internal schema creates `queries`. -- Query parameters are stored in the `queries.parameters` text column as a JSON array of names. -- Config `queries:` blocks import into internal tables. -- Legacy string query definitions normalize to SQL rows. -- The old `canned_queries()` hook is no longer called by core. -- `QueryResource.resources_sql()` returns rows from `queries`. -- Database page and `/-/jump` list queries from the internal DB. -- `view-query` remains globally default-allowed, with `restriction_sql` narrowing private queries to their owner. -- Private query is only visible to its owner, even when a broader `view-query` rule applies. -- Non-trusted read-only query requires `execute-sql` to execute. 
-- Trusted read-only query can be executed without `execute-sql` after `view-query` passes. -- Config queries default to trusted and can opt out with `is_trusted: false`. -- User API rejects client-supplied `is_trusted`. -- User-created query requires both `execute-sql` and `insert-query`. -- User-created writable query creation uses `Database.analyze_sql()` and requires matching `insert-row`, `update-row`, and/or `delete-row` permissions for every reported write access. -- `/{database}/-/queries/-/create` provides the writable-query authoring UI with an analysis panel and disabled save until all required write permissions pass. -- User-created writable query execution re-runs `Database.analyze_sql()` and re-checks table write permissions. -- User-created writable query cannot be trusted through the user API. -- Query update uses `POST /{database}/{query}/-/update` with an `{"update": {...}}` body. -- Query delete uses `POST /{database}/{query}/-/delete`. -- There are no `PATCH` or HTTP `DELETE` routes for query management. -- `datasette.update_query(..., field=None)` writes `NULL` for column-backed fields and removes JSON keys for option fields, while omitted fields are left unchanged. -- Owner gets default `update-query` and `delete-query` for their own user-created rows. -- Admin can manage other users' queries with `update-query` and `delete-query`. -- User API rejects magic parameters. -- User API rejects writable queries if analysis fails, reports writes outside the target database, or reports writes the actor is not allowed to perform. -- Trusted config/plugin writable queries still execute through `view-query`. -- Trusted config/plugin writable queries are not default-allowed under `--default-deny`. -- Persisted internal DB does not expose queries for detached databases. 
From 24887004cffd52fe801ecd73da78e13b246ddede Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:51:57 -0700 Subject: [PATCH 250/299] Rename insert-query to store-query Also queries/insert to queries/store Refs https://github.com/simonw/datasette/pull/2741#issuecomment-4549103663 --- datasette/app.py | 6 ++--- datasette/default_actions.py | 6 ++--- datasette/templates/query_create.html | 2 +- datasette/views/database.py | 22 +++++++-------- docs/authentication.rst | 7 ++--- docs/json_api.rst | 5 ++-- tests/test_queries.py | 39 +++++++++++++++------------ 7 files changed, 47 insertions(+), 40 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8936b099..42a2d27d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -54,9 +54,9 @@ from .views.database import ( QueryDeleteView, QueryDefinitionView, GlobalQueryListView, - QueryInsertView, QueryListView, QueryParametersView, + QueryStoreView, QueryUpdateView, ) from .views.index import IndexView @@ -2824,8 +2824,8 @@ class Datasette: r"/(?P[^\/\.]+)/-/queries/analyze$", ) add_route( - QueryInsertView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/insert$", + QueryStoreView.as_view(self), + r"/(?P[^\/\.]+)/-/queries/store$", ) add_route( ExecuteWriteAnalyzeView.as_view(self), diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 6a1f77b8..0f4c25fa 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -62,9 +62,9 @@ def register_actions(): resource_class=DatabaseResource, ), Action( - name="insert-query", - abbr="iq", - description="Create saved queries", + name="store-query", + abbr="sq", + description="Create stored queries", resource_class=DatabaseResource, also_requires="execute-sql", ), diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index cb14ada4..f5dadbff 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -156,7 +156,7 @@ form.sql 
.query-create-sql textarea#sql-editor {

        Create query

        -
        +

        {{ urls.database(database) }}/

        diff --git a/datasette/views/database.py b/datasette/views/database.py index d40d69d1..900b94ba 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1419,7 +1419,7 @@ class QueryCreateView(BaseView): actor=request.actor, ) await self.ds.ensure_permission( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ) @@ -1440,11 +1440,11 @@ class QueryCreateAnalyzeView(BaseView): ): return _block_framing(_error(["Permission denied: need execute-sql"], 403)) if not await self.ds.allowed( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ): - return _block_framing(_error(["Permission denied: need insert-query"], 403)) + return _block_framing(_error(["Permission denied: need store-query"], 403)) invalid_keys = set(request.args) - {"sql"} if invalid_keys: @@ -1462,8 +1462,8 @@ class QueryCreateAnalyzeView(BaseView): ) -class QueryInsertView(QueryCreateView): - name = "query-insert" +class QueryStoreView(QueryCreateView): + name = "query-store" async def _error_response(self, request, db, query_data, message, status): message = _query_create_form_error_message(message) @@ -1488,11 +1488,11 @@ class QueryInsertView(QueryCreateView): ): return _error(["Permission denied: need execute-sql"], 403) if not await self.ds.allowed( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ): - return _error(["Permission denied: need insert-query"], 403) + return _error(["Permission denied: need store-query"], 403) is_json = False query_data = {} @@ -1961,8 +1961,8 @@ class QueryView(View): resource=DatabaseResource(database=database), actor=request.actor, ) - allow_insert_query = await datasette.allowed( - action="insert-query", + allow_store_query = await datasette.allowed( + action="store-query", resource=DatabaseResource(database=database), actor=request.actor, ) @@ -2020,13 +2020,13 @@ class 
QueryView(View): if ( not canned_query and allow_execute_sql - and allow_insert_query + and allow_store_query and is_validated_sql and ":_" not in sql ): save_query_url = ( datasette.urls.database(database) - + "/-/queries/insert?" + + "/-/queries/store?" + urlencode({"sql": sql}) ) diff --git a/docs/authentication.rst b/docs/authentication.rst index 453aaa19..184fec5e 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1293,11 +1293,12 @@ Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fi ``query`` is the name of the query (string) .. _actions_insert_query: +.. _actions_store_query: -insert-query ------------- +store-query +----------- -Actor is allowed to create saved queries in a database. +Actor is allowed to create stored queries in a database. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/json_api.rst b/docs/json_api.rst index dd54c459..1a6c7021 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -518,14 +518,15 @@ Listing saved queries Creating saved queries in the UI ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``GET //-/queries/-/create`` provides a form for creating saved queries. +``GET //-/queries/store`` provides a form for creating stored queries. +.. _QueryStoreView: .. _QueryInsertView: Creating saved queries ~~~~~~~~~~~~~~~~~~~~~~ -``POST //-/queries/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +``POST //-/queries/store`` creates a stored query. This requires ``execute-sql`` and ``store-query`` for the database. .. _QueryParametersView: .. 
_ExecuteWriteView: diff --git a/tests/test_queries.py b/tests/test_queries.py index 26a0748c..5d4da9bb 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -470,7 +470,7 @@ async def test_query_actions_are_registered(): await ds.invoke_startup() assert ds.get_action("execute-write-sql").resource_class is DatabaseResource - assert ds.get_action("insert-query").resource_class is DatabaseResource + assert ds.get_action("store-query").resource_class is DatabaseResource assert ds.get_action("update-query").resource_class is QueryResource assert ds.get_action("delete-query").resource_class is QueryResource @@ -537,15 +537,15 @@ async def test_analyze_write_query_rejects_writes_to_attached_databases(): @pytest.mark.asyncio -async def test_query_insert_api_creates_read_only_query(): +async def test_query_store_api_creates_read_only_query(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True - db = ds.add_memory_database("query_insert_api", name="data") + db = ds.add_memory_database("query_store_api", name="data") await db.execute_write("create table dogs (id integer primary key, name text)") await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={ "query": { @@ -860,7 +860,7 @@ async def test_global_query_list_api_and_html(): @pytest.mark.asyncio -async def test_query_insert_api_rejects_is_trusted(): +async def test_query_store_api_rejects_is_trusted(): ds = Datasette( memory=True, default_deny=True, @@ -870,7 +870,7 @@ async def test_query_insert_api_rejects_is_trusted(): "permissions": { "view-database": {"id": "writer"}, "execute-sql": {"id": "writer"}, - "insert-query": {"id": "writer"}, + "store-query": {"id": "writer"}, } } } @@ -880,7 +880,7 @@ async def test_query_insert_api_rejects_is_trusted(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "writer"}, json={"query": 
{"name": "trusted", "sql": "select 1", "is_trusted": True}}, ) @@ -890,7 +890,7 @@ async def test_query_insert_api_rejects_is_trusted(): @pytest.mark.asyncio -async def test_query_insert_api_creates_writable_query(): +async def test_query_store_api_creates_writable_query(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True db = ds.add_memory_database("query_write_api", name="data") @@ -898,7 +898,7 @@ async def test_query_insert_api_creates_writable_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={ "query": { @@ -962,14 +962,14 @@ async def test_query_update_and_delete_api(): @pytest.mark.asyncio -async def test_query_insert_api_rejects_magic_parameters(): +async def test_query_store_api_rejects_magic_parameters(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True ds.add_memory_database("query_magic_api", name="data") await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={"query": {"name": "magic", "sql": "select :_actor_id"}}, ) @@ -987,15 +987,19 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): await ds.invoke_startup() create_response = await ds.client.get( - "/data/-/queries/insert?sql=select+*+from+dogs", + "/data/-/queries/store?sql=select+*+from+dogs", actor={"id": "root"}, ) write_create_response = await ds.client.get( - "/data/-/queries/insert?sql=insert+into+dogs+(name)+values+('Cleo')", + "/data/-/queries/store?sql=insert+into+dogs+(name)+values+('Cleo')", actor={"id": "root"}, ) blank_create_response = await ds.client.get( - "/data/-/queries/insert", + "/data/-/queries/store", + actor={"id": "root"}, + ) + old_insert_response = await ds.client.get( + "/data/-/queries/insert?sql=select+*+from+dogs", actor={"id": "root"}, ) old_create_response = await ds.client.get( @@ -1075,7 +1079,8 @@ async def 
test_create_query_ui_and_arbitrary_sql_save_link(): ) assert query_response.status_code == 200 assert "Save this query" in query_response.text - assert "/data/-/queries/insert?sql=select+%2A+from+dogs" in query_response.text + assert "/data/-/queries/store?sql=select+%2A+from+dogs" in query_response.text + assert old_insert_response.status_code == 404 assert old_create_response.status_code == 404 @@ -1153,7 +1158,7 @@ async def test_create_query_form_error_redisplays_form_with_values(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, data={ "name": "dogs", @@ -1176,7 +1181,7 @@ async def test_create_query_form_error_redisplays_form_with_values(): assert 'name="is_private" value="1" checked' in response.text public_response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, data={ "name": "dogs", From 0cadd071871ef0b33e4ce3a23e316a104b3137c3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:53:31 -0700 Subject: [PATCH 251/299] No need to document QueryCreateAnalyzeView --- tests/test_docs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 396ba1a2..0d0ef1e1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -66,7 +66,14 @@ def documented_views(): if first_word.endswith("View"): view_labels.add(first_word) # We deliberately don't document these: - view_labels.update(("PatternPortfolioView", "AuthTokenView", "ApiExplorerView")) + view_labels.update( + ( + "PatternPortfolioView", + "AuthTokenView", + "ApiExplorerView", + "QueryCreateAnalyzeView", + ) + ) return view_labels From 4bf1c4b065fef64676abf5eabd04ff35e07188c5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:54:35 -0700 Subject: [PATCH 252/299] Rename canned queries to queries/stored queries in docs --- datasette/default_actions.py | 4 +- 
datasette/hookspecs.py | 4 +- datasette/resources.py | 2 +- datasette/views/database.py | 24 ++++----- datasette/views/table.py | 4 +- docs/authentication.rst | 16 +++--- docs/configuration.rst | 10 ++-- docs/custom_templates.rst | 8 +-- docs/internals.rst | 12 ++--- docs/introspection.rst | 2 +- docs/json_api.rst | 32 ++++++------ docs/pages.rst | 4 +- docs/plugin_hooks.rst | 16 +++--- docs/spatialite.rst | 2 +- docs/sql_queries.rst | 95 ++++++++++++++++++++++++++---------- tests/test_html.py | 6 +-- tests/test_permissions.py | 4 +- 17 files changed, 144 insertions(+), 101 deletions(-) diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 0f4c25fa..2f78570b 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -121,13 +121,13 @@ def register_actions(): Action( name="update-query", abbr="uq", - description="Update saved queries", + description="Update stored queries", resource_class=QueryResource, ), Action( name="delete-query", abbr="dq", - description="Delete saved queries", + description="Delete stored queries", resource_class=QueryResource, ), ) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index a4067eaa..22da02a4 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -174,7 +174,7 @@ def view_actions(datasette, actor, database, view, request): @hookspec def query_actions(datasette, actor, database, query_name, request, sql, params): - """Links for the query and canned query actions menu""" + """Links for the query and stored query actions menu""" @hookspec @@ -229,7 +229,7 @@ def top_query(datasette, request, database, sql): @hookspec def top_canned_query(datasette, request, database, query_name): - """HTML to include at the top of the canned query page""" + """HTML to include at the top of the stored query page""" @hookspec diff --git a/datasette/resources.py b/datasette/resources.py index 91a46d36..ee2e6d98 100644 --- a/datasette/resources.py +++ b/datasette/resources.py @@ -41,7 
+41,7 @@ class TableResource(Resource): class QueryResource(Resource): - """A saved query in a database.""" + """A stored query in a database.""" name = "query" parent_class = DatabaseResource diff --git a/datasette/views/database.py b/datasette/views/database.py index 900b94ba..f30d3815 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -222,11 +222,11 @@ class DatabaseContext(Context): tables: list = field(metadata={"help": "List of table objects in the database"}) hidden_count: int = field(metadata={"help": "Count of hidden tables"}) views: list = field(metadata={"help": "List of view objects in the database"}) - queries: list = field(metadata={"help": "List of canned query objects"}) + queries: list = field(metadata={"help": "List of stored query objects"}) queries_more: bool = field( - metadata={"help": "Boolean indicating if more saved queries are available"} + metadata={"help": "Boolean indicating if more stored queries are available"} ) - queries_count: int = field(metadata={"help": "Count of visible saved queries"}) + queries_count: int = field(metadata={"help": "Count of visible stored queries"}) allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) @@ -272,7 +272,7 @@ class QueryContext(Context): metadata={"help": "The SQL query object containing the `sql` string"} ) canned_query: str = field( - metadata={"help": "The name of the canned query if this is a canned query"} + metadata={"help": "The name of the stored query if this is a stored query"} ) private: bool = field( metadata={"help": "Boolean indicating if this is a private database"} @@ -282,11 +282,11 @@ class QueryContext(Context): # ) canned_query_write: bool = field( metadata={ - "help": "Boolean indicating if this is a canned query that allows writes" + "help": "Boolean indicating if this is a stored query that allows writes" } ) metadata: dict = field( - metadata={"help": "Metadata about the database or the canned 
query"} + metadata={"help": "Metadata about the database or the stored query"} ) db_is_immutable: bool = field( metadata={"help": "Boolean indicating if this database is immutable"} @@ -315,7 +315,7 @@ class QueryContext(Context): metadata={"help": "Dictionary of parameter names/values"} ) edit_sql_url: str = field( - metadata={"help": "URL to edit the SQL for a canned query"} + metadata={"help": "URL to edit the SQL for a stored query"} ) display_rows: list = field(metadata={"help": "List of result rows to display"}) columns: list = field(metadata={"help": "List of column names"}) @@ -1623,7 +1623,7 @@ class QueryView(View): db = await datasette.resolve_database(request) - # We must be a canned query + # We must be a stored query table_found = False try: await datasette.resolve_table(request) @@ -1742,14 +1742,14 @@ class QueryView(View): # Create lookup dict for quick access allowed_dict = {r.child: r for r in allowed_tables_page.resources} - # Are we a canned query? + # Are we a stored query? canned_query = None canned_query_write = False if "table" in request.url_vars: try: await datasette.resolve_table(request) except TableNotFound as table_not_found: - # Was this actually a canned query? + # Was this actually a stored query? 
canned_query = await datasette.get_canned_query( table_not_found.database_name, table_not_found.table, request.actor ) @@ -1759,7 +1759,7 @@ class QueryView(View): private = False if canned_query: - # Respect canned query permissions + # Respect stored query permissions visible, private = await datasette.check_visibility( request.actor, action="view-query", @@ -1823,7 +1823,7 @@ class QueryView(View): # For regular queries we only allow SELECT, plus other rules validate_sql_select(sql) else: - # Canned queries can run magic parameters + # Stored queries can run magic parameters params_for_query = MagicParameters(sql, params, request, datasette) await params_for_query.execute_params() results = await datasette.execute( diff --git a/datasette/views/table.py b/datasette/views/table.py index 7027bb10..7b1a5a82 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -963,11 +963,11 @@ async def table_view_traced(datasette, request): try: resolved = await datasette.resolve_table(request) except TableNotFound as not_found: - # Was this actually a canned query? + # Was this actually a stored query? 
canned_query = await datasette.get_canned_query( not_found.database_name, not_found.table, request.actor ) - # If this is a canned query, not a table, then dispatch to QueryView instead + # If this is a stored query, not a table, then dispatch to QueryView instead if canned_query: return await QueryView()(request, datasette) else: diff --git a/docs/authentication.rst b/docs/authentication.rst index 184fec5e..22db41d8 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -468,7 +468,7 @@ You can control the following: * Access to the entire Datasette instance * Access to specific databases * Access to specific tables and views -* Access to specific :ref:`canned_queries` +* Access to specific :ref:`queries ` If a user has permission to view a table they will be able to view that table, independent of if they have permission to view the database or instance that the table exists within. @@ -641,12 +641,12 @@ This works for SQL views as well - you can list their names in the ``"tables"`` .. _authentication_permissions_query: -Access to specific canned queries ---------------------------------- +Access to specific queries +-------------------------- -:ref:`canned_queries` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. +:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. -To limit access to the ``add_name`` canned query in your ``dogs.db`` database to just the :ref:`root user`: +To limit access to the ``add_name`` query in your ``dogs.db`` database to just the :ref:`root user`: .. [[[cog config_example(cog, """ @@ -1285,7 +1285,7 @@ Actor is allowed to view a table (or view) page, e.g. 
https://latest.datasette.i view-query ---------- -Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted saved query also requires ``execute-sql`` or the relevant write permissions; trusted saved queries can execute with ``view-query`` alone. +Actor is allowed to view a stored query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted stored query also requires ``execute-sql`` or the relevant write permissions; :ref:`trusted stored queries ` can execute with ``view-query`` alone. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) @@ -1308,7 +1308,7 @@ Actor is allowed to create stored queries in a database. update-query ------------ -Actor is allowed to update a saved query. +Actor is allowed to update a stored query. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) @@ -1320,7 +1320,7 @@ Actor is allowed to update a saved query. delete-query ------------ -Actor is allowed to delete a saved query. +Actor is allowed to delete a stored query. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) diff --git a/docs/configuration.rst b/docs/configuration.rst index 8c8c8a67..cf9590b8 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -87,6 +87,7 @@ This is equivalent to a ``datasette.yaml`` file containing the following: } .. [[[end]]] + .. _configuration_reference: ``datasette.yaml`` reference @@ -435,10 +436,10 @@ Here is a simple example: .. _configuration_reference_canned_queries: -Canned queries configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Queries configuration +~~~~~~~~~~~~~~~~~~~~~ -:ref:`Canned queries ` are named SQL queries that appear in the Datasette interface. 
They can be configured in ``datasette.yaml`` using the ``queries`` key at the database level: +:ref:`Queries ` are named SQL queries that appear in the Datasette interface. They can be configured in ``datasette.yaml`` using the ``queries`` key at the database level: .. [[[cog from metadata_doc import config_example, config_example @@ -483,7 +484,7 @@ Canned queries configuration } .. [[[end]]] -See the :ref:`canned queries documentation ` for more, including how to configure :ref:`writable canned queries `. +See the :ref:`queries documentation ` for more, including how to configure :ref:`writable queries `. .. _configuration_reference_css_js: @@ -1211,4 +1212,3 @@ For column types that accept additional configuration, use an object with ``type } } .. [[[end]]] - diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 8cc40f0f..c324fb79 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -29,7 +29,7 @@ The custom SQL template (``/dbname?sql=...``) gets this: -A canned query template (``/dbname/queryname``) gets this: +A stored query template (``/dbname/queryname``) gets this: .. code-block:: html @@ -193,8 +193,8 @@ The lookup rules Datasette uses are as follows:: query-mydatabase.html query.html - Canned query page (/mydatabase/canned-query): - query-mydatabase-canned-query.html + Stored query page (/mydatabase/query-name): + query-mydatabase-query-name.html query-mydatabase.html query.html @@ -230,7 +230,7 @@ will look something like this:: -This example is from the canned query page for a query called "tz" in the +This example is from the stored query page for a query called "tz" in the database called "mydb". 
The asterisk shows which template was selected - so in this case, Datasette found a template file called ``query-mydb-tz.html`` and used that - but if that template had not been found, it would have tried for diff --git a/docs/internals.rst b/docs/internals.rst index c76de487..084922f8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -725,7 +725,7 @@ The builder methods are: - ``allow_all(action)`` - allow an action across all databases and resources - ``allow_database(database, action)`` - allow an action on a specific database -- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`canned query `) within a database +- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`stored query `) within a database Each method returns the ``TokenRestrictions`` instance so calls can be chained. @@ -837,10 +837,10 @@ await .get_resource_metadata(self, database_name, resource_name) ``database_name`` - string The name of the database to query. ``resource_name`` - string - The name of the resource (table, view, or canned query) inside ``database_name`` to query. + The name of the resource (table, view, or stored query) inside ``database_name`` to query. Returns metadata keys and values for the specified "resource" as a dictionary. -A "resource" in this context can be a table, view, or canned query. +A "resource" in this context can be a table, view, or stored query. Internally queries the ``metadata_resources`` table inside the :ref:`internal database `. .. _datasette_get_column_metadata: @@ -851,7 +851,7 @@ await .get_column_metadata(self, database_name, resource_name, column_name) ``database_name`` - string The name of the database to query. ``resource_name`` - string - The name of the resource (table, view, or canned query) inside ``database_name`` to query. 
+ The name of the resource (table, view, or stored query) inside ``database_name`` to query. ``column_name`` - string The name of the column inside ``resource_name`` to query. @@ -897,7 +897,7 @@ await .set_resource_metadata(self, database_name, resource_name, key, value) ``database_name`` - string The database the metadata entry belongs to. ``resource_name`` - string - The resource (table, view, or canned query) the metadata entry belongs to. + The resource (table, view, or stored query) the metadata entry belongs to. ``key`` - string The metadata entry key to insert (ex ``title``, ``description``, etc.) ``value`` - string @@ -915,7 +915,7 @@ await .set_column_metadata(self, database_name, resource_name, column_name, key, ``database_name`` - string The database the metadata entry belongs to. ``resource_name`` - string - The resource (table, view, or canned query) the metadata entry belongs to. + The resource (table, view, or stored query) the metadata entry belongs to. ``column-name`` - string The column the metadata entry belongs to. ``key`` - string diff --git a/docs/introspection.rst b/docs/introspection.rst index d2eb8efd..7702a4b5 100644 --- a/docs/introspection.rst +++ b/docs/introspection.rst @@ -149,7 +149,7 @@ Shows currently attached databases. `Databases example /-/queries.json`` returns saved query definitions for a specific database. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. +``GET /-/queries.json`` returns stored query definitions across every database that the actor can view. ``GET //-/queries.json`` returns stored query definitions for a specific database. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. .. 
_QueryCreateView: -Creating saved queries in the UI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Creating stored queries in the UI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``GET //-/queries/store`` provides a form for creating stored queries. .. _QueryStoreView: .. _QueryInsertView: -Creating saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Creating stored queries +~~~~~~~~~~~~~~~~~~~~~~~ ``POST //-/queries/store`` creates a stored query. This requires ``execute-sql`` and ``store-query`` for the database. @@ -545,24 +545,24 @@ Executing write SQL .. _QueryDefinitionView: -Getting a saved query definition -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Getting a stored query definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``GET ///-/definition`` returns a saved query definition without executing it. +``GET ///-/definition`` returns a stored query definition without executing it. .. _QueryUpdateView: -Updating saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Updating stored queries +~~~~~~~~~~~~~~~~~~~~~~~ -``POST ///-/update`` updates a saved query using a JSON body with an ``"update"`` object. +``POST ///-/update`` updates a stored query using a JSON body with an ``"update"`` object. .. _QueryDeleteView: -Deleting saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Deleting stored queries +~~~~~~~~~~~~~~~~~~~~~~~ -``POST ///-/delete`` deletes a saved query. +``POST ///-/delete`` deletes a stored query. .. _TableInsertView: diff --git a/docs/pages.rst b/docs/pages.rst index 34c851a5..e57c15e6 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -28,7 +28,7 @@ The index page can also be accessed at ``/-/``, useful for if the default index Database ======== -Each database has a page listing the tables, views and canned queries available for that database. If the :ref:`actions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. +Each database has a page listing the tables, views and stored queries available for that database. 
If the :ref:`actions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. Examples: @@ -68,7 +68,7 @@ This means you can link directly to a query by constructing the following URL: ``/database-name/-/query?sql=SELECT+*+FROM+table_name`` -Each configured :ref:`canned query ` has its own page, at ``/database-name/query-name``. Viewing this page will execute the query and display the results. +Each configured :ref:`stored query ` has its own page, at ``/database-name/query-name``. Viewing this page will execute the query and display the results. In both cases adding a ``.json`` extension to the URL will return the results as JSON. diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index b2676b3e..264b473e 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -609,7 +609,7 @@ When a request is received, the ``"render"`` callback function is called with ze The SQL query that was executed. ``query_name`` - string or None - If this was the execution of a :ref:`canned query `, the name of that query. + If this was the execution of a :ref:`stored query `, the name of that query. ``database`` - string The name of the database. @@ -1212,7 +1212,7 @@ Examples: `datasette-saved-queries `__ @@ -1635,7 +1635,7 @@ register_magic_parameters(datasette) ``datasette`` - :ref:`internals_datasette` You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. -:ref:`canned_queries_magic_parameters` can be used to add automatic parameters to :ref:`canned queries `. This plugin hook allows additional magic parameters to be defined by plugins. +:ref:`canned_queries_magic_parameters` can be used to add automatic parameters to :ref:`configured queries `. This plugin hook allows additional magic parameters to be defined by plugins. Magic parameters all take this format: ``_prefix_rest_of_parameter``. 
The prefix indicates which magic parameter function should be called - the rest of the parameter is passed as an argument to that function. @@ -1828,7 +1828,7 @@ jump_items_sql(datasette, actor, request) This hook allows plugins to add extra results to Datasette's ``/`` jump menu, which is powered by the ``/-/jump`` JSON endpoint. -Return a ``datasette.jump.JumpSQL`` object, or a list of ``JumpSQL`` objects. Each ``JumpSQL`` object wraps a SQL query to be searched alongside Datasette's own databases, tables, views and canned query results. The hook can also be an ``async def`` function, or return an awaitable that resolves to one of these values. +Return a ``datasette.jump.JumpSQL`` object, or a list of ``JumpSQL`` objects. Each ``JumpSQL`` object wraps a SQL query to be searched alongside Datasette's own databases, tables, views and stored query results. The hook can also be an ``async def`` function, or return an awaitable that resolves to one of these values. ``JumpSQL`` queries run against Datasette's internal database by default. To run a query against another database, pass its name as the optional ``database=`` argument. For example, ``JumpSQL(database="content", sql="...")`` runs against the ``content`` database. @@ -2004,7 +2004,7 @@ query_actions(datasette, actor, database, query_name, request, sql, params) The name of the database. ``query_name`` - string or None - The name of the canned query, or ``None`` if this is an arbitrary SQL query. + The name of the stored query, or ``None`` if this is an arbitrary SQL query. ``request`` - :ref:`internals_request` The current HTTP request. @@ -2015,7 +2015,7 @@ query_actions(datasette, actor, database, query_name, request, sql, params) ``params`` - dictionary The parameters passed to the SQL query, if any. -Populates a "Query actions" menu on the canned query and arbitrary SQL query pages. +Populates a "Query actions" menu on the stored query and arbitrary SQL query pages. 
This example adds a new query action linking to a page for explaining a query: @@ -2294,9 +2294,9 @@ top_canned_query(datasette, request, database, query_name) The name of the database. ``query_name`` - string - The name of the canned query. + The name of the stored query. -Returns HTML to be displayed at the top of the canned query page. +Returns HTML to be displayed at the top of the stored query page. .. _plugin_event_tracking: diff --git a/docs/spatialite.rst b/docs/spatialite.rst index c93c1e00..1999ab78 100644 --- a/docs/spatialite.rst +++ b/docs/spatialite.rst @@ -30,7 +30,7 @@ Warning The following steps are recommended: - Disable arbitrary SQL queries by untrusted users. See :ref:`authentication_permissions_execute_sql` for ways to do this. The easiest is to start Datasette with the ``datasette --setting default_allow_sql off`` option. - - Define :ref:`canned_queries` with the SQL queries that use SpatiaLite functions that you want people to be able to execute. + - Define :ref:`queries ` with the SQL queries that use SpatiaLite functions that you want people to be able to execute. The `Datasette SpatiaLite tutorial `__ includes detailed instructions for running SpatiaLite safely using these techniques diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index 7c3cd4ac..d60656e3 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -68,10 +68,10 @@ You can also use the `sqlite-utils `__ tool .. _canned_queries: -Canned queries --------------- +Queries +------- -As an alternative to adding views to your database, you can define canned queries inside your ``datasette.yaml`` file. Here's an example: +As an alternative to adding views to your database, you can define named queries inside your ``datasette.yaml`` file. Here's an example: .. 
[[[cog from metadata_doc import config_example, config_example @@ -120,24 +120,67 @@ Then run Datasette like this:: datasette sf-trees.db -m metadata.json -Each canned query will be listed on the database index page, and will also get its own URL at:: +Each configured query will be listed on the database index page, and will also get its own URL at:: - /database-name/canned-query-name + /database-name/query-name For the above example, that URL would be:: /sf-trees/just_species -You can optionally include ``"title"`` and ``"description"`` keys to show a title and description on the canned query page. As with regular table metadata you can alternatively specify ``"description_html"`` to have your description rendered as HTML (rather than having HTML special characters escaped). +You can optionally include ``"title"`` and ``"description"`` keys to show a title and description on the query page. As with regular table metadata you can alternatively specify ``"description_html"`` to have your description rendered as HTML (rather than having HTML special characters escaped). + +.. _stored_queries: +.. _saved_queries: + +Stored queries +~~~~~~~~~~~~~~ + +Datasette stores both configured queries and user-created queries in the ``queries`` table in the :ref:`internal database `. Configured queries come from the ``queries`` section of ``datasette.yaml``. User-created stored queries can be created from the SQL query page by actors with the :ref:`actions_store_query` and :ref:`actions_execute_sql` permissions. Writable stored queries also require the permissions needed for the writes they perform. + +Stored queries created by users default to private. Private stored queries can only be viewed, updated or deleted by the actor that created them. Broad ``view-query``, ``update-query`` or ``delete-query`` permission grants still do not allow other actors to access another actor's private stored queries. + +Stored queries created by users are untrusted. 
This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. + +.. _trusted_stored_queries: +.. _trusted_saved_queries: + +Trusted stored queries +++++++++++++++++++++++ + +A trusted stored query can execute with ``view-query`` permission alone. It skips the additional ``execute-sql`` and write permission checks that are applied to untrusted stored queries. + +Trusted stored queries should only be used for SQL that has been reviewed by someone trusted to configure the Datasette instance. For that reason, trusted stored queries can only be added using configuration. Users cannot create trusted stored queries through the web interface or the stored query JSON API. + +Queries defined in ``datasette.yaml`` are trusted by default: + +.. code-block:: yaml + + databases: + mydatabase: + queries: + report: + sql: select * from report + +You can opt out of this behavior for a configured query using ``is_trusted: false``: + +.. code-block:: yaml + + databases: + mydatabase: + queries: + report: + sql: select * from report + is_trusted: false .. _canned_queries_named_parameters: -Canned query parameters -~~~~~~~~~~~~~~~~~~~~~~~ +Query parameters +~~~~~~~~~~~~~~~~ -Canned queries support named parameters, so if you include those in the SQL you will then be able to enter them using the form fields on the canned query page or by adding them to the URL. This means canned queries can be used to create custom JSON APIs based on a carefully designed SQL statement. +Configured queries support named parameters, so if you include those in the SQL you will then be able to enter them using the form fields on the query page or by adding them to the URL. 
This means configured queries can be used to create custom JSON APIs based on a carefully designed SQL statement. -Here's an example of a canned query with a named parameter: +Here's an example of a configured query with a named parameter: .. code-block:: sql @@ -147,7 +190,7 @@ Here's an example of a canned query with a named parameter: where neighborhood like '%' || :text || '%' order by neighborhood; -In the canned query configuration looks like this: +The query configuration looks like this: .. [[[cog @@ -204,7 +247,7 @@ In the canned query configuration looks like this: Note that we are using SQLite string concatenation here - the ``||`` operator - to add wildcard ``%`` characters to the string provided by the user. -You can try this canned query out here: +You can try this query out here: https://latest.datasette.io/fixtures/neighborhood_search?text=town In this example the ``:text`` named parameter is automatically extracted from the query using a regular expression. @@ -272,15 +315,15 @@ You can alternatively provide an explicit list of named parameters using the ``" .. _canned_queries_options: -Additional canned query options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Additional query options +~~~~~~~~~~~~~~~~~~~~~~~~ -Additional options can be specified for canned queries in the YAML or JSON configuration. +Additional options can be specified for configured queries in the YAML or JSON configuration. hide_sql ++++++++ -Canned queries default to displaying their SQL query at the top of the page. If the query is extremely long you may want to hide it by default, with a "show" link that can be used to make it visible. +Configured queries default to displaying their SQL query at the top of the page. If the query is extremely long you may want to hide it by default, with a "show" link that can be used to make it visible. Add the ``"hide_sql": true`` option to hide the SQL query by default. 
@@ -289,7 +332,7 @@ fragment Some plugins, such as `datasette-vega `__, can be configured by including additional data in the fragment hash of the URL - the bit that comes after a ``#`` symbol. -You can set a default fragment hash that will be included in the link to the canned query from the database index page using the ``"fragment"`` key. +You can set a default fragment hash that will be included in the link to the query from the database index page using the ``"fragment"`` key. This example demonstrates both ``fragment`` and ``hide_sql``: @@ -348,12 +391,12 @@ This example demonstrates both ``fragment`` and ``hide_sql``: .. _canned_queries_writable: -Writable canned queries -~~~~~~~~~~~~~~~~~~~~~~~ +Writable queries +~~~~~~~~~~~~~~~~ -Canned queries by default are read-only. You can use the ``"write": true`` key to indicate that a canned query can write to the database. +Configured queries are read-only by default. You can use the ``"write": true`` key to indicate that a query can write to the database. -See :ref:`authentication_permissions_query` for details on how to add permission checks to canned queries, using the ``"allow"`` key. +See :ref:`authentication_permissions_query` for details on how to add permission checks to queries, using the ``"allow"`` key. .. [[[cog config_example(cog, { @@ -488,7 +531,7 @@ Magic parameters Named parameters that start with an underscore are special: they can be used to automatically add values created by Datasette that are not contained in the incoming form fields or query string. -These magic parameters are only supported for canned queries: to avoid security issues (such as queries that extract the user's private cookies) they are not available to SQL that is executed by the user as a custom SQL query. 
+These magic parameters are only supported for configured queries: to avoid security issues (such as queries that extract the user's private cookies) they are not available to SQL that is executed by the user as a custom SQL query. Available magic parameters are: @@ -580,12 +623,12 @@ Additional custom magic parameters can be added by plugins using the :ref:`plugi .. _canned_queries_json_api: -JSON API for writable canned queries -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +JSON API for writable queries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Writable canned queries can also be accessed using a JSON API. You can POST data to them using JSON, and you can request that their response is returned to you as JSON. +Writable queries can also be accessed using a JSON API. You can POST data to them using JSON, and you can request that their response is returned to you as JSON. -To submit JSON to a writable canned query, encode key/value parameters as a JSON document:: +To submit JSON to a writable query, encode key/value parameters as a JSON document:: POST /mydatabase/add_message diff --git a/tests/test_html.py b/tests/test_html.py index 9e460da1..8edb9f6e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -154,7 +154,7 @@ async def test_database_page(ds_client): ("/fixtures/simple_view", "simple_view"), ] == sorted([(a["href"], a.text) for a in views_ul.find_all("a")]) - # And a list of canned queries + # And a list of stored queries queries_ul = soup.find("h2", string="Queries").find_next_sibling("ul") assert queries_ul is not None assert [ @@ -701,7 +701,7 @@ async def test_show_hide_sql_query(ds_client): @pytest.mark.asyncio async def test_canned_query_with_hide_has_no_hidden_sql(ds_client): - # For a canned query the show/hide should NOT have a hidden SQL field + # For a stored query the show/hide should NOT have a hidden SQL field # https://github.com/simonw/datasette/issues/1411 response = await ds_client.get("/fixtures/pragma_cache_size?_hide_sql=1") soup = 
Soup(response.content, "html.parser") @@ -1106,7 +1106,7 @@ async def test_trace_correctly_escaped(ds_client): "/fixtures/-/query?sql=select+*+from+facetable", "http://localhost/fixtures/-/query.json?sql=select+*+from+facetable", ), - # Canned query page + # Stored query page ( "/fixtures/neighborhood_search?text=town", "http://localhost/fixtures/neighborhood_search.json?text=town", diff --git a/tests/test_permissions.py b/tests/test_permissions.py index eb6cee9f..0e38c876 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -890,7 +890,7 @@ PermConfigTestCase = collections.namedtuple( resource=("perms_ds_one", "t1"), expected_result=True, ), - # view-query on canned query, wrong actor + # view-query on stored query, wrong actor PermConfigTestCase( config={ "databases": { @@ -909,7 +909,7 @@ PermConfigTestCase = collections.namedtuple( resource=("perms_ds_one", "q1"), expected_result=False, ), - # view-query on canned query, right actor + # view-query on stored query, right actor PermConfigTestCase( config={ "databases": { From b1029acc68626c2fddf7b678adc3339be0fce6e0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:05:41 -0700 Subject: [PATCH 253/299] top_canned_query is now top_stored_query, closes #2747 --- datasette/hookspecs.py | 2 +- datasette/templates/query.html | 2 +- datasette/views/database.py | 8 ++++---- docs/changelog.rst | 1 + docs/plugin_hooks.rst | 4 ++-- tests/test_plugins.py | 10 ++++++---- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 22da02a4..dcd502af 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -228,7 +228,7 @@ def top_query(datasette, request, database, sql): @hookspec -def top_canned_query(datasette, request, database, query_name): +def top_stored_query(datasette, request, database, query_name): """HTML to include at the top of the stored query page""" diff --git a/datasette/templates/query.html 
b/datasette/templates/query.html index 785b05af..3f03424a 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -33,7 +33,7 @@ {% set action_links, action_title = query_actions(), "Query actions" %} {% include "_action_menu.html" %} -{% if canned_query %}{{ top_canned_query() }}{% else %}{{ top_query() }}{% endif %} +{% if canned_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index f30d3815..def3c530 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -339,8 +339,8 @@ class QueryContext(Context): top_query: callable = field( metadata={"help": "Callable to render the top_query slot"} ) - top_canned_query: callable = field( - metadata={"help": "Callable to render the top_canned_query slot"} + top_stored_query: callable = field( + metadata={"help": "Callable to render the top_stored_query slot"} ) query_actions: callable = field( metadata={ @@ -2095,8 +2095,8 @@ class QueryView(View): top_query=make_slot_function( "top_query", datasette, request, database=database, sql=sql ), - top_canned_query=make_slot_function( - "top_canned_query", + top_stored_query=make_slot_function( + "top_stored_query", datasette, request, database=database, diff --git a/docs/changelog.rst b/docs/changelog.rst index dfb2a736..300ac02f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,7 @@ Unreleased ---------- - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) +- The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) .. 
_v1_0_a30: diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 264b473e..4737ca03 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -2279,9 +2279,9 @@ top_query(datasette, request, database, sql) Returns HTML to be displayed at the top of the query results page. -.. _plugin_hook_top_canned_query: +.. _plugin_hook_top_stored_query: -top_canned_query(datasette, request, database, query_name) +top_stored_query(datasette, request, database, query_name) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``datasette`` - :ref:`internals_datasette` diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f7adbd66..32276437 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1486,8 +1486,10 @@ class SlotPlugin: return "Xtop_query:{}:{}:{}".format(database, sql, request.args["z"]) @hookimpl - def top_canned_query(self, request, database, query_name): - return "Xtop_query:{}:{}:{}".format(database, query_name, request.args["z"]) + def top_stored_query(self, request, database, query_name): + return "Xtop_stored_query:{}:{}:{}".format( + database, query_name, request.args["z"] + ) @pytest.mark.asyncio @@ -1548,12 +1550,12 @@ async def test_hook_top_query(ds_client): @pytest.mark.asyncio -async def test_hook_top_canned_query(ds_client): +async def test_hook_top_stored_query(ds_client): try: pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/magic_parameters?z=xyz") assert response.status_code == 200 - assert "Xtop_query:fixtures:magic_parameters:xyz" in response.text + assert "Xtop_stored_query:fixtures:magic_parameters:xyz" in response.text finally: pm.unregister(name="SlotPlugin") From 2f73869c09962e320e5f40f4691df70618cd052e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:09:48 -0700 Subject: [PATCH 254/299] Document that canned_queries() has been removed --- docs/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.rst 
b/docs/changelog.rst index 300ac02f..674ff5b3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,6 +11,7 @@ Unreleased - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) - The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) +- The ``canned_queries()`` plugin hook has been removed. Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to managed stored queries instead. .. _v1_0_a30: From 56b14f37d547e03ba902516ac9ae13ef52765f77 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:16:18 -0700 Subject: [PATCH 255/299] The stored queries do not live in that DB --- docs/authentication.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/authentication.rst b/docs/authentication.rst index 22db41d8..86df7f04 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1298,7 +1298,7 @@ Actor is allowed to view a stored query page, e.g. https://latest.datasette.io/f store-query ----------- -Actor is allowed to create stored queries in a database. +Actor is allowed to create stored queries against a database. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) From 02a1468f1b3c8c14fb80037686b43de856e49c1f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:17:51 -0700 Subject: [PATCH 256/299] Renamed canned queries to queries / stored queries in docs And a few renames in code and YAML as well. 
--- .github/workflows/deploy-latest.yml | 33 +- datasette/app.py | 7 - datasette/facets.py | 2 +- datasette/static/app.css | 2 +- datasette/templates/query.html | 18 +- datasette/views/database.py | 92 +++--- datasette/views/table.py | 6 +- docs/authentication.rst | 10 +- docs/changelog.rst | 23 +- docs/configuration.rst | 6 +- docs/plugin_hooks.rst | 12 +- docs/spatialite.rst | 2 +- docs/sql_queries.rst | 12 +- docs/upgrade-1.0a20.md | 6 +- tests/test_canned_queries.py | 473 ---------------------------- tests/test_html.py | 12 +- tests/test_jump.py | 4 +- 17 files changed, 115 insertions(+), 605 deletions(-) delete mode 100644 tests/test_canned_queries.py diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 7d8dd37d..166d33d0 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -57,7 +57,7 @@ jobs: db.route = "alternative-route" ' > plugins/alternative_route.py cp fixtures.db fixtures2.db - - name: And the counters writable canned query demo + - name: And the counters writable stored query demo run: | cat > plugins/counters.py <This query cannot be executed because the database is immutable.

        {% endif %} -

        {{ metadata.title or database }}{% if canned_query and not metadata.title %}: {{ canned_query }}{% endif %}{% if private %} 🔒{% endif %}

        +

        {{ metadata.title or database }}{% if stored_query and not metadata.title %}: {{ stored_query }}{% endif %}{% if private %} 🔒{% endif %}

        {% set action_links, action_title = query_actions(), "Query actions" %} {% include "_action_menu.html" %} -{% if canned_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} +{% if stored_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

        Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

        @@ -52,7 +52,7 @@
        {% if query %}{{ query.sql }}{% endif %}
        {% endif %} {% else %} - {% if not canned_query %} + {% if not stored_query %} @@ -64,10 +64,10 @@ {% include "_sql_parameters.html" %}

        {% if not hide_sql %}{% endif %} - + {{ show_hide_hidden }} {% if save_query_url %}Save this query{% endif %} - {% if canned_query and edit_sql_url %}Edit SQL{% endif %} + {% if stored_query and edit_sql_url %}Edit SQL{% endif %}

        @@ -90,7 +90,7 @@
        Required permission
        {% else %} - {% if not canned_query_write and not error %} + {% if not stored_query_write and not error %}

        0 results

        {% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index def3c530..c36476f6 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -100,12 +100,12 @@ class DatabaseView(View): limit=5, include_private=True, ) - canned_queries = queries_page["queries"] + stored_queries = queries_page["queries"] queries_more = queries_page["has_more"] queries_count = ( await datasette.count_queries(database, actor=request.actor) if queries_more - else len(canned_queries) + else len(stored_queries) ) async def database_actions(): @@ -137,7 +137,7 @@ class DatabaseView(View): "tables": tables, "hidden_count": len([t for t in tables if t["hidden"]]), "views": sql_views, - "queries": canned_queries, + "queries": stored_queries, "queries_more": queries_more, "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, @@ -172,7 +172,7 @@ class DatabaseView(View): tables=tables, hidden_count=len([t for t in tables if t["hidden"]]), views=sql_views, - queries=canned_queries, + queries=stored_queries, queries_more=queries_more, queries_count=queries_count, allow_execute_sql=allow_execute_sql, @@ -271,7 +271,7 @@ class QueryContext(Context): query: dict = field( metadata={"help": "The SQL query object containing the `sql` string"} ) - canned_query: str = field( + stored_query: str = field( metadata={"help": "The name of the stored query if this is a stored query"} ) private: bool = field( @@ -280,7 +280,7 @@ class QueryContext(Context): # urls: dict = field( # metadata={"help": "Object containing URL helpers like `database()`"} # ) - canned_query_write: bool = field( + stored_query_write: bool = field( metadata={ "help": "Boolean indicating if this is a stored query that allows writes" } @@ -1629,10 +1629,10 @@ class QueryView(View): await datasette.resolve_table(request) table_found = True except TableNotFound as table_not_found: - canned_query = await datasette.get_canned_query( - 
table_not_found.database_name, table_not_found.table, request.actor + stored_query = await datasette.get_query( + table_not_found.database_name, table_not_found.table ) - if canned_query is None: + if stored_query is None: raise if table_found: # That should not have happened @@ -1640,13 +1640,13 @@ class QueryView(View): if not await datasette.allowed( action="view-query", - resource=QueryResource(database=db.name, query=canned_query["name"]), + resource=QueryResource(database=db.name, query=stored_query["name"]), actor=request.actor, ): raise Forbidden("You do not have permission to view this query") await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor + datasette, db, stored_query, request.actor ) # If database is immutable, return an error @@ -1674,19 +1674,19 @@ class QueryView(View): or params.get("_json") ) params_for_query = MagicParameters( - canned_query["sql"], params, request, datasette + stored_query["sql"], params, request, datasette ) await params_for_query.execute_params() ok = None redirect_url = None try: cursor = await db.execute_write( - canned_query["sql"], params_for_query, request=request + stored_query["sql"], params_for_query, request=request ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO - on_success_message_sql = canned_query.get("on_success_message_sql") + on_success_message_sql = stored_query.get("on_success_message_sql") if on_success_message_sql: try: message_result = ( @@ -1698,18 +1698,18 @@ class QueryView(View): message = "Error running on_success_message_sql: {}".format(ex) message_type = datasette.ERROR if not message: - message = canned_query.get( + message = stored_query.get( "on_success_message" ) or "Query executed, {} row{} affected".format( cursor.rowcount, "" if cursor.rowcount == 1 else "s" ) - redirect_url = canned_query.get("on_success_redirect") + redirect_url = stored_query.get("on_success_redirect") 
ok = True except Exception as ex: - message = canned_query.get("on_error_message") or str(ex) + message = stored_query.get("on_error_message") or str(ex) message_type = datasette.ERROR - redirect_url = canned_query.get("on_error_redirect") + redirect_url = stored_query.get("on_error_redirect") ok = False if should_return_json: return Response.json( @@ -1743,33 +1743,33 @@ class QueryView(View): allowed_dict = {r.child: r for r in allowed_tables_page.resources} # Are we a stored query? - canned_query = None - canned_query_write = False + stored_query = None + stored_query_write = False if "table" in request.url_vars: try: await datasette.resolve_table(request) except TableNotFound as table_not_found: # Was this actually a stored query? - canned_query = await datasette.get_canned_query( - table_not_found.database_name, table_not_found.table, request.actor + stored_query = await datasette.get_query( + table_not_found.database_name, table_not_found.table ) - if canned_query is None: + if stored_query is None: raise - canned_query_write = bool(canned_query.get("write")) + stored_query_write = bool(stored_query.get("write")) private = False - if canned_query: + if stored_query: # Respect stored query permissions visible, private = await datasette.check_visibility( request.actor, action="view-query", - resource=QueryResource(database=database, query=canned_query["name"]), + resource=QueryResource(database=database, query=stored_query["name"]), ) if not visible: raise Forbidden("You do not have permission to view this query") - if not canned_query_write: + if not stored_query_write: await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor + datasette, db, stored_query, request.actor ) else: @@ -1783,15 +1783,15 @@ class QueryView(View): params = {key: request.args.get(key) for key in request.args} sql = None - if canned_query: - sql = canned_query["sql"] + if stored_query: + sql = stored_query["sql"] elif "sql" in params: sql = 
params.pop("sql") # Extract any :named parameters named_parameters = [] - if canned_query and canned_query.get("params"): - named_parameters = canned_query["params"] + if stored_query and stored_query.get("params"): + named_parameters = stored_query["params"] if not named_parameters and sql: named_parameters = derive_named_parameters(sql) named_parameter_values = { @@ -1817,9 +1817,9 @@ class QueryView(View): params_for_query = params - if sql and not canned_query_write: + if sql and not stored_query_write: try: - if not canned_query: + if not stored_query: # For regular queries we only allow SELECT, plus other rules validate_sql_select(sql) else: @@ -1879,7 +1879,7 @@ class QueryView(View): columns=columns, rows=rows, sql=sql, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, database=database, table=None, request=request, @@ -1911,10 +1911,10 @@ class QueryView(View): elif format_ == "html": headers = {} templates = [f"query-{to_css_class(database)}.html", "query.html"] - if canned_query: + if stored_query: templates.insert( 0, - f"query-{to_css_class(database)}-{to_css_class(canned_query['name'])}.html", + f"query-{to_css_class(database)}-{to_css_class(stored_query['name'])}.html", ) environment = datasette.get_jinja_environment(request) @@ -1932,8 +1932,8 @@ class QueryView(View): } ) metadata = await datasette.get_database_metadata(database) - if canned_query: - metadata = dict(canned_query) + if stored_query: + metadata = dict(stored_query) metadata.pop("source", None) renderers = {} @@ -1968,7 +1968,7 @@ class QueryView(View): ) show_hide_hidden = "" - if canned_query and canned_query.get("hide_sql"): + if stored_query and stored_query.get("hide_sql"): if bool(params.get("_show_sql")): show_hide_link = path_with_removed_args(request, {"_show_sql"}) show_hide_text = "hide" @@ -2018,7 +2018,7 @@ class QueryView(View): ) save_query_url = None if ( - not canned_query + not stored_query and 
allow_execute_sql and allow_store_query and is_validated_sql @@ -2036,7 +2036,7 @@ class QueryView(View): datasette=datasette, actor=request.actor, database=database, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, request=request, sql=sql, params=params, @@ -2056,15 +2056,15 @@ class QueryView(View): "sql": sql, "params": params, }, - canned_query=canned_query["name"] if canned_query else None, + stored_query=stored_query["name"] if stored_query else None, private=private, - canned_query_write=canned_query_write, + stored_query_write=stored_query_write, db_is_immutable=not db.is_mutable, error=query_error, hide_sql=hide_sql, show_hide_link=datasette.urls.path(show_hide_link), show_hide_text=show_hide_text, - editable=not canned_query, + editable=not stored_query, allow_execute_sql=allow_execute_sql, save_query_url=save_query_url, tables=await get_tables(datasette, request, db, allowed_dict), @@ -2100,7 +2100,7 @@ class QueryView(View): datasette, request, database=database, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, ), query_actions=query_actions, ), diff --git a/datasette/views/table.py b/datasette/views/table.py index 7b1a5a82..da69c6b5 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -964,11 +964,11 @@ async def table_view_traced(datasette, request): resolved = await datasette.resolve_table(request) except TableNotFound as not_found: # Was this actually a stored query? 
- canned_query = await datasette.get_canned_query( - not_found.database_name, not_found.table, request.actor + stored_query = await datasette.get_query( + not_found.database_name, not_found.table ) # If this is a stored query, not a table, then dispatch to QueryView instead - if canned_query: + if stored_query: return await QueryView()(request, datasette) else: raise diff --git a/docs/authentication.rst b/docs/authentication.rst index 86df7f04..cec47f97 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -121,7 +121,7 @@ This configuration will deny access to everyone except the user with ``id`` of ` How permissions are resolved ---------------------------- -Datasette performs permission checks using the internal :ref:`datasette_allowed`, method which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. +Datasette performs permission checks using the internal :ref:`datasette_allowed`, method which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. ``resource`` should be an instance of the appropriate ``Resource`` subclass from :mod:`datasette.resources`—for example ``InstanceResource()``, ``DatabaseResource(database="...``)`` or ``TableResource(database="...", table="...")``. This defaults to ``InstanceResource()`` if not specified. @@ -468,7 +468,7 @@ You can control the following: * Access to the entire Datasette instance * Access to specific databases * Access to specific tables and views -* Access to specific :ref:`queries ` +* Access to specific :ref:`queries ` If a user has permission to view a table they will be able to view that table, independent of if they have permission to view the database or instance that the table exists within. @@ -496,7 +496,7 @@ Here's how to restrict access to your entire Datasette instance to just the ``"i title: My private Datasette instance allow: id: root - + .. 
tab:: datasette.json @@ -644,7 +644,7 @@ This works for SQL views as well - you can list their names in the ``"tables"`` Access to specific queries -------------------------- -:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. +:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. To limit access to the ``add_name`` query in your ``dogs.db`` database to just the :ref:`root user`: @@ -1020,7 +1020,7 @@ You can also restrict permissions such that they can only be used within specifi The resulting token will only be able to insert rows, and only to tables in the ``mydatabase`` database. -Finally, you can restrict permissions to individual resources - tables, SQL views and :ref:`named queries ` - within a specific database:: +Finally, you can restrict permissions to individual resources - tables, SQL views and :ref:`named queries ` - within a specific database:: datasette create-token root --resource mydatabase mytable insert-row diff --git a/docs/changelog.rst b/docs/changelog.rst index 674ff5b3..d15dec50 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,7 +11,8 @@ Unreleased - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) - The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) -- The ``canned_queries()`` plugin hook has been removed. Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to managed stored queries instead. +- The ``canned_queries()`` plugin hook has been removed. 
Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to manage stored queries instead. +- The ``datasette.get_canned_query()`` and ``datasette.get_canned_queries()`` methods have been removed. Plugins can use ``datasette.get_query()`` and ``datasette.list_queries()`` instead. .. _v1_0_a30: @@ -658,7 +659,7 @@ For more information and workarounds, read `the security advisory `` in a `` -

        +

        + + {% if save_query_base_url %}Save this query{% endif %} +

        ", + "on_success_message_sql": "select 'secret'", + } + }, + ) + form_response = await ds.client.post( + "/data/-/queries/store", + actor={"id": "root"}, + data={ + "name": "unsafe_form", + "sql": "select 1", + "description_html": "", + }, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == [ + "Invalid keys: description_html, on_success_message_sql" + ] + assert form_response.status_code == 400 + assert "Invalid keys: description_html" in form_response.text + assert await ds.get_query("data", "unsafe") is None + assert await ds.get_query("data", "unsafe_form") is None + + @pytest.mark.asyncio async def test_query_store_api_creates_writable_query(): ds = Datasette(memory=True, default_deny=True) @@ -959,6 +1000,42 @@ async def test_query_update_and_delete_api(): assert await ds.get_query("data", "editable") is None +@pytest.mark.asyncio +async def test_query_update_api_rejects_config_only_fields(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_update_config_only_fields", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + await ds.add_query( + "data", + "editable", + "insert into dogs (name) values (:name)", + is_write=True, + source="user", + owner_id="root", + ) + + response = await ds.client.post( + "/data/editable/-/update", + actor={"id": "root"}, + json={ + "update": { + "description_html": "", + "on_success_message_sql": "select 'secret'", + } + }, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == [ + "Invalid keys: description_html, on_success_message_sql" + ] + query = await ds.get_query("data", "editable") + assert query["description_html"] is None + assert query["on_success_message_sql"] is None + + @pytest.mark.asyncio async def test_query_update_api_rejects_trusted_queries_but_internal_update_allowed(): ds = Datasette( From 
b1289a73f9869e83a433a088c2a6c48285e67f2d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 16:51:00 -0700 Subject: [PATCH 273/299] stored_queries.StoredQuery dataclass --- datasette/app.py | 102 ++++++------ datasette/stored_queries.py | 258 ++++++++++++++++++++---------- datasette/views/database.py | 56 +++---- datasette/views/query_helpers.py | 19 +-- datasette/views/stored_queries.py | 37 +++-- docs/internals.rst | 14 +- tests/test_queries.py | 128 +++++++-------- 7 files changed, 357 insertions(+), 257 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 96683895..56b89789 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1029,8 +1029,8 @@ class Datasette: ) @staticmethod - def _query_row_to_dict(row): - return stored_queries.query_row_to_dict(row) + def _query_row_to_stored_query(row) -> stored_queries.StoredQuery | None: + return stored_queries.query_row_to_stored_query(row) @staticmethod def _query_options_json(options): @@ -1038,28 +1038,28 @@ class Datasette: async def add_query( self, - database, - name, - sql, + database: str, + name: str, + sql: str, *, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, - ): + title: str | None = None, + description: str | None = None, + description_html: str | None = None, + hide_sql: bool = False, + fragment: str | None = None, + parameters: Iterable[str] | None = None, + is_write: bool = False, + is_private: bool = False, + is_trusted: bool = False, + source: str = "plugin", + owner_id: str | None = None, + on_success_message: str | None = None, + on_success_message_sql: str | None = None, + on_success_redirect: str | None = None, + on_error_message: str | None = None, + 
on_error_redirect: str | None = None, + replace: bool = True, + ) -> None: return await stored_queries.add_query( self, database, @@ -1086,8 +1086,8 @@ class Datasette: async def update_query( self, - database, - name, + database: str, + name: str, *, sql=stored_queries.UNCHANGED, title=stored_queries.UNCHANGED, @@ -1106,7 +1106,7 @@ class Datasette: on_success_redirect=stored_queries.UNCHANGED, on_error_message=stored_queries.UNCHANGED, on_error_redirect=stored_queries.UNCHANGED, - ): + ) -> None: return await stored_queries.update_query( self, database, @@ -1130,24 +1130,28 @@ class Datasette: on_error_redirect=on_error_redirect, ) - async def remove_query(self, database, name, source=None): + async def remove_query( + self, database: str, name: str, source: str | None = None + ) -> None: return await stored_queries.remove_query(self, database, name, source=source) - async def get_query(self, database, name): + async def get_query( + self, database: str, name: str + ) -> stored_queries.StoredQuery | None: return await stored_queries.get_query(self, database, name) async def count_queries( self, - database=None, + database: str | None = None, *, - actor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - ): + actor: dict[str, Any] | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + ) -> int: return await stored_queries.count_queries( self, database, @@ -1162,19 +1166,19 @@ class Datasette: async def list_queries( self, - database=None, + database: str | None = None, *, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - include_private=False, - ): + actor: dict[str, Any] | None = None, + limit: int = 50, + cursor: str | None = None, + q: str | None = None, + is_write: bool | None 
= None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + include_private: bool = False, + ) -> stored_queries.StoredQueryPage: return await stored_queries.list_queries( self, database, diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index a28b71bf..bcfdfdb4 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -1,6 +1,8 @@ from __future__ import annotations +from dataclasses import dataclass import json +from typing import Any, Iterable from .resources import TableResource from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components @@ -19,7 +21,76 @@ QUERY_OPTION_FIELDS = ( ) -async def save_queries_from_config(datasette): +@dataclass +class StoredQuery: + database: str + name: str + sql: str + title: str | None + description: str | None + description_html: str | None + hide_sql: bool + fragment: str | None + parameters: list[str] + is_write: bool + is_private: bool + is_trusted: bool + source: str + owner_id: str | None + on_success_message: str | None + on_success_message_sql: str | None + on_success_redirect: str | None + on_error_message: str | None + on_error_redirect: str | None + private: bool | None = None + + +@dataclass +class StoredQueryPage: + queries: list[StoredQuery] + next: str | None + has_more: bool + limit: int + + +def stored_query_to_dict(query: StoredQuery) -> dict[str, Any]: + data = { + "database": query.database, + "name": query.name, + "sql": query.sql, + "title": query.title, + "description": query.description, + "description_html": query.description_html, + "hide_sql": query.hide_sql, + "fragment": query.fragment, + "params": list(query.parameters), + "parameters": list(query.parameters), + "is_write": query.is_write, + "is_private": query.is_private, + "is_trusted": query.is_trusted, + "source": query.source, + "owner_id": query.owner_id, + "on_success_message": query.on_success_message, + 
"on_success_message_sql": query.on_success_message_sql, + "on_success_redirect": query.on_success_redirect, + "on_error_message": query.on_error_message, + "on_error_redirect": query.on_error_redirect, + } + if query.private is not None: + data["private"] = query.private + return data + + +def stored_query_page_to_dict(page: StoredQueryPage) -> dict[str, Any]: + return { + "queries": [stored_query_to_dict(query) for query in page.queries], + "next": page.next, + "has_more": page.has_more, + "limit": page.limit, + } + + +async def save_queries_from_config(datasette: Any) -> None: # Apply configured query entries from datasette.yaml to the internal table. await datasette.get_internal_database().execute_write( "DELETE FROM queries WHERE source = 'config'" @@ -50,36 +121,38 @@ async def save_queries_from_config(datasette): ) -def query_row_to_dict(row): +def query_row_to_stored_query( + row: Any, private: bool | None = None +) -> StoredQuery | None: if row is None: return None parameters = json.loads(row["parameters"] or "[]") options = json.loads(row["options"] or "{}") - return { - "database": row["database_name"], - "name": row["name"], - "sql": row["sql"], - "title": row["title"], - "description": row["description"], - "description_html": row["description_html"], - "hide_sql": bool(options.get("hide_sql")), - "fragment": options.get("fragment"), - "params": parameters, - "parameters": parameters, - "is_write": bool(row["is_write"]), - "is_private": bool(row["is_private"]), - "is_trusted": bool(row["is_trusted"]), - "source": row["source"], - "owner_id": row["owner_id"], - "on_success_message": options.get("on_success_message"), - "on_success_message_sql": options.get("on_success_message_sql"), - "on_success_redirect": options.get("on_success_redirect"), - "on_error_message": options.get("on_error_message"), - "on_error_redirect": options.get("on_error_redirect"), - } + return StoredQuery( + database=row["database_name"], + name=row["name"], + sql=row["sql"], + 
title=row["title"], + description=row["description"], + description_html=row["description_html"], + hide_sql=bool(options.get("hide_sql")), + fragment=options.get("fragment"), + parameters=parameters, + is_write=bool(row["is_write"]), + is_private=bool(row["is_private"]), + is_trusted=bool(row["is_trusted"]), + source=row["source"], + owner_id=row["owner_id"], + on_success_message=options.get("on_success_message"), + on_success_message_sql=options.get("on_success_message_sql"), + on_success_redirect=options.get("on_success_redirect"), + on_error_message=options.get("on_error_message"), + on_error_redirect=options.get("on_error_redirect"), + private=private, + ) -def query_options_json(options): +def query_options_json(options: dict[str, Any]) -> str: options_dict = {} for field in QUERY_OPTION_FIELDS: value = options.get(field) @@ -92,29 +165,29 @@ def query_options_json(options): async def add_query( - datasette, - database, - name, - sql, + datasette: Any, + database: str, + name: str, + sql: str, *, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, -): + title: str | None = None, + description: str | None = None, + description_html: str | None = None, + hide_sql: bool = False, + fragment: str | None = None, + parameters: Iterable[str] | None = None, + is_write: bool = False, + is_private: bool = False, + is_trusted: bool = False, + source: str = "plugin", + owner_id: str | None = None, + on_success_message: str | None = None, + on_success_message_sql: str | None = None, + on_success_redirect: str | None = None, + on_error_message: str | None = None, + on_error_redirect: str | None = None, + replace: bool = True, +) -> None: parameters_json = 
json.dumps(list(parameters or [])) options_json = query_options_json( { @@ -170,9 +243,9 @@ async def add_query( async def update_query( - datasette, - database, - name, + datasette: Any, + database: str, + name: str, *, sql=UNCHANGED, title=UNCHANGED, @@ -191,7 +264,7 @@ async def update_query( on_success_redirect=UNCHANGED, on_error_message=UNCHANGED, on_error_redirect=UNCHANGED, -): +) -> None: fields = { "sql": sql, "title": title, @@ -263,7 +336,9 @@ async def update_query( ) -async def remove_query(datasette, database, name, source=None): +async def remove_query( + datasette: Any, database: str, name: str, source: str | None = None +) -> None: sql = "DELETE FROM queries WHERE database_name = ? AND name = ?" params = [database, name] if source is not None: @@ -272,7 +347,7 @@ async def remove_query(datasette, database, name, source=None): await datasette.get_internal_database().execute_write(sql, params) -async def get_query(datasette, database, name): +async def get_query(datasette: Any, database: str, name: str) -> StoredQuery | None: rows = await datasette.get_internal_database().execute( """ SELECT * FROM queries @@ -280,21 +355,21 @@ async def get_query(datasette, database, name): """, [database, name], ) - return query_row_to_dict(rows.first()) + return query_row_to_stored_query(rows.first()) async def count_queries( - datasette, - database=None, + datasette: Any, + database: str | None = None, *, - actor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, -): + actor: dict[str, Any] | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, +) -> int: allowed_sql, allowed_params = await datasette.allowed_resources_sql( action="view-query", actor=actor, @@ -354,20 +429,20 @@ async def count_queries( async def list_queries( - datasette, - database=None, + datasette: 
Any, + database: str | None = None, *, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - include_private=False, -): + actor: dict[str, Any] | None = None, + limit: int = 50, + cursor: str | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + include_private: bool = False, +) -> StoredQueryPage: limit = min(max(1, int(limit)), 1000) allowed_sql, allowed_params = await datasette.allowed_resources_sql( action="view-query", @@ -480,9 +555,10 @@ async def list_queries( queries = [] for row in rows: - query = query_row_to_dict(row) - if include_private: - query["private"] = bool(row["private"]) + query = query_row_to_stored_query( + row, private=bool(row["private"]) if include_private else None + ) + assert query is not None queries.append(query) next_token = None @@ -499,17 +575,23 @@ async def list_queries( tilde_encode(last_row["sort_key"]), tilde_encode(last_row["name"]), ) - return { - "queries": queries, - "next": next_token, - "has_more": has_more, - "limit": limit, - } + return StoredQueryPage( + queries=queries, + next=next_token, + has_more=has_more, + limit=limit, + ) async def ensure_query_write_permissions( - datasette, database, sql, *, actor=None, params=None, analysis=None -): + datasette: Any, + database: str, + sql: str, + *, + actor: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + analysis: Any = None, +) -> Any: write_actions = { "insert": "insert-row", "update": "update-row", diff --git a/datasette/views/database.py b/datasette/views/database.py index 98ca989c..b558b002 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,6 +13,7 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import 
QueryInterrupted from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import stored_query_to_dict from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -99,8 +100,8 @@ class DatabaseView(View): limit=5, include_private=True, ) - stored_queries = queries_page["queries"] - queries_more = queries_page["has_more"] + stored_queries = queries_page.queries + queries_more = queries_page.has_more queries_count = ( await datasette.count_queries(database, actor=request.actor) if queries_more @@ -136,7 +137,7 @@ class DatabaseView(View): "tables": tables, "hidden_count": len([t for t in tables if t["hidden"]]), "views": sql_views, - "queries": stored_queries, + "queries": [stored_query_to_dict(query) for query in stored_queries], "queries_more": queries_more, "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, @@ -447,7 +448,7 @@ class QueryView(View): if not await datasette.allowed( action="view-query", - resource=QueryResource(database=db.name, query=stored_query["name"]), + resource=QueryResource(database=db.name, query=stored_query.name), actor=request.actor, ): raise Forbidden("You do not have permission to view this query") @@ -480,20 +481,18 @@ class QueryView(View): or request.args.get("_json") or params.get("_json") ) - params_for_query = MagicParameters( - stored_query["sql"], params, request, datasette - ) + params_for_query = MagicParameters(stored_query.sql, params, request, datasette) await params_for_query.execute_params() ok = None redirect_url = None try: cursor = await db.execute_write( - stored_query["sql"], params_for_query, request=request + stored_query.sql, params_for_query, request=request ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO - on_success_message_sql = stored_query.get("on_success_message_sql") + on_success_message_sql = stored_query.on_success_message_sql if on_success_message_sql: try: 
message_result = ( @@ -505,18 +504,19 @@ class QueryView(View): message = "Error running on_success_message_sql: {}".format(ex) message_type = datasette.ERROR if not message: - message = stored_query.get( - "on_success_message" - ) or "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" + message = ( + stored_query.on_success_message + or "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) ) - redirect_url = stored_query.get("on_success_redirect") + redirect_url = stored_query.on_success_redirect ok = True except Exception as ex: - message = stored_query.get("on_error_message") or str(ex) + message = stored_query.on_error_message or str(ex) message_type = datasette.ERROR - redirect_url = stored_query.get("on_error_redirect") + redirect_url = stored_query.on_error_redirect ok = False if should_return_json: return Response.json( @@ -562,7 +562,7 @@ class QueryView(View): ) if stored_query is None: raise - stored_query_write = bool(stored_query.get("is_write")) + stored_query_write = stored_query.is_write private = False if stored_query: @@ -570,7 +570,7 @@ class QueryView(View): visible, private = await datasette.check_visibility( request.actor, action="view-query", - resource=QueryResource(database=database, query=stored_query["name"]), + resource=QueryResource(database=database, query=stored_query.name), ) if not visible: raise Forbidden("You do not have permission to view this query") @@ -591,14 +591,14 @@ class QueryView(View): sql = None if stored_query: - sql = stored_query["sql"] + sql = stored_query.sql elif "sql" in params: sql = params.pop("sql") # Extract any :named parameters named_parameters = [] - if stored_query and stored_query.get("params"): - named_parameters = stored_query["params"] + if stored_query and stored_query.parameters: + named_parameters = stored_query.parameters if not named_parameters and sql: named_parameters = derive_named_parameters(sql) 
named_parameter_values = { @@ -686,7 +686,7 @@ class QueryView(View): columns=columns, rows=rows, sql=sql, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, database=database, table=None, request=request, @@ -721,7 +721,7 @@ class QueryView(View): if stored_query: templates.insert( 0, - f"query-{to_css_class(database)}-{to_css_class(stored_query['name'])}.html", + f"query-{to_css_class(database)}-{to_css_class(stored_query.name)}.html", ) environment = datasette.get_jinja_environment(request) @@ -740,7 +740,7 @@ class QueryView(View): ) metadata = await datasette.get_database_metadata(database) if stored_query: - metadata = dict(stored_query) + metadata = stored_query_to_dict(stored_query) metadata.pop("source", None) renderers = {} @@ -775,7 +775,7 @@ class QueryView(View): ) show_hide_hidden = "" - if stored_query and stored_query.get("hide_sql"): + if stored_query and stored_query.hide_sql: if bool(params.get("_show_sql")): show_hide_link = path_with_removed_args(request, {"_show_sql"}) show_hide_text = "hide" @@ -843,7 +843,7 @@ class QueryView(View): datasette=datasette, actor=request.actor, database=database, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, request=request, sql=sql, params=params, @@ -863,7 +863,7 @@ class QueryView(View): "sql": sql, "params": params, }, - stored_query=stored_query["name"] if stored_query else None, + stored_query=stored_query.name if stored_query else None, private=private, stored_query_write=stored_query_write, db_is_immutable=not db.is_mutable, @@ -907,7 +907,7 @@ class QueryView(View): datasette, request, database=database, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, ), query_actions=query_actions, ), diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index de732431..46d71b8e 
100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -2,6 +2,7 @@ import json import re from datasette.resources import DatabaseResource, TableResource +from datasette.stored_queries import StoredQuery from datasette.utils import ( named_parameters as derive_named_parameters, escape_sqlite, @@ -281,18 +282,18 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis -async def _ensure_stored_query_execution_permissions(datasette, db, query, actor): - if query.get("is_trusted"): +async def _ensure_stored_query_execution_permissions( + datasette, db, query: StoredQuery, actor +): + if query.is_trusted: return - if query.get("is_write"): + if query.is_write: await datasette.ensure_permission( action="execute-write-sql", resource=DatabaseResource(db.name), actor=actor, ) - await datasette.ensure_query_write_permissions( - db.name, query["sql"], actor=actor - ) + await datasette.ensure_query_write_permissions(db.name, query.sql, actor=actor) else: await datasette.ensure_permission( action="execute-sql", @@ -482,7 +483,7 @@ async def _prepare_query_create(datasette, request, db, data): } -async def _prepare_query_update(datasette, request, db, existing, update): +async def _prepare_query_update(datasette, request, db, existing: StoredQuery, update): invalid_keys = set(update) - _query_update_fields if invalid_keys: raise QueryValidationError( @@ -490,8 +491,8 @@ async def _prepare_query_update(datasette, request, db, existing, update): ) update = _apply_query_data_types(update) - sql = update.get("sql", existing["sql"]) - query_is_write = existing["is_write"] + sql = update.get("sql", existing.sql) + query_is_write = existing.is_write derived = _derived_query_parameters(sql) parameters = None diff --git a/datasette/views/stored_queries.py b/datasette/views/stored_queries.py index 1a2c5d00..8c4e849e 100644 --- a/datasette/views/stored_queries.py +++ 
b/datasette/views/stored_queries.py @@ -1,6 +1,7 @@ from urllib.parse import parse_qsl, urlencode from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import stored_query_to_dict from datasette.utils import sqlite3, tilde_decode from datasette.utils.asgi import Response @@ -100,7 +101,7 @@ class QueryListView(BaseView): ) query_list_path = self.query_list_path(database) next_url = None - if page["next"]: + if page.next: pairs = [ (key, value) for key, value in parse_qsl( @@ -108,7 +109,7 @@ class QueryListView(BaseView): ) if key != "_next" ] - pairs.append(("_next", page["next"])) + pairs.append(("_next", page.next)) next_url = "{}?{}".format( query_list_path, urlencode(pairs), @@ -194,13 +195,13 @@ class QueryListView(BaseView): "database_color": ( self.ds.get_database(database).color if database is not None else None ), - "queries": page["queries"], - "next": page["next"], + "queries": page.queries, + "next": page.next, "next_url": next_url, - "has_more": page["has_more"], - "limit": page["limit"], - "show_private_note": any(query["is_private"] for query in page["queries"]), - "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), + "has_more": page.has_more, + "limit": page.limit, + "show_private_note": any(query.is_private for query in page.queries), + "show_trusted_note": any(query.is_trusted for query in page.queries), "query_list_path": query_list_path, "show_database": database is None, "facets": facets, @@ -213,7 +214,12 @@ class QueryListView(BaseView): }, } if format_ == "json": - return Response.json(data) + return Response.json( + { + **data, + "queries": [stored_query_to_dict(query) for query in page.queries], + } + ) return await self.render( ["query_list.html"], request, @@ -374,8 +380,11 @@ class QueryStoreView(QueryCreateView): return _error([str(ex)], 400) query = await self.ds.get_query(db.name, name) + assert query is not None if is_json: - return Response.json({"ok": True, 
"query": query}, status=201) + return Response.json( + {"ok": True, "query": stored_query_to_dict(query)}, status=201 + ) self.ds.add_message(request, "Query saved", self.ds.INFO) return Response.redirect(self.ds.urls.path(self.ds.urls.table(db.name, name))) @@ -395,7 +404,7 @@ class QueryDefinitionView(BaseView): actor=request.actor, ): return _error(["Permission denied"], 403) - return Response.json({"ok": True, "query": query}) + return Response.json({"ok": True, "query": stored_query_to_dict(query)}) class QueryUpdateView(BaseView): @@ -413,7 +422,7 @@ class QueryUpdateView(BaseView): actor=request.actor, ): return _error(["Permission denied: need update-query"], 403) - if existing.get("is_trusted"): + if existing.is_trusted: return _error(["Trusted queries cannot be updated using the API"], 403) try: @@ -444,10 +453,12 @@ class QueryUpdateView(BaseView): await self.ds.update_query(db.name, query_name, **update_kwargs) if data.get("return"): + query = await self.ds.get_query(db.name, query_name) + assert query is not None return Response.json( { "ok": True, - "query": await self.ds.get_query(db.name, query_name), + "query": stored_query_to_dict(query), } ) return Response.json({"ok": True}) diff --git a/docs/internals.rst b/docs/internals.rst index 66724aa9..4980ee8b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1039,11 +1039,11 @@ Example: await .get_query(database, name) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Returns a stored query dictionary, or ``None`` if the query does not exist. +Returns a ``StoredQuery`` dataclass instance, or ``None`` if the query does not exist. -The dictionary contains ``database``, ``name``, ``sql``, ``title``, ``description``, ``description_html``, ``hide_sql``, ``fragment``, ``parameters``, ``params``, ``is_write``, ``is_private``, ``is_trusted``, ``source``, ``owner_id``, ``on_success_message``, ``on_success_message_sql``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``. 
+``StoredQuery`` has the following attributes: ``database``, ``name``, ``sql``, ``title``, ``description``, ``description_html``, ``hide_sql``, ``fragment``, ``parameters``, ``is_write``, ``is_private``, ``is_trusted``, ``source``, ``owner_id``, ``on_success_message``, ``on_success_message_sql``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``. -``parameters`` and ``params`` contain the same list of explicit parameter names. +``parameters`` is a list of explicit parameter names. .. _datasette_list_queries: @@ -1087,12 +1087,12 @@ Lists stored queries visible to the specified actor. ``owner_id`` - string, optional Filter by owner actor ID. ``include_private`` - boolean, optional - Set to ``True`` to include a ``private`` boolean in each returned query dictionary indicating if anonymous users would be unable to view that query. + Set to ``True`` to populate a ``private`` boolean on each returned ``StoredQuery`` indicating if anonymous users would be unable to view that query. -The return value is a dictionary with these keys: +The return value is a ``StoredQueryPage`` dataclass instance with these attributes: -``queries`` - list of dictionaries - Stored query dictionaries, in the same format returned by :ref:`datasette_get_query`. +``queries`` - list of StoredQuery instances + Stored queries in the same format returned by :ref:`datasette_get_query`. ``next`` - string or None Pagination cursor for the next page, if one exists. 
``has_more`` - boolean diff --git a/tests/test_queries.py b/tests/test_queries.py index 70fb7a03..59fab8c0 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -4,6 +4,7 @@ import pytest from datasette.app import Datasette from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import StoredQuery, StoredQueryPage from datasette.utils.asgi import Forbidden @@ -87,38 +88,41 @@ async def test_add_get_and_remove_query(): } query = await ds.get_query("data", "top_customers") - assert query == { - "database": "data", - "name": "top_customers", - "sql": "select * from customers where region = :region", - "title": "Top customers", - "description": "Customers by region", - "description_html": None, - "hide_sql": True, - "fragment": "chart", - "params": ["region"], - "parameters": ["region"], - "is_write": False, - "is_private": False, - "is_trusted": True, - "source": "user", - "owner_id": "alice", - "on_success_message": None, - "on_success_message_sql": None, - "on_success_redirect": None, - "on_error_message": None, - "on_error_redirect": None, - } + assert query == StoredQuery( + database="data", + name="top_customers", + sql="select * from customers where region = :region", + title="Top customers", + description="Customers by region", + description_html=None, + hide_sql=True, + fragment="chart", + parameters=["region"], + is_write=False, + is_private=False, + is_trusted=True, + source="user", + owner_id="alice", + on_success_message=None, + on_success_message_sql=None, + on_success_redirect=None, + on_error_message=None, + on_error_redirect=None, + ) queries_page = await ds.list_queries("data", actor=None) - assert queries_page["queries"] == [query] - assert queries_page["next"] is None + assert queries_page == StoredQueryPage( + queries=[query], + next=None, + has_more=False, + limit=50, + ) await ds.remove_query("data", "top_customers") assert await ds.get_query("data", "top_customers") is None queries_page = await 
ds.list_queries("data", actor=None) - assert queries_page["queries"] == [] - assert queries_page["next"] is None + assert queries_page.queries == [] + assert queries_page.next is None @pytest.mark.asyncio @@ -156,13 +160,12 @@ async def test_update_query_only_updates_provided_fields(): ) query = await ds.get_query("data", "redirect") - assert query["title"] == "Updated" - assert query["parameters"] == [] - assert query["params"] == [] - assert query["on_success_redirect"] is None - assert query["sql"] == "select 1" - assert query["is_private"] is False - assert query["is_trusted"] is False + assert query.title == "Updated" + assert query.parameters == [] + assert query.on_success_redirect is None + assert query.sql == "select 1" + assert query.is_private is False + assert query.is_trusted is False options_row = ( await ds.get_internal_database().execute( """ @@ -198,28 +201,27 @@ async def test_config_queries_imported_to_internal_table(): ds.add_memory_database("query_config", name="data") await ds.invoke_startup() - assert await ds.get_query("data", "configured") == { - "database": "data", - "name": "configured", - "sql": "select :name as name", - "title": "Configured query", - "description": None, - "description_html": "

        Configured HTML

        ", - "hide_sql": False, - "fragment": None, - "params": ["name"], - "parameters": ["name"], - "is_write": False, - "is_private": False, - "is_trusted": True, - "source": "config", - "owner_id": None, - "on_success_message": None, - "on_success_message_sql": "select 'Hello ' || :name", - "on_success_redirect": None, - "on_error_message": None, - "on_error_redirect": None, - } + assert await ds.get_query("data", "configured") == StoredQuery( + database="data", + name="configured", + sql="select :name as name", + title="Configured query", + description=None, + description_html="

        Configured HTML

        ", + hide_sql=False, + fragment=None, + parameters=["name"], + is_write=False, + is_private=False, + is_trusted=True, + source="config", + owner_id=None, + on_success_message=None, + on_success_message_sql="select 'Hello ' || :name", + on_success_redirect=None, + on_error_message=None, + on_error_redirect=None, + ) @pytest.mark.asyncio @@ -1032,8 +1034,8 @@ async def test_query_update_api_rejects_config_only_fields(): "Invalid keys: description_html, on_success_message_sql" ] query = await ds.get_query("data", "editable") - assert query["description_html"] is None - assert query["on_success_message_sql"] is None + assert query.description_html is None + assert query.on_success_message_sql is None @pytest.mark.asyncio @@ -1072,9 +1074,9 @@ async def test_query_update_api_rejects_trusted_queries_but_internal_update_allo "Trusted queries cannot be updated using the API" ] query = await ds.get_query("data", "trusted_report") - assert query["is_trusted"] is True - assert query["sql"] == "select 1 as one" - assert query["title"] == "Original" + assert query.is_trusted is True + assert query.sql == "select 1 as one" + assert query.title == "Original" await ds.update_query( "data", @@ -1083,9 +1085,9 @@ async def test_query_update_api_rejects_trusted_queries_but_internal_update_allo title="Internal", ) query = await ds.get_query("data", "trusted_report") - assert query["is_trusted"] is True - assert query["sql"] == "select 3 as three" - assert query["title"] == "Internal" + assert query.is_trusted is True + assert query.sql == "select 3 as three" + assert query.title == "Internal" @pytest.mark.asyncio From 9f66cf72c1c9170f10e863d750ac4eee47113a7f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 21:42:50 -0700 Subject: [PATCH 274/299] Removed execute write SQL from query create page --- datasette/templates/query_create.html | 7 ------- 1 file changed, 7 deletions(-) diff --git a/datasette/templates/query_create.html 
b/datasette/templates/query_create.html index f5dadbff..ec910456 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -106,9 +106,6 @@ form.sql .query-create-sql textarea#sql-editor { .query-create-analysis-note { margin: 0; } -.query-create-action { - margin: 0.35rem 0 1rem; -} .query-create-analysis { margin-top: 0.8rem; } @@ -171,10 +168,6 @@ form.sql .query-create-sql textarea#sql-editor { Queries marked private can only be seen by you, their creator.

        - {% if sql and analysis_is_write %} -

        Execute write SQL

        - {% endif %} -

        From 737ff03efbb2bdc99b10d2654b7818526ec51e13 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 22:11:06 -0700 Subject: [PATCH 275/299] Expanded analysis of SQL operations, refs #2748 --- datasette/permissions.py | 10 ++ datasette/stored_queries.py | 137 +++++++++++++-- datasette/utils/sql_analysis.py | 289 +++++++++++++++++++++++++++---- datasette/views/execute_write.py | 9 +- datasette/views/query_helpers.py | 104 +++++++---- tests/test_actions_sql.py | 14 +- tests/test_internals_database.py | 34 ++-- tests/test_queries.py | 166 ++++++++++++++++++ tests/test_utils_sql_analysis.py | 97 +++++++++-- 9 files changed, 740 insertions(+), 120 deletions(-) diff --git a/datasette/permissions.py b/datasette/permissions.py index 917c58ab..a9a3cc7c 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -58,6 +58,16 @@ class Resource(ABC): self.child = child self._private = None # Sentinel to track if private was set + def __str__(self) -> str: + return "/".join( + str(part) for part in (self.parent, self.child) if part is not None + ) + + def __repr__(self) -> str: + return "{}(parent={!r}, child={!r})".format( + self.__class__.__name__, self.parent, self.child + ) + @property def private(self) -> bool: """ diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index bcfdfdb4..c4b083e5 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -2,11 +2,16 @@ from __future__ import annotations from dataclasses import dataclass import json -from typing import Any, Iterable +from typing import Any, Iterable, TYPE_CHECKING -from .resources import TableResource +from .resources import DatabaseResource, TableResource +from .permissions import Resource from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components from .utils.asgi import Forbidden +from .utils.sql_analysis import Operation, SQLAnalysis + +if TYPE_CHECKING: + from .app import Datasette UNCHANGED = object() @@ -583,20 
+588,94 @@ async def list_queries( ) -async def ensure_query_write_permissions( - datasette: Any, - database: str, - sql: str, - *, - actor: dict[str, Any] | None = None, - params: dict[str, Any] | None = None, - analysis: Any = None, -) -> Any: +PermissionRequirement = tuple[str, Resource] + + +def permission_for_operation(operation: Operation) -> PermissionRequirement | None: write_actions = { "insert": "insert-row", "update": "update-row", "delete": "delete-row", } + action = write_actions.get(operation.operation) + if ( + action + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + action, + TableResource(database=operation.database, table=operation.table), + ) + if operation.operation == "create" and operation.target_type == "table": + if operation.database is None: + return None + return ( + "create-table", + DatabaseResource(database=operation.database), + ) + if ( + operation.operation == "alter" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "alter-table", + TableResource(database=operation.database, table=operation.table), + ) + if ( + operation.operation == "drop" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "drop-table", + TableResource(database=operation.database, table=operation.table), + ) + if ( + operation.operation in {"create", "drop"} + and operation.target_type == "index" + and operation.database is not None + and operation.table is not None + ): + return ( + "alter-table", + TableResource(database=operation.database, table=operation.table), + ) + return None + + +def operation_is_write(operation: Operation) -> bool: + return operation.operation in { + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "attach", + "detach", + "pragma", + "analyze", + 
"reindex", + } + + +async def ensure_query_write_permissions( + datasette: Datasette, + database: str, + sql: str, + *, + actor: dict[str, object] | None = None, + params: dict[str, object] | None = None, + analysis: SQLAnalysis | None = None, +) -> SQLAnalysis: db = datasette.get_database(database) if analysis is None: if params is None: @@ -606,18 +685,38 @@ async def ensure_query_write_permissions( except sqlite3.DatabaseError as ex: raise Forbidden(f"Could not analyze query: {ex}") from ex - for access in analysis.table_accesses: - action = write_actions.get(access.operation) - if action is None: + has_semantic_schema_operation = any( + operation.operation in {"create", "alter", "drop"} + and operation.target_type in {"table", "index", "view", "trigger"} + for operation in analysis.operations + ) + for operation in analysis.operations: + if operation.internal and has_semantic_schema_operation: continue - if access.database != database: + if has_semantic_schema_operation and operation.operation in { + "read", + "insert", + "update", + "delete", + "reindex", + }: + continue + permission = permission_for_operation(operation) + if permission is None: + if operation_is_write(operation): + raise Forbidden( + "Unsupported SQL operation: {} {}".format( + operation.operation, operation.target_type + ) + ) + continue + action, resource = permission + if operation.database != database: raise Forbidden("Writable queries may not write to attached databases") if not await datasette.allowed( action=action, - resource=TableResource(database=access.database, table=access.table), + resource=resource, actor=actor, ): - raise Forbidden( - f"Permission denied: need {action} on {access.database}/{access.table}" - ) + raise Forbidden(f"Permission denied: need {action} on {resource}") return analysis diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index b5317b62..54f310fe 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py 
@@ -3,22 +3,66 @@ from typing import Literal from datasette.utils.sqlite import sqlite3 +SQLOperation = Literal[ + "read", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "attach", + "detach", + "pragma", + "analyze", + "reindex", +] +SQLTargetType = Literal[ + "table", + "index", + "view", + "trigger", + "schema", + "transaction", + "database", + "pragma", + "unknown", +] SQLTableOperation = Literal["read", "insert", "update", "delete"] @dataclass(frozen=True) -class SQLTableAccess: - operation: SQLTableOperation +class Operation: + operation: SQLOperation + target_type: SQLTargetType database: str | None - table: str + table: str | None sqlite_schema: str | None + target: str | None = None columns: tuple[str, ...] = () source: str | None = None + internal: bool = False @dataclass(frozen=True) class SQLAnalysis: - table_accesses: tuple[SQLTableAccess, ...] + operations: tuple[Operation, ...] + + +# Hashable dict key for grouping repeated authorizer callbacks while collecting columns. +@dataclass(frozen=True) +class OperationKey: + operation: SQLOperation + target_type: SQLTargetType + database: str | None + table: str | None + sqlite_schema: str | None + target: str | None + source: str | None + internal: bool _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { @@ -28,6 +72,36 @@ _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { sqlite3.SQLITE_DELETE: "delete", } +# Values are (operation, target_type) pairs used to construct Operation objects. 
+_CREATE_ACTIONS = { + sqlite3.SQLITE_CREATE_INDEX: ("create", "index"), + sqlite3.SQLITE_CREATE_TABLE: ("create", "table"), + sqlite3.SQLITE_CREATE_TRIGGER: ("create", "trigger"), + sqlite3.SQLITE_CREATE_VIEW: ("create", "view"), +} +_DROP_ACTIONS = { + sqlite3.SQLITE_DROP_INDEX: ("drop", "index"), + sqlite3.SQLITE_DROP_TABLE: ("drop", "table"), + sqlite3.SQLITE_DROP_TRIGGER: ("drop", "trigger"), + sqlite3.SQLITE_DROP_VIEW: ("drop", "view"), +} +for action_name, operation, target_type in ( + ("SQLITE_CREATE_TEMP_INDEX", "create", "index"), + ("SQLITE_CREATE_TEMP_TABLE", "create", "table"), + ("SQLITE_CREATE_TEMP_TRIGGER", "create", "trigger"), + ("SQLITE_CREATE_TEMP_VIEW", "create", "view"), + ("SQLITE_DROP_TEMP_INDEX", "drop", "index"), + ("SQLITE_DROP_TEMP_TABLE", "drop", "table"), + ("SQLITE_DROP_TEMP_TRIGGER", "drop", "trigger"), + ("SQLITE_DROP_TEMP_VIEW", "drop", "view"), +): + action_value = getattr(sqlite3, action_name, None) + if action_value is not None: + actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS + actions[action_value] = (operation, target_type) + +_SQLITE_SCHEMA_TABLES = {"sqlite_master", "sqlite_schema"} + def analyze_sql_tables( conn, @@ -38,15 +112,13 @@ def analyze_sql_tables( schema_to_database: dict[str, str] | None = None, ) -> SQLAnalysis: """ - Return tables accessed by a SQL statement according to SQLite's authorizer. + Return operations performed by a SQL statement according to SQLite's authorizer. This function is synchronous and connection-based. It temporarily installs a - SQLite authorizer, prepares ``EXPLAIN ``, and returns the table access + SQLite authorizer, prepares ``EXPLAIN ``, and returns the operation callbacks observed while SQLite compiles the statement. 
""" - accesses: dict[ - tuple[SQLTableOperation, str | None, str, str | None, str | None], set[str] - ] = {} + operations: dict[OperationKey, set[str]] = {} def database_for_schema(sqlite_schema): if schema_to_database and sqlite_schema in schema_to_database: @@ -55,21 +127,166 @@ def analyze_sql_tables( return database_name return sqlite_schema + def record( + operation: SQLOperation, + target_type: SQLTargetType, + *, + database: str | None, + table: str | None, + sqlite_schema: str | None, + target: str | None, + source: str | None, + column: str | None = None, + internal: bool = False, + ): + key = OperationKey( + operation=operation, + target_type=target_type, + database=database, + table=table, + sqlite_schema=sqlite_schema, + target=target, + source=source, + internal=internal, + ) + columns = operations.setdefault(key, set()) + if column is not None: + columns.add(column) + def authorizer(action, arg1, arg2, sqlite_schema, source): operation = _ACTION_TO_OPERATION.get(action) - if operation is None or arg1 is None: + if operation is not None and arg1 is not None: + target_type = "schema" if arg1 in _SQLITE_SCHEMA_TABLES else "table" + column = ( + arg2 if operation in ("read", "update") and arg2 is not None else None + ) + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=arg1 if target_type == "table" else None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + column=column, + internal=target_type == "schema", + ) + return sqlite3.SQLITE_OK + + create_operation = _CREATE_ACTIONS.get(action) + if create_operation is not None and arg1 is not None: + operation, target_type = create_operation + related_table = arg2 if target_type in {"index", "trigger"} else arg1 + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=related_table, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + drop_operation = 
_DROP_ACTIONS.get(action) + if drop_operation is not None and arg1 is not None: + operation, target_type = drop_operation + related_table = arg2 if target_type in {"index", "trigger"} else arg1 + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=related_table, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ALTER_TABLE and arg2 is not None: + record( + "alter", + "table", + database=database_for_schema(arg1), + table=arg2, + sqlite_schema=arg1, + target=arg2, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_TRANSACTION and arg1 is not None: + record( + arg1.lower(), + "transaction", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ATTACH and arg1 is not None: + record( + "attach", + "database", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_DETACH and arg1 is not None: + record( + "detach", + "database", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_PRAGMA and arg1 is not None: + record( + "pragma", + "pragma", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ANALYZE: + record( + "analyze", + "database" if arg1 is None else "table", + database=database_for_schema(sqlite_schema), + table=arg1, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_REINDEX and arg1 is not None: + record( + "reindex", + "index", + database=database_for_schema(sqlite_schema), + table=None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) return 
sqlite3.SQLITE_OK - key = ( - operation, - database_for_schema(sqlite_schema), - arg1, - sqlite_schema, - source, - ) - columns = accesses.setdefault(key, set()) - if operation in ("read", "update") and arg2 is not None: - columns.add(arg2) return sqlite3.SQLITE_OK conn.set_authorizer(authorizer) @@ -78,22 +295,26 @@ def analyze_sql_tables( finally: conn.set_authorizer(None) + has_schema_operation = any( + key.target_type in {"table", "index", "view", "trigger"} + and key.operation in {"create", "alter", "drop"} + for key in operations + ) + return SQLAnalysis( - table_accesses=tuple( - SQLTableAccess( - operation=operation, - database=database, - table=table, - sqlite_schema=sqlite_schema, + operations=tuple( + Operation( + operation=key.operation, + target_type=key.target_type, + database=key.database, + table=key.table, + sqlite_schema=key.sqlite_schema, + target=key.target, columns=tuple(sorted(columns)), - source=source, + source=key.source, + internal=key.internal + or (has_schema_operation and key.target_type == "schema"), ) - for ( - operation, - database, - table, - sqlite_schema, - source, - ), columns in accesses.items() + for key, columns in operations.items() ) ) diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 0054300c..cead8926 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -193,9 +193,12 @@ class ExecuteWriteView(BaseView): status=400, ) - message = "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" - ) + if cursor.rowcount == -1: + message = "Query executed" + else: + message = "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) if _wants_json(request, is_json, data): return _block_framing( Response.json( diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 46d71b8e..922f4e52 100644 --- a/datasette/views/query_helpers.py +++ 
b/datasette/views/query_helpers.py @@ -1,8 +1,12 @@ import json import re -from datasette.resources import DatabaseResource, TableResource -from datasette.stored_queries import StoredQuery +from datasette.resources import DatabaseResource +from datasette.stored_queries import ( + StoredQuery, + operation_is_write, + permission_for_operation, +) from datasette.utils import ( named_parameters as derive_named_parameters, escape_sqlite, @@ -12,6 +16,7 @@ from datasette.utils import ( InvalidSql, ) from datasette.utils.asgi import Forbidden +from datasette.utils.sql_analysis import Operation, SQLAnalysis _query_name_re = re.compile(r"^[^/\.\n]+$") @@ -123,11 +128,8 @@ def _coerce_query_parameters(value, derived): return parameters -def _analysis_is_write(analysis): - return any( - access.operation in {"insert", "update", "delete"} - for access in analysis.table_accesses - ) +def _analysis_is_write(analysis: SQLAnalysis) -> bool: + return any(operation_is_write(operation) for operation in analysis.operations) def _block_framing(response): @@ -201,34 +203,66 @@ async def _analyze_user_query(datasette, db, sql, *, actor): return is_write, derived, analysis -def _analysis_rows(analysis): - write_actions = { - "insert": "insert-row", - "update": "update-row", - "delete": "delete-row", - } - return [ - { - "operation": access.operation, - "database": access.database, - "table": access.table, - "required_permission": write_actions.get(access.operation, ""), - "source": access.source, - } - for access in analysis.table_accesses - ] +def _semantic_schema_operation_is_present(operations: tuple[Operation, ...]) -> bool: + return any( + operation.operation in {"create", "alter", "drop"} + and operation.target_type in {"table", "index", "view", "trigger"} + for operation in operations + ) -async def _analysis_rows_with_permissions(datasette, analysis, actor): +def _display_operations(analysis: SQLAnalysis) -> list[Operation]: + has_semantic_schema_operation = 
_semantic_schema_operation_is_present( + analysis.operations + ) + operations = [] + for operation in analysis.operations: + if operation.internal and has_semantic_schema_operation: + continue + if has_semantic_schema_operation and operation.operation in { + "read", + "insert", + "update", + "delete", + "reindex", + }: + continue + operations.append(operation) + return operations + + +def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: + rows = [] + for operation in _display_operations(analysis): + permission = permission_for_operation(operation) + required_permission = permission[0] if permission else "" + rows.append( + { + "operation": operation.operation, + "database": operation.database, + "table": operation.table or operation.target, + "required_permission": required_permission, + "source": operation.source, + } + ) + return rows + + +async def _analysis_rows_with_permissions( + datasette, analysis: SQLAnalysis, actor +) -> list[dict[str, object]]: rows = _analysis_rows(analysis) - for row in rows: - permission = row["required_permission"] + for row, operation in zip(rows, _display_operations(analysis)): + permission = permission_for_operation(operation) if permission: + action, resource = permission row["allowed"] = await datasette.allowed( - action=permission, - resource=TableResource(row["database"], row["table"]), + action=action, + resource=resource, actor=actor, ) + elif operation_is_write(operation): + row["allowed"] = False else: row["allowed"] = None return rows @@ -398,15 +432,19 @@ async def _inserted_row_url(datasette, db, analysis, cursor): if lastrowid is None: return None direct_inserts = [ - access - for access in analysis.table_accesses - if access.operation == "insert" - and access.source is None - and access.database == db.name + operation + for operation in analysis.operations + if operation.operation == "insert" + and operation.target_type == "table" + and not operation.internal + and operation.source is None + and 
operation.database == db.name ] if len(direct_inserts) != 1: return None table = direct_inserts[0].table + if table is None: + return None pks = await db.primary_keys(table) use_rowid = not pks select = ( diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 863d2529..a1fca971 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -12,10 +12,22 @@ import pytest import pytest_asyncio from datasette.app import Datasette from datasette.permissions import PermissionSQL -from datasette.resources import TableResource +from datasette.resources import DatabaseResource, QueryResource, TableResource from datasette import hookimpl +def test_resource_string_representations(): + assert str(DatabaseResource("content")) == "content" + assert repr(DatabaseResource("content")) == ( + "DatabaseResource(parent='content', child=None)" + ) + assert str(TableResource("content", "dogs")) == "content/dogs" + assert repr(TableResource("content", "dogs")) == ( + "TableResource(parent='content', child='dogs')" + ) + assert str(QueryResource("content", "insert-a-dog")) == "content/insert-a-dog" + + # Test plugin that provides permission rules class PermissionRulesPlugin: def __init__(self, rules_callback): diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 5481a398..d6e130b4 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -698,14 +698,17 @@ async def test_analyze_sql(): assert [ ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal ] == [ ("read", "data", "main", "dogs", ("id", 
"name"), None), ] @@ -722,14 +725,17 @@ async def test_analyze_sql_insert_select(): assert { ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal } == { ("insert", "data", "main", "dogs", (), None), ("read", "data", "main", "cats", ("name",), None), diff --git a/tests/test_queries.py b/tests/test_queries.py index 59fab8c0..4b8a6486 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1643,6 +1643,172 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_create_table_uses_create_table_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "permissions": { + "insert-row": {"id": "row-writer"}, + "update-row": {"id": "row-writer"}, + }, + "databases": { + "data": { + "permissions": { + "view-database": {"id": ["creator", "row-writer"]}, + "execute-write-sql": {"id": ["creator", "row-writer"]}, + "create-table": {"id": "creator"}, + } + } + }, + }, + ) + db = ds.add_memory_database("execute_write_create_table", name="data") + await ds.invoke_startup() + + analysis_response = await ds.client.get( + "/data/-/execute-write/analyze", + actor={"id": "creator"}, + params={"sql": "create table foobar (id integer primary key, name text)"}, + ) + allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "creator"}, + json={"sql": "create table foobar (id integer primary key, name text)"}, + ) + row_permission_response = await ds.client.post( + 
"/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "create table should_not_exist (id integer primary key)"}, + ) + + assert analysis_response.status_code == 200 + analysis_data = analysis_response.json() + assert analysis_data["ok"] is True + assert analysis_data["execute_disabled"] is False + assert analysis_data["analysis_rows"] == [ + { + "operation": "create", + "database": "data", + "table": "foobar", + "required_permission": "create-table", + "source": None, + "allowed": True, + } + ] + + assert allowed_response.status_code == 200 + assert allowed_response.json()["ok"] is True + assert allowed_response.json()["message"] == "Query executed" + assert await db.table_exists("foobar") + + assert row_permission_response.status_code == 403 + assert row_permission_response.json()["errors"] == [ + "Permission denied: need create-table on data" + ] + assert not await db.table_exists("should_not_exist") + + +@pytest.mark.asyncio +async def test_execute_write_alter_and_drop_table_use_schema_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "permissions": { + "delete-row": {"id": "row-writer"}, + "update-row": {"id": "row-writer"}, + }, + "databases": { + "data": { + "permissions": { + "view-database": {"id": ["alterer", "dropper", "row-writer"]}, + "execute-write-sql": { + "id": ["alterer", "dropper", "row-writer"] + }, + }, + "tables": { + "dogs": { + "permissions": { + "alter-table": {"id": "alterer"}, + "drop-table": {"id": "dropper"}, + } + } + }, + } + }, + }, + ) + db = ds.add_memory_database("execute_write_alter_drop_table", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await db.execute_write("create table cats (id integer primary key, name text)") + await ds.invoke_startup() + + alter_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "alter table dogs add column age integer"}, + ) + 
alter_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "alter table cats add column age integer"}, + ) + + assert alter_allowed_response.status_code == 200 + assert "age" in [column.name for column in await db.table_column_details("dogs")] + assert alter_row_permission_response.status_code == 403 + assert alter_row_permission_response.json()["errors"] == [ + "Permission denied: need alter-table on data/cats" + ] + assert "age" not in [ + column.name for column in await db.table_column_details("cats") + ] + + create_index_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "create index idx_dogs_name on dogs(name)"}, + ) + create_index_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "create index idx_cats_name on cats(name)"}, + ) + drop_index_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "drop index idx_dogs_name"}, + ) + + assert create_index_allowed_response.status_code == 200 + assert create_index_row_permission_response.status_code == 403 + assert create_index_row_permission_response.json()["errors"] == [ + "Permission denied: need alter-table on data/cats" + ] + assert drop_index_allowed_response.status_code == 200 + + drop_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "dropper"}, + json={"sql": "drop table dogs"}, + ) + drop_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "drop table cats"}, + ) + + assert drop_allowed_response.status_code == 200 + assert not await db.table_exists("dogs") + assert drop_row_permission_response.status_code == 403 + assert drop_row_permission_response.json()["errors"] == [ + "Permission denied: need drop-table on data/cats" + ] + assert await 
db.table_exists("cats") + + @pytest.mark.asyncio async def test_execute_write_insert_links_to_inserted_row(): ds = Datasette(memory=True, default_deny=True) diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 5730cd0d..5306a515 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -26,17 +26,20 @@ def conn(): conn.close() -def as_tuples(analysis): +def table_operation_tuples(analysis): return [ ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal ] @@ -48,7 +51,7 @@ def test_analyze_select_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("read", "data", "main", "cats", ("id", "name"), None), ("read", "data", "main", "dogs", ("age", "id", "name"), None), } @@ -57,11 +60,73 @@ def test_analyze_select_tables(conn): def test_analyze_uses_sqlite_schema_as_default_database(conn): analysis = analyze_sql_tables(conn, "select name from dogs") - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("read", "main", "main", "dogs", ("name",), None), } +def operation_dict(operation): + return { + "operation": operation.operation, + "target_type": operation.target_type, + "database": operation.database, + "sqlite_schema": operation.sqlite_schema, + "table": operation.table, + "target": operation.target, + "columns": operation.columns, + "source": operation.source, + "internal": operation.internal, + } + + +def test_analyze_create_table_operation(): + conn = sqlite3.connect(":memory:") + try: + 
analysis = analyze_sql_tables( + conn, + "create table foobar (id integer primary key, name text)", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "create", + "target_type": "table", + "database": "data", + "sqlite_schema": "main", + "table": "foobar", + "target": "foobar", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + assert not [ + operation + for operation in analysis.operations + if operation.table in {"sqlite_master", "sqlite_schema"} + and not operation.internal + ] + + +def test_analyze_transaction_operation(conn): + analysis = analyze_sql_tables(conn, "commit", database_name="data") + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "commit", + "target_type": "transaction", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "COMMIT", + "columns": (), + "source": None, + "internal": False, + } + ] + + def test_analyze_insert_tables(conn): analysis = analyze_sql_tables( conn, @@ -70,7 +135,7 @@ def test_analyze_insert_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "data", "main", "dogs", (), None), ("read", "data", "main", "dogs", ("id", "name"), "dogs_after_insert"), ("update", "data", "main", "cats", ("name",), "dogs_after_insert"), @@ -87,7 +152,7 @@ def test_analyze_update_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("update", "data", "main", "dogs", ("age",), None), ("read", "data", "main", "dogs", ("age", "name"), None), } @@ -101,7 +166,7 @@ def test_analyze_delete_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("delete", "data", "main", "dogs", (), None), ("read", "data", "main", "dogs", ("name",), None), } 
@@ -121,7 +186,7 @@ def test_analyze_insert_select_with_cte(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "data", "main", "cats", (), None), ("read", "data", "main", "dogs", ("age", "name"), "old_dogs"), } @@ -135,7 +200,7 @@ def test_analyze_view_with_instead_of_trigger(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("update", "data", "main", "dog_names", ("name",), None), ("read", "data", "main", "dogs", ("id", "name"), "dog_names"), ("read", "data", "main", "dog_names", ("id", "name"), "dog_names"), @@ -163,7 +228,7 @@ def test_analyze_attached_database_tables(conn): schema_to_database={"extra": "extra_db"}, ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "extra_db", "extra", "people", (), None), ("read", "data", "main", "dogs", ("name",), None), } From 86d0e7335f98a88874df31ec0adb64967446dfac Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 14:52:52 -0700 Subject: [PATCH 276/299] Deny unsupported write SQL operations by default Require view-table permission for reads discovered inside write SQL analysis, including INSERT ... SELECT and CREATE TABLE ... AS SELECT. Record additional SQLite authorizer callbacks as Operation values so unsupported functions, savepoints, virtual table DDL, and unknown callbacks are denied unless explicitly handled. 
--- datasette/stored_queries.py | 43 +++---- datasette/utils/sql_analysis.py | 192 +++++++++++++++++++++++++++++-- datasette/views/execute_write.py | 4 +- datasette/views/query_helpers.py | 32 ++---- tests/test_queries.py | 136 ++++++++++++++++++++-- tests/test_utils_sql_analysis.py | 94 +++++++++++++++ 6 files changed, 433 insertions(+), 68 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index c4b083e5..4b0fe6a6 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -592,6 +592,16 @@ PermissionRequirement = tuple[str, Resource] def permission_for_operation(operation: Operation) -> PermissionRequirement | None: + if ( + operation.operation == "read" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "view-table", + TableResource(database=operation.database, table=operation.table), + ) write_actions = { "insert": "insert-row", "update": "update-row", @@ -648,6 +658,10 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No return None +def operation_should_be_ignored(operation: Operation) -> bool: + return operation.internal or operation.operation == "select" + + def operation_is_write(operation: Operation) -> bool: return operation.operation in { "insert", @@ -659,11 +673,13 @@ def operation_is_write(operation: Operation) -> bool: "begin", "commit", "rollback", + "savepoint", "attach", "detach", "pragma", "analyze", "reindex", + "unknown", } @@ -685,34 +701,19 @@ async def ensure_query_write_permissions( except sqlite3.DatabaseError as ex: raise Forbidden(f"Could not analyze query: {ex}") from ex - has_semantic_schema_operation = any( - operation.operation in {"create", "alter", "drop"} - and operation.target_type in {"table", "index", "view", "trigger"} - for operation in analysis.operations - ) for operation in analysis.operations: - if operation.internal and has_semantic_schema_operation: - continue - 
if has_semantic_schema_operation and operation.operation in { - "read", - "insert", - "update", - "delete", - "reindex", - }: + if operation_should_be_ignored(operation): continue permission = permission_for_operation(operation) if permission is None: - if operation_is_write(operation): - raise Forbidden( - "Unsupported SQL operation: {} {}".format( - operation.operation, operation.target_type - ) + raise Forbidden( + "Unsupported SQL operation: {} {}".format( + operation.operation, operation.target_type ) - continue + ) action, resource = permission if operation.database != database: - raise Forbidden("Writable queries may not write to attached databases") + raise Forbidden("Writable queries may not access attached databases") if not await datasette.allowed( action=action, resource=resource, diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 54f310fe..8963da77 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -8,30 +8,39 @@ SQLOperation = Literal[ "insert", "update", "delete", + "select", + "function", "create", "alter", "drop", "begin", "commit", "rollback", + "savepoint", "attach", "detach", "pragma", "analyze", "reindex", + "unknown", ] SQLTargetType = Literal[ "table", "index", "view", "trigger", + "virtual-table", "schema", + "statement", "transaction", "database", "pragma", + "function", "unknown", ] SQLTableOperation = Literal["read", "insert", "update", "delete"] +SQLSchemaOperation = Literal["create", "drop"] +SQLSchemaTargetType = Literal["index", "table", "trigger", "view", "virtual-table"] @dataclass(frozen=True) @@ -73,19 +82,34 @@ _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { } # Values are (operation, target_type) pairs used to construct Operation objects. 
-_CREATE_ACTIONS = { +_CREATE_ACTIONS: dict[int, tuple[SQLSchemaOperation, SQLSchemaTargetType]] = { sqlite3.SQLITE_CREATE_INDEX: ("create", "index"), sqlite3.SQLITE_CREATE_TABLE: ("create", "table"), sqlite3.SQLITE_CREATE_TRIGGER: ("create", "trigger"), sqlite3.SQLITE_CREATE_VIEW: ("create", "view"), } -_DROP_ACTIONS = { +_DROP_ACTIONS: dict[int, tuple[SQLSchemaOperation, SQLSchemaTargetType]] = { sqlite3.SQLITE_DROP_INDEX: ("drop", "index"), sqlite3.SQLITE_DROP_TABLE: ("drop", "table"), sqlite3.SQLITE_DROP_TRIGGER: ("drop", "trigger"), sqlite3.SQLITE_DROP_VIEW: ("drop", "view"), } -for action_name, operation, target_type in ( + + +def _add_schema_action( + action_name: str, + operation: SQLSchemaOperation, + target_type: SQLSchemaTargetType, +) -> None: + action_value = getattr(sqlite3, action_name, None) + if action_value is not None: + actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS + actions[action_value] = (operation, target_type) + + +_TEMP_SCHEMA_ACTIONS: tuple[ + tuple[str, SQLSchemaOperation, SQLSchemaTargetType], ... +] = ( ("SQLITE_CREATE_TEMP_INDEX", "create", "index"), ("SQLITE_CREATE_TEMP_TABLE", "create", "table"), ("SQLITE_CREATE_TEMP_TRIGGER", "create", "trigger"), @@ -94,13 +118,76 @@ for action_name, operation, target_type in ( ("SQLITE_DROP_TEMP_TABLE", "drop", "table"), ("SQLITE_DROP_TEMP_TRIGGER", "drop", "trigger"), ("SQLITE_DROP_TEMP_VIEW", "drop", "view"), -): - action_value = getattr(sqlite3, action_name, None) - if action_value is not None: - actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS - actions[action_value] = (operation, target_type) +) +for schema_action in _TEMP_SCHEMA_ACTIONS: + _add_schema_action(*schema_action) -_SQLITE_SCHEMA_TABLES = {"sqlite_master", "sqlite_schema"} +_VTABLE_SCHEMA_ACTIONS: tuple[ + tuple[str, SQLSchemaOperation, SQLSchemaTargetType], ... 
+] = ( + ("SQLITE_CREATE_VTABLE", "create", "virtual-table"), + ("SQLITE_DROP_VTABLE", "drop", "virtual-table"), +) +for schema_action in _VTABLE_SCHEMA_ACTIONS: + _add_schema_action(*schema_action) + +_SQLITE_SCHEMA_TABLES = { + "sqlite_master", + "sqlite_schema", + "sqlite_temp_master", + "sqlite_temp_schema", +} +_SQLITE_INTERNAL_SCHEMA_FUNCTIONS = { + "length", + "like", + "printf", + "sqlite_drop_column", + "sqlite_rename_column", + "sqlite_rename_quotefix", + "sqlite_rename_table", + "sqlite_rename_test", + "substr", +} + +_AUTHORIZER_ACTION_NAMES = { + getattr(sqlite3, name): name + for name in ( + "SQLITE_CREATE_INDEX", + "SQLITE_CREATE_TABLE", + "SQLITE_CREATE_TEMP_INDEX", + "SQLITE_CREATE_TEMP_TABLE", + "SQLITE_CREATE_TEMP_TRIGGER", + "SQLITE_CREATE_TEMP_VIEW", + "SQLITE_CREATE_TRIGGER", + "SQLITE_CREATE_VIEW", + "SQLITE_DELETE", + "SQLITE_DROP_INDEX", + "SQLITE_DROP_TABLE", + "SQLITE_DROP_TEMP_INDEX", + "SQLITE_DROP_TEMP_TABLE", + "SQLITE_DROP_TEMP_TRIGGER", + "SQLITE_DROP_TEMP_VIEW", + "SQLITE_DROP_TRIGGER", + "SQLITE_DROP_VIEW", + "SQLITE_INSERT", + "SQLITE_PRAGMA", + "SQLITE_READ", + "SQLITE_SELECT", + "SQLITE_TRANSACTION", + "SQLITE_UPDATE", + "SQLITE_ATTACH", + "SQLITE_DETACH", + "SQLITE_ALTER_TABLE", + "SQLITE_REINDEX", + "SQLITE_ANALYZE", + "SQLITE_CREATE_VTABLE", + "SQLITE_DROP_VTABLE", + "SQLITE_FUNCTION", + "SQLITE_SAVEPOINT", + "SQLITE_RECURSIVE", + ) + if hasattr(sqlite3, name) +} def analyze_sql_tables( @@ -287,6 +374,52 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK + if action == sqlite3.SQLITE_SELECT: + record( + "select", + "statement", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=None, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_FUNCTION and arg2 is not None: + record( + "function", + "function", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=arg2, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_SAVEPOINT and arg1 
is not None: + record( + "savepoint", + "transaction", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target="{} {}".format(arg1, arg2) if arg2 is not None else arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + action_name = _AUTHORIZER_ACTION_NAMES.get(action, "SQLITE_{}".format(action)) + record( + "unknown", + "unknown", + database=database_for_schema(sqlite_schema), + table=None, + sqlite_schema=sqlite_schema, + target=action_name, + source=source, + ) return sqlite3.SQLITE_OK conn.set_authorizer(authorizer) @@ -296,10 +429,46 @@ def analyze_sql_tables( conn.set_authorizer(None) has_schema_operation = any( - key.target_type in {"table", "index", "view", "trigger"} + key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} for key in operations ) + dropped_tables = { + (key.database, key.table) + for key in operations + if key.operation == "drop" and key.target_type == "table" + } + + def key_is_drop_table_delete(key: OperationKey) -> bool: + return ( + key.operation == "delete" + and key.target_type == "table" + and (key.database, key.table) in dropped_tables + ) + + has_user_table_access_in_schema_operation = any( + key.operation in {"read", "insert", "update", "delete"} + and key.target_type == "table" + and not key.internal + and not key_is_drop_table_delete(key) + for key in operations + ) + + def operation_is_internal(key: OperationKey) -> bool: + if key.internal or (has_schema_operation and key.target_type == "schema"): + return True + if has_schema_operation and key.operation == "reindex": + return True + if ( + has_schema_operation + and not has_user_table_access_in_schema_operation + and key.operation == "function" + and key.target in _SQLITE_INTERNAL_SCHEMA_FUNCTIONS + ): + return True + if key_is_drop_table_delete(key): + return True + return False return SQLAnalysis( operations=tuple( @@ -312,8 +481,7 @@ def analyze_sql_tables( target=key.target, 
columns=tuple(sorted(columns)), source=key.source, - internal=key.internal - or (has_schema_operation and key.target_type == "schema"), + internal=operation_is_internal(key), ) for key, columns in operations.items() ) diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index cead8926..19006ac5 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -99,9 +99,7 @@ class ExecuteWriteView(BaseView): "parameter_names": parameter_names, "parameter_values": parameter_values, "analysis_error": analysis_error, - "analysis_rows": [ - row for row in analysis_rows if row["operation"] != "read" - ], + "analysis_rows": analysis_rows, "execution_message": execution_message, "execution_links": execution_links, "execution_ok": execution_ok, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 922f4e52..05a0d73e 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -5,6 +5,7 @@ from datasette.resources import DatabaseResource from datasette.stored_queries import ( StoredQuery, operation_is_write, + operation_should_be_ignored, permission_for_operation, ) from datasette.utils import ( @@ -203,29 +204,10 @@ async def _analyze_user_query(datasette, db, sql, *, actor): return is_write, derived, analysis -def _semantic_schema_operation_is_present(operations: tuple[Operation, ...]) -> bool: - return any( - operation.operation in {"create", "alter", "drop"} - and operation.target_type in {"table", "index", "view", "trigger"} - for operation in operations - ) - - def _display_operations(analysis: SQLAnalysis) -> list[Operation]: - has_semantic_schema_operation = _semantic_schema_operation_is_present( - analysis.operations - ) operations = [] for operation in analysis.operations: - if operation.internal and has_semantic_schema_operation: - continue - if has_semantic_schema_operation and operation.operation in { - "read", - "insert", - "update", - "delete", - 
"reindex", - }: + if operation_should_be_ignored(operation): continue operations.append(operation) return operations @@ -252,6 +234,7 @@ async def _analysis_rows_with_permissions( datasette, analysis: SQLAnalysis, actor ) -> list[dict[str, object]]: rows = _analysis_rows(analysis) + is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): permission = permission_for_operation(operation) if permission: @@ -261,7 +244,7 @@ async def _analysis_rows_with_permissions( resource=resource, actor=actor, ) - elif operation_is_write(operation): + elif is_write: row["allowed"] = False else: row["allowed"] = None @@ -360,7 +343,7 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): "ok": analysis_error is None, "parameters": parameter_names, "analysis_error": analysis_error, - "analysis_rows": [row for row in analysis_rows if row["operation"] != "read"], + "analysis_rows": analysis_rows, "execute_disabled": bool( (not sql) or analysis_error @@ -374,6 +357,7 @@ async def _query_create_analysis_data(datasette, db, sql, actor): parameter_names = [] analysis_rows = [] analysis_error = None + analysis: SQLAnalysis | None = None if has_sql: try: parameter_names = _derived_query_parameters(sql) @@ -390,9 +374,7 @@ async def _query_create_analysis_data(datasette, db, sql, actor): "analysis_error": analysis_error, "analysis_rows": analysis_rows, "has_sql": has_sql, - "analysis_is_write": bool( - analysis_rows and any(row["required_permission"] for row in analysis_rows) - ), + "analysis_is_write": _analysis_is_write(analysis) if analysis else False, "save_disabled": bool( (not has_sql) or analysis_error diff --git a/tests/test_queries.py b/tests/test_queries.py index 4b8a6486..97ec973f 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1181,11 +1181,10 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert 'Required permission' in create_response.text assert 'Source' not in 
create_response.text assert "read" in create_response.text + assert "view-table" in create_response.text assert ( - create_response.text.count( - 'n/a' - ) - == 2 + 'n/a' + not in create_response.text ) assert create_response.text.index( 'value="Save query"' @@ -1255,9 +1254,9 @@ async def test_create_query_analyze_endpoint_uses_sql_only(): "operation": "read", "database": "data", "table": "dogs", - "required_permission": "", + "required_permission": "view-table", "source": None, - "allowed": None, + "allowed": True, } ] @@ -1375,7 +1374,8 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'Required permission' in response.text assert "insert" in response.text assert "update" in response.text - assert "read" not in response.text + assert "read" in response.text + assert "view-table" in response.text assert 'action="/data/-/execute-write"' in response.text assert "insert into dogs (name) values ('Cleo')" in response.text assert (await db.execute("select count(*) from dogs")).first()[0] == 0 @@ -1643,6 +1643,127 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_insert_select_requires_view_table_on_source(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + }, + "public_log": {"permissions": {"insert-row": {"id": "writer"}}}, + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_insert_select_source", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("create table public_log (value text)") + await db.execute_write("insert into secret values ('sensitive')") + await ds.invoke_startup() + + denied_response 
= await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "insert into public_log(value) select value from secret"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need view-table on data/secret" + ] + assert (await db.execute("select value from public_log")).dicts() == [] + + +@pytest.mark.asyncio +async def test_execute_write_create_table_as_select_requires_view_table_on_source(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "creator"}, + "execute-write-sql": {"id": "creator"}, + "create-table": {"id": "creator"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_create_as_select_source", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("insert into secret values ('sensitive')") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "creator"}, + json={"sql": "create table copied_secret as select value from secret"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need view-table on data/secret" + ] + assert not await db.table_exists("copied_secret") + + +@pytest.mark.asyncio +async def test_execute_write_rejects_function_operations(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_function_operation", name="data") + await db.execute_write("create table dogs (id integer primary key, 
name text)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "insert into dogs (name) values (upper('cleo'))"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: function function" + ] + assert (await db.execute("select name from dogs")).dicts() == [] + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( @@ -1733,6 +1854,7 @@ async def test_execute_write_alter_and_drop_table_use_schema_permissions(): "permissions": { "alter-table": {"id": "alterer"}, "drop-table": {"id": "dropper"}, + "view-table": {"id": "alterer"}, } } }, diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 5306a515..2ae11502 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -127,6 +127,100 @@ def test_analyze_transaction_operation(conn): ] +def test_analyze_savepoint_operation(conn): + analysis = analyze_sql_tables(conn, "savepoint s", database_name="data") + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "savepoint", + "target_type": "transaction", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "BEGIN s", + "columns": (), + "source": None, + "internal": False, + } + ] + + +def test_analyze_function_operation(conn): + analysis = analyze_sql_tables( + conn, + "insert into dogs (name) values (upper(:name))", + {"name": "Cleo"}, + database_name="data", + ) + + assert { + ( + operation.operation, + operation.target_type, + operation.target, + operation.database, + operation.table, + ) + for operation in analysis.operations + } == { + ("insert", "table", "dogs", "data", "dogs"), + ("function", "function", "upper", None, None), + ("read", "table", "dogs", "data", "dogs"), + ("update", "table", "cats", "data", "cats"), + ("read", 
"table", "cats", "data", "cats"), + ("insert", "table", "log", "data", "log"), + } + + +def test_analyze_create_virtual_table_operation(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables( + conn, + "create virtual table docs using fts5(body)", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "create", + "target_type": "virtual-table", + "database": "data", + "sqlite_schema": "main", + "table": "docs", + "target": "docs", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + +def test_analyze_create_table_as_select_function_is_not_internal(): + conn = sqlite3.connect(":memory:") + try: + conn.execute("create table secret(value text)") + analysis = analyze_sql_tables( + conn, + "create table copied as select substr(value, 1, 1) from secret", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "function", + "target_type": "function", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "substr", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + def test_analyze_insert_tables(conn): analysis = analyze_sql_tables( conn, From 03b2c66f6312b8317d87eb4c1326977f6f63b26d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 15:17:10 -0700 Subject: [PATCH 277/299] Require full row mutation permissions for raw SQL Raw SQL insert and update statements can have broader effects than their SQLite authorizer callbacks reveal. INSERT OR REPLACE and UPDATE OR REPLACE can delete conflicting rows while only surfacing insert or update operations. Expand table insert and update operations to require insert-row, update-row, and delete-row together. Keep delete operations mapped to delete-row, and update the analysis UI/API to report and evaluate multiple required permissions for a single operation. 
Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559083539 --- datasette/stored_queries.py | 108 ++++++++++++----- datasette/views/query_helpers.py | 27 +++-- tests/test_queries.py | 200 ++++++++++++++++++++++++++++++- 3 files changed, 290 insertions(+), 45 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index 4b0fe6a6..cf44a9ff 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -588,10 +588,25 @@ async def list_queries( ) -PermissionRequirement = tuple[str, Resource] +@dataclass(frozen=True) +class PermissionRequirement: + action: str + resource: Resource -def permission_for_operation(operation: Operation) -> PermissionRequirement | None: +def row_mutation_requirements( + database: str, table: str +) -> tuple[PermissionRequirement, ...]: + resource = TableResource(database=database, table=table) + return tuple( + PermissionRequirement(action=action, resource=resource) + for action in ("insert-row", "update-row", "delete-row") + ) + + +def permission_requirements_for_operation( + operation: Operation, +) -> tuple[PermissionRequirement, ...]: if ( operation.operation == "read" and operation.target_type == "table" @@ -599,31 +614,45 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "view-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="view-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) - write_actions = { - "insert": "insert-row", - "update": "update-row", - "delete": "delete-row", - } - action = write_actions.get(operation.operation) if ( - action + operation.operation in {"insert", "update"} + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return row_mutation_requirements( + database=operation.database, + table=operation.table, 
+ ) + if ( + operation.operation == "delete" and operation.target_type == "table" and operation.database is not None and operation.table is not None ): return ( - action, - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="delete-row", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if operation.operation == "create" and operation.target_type == "table": if operation.database is None: - return None + return () return ( - "create-table", - DatabaseResource(database=operation.database), + PermissionRequirement( + action="create-table", + resource=DatabaseResource(database=operation.database), + ), ) if ( operation.operation == "alter" @@ -632,8 +661,12 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "alter-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if ( operation.operation == "drop" @@ -642,8 +675,12 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "drop-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="drop-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if ( operation.operation in {"create", "drop"} @@ -652,10 +689,14 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "alter-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) - return None + return () def 
operation_should_be_ignored(operation: Operation) -> bool: @@ -704,20 +745,23 @@ async def ensure_query_write_permissions( for operation in analysis.operations: if operation_should_be_ignored(operation): continue - permission = permission_for_operation(operation) - if permission is None: + permissions = permission_requirements_for_operation(operation) + if not permissions: raise Forbidden( "Unsupported SQL operation: {} {}".format( operation.operation, operation.target_type ) ) - action, resource = permission if operation.database != database: raise Forbidden("Writable queries may not access attached databases") - if not await datasette.allowed( - action=action, - resource=resource, - actor=actor, - ): - raise Forbidden(f"Permission denied: need {action} on {resource}") + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + raise Forbidden( + f"Permission denied: need {permission.action} " + f"on {permission.resource}" + ) return analysis diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 05a0d73e..7f3ef1bc 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -6,7 +6,7 @@ from datasette.stored_queries import ( StoredQuery, operation_is_write, operation_should_be_ignored, - permission_for_operation, + permission_requirements_for_operation, ) from datasette.utils import ( named_parameters as derive_named_parameters, @@ -216,8 +216,10 @@ def _display_operations(analysis: SQLAnalysis) -> list[Operation]: def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): - permission = permission_for_operation(operation) - required_permission = permission[0] if permission else "" + permissions = permission_requirements_for_operation(operation) + required_permission = ", ".join( + permission.action for permission in permissions + ) rows.append( { 
"operation": operation.operation, @@ -236,14 +238,17 @@ async def _analysis_rows_with_permissions( rows = _analysis_rows(analysis) is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): - permission = permission_for_operation(operation) - if permission: - action, resource = permission - row["allowed"] = await datasette.allowed( - action=action, - resource=resource, - actor=actor, - ) + permissions = permission_requirements_for_operation(operation) + if permissions: + row["allowed"] = True + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + row["allowed"] = False + break elif is_write: row["allowed"] = False else: diff --git a/tests/test_queries.py b/tests/test_queries.py index 97ec973f..fcd19d1c 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -508,6 +508,8 @@ async def test_analyze_write_query_requires_table_permissions(): "dogs": { "permissions": { "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, } } } @@ -1429,7 +1431,7 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): "operation": "insert", "database": "data", "table": "dogs", - "required_permission": "insert-row", + "required_permission": "insert-row, update-row, delete-row", "source": None, "allowed": True, } @@ -1627,6 +1629,40 @@ async def test_execute_write_post_requires_database_and_table_permissions(): } } } + missing_update_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert missing_update_permission.status_code == 403 + assert missing_update_permission.json()["errors"] == [ + "Permission denied: need update-row on data/dogs" + ] + + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ + "update-row" + ] = {"id": "writer"} 
+ missing_delete_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert missing_delete_permission.status_code == 403 + assert missing_delete_permission.json()["errors"] == [ + "Permission denied: need delete-row on data/dogs" + ] + + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ + "delete-row" + ] = {"id": "writer"} allowed = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1643,6 +1679,156 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_insert_or_replace_requires_delete_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_insert_or_replace", name="data") + await db.execute_write( + "create table users (id integer primary key, email text unique)" + ) + await db.execute_write( + "insert into users (id, email) values " + "(1, 'a@example.com'), (2, 'b@example.com')" + ) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": ( + "insert or replace into users(id, email) " + "values (3, 'b@example.com')" + ) + }, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need delete-row on data/users" + ] + assert (await db.execute("select id, email from users order by id")).dicts() == [ + {"id": 1, "email": 
"a@example.com"}, + {"id": 2, "email": "b@example.com"}, + ] + + +@pytest.mark.asyncio +async def test_execute_write_update_or_replace_requires_delete_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_update_or_replace", name="data") + await db.execute_write( + "create table users (id integer primary key, email text unique)" + ) + await db.execute_write( + "insert into users (id, email) values " + "(1, 'a@example.com'), (2, 'b@example.com')" + ) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "update or replace users set email = 'b@example.com' where id = 1"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need delete-row on data/users" + ] + assert (await db.execute("select id, email from users order by id")).dicts() == [ + {"id": 1, "email": "a@example.com"}, + {"id": 2, "email": "b@example.com"}, + ] + + +@pytest.mark.asyncio +async def test_execute_write_update_requires_insert_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_update_requires_insert", name="data") + await db.execute_write("create table users (id integer primary key, name 
text)") + await db.execute_write("insert into users (id, name) values (1, 'Alice')") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "update users set name = 'Alicia' where id = 1"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need insert-row on data/users" + ] + assert (await db.execute("select name from users where id = 1")).first()[0] == "Alice" + + @pytest.mark.asyncio async def test_execute_write_insert_select_requires_view_table_on_source(): ds = Datasette( @@ -1659,7 +1845,13 @@ async def test_execute_write_insert_select_requires_view_table_on_source(): "secret": { "permissions": {"view-table": {"id": "someone-else"}} }, - "public_log": {"permissions": {"insert-row": {"id": "writer"}}}, + "public_log": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + }, }, } } @@ -1740,6 +1932,8 @@ async def test_execute_write_rejects_function_operations(): "dogs": { "permissions": { "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, } } }, @@ -2117,6 +2311,8 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): "dogs": { "permissions": { "insert-row": {"id": "alice"}, + "update-row": {"id": "alice"}, + "delete-row": {"id": "alice"}, } } }, From 1932f8429fd3259d48fb848fdf893f9a004276e9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:14:50 -0700 Subject: [PATCH 278/299] Deny user-authored schema table reads in write SQL Stop marking sqlite_master and sqlite_schema reads as internal as soon as the SQLite authorizer reports them. The later DDL-aware pass still treats schema catalog access as internal when it accompanies semantic CREATE, ALTER, or DROP operations. 
This makes explicit catalog reads in write SQL fall through to the deny-by-default path as unsupported read schema operations, preventing queries from copying private table definitions into writable tables. Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 --- datasette/utils/sql_analysis.py | 1 - datasette/views/query_helpers.py | 4 +- tests/test_queries.py | 73 +++++++++++++++++++++++++++----- tests/test_utils_sql_analysis.py | 20 +++++++++ 4 files changed, 84 insertions(+), 14 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 8963da77..91216501 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -256,7 +256,6 @@ def analyze_sql_tables( target=arg1, source=source, column=column, - internal=target_type == "schema", ) return sqlite3.SQLITE_OK diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 7f3ef1bc..0e3d4e01 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -217,9 +217,7 @@ def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): permissions = permission_requirements_for_operation(operation) - required_permission = ", ".join( - permission.action for permission in permissions - ) + required_permission = ", ".join(permission.action for permission in permissions) rows.append( { "operation": operation.operation, diff --git a/tests/test_queries.py b/tests/test_queries.py index fcd19d1c..40bc5052 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1643,9 +1643,9 @@ async def test_execute_write_post_requires_database_and_table_permissions(): "Permission denied: need update-row on data/dogs" ] - ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ - "update-row" - ] = {"id": "writer"} + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"]["update-row"] = { + "id": "writer" + } 
missing_delete_permission = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1660,9 +1660,9 @@ async def test_execute_write_post_requires_database_and_table_permissions(): "Permission denied: need delete-row on data/dogs" ] - ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ - "delete-row" - ] = {"id": "writer"} + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"]["delete-row"] = { + "id": "writer" + } allowed = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1719,8 +1719,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): actor={"id": "writer"}, json={ "sql": ( - "insert or replace into users(id, email) " - "values (3, 'b@example.com')" + "insert or replace into users(id, email) " "values (3, 'b@example.com')" ) }, ) @@ -1773,7 +1772,9 @@ async def test_execute_write_update_or_replace_requires_delete_row_permission(): denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, - json={"sql": "update or replace users set email = 'b@example.com' where id = 1"}, + json={ + "sql": "update or replace users set email = 'b@example.com' where id = 1" + }, ) assert denied_response.status_code == 403 @@ -1826,7 +1827,9 @@ async def test_execute_write_update_requires_insert_row_permission(): assert denied_response.json()["errors"] == [ "Permission denied: need insert-row on data/users" ] - assert (await db.execute("select name from users where id = 1")).first()[0] == "Alice" + assert (await db.execute("select name from users where id = 1")).first()[ + 0 + ] == "Alice" @pytest.mark.asyncio @@ -1876,6 +1879,56 @@ async def test_execute_write_insert_select_requires_view_table_on_source(): assert (await db.execute("select value from public_log")).dicts() == [] +@pytest.mark.asyncio +async def test_execute_write_rejects_sqlite_master_reads(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + 
"permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + }, + "log": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + }, + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_sqlite_master_read", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("create table log (value text)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": ( + "insert into log " "select sql from sqlite_master where name = 'secret'" + ) + }, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: read schema" + ] + assert (await db.execute("select value from log")).dicts() == [] + + @pytest.mark.asyncio async def test_execute_write_create_table_as_select_requires_view_table_on_source(): ds = Datasette( diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 2ae11502..f931be51 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -65,6 +65,26 @@ def test_analyze_uses_sqlite_schema_as_default_database(conn): } +def test_analyze_user_schema_table_read_is_not_internal(conn): + analysis = analyze_sql_tables( + conn, + "insert into log select sql from sqlite_master where name = 'dogs'", + database_name="data", + ) + + assert { + "operation": "read", + "target_type": "schema", + "database": "data", + "sqlite_schema": "main", + "table": None, + "target": "sqlite_master", + "columns": ("name", "sql"), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + def operation_dict(operation): return { "operation": operation.operation, From 
951f5a9f306ebe0bb8b3668ee698dc6cb6051d78 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:30:05 -0700 Subject: [PATCH 279/299] Detect VACUUM in SQL analysis Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 --- datasette/stored_queries.py | 1 + datasette/utils/sql_analysis.py | 33 +++++++++++++++++++++++- tests/test_queries.py | 31 ++++++++++++++++++++++ tests/test_utils_sql_analysis.py | 44 ++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index cf44a9ff..6746124a 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -720,6 +720,7 @@ def operation_is_write(operation: Operation) -> bool: "pragma", "analyze", "reindex", + "vacuum", "unknown", } diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 91216501..f2eb903f 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -22,6 +22,7 @@ SQLOperation = Literal[ "pragma", "analyze", "reindex", + "vacuum", "unknown", ] SQLTargetType = Literal[ @@ -423,10 +424,40 @@ def analyze_sql_tables( conn.set_authorizer(authorizer) try: - conn.execute("EXPLAIN " + sql, params if params is not None else {}).fetchall() + explain_rows = conn.execute( + "EXPLAIN " + sql, params if params is not None else {} + ).fetchall() finally: conn.set_authorizer(None) + if not operations: + vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) + if vacuum_row is not None: + schema_by_index = { + row[0]: row[1] for row in conn.execute("PRAGMA database_list") + } + sqlite_schema = schema_by_index.get(vacuum_row[2]) + database = database_for_schema(sqlite_schema) + record( + "vacuum", + "database", + database=database, + table=None, + sqlite_schema=sqlite_schema, + target=database, + source=None, + ) + else: + record( + "unknown", + "statement", + database=database_name, + table=None, + 
sqlite_schema=None, + target=None, + source=None, + ) + has_schema_operation = any( key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} diff --git a/tests/test_queries.py b/tests/test_queries.py index 40bc5052..bf371a80 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2011,6 +2011,37 @@ async def test_execute_write_rejects_function_operations(): assert (await db.execute("select name from dogs")).dicts() == [] +@pytest.mark.asyncio +async def test_execute_write_rejects_vacuum_operation(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("execute_write_vacuum_operation", name="data") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "vacuum"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: vacuum database" + ] + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index f931be51..df4b3625 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -129,6 +129,50 @@ def test_analyze_create_table_operation(): ] +def test_analyze_vacuum_operation(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables(conn, "vacuum", database_name="data") + finally: + conn.close() + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "vacuum", + "target_type": "database", + "database": "data", + "sqlite_schema": "main", + "table": None, + "target": "data", + "columns": (), + "source": None, + "internal": False, + } 
+ ] + + +def test_analyze_statement_with_no_authorizer_callbacks_is_unknown(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables(conn, "reindex", database_name="data") + finally: + conn.close() + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "unknown", + "target_type": "statement", + "database": "data", + "sqlite_schema": None, + "table": None, + "target": None, + "columns": (), + "source": None, + "internal": False, + } + ] + + def test_analyze_transaction_operation(conn): analysis = analyze_sql_tables(conn, "commit", database_name="data") From 11bddc891918849e7c4a006c64d0217072aa499c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:51:12 -0700 Subject: [PATCH 280/299] Deny VACUUM in user-authored SQL Reject VACUUM explicitly during write-query permission analysis so arbitrary write SQL and untrusted stored write queries cannot run it, even when the actor has execute-write-sql. Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 (P3) --- datasette/stored_queries.py | 16 ++++ datasette/views/database.py | 23 ++++- datasette/views/execute_write.py | 6 +- datasette/views/query_helpers.py | 9 +- tests/test_queries.py | 153 ++++++++++++++++++++++++++++++- 5 files changed, 199 insertions(+), 8 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index 6746124a..fd1cabf3 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -15,6 +15,13 @@ if TYPE_CHECKING: UNCHANGED = object() + +class QueryWriteRejected(Exception): + def __init__(self, message: str): + self.message = message + super().__init__(message) + + QUERY_OPTION_FIELDS = ( "hide_sql", "fragment", @@ -703,6 +710,12 @@ def operation_should_be_ignored(operation: Operation) -> bool: return operation.internal or operation.operation == "select" +def operation_forbidden_message(operation: Operation) -> str | None: + if operation.operation == 
"vacuum": + return "VACUUM is not allowed in user-supplied SQL" + return None + + def operation_is_write(operation: Operation) -> bool: return operation.operation in { "insert", @@ -746,6 +759,9 @@ async def ensure_query_write_permissions( for operation in analysis.operations: if operation_should_be_ignored(operation): continue + forbidden_message = operation_forbidden_message(operation) + if forbidden_message is not None: + raise QueryWriteRejected(forbidden_message) permissions = permission_requirements_for_operation(operation) if not permissions: raise Forbidden( diff --git a/datasette/views/database.py b/datasette/views/database.py index b558b002..ae1cf375 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,7 +13,7 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted from datasette.resources import DatabaseResource, QueryResource -from datasette.stored_queries import stored_query_to_dict +from datasette.stored_queries import QueryWriteRejected, stored_query_to_dict from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -453,9 +453,24 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - await _ensure_stored_query_execution_permissions( - datasette, db, stored_query, request.actor - ) + try: + await _ensure_stored_query_execution_permissions( + datasette, db, stored_query, request.actor + ) + except QueryWriteRejected as ex: + if request.headers.get("accept") == "application/json" or request.args.get( + "_json" + ): + return Response.json( + { + "ok": False, + "message": ex.message, + "redirect": None, + }, + status=403, + ) + datasette.add_message(request, ex.message, datasette.ERROR) + return Response.redirect(stored_query.on_error_redirect or request.path) # If database is immutable, return an error if not db.is_mutable: diff --git a/datasette/views/execute_write.py 
b/datasette/views/execute_write.py index 19006ac5..57c4d78e 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -163,13 +163,15 @@ class ExecuteWriteView(BaseView): except QueryValidationError as ex: if _wants_json(request, is_json, data): return _block_framing(_error([ex.message], ex.status)) + if ex.flash: + self.ds.add_message(request, ex.message, self.ds.ERROR) return await self._render_form( request, db, sql=sql or "", parameter_values=provided_params, - analysis_error=ex.message, - execution_message=ex.message, + analysis_error=None if ex.flash else ex.message, + execution_message=None if ex.flash else ex.message, execution_ok=False, status=ex.status, ) diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 0e3d4e01..92328ff3 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -3,6 +3,7 @@ import re from datasette.resources import DatabaseResource from datasette.stored_queries import ( + QueryWriteRejected, StoredQuery, operation_is_write, operation_should_be_ignored, @@ -47,9 +48,11 @@ _query_write_fields = { class QueryValidationError(Exception): - def __init__(self, message, status=400): + def __init__(self, message, status=400, *, flash=False): self.message = message self.status = status + self.flash = flash + super().__init__(message) def _actor_id(actor): @@ -194,6 +197,8 @@ async def _analyze_user_query(datasette, db, sql, *, actor): await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis ) + except QueryWriteRejected as ex: + raise QueryValidationError(ex.message, status=403, flash=True) from ex except Forbidden as ex: raise QueryValidationError(str(ex), status=403) from ex else: @@ -297,6 +302,8 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis ) + except QueryWriteRejected as ex: + raise 
QueryValidationError(ex.message, status=403, flash=True) from ex except Forbidden as ex: raise QueryValidationError(str(ex), status=403) from ex return parameter_names, params, analysis diff --git a/tests/test_queries.py b/tests/test_queries.py index bf371a80..b6e1637d 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2038,10 +2038,161 @@ async def test_execute_write_rejects_vacuum_operation(): assert denied_response.status_code == 403 assert denied_response.json()["errors"] == [ - "Unsupported SQL operation: vacuum database" + "VACUUM is not allowed in user-supplied SQL" ] +@pytest.mark.asyncio +async def test_execute_write_form_rejects_vacuum_operation_with_flash_error(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("execute_write_vacuum_operation_form", name="data") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + data={"sql": "vacuum"}, + ) + + assert denied_response.status_code == 403 + assert ( + '

        VACUUM is not allowed in user-supplied SQL

        ' + in denied_response.text + ) + assert denied_response.text.count("VACUUM is not allowed in user-supplied SQL") == 1 + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_vacuum_operation(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("stored_query_vacuum_operation", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "vacuum_db", + "vacuum", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + denied_response = await ds.client.post( + "/data/vacuum_db?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert denied_response.status_code == 403 + assert "VACUUM is not allowed in user-supplied SQL" in denied_response.text + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_vacuum_operation_with_flash_error(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("stored_query_vacuum_operation_form", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "vacuum_db", + "vacuum", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + denied_response = await ds.client.post( + "/data/vacuum_db", + actor={"id": "writer"}, + data={}, + ) + + assert denied_response.status_code == 302 + assert denied_response.headers["location"] == "/data/vacuum_db" + assert ds.unsign(denied_response.cookies["ds_messages"], "messages") == [ + ["VACUUM is not allowed in user-supplied SQL", ds.ERROR] + ] + + +@pytest.mark.asyncio +async def 
test_trusted_stored_write_query_skips_vacuum_filtering(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("trusted_stored_query_vacuum", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "trusted_vacuum", + "vacuum", + is_write=True, + is_trusted=True, + source="config", + ) + + response = await ds.client.post( + "/data/trusted_vacuum?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( From 0c5053cdf64a0dc2d1e9808fa712b88233760512 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 17:26:50 -0700 Subject: [PATCH 281/299] Docs for //-/execute-write JSON API Closes #2750, refs #2742 --- docs/json_api.rst | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/docs/json_api.rst b/docs/json_api.rst index 48c70af6..fffc16d7 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -505,6 +505,68 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. The token will need to have the specified :ref:`authentication_permissions`. +.. _ExecuteWriteView: + +Executing write SQL +~~~~~~~~~~~~~~~~~~~ + +Actors with the :ref:`actions_execute_write_sql` permission can execute arbitrary writable SQL against a mutable database using ``/-/execute-write``. + +:: + + POST //-/execute-write + Content-Type: application/json + Authorization: Bearer dstok_ + +The request body must include a ``"sql"`` string. Named SQL parameters can be provided using the optional ``"params"`` object: + +.. 
code-block:: json + + { + "sql": "insert into dogs (name) values (:name)", + "params": { + "name": "Cleo" + } + } + +The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. + +Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. + +A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: + +The shape of the ``"analysis"`` block is not yet considered a stable API and may change in future Datasette releases. + +.. code-block:: json + + { + "ok": true, + "message": "Query executed, 1 row affected", + "rowcount": 1, + "analysis": [ + { + "operation": "insert", + "database": "data", + "table": "dogs", + "required_permission": "insert-row, update-row, delete-row", + "source": null + } + ] + } + +If SQLite reports ``-1`` for the row count, the message will be ``"Query executed"``. + +Errors use the standard Datasette error format: + +.. code-block:: json + + { + "ok": false, + "errors": [ + "Permission denied: need execute-write-sql" + ] + } + .. 
_TableInsertView: Inserting rows From bcd989f4f8802a73a60c75f9bda77649c1347986 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 08:36:59 -0700 Subject: [PATCH 282/299] Detect and disallow insert to virtual/shadow table Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4565727978 --- datasette/stored_queries.py | 5 + datasette/utils/sql_analysis.py | 21 ++++- datasette/utils/sqlite.py | 112 ++++++++++++++++++++++ tests/test_queries.py | 153 +++++++++++++++++++++++++++++++ tests/test_utils.py | 45 ++++++++- tests/test_utils_sql_analysis.py | 47 ++++++++++ 6 files changed, 381 insertions(+), 2 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index fd1cabf3..b5aea221 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -713,6 +713,11 @@ def operation_should_be_ignored(operation: Operation) -> bool: def operation_forbidden_message(operation: Operation) -> str | None: if operation.operation == "vacuum": return "VACUUM is not allowed in user-supplied SQL" + if operation.operation in {"insert", "update", "delete"}: + if operation.table_kind == "virtual": + return "Writes to virtual tables are not allowed in user-supplied SQL" + if operation.table_kind == "shadow": + return "Writes to shadow tables are not allowed in user-supplied SQL" return None diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index f2eb903f..a71fa315 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Literal -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import SQLiteTableType, sqlite3, sqlite_table_type SQLOperation = Literal[ "read", @@ -42,6 +42,7 @@ SQLTargetType = Literal[ SQLTableOperation = Literal["read", "insert", "update", "delete"] SQLSchemaOperation = Literal["create", "drop"] SQLSchemaTargetType = Literal["index", "table", "trigger", "view", 
"virtual-table"] +SQLTableKind = SQLiteTableType @dataclass(frozen=True) @@ -51,6 +52,7 @@ class Operation: database: str | None table: str | None sqlite_schema: str | None + table_kind: SQLTableKind | None = None target: str | None = None columns: tuple[str, ...] = () source: str | None = None @@ -500,6 +502,22 @@ def analyze_sql_tables( return True return False + table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + + def table_kind_for(key: OperationKey) -> SQLTableKind | None: + if ( + key.target_type != "table" + or key.operation not in {"read", "insert", "update", "delete"} + or key.table is None + ): + return None + cache_key = (key.sqlite_schema, key.table) + if cache_key not in table_kind_cache: + table_kind_cache[cache_key] = sqlite_table_type( + conn, key.table, schema=key.sqlite_schema + ) + return table_kind_cache[cache_key] + return SQLAnalysis( operations=tuple( Operation( @@ -508,6 +526,7 @@ def analyze_sql_tables( database=key.database, table=key.table, sqlite_schema=key.sqlite_schema, + table_kind=table_kind_for(key), target=key.target, columns=tuple(sorted(columns)), source=key.source, diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index d0a2d783..130c5f62 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -1,3 +1,6 @@ +import re +from typing import Literal + using_pysqlite3 = False try: import pysqlite3 as sqlite3 @@ -10,6 +13,18 @@ if hasattr(sqlite3, "enable_callback_tracebacks"): sqlite3.enable_callback_tracebacks(True) _cached_sqlite_version = None +SQLiteTableType = Literal["table", "view", "virtual", "shadow"] +_VIRTUAL_TABLE_MODULE_RE = re.compile( + r"\bCREATE\s+VIRTUAL\s+TABLE\b.*?\bUSING\s+([^\s(]+)", + re.IGNORECASE, +) +_VIRTUAL_TABLE_SHADOW_SUFFIXES = { + "fts3": ("_content", "_segdir", "_segments", "_stat", "_docsize"), + "fts4": ("_content", "_segdir", "_segments", "_stat", "_docsize"), + "fts5": ("_data", "_idx", "_docsize", "_content", "_config"), + "rtree": 
("_node", "_parent", "_rowid"), + "rtree_i32": ("_node", "_parent", "_rowid"), +} def sqlite_version(): @@ -36,5 +51,102 @@ def supports_table_xinfo(): return sqlite_version() >= (3, 26, 0) +def supports_table_list(): + return sqlite_version() >= (3, 37, 0) + + def supports_generated_columns(): return sqlite_version() >= (3, 31, 0) + + +def sqlite_table_type( + conn, + table: str, + *, + schema: str | None = "main", +) -> SQLiteTableType | None: + if supports_table_list(): + try: + query = "select type from pragma_table_list where name = ?" + params: tuple[str, ...] = (table,) + if schema is not None: + query += " and schema = ?" + params = (table, schema) + row = conn.execute(query, params).fetchone() + if row is not None and row[0] in {"table", "view", "virtual", "shadow"}: + return row[0] + except sqlite3.DatabaseError: + pass + return _sqlite_table_type_from_schema(conn, table, schema=schema) + + +def _sqlite_table_type_from_schema( + conn, + table: str, + *, + schema: str | None = "main", +) -> SQLiteTableType | None: + schema_table = _sqlite_schema_table(schema) + try: + row = conn.execute( + "select type, sql from {} where name = ?".format(schema_table), + (table,), + ).fetchone() + except sqlite3.DatabaseError: + return None + if row is None: + return None + object_type, sql = row + if object_type == "view": + return "view" + if object_type != "table": + return None + if _virtual_table_module(sql) is not None: + return "virtual" + if _is_known_shadow_table(conn, table, schema=schema): + return "shadow" + return "table" + + +def _is_known_shadow_table( + conn, + table: str, + *, + schema: str | None = "main", +) -> bool: + schema_table = _sqlite_schema_table(schema) + try: + rows = conn.execute( + "select name, sql from {} where type = 'table'".format(schema_table) + ).fetchall() + except sqlite3.DatabaseError: + return False + for virtual_table, sql in rows: + module = _virtual_table_module(sql) + if module is None: + continue + for suffix in 
_VIRTUAL_TABLE_SHADOW_SUFFIXES.get(module, ()): + if table == virtual_table + suffix: + return True + return False + + +def _sqlite_schema_table(schema: str | None) -> str: + if schema is None or schema == "main": + return "sqlite_master" + if schema == "temp": + return "sqlite_temp_master" + return "{}.sqlite_master".format(_quote_identifier(schema)) + + +def _quote_identifier(value: str) -> str: + return '"{}"'.format(value.replace('"', '""')) + + +def _virtual_table_module(sql: str | None) -> str | None: + if not sql: + return None + match = _VIRTUAL_TABLE_MODULE_RE.search(sql) + if match is None: + return None + return match.group(1).strip("\"'[]`").lower() diff --git a/tests/test_queries.py b/tests/test_queries.py index b6e1637d..73f8f3cf 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2193,6 +2193,159 @@ async def test_trusted_stored_write_query_skips_vacuum_filtering(): assert response.json()["ok"] is True +@pytest.mark.asyncio +async def test_execute_write_rejects_virtual_table_control_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_virtual_table_control", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs(docs) values('delete-all')"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to virtual tables are not allowed in user-supplied SQL" + ] + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_execute_write_rejects_regular_virtual_table_insert(): + ds = Datasette(memory=True, default_deny=True) 
+ ds.root_enabled = True + db = ds.add_memory_database("execute_write_virtual_table_insert", name="data") + await db.execute_write("create virtual table docs using fts5(title, body)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs(rowid, title, body) values (1, 'a', 'b')"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to virtual tables are not allowed in user-supplied SQL" + ] + assert (await db.execute("select count(*) from docs")).first()[0] == 0 + + +@pytest.mark.asyncio +async def test_execute_write_rejects_shadow_table_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_shadow_table_insert", name="data") + await db.execute_write("create virtual table docs using fts5(title, body)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs_config(k, v) values ('x', 1)"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to shadow tables are not allowed in user-supplied SQL" + ] + assert (await db.execute("select count(*) from docs_config")).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_virtual_table_control_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("stored_query_virtual_table_control", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + await ds.add_query( + "data", + "delete_all_docs", + "insert into docs(docs) values('delete-all')", + is_write=True, + 
is_trusted=False, + source="user", + owner_id="root", + ) + + denied_response = await ds.client.post( + "/data/delete_all_docs?_json=1", + actor={"id": "root"}, + data={}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["message"] == ( + "Writes to virtual tables are not allowed in user-supplied SQL" + ) + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_trusted_stored_write_query_can_write_virtual_table(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + } + } + } + }, + ) + db = ds.add_memory_database("trusted_stored_query_virtual_table", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + await ds.add_query( + "data", + "trusted_delete_all", + "insert into docs(docs) values('delete-all')", + is_write=True, + is_trusted=True, + source="config", + ) + + response = await ds.client.post( + "/data/trusted_delete_all?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 0 + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( diff --git a/tests/test_utils.py b/tests/test_utils.py index 3fcb623e..e142bb5b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ Tests for various datasette helper functions. 
from datasette.app import Datasette from datasette import utils from datasette.utils.asgi import Request -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import sqlite3, sqlite_table_type import json import os import pathlib @@ -226,6 +226,49 @@ def test_detect_fts_different_table_names(table): conn.close() +@pytest.mark.parametrize("use_fallback", (False, True)) +def test_sqlite_table_type_detects_virtual_and_shadow_tables(monkeypatch, use_fallback): + if use_fallback: + monkeypatch.setattr("datasette.utils.sqlite.sqlite_version", lambda: (3, 25, 0)) + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table dogs(id integer primary key, name text); + create view dog_names as select name from dogs; + create virtual table search_index using fts5(title, body); + create virtual table boxes using rtree(id, minx, maxx, miny, maxy); + """) + + assert sqlite_table_type(conn, "dogs") == "table" + assert sqlite_table_type(conn, "dog_names") == "view" + assert sqlite_table_type(conn, "search_index") == "virtual" + assert sqlite_table_type(conn, "search_index_config") == "shadow" + assert sqlite_table_type(conn, "boxes") == "virtual" + assert sqlite_table_type(conn, "boxes_node") == "shadow" + assert sqlite_table_type(conn, "missing") is None + finally: + conn.close() + + +@pytest.mark.parametrize("use_fallback", (False, True)) +def test_sqlite_table_type_detects_attached_database_tables(monkeypatch, use_fallback): + if use_fallback: + monkeypatch.setattr("datasette.utils.sqlite.sqlite_version", lambda: (3, 25, 0)) + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + attach database ':memory:' as extra; + create table extra.cats(id integer primary key, name text); + create virtual table extra.cat_search using fts5(name); + """) + + assert sqlite_table_type(conn, "cats", schema="extra") == "table" + assert sqlite_table_type(conn, "cat_search", schema="extra") == "virtual" + assert 
sqlite_table_type(conn, "cat_search_data", schema="extra") == "shadow" + finally: + conn.close() + + @pytest.mark.parametrize( "url,expected", [ diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index df4b3625..979ff9e1 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -260,6 +260,53 @@ def test_analyze_create_virtual_table_operation(): } in [operation_dict(operation) for operation in analysis.operations] +def test_analyze_table_kind_for_regular_virtual_and_shadow_tables(): + conn = sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table dogs (id integer primary key, name text); + create virtual table docs using fts5(title, body, content=''); + """) + + regular_analysis = analyze_sql_tables( + conn, + "insert into dogs (name) values ('Cleo')", + database_name="data", + ) + virtual_analysis = analyze_sql_tables( + conn, + "insert into docs(docs) values('delete-all')", + database_name="data", + ) + shadow_analysis = analyze_sql_tables( + conn, + "insert into docs_config(k, v) values ('x', 1)", + database_name="data", + ) + finally: + conn.close() + + regular_insert = next( + operation + for operation in regular_analysis.operations + if operation.operation == "insert" and operation.table == "dogs" + ) + virtual_insert = next( + operation + for operation in virtual_analysis.operations + if operation.operation == "insert" and operation.table == "docs" + ) + shadow_insert = next( + operation + for operation in shadow_analysis.operations + if operation.operation == "insert" and operation.table == "docs_config" + ) + + assert regular_insert.table_kind == "table" + assert virtual_insert.table_kind == "virtual" + assert shadow_insert.table_kind == "shadow" + + def test_analyze_create_table_as_select_function_is_not_internal(): conn = sqlite3.connect(":memory:") try: From aaf00e9ec22b77e53f291ccedcbf2f499cce9e2b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 08:42:06 
-0700 Subject: [PATCH 283/299] Refactor hidden_table_names() to use new implementation Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4565727978 --- datasette/database.py | 80 +------------------------------- datasette/utils/sqlite.py | 29 ++++++++++++ tests/test_internals_database.py | 9 +--- tests/test_utils.py | 12 ++++- 4 files changed, 43 insertions(+), 87 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index e7e9527e..10417670 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -26,7 +26,7 @@ from .utils import ( table_column_details, ) from .utils.sql_analysis import SQLAnalysis, analyze_sql_tables -from .utils.sqlite import sqlite_version +from .utils.sqlite import sqlite_hidden_table_names from .inspect import inspect_hash connections = threading.local() @@ -702,83 +702,7 @@ class Database: t for t in db_config["tables"] if db_config["tables"][t].get("hidden") ] - if sqlite_version()[1] >= 37: - hidden_tables += [x[0] for x in await self.execute(""" - with shadow_tables as ( - select name - from pragma_table_list - where [type] = 'shadow' - order by name - ), - core_tables as ( - select name - from sqlite_master - WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - combined as ( - select name from shadow_tables - union all - select name from core_tables - ) - select name from combined order by 1 - """)] - else: - hidden_tables += [x[0] for x in await self.execute(""" - WITH base AS ( - SELECT name - FROM sqlite_master - WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - fts_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), ('_config')) - ), - fts5_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' - ), - fts5_shadow_tables AS ( - SELECT - printf('%s%s', fts5_names.name,
fts_suffixes.suffix) AS name - FROM fts5_names - JOIN fts_suffixes - ), - fts3_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) - ), - fts3_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' - OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' - ), - fts3_shadow_tables AS ( - SELECT - printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name - FROM fts3_names - JOIN fts3_suffixes - ), - final AS ( - SELECT name FROM base - UNION ALL - SELECT name FROM fts5_shadow_tables - UNION ALL - SELECT name FROM fts3_shadow_tables - ) - SELECT name FROM final ORDER BY 1 - """)] - # Also hide any FTS tables that have a content= argument - hidden_tables += [x[0] for x in await self.execute(""" - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%' - AND sql LIKE '%USING FTS%' - AND sql LIKE '%content=%' - """)] + hidden_tables += await self.execute_fn(sqlite_hidden_table_names) has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index 130c5f62..d3f52751 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -80,6 +80,28 @@ def sqlite_table_type( return _sqlite_table_type_from_schema(conn, table, schema=schema) +def sqlite_hidden_table_names(conn, *, schema: str | None = "main") -> list[str]: + schema_table = _sqlite_schema_table(schema) + try: + rows = conn.execute( + "select name, sql from {} where type = 'table'".format(schema_table) + ).fetchall() + except sqlite3.DatabaseError: + return [] + hidden_tables = [] + content_fts_tables = [] + for name, sql in rows: + if ( + name in {"sqlite_stat1", "sqlite_stat2", "sqlite_stat3", "sqlite_stat4"} + or name.startswith("_") + or sqlite_table_type(conn, name, schema=schema) == "shadow" + ): + hidden_tables.append(name) + elif _is_fts_content_virtual_table(sql): + content_fts_tables.append(name) + return 
sorted(hidden_tables) + content_fts_tables + + def _sqlite_table_type_from_schema( conn, table: str, @@ -150,3 +172,10 @@ def _virtual_table_module(sql: str | None) -> str | None: if match is None: return None return match.group(1).strip("\"'[]`").lower() + + +def _is_fts_content_virtual_table(sql: str | None) -> bool: + return ( + _virtual_table_module(sql) in {"fts3", "fts4", "fts5"} + and "content=" in sql.lower() + ) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index d6e130b4..88f9d571 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -8,7 +8,7 @@ from datasette.app import Datasette from datasette.database import Database, Results, MultipleValues from datasette.database import DatasetteClosedError from datasette.database import _deliver_write_result -from datasette.utils.sqlite import sqlite3, sqlite_version +from datasette.utils.sqlite import sqlite3 from datasette.utils import Column import pytest import time @@ -798,14 +798,7 @@ async def test_in_memory_databases_forbid_writes(app_client): assert await db.table_names() == ["foo"] -def pragma_table_list_supported(): - return sqlite_version()[1] >= 37 - - @pytest.mark.asyncio -@pytest.mark.skipif( - not pragma_table_list_supported(), reason="Requires PRAGMA table_list support" -) async def test_hidden_tables(app_client): ds = app_client.ds db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) diff --git a/tests/test_utils.py b/tests/test_utils.py index e142bb5b..90013537 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ Tests for various datasette helper functions. 
from datasette.app import Datasette from datasette import utils from datasette.utils.asgi import Request -from datasette.utils.sqlite import sqlite3, sqlite_table_type +from datasette.utils.sqlite import sqlite3, sqlite_hidden_table_names, sqlite_table_type import json import os import pathlib @@ -246,6 +246,16 @@ def test_sqlite_table_type_detects_virtual_and_shadow_tables(monkeypatch, use_fa assert sqlite_table_type(conn, "boxes") == "virtual" assert sqlite_table_type(conn, "boxes_node") == "shadow" assert sqlite_table_type(conn, "missing") is None + assert sqlite_hidden_table_names(conn) == [ + "boxes_node", + "boxes_parent", + "boxes_rowid", + "search_index_config", + "search_index_content", + "search_index_data", + "search_index_docsize", + "search_index_idx", + ] finally: conn.close() From 2785fd29deef505f132902dcee86284e39e3fdcb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 09:03:10 -0700 Subject: [PATCH 284/299] Fix tests I just broke --- datasette/utils/sql_analysis.py | 86 +++++++++++++++++++-------------- datasette/utils/sqlite.py | 2 +- tests/test_utils.py | 14 ++++++ 3 files changed, 65 insertions(+), 37 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index a71fa315..b5d7ada8 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -193,6 +193,10 @@ _AUTHORIZER_ACTION_NAMES = { } +def _allow_authorizer_action(*args): + return sqlite3.SQLITE_OK + + def analyze_sql_tables( conn, sql: str, @@ -424,42 +428,59 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK + table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + conn.set_authorizer(authorizer) try: explain_rows = conn.execute( "EXPLAIN " + sql, params if params is not None else {} ).fetchall() + # Passing None before these lookups leaves a failing callback installed + # on Python 3.10, so use a permissive callback until they are complete. 
+ conn.set_authorizer(_allow_authorizer_action) + + if not operations: + vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) + if vacuum_row is not None: + schema_by_index = { + row[0]: row[1] for row in conn.execute("PRAGMA database_list") + } + sqlite_schema = schema_by_index.get(vacuum_row[2]) + database = database_for_schema(sqlite_schema) + record( + "vacuum", + "database", + database=database, + table=None, + sqlite_schema=sqlite_schema, + target=database, + source=None, + ) + else: + record( + "unknown", + "statement", + database=database_name, + table=None, + sqlite_schema=None, + target=None, + source=None, + ) + + for key in operations: + if ( + key.target_type == "table" + and key.operation in {"read", "insert", "update", "delete"} + and key.table is not None + ): + cache_key = (key.sqlite_schema, key.table) + if cache_key not in table_kind_cache: + table_kind_cache[cache_key] = sqlite_table_type( + conn, key.table, schema=key.sqlite_schema + ) finally: conn.set_authorizer(None) - if not operations: - vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) - if vacuum_row is not None: - schema_by_index = { - row[0]: row[1] for row in conn.execute("PRAGMA database_list") - } - sqlite_schema = schema_by_index.get(vacuum_row[2]) - database = database_for_schema(sqlite_schema) - record( - "vacuum", - "database", - database=database, - table=None, - sqlite_schema=sqlite_schema, - target=database, - source=None, - ) - else: - record( - "unknown", - "statement", - database=database_name, - table=None, - sqlite_schema=None, - target=None, - source=None, - ) - has_schema_operation = any( key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} @@ -502,8 +523,6 @@ def analyze_sql_tables( return True return False - table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} - def table_kind_for(key: OperationKey) -> SQLTableKind | None: if ( 
key.target_type != "table" @@ -511,12 +530,7 @@ def analyze_sql_tables( or key.table is None ): return None - cache_key = (key.sqlite_schema, key.table) - if cache_key not in table_kind_cache: - table_kind_cache[cache_key] = sqlite_table_type( - conn, key.table, schema=key.sqlite_schema - ) - return table_kind_cache[cache_key] + return table_kind_cache[(key.sqlite_schema, key.table)] return SQLAnalysis( operations=tuple( diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index d3f52751..5a7c6c38 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -16,7 +16,7 @@ _cached_sqlite_version = None SQLiteTableType = Literal["table", "view", "virtual", "shadow"] _VIRTUAL_TABLE_MODULE_RE = re.compile( r"\bCREATE\s+VIRTUAL\s+TABLE\b.*?\bUSING\s+([^\s(]+)", - re.IGNORECASE, + re.IGNORECASE | re.DOTALL, ) _VIRTUAL_TABLE_SHADOW_SUFFIXES = { "fts3": ("_content", "_segdir", "_segments", "_stat", "_docsize"), diff --git a/tests/test_utils.py b/tests/test_utils.py index 90013537..e83eed7a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -279,6 +279,20 @@ def test_sqlite_table_type_detects_attached_database_tables(monkeypatch, use_fal conn.close() +def test_sqlite_hidden_table_names_hides_multiline_content_fts_table(): + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table searchable(id integer primary key, body text); + create virtual table searchable_fts + using fts5(body, content='searchable', content_rowid='id'); + """) + + assert "searchable_fts" in sqlite_hidden_table_names(conn) + finally: + conn.close() + + @pytest.mark.parametrize( "url,expected", [ From 8bd7e165f465fe057beace2b17d52c0a347819f8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 09:50:56 -0700 Subject: [PATCH 285/299] Refactored for code readability --- datasette/app.py | 5 +- datasette/stored_queries.py | 211 +------------------------ datasette/views/database.py | 3 +- datasette/views/query_helpers.py | 27 
++-- datasette/write_sql.py | 255 +++++++++++++++++++++++++++++++ tests/test_write_sql.py | 59 +++++++ 6 files changed, 339 insertions(+), 221 deletions(-) create mode 100644 datasette/write_sql.py create mode 100644 tests/test_write_sql.py diff --git a/datasette/app.py b/datasette/app.py index 56b89789..e7f34e69 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -42,7 +42,7 @@ from jinja2.exceptions import TemplateNotFound from .events import Event from .column_types import SQLiteType -from . import stored_queries +from . import stored_queries, write_sql from .views import Context from .views.database import ( database_download, @@ -1197,7 +1197,8 @@ class Datasette: async def ensure_query_write_permissions( self, database, sql, *, actor=None, params=None, analysis=None ): - return await stored_queries.ensure_query_write_permissions( + # Raise Forbidden or QueryWriteRejected if SQL should not run + return await write_sql.ensure_query_write_permissions( self, database, sql, actor=actor, params=params, analysis=analysis ) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index b5aea221..b6ac49b8 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -2,26 +2,13 @@ from __future__ import annotations from dataclasses import dataclass import json -from typing import Any, Iterable, TYPE_CHECKING +from typing import Any, Iterable -from .resources import DatabaseResource, TableResource -from .permissions import Resource -from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components -from .utils.asgi import Forbidden -from .utils.sql_analysis import Operation, SQLAnalysis - -if TYPE_CHECKING: - from .app import Datasette +from .utils import tilde_encode, urlsafe_components UNCHANGED = object() -class QueryWriteRejected(Exception): - def __init__(self, message: str): - self.message = message - super().__init__(message) - - QUERY_OPTION_FIELDS = ( "hide_sql", "fragment", @@ -593,197 +580,3 @@ async def 
list_queries( has_more=has_more, limit=limit, ) - - -@dataclass(frozen=True) -class PermissionRequirement: - action: str - resource: Resource - - -def row_mutation_requirements( - database: str, table: str -) -> tuple[PermissionRequirement, ...]: - resource = TableResource(database=database, table=table) - return tuple( - PermissionRequirement(action=action, resource=resource) - for action in ("insert-row", "update-row", "delete-row") - ) - - -def permission_requirements_for_operation( - operation: Operation, -) -> tuple[PermissionRequirement, ...]: - if ( - operation.operation == "read" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="view-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - operation.operation in {"insert", "update"} - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return row_mutation_requirements( - database=operation.database, - table=operation.table, - ) - if ( - operation.operation == "delete" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="delete-row", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if operation.operation == "create" and operation.target_type == "table": - if operation.database is None: - return () - return ( - PermissionRequirement( - action="create-table", - resource=DatabaseResource(database=operation.database), - ), - ) - if ( - operation.operation == "alter" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="alter-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - 
operation.operation == "drop" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="drop-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - operation.operation in {"create", "drop"} - and operation.target_type == "index" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="alter-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - return () - - -def operation_should_be_ignored(operation: Operation) -> bool: - return operation.internal or operation.operation == "select" - - -def operation_forbidden_message(operation: Operation) -> str | None: - if operation.operation == "vacuum": - return "VACUUM is not allowed in user-supplied SQL" - if operation.operation in {"insert", "update", "delete"}: - if operation.table_kind == "virtual": - return "Writes to virtual tables are not allowed in user-supplied SQL" - if operation.table_kind == "shadow": - return "Writes to shadow tables are not allowed in user-supplied SQL" - return None - - -def operation_is_write(operation: Operation) -> bool: - return operation.operation in { - "insert", - "update", - "delete", - "create", - "alter", - "drop", - "begin", - "commit", - "rollback", - "savepoint", - "attach", - "detach", - "pragma", - "analyze", - "reindex", - "vacuum", - "unknown", - } - - -async def ensure_query_write_permissions( - datasette: Datasette, - database: str, - sql: str, - *, - actor: dict[str, object] | None = None, - params: dict[str, object] | None = None, - analysis: SQLAnalysis | None = None, -) -> SQLAnalysis: - db = datasette.get_database(database) - if analysis is None: - if params is None: - params = {name: "" for name in named_parameters(sql)} - try: - analysis = await db.analyze_sql(sql, params) - except 
sqlite3.DatabaseError as ex: - raise Forbidden(f"Could not analyze query: {ex}") from ex - - for operation in analysis.operations: - if operation_should_be_ignored(operation): - continue - forbidden_message = operation_forbidden_message(operation) - if forbidden_message is not None: - raise QueryWriteRejected(forbidden_message) - permissions = permission_requirements_for_operation(operation) - if not permissions: - raise Forbidden( - "Unsupported SQL operation: {} {}".format( - operation.operation, operation.target_type - ) - ) - if operation.database != database: - raise Forbidden("Writable queries may not access attached databases") - for permission in permissions: - if not await datasette.allowed( - action=permission.action, - resource=permission.resource, - actor=actor, - ): - raise Forbidden( - f"Permission denied: need {permission.action} " - f"on {permission.resource}" - ) - return analysis diff --git a/datasette/views/database.py b/datasette/views/database.py index ae1cf375..b4a964f1 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,7 +13,8 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted from datasette.resources import DatabaseResource, QueryResource -from datasette.stored_queries import QueryWriteRejected, stored_query_to_dict +from datasette.stored_queries import stored_query_to_dict +from datasette.write_sql import QueryWriteRejected from datasette.utils import ( add_cors_headers, await_me_maybe, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 92328ff3..712832e8 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -3,11 +3,14 @@ import re from datasette.resources import DatabaseResource from datasette.stored_queries import ( - QueryWriteRejected, StoredQuery, +) +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + QueryWriteRejected, + 
RequireWriteSqlPermissions, + decision_for_write_sql_operation, operation_is_write, - operation_should_be_ignored, - permission_requirements_for_operation, ) from datasette.utils import ( named_parameters as derive_named_parameters, @@ -212,7 +215,9 @@ async def _analyze_user_query(datasette, db, sql, *, actor): def _display_operations(analysis: SQLAnalysis) -> list[Operation]: operations = [] for operation in analysis.operations: - if operation_should_be_ignored(operation): + if isinstance( + decision_for_write_sql_operation(operation), IgnoreWriteSqlOperation + ): continue operations.append(operation) return operations @@ -221,8 +226,12 @@ def _display_operations(analysis: SQLAnalysis) -> list[Operation]: def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): - permissions = permission_requirements_for_operation(operation) - required_permission = ", ".join(permission.action for permission in permissions) + decision = decision_for_write_sql_operation(operation) + required_permission = ( + ", ".join(permission.action for permission in decision.permissions) + if isinstance(decision, RequireWriteSqlPermissions) + else "" + ) rows.append( { "operation": operation.operation, @@ -241,10 +250,10 @@ async def _analysis_rows_with_permissions( rows = _analysis_rows(analysis) is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): - permissions = permission_requirements_for_operation(operation) - if permissions: + decision = decision_for_write_sql_operation(operation) + if isinstance(decision, RequireWriteSqlPermissions): row["allowed"] = True - for permission in permissions: + for permission in decision.permissions: if not await datasette.allowed( action=permission.action, resource=permission.resource, diff --git a/datasette/write_sql.py b/datasette/write_sql.py new file mode 100644 index 00000000..2e1b69af --- /dev/null +++ b/datasette/write_sql.py @@ 
-0,0 +1,255 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from .permissions import Resource +from .resources import DatabaseResource, TableResource +from .utils import named_parameters, sqlite3 +from .utils.asgi import Forbidden +from .utils.sql_analysis import Operation, SQLAnalysis + +if TYPE_CHECKING: + from .app import Datasette + + +class QueryWriteRejected(Exception): + def __init__(self, message: str): + self.message = message + super().__init__(message) + + +@dataclass(frozen=True) +class PermissionRequirement: + action: str + resource: Resource + + +PermissionRequirements = tuple[PermissionRequirement, ...] + + +class WriteSqlOperationDecision: + """What Datasette should do with one operation in user-supplied write SQL.""" + + +@dataclass(frozen=True) +class IgnoreWriteSqlOperation(WriteSqlOperationDecision): + reason: str + + +@dataclass(frozen=True) +class RequireWriteSqlPermissions(WriteSqlOperationDecision): + permissions: PermissionRequirements + + +@dataclass(frozen=True) +class RejectWriteSqlOperation(WriteSqlOperationDecision): + message: str + + +@dataclass(frozen=True) +class UnsupportedWriteSqlOperation(WriteSqlOperationDecision): + message: str + + +def row_mutation_requirements(database: str, table: str) -> PermissionRequirements: + resource = TableResource(database=database, table=table) + return tuple( + PermissionRequirement(action=action, resource=resource) + for action in ("insert-row", "update-row", "delete-row") + ) + + +def decision_for_write_sql_operation( + operation: Operation, +) -> WriteSqlOperationDecision: + unsupported_message = ( + f"Unsupported SQL operation: {operation.operation} {operation.target_type}" + ) + if operation.internal: + return IgnoreWriteSqlOperation("internal SQLite operation") + if operation.operation == "select": + return IgnoreWriteSqlOperation("select statement") + if operation.operation == "vacuum": + return 
RejectWriteSqlOperation("VACUUM is not allowed in user-supplied SQL") + if operation.operation in {"insert", "update", "delete"}: + if operation.table_kind == "virtual": + return RejectWriteSqlOperation( + "Writes to virtual tables are not allowed in user-supplied SQL" + ) + if operation.table_kind == "shadow": + return RejectWriteSqlOperation( + "Writes to shadow tables are not allowed in user-supplied SQL" + ) + if operation.operation == "function": + # SQL functions currently have no Datasette permission mapping. They are + # rejected by the user-supplied write SQL allow-list as unsupported. + return UnsupportedWriteSqlOperation(unsupported_message) + if ( + operation.operation == "read" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="view-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation in {"insert", "update"} + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + row_mutation_requirements( + database=operation.database, + table=operation.table, + ) + ) + if ( + operation.operation == "delete" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="delete-row", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if operation.operation == "create" and operation.target_type == "table": + if operation.database is None: + return UnsupportedWriteSqlOperation(unsupported_message) + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="create-table", + resource=DatabaseResource(database=operation.database), + ), + ) + ) + if ( + operation.operation 
== "alter" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation == "drop" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="drop-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation in {"create", "drop"} + and operation.target_type == "index" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + return UnsupportedWriteSqlOperation(unsupported_message) + + +def operation_is_write(operation: Operation) -> bool: + return operation.operation in { + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "savepoint", + "attach", + "detach", + "pragma", + "analyze", + "reindex", + "vacuum", + "unknown", + } + + +async def ensure_query_write_permissions( + datasette: Datasette, + database: str, + sql: str, + *, + actor: dict[str, object] | None = None, + params: dict[str, object] | None = None, + analysis: SQLAnalysis | None = None, +) -> SQLAnalysis: + db = datasette.get_database(database) + if analysis is None: + if params is None: + params = {name: "" for name in named_parameters(sql)} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError as ex: + raise Forbidden(f"Could not analyze query: {ex}") from ex + + for operation in analysis.operations: + decision = 
decision_for_write_sql_operation(operation) + if isinstance(decision, IgnoreWriteSqlOperation): + continue + if isinstance(decision, RejectWriteSqlOperation): + raise QueryWriteRejected(decision.message) + if isinstance(decision, UnsupportedWriteSqlOperation): + raise Forbidden(decision.message) + permissions = decision.permissions + if operation.database != database: + raise Forbidden("Writable queries may not access attached databases") + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + raise Forbidden( + f"Permission denied: need {permission.action} " + f"on {permission.resource}" + ) + return analysis diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py new file mode 100644 index 00000000..cfaf0f53 --- /dev/null +++ b/tests/test_write_sql.py @@ -0,0 +1,59 @@ +from datasette.utils.sql_analysis import Operation +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + RejectWriteSqlOperation, + RequireWriteSqlPermissions, + UnsupportedWriteSqlOperation, + WriteSqlOperationDecision, + decision_for_write_sql_operation, +) + + +def test_decision_for_write_sql_operation_ignores_internal_and_select_operations(): + internal_decision = decision_for_write_sql_operation( + Operation("read", "schema", None, None, "main", internal=True) + ) + select_decision = decision_for_write_sql_operation( + Operation("select", "statement", None, None, None) + ) + + assert isinstance(internal_decision, IgnoreWriteSqlOperation) + assert isinstance(internal_decision, WriteSqlOperationDecision) + assert isinstance(select_decision, IgnoreWriteSqlOperation) + assert isinstance(select_decision, WriteSqlOperationDecision) + + +def test_decision_for_write_sql_operation_requires_table_write_permissions(): + decision = decision_for_write_sql_operation( + Operation("insert", "table", "data", "dogs", None) + ) + + assert isinstance(decision, RequireWriteSqlPermissions) + assert 
[permission.action for permission in decision.permissions] == [ + "insert-row", + "update-row", + "delete-row", + ] + assert [str(permission.resource) for permission in decision.permissions] == [ + "data/dogs", + "data/dogs", + "data/dogs", + ] + + +def test_decision_for_write_sql_operation_rejects_vacuum(): + decision = decision_for_write_sql_operation( + Operation("vacuum", "statement", None, None, None) + ) + + assert isinstance(decision, RejectWriteSqlOperation) + assert decision.message == "VACUUM is not allowed in user-supplied SQL" + + +def test_decision_for_write_sql_operation_reports_unsupported_functions(): + decision = decision_for_write_sql_operation( + Operation("function", "function", None, None, None, target="upper") + ) + + assert isinstance(decision, UnsupportedWriteSqlOperation) + assert decision.message == "Unsupported SQL operation: function function" From 51dab16149f8b345d46cf517fa03b95fc1028234 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 10:22:16 -0700 Subject: [PATCH 286/299] Allow SQL functions in SQL write queries Closes #2751 --- datasette/write_sql.py | 4 +- docs/authentication.rst | 2 +- docs/json_api.rst | 2 +- docs/sql_queries.rst | 2 +- tests/test_queries.py | 83 +++++++++++++++++++++++++++++++++++++---- tests/test_write_sql.py | 13 ++++++- 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/datasette/write_sql.py b/datasette/write_sql.py index 2e1b69af..cdc0c6d3 100644 --- a/datasette/write_sql.py +++ b/datasette/write_sql.py @@ -82,9 +82,7 @@ def decision_for_write_sql_operation( "Writes to shadow tables are not allowed in user-supplied SQL" ) if operation.operation == "function": - # SQL functions currently have no Datasette permission mapping. They are - # rejected by the user-supplied write SQL allow-list as unsupported. 
- return UnsupportedWriteSqlOperation(unsupported_message) + return IgnoreWriteSqlOperation("SQL function") if ( operation.operation == "read" and operation.target_type == "table" diff --git a/docs/authentication.rst b/docs/authentication.rst index f720c12f..a0891900 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1425,7 +1425,7 @@ See also :ref:`the default_allow_sql setting `. execute-write-sql ----------------- -Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. +Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/json_api.rst b/docs/json_api.rst index fffc16d7..d502299e 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -531,7 +531,7 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. -Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. +Datasette analyzes the SQL before executing it. 
The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. SQL functions are allowed and are not separately restricted by Datasette permissions. A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index f593a534..d427ea2b 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -140,7 +140,7 @@ Datasette stores both configured queries and user-created queries in the ``queri Stored queries created by users default to private. Private stored queries can only be viewed, updated or deleted by the actor that created them. Broad ``view-query``, ``update-query`` or ``delete-query`` permission grants still do not allow other actors to access another actor's private stored queries. -Stored queries created by users are untrusted. This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. +Stored queries created by users are untrusted. This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. 
SQL functions are allowed and are not separately restricted by Datasette permissions. .. _trusted_stored_queries: .. _trusted_saved_queries: diff --git a/tests/test_queries.py b/tests/test_queries.py index 73f8f3cf..9c3ebcc8 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1414,6 +1414,11 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): actor={"id": "root"}, params={"sql": "insert into dogs (name) values (:name)"}, ) + function_response = await ds.client.get( + "/data/-/execute-write/analyze", + actor={"id": "root"}, + params={"sql": "insert into dogs (name) values (upper(:name))"}, + ) read_only_response = await ds.client.get( "/data/-/execute-write/analyze", actor={"id": "root"}, @@ -1438,6 +1443,22 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): ] assert "params" not in data + assert function_response.status_code == 200 + function_data = function_response.json() + assert function_data["ok"] is True + assert function_data["parameters"] == ["name"] + assert function_data["execute_disabled"] is False + assert function_data["analysis_rows"] == [ + { + "operation": "insert", + "database": "data", + "table": "dogs", + "required_permission": "insert-row, update-row, delete-row", + "source": None, + "allowed": True, + } + ] + assert read_only_response.status_code == 200 read_only_data = read_only_response.json() assert read_only_data["ok"] is False @@ -1970,7 +1991,7 @@ async def test_execute_write_create_table_as_select_requires_view_table_on_sourc @pytest.mark.asyncio -async def test_execute_write_rejects_function_operations(): +async def test_execute_write_allows_function_operations(): ds = Datasette( memory=True, default_deny=True, @@ -1998,17 +2019,65 @@ async def test_execute_write_rejects_function_operations(): await db.execute_write("create table dogs (id integer primary key, name text)") await ds.invoke_startup() - denied_response = await ds.client.post( + response = await ds.client.post( 
"/data/-/execute-write", actor={"id": "writer"}, json={"sql": "insert into dogs (name) values (upper('cleo'))"}, ) - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Unsupported SQL operation: function function" - ] - assert (await db.execute("select name from dogs")).dicts() == [] + assert response.status_code == 200 + assert response.json()["ok"] is True + assert (await db.execute("select name from dogs")).dicts() == [{"name": "CLEO"}] + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_allows_function_operations(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("stored_query_function_operation", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + await ds.add_query( + "data", + "insert_dog", + "insert into dogs (name) values (upper(:name))", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + response = await ds.client.post( + "/data/insert_dog?_json=1", + actor={"id": "writer"}, + data={"name": "cleo"}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + assert (await db.execute("select name from dogs")).dicts() == [{"name": "CLEO"}] @pytest.mark.asyncio diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py index cfaf0f53..6d95c3c4 100644 --- a/tests/test_write_sql.py +++ b/tests/test_write_sql.py @@ -50,10 +50,19 @@ def test_decision_for_write_sql_operation_rejects_vacuum(): assert decision.message == "VACUUM is not allowed in user-supplied SQL" -def 
test_decision_for_write_sql_operation_reports_unsupported_functions(): +def test_decision_for_write_sql_operation_ignores_functions(): decision = decision_for_write_sql_operation( Operation("function", "function", None, None, None, target="upper") ) + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "SQL function" + + +def test_decision_for_write_sql_operation_reports_unsupported_operations(): + decision = decision_for_write_sql_operation( + Operation("unknown", "unknown", None, None, None) + ) + assert isinstance(decision, UnsupportedWriteSqlOperation) - assert decision.message == "Unsupported SQL operation: function function" + assert decision.message == "Unsupported SQL operation: unknown unknown" From b2b20b36c52ea446fb05fe688b636b83d187e6a6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 10:24:40 -0700 Subject: [PATCH 287/299] Document write SQL analyzer restrictions Expand the unreleased changelog with the deny-by-default operation analysis model, SQL function handling, and the VACUUM and virtual/shadow table restrictions for user-supplied write SQL. Clarify the /-/execute-write JSON API documentation with the same restrictions and DDL permission requirements. --- docs/changelog.rst | 2 ++ docs/json_api.rst | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2ba713ee..a4be98b1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -24,6 +24,8 @@ Write SQL UI - New "Write to this database" interface at ``//-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted and links to a newly inserted row when a single-row insert succeeds. (:issue:`2742`) - Added the new :ref:`execute-write-sql ` permission for running arbitrary writable SQL. 
Execution is also gated by table-level permissions such as :ref:`insert-row `, :ref:`update-row ` and :ref:`delete-row `, and writes to attached databases are rejected. (:issue:`2742`) +- The write SQL analyzer now uses a deny-by-default model for unsupported operations. Reads from source tables require :ref:`view-table ` permission, schema changes require :ref:`create-table `, :ref:`alter-table ` or :ref:`drop-table ` as appropriate, and row mutation statements require the full ``insert-row``, ``update-row`` and ``delete-row`` permission set. SQL functions are allowed and are not separately permission-gated. (:issue:`2748`) +- User-supplied write SQL now rejects ``VACUUM`` and writes to SQLite virtual tables or shadow tables. These restrictions also apply to untrusted stored write queries; trusted configured stored queries continue to skip these filters. (:issue:`2748`) Plugin API changes ~~~~~~~~~~~~~~~~~~ diff --git a/docs/json_api.rst b/docs/json_api.rst index d502299e..db19afc2 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -531,7 +531,9 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. -Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. SQL functions are allowed and are not separately restricted by Datasette permissions. +Datasette analyzes the SQL before executing it. 
The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. Schema changes require ``create-table``, ``alter-table`` or ``drop-table`` permissions as appropriate. + +Unsupported SQL operations are rejected by default. ``VACUUM`` is not allowed in arbitrary write SQL, and writes to SQLite virtual tables or shadow tables are rejected. SQL functions are allowed and are not separately restricted by Datasette permissions. A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: From cbe9594a3dcac1f91a6baa7ac99a138c22a71a8a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 11:00:04 -0700 Subject: [PATCH 288/299] Use SQLiteTableType directly in SQL analysis Remove the redundant SQLTableKind alias from the write SQL analysis model. Operation.table_kind and the analyzer cache now use the SQLite metadata classification type directly, making the source of table-kind values clearer. 
--- datasette/utils/sql_analysis.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index b5d7ada8..0a3a947c 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -42,7 +42,6 @@ SQLTargetType = Literal[ SQLTableOperation = Literal["read", "insert", "update", "delete"] SQLSchemaOperation = Literal["create", "drop"] SQLSchemaTargetType = Literal["index", "table", "trigger", "view", "virtual-table"] -SQLTableKind = SQLiteTableType @dataclass(frozen=True) @@ -52,7 +51,7 @@ class Operation: database: str | None table: str | None sqlite_schema: str | None - table_kind: SQLTableKind | None = None + table_kind: SQLiteTableType | None = None target: str | None = None columns: tuple[str, ...] = () source: str | None = None @@ -428,7 +427,7 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK - table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + table_kind_cache: dict[tuple[str | None, str], SQLiteTableType | None] = {} conn.set_authorizer(authorizer) try: @@ -523,7 +522,7 @@ def analyze_sql_tables( return True return False - def table_kind_for(key: OperationKey) -> SQLTableKind | None: + def table_kind_for(key: OperationKey) -> SQLiteTableType | None: if ( key.target_type != "table" or key.operation not in {"read", "insert", "update", "delete"} From 17f45b884b4b4844e9f0cce0fef402e888c690f0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 12:06:57 -0700 Subject: [PATCH 289/299] Clarify ignored write SQL operation tests Split the combined ignored-operation decision test into separate internal-operation and select-statement cases. Assert the decision reason for each case instead of checking the shared base class, so the tests document why those operations are ignored. 
--- tests/test_write_sql.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py index 6d95c3c4..75d6b6e1 100644 --- a/tests/test_write_sql.py +++ b/tests/test_write_sql.py @@ -4,23 +4,26 @@ from datasette.write_sql import ( RejectWriteSqlOperation, RequireWriteSqlPermissions, UnsupportedWriteSqlOperation, - WriteSqlOperationDecision, decision_for_write_sql_operation, ) -def test_decision_for_write_sql_operation_ignores_internal_and_select_operations(): - internal_decision = decision_for_write_sql_operation( +def test_decision_for_write_sql_operation_ignores_internal_operations(): + decision = decision_for_write_sql_operation( Operation("read", "schema", None, None, "main", internal=True) ) - select_decision = decision_for_write_sql_operation( + + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "internal SQLite operation" + + +def test_decision_for_write_sql_operation_ignores_select_statement_operations(): + decision = decision_for_write_sql_operation( Operation("select", "statement", None, None, None) ) - assert isinstance(internal_decision, IgnoreWriteSqlOperation) - assert isinstance(internal_decision, WriteSqlOperationDecision) - assert isinstance(select_decision, IgnoreWriteSqlOperation) - assert isinstance(select_decision, WriteSqlOperationDecision) + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "select statement" def test_decision_for_write_sql_operation_requires_table_write_permissions(): From 0b7c26c6c8bf4827c02aba9707b1db0eb63aeaa5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 12:09:02 -0700 Subject: [PATCH 290/299] Refactored write decision tests --- tests/test_write_sql.py | 71 ---------------- tests/test_write_sql_operation_decisions.py | 94 +++++++++++++++++++++ 2 files changed, 94 insertions(+), 71 deletions(-) delete mode 100644 tests/test_write_sql.py create mode 100644 
tests/test_write_sql_operation_decisions.py diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py deleted file mode 100644 index 75d6b6e1..00000000 --- a/tests/test_write_sql.py +++ /dev/null @@ -1,71 +0,0 @@ -from datasette.utils.sql_analysis import Operation -from datasette.write_sql import ( - IgnoreWriteSqlOperation, - RejectWriteSqlOperation, - RequireWriteSqlPermissions, - UnsupportedWriteSqlOperation, - decision_for_write_sql_operation, -) - - -def test_decision_for_write_sql_operation_ignores_internal_operations(): - decision = decision_for_write_sql_operation( - Operation("read", "schema", None, None, "main", internal=True) - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "internal SQLite operation" - - -def test_decision_for_write_sql_operation_ignores_select_statement_operations(): - decision = decision_for_write_sql_operation( - Operation("select", "statement", None, None, None) - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "select statement" - - -def test_decision_for_write_sql_operation_requires_table_write_permissions(): - decision = decision_for_write_sql_operation( - Operation("insert", "table", "data", "dogs", None) - ) - - assert isinstance(decision, RequireWriteSqlPermissions) - assert [permission.action for permission in decision.permissions] == [ - "insert-row", - "update-row", - "delete-row", - ] - assert [str(permission.resource) for permission in decision.permissions] == [ - "data/dogs", - "data/dogs", - "data/dogs", - ] - - -def test_decision_for_write_sql_operation_rejects_vacuum(): - decision = decision_for_write_sql_operation( - Operation("vacuum", "statement", None, None, None) - ) - - assert isinstance(decision, RejectWriteSqlOperation) - assert decision.message == "VACUUM is not allowed in user-supplied SQL" - - -def test_decision_for_write_sql_operation_ignores_functions(): - decision = decision_for_write_sql_operation( - 
Operation("function", "function", None, None, None, target="upper") - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "SQL function" - - -def test_decision_for_write_sql_operation_reports_unsupported_operations(): - decision = decision_for_write_sql_operation( - Operation("unknown", "unknown", None, None, None) - ) - - assert isinstance(decision, UnsupportedWriteSqlOperation) - assert decision.message == "Unsupported SQL operation: unknown unknown" diff --git a/tests/test_write_sql_operation_decisions.py b/tests/test_write_sql_operation_decisions.py new file mode 100644 index 00000000..cc19f701 --- /dev/null +++ b/tests/test_write_sql_operation_decisions.py @@ -0,0 +1,94 @@ +import pytest + +from datasette.utils.sql_analysis import Operation +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + RejectWriteSqlOperation, + RequireWriteSqlPermissions, + UnsupportedWriteSqlOperation, + decision_for_write_sql_operation, +) + + +@pytest.mark.parametrize( + ("operation", "reason"), + ( + pytest.param( + Operation("read", "schema", None, None, "main", internal=True), + "internal SQLite operation", + id="internal", + ), + pytest.param( + Operation("select", "statement", None, None, None), + "select statement", + id="select-statement", + ), + pytest.param( + Operation("function", "function", None, None, None, target="upper"), + "SQL function", + id="function", + ), + ), +) +def test_decision_for_write_sql_operation_ignores_operations(operation, reason): + decision = decision_for_write_sql_operation(operation) + + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == reason + + +@pytest.mark.parametrize("operation", ("insert", "update")) +def test_decision_for_write_sql_operation_requires_table_write_permissions(operation): + decision = decision_for_write_sql_operation( + Operation(operation, "table", "data", "dogs", None) + ) + + assert isinstance(decision, RequireWriteSqlPermissions) + assert 
[permission.action for permission in decision.permissions] == [ + "insert-row", + "update-row", + "delete-row", + ] + assert [str(permission.resource) for permission in decision.permissions] == [ + "data/dogs", + "data/dogs", + "data/dogs", + ] + + +@pytest.mark.parametrize( + ("operation", "message"), + ( + pytest.param( + Operation("vacuum", "statement", None, None, None), + "VACUUM is not allowed in user-supplied SQL", + id="vacuum", + ), + pytest.param( + Operation("insert", "table", "data", "docs", None, table_kind="virtual"), + "Writes to virtual tables are not allowed in user-supplied SQL", + id="virtual-table", + ), + pytest.param( + Operation( + "insert", "table", "data", "docs_data", None, table_kind="shadow" + ), + "Writes to shadow tables are not allowed in user-supplied SQL", + id="shadow-table", + ), + ), +) +def test_decision_for_write_sql_operation_rejects_operations(operation, message): + decision = decision_for_write_sql_operation(operation) + + assert isinstance(decision, RejectWriteSqlOperation) + assert decision.message == message + + +def test_decision_for_write_sql_operation_reports_unsupported_operations(): + decision = decision_for_write_sql_operation( + Operation("unknown", "unknown", None, None, None) + ) + + assert isinstance(decision, UnsupportedWriteSqlOperation) + assert decision.message == "Unsupported SQL operation: unknown unknown" From cd838daef4d066e584b047164d8e2a5e96909511 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:22:21 -0700 Subject: [PATCH 291/299] Refactor tests a bit --- tests/test_queries.py | 449 +++++++++++++++++++++--------------------- 1 file changed, 225 insertions(+), 224 deletions(-) diff --git a/tests/test_queries.py b/tests/test_queries.py index 9c3ebcc8..216cb211 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1700,8 +1700,22 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] 
== "Cleo" +@pytest.mark.parametrize( + "database_name, sql", + ( + ( + "execute_write_insert_or_replace", + "insert or replace into users(id, email) values (3, 'b@example.com')", + ), + ( + "execute_write_update_or_replace", + "update or replace users set email = 'b@example.com' where id = 1", + ), + ), + ids=("insert-or-replace", "update-or-replace"), +) @pytest.mark.asyncio -async def test_execute_write_insert_or_replace_requires_delete_row_permission(): +async def test_execute_write_replace_requires_delete_row_permission(database_name, sql): ds = Datasette( memory=True, default_deny=True, @@ -1725,7 +1739,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): } }, ) - db = ds.add_memory_database("execute_write_insert_or_replace", name="data") + db = ds.add_memory_database(database_name, name="data") await db.execute_write( "create table users (id integer primary key, email text unique)" ) @@ -1738,64 +1752,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, - json={ - "sql": ( - "insert or replace into users(id, email) " "values (3, 'b@example.com')" - ) - }, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Permission denied: need delete-row on data/users" - ] - assert (await db.execute("select id, email from users order by id")).dicts() == [ - {"id": 1, "email": "a@example.com"}, - {"id": 2, "email": "b@example.com"}, - ] - - -@pytest.mark.asyncio -async def test_execute_write_update_or_replace_requires_delete_row_permission(): - ds = Datasette( - memory=True, - default_deny=True, - config={ - "databases": { - "data": { - "permissions": { - "view-database": {"id": "writer"}, - "execute-write-sql": {"id": "writer"}, - }, - "tables": { - "users": { - "permissions": { - "insert-row": {"id": "writer"}, - "update-row": {"id": "writer"}, - "view-table": {"id": 
"writer"}, - } - } - }, - } - } - }, - ) - db = ds.add_memory_database("execute_write_update_or_replace", name="data") - await db.execute_write( - "create table users (id integer primary key, email text unique)" - ) - await db.execute_write( - "insert into users (id, email) values " - "(1, 'a@example.com'), (2, 'b@example.com')" - ) - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "writer"}, - json={ - "sql": "update or replace users set email = 'b@example.com' where id = 1" - }, + json={"sql": sql}, ) assert denied_response.status_code == 403 @@ -2262,74 +2219,71 @@ async def test_trusted_stored_write_query_skips_vacuum_filtering(): assert response.json()["ok"] is True +@pytest.mark.parametrize( + ( + "database_name", + "setup_sqls", + "write_sql", + "expected_error", + "verification_sql", + "expected_count", + ), + ( + ( + "execute_write_virtual_table_control", + ( + "create virtual table docs using fts5(title, body, content='')", + "insert into docs(rowid, title, body) values (1, 'hello', 'world')", + ), + "insert into docs(docs) values('delete-all')", + "Writes to virtual tables are not allowed in user-supplied SQL", + "select count(*) from docs where docs match 'hello'", + 1, + ), + ( + "execute_write_virtual_table_insert", + ("create virtual table docs using fts5(title, body)",), + "insert into docs(rowid, title, body) values (1, 'a', 'b')", + "Writes to virtual tables are not allowed in user-supplied SQL", + "select count(*) from docs", + 0, + ), + ( + "execute_write_shadow_table_insert", + ("create virtual table docs using fts5(title, body)",), + "insert into docs_config(k, v) values ('x', 1)", + "Writes to shadow tables are not allowed in user-supplied SQL", + "select count(*) from docs_config", + 1, + ), + ), + ids=("control-insert", "virtual-table", "shadow-table"), +) @pytest.mark.asyncio -async def test_execute_write_rejects_virtual_table_control_insert(): +async def 
test_execute_write_rejects_virtual_and_shadow_table_writes( + database_name, + setup_sqls, + write_sql, + expected_error, + verification_sql, + expected_count, +): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True - db = ds.add_memory_database("execute_write_virtual_table_control", name="data") - await db.execute_write(""" - create virtual table docs using fts5(title, body, content='') - """) - await db.execute_write(""" - insert into docs(rowid, title, body) values (1, 'hello', 'world') - """) + db = ds.add_memory_database(database_name, name="data") + for setup_sql in setup_sqls: + await db.execute_write(setup_sql) await ds.invoke_startup() denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "root"}, - json={"sql": "insert into docs(docs) values('delete-all')"}, + json={"sql": write_sql}, ) assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to virtual tables are not allowed in user-supplied SQL" - ] - assert ( - await db.execute("select count(*) from docs where docs match 'hello'") - ).first()[0] == 1 - - -@pytest.mark.asyncio -async def test_execute_write_rejects_regular_virtual_table_insert(): - ds = Datasette(memory=True, default_deny=True) - ds.root_enabled = True - db = ds.add_memory_database("execute_write_virtual_table_insert", name="data") - await db.execute_write("create virtual table docs using fts5(title, body)") - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "root"}, - json={"sql": "insert into docs(rowid, title, body) values (1, 'a', 'b')"}, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to virtual tables are not allowed in user-supplied SQL" - ] - assert (await db.execute("select count(*) from docs")).first()[0] == 0 - - -@pytest.mark.asyncio -async def test_execute_write_rejects_shadow_table_insert(): - ds = Datasette(memory=True, 
default_deny=True) - ds.root_enabled = True - db = ds.add_memory_database("execute_write_shadow_table_insert", name="data") - await db.execute_write("create virtual table docs using fts5(title, body)") - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "root"}, - json={"sql": "insert into docs_config(k, v) values ('x', 1)"}, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to shadow tables are not allowed in user-supplied SQL" - ] - assert (await db.execute("select count(*) from docs_config")).first()[0] == 1 + assert denied_response.json()["errors"] == [expected_error] + assert (await db.execute(verification_sql)).first()[0] == expected_count @pytest.mark.asyncio @@ -2482,8 +2436,69 @@ async def test_execute_write_create_table_uses_create_table_permission(): assert not await db.table_exists("should_not_exist") +@pytest.mark.parametrize( + ( + "database_name", + "allowed_actor", + "allowed_sql", + "denied_sql", + "expected_error", + "setup_sqls", + "expected_state", + ), + ( + ( + "execute_write_alter_table", + "alterer", + "alter table dogs add column age integer", + "alter table cats add column age integer", + "Permission denied: need alter-table on data/cats", + (), + "alter-table", + ), + ( + "execute_write_create_index", + "alterer", + "create index idx_dogs_name on dogs(name)", + "create index idx_cats_name on cats(name)", + "Permission denied: need alter-table on data/cats", + (), + "create-index", + ), + ( + "execute_write_drop_index", + "alterer", + "drop index idx_dogs_name", + "drop index idx_cats_name", + "Permission denied: need alter-table on data/cats", + ( + "create index idx_dogs_name on dogs(name)", + "create index idx_cats_name on cats(name)", + ), + "drop-index", + ), + ( + "execute_write_drop_table", + "dropper", + "drop table dogs", + "drop table cats", + "Permission denied: need drop-table on data/cats", + (), + 
"drop-table", + ), + ), + ids=("alter-table", "create-index", "drop-index", "drop-table"), +) @pytest.mark.asyncio -async def test_execute_write_alter_and_drop_table_use_schema_permissions(): +async def test_execute_write_schema_operations_use_schema_permissions( + database_name, + allowed_actor, + allowed_sql, + denied_sql, + expected_error, + setup_sqls, + expected_state, +): ds = Datasette( memory=True, default_deny=True, @@ -2513,73 +2528,53 @@ async def test_execute_write_alter_and_drop_table_use_schema_permissions(): }, }, ) - db = ds.add_memory_database("execute_write_alter_drop_table", name="data") + db = ds.add_memory_database(database_name, name="data") await db.execute_write("create table dogs (id integer primary key, name text)") await db.execute_write("create table cats (id integer primary key, name text)") + for setup_sql in setup_sqls: + await db.execute_write(setup_sql) await ds.invoke_startup() - alter_allowed_response = await ds.client.post( + async def index_exists(index_name): + row = ( + await db.execute( + "select 1 from sqlite_master where type = 'index' and name = ?", + [index_name], + ) + ).first() + return row is not None + + allowed_response = await ds.client.post( "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "alter table dogs add column age integer"}, + actor={"id": allowed_actor}, + json={"sql": allowed_sql}, ) - alter_row_permission_response = await ds.client.post( + denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "row-writer"}, - json={"sql": "alter table cats add column age integer"}, + json={"sql": denied_sql}, ) - assert alter_allowed_response.status_code == 200 - assert "age" in [column.name for column in await db.table_column_details("dogs")] - assert alter_row_permission_response.status_code == 403 - assert alter_row_permission_response.json()["errors"] == [ - "Permission denied: need alter-table on data/cats" - ] - assert "age" not in [ - column.name for column in await 
db.table_column_details("cats") - ] + assert allowed_response.status_code == 200 + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [expected_error] - create_index_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "create index idx_dogs_name on dogs(name)"}, - ) - create_index_row_permission_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "row-writer"}, - json={"sql": "create index idx_cats_name on cats(name)"}, - ) - drop_index_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "drop index idx_dogs_name"}, - ) - - assert create_index_allowed_response.status_code == 200 - assert create_index_row_permission_response.status_code == 403 - assert create_index_row_permission_response.json()["errors"] == [ - "Permission denied: need alter-table on data/cats" - ] - assert drop_index_allowed_response.status_code == 200 - - drop_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "dropper"}, - json={"sql": "drop table dogs"}, - ) - drop_row_permission_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "row-writer"}, - json={"sql": "drop table cats"}, - ) - - assert drop_allowed_response.status_code == 200 - assert not await db.table_exists("dogs") - assert drop_row_permission_response.status_code == 403 - assert drop_row_permission_response.json()["errors"] == [ - "Permission denied: need drop-table on data/cats" - ] - assert await db.table_exists("cats") + if expected_state == "alter-table": + assert "age" in [ + column.name for column in await db.table_column_details("dogs") + ] + assert "age" not in [ + column.name for column in await db.table_column_details("cats") + ] + elif expected_state == "create-index": + assert await index_exists("idx_dogs_name") + assert not await index_exists("idx_cats_name") + elif expected_state == 
"drop-index": + assert not await index_exists("idx_dogs_name") + assert await index_exists("idx_cats_name") + elif expected_state == "drop-table": + assert not await db.table_exists("dogs") + assert await db.table_exists("cats") @pytest.mark.asyncio @@ -2644,8 +2639,9 @@ async def test_execute_write_post_rejects_read_only_sql(): ] +@pytest.mark.parametrize("action", ("view-query", "update-query", "delete-query")) @pytest.mark.asyncio -async def test_query_owner_gets_update_delete_and_writable_view_defaults(): +async def test_query_owner_gets_update_delete_and_writable_view_defaults(action): ds = Datasette(memory=True, default_deny=True) ds.add_memory_database("query_owner_defaults", name="data") await ds.invoke_startup() @@ -2658,21 +2654,35 @@ async def test_query_owner_gets_update_delete_and_writable_view_defaults(): owner_id="alice", ) - for action in ("view-query", "update-query", "delete-query"): - assert await ds.allowed( - action=action, - resource=QueryResource("data", "insert_dog"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action=action, - resource=QueryResource("data", "insert_dog"), - actor={"id": "bob"}, - ) + assert await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "bob"}, + ) +@pytest.mark.parametrize( + "action, path_suffix, request_json, expected_public_title", + ( + ( + "update-query", + "-/update", + {"update": {"title": "Bob can edit public queries"}}, + "Bob can edit public queries", + ), + ("delete-query", "-/delete", {}, None), + ), + ids=("update-query", "delete-query"), +) @pytest.mark.asyncio -async def test_private_query_restricts_broad_update_delete_permissions(): +async def test_private_query_restricts_broad_update_delete_permissions( + action, path_suffix, request_json, expected_public_title +): ds = Datasette( memory=True, default_deny=True, @@ 
-2706,50 +2716,41 @@ async def test_private_query_restricts_broad_update_delete_permissions(): owner_id="alice", ) - for action in ("update-query", "delete-query"): - assert await ds.allowed( - action=action, - resource=QueryResource("data", "alice_private"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action=action, - resource=QueryResource("data", "alice_private"), - actor={"id": "bob"}, - ) - assert await ds.allowed( - action=action, - resource=QueryResource("data", "alice_public"), - actor={"id": "bob"}, - ) - - private_update_response = await ds.client.post( - "/data/alice_private/-/update", - actor={"id": "bob"}, - json={"update": {"title": "Nope"}}, + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "alice"}, ) - private_delete_response = await ds.client.post( - "/data/alice_private/-/delete", + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), actor={"id": "bob"}, - json={}, ) - public_update_response = await ds.client.post( - "/data/alice_public/-/update", + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_public"), actor={"id": "bob"}, - json={"update": {"title": "Bob can edit public queries"}}, - ) - public_delete_response = await ds.client.post( - "/data/alice_public/-/delete", - actor={"id": "bob"}, - json={}, ) - assert private_update_response.status_code == 403 - assert private_delete_response.status_code == 403 - assert public_update_response.status_code == 200 - assert public_delete_response.status_code == 200 + private_response = await ds.client.post( + "/data/alice_private/{}".format(path_suffix), + actor={"id": "bob"}, + json=request_json, + ) + public_response = await ds.client.post( + "/data/alice_public/{}".format(path_suffix), + actor={"id": "bob"}, + json=request_json, + ) + + assert private_response.status_code == 403 + assert public_response.status_code == 200 assert await 
ds.get_query("data", "alice_private") is not None - assert await ds.get_query("data", "alice_public") is None + public_query = await ds.get_query("data", "alice_public") + if expected_public_title is None: + assert public_query is None + else: + assert public_query.title == expected_public_title @pytest.mark.asyncio From b6e9b189905f6a03136e5998fdf39e1944a1e2a8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:37:48 -0700 Subject: [PATCH 292/299] datasette.yml can no longer set a query to private Private means it has an owner, and the config does not let you say who the owner is - plus configured queries should not be possible to edit or delete in the UI so having an owner makes even less sense. You can still make configured queries visible to specific people using regular view-query permissions. --- datasette/stored_queries.py | 1 - tests/test_queries.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index b6ac49b8..a6123daa 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -109,7 +109,6 @@ async def save_queries_from_config(datasette: Any) -> None: fragment=query_config.get("fragment"), parameters=query_config.get("params"), is_write=bool(query_config.get("write")), - is_private=bool(query_config.get("is_private")), is_trusted=bool(query_config.get("is_trusted", True)), source="config", on_success_message=query_config.get("on_success_message"), diff --git a/tests/test_queries.py b/tests/test_queries.py index 216cb211..2aa5142b 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -191,6 +191,8 @@ async def test_config_queries_imported_to_internal_table(): "title": "Configured query", "description_html": "

        Configured HTML

        ", "params": ["name"], + # Configured queries are always public; this is ignored. + "is_private": True, "on_success_message_sql": "select 'Hello ' || :name", } } From 74324cb8492be8aa8597e58fb6f690158128e6fc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:46:27 -0700 Subject: [PATCH 293/299] Improved docs for user-facing SQL query pages - /database-name/-/execute-write - /-/queries --- docs/authentication.rst | 4 ++-- docs/pages.rst | 27 +++++++++++++++++++++++++++ docs/sql_queries.rst | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/authentication.rst b/docs/authentication.rst index a0891900..5d831da0 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1413,7 +1413,7 @@ Actor is allowed to drop a database table. execute-sql ----------- -Actor is allowed to run arbitrary read-only SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 +Actor is allowed to run arbitrary read-only SQL queries against a specific database using the :ref:`custom SQL query page <pages_custom_sql_queries>`, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) @@ -1425,7 +1425,7 @@ See also :ref:`the default_allow_sql setting `. execute-write-sql ----------------- -Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions. +Actor is allowed to run arbitrary writable SQL queries against a specific database using the :ref:`write SQL queries page <pages_execute_write>`, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions.
``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/pages.rst b/docs/pages.rst index e57c15e6..a8ff7c37 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -62,6 +62,11 @@ The following tables are hidden by default: Queries ======= +.. _pages_custom_sql_queries: + +Custom SQL queries +------------------ + The ``/database-name/-/query`` page can be used to execute an arbitrary SQL query against that database, if the :ref:`actions_execute_sql` permission is enabled. This query is passed as the ``?sql=`` query string parameter. This means you can link directly to a query by constructing the following URL: @@ -72,6 +77,28 @@ Each configured :ref:`stored query ` has its own page, at ``/dat In both cases adding a ``.json`` extension to the URL will return the results as JSON. +.. _pages_execute_write: + +Write SQL queries +----------------- + +The ``/database-name/-/execute-write`` page can be used to execute SQL statements that write to a mutable database, if the :ref:`actions_execute_write_sql` permission is enabled. + +This page extracts named parameters from the SQL, shows the tables that will be affected and lists the permissions required before the query can be executed. It also includes templates for common ``INSERT``, ``UPDATE`` and ``DELETE`` statements. + +Datasette checks additional permissions based on the operations in the SQL. Row changes require the relevant table-level permissions such as :ref:`actions_insert_row`, :ref:`actions_update_row` and :ref:`actions_delete_row`; reads from source tables require :ref:`actions_view_table`; and schema changes require permissions such as :ref:`actions_create_table`, :ref:`actions_alter_table` or :ref:`actions_drop_table`. + +Use the :ref:`ExecuteWriteView` JSON API to execute writable SQL programmatically. + +.. 
_pages_stored_query_browser: + +Stored query browsers +--------------------- + +The ``/-/queries`` page lists stored queries across every database visible to the current actor. The ``/database-name/-/queries`` page lists stored queries for a single database. + +These pages support search, pagination and filters for read-only or writable queries and private or public queries. Adding a ``.json`` extension to either URL returns the same list as JSON. + .. _TableView: Table diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index d427ea2b..c0ba67f0 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -7,6 +7,8 @@ Datasette treats SQLite database files as read-only and immutable. This means it The easiest way to execute custom SQL against Datasette is through the web UI. The database index page includes a SQL editor that lets you run any SELECT query you like. You can also construct queries using the filter interface on the tables page, then click "View and edit SQL" to open that query in the custom SQL editor. +For mutable databases, actors with the appropriate permissions can use the :ref:`write SQL page <pages_execute_write>` to execute SQL statements that insert, update or delete rows. + Note that this interface is only available if the :ref:`actions_execute_sql` permission is allowed. See :ref:`authentication_permissions_execute_sql`. Any Datasette SQL query is reflected in the URL of the page, allowing you to bookmark them, share them with others and navigate through previous queries using your browser back button.
From 6a998610eef6e69d439a654dd31087023d285452 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:52:51 -0700 Subject: [PATCH 294/299] datasette inspect now counts 10,000+ tables correctly (#2752) Closes #2712 Refs https://github.com/simonw/datasette/pull/2721#issuecomment-4568966383 --- datasette/cli.py | 7 ++++--- docs/changelog.rst | 1 + tests/test_cli.py | 18 +++++++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 93aa22ef..90a33e80 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -21,6 +21,7 @@ from .app import ( SQLITE_LIMIT_ATTACHED, pm, ) +from .inspect import inspect_tables from .utils import ( LoadExtension, StartupError, @@ -154,14 +155,14 @@ async def inspect_(files, sqlite_extensions): app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) data = {} for name, database in app.databases.items(): - counts = await database.table_counts(limit=3600 * 1000) + tables = await database.execute_fn(lambda conn: inspect_tables(conn, {})) data[name] = { "hash": database.hash, "size": database.size, "file": database.path, "tables": { - table_name: {"count": table_count} - for table_name, table_count in counts.items() + table_name: {"count": table["count"]} + for table_name, table in tables.items() }, } return data diff --git a/docs/changelog.rst b/docs/changelog.rst index a4be98b1..3882cc12 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -37,6 +37,7 @@ Bug fixes ~~~~~~~~~ - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) +- The ``datasette inspect`` command now correctly records row counts for tables with more than 10,000 rows. (:issue:`2712`) .. 
_v1_0_a30: diff --git a/tests/test_cli.py b/tests/test_cli.py index 1d3a2b28..f86d6909 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -35,12 +35,28 @@ def test_inspect_cli(app_client): assert expected_count == database["tables"][table_name]["count"] +def test_inspect_cli_counts_all_rows(tmp_path): + db_path = tmp_path / "big.db" + conn = sqlite3.connect(db_path) + with conn: + conn.execute("create table t (id integer primary key)") + conn.executemany("insert into t (id) values (?)", ((i,) for i in range(10002))) + conn.close() + + runner = CliRunner() + result = runner.invoke(cli, ["inspect", str(db_path)]) + assert result.exit_code == 0, result.output + data = json.loads(result.output) + + assert data["big"]["tables"]["t"]["count"] == 10002 + + def test_inspect_cli_writes_to_file(app_client): runner = CliRunner() result = runner.invoke( cli, ["inspect", "fixtures.db", "--inspect-file", "foo.json"] ) - assert 0 == result.exit_code, result.output + assert result.exit_code == 0, result.output with open("foo.json") as fp: data = json.load(fp) assert ["fixtures"] == list(data.keys()) From e5b6166fa35558920342e74f5ec13078957e87bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 16:19:39 -0700 Subject: [PATCH 295/299] Nicer UI around Execute Write SQL denied Refs https://github.com/simonw/datasette/issues/2753#issuecomment-4569117665 --- datasette/templates/execute_write.html | 82 ++++++++++++++++++++------ datasette/views/execute_write.py | 17 +++--- datasette/views/query_helpers.py | 20 +++++-- tests/test_queries.py | 75 ++++++++++++++++++++++- 4 files changed, 160 insertions(+), 34 deletions(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 6b626f8d..ee251111 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -40,6 +40,26 @@ border-radius: 0.25rem; min-width: 13rem; } +.execute-write-submit-row { + align-items: center; + display: flex; + 
flex-wrap: wrap; + gap: 0.45rem 0.75rem; +} +.execute-write-submit-row [hidden] { + display: none; +} +form.sql.core input[data-execute-write-submit]:disabled { + background: #d0d7de; + border-color: #b6c0cc; + color: #5f6975; + cursor: not-allowed; + opacity: 1; +} +.execute-write-disabled-reason { + color: #4f5b6d; + font-size: 0.85rem; +} {% include "_execute_write_analysis_styles.html" %} {% include "_sql_parameter_styles.html" %} @@ -119,9 +139,10 @@ {% endif %}
        -

        - - {% if save_query_base_url %}Save this query{% endif %} +

        + + {{ execute_disabled_reason or "" }} + {% if save_query_url %}Save this query{% endif %}

        @@ -143,25 +164,55 @@ window.addEventListener("DOMContentLoaded", () => { const submitButton = form ? form.querySelector("[data-execute-write-submit]") : null; - const saveQueryLink = form + const submitDisabledReason = form + ? form.querySelector("[data-execute-write-disabled-reason]") + : null; + const submitRow = form + ? form.querySelector(".execute-write-submit-row") + : null; + let saveQueryLink = form ? form.querySelector("[data-save-query-link]") : null; + function updateSubmitState(data) { + if (submitButton) { + submitButton.disabled = data.execute_disabled; + } + if (!submitDisabledReason) { + return; + } + const reason = data.execute_disabled_reason || ""; + submitDisabledReason.textContent = reason; + submitDisabledReason.hidden = !reason; + } + function updateSaveQueryLink(data) { - if (!saveQueryLink) { + if (!submitRow || !submitRow.dataset.saveQueryBaseUrl) { return; } const sql = window.editor ? window.editor.state.doc.toString() : executeWriteSqlInput.value; if (!sql.trim() || !data.ok || data.execute_disabled) { - saveQueryLink.hidden = true; + if (saveQueryLink) { + saveQueryLink.remove(); + saveQueryLink = null; + } return; } - const url = new URL(saveQueryLink.dataset.saveQueryBaseUrl, window.location.href); + if (!saveQueryLink) { + saveQueryLink = document.createElement("a"); + saveQueryLink.className = "save-query"; + saveQueryLink.setAttribute("data-save-query-link", ""); + saveQueryLink.textContent = "Save this query"; + submitRow.appendChild(saveQueryLink); + } + const url = new URL( + submitRow.dataset.saveQueryBaseUrl, + window.location.href + ); url.searchParams.set("sql", sql); saveQueryLink.href = url.pathname + url.search + url.hash; - saveQueryLink.hidden = false; } window.datasetteSqlParameters.setupSqlParameterRefresh({ @@ -170,9 +221,7 @@ window.addEventListener("DOMContentLoaded", () => { allowExpand: true, onData(data) { window.datasetteSqlAnalysis.renderAnalysis(analysisSection, data); - if (submitButton) { - 
submitButton.disabled = data.execute_disabled; - } + updateSubmitState(data); updateSaveQueryLink(data); }, onError(error) { @@ -180,12 +229,11 @@ window.addEventListener("DOMContentLoaded", () => { analysis_error: error.message, analysis_rows: [], }); - if (submitButton) { - submitButton.disabled = true; - } - if (saveQueryLink) { - saveQueryLink.hidden = true; - } + updateSubmitState({ + execute_disabled: true, + execute_disabled_reason: error.message, + }); + updateSaveQueryLink({ ok: false, execute_disabled: true }); }, }); }); diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 57c4d78e..7b693978 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -14,6 +14,7 @@ from .query_helpers import ( _coerce_execute_write_payload, _derived_query_parameters, _execute_write_analysis_data, + _execute_write_disabled_reason, _inserted_row_url, _json_or_form_payload, _prepare_execute_write, @@ -80,13 +81,12 @@ class ExecuteWriteView(BaseView): ) save_query_base_url = None save_query_url = None + execute_disabled_reason = _execute_write_disabled_reason( + sql, analysis_error, analysis_rows + ) if allow_save_query: save_query_base_url = self.ds.urls.database(db.name) + "/-/queries/store" - if ( - sql - and analysis_error is None - and not any(row["allowed"] is False for row in analysis_rows) - ): + if not execute_disabled_reason: save_query_url = save_query_base_url + "?" 
+ urlencode({"sql": sql}) response = await self.render( @@ -103,11 +103,8 @@ class ExecuteWriteView(BaseView): "execution_message": execution_message, "execution_links": execution_links, "execution_ok": execution_ok, - "execute_disabled": bool( - (not sql) - or analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), + "execute_disabled": bool(execute_disabled_reason), + "execute_disabled_reason": execute_disabled_reason, "table_columns": table_columns, "write_template_tables": write_template_tables, "save_query_url": save_query_url, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 712832e8..f30a30bc 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -268,6 +268,16 @@ async def _analysis_rows_with_permissions( return rows +def _execute_write_disabled_reason(sql, analysis_error, analysis_rows): + if not (sql and sql.strip()): + return "Enter writable SQL before executing." + if analysis_error: + return analysis_error + if any(row.get("allowed") is False for row in analysis_rows): + return "You do not have permission for every operation listed above." 
+ return None + + def _coerce_execute_write_payload(data, is_json): if not isinstance(data, dict): raise QueryValidationError("JSON must be a dictionary") @@ -358,16 +368,16 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): ) except (QueryValidationError, sqlite3.DatabaseError) as ex: analysis_error = getattr(ex, "message", str(ex)) + execute_disabled_reason = _execute_write_disabled_reason( + sql, analysis_error, analysis_rows + ) return { "ok": analysis_error is None, "parameters": parameter_names, "analysis_error": analysis_error, "analysis_rows": analysis_rows, - "execute_disabled": bool( - (not sql) - or analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), + "execute_disabled": bool(execute_disabled_reason), + "execute_disabled_reason": execute_disabled_reason, } diff --git a/tests/test_queries.py b/tests/test_queries.py index 2aa5142b..87ecacde 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1374,6 +1374,10 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'addEventListener("paste"' in response.text assert "setupSqlParameterRefresh" in response.text assert "datasetteSqlAnalysis.renderAnalysis" in response.text + assert "input[data-execute-write-submit]:disabled" in response.text + assert ( + 'data-execute-write-disabled-reason aria-live="polite" hidden' in response.text + ) assert '' in response.text assert '' in response.text assert "" in response.text @@ -1390,7 +1394,9 @@ async def test_execute_write_get_prepopulates_without_executing(): ) assert '' in empty_response.text assert 'executeWriteSqlInput.value = "\\n\\n\\n";' in empty_response.text - assert "hidden>Save this query" in empty_response.text + assert "Enter writable SQL before executing." 
in empty_response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in empty_response.text + assert 'Save this query" in read_only_response.text + assert ( + '' + ) in read_only_response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in read_only_response.text + assert '' + ) in response.text + assert ( + '' + "You do not have permission for every operation listed above." + ) in response.text + assert 'no' in response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in response.text + assert ' Date: Thu, 28 May 2026 16:20:28 -0700 Subject: [PATCH 296/299] //-/query.json and changelog docs --- docs/changelog.rst | 3 ++- docs/json_api.rst | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 3882cc12..3501aa60 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -17,12 +17,13 @@ Stored queries - Users with :ref:`store-query ` and :ref:`execute-sql ` permission can create stored queries from the SQL query page or the new ``GET //-/queries/store`` form. (:issue:`2735`) - The database page now shows a count and preview of stored queries, capped at five, and links to new paginated query browsers at ``/-/queries`` and ``//-/queries``. Those browsers support search. (:issue:`2735`) - Stored queries created by users default to private and untrusted. Private stored queries can only be viewed, updated or deleted by their owner, even if another actor has broad ``view-query``, ``update-query`` or ``delete-query`` permission. Untrusted stored queries execute using the permissions of the actor running them. See :ref:`stored_queries` and :ref:`trusted_stored_queries` for details. (:issue:`2735`) +- Configured queries from ``datasette.yaml`` are trusted by default, so they can execute with ``view-query`` permission alone. 
They can opt out of that behavior using ``is_trusted: false`` but cannot be made private; private queries are only available for user-created stored queries. (:issue:`2735`) - New ``store-query``, ``update-query`` and ``delete-query`` permissions, plus updated semantics for :ref:`view-query <actions_view_query>`. Trusted stored queries can still execute with ``view-query`` alone; untrusted read queries also require :ref:`execute-sql <actions_execute_sql>` and untrusted writable queries require :ref:`execute-write-sql <actions_execute_write_sql>` plus the relevant table-level write permissions. (:issue:`2735`) Write SQL UI ~~~~~~~~~~~~ -- New "Write to this database" interface at ``/<database>/-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted and links to a newly inserted row when a single-row insert succeeds. (:issue:`2742`) +- New "Write to this database" interface at ``/<database>/-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted, includes starter templates for ``INSERT``, ``UPDATE`` and ``DELETE`` statements and links to a newly inserted row when a single-row insert succeeds. This is also available as a :ref:`JSON API `. (:issue:`2742`) - Added the new :ref:`execute-write-sql <actions_execute_write_sql>` permission for running arbitrary writable SQL. Execution is also gated by table-level permissions such as :ref:`insert-row <actions_insert_row>`, :ref:`update-row <actions_update_row>` and :ref:`delete-row <actions_delete_row>`, and writes to attached databases are rejected. (:issue:`2742`) - The write SQL analyzer now uses a deny-by-default model for unsupported operations. Reads from source tables require :ref:`view-table <actions_view_table>` permission, schema changes require :ref:`create-table <actions_create_table>`, :ref:`alter-table <actions_alter_table>` or :ref:`drop-table <actions_drop_table>` as appropriate, and row mutation statements require the full ``insert-row``, ``update-row`` and ``delete-row`` permission set.
SQL functions are allowed and are not separately permission-gated. (:issue:`2748`) - User-supplied write SQL now rejects ``VACUUM`` and writes to SQLite virtual tables or shadow tables. These restrictions also apply to untrusted stored write queries; trusted configured stored queries continue to skip these filters. (:issue:`2748`) diff --git a/docs/json_api.rst b/docs/json_api.rst index db19afc2..4bd76717 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -50,6 +50,25 @@ The ``"truncated"`` key lets you know if the query was truncated. This can happe For table pages, an additional key ``"next"`` may be present. This indicates that the next page in the pagination set can be retrieved using ``?_next=VALUE``. +.. _json_api_custom_sql: + +Executing custom SQL +-------------------- + +Actors with the :ref:`actions_execute_sql` permission can execute read-only SQL against a database using ``/-/query.json``: + +:: + + GET /<database>/-/query.json?sql=select+*+from+dogs + +Values for named SQL parameters can be provided as additional query string parameters: + +:: + + GET /<database>/-/query.json?sql=select+*+from+dogs+where+name=:name&name=Cleo + +The response uses the same default representation described above. + .. _json_api_shapes: Different shapes @@ -529,7 +548,7 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr } } -The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. +The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query JSON API <json_api_custom_sql>` instead. Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table.
Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. Schema changes require ``create-table``, ``alter-table`` or ``drop-table`` permissions as appropriate. From 9e377e8b90b27ae21d3263d0bfe8d3808e2c6133 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 20:01:48 -0700 Subject: [PATCH 297/299] Only show valid SQL write templates Closes #2753 Demo: https://github.com/simonw/datasette/issues/2753#issuecomment-4570071413 --- datasette/templates/execute_write.html | 130 ++++------------- datasette/views/execute_write.py | 192 ++++++++++++++++++++++++- tests/test_queries.py | 117 ++++++++++++++- 3 files changed, 331 insertions(+), 108 deletions(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index ee251111..394261de 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -89,16 +89,18 @@ form.sql.core input[data-execute-write-submit]:disabled {

        - - - + {% for operation in write_template_operations %} + + {% endfor %}

        + {% else %} +

        You don't currently have permission to insert, edit or delete from any tables.

        {% endif %}

        @@ -242,119 +244,43 @@ window.addEventListener("DOMContentLoaded", () => { {% if write_template_tables %} {% endif %} diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 7b693978..cff20847 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -1,3 +1,4 @@ +import re from urllib.parse import urlencode from datasette.resources import DatabaseResource @@ -22,6 +23,187 @@ from .query_helpers import ( _wants_json, ) +WRITE_TEMPLATE_LABELS = { + "insert": "Insert row", + "update": "Update rows", + "delete": "Delete rows", +} +WRITE_TEMPLATE_OPERATIONS = tuple(WRITE_TEMPLATE_LABELS) + + +def _parameter_names(columns): + seen = set() + names = {} + for column in columns: + base = re.sub(r"[^a-z0-9_]+", "_", column.lower()) + base = base.strip("_") or "value" + if base[0].isdigit(): + base = "p_{}".format(base) + name = base + index = 2 + while name in seen: + name = "{}_{}".format(base, index) + index += 1 + seen.add(name) + names[column] = name + return names + + +def _quote_identifier(identifier): + return '"{}"'.format(identifier.replace('"', '""')) + + +def _preferred_where_column(table, columns): + lower_table_id = "{}_id".format(table.lower()) + return ( + next((column for column in columns if column.lower() == "id"), None) + or next( + (column for column in columns if column.lower() == lower_table_id), None + ) + or columns[0] + ) + + +def _auto_incrementing_primary_key(columns): + primary_keys = [column for column in columns if column.is_pk] + if len(primary_keys) != 1: + return None + primary_key = primary_keys[0] + if primary_key.type and primary_key.type.lower() == "integer": + return primary_key.name + return None + + +def _insert_template_sql(table, columns): + column_names = [column.name for column in columns] + auto_pk = _auto_incrementing_primary_key(columns) + insert_columns = [column for column in column_names if column != auto_pk] + if not insert_columns: + return "insert into 
{}\ndefault values".format(_quote_identifier(table)) + names = _parameter_names(insert_columns) + return "\n".join( + ( + "insert into {} (".format(_quote_identifier(table)), + ",\n".join( + " {}".format(_quote_identifier(column)) for column in insert_columns + ), + ")", + "values (", + ",\n".join(" :{}".format(names[column]) for column in insert_columns), + ")", + ) + ) + + +def _update_template_sql(table, columns): + column_names = [column.name for column in columns] + names = _parameter_names(column_names) + where_column = _preferred_where_column(table, column_names) + set_columns = [column for column in column_names if column != where_column] + if not set_columns: + return "\n".join( + ( + "update {}".format(_quote_identifier(table)), + "set {} = :new_{}".format( + _quote_identifier(where_column), names[where_column] + ), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + return "\n".join( + ( + "update {}".format(_quote_identifier(table)), + "set " + + ",\n".join( + "{}{} = :{}".format( + " " if index else "", + _quote_identifier(column), + names[column], + ) + for index, column in enumerate(set_columns) + ), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + + +def _delete_template_sql(table, columns): + column_names = [column.name for column in columns] + names = _parameter_names(column_names) + where_column = _preferred_where_column(table, column_names) + return "\n".join( + ( + "delete from {}".format(_quote_identifier(table)), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + + +def _template_sqls_for_table(table, columns): + return { + "insert": _insert_template_sql(table, columns), + "update": _update_template_sql(table, columns), + "delete": _delete_template_sql(table, columns), + } + + +async def _template_sql_allowed(datasette, db, sql, actor): + params = {parameter: "" for parameter in 
_derived_query_parameters(sql)} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError: + return False + if not _analysis_is_write(analysis): + return False + analysis_rows = await _analysis_rows_with_permissions(datasette, analysis, actor) + return _execute_write_disabled_reason(sql, None, analysis_rows) is None + + +async def _write_template_tables( + datasette, db, table_columns, hidden_table_names, actor +): + write_template_tables = {} + for table in table_columns: + if table in hidden_table_names or not table_columns[table]: + continue + column_details = [ + column + for column in await db.table_column_details(table) + if not column.hidden + ] + if not column_details: + continue + templates = {} + for operation, sql in _template_sqls_for_table(table, column_details).items(): + if await _template_sql_allowed(datasette, db, sql, actor): + templates[operation] = sql + if templates: + write_template_tables[table] = { + "templates": templates, + } + return write_template_tables + + +def _write_template_operations(write_template_tables): + operations = [] + for operation in WRITE_TEMPLATE_OPERATIONS: + if any( + operation in table["templates"] for table in write_template_tables.values() + ): + operations.append( + { + "name": operation, + "label": WRITE_TEMPLATE_LABELS[operation], + } + ) + return operations + class ExecuteWriteView(BaseView): name = "execute-write" @@ -47,11 +229,10 @@ class ExecuteWriteView(BaseView): analysis_rows = [] table_columns = await _table_columns(self.ds, db.name) hidden_table_names = set(await db.hidden_table_names()) - write_template_tables = { - table: columns - for table, columns in table_columns.items() - if columns and table not in hidden_table_names - } + write_template_tables = await _write_template_tables( + self.ds, db, table_columns, hidden_table_names, request.actor + ) + write_template_operations = _write_template_operations(write_template_tables) if sql and analysis_error is None: try: 
parameter_names = _derived_query_parameters(sql) @@ -107,6 +288,7 @@ class ExecuteWriteView(BaseView): "execute_disabled_reason": execute_disabled_reason, "table_columns": table_columns, "write_template_tables": write_template_tables, + "write_template_operations": write_template_operations, "save_query_url": save_query_url, "save_query_base_url": save_query_base_url, }, diff --git a/tests/test_queries.py b/tests/test_queries.py index 87ecacde..89167a1d 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1,4 +1,6 @@ import json +import re +from html import unescape import pytest @@ -8,6 +10,19 @@ from datasette.stored_queries import StoredQuery, StoredQueryPage from datasette.utils.asgi import Forbidden +def _template_option_attributes(html, table): + match = re.search(r'' in response.text + assert '
        Required permissioninsert