From 2fd98f4422b40a1b74715b6e9ceee42ed5a2e551 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 08:46:42 -0800 Subject: [PATCH 01/42] WIP restrictions SQL mechanism, refs #2572 --- datasette/default_permissions.py | 120 +++++++++------------- datasette/permissions.py | 9 +- datasette/utils/actions_sql.py | 122 +++++++++++++++++------ datasette/utils/permissions.py | 149 +++++++++++++++++++++++++++- datasette/views/special.py | 2 +- tests/test_actor_restriction_bug.py | 133 +++++++++++++++++++++++++ tests/test_plugins.py | 10 ++ 7 files changed, 433 insertions(+), 112 deletions(-) create mode 100644 tests/test_actor_restriction_bug.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 9afb088e..5642cdfe 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -19,7 +19,7 @@ async def actor_restrictions_sql(datasette, actor, action): return None restrictions = actor.get("_r") if isinstance(actor, dict) else None - if not restrictions: + if restrictions is None: return [] # Check if this action appears in restrictions (with abbreviations) @@ -28,91 +28,63 @@ async def actor_restrictions_sql(datasette, actor, action): if action_obj and action_obj.abbr: action_checks.add(action_obj.abbr) - # Check if this action is in the allowlist anywhere in restrictions - is_in_allowlist = False + # Check if globally allowed in restrictions global_actions = restrictions.get("a", []) - if action_checks.intersection(global_actions): - is_in_allowlist = True + is_globally_allowed = action_checks.intersection(global_actions) - if not is_in_allowlist: - for db_actions in restrictions.get("d", {}).values(): - if action_checks.intersection(db_actions): - is_in_allowlist = True - break + if is_globally_allowed: + # Globally allowed - no restriction filtering needed + return [] - if not is_in_allowlist: - for tables in restrictions.get("r", {}).values(): - for table_actions in tables.values(): - if action_checks.intersection(table_actions): - is_in_allowlist = True - break - if is_in_allowlist: - break - - # If action not in allowlist at all, add global deny and return - if not is_in_allowlist: - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :actor_deny_reason AS reason" - return [ - PermissionSQL( - sql=sql, - params={ - "actor_deny_reason": f"actor restrictions: {action} not in allowlist" - }, - ) - ] - - # Action IS in allowlist - build deny + specific allows - selects = [] - params = {} + # Not globally allowed - build restriction_sql that lists allowlisted resources + restriction_selects = [] + restriction_params = {} param_counter = 0 - def add_row(parent, child, allow, reason): - """Helper to add a parameterized SELECT statement.""" - nonlocal param_counter - prefix = f"restr_{param_counter}" - param_counter += 1 - - selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child, " - f":{prefix}_allow AS allow, :{prefix}_reason AS reason" - ) - params[f"{prefix}_parent"] = parent - params[f"{prefix}_child"] = child - params[f"{prefix}_allow"] = 1 if allow else 0 - params[f"{prefix}_reason"] = reason - - # If NOT globally allowed, add global deny as gatekeeper - is_globally_allowed = action_checks.intersection(global_actions) - if not is_globally_allowed: - add_row(None, None, 0, f"actor restrictions: {action} denied by default") - else: - # Globally allowed - add global allow - add_row(None, None, 1, f"actor restrictions: global {action}") - - # Add database-level allows + # Add database-level 
allowlisted resources db_restrictions = restrictions.get("d", {}) for db_name, db_actions in db_restrictions.items(): if action_checks.intersection(db_actions): - add_row(db_name, None, 1, f"actor restrictions: database {db_name}") + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, NULL AS child" + ) + restriction_params[f"{prefix}_parent"] = db_name - # Add resource/table-level allows + # Add table-level allowlisted resources resource_restrictions = restrictions.get("r", {}) for db_name, tables in resource_restrictions.items(): for table_name, table_actions in tables.items(): if action_checks.intersection(table_actions): - add_row( - db_name, - table_name, - 1, - f"actor restrictions: {db_name}/{table_name}", + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" ) + restriction_params[f"{prefix}_parent"] = db_name + restriction_params[f"{prefix}_child"] = table_name - if not selects: - return [] + if not restriction_selects: + # Action not in allowlist - return empty restriction (INTERSECT will return no results) + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", # Empty set + ) + ] - sql = "\nUNION ALL\n".join(selects) + # Build restriction SQL that returns allowed (parent, child) pairs + restriction_sql = "\nUNION ALL\n".join(restriction_selects) - return [PermissionSQL(sql=sql, params=params)] + # Return restriction-only PermissionSQL (sql=None means no permission rules) + # The restriction_sql does the actual filtering via INTERSECT + return [ + PermissionSQL( + params=restriction_params, + restriction_sql=restriction_sql, + ) + ] @hookimpl(specname="permission_resources_sql") @@ -375,13 +347,11 @@ async def default_allow_sql_check(datasette, actor, action): @hookimpl(specname="permission_resources_sql") async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions.""" - # Only apply defaults if actor has no restrictions - # If actor has restrictions, they've already added their own deny/allow rules - has_restrictions = actor and "_r" in actor - if has_restrictions: - return None + """Apply default allow rules for standard view/execute actions. + With the INTERSECT-based restriction approach, these defaults are always generated + and then filtered by restriction_sql if the actor has restrictions. + """ default_allow_actions = { "view-instance", "view-database", diff --git a/datasette/permissions.py b/datasette/permissions.py index 7b1fc90c..c91385a0 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -138,13 +138,20 @@ class PermissionSQL: child TEXT NULL, allow INTEGER, -- 1 allow, 0 deny reason TEXT + + For restriction-only plugins, sql can be None and only restriction_sql is provided. 
""" - sql: str # SQL that SELECTs the 4 columns above + sql: str | None = ( + None # SQL that SELECTs the 4 columns above (can be None for restriction-only) + ) params: dict[str, Any] | None = ( None # bound params for the SQL (values only; no ':' prefix) ) source: str | None = None # System will set this to the plugin name + restriction_sql: str | None = ( + None # Optional SQL that returns (parent, child) for restriction filtering + ) @classmethod def allow(cls, reason: str, _allow: bool = True) -> "PermissionSQL": diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 13594a2d..22d42dbc 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -157,8 +157,19 @@ async def _build_single_action_sql( all_params = {} rule_sqls = [] + restriction_sqls = [] for permission_sql in permission_sqls: + # Always collect params (even from restriction-only plugins) + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rule_sqls.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -166,7 +177,6 @@ async def _build_single_action_sql( ) """.strip() ) - all_params.update(permission_sql.params or {}) # If no rules, return empty result (deny all) if not rule_sqls: @@ -200,6 +210,9 @@ async def _build_single_action_sql( anon_params = {} for permission_sql in anon_permission_sqls: + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rewritten_sql = permission_sql.sql for key, value in (permission_sql.params or {}).items(): anon_key = f"anon_{key}" @@ -360,6 +373,13 @@ async def _build_single_action_sql( query_parts.append(")") + # Add restriction list CTE if there are restrictions + if restriction_sqls: + restriction_intersect = "\nINTERSECT\n".join(restriction_sqls) + query_parts.extend( + [",", "restriction_list AS (", f" {restriction_intersect}", ")"] + ) + # Final SELECT select_cols = "parent, child, reason" if include_is_private: @@ -369,6 +389,17 @@ async def _build_single_action_sql( query_parts.append("FROM decisions") query_parts.append("WHERE is_allowed = 1") + # Add restriction filter if there are restrictions + if restriction_sqls: + query_parts.append( + """ + AND EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = decisions.parent OR r.parent IS NULL) + AND (r.child = decisions.child OR r.child IS NULL) + )""" + ) + # Add parent filter if specified if parent is not None: query_parts.append(" AND parent = :filter_parent") @@ -405,11 +436,24 @@ async def build_permission_rules_sql( return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", {}, + [], ) union_parts = [] all_params = {} + restriction_sqls = [] + for permission_sql in permission_sqls: + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue + union_parts.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -417,10 +461,9 @@ async def build_permission_rules_sql( ) """.strip() ) - 
all_params.update(permission_sql.params or {}) rules_union = " UNION ALL ".join(union_parts) - return rules_union, all_params + return rules_union, all_params, restriction_sqls async def check_permission_for_resource( @@ -447,7 +490,9 @@ async def check_permission_for_resource( This builds the cascading permission query and checks if the specific resource is in the allowed set. """ - rules_union, all_params = await build_permission_rules_sql(datasette, actor, action) + rules_union, all_params, restriction_sqls = await build_permission_rules_sql( + datasette, actor, action + ) # If no rules (empty SQL), default deny if not rules_union: @@ -457,43 +502,54 @@ async def check_permission_for_resource( all_params["_check_parent"] = parent all_params["_check_child"] = child + # If there are restriction filters, check if the resource passes them first + if restriction_sqls: + # Check if resource is in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + restriction_check = "\nINTERSECT\n".join(restriction_sqls) + restriction_query = f""" +WITH restriction_list AS ( + {restriction_check} +) +SELECT EXISTS ( + SELECT 1 FROM restriction_list + WHERE (parent = :_check_parent OR parent IS NULL) + AND (child = :_check_child OR child IS NULL) +) AS in_allowlist +""" + result = await datasette.get_internal_database().execute( + restriction_query, all_params + ) + if result.rows and not result.rows[0][0]: + # Resource not in restriction allowlist - deny + return False + query = f""" WITH all_rules AS ( {rules_union} ), -child_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow +matched_rules AS ( + SELECT ar.*, + CASE + WHEN ar.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN ar.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child = :_check_child + WHERE (ar.parent IS NULL OR ar.parent = :_check_parent) + AND (ar.child IS NULL OR ar.child = :_check_child) ), -parent_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child IS NULL -), -global_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent IS NULL AND ar.child IS NULL +winner AS ( + SELECT * + FROM matched_rules + ORDER BY + depth DESC, -- specificity first (higher depth wins) + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow + source_plugin -- stable tie-break + LIMIT 1 ) -SELECT - CASE - WHEN cl.any_deny = 1 THEN 0 - WHEN cl.any_allow = 1 THEN 1 - WHEN pl.any_deny = 1 THEN 0 - WHEN pl.any_allow = 1 THEN 1 - WHEN gl.any_deny = 1 THEN 0 - WHEN gl.any_allow = 1 THEN 1 - ELSE 0 - END AS is_allowed -FROM child_lvl cl, parent_lvl pl, global_lvl gl +SELECT COALESCE((SELECT allow FROM winner), 0) AS is_allowed """ # Execute the query against the internal database diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 75307abf..f951e2b3 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -99,6 +99,10 @@ def build_rules_union( # No namespacing - just use plugin params as-is params.update(p.params or {}) + # Skip plugins that only provide restriction_sql (no permission 
rules) + if p.sql is None: + continue + parts.append( f""" SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( @@ -155,6 +159,8 @@ async def resolve_permissions_from_catalog( - resource (rendered "/parent/child" or "/parent" or "/") """ resolved_plugins: List[PermissionSQL] = [] + restriction_sqls: List[str] = [] + for plugin in plugins: if callable(plugin) and not isinstance(plugin, PermissionSQL): resolved = plugin(action) # type: ignore[arg-type] @@ -164,6 +170,10 @@ async def resolve_permissions_from_catalog( raise TypeError("Plugin providers must return PermissionSQL instances") resolved_plugins.append(resolved) + # Collect restriction SQL filters + if resolved.restriction_sql: + restriction_sqls.append(resolved.restriction_sql) + union_sql, rule_params = build_rules_union(actor, resolved_plugins) all_params = { **(candidate_params or {}), @@ -199,8 +209,8 @@ async def resolve_permissions_from_catalog( PARTITION BY parent, child ORDER BY depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- deny over allow at same depth - source_plugin -- stable tie-break + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break ) AS rn FROM matched ), @@ -228,6 +238,141 @@ async def resolve_permissions_from_catalog( ORDER BY c.parent, c.child """ + # If there are restriction filters, wrap the query with INTERSECT + # This ensures only resources in the restriction allowlist are returned + if restriction_sqls: + # Start with the main query, but select only parent/child for the INTERSECT + main_query_for_intersect = f""" + WITH + cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ), + permitted_resources AS ( + SELECT c.parent, c.child + FROM cands c + LEFT JOIN winner w + ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + WHERE COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) = 1 + ) + SELECT parent, child FROM permitted_resources + """ + + # Build restriction list with INTERSECT (all must match) + # Then filter to resources that match hierarchically + restriction_intersect = "\nINTERSECT\n".join(restriction_sqls) + + # Combine: resources allowed by permissions AND in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + filtered_resources = f""" + WITH restriction_list AS ( + {restriction_intersect} + ), + permitted AS ( + {main_query_for_intersect} + ), + filtered AS ( + SELECT p.parent, p.child + FROM permitted p + WHERE EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = p.parent OR r.parent IS NULL) + AND (r.child = p.child OR r.child IS NULL) + ) + ) + """ + + # Now join back to get 
full results for only the filtered resources + sql = f""" + {filtered_resources} + , cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ) + SELECT + c.parent, c.child, + COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) AS allow, + COALESCE(w.reason, CASE WHEN :implicit_deny THEN 'implicit deny' ELSE NULL END) AS reason, + w.source_plugin, + COALESCE(w.depth, -1) AS depth, + :action AS action, + CASE + WHEN c.parent IS NULL THEN '/' + WHEN c.child IS NULL THEN '/' || c.parent + ELSE '/' || c.parent || '/' || c.child + END AS resource + FROM filtered c + LEFT JOIN winner w + ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + ORDER BY c.parent, c.child + """ + rows_iter: Iterable[sqlite3.Row] = await db.execute( sql, {**all_params, "implicit_deny": 1 if implicit_deny else 0}, diff --git a/datasette/views/special.py b/datasette/views/special.py index 5a341911..a1d736c5 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -403,7 +403,7 @@ class PermissionRulesView(BaseView): from datasette.utils.actions_sql import build_permission_rules_sql - union_sql, union_params = await build_permission_rules_sql( + union_sql, union_params, restriction_sqls = await build_permission_rules_sql( self.ds, actor, action ) await self.ds.refresh_schemas() diff --git a/tests/test_actor_restriction_bug.py b/tests/test_actor_restriction_bug.py new file mode 100644 index 00000000..0bfc9e1e --- /dev/null +++ b/tests/test_actor_restriction_bug.py @@ -0,0 +1,133 @@ +""" +Test for actor restrictions bug with database-level config. + +This test currently FAILS, demonstrating the bug where database-level +config allow blocks can bypass table-level restrictions. +""" + +import pytest +from datasette.app import Datasette +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_table_restrictions_not_bypassed_by_database_level_config(): + """ + Actor restrictions should act as hard limits that config cannot override. + + BUG: When an actor has table-level restrictions (e.g., only table2 and table3) + but config has a database-level allow block, the database-level config rule + currently allows ALL tables, not just those in the restriction allowlist. + + This test documents the expected behavior and will FAIL until the bug is fixed. 
+ """ + # Config grants access at DATABASE level (not table level) + config = { + "databases": { + "test_db_rnbbdlc": { + "allow": { + "id": "user" + } # Database-level allow - grants access to all tables + } + } + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rnbbdlc") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + await db.execute_write("create table table3 (id integer primary key)") + await db.execute_write("create table table4 (id integer primary key)") + + # Actor restricted to ONLY table2 and table3 + # Even though config allows the whole database, restrictions should limit access + actor = { + "id": "user", + "_r": { + "r": { # Resource-level (table-level) restrictions + "test_db_rnbbdlc": { + "table2": ["vt"], # vt = view-table abbreviation + "table3": ["vt"], + } + } + }, + } + + # table2 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed - in restriction allowlist" + + # table3 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table3"), + actor=actor, + ) + assert result is True, "table3 should be allowed - in restriction allowlist" + + # table1 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table1"), + actor=actor, + ) + assert ( + result is False + ), "table1 should be DENIED - not in restriction allowlist, config cannot override" + + # table4 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table4"), + actor=actor, + ) + assert ( + result is False + ), "table4 should be DENIED - not in restriction allowlist, config cannot override" + + +@pytest.mark.asyncio +async def test_database_restrictions_with_database_level_config(): + """ + Verify that database-level restrictions work correctly with database-level config. + + This should pass - it's testing the case where restriction granularity + matches config granularity. 
+ """ + config = { + "databases": {"test_db_rwdl": {"allow": {"id": "user"}}} # Database-level allow + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rwdl") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + + # Actor has database-level restriction (all tables in test_db_rwdl) + actor = { + "id": "user", + "_r": {"d": {"test_db_rwdl": ["vt"]}}, # Database-level restrictions + } + + # Both tables should be allowed (database-level restriction matches database-level config) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table1"), + actor=actor, + ) + assert result is True, "table1 should be allowed" + + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed" diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 5a530b25..971b7e82 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1630,6 +1630,16 @@ async def test_hook_register_actions_with_custom_resources(): reason="user2 granted view-document-collection" ) + # Default allow for view-document-collection (like other view-* actions) + if action == "view-document-collection": + return PermissionSQL.allow( + reason="default allow for view-document-collection" + ) + + # Default allow for view-document (like other view-* actions) + if action == "view-document": + return PermissionSQL.allow(reason="default allow for view-document") + # Register the plugin temporarily plugin = TestPlugin() pm.register(plugin, name="test_custom_resources_plugin") From 161f2937cb14d6737ba2f65fd57a63d278662cd9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 11:51:53 -0800 Subject: [PATCH 02/42] facet_suggest_time_limit_ms 200ms in tests, closes #2574 --- tests/conftest.py | 1 + tests/test_api.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4a8ef51d..ad7243c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,6 +62,7 @@ async def ds_client(): "default_page_size": 50, "max_returned_rows": 100, "sql_time_limit_ms": 200, + "facet_suggest_time_limit_ms": 200, # Up from 50 default # Default is 3 but this results in "too many open files" # errors when running the full test suite: "num_sql_threads": 1, diff --git a/tests/test_api.py b/tests/test_api.py index 2ac647c7..859c5809 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -875,7 +875,7 @@ async def test_settings_json(ds_client): "default_page_size": 50, "default_facet_size": 30, "default_allow_sql": True, - "facet_suggest_time_limit_ms": 50, + "facet_suggest_time_limit_ms": 200, "facet_time_limit_ms": 200, "max_returned_rows": 100, "max_insert_rows": 100, From c76c3e6e6fcf541a10547b8647dbe288db269323 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 11:51:53 -0800 Subject: [PATCH 03/42] facet_suggest_time_limit_ms 200ms in tests, closes #2574 --- tests/conftest.py | 1 + tests/test_api.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4a8ef51d..ad7243c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,6 +62,7 @@ async def ds_client(): "default_page_size": 50, "max_returned_rows": 100, "sql_time_limit_ms": 200, + "facet_suggest_time_limit_ms": 200, # Up from 50 default # Default is 3 but 
this results in "too many open files" # errors when running the full test suite: "num_sql_threads": 1, diff --git a/tests/test_api.py b/tests/test_api.py index 2ac647c7..859c5809 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -875,7 +875,7 @@ async def test_settings_json(ds_client): "default_page_size": 50, "default_facet_size": 30, "default_allow_sql": True, - "facet_suggest_time_limit_ms": 50, + "facet_suggest_time_limit_ms": 200, "facet_time_limit_ms": 200, "max_returned_rows": 100, "max_insert_rows": 100, From c0a87b809f562ed1c64c666ab1938f17dc3dc2b9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:07:03 -0800 Subject: [PATCH 04/42] Additional tests for restrict SQL Refs https://github.com/simonw/datasette/issues/2572#issuecomment-3482778412 --- datasette/utils/actions_sql.py | 11 +- datasette/utils/permissions.py | 6 +- tests/test_restriction_sql.py | 315 +++++++++++++++++++++++++++++++++ 3 files changed, 329 insertions(+), 3 deletions(-) create mode 100644 tests/test_restriction_sql.py diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 22d42dbc..7121e2d0 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -375,7 +375,11 @@ async def _build_single_action_sql( # Add restriction list CTE if there are restrictions if restriction_sqls: - restriction_intersect = "\nINTERSECT\n".join(restriction_sqls) + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) query_parts.extend( [",", "restriction_list AS (", f" {restriction_intersect}", ")"] ) @@ -506,7 +510,10 @@ async def check_permission_for_resource( if restriction_sqls: # Check if resource is in restriction allowlist # Database-level restrictions (parent, NULL) should match all children (parent, *) - restriction_check = "\nINTERSECT\n".join(restriction_sqls) + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + restriction_check = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) restriction_query = f""" WITH restriction_list AS ( {restriction_check} diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index f951e2b3..58be53a3 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -293,7 +293,11 @@ async def resolve_permissions_from_catalog( # Build restriction list with INTERSECT (all must match) # Then filter to resources that match hierarchically - restriction_intersect = "\nINTERSECT\n".join(restriction_sqls) + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) # Combine: resources allowed by permissions AND in restriction allowlist # Database-level restrictions (parent, NULL) should match all children (parent, *) diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py new file mode 100644 index 00000000..7d6d8a5a --- /dev/null +++ b/tests/test_restriction_sql.py @@ -0,0 +1,315 @@ +import pytest +from datasette.app import Datasette +from datasette.permissions import PermissionSQL +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_multiple_restriction_sources_intersect(): + """ + Test that when 
multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. + """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_intersect allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_intersect' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_intersect") + db2 = ds.add_memory_database("db2_multi_intersect") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1_multi_intersect + # INTERSECT means only db1_multi_intersect/t1 should pass + actor = { + "id": "user", + "_r": {"d": {"db1_multi_intersect": ["vt"], "db2_multi_intersect": ["vt"]}}, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1_multi_intersect/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_intersect", "t1") in resources + assert ("db2_multi_intersect", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") + + +@pytest.mark.asyncio +async def test_restriction_sql_with_overlapping_databases_and_tables(): + """ + Test actor with both database-level and table-level restrictions for same database. + + When actor has: + - Database-level: db1_overlapping allowed (all tables) + - Table-level: db1_overlapping/t1 allowed + + Both entries are UNION'd (OR'ed) within the actor's restrictions. + Database-level restriction allows ALL tables, so table-level is redundant. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_overlapping") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await db.execute_write("CREATE TABLE t2 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has BOTH database-level (db1_overlapping all tables) AND table-level (db1_overlapping/t1 only) + actor = { + "id": "user", + "_r": { + "d": { + "db1_overlapping": ["vt"] + }, # Database-level: all tables in db1_overlapping + "r": { + "db1_overlapping": {"t1": ["vt"]} + }, # Table-level: only t1 in db1_overlapping + }, + } + + # Within actor restrictions, entries are UNION'd (OR'ed): + # - Database level allows: (db1_overlapping, NULL) → matches all tables via hierarchical matching + # - Table level allows: (db1_overlapping, t1) → redundant, already covered by database level + # Result: Both tables are allowed + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + assert ("db1_overlapping", "t1") in resources + # Database-level restriction allows all tables, so t2 is also allowed + assert ("db1_overlapping", "t2") in resources + + +@pytest.mark.asyncio +async def test_restriction_sql_empty_allowlist_query(): + """ + Test the specific SQL query generated when action is not in allowlist. 
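The test below requests view-database while only view-table is allowlisted.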
+ + actor_restrictions_sql() returns "SELECT NULL AS parent, NULL AS child WHERE 0" + Verify this produces an empty result set. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_empty_allowlist") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has restrictions but action not in allowlist + actor = {"id": "user", "_r": {"r": {"db1_empty_allowlist": {"t1": ["vt"]}}}} + + # Try to view-database (only view-table is in allowlist) + page = await ds.allowed_resources("view-database", actor) + + # Should be empty + assert len(page.resources) == 0 + + +@pytest.mark.asyncio +async def test_restriction_sql_with_pagination(): + """ + Test that restrictions work correctly with keyset pagination. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_pagination") + + # Create many tables + for i in range(10): + await db.execute_write(f"CREATE TABLE t{i:02d} (id INTEGER)") + await ds._refresh_schemas() + + # Actor restricted to only odd-numbered tables + restrictions = {"r": {"db1_pagination": {}}} + for i in range(10): + if i % 2 == 1: # Only odd tables + restrictions["r"]["db1_pagination"][f"t{i:02d}"] = ["vt"] + + actor = {"id": "user", "_r": restrictions} + + # Get first page with small limit + page1 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2 + ) + assert len(page1.resources) == 2 + assert page1.next is not None + + # Get second page using next token + page2 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2, next=page1.next + ) + assert len(page2.resources) == 2 + + # Should have no overlap + page1_ids = {r.child for r in page1.resources} + page2_ids = {r.child for r in page2.resources} + assert page1_ids.isdisjoint(page2_ids) + + # All should be odd-numbered tables + all_ids = page1_ids | page2_ids + for table_id in all_ids: + table_num = int(table_id[1:]) # Extract number from "t01", "t03", etc. + assert table_num % 2 == 1, f"Table {table_id} should be odd-numbered" + + +@pytest.mark.asyncio +async def test_also_requires_with_restrictions(): + """ + Test that also_requires actions properly respect restrictions. + + execute-sql requires view-database. With restrictions, both must pass. + """ + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_also_requires") + db2 = ds.add_memory_database("db2_also_requires") + await ds._refresh_schemas() + + # Actor restricted to only db1_also_requires for view-database + # execute-sql requires view-database, so should only work on db1_also_requires + actor = { + "id": "user", + "_r": { + "d": { + "db1_also_requires": ["vd", "es"], + "db2_also_requires": [ + "es" + ], # They have execute-sql but not view-database + } + }, + } + + # db1_also_requires should allow execute-sql + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db1_also_requires", None), + actor=actor, + ) + assert result is True + + # db2_also_requires should not (they have execute-sql but not view-database) + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db2_also_requires", None), + actor=actor, + ) + assert result is False + + +@pytest.mark.asyncio +async def test_restriction_abbreviations_and_full_names(): + """ + Test that both abbreviations and full action names work in restrictions. 
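Mixed lists such as ["view-database", "vt"] are covered as well.
+    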
+ """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_abbrev") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Test with abbreviation + actor_abbr = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["vt"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_abbr, + ) + assert result is True + + # Test with full name + actor_full = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["view-table"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_full, + ) + assert result is True + + # Test with mixed + actor_mixed = {"id": "user", "_r": {"d": {"db1_abbrev": ["view-database", "vt"]}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_mixed, + ) + assert result is True + + +@pytest.mark.asyncio +async def test_permission_resources_sql_multiple_restriction_sources_intersect(): + """ + Test that when multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. + """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_restrictions allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_restrictions' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_restrictions") + db2 = ds.add_memory_database("db2_multi_restrictions") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1 + # INTERSECT means only db1/t1 should pass + actor = { + "id": "user", + "_r": { + "d": { + "db1_multi_restrictions": ["vt"], + "db2_multi_restrictions": ["vt"], + } + }, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_restrictions", "t1") in resources + assert ("db2_multi_restrictions", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") From 18fd373a8f95a30aa41bc059d18f21d9703bedd7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:17:51 -0800 Subject: [PATCH 05/42] New PermissionSQL.restriction_sql mechanism for actor restrictions Implement INTERSECT-based actor restrictions to prevent permission bypass Actor restrictions are now implemented as SQL filters using INTERSECT rather than as deny/allow permission rules. This ensures restrictions act as hard limits that cannot be overridden by other permission plugins or config blocks. Previously, actor restrictions (_r in actor dict) were implemented by generating permission rules with deny/allow logic. 
This approach had a critical flaw: database-level config allow blocks could bypass table-level restrictions, granting access to tables not in the actor's allowlist. The new approach separates concerns: - Permission rules determine what's allowed based on config and plugins - Restriction filters limit the result set to only allowlisted resources - Restrictions use INTERSECT to ensure all restriction criteria are met - Database-level restrictions (parent, NULL) properly match all child tables Implementation details: - Added restriction_sql field to PermissionSQL dataclass - Made PermissionSQL.sql optional to support restriction-only plugins - Updated actor_restrictions_sql() to return restriction filters instead of rules - Modified SQL builders to apply restrictions via INTERSECT and EXISTS clauses Closes #2572 --- datasette/default_permissions.py | 120 ++++------- datasette/permissions.py | 9 +- datasette/utils/actions_sql.py | 129 +++++++++--- datasette/utils/permissions.py | 153 +++++++++++++- datasette/views/special.py | 2 +- tests/test_actor_restriction_bug.py | 133 ++++++++++++ tests/test_plugins.py | 10 + tests/test_restriction_sql.py | 315 ++++++++++++++++++++++++++++ 8 files changed, 759 insertions(+), 112 deletions(-) create mode 100644 tests/test_actor_restriction_bug.py create mode 100644 tests/test_restriction_sql.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 9afb088e..5642cdfe 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -19,7 +19,7 @@ async def actor_restrictions_sql(datasette, actor, action): return None restrictions = actor.get("_r") if isinstance(actor, dict) else None - if not restrictions: + if restrictions is None: return [] # Check if this action appears in restrictions (with abbreviations) @@ -28,91 +28,63 @@ async def actor_restrictions_sql(datasette, actor, action): if action_obj and action_obj.abbr: action_checks.add(action_obj.abbr) - # Check if this action is in the allowlist anywhere in restrictions - is_in_allowlist = False + # Check if globally allowed in restrictions global_actions = restrictions.get("a", []) - if action_checks.intersection(global_actions): - is_in_allowlist = True + is_globally_allowed = action_checks.intersection(global_actions) - if not is_in_allowlist: - for db_actions in restrictions.get("d", {}).values(): - if action_checks.intersection(db_actions): - is_in_allowlist = True - break + if is_globally_allowed: + # Globally allowed - no restriction filtering needed + return [] - if not is_in_allowlist: - for tables in restrictions.get("r", {}).values(): - for table_actions in tables.values(): - if action_checks.intersection(table_actions): - is_in_allowlist = True - break - if is_in_allowlist: - break - - # If action not in allowlist at all, add global deny and return - if not is_in_allowlist: - sql = "SELECT NULL AS parent, NULL AS child, 0 AS allow, :actor_deny_reason AS reason" - return [ - PermissionSQL( - sql=sql, - params={ - "actor_deny_reason": f"actor restrictions: {action} not in allowlist" - }, - ) - ] - - # Action IS in allowlist - build deny + specific allows - selects = [] - params = {} + # Not globally allowed - build restriction_sql that lists allowlisted resources + restriction_selects = [] + restriction_params = {} param_counter = 0 - def add_row(parent, child, allow, reason): - """Helper to add a parameterized SELECT statement.""" - nonlocal param_counter - prefix = f"restr_{param_counter}" - param_counter += 1 - - selects.append( - 
f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child, " - f":{prefix}_allow AS allow, :{prefix}_reason AS reason" - ) - params[f"{prefix}_parent"] = parent - params[f"{prefix}_child"] = child - params[f"{prefix}_allow"] = 1 if allow else 0 - params[f"{prefix}_reason"] = reason - - # If NOT globally allowed, add global deny as gatekeeper - is_globally_allowed = action_checks.intersection(global_actions) - if not is_globally_allowed: - add_row(None, None, 0, f"actor restrictions: {action} denied by default") - else: - # Globally allowed - add global allow - add_row(None, None, 1, f"actor restrictions: global {action}") - - # Add database-level allows + # Add database-level allowlisted resources db_restrictions = restrictions.get("d", {}) for db_name, db_actions in db_restrictions.items(): if action_checks.intersection(db_actions): - add_row(db_name, None, 1, f"actor restrictions: database {db_name}") + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, NULL AS child" + ) + restriction_params[f"{prefix}_parent"] = db_name - # Add resource/table-level allows + # Add table-level allowlisted resources resource_restrictions = restrictions.get("r", {}) for db_name, tables in resource_restrictions.items(): for table_name, table_actions in tables.items(): if action_checks.intersection(table_actions): - add_row( - db_name, - table_name, - 1, - f"actor restrictions: {db_name}/{table_name}", + prefix = f"restr_{param_counter}" + param_counter += 1 + restriction_selects.append( + f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" ) + restriction_params[f"{prefix}_parent"] = db_name + restriction_params[f"{prefix}_child"] = table_name - if not selects: - return [] + if not restriction_selects: + # Action not in allowlist - return empty restriction (INTERSECT will return no results) + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", # Empty set + ) + ] - sql = "\nUNION ALL\n".join(selects) + # Build restriction SQL that returns allowed (parent, child) pairs + restriction_sql = "\nUNION ALL\n".join(restriction_selects) - return [PermissionSQL(sql=sql, params=params)] + # Return restriction-only PermissionSQL (sql=None means no permission rules) + # The restriction_sql does the actual filtering via INTERSECT + return [ + PermissionSQL( + params=restriction_params, + restriction_sql=restriction_sql, + ) + ] @hookimpl(specname="permission_resources_sql") @@ -375,13 +347,11 @@ async def default_allow_sql_check(datasette, actor, action): @hookimpl(specname="permission_resources_sql") async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions.""" - # Only apply defaults if actor has no restrictions - # If actor has restrictions, they've already added their own deny/allow rules - has_restrictions = actor and "_r" in actor - if has_restrictions: - return None + """Apply default allow rules for standard view/execute actions. + With the INTERSECT-based restriction approach, these defaults are always generated + and then filtered by restriction_sql if the actor has restrictions. 
+ """ default_allow_actions = { "view-instance", "view-database", diff --git a/datasette/permissions.py b/datasette/permissions.py index 7b1fc90c..c91385a0 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -138,13 +138,20 @@ class PermissionSQL: child TEXT NULL, allow INTEGER, -- 1 allow, 0 deny reason TEXT + + For restriction-only plugins, sql can be None and only restriction_sql is provided. """ - sql: str # SQL that SELECTs the 4 columns above + sql: str | None = ( + None # SQL that SELECTs the 4 columns above (can be None for restriction-only) + ) params: dict[str, Any] | None = ( None # bound params for the SQL (values only; no ':' prefix) ) source: str | None = None # System will set this to the plugin name + restriction_sql: str | None = ( + None # Optional SQL that returns (parent, child) for restriction filtering + ) @classmethod def allow(cls, reason: str, _allow: bool = True) -> "PermissionSQL": diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 13594a2d..7121e2d0 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -157,8 +157,19 @@ async def _build_single_action_sql( all_params = {} rule_sqls = [] + restriction_sqls = [] for permission_sql in permission_sqls: + # Always collect params (even from restriction-only plugins) + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rule_sqls.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -166,7 +177,6 @@ async def _build_single_action_sql( ) """.strip() ) - all_params.update(permission_sql.params or {}) # If no rules, return empty result (deny all) if not rule_sqls: @@ -200,6 +210,9 @@ async def _build_single_action_sql( anon_params = {} for permission_sql in anon_permission_sqls: + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue rewritten_sql = permission_sql.sql for key, value in (permission_sql.params or {}).items(): anon_key = f"anon_{key}" @@ -360,6 +373,17 @@ async def _build_single_action_sql( query_parts.append(")") + # Add restriction list CTE if there are restrictions + if restriction_sqls: + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + query_parts.extend( + [",", "restriction_list AS (", f" {restriction_intersect}", ")"] + ) + # Final SELECT select_cols = "parent, child, reason" if include_is_private: @@ -369,6 +393,17 @@ async def _build_single_action_sql( query_parts.append("FROM decisions") query_parts.append("WHERE is_allowed = 1") + # Add restriction filter if there are restrictions + if restriction_sqls: + query_parts.append( + """ + AND EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = decisions.parent OR r.parent IS NULL) + AND (r.child = decisions.child OR r.child IS NULL) + )""" + ) + # Add parent filter if specified if parent is not None: query_parts.append(" AND parent = :filter_parent") @@ -405,11 +440,24 @@ async def build_permission_rules_sql( return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", {}, 
+ [], ) union_parts = [] all_params = {} + restriction_sqls = [] + for permission_sql in permission_sqls: + all_params.update(permission_sql.params or {}) + + # Collect restriction SQL filters + if permission_sql.restriction_sql: + restriction_sqls.append(permission_sql.restriction_sql) + + # Skip plugins that only provide restriction_sql (no permission rules) + if permission_sql.sql is None: + continue + union_parts.append( f""" SELECT parent, child, allow, reason, '{permission_sql.source}' AS source_plugin FROM ( @@ -417,10 +465,9 @@ async def build_permission_rules_sql( ) """.strip() ) - all_params.update(permission_sql.params or {}) rules_union = " UNION ALL ".join(union_parts) - return rules_union, all_params + return rules_union, all_params, restriction_sqls async def check_permission_for_resource( @@ -447,7 +494,9 @@ async def check_permission_for_resource( This builds the cascading permission query and checks if the specific resource is in the allowed set. """ - rules_union, all_params = await build_permission_rules_sql(datasette, actor, action) + rules_union, all_params, restriction_sqls = await build_permission_rules_sql( + datasette, actor, action + ) # If no rules (empty SQL), default deny if not rules_union: @@ -457,43 +506,57 @@ async def check_permission_for_resource( all_params["_check_parent"] = parent all_params["_check_child"] = child + # If there are restriction filters, check if the resource passes them first + if restriction_sqls: + # Check if resource is in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + restriction_check = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + restriction_query = f""" +WITH restriction_list AS ( + {restriction_check} +) +SELECT EXISTS ( + SELECT 1 FROM restriction_list + WHERE (parent = :_check_parent OR parent IS NULL) + AND (child = :_check_child OR child IS NULL) +) AS in_allowlist +""" + result = await datasette.get_internal_database().execute( + restriction_query, all_params + ) + if result.rows and not result.rows[0][0]: + # Resource not in restriction allowlist - deny + return False + query = f""" WITH all_rules AS ( {rules_union} ), -child_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow +matched_rules AS ( + SELECT ar.*, + CASE + WHEN ar.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN ar.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child = :_check_child + WHERE (ar.parent IS NULL OR ar.parent = :_check_parent) + AND (ar.child IS NULL OR ar.child = :_check_child) ), -parent_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent = :_check_parent AND ar.child IS NULL -), -global_lvl AS ( - SELECT - MAX(CASE WHEN ar.allow = 0 THEN 1 ELSE 0 END) AS any_deny, - MAX(CASE WHEN ar.allow = 1 THEN 1 ELSE 0 END) AS any_allow - FROM all_rules ar - WHERE ar.parent IS NULL AND ar.child IS NULL +winner AS ( + SELECT * + FROM matched_rules + ORDER BY + depth DESC, -- specificity first (higher depth wins) + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow + source_plugin -- stable tie-break + LIMIT 1 ) -SELECT - CASE - WHEN cl.any_deny = 1 THEN 
0 - WHEN cl.any_allow = 1 THEN 1 - WHEN pl.any_deny = 1 THEN 0 - WHEN pl.any_allow = 1 THEN 1 - WHEN gl.any_deny = 1 THEN 0 - WHEN gl.any_allow = 1 THEN 1 - ELSE 0 - END AS is_allowed -FROM child_lvl cl, parent_lvl pl, global_lvl gl +SELECT COALESCE((SELECT allow FROM winner), 0) AS is_allowed """ # Execute the query against the internal database diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 75307abf..58be53a3 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -99,6 +99,10 @@ def build_rules_union( # No namespacing - just use plugin params as-is params.update(p.params or {}) + # Skip plugins that only provide restriction_sql (no permission rules) + if p.sql is None: + continue + parts.append( f""" SELECT parent, child, allow, reason, '{p.source}' AS source_plugin FROM ( @@ -155,6 +159,8 @@ async def resolve_permissions_from_catalog( - resource (rendered "/parent/child" or "/parent" or "/") """ resolved_plugins: List[PermissionSQL] = [] + restriction_sqls: List[str] = [] + for plugin in plugins: if callable(plugin) and not isinstance(plugin, PermissionSQL): resolved = plugin(action) # type: ignore[arg-type] @@ -164,6 +170,10 @@ async def resolve_permissions_from_catalog( raise TypeError("Plugin providers must return PermissionSQL instances") resolved_plugins.append(resolved) + # Collect restriction SQL filters + if resolved.restriction_sql: + restriction_sqls.append(resolved.restriction_sql) + union_sql, rule_params = build_rules_union(actor, resolved_plugins) all_params = { **(candidate_params or {}), @@ -199,8 +209,8 @@ async def resolve_permissions_from_catalog( PARTITION BY parent, child ORDER BY depth DESC, -- specificity first - CASE WHEN allow=0 THEN 0 ELSE 1 END, -- deny over allow at same depth - source_plugin -- stable tie-break + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break ) AS rn FROM matched ), @@ -228,6 +238,145 @@ async def resolve_permissions_from_catalog( ORDER BY c.parent, c.child """ + # If there are restriction filters, wrap the query with INTERSECT + # This ensures only resources in the restriction allowlist are returned + if restriction_sqls: + # Start with the main query, but select only parent/child for the INTERSECT + main_query_for_intersect = f""" + WITH + cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ), + permitted_resources AS ( + SELECT c.parent, c.child + FROM cands c + LEFT JOIN winner w + ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + WHERE COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) = 1 + ) + SELECT parent, child FROM permitted_resources + """ + + # Build 
restriction list with INTERSECT (all must match) + # Then filter to resources that match hierarchically + # Wrap each restriction_sql in a subquery to avoid operator precedence issues + # with UNION ALL inside the restriction SQL statements + restriction_intersect = "\nINTERSECT\n".join( + f"SELECT * FROM ({sql})" for sql in restriction_sqls + ) + + # Combine: resources allowed by permissions AND in restriction allowlist + # Database-level restrictions (parent, NULL) should match all children (parent, *) + filtered_resources = f""" + WITH restriction_list AS ( + {restriction_intersect} + ), + permitted AS ( + {main_query_for_intersect} + ), + filtered AS ( + SELECT p.parent, p.child + FROM permitted p + WHERE EXISTS ( + SELECT 1 FROM restriction_list r + WHERE (r.parent = p.parent OR r.parent IS NULL) + AND (r.child = p.child OR r.child IS NULL) + ) + ) + """ + + # Now join back to get full results for only the filtered resources + sql = f""" + {filtered_resources} + , cands AS ( + {candidate_sql} + ), + rules AS ( + {union_sql} + ), + matched AS ( + SELECT + c.parent, c.child, + r.allow, r.reason, r.source_plugin, + CASE + WHEN r.child IS NOT NULL THEN 2 -- child-level (most specific) + WHEN r.parent IS NOT NULL THEN 1 -- parent-level + ELSE 0 -- root/global + END AS depth + FROM cands c + JOIN rules r + ON (r.parent IS NULL OR r.parent = c.parent) + AND (r.child IS NULL OR r.child = c.child) + ), + ranked AS ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY parent, child + ORDER BY + depth DESC, -- specificity first + CASE WHEN allow=0 THEN 0 ELSE 1 END, -- then deny over allow at same depth + source_plugin -- stable tie-break + ) AS rn + FROM matched + ), + winner AS ( + SELECT parent, child, + allow, reason, source_plugin, depth + FROM ranked WHERE rn = 1 + ) + SELECT + c.parent, c.child, + COALESCE(w.allow, CASE WHEN :implicit_deny THEN 0 ELSE NULL END) AS allow, + COALESCE(w.reason, CASE WHEN :implicit_deny THEN 'implicit deny' ELSE NULL END) AS reason, + w.source_plugin, + COALESCE(w.depth, -1) AS depth, + :action AS action, + CASE + WHEN c.parent IS NULL THEN '/' + WHEN c.child IS NULL THEN '/' || c.parent + ELSE '/' || c.parent || '/' || c.child + END AS resource + FROM filtered c + LEFT JOIN winner w + ON ((w.parent = c.parent) OR (w.parent IS NULL AND c.parent IS NULL)) + AND ((w.child = c.child ) OR (w.child IS NULL AND c.child IS NULL)) + ORDER BY c.parent, c.child + """ + rows_iter: Iterable[sqlite3.Row] = await db.execute( sql, {**all_params, "implicit_deny": 1 if implicit_deny else 0}, diff --git a/datasette/views/special.py b/datasette/views/special.py index 5a341911..a1d736c5 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -403,7 +403,7 @@ class PermissionRulesView(BaseView): from datasette.utils.actions_sql import build_permission_rules_sql - union_sql, union_params = await build_permission_rules_sql( + union_sql, union_params, restriction_sqls = await build_permission_rules_sql( self.ds, actor, action ) await self.ds.refresh_schemas() diff --git a/tests/test_actor_restriction_bug.py b/tests/test_actor_restriction_bug.py new file mode 100644 index 00000000..0bfc9e1e --- /dev/null +++ b/tests/test_actor_restriction_bug.py @@ -0,0 +1,133 @@ +""" +Test for actor restrictions bug with database-level config. + +This test currently FAILS, demonstrating the bug where database-level +config allow blocks can bypass table-level restrictions. 
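+
+For illustration, the restricted actor used in the first test below looks
+roughly like this ("vt" is the abbreviation for the view-table action;
+"db_name" stands in for the real database name):
+
+    {"id": "user", "_r": {"r": {"db_name": {"table2": ["vt"], "table3": ["vt"]}}}}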
+""" + +import pytest +from datasette.app import Datasette +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_table_restrictions_not_bypassed_by_database_level_config(): + """ + Actor restrictions should act as hard limits that config cannot override. + + BUG: When an actor has table-level restrictions (e.g., only table2 and table3) + but config has a database-level allow block, the database-level config rule + currently allows ALL tables, not just those in the restriction allowlist. + + This test documents the expected behavior and will FAIL until the bug is fixed. + """ + # Config grants access at DATABASE level (not table level) + config = { + "databases": { + "test_db_rnbbdlc": { + "allow": { + "id": "user" + } # Database-level allow - grants access to all tables + } + } + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rnbbdlc") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + await db.execute_write("create table table3 (id integer primary key)") + await db.execute_write("create table table4 (id integer primary key)") + + # Actor restricted to ONLY table2 and table3 + # Even though config allows the whole database, restrictions should limit access + actor = { + "id": "user", + "_r": { + "r": { # Resource-level (table-level) restrictions + "test_db_rnbbdlc": { + "table2": ["vt"], # vt = view-table abbreviation + "table3": ["vt"], + } + } + }, + } + + # table2 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed - in restriction allowlist" + + # table3 should be allowed (in restriction allowlist AND config allows) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table3"), + actor=actor, + ) + assert result is True, "table3 should be allowed - in restriction allowlist" + + # table1 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table1"), + actor=actor, + ) + assert ( + result is False + ), "table1 should be DENIED - not in restriction allowlist, config cannot override" + + # table4 should be DENIED (NOT in restriction allowlist) + # Even though database-level config allows it, restrictions should deny it + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rnbbdlc", "table4"), + actor=actor, + ) + assert ( + result is False + ), "table4 should be DENIED - not in restriction allowlist, config cannot override" + + +@pytest.mark.asyncio +async def test_database_restrictions_with_database_level_config(): + """ + Verify that database-level restrictions work correctly with database-level config. + + This should pass - it's testing the case where restriction granularity + matches config granularity. 
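+
+    A database-level restriction uses the "d" key instead of the table-level
+    "r" key - roughly:
+
+        {"id": "user", "_r": {"d": {"db_name": ["vt"]}}}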
+ """ + config = { + "databases": {"test_db_rwdl": {"allow": {"id": "user"}}} # Database-level allow + } + + ds = Datasette(config=config) + await ds.invoke_startup() + db = ds.add_memory_database("test_db_rwdl") + await db.execute_write("create table table1 (id integer primary key)") + await db.execute_write("create table table2 (id integer primary key)") + + # Actor has database-level restriction (all tables in test_db_rwdl) + actor = { + "id": "user", + "_r": {"d": {"test_db_rwdl": ["vt"]}}, # Database-level restrictions + } + + # Both tables should be allowed (database-level restriction matches database-level config) + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table1"), + actor=actor, + ) + assert result is True, "table1 should be allowed" + + result = await ds.allowed( + action="view-table", + resource=TableResource("test_db_rwdl", "table2"), + actor=actor, + ) + assert result is True, "table2 should be allowed" diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 5a530b25..971b7e82 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1630,6 +1630,16 @@ async def test_hook_register_actions_with_custom_resources(): reason="user2 granted view-document-collection" ) + # Default allow for view-document-collection (like other view-* actions) + if action == "view-document-collection": + return PermissionSQL.allow( + reason="default allow for view-document-collection" + ) + + # Default allow for view-document (like other view-* actions) + if action == "view-document": + return PermissionSQL.allow(reason="default allow for view-document") + # Register the plugin temporarily plugin = TestPlugin() pm.register(plugin, name="test_custom_resources_plugin") diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py new file mode 100644 index 00000000..7d6d8a5a --- /dev/null +++ b/tests/test_restriction_sql.py @@ -0,0 +1,315 @@ +import pytest +from datasette.app import Datasette +from datasette.permissions import PermissionSQL +from datasette.resources import TableResource + + +@pytest.mark.asyncio +async def test_multiple_restriction_sources_intersect(): + """ + Test that when multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. 
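+
+    Sketch of the mechanism: each restriction source contributes an allowlist
+    of (parent, child) rows, for example:
+
+        SELECT 'db1' AS parent, NULL AS child
+
+    and the permission engine INTERSECTs those allowlists before applying the
+    normal permission rules.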
+ """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_intersect allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_intersect' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_intersect") + db2 = ds.add_memory_database("db2_multi_intersect") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1_multi_intersect + # INTERSECT means only db1_multi_intersect/t1 should pass + actor = { + "id": "user", + "_r": {"d": {"db1_multi_intersect": ["vt"], "db2_multi_intersect": ["vt"]}}, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1_multi_intersect/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_intersect", "t1") in resources + assert ("db2_multi_intersect", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") + + +@pytest.mark.asyncio +async def test_restriction_sql_with_overlapping_databases_and_tables(): + """ + Test actor with both database-level and table-level restrictions for same database. + + When actor has: + - Database-level: db1_overlapping allowed (all tables) + - Table-level: db1_overlapping/t1 allowed + + Both entries are UNION'd (OR'ed) within the actor's restrictions. + Database-level restriction allows ALL tables, so table-level is redundant. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_overlapping") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await db.execute_write("CREATE TABLE t2 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has BOTH database-level (db1_overlapping all tables) AND table-level (db1_overlapping/t1 only) + actor = { + "id": "user", + "_r": { + "d": { + "db1_overlapping": ["vt"] + }, # Database-level: all tables in db1_overlapping + "r": { + "db1_overlapping": {"t1": ["vt"]} + }, # Table-level: only t1 in db1_overlapping + }, + } + + # Within actor restrictions, entries are UNION'd (OR'ed): + # - Database level allows: (db1_overlapping, NULL) → matches all tables via hierarchical matching + # - Table level allows: (db1_overlapping, t1) → redundant, already covered by database level + # Result: Both tables are allowed + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + assert ("db1_overlapping", "t1") in resources + # Database-level restriction allows all tables, so t2 is also allowed + assert ("db1_overlapping", "t2") in resources + + +@pytest.mark.asyncio +async def test_restriction_sql_empty_allowlist_query(): + """ + Test the specific SQL query generated when action is not in allowlist. + + actor_restrictions_sql() returns "SELECT NULL AS parent, NULL AS child WHERE 0" + Verify this produces an empty result set. 
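+
+    (A SELECT with "WHERE 0" yields zero rows, so INTERSECTing it with any
+    other allowlist is also empty - every resource is filtered out.)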
+ """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_empty_allowlist") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Actor has restrictions but action not in allowlist + actor = {"id": "user", "_r": {"r": {"db1_empty_allowlist": {"t1": ["vt"]}}}} + + # Try to view-database (only view-table is in allowlist) + page = await ds.allowed_resources("view-database", actor) + + # Should be empty + assert len(page.resources) == 0 + + +@pytest.mark.asyncio +async def test_restriction_sql_with_pagination(): + """ + Test that restrictions work correctly with keyset pagination. + """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_pagination") + + # Create many tables + for i in range(10): + await db.execute_write(f"CREATE TABLE t{i:02d} (id INTEGER)") + await ds._refresh_schemas() + + # Actor restricted to only odd-numbered tables + restrictions = {"r": {"db1_pagination": {}}} + for i in range(10): + if i % 2 == 1: # Only odd tables + restrictions["r"]["db1_pagination"][f"t{i:02d}"] = ["vt"] + + actor = {"id": "user", "_r": restrictions} + + # Get first page with small limit + page1 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2 + ) + assert len(page1.resources) == 2 + assert page1.next is not None + + # Get second page using next token + page2 = await ds.allowed_resources( + "view-table", actor, parent="db1_pagination", limit=2, next=page1.next + ) + assert len(page2.resources) == 2 + + # Should have no overlap + page1_ids = {r.child for r in page1.resources} + page2_ids = {r.child for r in page2.resources} + assert page1_ids.isdisjoint(page2_ids) + + # All should be odd-numbered tables + all_ids = page1_ids | page2_ids + for table_id in all_ids: + table_num = int(table_id[1:]) # Extract number from "t01", "t03", etc. + assert table_num % 2 == 1, f"Table {table_id} should be odd-numbered" + + +@pytest.mark.asyncio +async def test_also_requires_with_restrictions(): + """ + Test that also_requires actions properly respect restrictions. + + execute-sql requires view-database. With restrictions, both must pass. + """ + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_also_requires") + db2 = ds.add_memory_database("db2_also_requires") + await ds._refresh_schemas() + + # Actor restricted to only db1_also_requires for view-database + # execute-sql requires view-database, so should only work on db1_also_requires + actor = { + "id": "user", + "_r": { + "d": { + "db1_also_requires": ["vd", "es"], + "db2_also_requires": [ + "es" + ], # They have execute-sql but not view-database + } + }, + } + + # db1_also_requires should allow execute-sql + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db1_also_requires", None), + actor=actor, + ) + assert result is True + + # db2_also_requires should not (they have execute-sql but not view-database) + result = await ds.allowed( + action="execute-sql", + resource=TableResource("db2_also_requires", None), + actor=actor, + ) + assert result is False + + +@pytest.mark.asyncio +async def test_restriction_abbreviations_and_full_names(): + """ + Test that both abbreviations and full action names work in restrictions. 
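+
+    Abbreviations used throughout this suite: "vt" = view-table,
+    "vd" = view-database, "es" = execute-sql.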
+ """ + ds = Datasette() + await ds.invoke_startup() + db = ds.add_memory_database("db1_abbrev") + await db.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() + + # Test with abbreviation + actor_abbr = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["vt"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_abbr, + ) + assert result is True + + # Test with full name + actor_full = {"id": "user", "_r": {"r": {"db1_abbrev": {"t1": ["view-table"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_full, + ) + assert result is True + + # Test with mixed + actor_mixed = {"id": "user", "_r": {"d": {"db1_abbrev": ["view-database", "vt"]}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("db1_abbrev", "t1"), + actor=actor_mixed, + ) + assert result is True + + +@pytest.mark.asyncio +async def test_permission_resources_sql_multiple_restriction_sources_intersect(): + """ + Test that when multiple plugins return restriction_sql, they are INTERSECTed. + + This tests the case where both actor _r restrictions AND a plugin + provide restriction_sql - both must pass for access to be granted. + """ + from datasette import hookimpl + from datasette.plugins import pm + + class RestrictivePlugin: + __name__ = "RestrictivePlugin" + + @hookimpl + def permission_resources_sql(self, datasette, actor, action): + # Plugin adds additional restriction: only db1_multi_restrictions allowed + if action == "view-table": + return PermissionSQL( + restriction_sql="SELECT 'db1_multi_restrictions' AS parent, NULL AS child", + params={}, + ) + return None + + plugin = RestrictivePlugin() + pm.register(plugin, name="restrictive_plugin") + + try: + ds = Datasette() + await ds.invoke_startup() + db1 = ds.add_memory_database("db1_multi_restrictions") + db2 = ds.add_memory_database("db2_multi_restrictions") + await db1.execute_write("CREATE TABLE t1 (id INTEGER)") + await db2.execute_write("CREATE TABLE t1 (id INTEGER)") + await ds._refresh_schemas() # Populate catalog tables + + # Actor has restrictions allowing both databases + # But plugin only allows db1 + # INTERSECT means only db1/t1 should pass + actor = { + "id": "user", + "_r": { + "d": { + "db1_multi_restrictions": ["vt"], + "db2_multi_restrictions": ["vt"], + } + }, + } + + page = await ds.allowed_resources("view-table", actor) + resources = {(r.parent, r.child) for r in page.resources} + + # Should only see db1/t1 (intersection of actor restrictions and plugin restrictions) + assert ("db1_multi_restrictions", "t1") in resources + assert ("db2_multi_restrictions", "t1") not in resources + finally: + pm.unregister(name="restrictive_plugin") From 92db0343c303e7aea8abb3b714d667c062744488 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:26:20 -0800 Subject: [PATCH 06/42] Updated release notes for 1.0a20 Refs #2550 --- docs/changelog.rst | 22 ++++++++++++---------- docs/upgrade-1.0a20.md | 2 ++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index f98ad8ac..cc5e75af 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,27 +6,29 @@ Changelog .. _v1_0_a20: -UNRELEASED 1.0a20 (2025-??-??) ------------------------------- +1.0a20 (2025-11-03) +------------------- -This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning Datasette's permission system. 
+This alpha introduces a major breaking change prior to the 1.0 release of Datasette concerning how Datasette's permission system works.
 
 Permission system redesign
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Previously the permission system worked using ``datasette.permission_allowed()`` checks which consulted all available plugins in turn to determine whether a given actor was allowed to perform a given action on a given resource.
 
-This approach could become prohibitively expensive for large lists of items - for example to determine the list of tables that a user could view in a large Datasette instance, where the plugin hooks would be called N times for N tables.
+This approach could become prohibitively expensive for large lists of items - for example, to determine the list of tables that a user could view in a large Datasette instance, each plugin implementation of that hook would be fired for every table.
 
-The new system instead uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action.
+The new design uses SQL queries against Datasette's internal :ref:`catalog tables ` to derive the list of resources for which an actor has permission for a given action.
 
 This turns an N x M problem (N resources, M plugins) into a single SQL query.
 
-Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will influence the construction of that query.
+Plugins can use the new :ref:`plugin_hook_permission_resources_sql` hook to return SQL fragments which will be used as part of that query.
 
-Affected plugins should make the following changes:
+Plugins that use any of the following features will need to be updated to work with this and subsequent alphas (and with Datasette 1.0 stable itself):
 
-- Replace calls to ``datasette.permission_allowed()`` with calls to the new :ref:`datasette.allowed() ` method. The new method takes a ``resource=`` parameter which should be an instance of a ``Resource`` subclass, as described in the method documentation.
-- The ``permission_allowed()`` plugin hook has been removed in favor of the new :ref:`permission_resources_sql() ` hook.
-- The ``register_permissions()`` plugin hook has been removed in favor of :ref:`register_actions() `.
+- Checking permissions with ``datasette.permission_allowed()`` - this method has been replaced with :ref:`datasette.allowed() `.
+- Implementing the ``permission_allowed()`` plugin hook - this hook has been removed in favor of :ref:`permission_resources_sql() `.
+- Using ``register_permissions()`` to register permissions - this hook has been removed in favor of :ref:`register_actions() `.
+
+Consult the :ref:`v1.0a20 upgrade guide <upgrade_guide_v1_a20>` for further details on how to upgrade affected plugins.
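+
+A minimal implementation of the new hook might look something like this - a sketch only, returning a row with the ``parent``, ``child``, ``allow`` and ``reason`` columns that the permission query expects (``'mydb'`` and ``'mytable'`` are placeholder names):
+
+.. code-block:: python
+
+    from datasette import hookimpl
+    from datasette.permissions import PermissionSQL
+
+    @hookimpl
+    def permission_resources_sql(datasette, actor, action):
+        if action != "view-table":
+            return None
+        # Allow one specific table, recording a reason for the audit trail
+        return PermissionSQL(
+            sql=(
+                "SELECT 'mydb' AS parent, 'mytable' AS child, "
+                "1 AS allow, 'demo plugin' AS reason"
+            ),
+            params={},
+        )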
 
 Plugins can now make use of two new internal methods to help resolve permission checks:
 

diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md
index 6abcd23d..2aa782e0 100644
--- a/docs/upgrade-1.0a20.md
+++ b/docs/upgrade-1.0a20.md
@@ -2,6 +2,8 @@
 orphan: true
 ---
 
+(upgrade_guide_v1_a20)=
+
 # Datasette 1.0a20 plugin upgrade guide
 

From b3b8c5831be832f32e648dad3080317847778cdc Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 3 Nov 2025 14:34:29 -0800
Subject: [PATCH 08/42] Fixed some broken reference links on upgrade guide

---
 docs/upgrade-1.0a20.md |  4 ++--
 docs/upgrade_guide.md  | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md
index 2aa782e0..fbdcac2b 100644
--- a/docs/upgrade-1.0a20.md
+++ b/docs/upgrade-1.0a20.md
@@ -2,10 +2,10 @@
 orphan: true
 ---
 
-(upgrade_guide_v1_a20)=
-
 # Datasette 1.0a20 plugin upgrade guide
 
+(upgrade_guide_v1_a20)=
+
 Datasette 1.0a20 makes some breaking changes to Datasette's permission system.
 
 Plugins need to be updated if they use **any of the following**:
diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md
index 105d7281..a3c321a4 100644
--- a/docs/upgrade_guide.md
+++ b/docs/upgrade_guide.md
@@ -4,7 +4,7 @@
 (upgrade_guide_v1)=
 ## Datasette 0.X -> 1.0
 
-This section reviews breaking changes Datasette ``1.0`` has when upgrading from a ``0.XX`` version. For new features that ``1.0`` offers, see the :ref:`changelog`.
+This section reviews breaking changes Datasette ``1.0`` has when upgrading from a ``0.XX`` version. For new features that ``1.0`` offers, see the {ref}`changelog`.
 
 (upgrade_guide_v1_sql_queries)=
 ### New URL for SQL queries
@@ -37,7 +37,7 @@ Metadata was completely revamped for Datasette 1.0. There are a number of relate
 (upgrade_guide_v1_metadata_split)=
 #### ``metadata.yaml`` split into ``datasette.yaml``
 
-Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink if a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metadata (ex title and descriptions of database and tables, licensing info, etc). Other settings have been moved to a ``datasette.yml`` configuration file, described in :ref:`configuration`.
+Before Datasette 1.0, the ``metadata.yaml`` file became a kitchen sink of a mix of metadata, configuration, and settings. Now ``metadata.yaml`` is strictly for metadata (e.g. title and descriptions of databases and tables, licensing info, etc.). Other settings have been moved to a ``datasette.yml`` configuration file, described in {ref}`configuration`.
To start Datasette with both metadata and configuration files, run it like this: @@ -85,14 +85,14 @@ def get_metadata(datasette, key, database, table): pass ``` -Instead, plugins are encouraged to interact directly with Datasette's in-memory metadata tables in SQLite using the following methods on the :ref:`internals_datasette`: +Instead, plugins are encouraged to interact directly with Datasette's in-memory metadata tables in SQLite using the following methods on the {ref}`internals_datasette`: -- :ref:`get_instance_metadata() ` and :ref:`set_instance_metadata() ` -- :ref:`get_database_metadata() ` and :ref:`set_database_metadata() ` -- :ref:`get_resource_metadata() ` and :ref:`set_resource_metadata() ` -- :ref:`get_column_metadata() ` and :ref:`set_column_metadata() ` +- {ref}`get_instance_metadata() ` and {ref}`set_instance_metadata() ` +- {ref}`get_database_metadata() ` and {ref}`set_database_metadata() ` +- {ref}`get_resource_metadata() ` and {ref}`set_resource_metadata() ` +- {ref}`get_column_metadata() ` and {ref}`set_column_metadata() ` -A plugin that stores or calculates its own metadata can implement the :ref:`plugin_hook_startup` hook to populate those items on startup, and then call those methods while it is running to persist any new metadata changes. +A plugin that stores or calculates its own metadata can implement the {ref}`plugin_hook_startup` hook to populate those items on startup, and then call those methods while it is running to persist any new metadata changes. (upgrade_guide_v1_metadata_json_removed)= #### The ``/metadata.json`` endpoint has been removed @@ -106,10 +106,10 @@ As of Datasette ``1.0a14``, the ``.metadata()`` method on the Datasette Python A Instead, one should use the following methods on a Datasette class: -- :ref:`get_instance_metadata() ` -- :ref:`get_database_metadata() ` -- :ref:`get_resource_metadata() ` -- :ref:`get_column_metadata() ` +- {ref}`get_instance_metadata() ` +- {ref}`get_database_metadata() ` +- {ref}`get_resource_metadata() ` +- {ref}`get_column_metadata() ` ```{include} upgrade-1.0a20.md :heading-offset: 1 From 5d4dfcec6b91a47ab0f2053aa3bc30c68036f879 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 3 Nov 2025 14:38:57 -0800 Subject: [PATCH 09/42] Fix for link from changelog not working Annoyingly we now get a warning in the docs build about a duplicate label, but it seems harmless enough. --- docs/upgrade-1.0a20.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/upgrade-1.0a20.md b/docs/upgrade-1.0a20.md index fbdcac2b..749d383c 100644 --- a/docs/upgrade-1.0a20.md +++ b/docs/upgrade-1.0a20.md @@ -2,11 +2,8 @@ orphan: true --- -# Datasette 1.0a20 plugin upgrade guide - (upgrade_guide_v1_a20)= - - +# Datasette 1.0a20 plugin upgrade guide Datasette 1.0a20 makes some breaking changes to Datasette's permission system. 
 Plugins need to be updated if they use **any of the following**:

From dc3f9fe9e4cbcda7e7dd88195f65cf0dad21ce5c Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 3 Nov 2025 14:42:59 -0800
Subject: [PATCH 10/42] Python 3.10, not 3.8

---
 docs/contributing.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/contributing.rst b/docs/contributing.rst
index 4771aa11..6be0247c 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -20,7 +20,7 @@ General guidelines
 Setting up a development environment
 ------------------------------------
 
-If you have Python 3.8 or higher installed on your computer (on OS X the quickest way to do this `is using homebrew `__) you can install an editable copy of Datasette using the following steps.
+If you have Python 3.10 or higher installed on your computer (on OS X the quickest way to do this `is using homebrew `__) you can install an editable copy of Datasette using the following steps.
 
 If you want to use GitHub to publish your changes, first `create a fork of datasette `__ under your own GitHub account.
 

From 95a1fef28000d0b44ceaa398421530f588c5c385 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 3 Nov 2025 14:47:24 -0800
Subject: [PATCH 11/42] Release 1.0a20

Refs #2488, #2495, #2503, #2505, #2509, #2510, #2513, #2515, #2517,
 #2519, #2520, #2521, #2524, #2525, #2526, #2528, #2530, #2531, #2534,
 #2537, #2543, #2544, #2550, #2551, #2555, #2558, #2561, #2562, #2564,
 #2565, #2567, #2569, #2570, #2571, #2574
---
 datasette/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datasette/version.py b/datasette/version.py
index c1318c6f..20cb46c7 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "1.0a19"
+__version__ = "1.0a20"
 __version_info__ = tuple(__version__.split("."))

From 295e4a2e87464fc1838d6278308bf151bc14ba73 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 3 Nov 2025 15:05:17 -0800
Subject: [PATCH 12/42] Pin to httpx<1.0

Refs https://github.com/encode/httpx/issues/3635

Closes #2576
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fb9f0453..1395ce82 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ dependencies = [
     "click-default-group>=1.2.3",
     "Jinja2>=2.10.3",
     "hupper>=1.9",
-    "httpx>=0.20",
+    "httpx>=0.20,<1.0",
     "pluggy>=1.0",
     "uvicorn>=0.11",
     "aiofiles>=0.4",

From f257ca6edb64848c3b04b54d41e347c54fe57c05 Mon Sep 17 00:00:00 2001
From: James Jefferies
Date: Wed, 5 Nov 2025 01:04:12 +0000
Subject: [PATCH 13/42] Fix for open redirect - identified in Issue 2429 (#2500)

* Issue 2429 indicates the possibility of an open redirect

The 404 processing ends up redirecting a request with multiple path slashes to that site, i.e. https://my-site//shedcode.co.uk will redirect to https://shedcode.co.uk

This commit uses a regular expression to remove the multiple leading slashes before redirecting.
---
 datasette/app.py           | 5 +++++
 tests/test_custom_pages.py | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/datasette/app.py b/datasette/app.py
index 09936b3a..be507241 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -2150,6 +2150,11 @@ class DatasetteRouter:
             context = {}
         if path.endswith(b"/"):
             path = path.rstrip(b"/")
+
+            # If you redirect with a // at the beginning, you end up with an open redirect, so
+            # https://my.site//foo/ - will redirect to https://foo
+            path = re.sub(rb"^/+", b"/", path)
+
             if request.scope["query_string"]:
                 path += b"?" + request.scope["query_string"]
             await asgi_send_redirect(send, path.decode("latin1"))
diff --git a/tests/test_custom_pages.py b/tests/test_custom_pages.py
index f2cfe394..ccc139ce 100644
--- a/tests/test_custom_pages.py
+++ b/tests/test_custom_pages.py
@@ -97,3 +97,9 @@ def test_custom_route_pattern_404(custom_pages_client):
     assert response.status == 404
     assert "Error 404" in response.text
     assert ">Oh no" in response.text
+
+
+def test_custom_route_pattern_with_slash_slash_302(custom_pages_client):
+    response = custom_pages_client.get("//nastyOpenRedirect/")
+    assert response.status == 302
+    assert response.headers["location"] == "/nastyOpenRedirect"

From: Simon Willison
Date: Tue, 4 Nov 2025 17:08:06 -0800
Subject: [PATCH 14/42] Move open redirect fix to asgi_send_redirect, refs #2429

See https://github.com/simonw/datasette/pull/2500#issuecomment-3488632278
---
 datasette/app.py           | 5 -----
 datasette/utils/asgi.py    | 4 ++++
 tests/test_custom_pages.py | 5 +++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index be507241..09936b3a 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -2150,11 +2150,6 @@ class DatasetteRouter:
             context = {}
         if path.endswith(b"/"):
             path = path.rstrip(b"/")
-
-            # If you redirect with a // at the beginning, you end up with an open redirect, so
-            # https://my.site//foo/ - will redirect to https://foo
-            path = re.sub(rb"^/+", b"/", path)
-
             if request.scope["query_string"]:
                 path += b"?" + request.scope["query_string"]
             await asgi_send_redirect(send, path.decode("latin1"))
diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py
index 40214cbe..7f3329a6 100644
--- a/datasette/utils/asgi.py
+++ b/datasette/utils/asgi.py
@@ -6,6 +6,7 @@ from pathlib import Path
 from http.cookies import SimpleCookie, Morsel
 import aiofiles
 import aiofiles.os
+import re
 
 # Workaround for adding samesite support to pre 3.8 python
 Morsel._reserved["samesite"] = "SameSite"
@@ -248,6 +249,9 @@ async def asgi_send_html(send, html, status=200, headers=None):
 
 
 async def asgi_send_redirect(send, location, status=302):
+    # Prevent open redirect vulnerability: strip multiple leading slashes
+    # //example.com would be interpreted as a protocol-relative URL (e.g., https://example.com/)
+    location = re.sub(r"^/+", "/", location)
     await asgi_send(
         send,
         "",
diff --git a/tests/test_custom_pages.py b/tests/test_custom_pages.py
index ccc139ce..39a4c06b 100644
--- a/tests/test_custom_pages.py
+++ b/tests/test_custom_pages.py
@@ -100,6 +100,7 @@
 
 def test_custom_route_pattern_with_slash_slash_302(custom_pages_client):
-    response = custom_pages_client.get("//nastyOpenRedirect/")
+    # https://github.com/simonw/datasette/issues/2429
+    response = custom_pages_client.get("//example.com/")
     assert response.status == 302
-    assert response.headers["location"] == "/nastyOpenRedirect"
+    assert response.headers["location"] == "/example.com"

From 9f74dc22a8587b6322c4aa2747894e408ab58a9c Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Tue, 4 Nov 2025 18:11:20 -0800
Subject: [PATCH 15/42] Run cog with --extra test

Previously it kept on adding stuff to cli-reference.rst that came from other plugins installed for my global environment
---
 Justfile         |  4 ++--
 datasette/cli.py | 28 ++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/Justfile b/Justfile
index adb8cf0d..abb134a6 100644
--- a/Justfile
+++ b/Justfile
@@ -21,11 +21,11 @@ export DATASETTE_SECRET := "not_a_secret"
 
 @lint: codespell
 	uv run black . 
--check uv run flake8 - uv run cog --check README.md docs/*.rst + uv run --extra test cog --check README.md docs/*.rst # Rebuild docs with cog @cog: - uv run cog -r README.md docs/*.rst + uv run --extra test cog -r README.md docs/*.rst # Serve live docs on localhost:8000 @docs: cog blacken-docs diff --git a/datasette/cli.py b/datasette/cli.py index 94af09a2..7c1c2d44 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -442,6 +442,11 @@ def uninstall(packages, yes): "--get", help="Run an HTTP GET request against this path, print results and exit", ) +@click.option( + "--headers", + is_flag=True, + help="Include HTTP headers in --get output (requires --get)", +) @click.option( "--token", help="API token to send with --get requests", @@ -510,6 +515,7 @@ def serve( secret, root, get, + headers, token, actor, version_note, @@ -658,19 +664,33 @@ def serve( # Run async soundness checks - but only if we're not under pytest run_sync(lambda: check_databases(ds)) + if headers and not get: + raise click.ClickException("--headers can only be used with --get") + if token and not get: raise click.ClickException("--token can only be used with --get") if get: client = TestClient(ds) - headers = {} + request_headers = {} if token: - headers["Authorization"] = "Bearer {}".format(token) + request_headers["Authorization"] = "Bearer {}".format(token) cookies = {} if actor: cookies["ds_actor"] = client.actor_cookie(json.loads(actor)) - response = client.get(get, headers=headers, cookies=cookies) - click.echo(response.text) + response = client.get(get, headers=request_headers, cookies=cookies) + + if headers: + # Output HTTP status code, headers, two newlines, then the response body + click.echo(f"HTTP/1.1 {response.status}") + for key, value in response.headers.items(): + click.echo(f"{key}: {value}") + if response.text: + click.echo() + click.echo(response.text) + else: + click.echo(response.text) + exit_code = 0 if response.status == 200 else 1 sys.exit(exit_code) return From ce464da34b8e18617fa10579f1b4bb32b56bdc20 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:12:15 -0800 Subject: [PATCH 16/42] datasette --get --headers option, closes #2578 --- datasette/cli.py | 2 +- docs/cli-reference.rst | 1 + tests/test_cli_serve_get.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index 7c1c2d44..aaf1b244 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -445,7 +445,7 @@ def uninstall(packages, yes): @click.option( "--headers", is_flag=True, - help="Include HTTP headers in --get output (requires --get)", + help="Include HTTP headers in --get output", ) @click.option( "--token", diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 67e06254..0e224916 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -121,6 +121,7 @@ Once started you can access it at ``http://localhost:8001`` the root user --get TEXT Run an HTTP GET request against this path, print results and exit + --headers Include HTTP headers in --get output --token TEXT API token to send with --get requests --actor TEXT Actor to use for --get requests (JSON string) --version-note TEXT Additional note to show on /-/versions diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 513669a1..5cba5081 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -52,6 +52,27 @@ def test_serve_with_get(tmp_path_factory): pm.unregister(to_unregister) +def test_serve_with_get_headers(): + runner 
= CliRunner() + result = runner.invoke( + cli, + [ + "serve", + "--memory", + "--get", + "/_memory/", + "--headers", + ], + ) + # exit_code is 1 because it wasn't a 200 response + assert result.exit_code == 1, result.output + assert result.output == ( + "HTTP/1.1 302\n" + "location: /_memory\n" + "content-type: text/html; charset=utf-8\n" + ) + + def test_serve_with_get_and_token(): runner = CliRunner() result1 = runner.invoke( From b4385a3ff7ccc96b53312428ba496770e68cc3f8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:39:25 -0800 Subject: [PATCH 17/42] Made test_serve_with_get_headers a bit more forgiving --- tests/test_cli_serve_get.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 5cba5081..5ad01bfa 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -66,11 +66,10 @@ def test_serve_with_get_headers(): ) # exit_code is 1 because it wasn't a 200 response assert result.exit_code == 1, result.output - assert result.output == ( - "HTTP/1.1 302\n" - "location: /_memory\n" - "content-type: text/html; charset=utf-8\n" - ) + lines = result.output.splitlines() + assert lines and lines[0] == "HTTP/1.1 302" + assert "location: /_memory" in lines + assert "content-type: text/html; charset=utf-8" in lines def test_serve_with_get_and_token(): From 12016342e716a4e779bddac3f3a1fc43408527f4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Nov 2025 18:40:58 -0800 Subject: [PATCH 18/42] Fix test_metadata_yaml I broke in #2578 --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1c8f51ef..3bb360fb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -146,6 +146,7 @@ def test_metadata_yaml(): actor=None, version_note=None, get=None, + headers=False, help_settings=False, pdb=False, crossdb=False, From f12f6cc2aba0ab554bc0a985bc772741d7f36d7d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 09:28:41 -0800 Subject: [PATCH 19/42] Get publish cloudrun working with latest Cloud Run (#2581) Refs: - #2511 Filter out bad services, refs: - https://github.com/simonw/datasette/pull/2581#issuecomment-3492243400 --- .github/workflows/deploy-latest.yml | 11 +-- .github/workflows/publish.yml | 11 +-- datasette/publish/cloudrun.py | 100 ++++++++++++++++++++++++++-- docs/cli-reference.rst | 9 ++- tests/test_publish_cloudrun.py | 36 ++++++++-- 5 files changed, 146 insertions(+), 21 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 6907b438..8ffdbfd5 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -102,12 +102,13 @@ jobs: # jq '.plugins |= . 
+ {"datasette-ephemeral-tables": {"table_ttl": 900}}' \ # > metadata.json # cat metadata.json - - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@v0 + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 with: - version: '318.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy to Cloud Run env: LATEST_DATASETTE_SECRET: ${{ secrets.LATEST_DATASETTE_SECRET }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 14bfaded..e94d0bdd 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -73,12 +73,13 @@ jobs: DISABLE_SPHINX_INLINE_TABS=1 sphinx-build -b xml . _build sphinx-to-sqlite ../docs.db _build cd .. - - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@v0 + - id: auth + name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 with: - version: '318.0.0' - service_account_email: ${{ secrets.GCP_SA_EMAIL }} - service_account_key: ${{ secrets.GCP_SA_KEY }} + credentials_json: ${{ secrets.GCP_SA_KEY }} + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v3 - name: Deploy stable-docs.datasette.io to Cloud Run run: |- gcloud config set run/region us-central1 diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index 760ff0d1..63d22fe8 100644 --- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -3,7 +3,7 @@ import click import json import os import re -from subprocess import check_call, check_output +from subprocess import CalledProcessError, check_call, check_output from .common import ( add_common_publish_arguments_and_options, @@ -23,7 +23,9 @@ def publish_subcommand(publish): help="Application name to use when building", ) @click.option( - "--service", default="", help="Cloud Run service to deploy (or over-write)" + "--service", + default="", + help="Cloud Run service to deploy (or over-write)", ) @click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension") @click.option( @@ -55,13 +57,32 @@ def publish_subcommand(publish): @click.option( "--max-instances", type=int, - help="Maximum Cloud Run instances", + default=1, + show_default=True, + help="Maximum Cloud Run instances (use 0 to remove the limit)", ) @click.option( "--min-instances", type=int, help="Minimum Cloud Run instances", ) + @click.option( + "--artifact-repository", + default="datasette", + show_default=True, + help="Artifact Registry repository to store the image", + ) + @click.option( + "--artifact-region", + default="us", + show_default=True, + help="Artifact Registry location (region or multi-region)", + ) + @click.option( + "--artifact-project", + default=None, + help="Project ID for Artifact Registry (defaults to the active project)", + ) def cloudrun( files, metadata, @@ -91,6 +112,9 @@ def publish_subcommand(publish): apt_get_extras, max_instances, min_instances, + artifact_repository, + artifact_region, + artifact_project, ): "Publish databases to Datasette running on Cloud Run" fail_if_publish_binary_not_installed( @@ -100,6 +124,21 @@ def publish_subcommand(publish): "gcloud config get-value project", shell=True, universal_newlines=True ).strip() + artifact_project = artifact_project or project + + # Ensure Artifact Registry exists for the target image + _ensure_artifact_registry( + artifact_project=artifact_project, 
+ artifact_region=artifact_region, + artifact_repository=artifact_repository, + ) + + artifact_host = ( + artifact_region + if artifact_region.endswith("-docker.pkg.dev") + else f"{artifact_region}-docker.pkg.dev" + ) + if not service: # Show the user their current services, then prompt for one click.echo("Please provide a service name for this deployment\n") @@ -117,6 +156,11 @@ def publish_subcommand(publish): click.echo("") service = click.prompt("Service name", type=str) + image_id = ( + f"{artifact_host}/{artifact_project}/" + f"{artifact_repository}/datasette-{service}" + ) + extra_metadata = { "title": title, "license": license, @@ -173,7 +217,6 @@ def publish_subcommand(publish): print(fp.read()) print("\n====================\n") - image_id = f"gcr.io/{project}/datasette-{service}" check_call( "gcloud builds submit --tag {}{}".format( image_id, " --timeout {}".format(timeout) if timeout else "" @@ -187,7 +230,7 @@ def publish_subcommand(publish): ("--max-instances", max_instances), ("--min-instances", min_instances), ): - if value: + if value is not None: extra_deploy_options.append("{} {}".format(option, value)) check_call( "gcloud run deploy --allow-unauthenticated --platform=managed --image {} {}{}".format( @@ -199,6 +242,52 @@ def publish_subcommand(publish): ) +def _ensure_artifact_registry(artifact_project, artifact_region, artifact_repository): + """Ensure Artifact Registry API is enabled and the repository exists.""" + + enable_cmd = ( + "gcloud services enable artifactregistry.googleapis.com " + f"--project {artifact_project} --quiet" + ) + try: + check_call(enable_cmd, shell=True) + except CalledProcessError as exc: + raise click.ClickException( + "Failed to enable artifactregistry.googleapis.com. " + "Please ensure you have permissions to manage services." + ) from exc + + describe_cmd = ( + "gcloud artifacts repositories describe {repo} --project {project} " + "--location {location} --quiet" + ).format( + repo=artifact_repository, + project=artifact_project, + location=artifact_region, + ) + try: + check_call(describe_cmd, shell=True) + return + except CalledProcessError: + create_cmd = ( + "gcloud artifacts repositories create {repo} --repository-format=docker " + '--location {location} --project {project} --description "Datasette Cloud Run images" --quiet' + ).format( + repo=artifact_repository, + location=artifact_region, + project=artifact_project, + ) + try: + check_call(create_cmd, shell=True) + click.echo(f"Created Artifact Registry repository '{artifact_repository}'") + except CalledProcessError as exc: + raise click.ClickException( + "Failed to create Artifact Registry repository. " + "Use --artifact-repository/--artifact-region to point to an existing repo " + "or create one manually." + ) from exc + + def get_existing_services(): services = json.loads( check_output( @@ -214,6 +303,7 @@ def get_existing_services(): "url": service["status"]["address"]["url"], } for service in services + if "url" in service["status"] ] diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 0e224916..f002d05a 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -489,8 +489,15 @@ See :ref:`publish_cloud_run`. 
--cpu [1|2|4] Number of vCPUs to allocate in Cloud Run --timeout INTEGER Build timeout in seconds --apt-get-install TEXT Additional packages to apt-get install - --max-instances INTEGER Maximum Cloud Run instances + --max-instances INTEGER Maximum Cloud Run instances (use 0 to remove + the limit) [default: 1] --min-instances INTEGER Minimum Cloud Run instances + --artifact-repository TEXT Artifact Registry repository to store the + image [default: datasette] + --artifact-region TEXT Artifact Registry location (region or multi- + region) [default: us] + --artifact-project TEXT Project ID for Artifact Registry (defaults to + the active project) --help Show this message and exit. diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 818fa2d3..f53e5059 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -57,12 +57,20 @@ def test_publish_cloudrun_prompts_for_service( "Service name: input-service" ) == result.output.strip() assert 0 == result.exit_code - tag = "gcr.io/myproject/datasette-input-service" + tag = "us-docker.pkg.dev/myproject/datasette/datasette-input-service" mock_call.assert_has_calls( [ + mock.call( + "gcloud services enable artifactregistry.googleapis.com --project myproject --quiet", + shell=True, + ), + mock.call( + "gcloud artifacts repositories describe datasette --project myproject --location us --quiet", + shell=True, + ), mock.call(f"gcloud builds submit --tag {tag}", shell=True), mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service".format( + "gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service --max-instances 1".format( tag ), shell=True, @@ -86,12 +94,20 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory): cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"] ) assert 0 == result.exit_code - tag = f"gcr.io/{mock_output.return_value}/datasette-test" + tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" mock_call.assert_has_calls( [ + mock.call( + f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", + shell=True, + ), + mock.call( + f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet", + shell=True, + ), mock.call(f"gcloud builds submit --tag {tag}", shell=True), mock.call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} test".format( + "gcloud run deploy --allow-unauthenticated --platform=managed --image {} test --max-instances 1".format( tag ), shell=True, @@ -167,7 +183,7 @@ def test_publish_cloudrun_memory_cpu( assert 2 == result.exit_code return assert 0 == result.exit_code - tag = f"gcr.io/{mock_output.return_value}/datasette-test" + tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test" expected_call = ( "gcloud run deploy --allow-unauthenticated --platform=managed" " --image {} test".format(tag) @@ -179,8 +195,18 @@ def test_publish_cloudrun_memory_cpu( expected_call += " --cpu {}".format(cpu) if timeout: expected_build_call += f" --timeout {timeout}" + # max_instances defaults to 1 + expected_call += " --max-instances 1" mock_call.assert_has_calls( [ + mock.call( + f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet", + shell=True, + ), + mock.call( + f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location 
us --quiet", + shell=True, + ), mock.call(expected_build_call, shell=True), mock.call( expected_call, From 3c2254463be78199678093a8db0fc1e6ad0c4cde Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 10:25:37 -0800 Subject: [PATCH 20/42] Release notes for 0.65.2 Adding those to main. Refs #2579 --- docs/changelog.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index cc5e75af..dbcff8cb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,16 @@ Changelog ========= +.. _v0_65_2: + +0.65.2 (2025-11-05) +------------------- + +* Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +* Upgraded for compatibility with Python 3.14. +* Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) +* Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. + .. _v1_0_a20: 1.0a20 (2025-11-03) From ec99bb46f8854ec1e17600fa0373461e66cdb26c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 10:51:46 -0800 Subject: [PATCH 21/42] stable-docs YAML workflow, refs #2582 --- .github/workflows/stable-docs.yml | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 .github/workflows/stable-docs.yml diff --git a/.github/workflows/stable-docs.yml b/.github/workflows/stable-docs.yml new file mode 100644 index 00000000..3119d617 --- /dev/null +++ b/.github/workflows/stable-docs.yml @@ -0,0 +1,76 @@ +name: Update Stable Docs + +on: + release: + types: [published] + push: + branches: + - main + +permissions: + contents: write + +jobs: + update_stable_docs: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + fetch-depth: 0 # We need all commits to find docs/ changes + - name: Set up Git user + run: | + git config user.name "Automated" + git config user.email "actions@users.noreply.github.com" + - name: Create stable branch if it does not yet exist + run: | + if ! git ls-remote --heads origin stable | grep -qE '\bstable\b'; then + # Make sure we have all tags locally + git fetch --tags --quiet + + # Latest tag that is just numbers and dots (optionally prefixed with 'v') + # e.g., 0.65.2 or v0.65.2 — excludes 1.0a20, 1.0-rc1, etc. 
+ LATEST_RELEASE=$( + git tag -l --sort=-v:refname \ + | grep -E '^v?[0-9]+(\.[0-9]+){1,3}$' \ + | head -n1 + ) + + git checkout -b stable + + # If there are any stable releases, copy docs/ from the most recent + if [ -n "$LATEST_RELEASE" ]; then + rm -rf docs/ + git checkout "$LATEST_RELEASE" -- docs/ || true + fi + + git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes" + git push -u origin stable + fi + - name: Handle Release + if: github.event_name == 'release' && !github.event.release.prerelease + run: | + git fetch --all + git checkout stable + git reset --hard ${GITHUB_REF#refs/tags/} + git push origin stable --force + - name: Handle Commit to Main + if: contains(github.event.head_commit.message, '!stable-docs') + run: | + git fetch origin + git checkout -b stable origin/stable + # Get the list of modified files in docs/ from the current commit + FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/) + # Check if the list of files is non-empty + if [[ -n "$FILES" ]]; then + # Checkout those files to the stable branch to over-write with their contents + for FILE in $FILES; do + git checkout ${{ github.sha }} -- $FILE + done + git add docs/ + git commit -m "Doc changes from ${{ github.sha }}" + git push origin stable + else + echo "No changes to docs/ in this commit." + exit 0 + fi From d814e81b32087c7c395faca9a9f9a7d3966ade76 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 13:38:01 -0800 Subject: [PATCH 22/42] datasette.client.get(..., skip_permission_checks=True) Closes #2580 --- datasette/app.py | 109 +++++++++++---- datasette/permissions.py | 27 ++++ datasette/utils/actions_sql.py | 21 +++ datasette/utils/permissions.py | 15 ++- docs/internals.rst | 30 +++++ tests/test_internals_datasette_client.py | 162 +++++++++++++++++++++++ 6 files changed, 335 insertions(+), 29 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 09936b3a..177debe2 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2363,6 +2363,12 @@ class NotFoundExplicit(NotFound): class DatasetteClient: + """Internal HTTP client for making requests to a Datasette instance. + + Used for testing and for internal operations that need to make HTTP requests + to the Datasette app without going through an actual HTTP server. 
+ """ + def __init__(self, ds): self.ds = ds self.app = ds.app() @@ -2378,40 +2384,87 @@ class DatasetteClient: path = f"http://localhost{path}" return path - async def _request(self, method, path, **kwargs): - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) + async def _request(self, method, path, skip_permission_checks=False, **kwargs): + from datasette.permissions import SkipPermissions - async def get(self, path, **kwargs): - return await self._request("get", path, **kwargs) + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) - async def options(self, path, **kwargs): - return await self._request("options", path, **kwargs) + async def get(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "get", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def head(self, path, **kwargs): - return await self._request("head", path, **kwargs) + async def options(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "options", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def post(self, path, **kwargs): - return await self._request("post", path, **kwargs) + async def head(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "head", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def put(self, path, **kwargs): - return await self._request("put", path, **kwargs) + async def post(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "post", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def patch(self, path, **kwargs): - return await self._request("patch", path, **kwargs) + async def put(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "put", path, skip_permission_checks=skip_permission_checks, **kwargs + ) - async def delete(self, path, **kwargs): - return await self._request("delete", path, **kwargs) + async def patch(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "patch", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def delete(self, path, skip_permission_checks=False, **kwargs): + return await self._request( + "delete", path, skip_permission_checks=skip_permission_checks, **kwargs + ) + + async def request(self, method, path, skip_permission_checks=False, **kwargs): + """Make an HTTP request with the specified method. 
+ + Args: + method: HTTP method (e.g., "GET", "POST", "PUT") + path: The path to request + skip_permission_checks: If True, bypass all permission checks for this request + **kwargs: Additional arguments to pass to httpx + + Returns: + httpx.Response: The response from the request + """ + from datasette.permissions import SkipPermissions - async def request(self, method, path, **kwargs): avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) diff --git a/datasette/permissions.py b/datasette/permissions.py index c91385a0..c48293ac 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -1,6 +1,33 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any, NamedTuple +import contextvars + + +# Context variable to track when permission checks should be skipped +_skip_permission_checks = contextvars.ContextVar( + "skip_permission_checks", default=False +) + + +class SkipPermissions: + """Context manager to temporarily skip permission checks. + + This is not a stable API and may change in future releases. 
+ + Usage: + with SkipPermissions(): + # Permission checks are skipped within this block + response = await datasette.client.get("/protected") + """ + + def __enter__(self): + self.token = _skip_permission_checks.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _skip_permission_checks.reset(self.token) + return False class Resource(ABC): diff --git a/datasette/utils/actions_sql.py b/datasette/utils/actions_sql.py index 7121e2d0..9c2add0e 100644 --- a/datasette/utils/actions_sql.py +++ b/datasette/utils/actions_sql.py @@ -155,6 +155,16 @@ async def _build_single_action_sql( action=action, ) + # If permission_sqls is the sentinel, skip all permission checks + # Return SQL that allows all resources + from datasette.utils.permissions import SKIP_PERMISSION_CHECKS + + if permission_sqls is SKIP_PERMISSION_CHECKS: + cols = "parent, child, 'skip_permission_checks' AS reason" + if include_is_private: + cols += ", 0 AS is_private" + return f"SELECT {cols} FROM ({base_resources_sql})", {} + all_params = {} rule_sqls = [] restriction_sqls = [] @@ -436,6 +446,17 @@ async def build_permission_rules_sql( action=action, ) + # If permission_sqls is the sentinel, skip all permission checks + # Return SQL that allows everything + from datasette.utils.permissions import SKIP_PERMISSION_CHECKS + + if permission_sqls is SKIP_PERMISSION_CHECKS: + return ( + "SELECT NULL AS parent, NULL AS child, 1 AS allow, 'skip_permission_checks' AS reason, 'skip' AS source_plugin", + {}, + [], + ) + if not permission_sqls: return ( "SELECT NULL AS parent, NULL AS child, 0 AS allow, NULL AS reason, NULL AS source_plugin WHERE 0", diff --git a/datasette/utils/permissions.py b/datasette/utils/permissions.py index 58be53a3..6c30a12a 100644 --- a/datasette/utils/permissions.py +++ b/datasette/utils/permissions.py @@ -10,13 +10,26 @@ from datasette.plugins import pm from datasette.utils import await_me_maybe +# Sentinel object to indicate permission checks should be skipped +SKIP_PERMISSION_CHECKS = object() + + async def gather_permission_sql_from_hooks( *, datasette, actor: dict | None, action: str -) -> List[PermissionSQL]: +) -> List[PermissionSQL] | object: """Collect PermissionSQL objects from the permission_resources_sql hook. Ensures that each returned PermissionSQL has a populated ``source``. + + Returns SKIP_PERMISSION_CHECKS sentinel if skip_permission_checks context variable + is set, signaling that all permission checks should be bypassed. """ + from datasette.permissions import _skip_permission_checks + + # Check if we should skip permission checks BEFORE calling hooks + # This avoids creating unawaited coroutines + if _skip_permission_checks.get(): + return SKIP_PERMISSION_CHECKS hook_caller = pm.hook.permission_resources_sql hookimpls = hook_caller.get_hookimpls() diff --git a/docs/internals.rst b/docs/internals.rst index 406bc9b3..468b3f95 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1045,6 +1045,36 @@ These methods can be used with :ref:`internals_datasette_urls` - for example: For documentation on available ``**kwargs`` options and the shape of the HTTPX Response object refer to the `HTTPX Async documentation `__. +Bypassing permission checks +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All ``datasette.client`` methods accept an optional ``skip_permission_checks=True`` parameter. When set, all permission checks will be bypassed for that request, allowing access to any resource regardless of the configured permissions. 
+ +This is useful for plugins and internal operations that need to access all resources without being subject to permission restrictions. + +Example usage: + +.. code-block:: python + + # Regular request - respects permissions + response = await datasette.client.get( + "/private-db/secret-table.json" + ) + # May return 403 Forbidden if access is denied + + # With skip_permission_checks - bypasses all permission checks + response = await datasette.client.get( + "/private-db/secret-table.json", + skip_permission_checks=True, + ) + # Will return 200 OK and the data, regardless of permissions + +This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch``, ``delete``, ``options``, ``head``) and the generic ``request`` method. + +.. warning:: + + Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. + .. _internals_datasette_urls: datasette.urls diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index afc9b335..55f7392f 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -1,6 +1,7 @@ import httpx import pytest import pytest_asyncio +from datasette.app import Datasette @pytest_asyncio.fixture @@ -9,6 +10,23 @@ async def datasette(ds_client): return ds_client.ds +@pytest_asyncio.fixture +async def datasette_with_permissions(): + """A datasette instance with permission restrictions for testing""" + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + db = ds.add_memory_database("test_db") + await db.execute_write( + "create table if not exists test_table (id integer primary key, name text)" + ) + await db.execute_write( + "insert or ignore into test_table (id, name) values (1, 'Alice')" + ) + # Trigger catalog refresh + await ds.client.get("/") + return ds + + @pytest.mark.asyncio @pytest.mark.parametrize( "method,path,expected_status", @@ -65,3 +83,147 @@ async def test_client_path(datasette, prefix, expected_path): assert path == expected_path finally: datasette._settings["base_url"] = original_base_url + + +@pytest.mark.asyncio +async def test_skip_permission_checks_allows_forbidden_access( + datasette_with_permissions, +): + """Test that skip_permission_checks=True bypasses permission checks""" + ds = datasette_with_permissions + + # Without skip_permission_checks, anonymous user should get 403 for protected database + response = await ds.client.get("/test_db.json") + assert response.status_code == 403 + + # With skip_permission_checks=True, should get 200 + response = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response.status_code == 200 + data = response.json() + assert data["database"] == "test_db" + + +@pytest.mark.asyncio +async def test_skip_permission_checks_on_table(datasette_with_permissions): + """Test skip_permission_checks works for table access""" + ds = datasette_with_permissions + + # Without skip_permission_checks, should get 403 + response = await ds.client.get("/test_db/test_table.json") + assert response.status_code == 403 + + # With skip_permission_checks=True, should get table data + response = await ds.client.get( + "/test_db/test_table.json", skip_permission_checks=True + ) + assert response.status_code == 200 + data = response.json() + assert data["rows"] == [{"id": 1, "name": "Alice"}] + + 
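# A minimal standalone sketch of the contextvars pattern behind
# SkipPermissions, shown here because it is what makes the isolation
# asserted in these tests work: each task sees its own copy of the
# context, and reset(token) restores the prior value on exit.
# (Names below mirror datasette/permissions.py but this is illustrative.)
import contextvars

_flag = contextvars.ContextVar("skip_permission_checks", default=False)


class SkipPermissionsSketch:
    def __enter__(self):
        self.token = _flag.set(True)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        _flag.reset(self.token)
        return False


assert _flag.get() is False
with SkipPermissionsSketch():
    assert _flag.get() is True  # checks are skipped only in here
assert _flag.get() is False  # previous value restored by reset()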
+@pytest.mark.asyncio +@pytest.mark.parametrize( + "method", ["get", "post", "put", "patch", "delete", "options", "head"] +) +async def test_skip_permission_checks_all_methods(datasette_with_permissions, method): + """Test that skip_permission_checks works with all HTTP methods""" + ds = datasette_with_permissions + + # All methods should work with skip_permission_checks=True + client_method = getattr(ds.client, method) + response = await client_method("/test_db.json", skip_permission_checks=True) + # We don't check status code since some methods might not be allowed, + # but we verify the request doesn't fail due to permissions + assert isinstance(response, httpx.Response) + + +@pytest.mark.asyncio +async def test_skip_permission_checks_request_method(datasette_with_permissions): + """Test that skip_permission_checks works with client.request()""" + ds = datasette_with_permissions + + # Without skip_permission_checks + response = await ds.client.request("GET", "/test_db.json") + assert response.status_code == 403 + + # With skip_permission_checks=True + response = await ds.client.request( + "GET", "/test_db.json", skip_permission_checks=True + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_isolated_to_request(datasette_with_permissions): + """Test that skip_permission_checks doesn't affect other concurrent requests""" + ds = datasette_with_permissions + + # First request with skip_permission_checks=True should succeed + response1 = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response1.status_code == 200 + + # Subsequent request without it should still get 403 + response2 = await ds.client.get("/test_db.json") + assert response2.status_code == 403 + + # And another with skip should succeed again + response3 = await ds.client.get("/test_db.json", skip_permission_checks=True) + assert response3.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_with_admin_actor(datasette_with_permissions): + """Test that skip_permission_checks works even when actor is provided""" + ds = datasette_with_permissions + + # Admin actor should normally have access + admin_cookies = {"ds_actor": ds.client.actor_cookie({"id": "admin"})} + response = await ds.client.get("/test_db.json", cookies=admin_cookies) + assert response.status_code == 200 + + # Non-admin actor should get 403 + user_cookies = {"ds_actor": ds.client.actor_cookie({"id": "user"})} + response = await ds.client.get("/test_db.json", cookies=user_cookies) + assert response.status_code == 403 + + # Non-admin actor with skip_permission_checks=True should get 200 + response = await ds.client.get( + "/test_db.json", cookies=user_cookies, skip_permission_checks=True + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_skip_permission_checks_shows_denied_tables(): + """Test that skip_permission_checks=True shows tables from denied databases in /-/tables.json""" + ds = Datasette( + config={ + "databases": { + "fixtures": {"allow": False} # Deny all access to this database + } + } + ) + await ds.invoke_startup() + db = ds.add_memory_database("fixtures") + await db.execute_write( + "CREATE TABLE test_table (id INTEGER PRIMARY KEY, name TEXT)" + ) + await db.execute_write("INSERT INTO test_table (id, name) VALUES (1, 'Alice')") + await ds._refresh_schemas() + + # Without skip_permission_checks, tables from denied database should not appear in /-/tables.json + response = await ds.client.get("/-/tables.json") + 
assert response.status_code == 200 + data = response.json() + table_names = [match["name"] for match in data["matches"]] + # Should not see any fixtures tables since access is denied + fixtures_tables = [name for name in table_names if name.startswith("fixtures:")] + assert len(fixtures_tables) == 0 + + # With skip_permission_checks=True, tables from denied database SHOULD appear + response = await ds.client.get("/-/tables.json", skip_permission_checks=True) + assert response.status_code == 200 + data = response.json() + table_names = [match["name"] for match in data["matches"]] + # Should see fixtures tables when permission checks are skipped + assert "fixtures: test_table" in table_names From 257e1c1b1b432e82b3d58e9579a2b5fd57f6a46a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 13:51:58 -0800 Subject: [PATCH 23/42] Release 1.0a21 Refs #2429, #2511, #2578, #2583 --- datasette/version.py | 2 +- docs/changelog.rst | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/datasette/version.py b/datasette/version.py index 20cb46c7..01f00fcd 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a20" +__version__ = "1.0a21" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index dbcff8cb..7696fd89 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,15 +4,25 @@ Changelog ========= +.. _v1_0_a21: + +1.0a21 (2025-11-05) +------------------- + +- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) +- New ``datasette --get /path --headers`` option for inspecting the headers returned by a path. (:issue:`2578`) +- New ``datasette.client.get(..., skip_permission_checks=True)`` parameter to bypass permission checks when making requests using the internal client. (:issue:`2583`) + .. _v0_65_2: 0.65.2 (2025-11-05) ------------------- -* Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) -* Upgraded for compatibility with Python 3.14. -* Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) -* Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. +- Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`) +- Upgraded for compatibility with Python 3.14. +- Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`) +- Minor upgrades to fix warnings, including ``pkg_resources`` deprecation. .. _v1_0_a20: @@ -52,22 +62,16 @@ Related changes: - Permission debugging improvements: - The ``/-/allowed`` endpoint shows resources the user is allowed to interact with for different actions. - - ``/-/rules`` shows the raw allow/deny rules that apply to different permission checks. - - ``/-/actions`` lists every available action. - - ``/-/check`` can be used to try out different permission checks for the current actor. 
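Permission checks can also be tried out programmatically through the internal client, following the same patterns as this release's test suite. A minimal sketch (the ``demo`` database name and actor IDs are illustrative):

.. code-block:: python

    import asyncio

    from datasette.app import Datasette


    async def main():
        ds = Datasette(
            config={"databases": {"demo": {"allow": {"id": "root"}}}}
        )
        await ds.invoke_startup()
        db = ds.add_memory_database("demo")
        await db.execute_write("create table t (id integer primary key)")
        await ds._refresh_schemas()

        # Anonymous actor: denied by the allow block above
        anon = await ds.client.get("/demo.json")
        print(anon.status_code)  # 403

        # Signed ds_actor cookie for the permitted actor
        cookies = {"ds_actor": ds.client.actor_cookie({"id": "root"})}
        authed = await ds.client.get("/demo.json", cookies=cookies)
        print(authed.status_code)  # 200


    asyncio.run(main())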
Other changes ~~~~~~~~~~~~~ - The internal ``catalog_views`` table now tracks SQLite views alongside tables in the introspection database. (:issue:`2495`) - - Hitting the ``/`` brings up a search interface for navigating to tables that the current user can view. A new ``/-/tables`` endpoint supports this functionality. (:issue:`2523`) - - Datasette attempts to detect some configuration errors on startup. - - Datasette now supports Python 3.14 and no longer tests against Python 3.9. .. _v1_0_a19: From 1df4028d783f27a88bd304d47990ae4d0e7147bb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Nov 2025 15:18:17 -0800 Subject: [PATCH 24/42] add_memory_database(memory_name, name=None, route=None) --- datasette/app.py | 6 ++++-- docs/internals.rst | 8 +++++--- tests/test_internals_datasette_client.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 177debe2..45d34991 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -729,8 +729,10 @@ class Datasette: self.databases = new_databases return db - def add_memory_database(self, memory_name): - return self.add_database(Database(self, memory_name=memory_name)) + def add_memory_database(self, memory_name, name=None, route=None): + return self.add_database( + Database(self, memory_name=memory_name), name=name, route=route + ) def remove_database(self, name): self.get_database(name).close() diff --git a/docs/internals.rst b/docs/internals.rst index 468b3f95..2e01a8e8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -781,8 +781,8 @@ Use ``is_mutable=False`` to add an immutable database. .. _datasette_add_memory_database: -.add_memory_database(name) --------------------------- +.add_memory_database(memory_name, name=None, route=None) +-------------------------------------------------------- Adds a shared in-memory database with the specified name: @@ -800,7 +800,9 @@ This is a shortcut for the following: Database(datasette, memory_name="statistics") ) -Using either of these pattern will result in the in-memory database being served at ``/statistics``. +Using either of these patterns will result in the in-memory database being served at ``/statistics``. + +The ``name`` and ``route`` parameters are optional and work the same way as they do for :ref:`datasette_add_database`. .. 
_datasette_remove_database: diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index 55f7392f..a15d294f 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -15,7 +15,7 @@ async def datasette_with_permissions(): """A datasette instance with permission restrictions for testing""" ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) await ds.invoke_startup() - db = ds.add_memory_database("test_db") + db = ds.add_memory_database("test_datasette_with_permissions", name="test_db") await db.execute_write( "create table if not exists test_table (id integer primary key, name text)" ) From 8bc9b1ee03c3e9deb43f4df5fc257e3093e7f484 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 7 Nov 2025 12:01:23 -0800 Subject: [PATCH 25/42] /-/schema and /db/-/schema and /db/table/-/schema pages (plus .json/.md) * Add schema endpoints for databases, instances, and tables Closes: #2586 This commit adds new endpoints to view database schemas in multiple formats: - /-/schema - View schemas for all databases (HTML, JSON, MD) - /database/-/schema - View schema for a specific database (HTML, JSON, MD) - /database/table/-/schema - View schema for a specific table (JSON, MD) Features: - Supports HTML, JSON, and Markdown output formats - Respects view-database and view-table permissions - Uses group_concat(sql, ';' || CHAR(10)) from sqlite_master to retrieve schemas - Includes comprehensive tests covering all formats and permission checks The JSON endpoints return: - Instance level: {"schemas": [{"database": "name", "schema": "sql"}, ...]} - Database level: {"database": "name", "schema": "sql"} - Table level: {"database": "name", "table": "name", "schema": "sql"} Markdown format provides formatted output with headings and SQL code blocks. Co-Authored-By: Claude --- datasette/app.py | 15 ++ datasette/templates/database.html | 2 +- datasette/templates/schema.html | 41 +++++ datasette/views/special.py | 179 ++++++++++++++++++++- docs/pages.rst | 43 ++++++ tests/test_html.py | 2 +- tests/test_schema_endpoints.py | 248 ++++++++++++++++++++++++++++++ 7 files changed, 526 insertions(+), 4 deletions(-) create mode 100644 datasette/templates/schema.html create mode 100644 tests/test_schema_endpoints.py diff --git a/datasette/app.py b/datasette/app.py index 45d34991..60a20032 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -58,6 +58,9 @@ from .views.special import ( PermissionRulesView, PermissionCheckView, TablesView, + InstanceSchemaView, + DatabaseSchemaView, + TableSchemaView, ) from .views.table import ( TableInsertView, @@ -1910,6 +1913,10 @@ class Datasette: TablesView.as_view(self), r"/-/tables(\.(?Pjson))?$", ) + add_route( + InstanceSchemaView.as_view(self), + r"/-/schema(\.(?Pjson|md))?$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", @@ -1951,6 +1958,10 @@ class Datasette: r"/(?P[^\/\.]+)(\.(?P\w+))?$", ) add_route(TableCreateView.as_view(self), r"/(?P[^\/\.]+)/-/create$") + add_route( + DatabaseSchemaView.as_view(self), + r"/(?P[^\/\.]+)/-/schema(\.(?Pjson|md))?$", + ) add_route( wrap_view(QueryView, self), r"/(?P[^\/\.]+)/-/query(\.(?P\w+))?$", @@ -1975,6 +1986,10 @@ class Datasette: TableDropView.as_view(self), r"/(?P[^\/\.]+)/(?P[^\/\.]+)/-/drop$", ) + add_route( + TableSchemaView.as_view(self), + r"/(?P[^\/\.]+)/(?P
<table>[^\/\.]+)/-/schema(\.(?P<format>json|md))?$",
        )
        add_route(
            RowDeleteView.as_view(self),
            r"/(?P<database>[^\/\.]+)/(?P<table>[^/]+?)/(?P<pks>[^/]+?)/-/delete$",
diff --git a/datasette/templates/database.html b/datasette/templates/database.html
index 66f288dc..42b4ca0b 100644
--- a/datasette/templates/database.html
+++ b/datasette/templates/database.html
@@ -56,7 +56,7 @@
 {% endif %}
 
 {% if tables %}
-<h2 id="tables">Tables</h2>
+<h2 id="tables">Tables <a href="{{ urls.database(database) }}/-/schema">schema</a></h2>
{% endif %} {% for table in tables %} diff --git a/datasette/templates/schema.html b/datasette/templates/schema.html new file mode 100644 index 00000000..2fd8637e --- /dev/null +++ b/datasette/templates/schema.html @@ -0,0 +1,41 @@ +{% extends "base.html" %} + +{% block title %}{% if is_instance %}Schema for all databases{% elif table_name %}Schema for {{ schemas[0].database }}.{{ table_name }}{% else %}Schema for {{ schemas[0].database }}{% endif %}{% endblock %} + +{% block body_class %}schema{% endblock %} + +{% block crumbs %} +{% if is_instance %} +{{ crumbs.nav(request=request) }} +{% elif table_name %} +{{ crumbs.nav(request=request, database=schemas[0].database, table=table_name) }} +{% else %} +{{ crumbs.nav(request=request, database=schemas[0].database) }} +{% endif %} +{% endblock %} + +{% block content %} + + +{% for item in schemas %} + {% if is_instance %} +
<h2>{{ item.database }}</h2>
+  {% endif %}
+
+  {% if item.schema %}
+    <pre>{{ item.schema }}</pre>
+  {% else %}
+    <p>No schema available for this database.</p>
+  {% endif %}
+
+  {% if not loop.last %}
+    <hr>
+  {% endif %}
+{% endfor %}
+
+{% if not schemas %}
+  <p>No databases with viewable schemas found.</p>
+{% endif %} +{% endblock %} diff --git a/datasette/views/special.py b/datasette/views/special.py index a1d736c5..411363ec 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -761,8 +761,6 @@ class ApiExplorerView(BaseView): async def example_links(self, request): databases = [] for name, db in self.ds.databases.items(): - if name == "_internal": - continue database_visible, _ = await self.ds.check_visibility( request.actor, action="view-database", @@ -981,3 +979,180 @@ class TablesView(BaseView): ] return Response.json({"matches": matches, "truncated": truncated}) + + +class SchemaBaseView(BaseView): + """Base class for schema views with common response formatting.""" + + has_json_alternate = False + + async def get_database_schema(self, database_name): + """Get schema SQL for a database.""" + db = self.ds.databases[database_name] + result = await db.execute( + "select group_concat(sql, ';' || CHAR(10)) as schema from sqlite_master where sql is not null" + ) + row = result.first() + return row["schema"] if row and row["schema"] else "" + + def format_json_response(self, data): + """Format data as JSON response with CORS headers if needed.""" + headers = {} + if self.ds.cors: + add_cors_headers(headers) + return Response.json(data, headers=headers) + + def format_error_response(self, error_message, format_, status=404): + """Format error response based on requested format.""" + if format_ == "json": + headers = {} + if self.ds.cors: + add_cors_headers(headers) + return Response.json( + {"ok": False, "error": error_message}, status=status, headers=headers + ) + else: + return Response.text(error_message, status=status) + + def format_markdown_response(self, heading, schema): + """Format schema as Markdown response.""" + md_output = f"# {heading}\n\n```sql\n{schema}\n```\n" + return Response.text( + md_output, headers={"content-type": "text/markdown; charset=utf-8"} + ) + + async def format_html_response( + self, request, schemas, is_instance=False, table_name=None + ): + """Format schema as HTML response.""" + context = { + "schemas": schemas, + "is_instance": is_instance, + } + if table_name: + context["table_name"] = table_name + return await self.render(["schema.html"], request=request, context=context) + + +class InstanceSchemaView(SchemaBaseView): + """ + Displays schema for all databases in the instance. + Supports HTML, JSON, and Markdown formats. + """ + + name = "instance_schema" + + async def get(self, request): + format_ = request.url_vars.get("format") or "html" + + # Get all databases the actor can view + allowed_databases_page = await self.ds.allowed_resources( + "view-database", + request.actor, + ) + allowed_databases = [r.parent async for r in allowed_databases_page.all()] + + # Get schema for each database + schemas = [] + for database_name in allowed_databases: + schema = await self.get_database_schema(database_name) + schemas.append({"database": database_name, "schema": schema}) + + if format_ == "json": + return self.format_json_response({"schemas": schemas}) + elif format_ == "md": + md_parts = [ + f"# Schema for {item['database']}\n\n```sql\n{item['schema']}\n```" + for item in schemas + ] + return Response.text( + "\n\n".join(md_parts), + headers={"content-type": "text/markdown; charset=utf-8"}, + ) + else: + return await self.format_html_response(request, schemas, is_instance=True) + + +class DatabaseSchemaView(SchemaBaseView): + """ + Displays schema for a specific database. + Supports HTML, JSON, and Markdown formats. 
+ """ + + name = "database_schema" + + async def get(self, request): + database_name = request.url_vars["database"] + format_ = request.url_vars.get("format") or "html" + + # Check if database exists + if database_name not in self.ds.databases: + return self.format_error_response("Database not found", format_) + + # Check view-database permission + await self.ds.ensure_permission( + action="view-database", + resource=DatabaseResource(database=database_name), + actor=request.actor, + ) + + schema = await self.get_database_schema(database_name) + + if format_ == "json": + return self.format_json_response( + {"database": database_name, "schema": schema} + ) + elif format_ == "md": + return self.format_markdown_response(f"Schema for {database_name}", schema) + else: + schemas = [{"database": database_name, "schema": schema}] + return await self.format_html_response(request, schemas) + + +class TableSchemaView(SchemaBaseView): + """ + Displays schema for a specific table. + Supports HTML, JSON, and Markdown formats. + """ + + name = "table_schema" + + async def get(self, request): + database_name = request.url_vars["database"] + table_name = request.url_vars["table"] + format_ = request.url_vars.get("format") or "html" + + # Check view-table permission + await self.ds.ensure_permission( + action="view-table", + resource=TableResource(database=database_name, table=table_name), + actor=request.actor, + ) + + # Get schema for the table + db = self.ds.databases[database_name] + result = await db.execute( + "select sql from sqlite_master where name = ? and sql is not null", + [table_name], + ) + row = result.first() + + # Return 404 if table doesn't exist + if not row or not row["sql"]: + return self.format_error_response("Table not found", format_) + + schema = row["sql"] + + if format_ == "json": + return self.format_json_response( + {"database": database_name, "table": table_name, "schema": schema} + ) + elif format_ == "md": + return self.format_markdown_response( + f"Schema for {database_name}.{table_name}", schema + ) + else: + schemas = [{"database": database_name, "schema": schema}] + return await self.format_html_response( + request, schemas, table_name=table_name + ) diff --git a/docs/pages.rst b/docs/pages.rst index 3d6530a3..2e54ce2f 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -107,3 +107,46 @@ Note that this URL includes the encoded primary key of the record. Here's that same page as JSON: `../people/uk~2Eorg~2Epublicwhip~2Fperson~2F10001.json `_ + + +.. _pages_schemas: + +Schemas +======= + +Datasette offers ``/-/schema`` endpoints to expose the SQL schema for databases and tables. + +.. _InstanceSchemaView: + +Instance schema +--------------- + +Access ``/-/schema`` to see the complete schema for all attached databases in the Datasette instance. + +Use ``/-/schema.md`` to get the same information as Markdown. + +Use ``/-/schema.json`` to get the same information as JSON, which looks like this: + +.. code-block:: json + + { + "schemas": [ + { + "database": "content", + "schema": "create table posts ..." + } + } + +.. _DatabaseSchemaView: + +Database schema +--------------- + +Use ``/database-name/-/schema`` to see the complete schema for a specific database. The ``.md`` and ``.json`` extensions work here too. The JSON returns an object with ``"database"`` and ``"schema"`` keys. + +.. _TableSchemaView: + +Table schema +------------ + +Use ``/database-name/table-name/-/schema`` to see the schema for a specific table. The ``.md`` and ``.json`` extensions work here too. 
The JSON returns an object with ``"database"``, ``"table"``, and ``"schema"`` keys. diff --git a/tests/test_html.py b/tests/test_html.py index dbe993c4..9997279b 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -142,7 +142,7 @@ async def test_database_page(ds_client): # And a list of tables for fragment in ( - '
<h2 id="tables">Tables</h2>',
+        '<h2 id="tables">Tables',
         '<h3><a href="/fixtures/sortable">sortable</a></h3>',
         "<p>
pk, foreign_key_with_label, foreign_key_with_blank_label, ", ): diff --git a/tests/test_schema_endpoints.py b/tests/test_schema_endpoints.py new file mode 100644 index 00000000..5500a7b0 --- /dev/null +++ b/tests/test_schema_endpoints.py @@ -0,0 +1,248 @@ +import asyncio +import pytest +import pytest_asyncio +from datasette.app import Datasette + + +@pytest_asyncio.fixture(scope="module") +async def schema_ds(): + """Create a Datasette instance with test databases and permission config.""" + ds = Datasette( + config={ + "databases": { + "schema_private_db": {"allow": {"id": "root"}}, + } + } + ) + + # Create public database with multiple tables + public_db = ds.add_memory_database("schema_public_db") + await public_db.execute_write( + "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT)" + ) + await public_db.execute_write( + "CREATE TABLE IF NOT EXISTS posts (id INTEGER PRIMARY KEY, title TEXT)" + ) + await public_db.execute_write( + "CREATE VIEW IF NOT EXISTS recent_posts AS SELECT * FROM posts ORDER BY id DESC" + ) + + # Create a database with restricted access (requires root permission) + private_db = ds.add_memory_database("schema_private_db") + await private_db.execute_write( + "CREATE TABLE IF NOT EXISTS secret_data (id INTEGER PRIMARY KEY, value TEXT)" + ) + + # Create an empty database + ds.add_memory_database("schema_empty_db") + + return ds + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for", "CREATE TABLE"]), + ], +) +async def test_database_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /database/-/schema endpoint in different formats.""" + url = "/schema_public_db/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "database" in data + assert data["database"] == "schema_public_db" + assert "schema" in data + assert "CREATE TABLE users" in data["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for all databases"]), + ], +) +async def test_instance_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /-/schema endpoint in different formats.""" + url = "/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "schemas" in data + assert isinstance(data["schemas"], list) + db_names = [item["database"] for item in data["schemas"]] + # Should see schema_public_db and schema_empty_db, but not schema_private_db (anonymous user) + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" not in db_names + # Check schemas are present + for item in data["schemas"]: + if item["database"] == "schema_public_db": + assert "CREATE TABLE users" in item["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "format_ext,expected_in_content", + [ + ("json", None), + ("md", ["# Schema for", "```sql"]), + ("", ["Schema for users"]), + ], +) +async def 
test_table_schema_formats(schema_ds, format_ext, expected_in_content): + """Test /database/table/-/schema endpoint in different formats.""" + url = "/schema_public_db/users/-/schema" + if format_ext: + url += f".{format_ext}" + response = await schema_ds.client.get(url) + assert response.status_code == 200 + + if format_ext == "json": + data = response.json() + assert "database" in data + assert data["database"] == "schema_public_db" + assert "table" in data + assert data["table"] == "users" + assert "schema" in data + assert "CREATE TABLE users" in data["schema"] + else: + content = response.text + for expected in expected_in_content: + assert expected in content + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "url", + [ + "/schema_private_db/-/schema.json", + "/schema_private_db/secret_data/-/schema.json", + ], +) +async def test_schema_permission_enforcement(schema_ds, url): + """Test that permissions are enforced for schema endpoints.""" + # Anonymous user should get 403 + response = await schema_ds.client.get(url) + assert response.status_code == 403 + + # Authenticated user with permission should succeed + response = await schema_ds.client.get( + url, + cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, + ) + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_instance_schema_respects_database_permissions(schema_ds): + """Test that /-/schema only shows databases the user can view.""" + # Anonymous user should only see public databases + response = await schema_ds.client.get("/-/schema.json") + assert response.status_code == 200 + data = response.json() + db_names = [item["database"] for item in data["schemas"]] + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" not in db_names + + # Authenticated user should see all databases + response = await schema_ds.client.get( + "/-/schema.json", + cookies={"ds_actor": schema_ds.client.actor_cookie({"id": "root"})}, + ) + assert response.status_code == 200 + data = response.json() + db_names = [item["database"] for item in data["schemas"]] + assert "schema_public_db" in db_names + assert "schema_empty_db" in db_names + assert "schema_private_db" in db_names + + +@pytest.mark.asyncio +async def test_database_schema_with_multiple_tables(schema_ds): + """Test schema with multiple tables in a database.""" + response = await schema_ds.client.get("/schema_public_db/-/schema.json") + assert response.status_code == 200 + data = response.json() + schema = data["schema"] + + # All objects should be in the schema + assert "CREATE TABLE users" in schema + assert "CREATE TABLE posts" in schema + assert "CREATE VIEW recent_posts" in schema + + +@pytest.mark.asyncio +async def test_empty_database_schema(schema_ds): + """Test schema for an empty database.""" + response = await schema_ds.client.get("/schema_empty_db/-/schema.json") + assert response.status_code == 200 + data = response.json() + assert data["database"] == "schema_empty_db" + assert data["schema"] == "" + + +@pytest.mark.asyncio +async def test_database_not_exists(schema_ds): + """Test schema for a non-existent database returns 404.""" + # Test JSON format + response = await schema_ds.client.get("/nonexistent_db/-/schema.json") + assert response.status_code == 404 + data = response.json() + assert data["ok"] is False + assert "not found" in data["error"].lower() + + # Test HTML format (returns text) + response = await schema_ds.client.get("/nonexistent_db/-/schema") + assert 
response.status_code == 404 + assert "not found" in response.text.lower() + + # Test Markdown format (returns text) + response = await schema_ds.client.get("/nonexistent_db/-/schema.md") + assert response.status_code == 404 + assert "not found" in response.text.lower() + + +@pytest.mark.asyncio +async def test_table_not_exists(schema_ds): + """Test schema for a non-existent table returns 404.""" + # Test JSON format + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.json") + assert response.status_code == 404 + data = response.json() + assert data["ok"] is False + assert "not found" in data["error"].lower() + + # Test HTML format (returns text) + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema") + assert response.status_code == 404 + assert "not found" in response.text.lower() + + # Test Markdown format (returns text) + response = await schema_ds.client.get("/schema_public_db/nonexistent/-/schema.md") + assert response.status_code == 404 + assert "not found" in response.text.lower() From a508fc4a8e63ec28d9c1516f60dce718cc10f330 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 7 Nov 2025 16:50:00 -0800 Subject: [PATCH 26/42] Remove permission_allowed hook docs, closes #2588 Refs #2528 --- docs/plugin_hooks.rst | 72 ++------------------------------------ tests/plugins/my_plugin.py | 4 +-- tests/test_html.py | 8 ++--- tests/test_plugins.py | 2 +- 4 files changed, 10 insertions(+), 76 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 93f7f476..118a6bde 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1314,72 +1314,6 @@ This example plugin causes 0 results to be returned if ``?_nothing=1`` is added Example: `datasette-leaflet-freedraw `_ -.. _plugin_hook_permission_allowed: - -permission_allowed(datasette, actor, action, resource) ------------------------------------------------------- - -``datasette`` - :ref:`internals_datasette` - You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. - -``actor`` - dictionary - The current actor, as decided by :ref:`plugin_hook_actor_from_request`. - -``action`` - string - The action to be performed, e.g. ``"edit-table"``. - -``resource`` - string or None - An identifier for the individual resource, e.g. the name of the table. - -Called to check that an actor has permission to perform an action on a resource. Can return ``True`` if the action is allowed, ``False`` if the action is not allowed or ``None`` if the plugin does not have an opinion one way or the other. - -Here's an example plugin which randomly selects if a permission should be allowed or denied, except for ``view-instance`` which always uses the default permission scheme instead. - -.. code-block:: python - - from datasette import hookimpl - import random - - - @hookimpl - def permission_allowed(action): - if action != "view-instance": - # Return True or False at random - return random.random() > 0.5 - # Returning None falls back to default permissions - -This function can alternatively return an awaitable function which itself returns ``True``, ``False`` or ``None``. You can use this option if you need to execute additional database queries using ``await datasette.execute(...)``. - -Here's an example that allows users to view the ``admin_log`` table only if their actor ``id`` is present in the ``admin_users`` table. It aso disallows arbitrary SQL queries for the ``staff.db`` database for all users. - -.. 
code-block:: python - - @hookimpl - def permission_allowed(datasette, actor, action, resource): - async def inner(): - if action == "execute-sql" and resource == "staff": - return False - if action == "view-table" and resource == ( - "staff", - "admin_log", - ): - if not actor: - return False - user_id = actor["id"] - result = await datasette.get_database( - "staff" - ).execute( - "select count(*) from admin_users where user_id = :user_id", - {"user_id": user_id}, - ) - return result.first()[0] > 0 - - return inner - -See :ref:`built-in permissions ` for a full list of permissions that are included in Datasette core. - -Example: `datasette-permissions-sql `_ - .. _plugin_hook_permission_resources_sql: permission_resources_sql(datasette, actor, action) @@ -1981,16 +1915,16 @@ This example adds a new database action for creating a table, if the user has th .. code-block:: python from datasette import hookimpl + from datasette.resources import DatabaseResource @hookimpl def database_actions(datasette, actor, database): async def inner(): - if not await datasette.permission_allowed( + if not await datasette.allowed( actor, "edit-schema", - resource=database, - default=False, + resource=DatabaseResource("database"), ): return [] return [ diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 1435ce28..96a8b4d7 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -469,7 +469,7 @@ def register_actions(datasette): description="View a collection", resource_class=DatabaseResource, ), - # Test actions for test_hook_permission_allowed (global actions - no resource_class) + # Test actions for test_hook_custom_allowed (global actions - no resource_class) Action( name="this_is_allowed", abbr=None, @@ -553,7 +553,7 @@ def register_actions(datasette): def permission_resources_sql(datasette, actor, action): from datasette.permissions import PermissionSQL - # Handle test actions used in test_hook_permission_allowed + # Handle test actions used in test_hook_custom_allowed if action == "this_is_allowed": return PermissionSQL.allow(reason="test plugin allows this_is_allowed") elif action == "this_is_denied": diff --git a/tests/test_html.py b/tests/test_html.py index 9997279b..35b839ec 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -935,7 +935,7 @@ async def test_edit_sql_link_on_canned_queries(ds_client, path, expected): @pytest.mark.parametrize( - "permission_allowed", + "has_permission", [ pytest.param( True, @@ -943,15 +943,15 @@ async def test_edit_sql_link_on_canned_queries(ds_client, path, expected): False, ], ) -def test_edit_sql_link_not_shown_if_user_lacks_permission(permission_allowed): +def test_edit_sql_link_not_shown_if_user_lacks_permission(has_permission): with make_app_client( config={ - "allow_sql": None if permission_allowed else {"id": "not-you"}, + "allow_sql": None if has_permission else {"id": "not-you"}, "databases": {"fixtures": {"queries": {"simple": "select 1 + 1"}}}, } ) as client: response = client.get("/fixtures/simple") - if permission_allowed: + if has_permission: assert "Edit SQL" in response.text else: assert "Edit SQL" not in response.text diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 971b7e82..4a8c60d7 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -677,7 +677,7 @@ async def test_existing_scope_actor_respected(ds_client): ("this_is_denied_async", False), ], ) -async def test_hook_permission_allowed(action, expected): +async def test_hook_custom_allowed(action, expected): # Test 
actions and permission logic are defined in tests/plugins/my_plugin.py ds = Datasette(plugins_dir=PLUGINS_DIR) await ds.invoke_startup() From 354d7a28732b701d5ebee334fc32a6e6e74ce0b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 9 Nov 2025 15:42:11 -0800 Subject: [PATCH 27/42] Bump a few versions, deploy on push to main Refs: - #2511 --- .github/workflows/deploy-latest.yml | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 8ffdbfd5..9f53b01e 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -2,10 +2,10 @@ name: Deploy latest.datasette.io on: workflow_dispatch: - # push: - # branches: - # - main - # - 1.0-dev + push: + branches: + - main + # - 1.0-dev permissions: contents: read @@ -15,19 +15,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v3 + uses: actions/checkout@v5 - name: Set up Python uses: actions/setup-python@v6 - # Using Python 3.10 for gcloud compatibility: with: - python-version: "3.10" - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }} - restore-keys: | - ${{ runner.os }}-pip- + python-version: "3.13" + cache: pip - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -104,7 +97,7 @@ jobs: # cat metadata.json - id: auth name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 + uses: google-github-actions/auth@v3 with: credentials_json: ${{ secrets.GCP_SA_KEY }} - name: Set up Cloud SDK From 291f71ec6b52bb7d346f8cad74ca60122db392e3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 11 Nov 2025 21:59:26 -0800 Subject: [PATCH 28/42] Remove out-dated plugin_hook_permission_allowed references --- docs/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7696fd89..66d46bce 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -278,7 +278,7 @@ To avoid similar mistakes in the future the ``datasette.permission_allowed()`` m Permission checks now consider opinions from every plugin ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the :ref:`permission_allowed() ` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) +The ``datasette.permission_allowed()`` method previously consulted every plugin that implemented the ``permission_allowed()`` plugin hook and obeyed the opinion of the last plugin to return a value. (:issue:`2275`) Datasette now consults every plugin and checks to see if any of them returned ``False`` (the veto rule), and if none of them did, it then checks to see if any of them returned ``True``. @@ -1397,7 +1397,7 @@ You can use the new ``"allow"`` block syntax in ``metadata.json`` (or ``metadata See :ref:`authentication_permissions_allow` for more details. -Plugins can implement their own custom permission checks using the new :ref:`plugin_hook_permission_allowed` hook. +Plugins can implement their own custom permission checks using the new ``plugin_hook_permission_allowed()`` plugin hook. A new debug page at ``/-/permissions`` shows recent permission checks, to help administrators and plugin authors understand exactly what checks are being performed. 
This tool defaults to only being available to the root user, but can be exposed to other users by plugins that respond to the ``permissions-debug`` permission. (:issue:`788`) From 32a425868cd6b58c66d9e255fd59017be0cd34c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 06:07:16 -0800 Subject: [PATCH 29/42] Bump black from 25.9.0 to 25.11.0 in the python-packages group (#2590) Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 25.9.0 to 25.11.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.9.0...25.11.0) --- updated-dependencies: - dependency-name: black dependency-version: 25.11.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1395ce82..4f487458 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ test = [ "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", - "black==25.9.0", + "black==25.11.0", "blacken-docs==1.20.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 23a640d38bebd55d9cc3b13a83ef6bc89d717fab Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 12 Nov 2025 16:14:21 -0800 Subject: [PATCH 30/42] datasette serve --default-deny option (#2593) Closes #2592 --- datasette/app.py | 2 + datasette/cli.py | 7 ++ datasette/default_permissions.py | 4 + docs/authentication.rst | 33 ++++++++ docs/cli-reference.rst | 1 + tests/test_cli.py | 1 + tests/test_default_deny.py | 129 +++++++++++++++++++++++++++++++ 7 files changed, 177 insertions(+) create mode 100644 tests/test_default_deny.py diff --git a/datasette/app.py b/datasette/app.py index 60a20032..5f2a484e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -304,6 +304,7 @@ class Datasette: crossdb=False, nolock=False, internal=None, + default_deny=False, ): self._startup_invoked = False assert config_dir is None or isinstance( @@ -512,6 +513,7 @@ class Datasette: self._permission_checks = collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) self.root_enabled = False + self.default_deny = default_deny self.client = DatasetteClient(self) async def apply_metadata_json(self): diff --git a/datasette/cli.py b/datasette/cli.py index aaf1b244..21420491 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -438,6 +438,11 @@ def uninstall(packages, yes): help="Output URL that sets a cookie authenticating the root user", is_flag=True, ) +@click.option( + "--default-deny", + help="Deny all permissions by default", + is_flag=True, +) @click.option( "--get", help="Run an HTTP GET request against this path, print results and exit", @@ -514,6 +519,7 @@ def serve( settings, secret, root, + default_deny, get, headers, token, @@ -594,6 +600,7 @@ def serve( crossdb=crossdb, nolock=nolock, internal=internal, + default_deny=default_deny, ) # Separate directories from files diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 5642cdfe..12e6c1ef 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -352,6 +352,10 @@ async def default_action_permissions_sql(datasette, actor, 
action): With the INTERSECT-based restriction approach, these defaults are always generated and then filtered by restriction_sql if the actor has restrictions. """ + # Skip default allow rules if default_deny is enabled + if datasette.default_deny: + return None + default_allow_actions = { "view-instance", "view-database", diff --git a/docs/authentication.rst b/docs/authentication.rst index e69b0aa4..69a6f606 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -83,6 +83,39 @@ Datasette's built-in view actions (``view-database``, ``view-table`` etc) are al Other actions, including those introduced by plugins, will default to *deny*. +.. _authentication_default_deny: + +Denying all permissions by default +---------------------------------- + +By default, Datasette allows unauthenticated access to view databases, tables, and execute SQL queries. + +You may want to run Datasette in a mode where **all** access is denied by default, and you explicitly grant permissions only to authenticated users, either using the :ref:`--root mechanism ` or through :ref:`configuration file rules ` or plugins. + +Use the ``--default-deny`` command-line option to run Datasette in this mode:: + + datasette --default-deny data.db --root + +With ``--default-deny`` enabled: + +* Anonymous users are denied access to view the instance, databases, tables, and queries +* Authenticated users are also denied access unless they're explicitly granted permissions +* The root user (when using ``--root``) still has access to everything +* You can grant permissions using :ref:`configuration file rules ` or plugins + +For example, to allow only a specific user to access your instance:: + + datasette --default-deny data.db --config datasette.yaml + +Where ``datasette.yaml`` contains: + +.. code-block:: yaml + + allow: + id: alice + +This configuration will deny access to everyone except the user with ``id`` of ``alice``. + .. 
_authentication_permissions_explained: How permissions are resolved diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index f002d05a..7ca88c4e 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -119,6 +119,7 @@ Once started you can access it at ``http://localhost:8001`` signed cookies --root Output URL that sets a cookie authenticating the root user + --default-deny Deny all permissions by default --get TEXT Run an HTTP GET request against this path, print results and exit --headers Include HTTP headers in --get output diff --git a/tests/test_cli.py b/tests/test_cli.py index 3bb360fb..21b86569 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -142,6 +142,7 @@ def test_metadata_yaml(): settings=[], secret=None, root=False, + default_deny=False, token=None, actor=None, version_note=None, diff --git a/tests/test_default_deny.py b/tests/test_default_deny.py new file mode 100644 index 00000000..81e95b84 --- /dev/null +++ b/tests/test_default_deny.py @@ -0,0 +1,129 @@ +import pytest +from datasette.app import Datasette +from datasette.resources import DatabaseResource, TableResource + + +@pytest.mark.asyncio +async def test_default_deny_denies_default_permissions(): + """Test that default_deny=True denies default permissions""" + # Without default_deny, anonymous users can view instance/database/tables + ds_normal = Datasette() + await ds_normal.invoke_startup() + + # Add a test database + db = ds_normal.add_memory_database("test_db_normal") + await db.execute_write("create table test_table (id integer primary key)") + await ds_normal._refresh_schemas() # Trigger catalog refresh + + # Test default behavior - anonymous user should be able to view + response = await ds_normal.client.get("/") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal") + assert response.status_code == 200 + + response = await ds_normal.client.get("/test_db_normal/test_table") + assert response.status_code == 200 + + # With default_deny=True, anonymous users should be denied + ds_deny = Datasette(default_deny=True) + await ds_deny.invoke_startup() + + # Add the same test database + db = ds_deny.add_memory_database("test_db_deny") + await db.execute_write("create table test_table (id integer primary key)") + await ds_deny._refresh_schemas() # Trigger catalog refresh + + # Anonymous user should be denied + response = await ds_deny.client.get("/") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny") + assert response.status_code == 403 + + response = await ds_deny.client.get("/test_db_deny/test_table") + assert response.status_code == 403 + + +@pytest.mark.asyncio +async def test_default_deny_with_root_user(): + """Test that root user still has access when default_deny=True""" + ds = Datasette(default_deny=True) + ds.root_enabled = True + await ds.invoke_startup() + + root_actor = {"id": "root"} + + # Root user should have all permissions even with default_deny + assert await ds.allowed(action="view-instance", actor=root_actor) is True + assert ( + await ds.allowed( + action="view-database", + actor=root_actor, + resource=DatabaseResource("test_db"), + ) + is True + ) + assert ( + await ds.allowed( + action="view-table", + actor=root_actor, + resource=TableResource("test_db", "test_table"), + ) + is True + ) + assert ( + await ds.allowed( + action="execute-sql", actor=root_actor, resource=DatabaseResource("test_db") + ) + is True + ) + + +@pytest.mark.asyncio +async def 
test_default_deny_with_config_allow(): + """Test that config allow rules still work with default_deny=True""" + ds = Datasette(default_deny=True, config={"allow": {"id": "user1"}}) + await ds.invoke_startup() + + # Anonymous user should be denied + assert await ds.allowed(action="view-instance", actor=None) is False + + # Authenticated user with explicit permission should have access + assert await ds.allowed(action="view-instance", actor={"id": "user1"}) is True + + # Different user should be denied + assert await ds.allowed(action="view-instance", actor={"id": "user2"}) is False + + +@pytest.mark.asyncio +async def test_default_deny_basic_permissions(): + """Test that default_deny=True denies basic permissions""" + ds = Datasette(default_deny=True) + await ds.invoke_startup() + + # Anonymous user should be denied all default permissions + assert await ds.allowed(action="view-instance", actor=None) is False + assert ( + await ds.allowed( + action="view-database", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + assert ( + await ds.allowed( + action="view-table", + actor=None, + resource=TableResource("test_db", "test_table"), + ) + is False + ) + assert ( + await ds.allowed( + action="execute-sql", actor=None, resource=DatabaseResource("test_db") + ) + is False + ) + + # Authenticated user without explicit permission should also be denied + assert await ds.allowed(action="view-instance", actor={"id": "user"}) is False From 5125bef5735c0823b72b27088cb11a189502e323 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 09:56:06 -0800 Subject: [PATCH 31/42] datasette.in_client() method, closes #2594 --- datasette/app.py | 63 ++++++++++++----- docs/internals.rst | 22 ++++++ tests/test_internals_datasette_client.py | 86 ++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 18 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5f2a484e..a5efdad5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2,6 +2,7 @@ from __future__ import annotations from asgi_csrf import Errors import asyncio +import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: @@ -130,6 +131,22 @@ from .resources import DatabaseResource, TableResource app_root = Path(__file__).parent.parent +# Context variable to track when code is executing within a datasette.client request +_in_datasette_client = contextvars.ContextVar("in_datasette_client", default=False) + + +class _DatasetteClientContext: + """Context manager to mark code as executing within a datasette.client request.""" + + def __enter__(self): + self.token = _in_datasette_client.set(True) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _in_datasette_client.reset(self.token) + return False + + @dataclasses.dataclass class PermissionCheck: """Represents a logged permission check for debugging purposes.""" @@ -666,6 +683,14 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) + def in_client(self) -> bool: + """Check if the current code is executing within a datasette.client request. + + Returns: + bool: True if currently executing within a datasette.client request, False otherwise. 
+ """ + return _in_datasette_client.get() + def create_token( self, actor_id: str, @@ -2406,19 +2431,20 @@ class DatasetteClient: async def _request(self, method, path, skip_permission_checks=False, **kwargs): from datasette.permissions import SkipPermissions - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await getattr(client, method)(self._fix(path), **kwargs) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), ) as client: return await getattr(client, method)(self._fix(path), **kwargs) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await getattr(client, method)(self._fix(path), **kwargs) async def get(self, path, skip_permission_checks=False, **kwargs): return await self._request( @@ -2470,8 +2496,17 @@ class DatasetteClient: from datasette.permissions import SkipPermissions avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) - if skip_permission_checks: - with SkipPermissions(): + with _DatasetteClientContext(): + if skip_permission_checks: + with SkipPermissions(): + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=self.app), + cookies=kwargs.pop("cookies", None), + ) as client: + return await client.request( + method, self._fix(path, avoid_path_rewrites), **kwargs + ) + else: async with httpx.AsyncClient( transport=httpx.ASGITransport(app=self.app), cookies=kwargs.pop("cookies", None), @@ -2479,11 +2514,3 @@ class DatasetteClient: return await client.request( method, self._fix(path, avoid_path_rewrites), **kwargs ) - else: - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=self.app), - cookies=kwargs.pop("cookies", None), - ) as client: - return await client.request( - method, self._fix(path, avoid_path_rewrites), **kwargs - ) diff --git a/docs/internals.rst b/docs/internals.rst index 2e01a8e8..09fb7572 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,28 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +Detecting internal client requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``datasette.in_client()`` - returns bool + Returns ``True`` if the current code is executing within a ``datasette.client`` request, ``False`` otherwise. + +This method is useful for plugins that need to behave differently when called through ``datasette.client`` versus when handling external HTTP requests. + +Example usage: + +.. code-block:: python + + async def fetch_documents(datasette): + if not datasette.in_client(): + return Response.text( + "Only available via internal client requests", + status=403 + ) + ... + +Note that ``datasette.in_client()`` is independent of ``skip_permission_checks``. A request made through ``datasette.client`` will always have ``in_client()`` return ``True``, regardless of whether ``skip_permission_checks`` is set. + .. 
_internals_datasette_urls: datasette.urls diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index a15d294f..b254c5e4 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -227,3 +227,89 @@ async def test_skip_permission_checks_shows_denied_tables(): table_names = [match["name"] for match in data["matches"]] # Should see fixtures tables when permission checks are skipped assert "fixtures: test_table" in table_names + + +@pytest.mark.asyncio +async def test_in_client_returns_false_outside_request(datasette): + """Test that datasette.in_client() returns False outside of a client request""" + assert datasette.in_client() is False + + +@pytest.mark.asyncio +async def test_in_client_returns_true_inside_request(): + """Test that datasette.in_client() returns True inside a client request""" + from datasette import hookimpl, Response + from datasette.plugins import pm + + class TestPlugin: + __name__ = "test_in_client_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + # Assert in_client() returns True within the view + assert datasette.in_client() is True + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_plugin") + try: + ds = Datasette() + await ds.invoke_startup() + + # Outside of a client request, should be False + assert ds.in_client() is False + + # Make a request via datasette.client + response = await ds.client.get("/-/test-in-client") + assert response.status_code == 200 + assert response.json()["in_client"] is True + + # After the request, should be False again + assert ds.in_client() is False + finally: + pm.unregister(name="test_in_client_plugin") + + +@pytest.mark.asyncio +async def test_in_client_with_skip_permission_checks(): + """Test that in_client() works regardless of skip_permission_checks value""" + from datasette import hookimpl + from datasette.plugins import pm + from datasette.utils.asgi import Response + + in_client_values = [] + + class TestPlugin: + __name__ = "test_in_client_skip_plugin" + + @hookimpl + def register_routes(self): + async def test_view(datasette): + in_client_values.append(datasette.in_client()) + return Response.json({"in_client": datasette.in_client()}) + + return [ + (r"^/-/test-in-client$", test_view), + ] + + pm.register(TestPlugin(), name="test_in_client_skip_plugin") + try: + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + + # Request without skip_permission_checks + await ds.client.get("/-/test-in-client") + # Request with skip_permission_checks=True + await ds.client.get("/-/test-in-client", skip_permission_checks=True) + + # Both should have detected in_client as True + assert ( + len(in_client_values) == 2 + ), f"Expected 2 values, got {len(in_client_values)}" + assert all(in_client_values), f"Expected all True, got {in_client_values}" + finally: + pm.unregister(name="test_in_client_skip_plugin") From 4b4add4d311ce9c8b3e6b08b2f81db1bbd9cbf7e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:31:03 -0800 Subject: [PATCH 32/42] datasette.pm property, closes #2595 --- datasette/app.py | 16 +++++++++- datasette/plugins.py | 19 +++++++----- docs/internals.rst | 2 +- docs/testing_plugins.rst | 9 +++--- tests/test_actions_sql.py | 25 ++++++++-------- tests/test_allowed_resources.py | 25 ++++++++-------- 
tests/test_docs_plugins.py | 8 ++--- tests/test_internals_datasette_client.py | 18 +++++------ tests/test_permission_endpoints.py | 10 +++---- tests/test_plugins.py | 38 ++++++++++++------------ tests/test_restriction_sql.py | 20 ++++++------- 11 files changed, 101 insertions(+), 89 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index a5efdad5..2d8283a4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -631,6 +631,17 @@ class Datasette: def urls(self): return Urls(self) + @property + def pm(self): + """ + Return the global plugin manager instance. + + This provides access to the pluggy PluginManager that manages all + Datasette plugins and hooks. Use datasette.pm.hook.hook_name() to + call plugin hooks. + """ + return pm + async def invoke_startup(self): # This must be called for Datasette to be in a usable state if self._startup_invoked: @@ -2415,7 +2426,10 @@ class DatasetteClient: def __init__(self, ds): self.ds = ds - self.app = ds.app() + + @property + def app(self): + return self.ds.app() def actor_cookie(self, actor): # Utility method, mainly for tests diff --git a/datasette/plugins.py b/datasette/plugins.py index 392ab60d..e9818885 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -94,21 +94,24 @@ def get_plugins(): for plugin in pm.get_plugins(): static_path = None templates_path = None - if plugin.__name__ not in DEFAULT_PLUGINS: + plugin_name = ( + plugin.__name__ + if hasattr(plugin, "__name__") + else plugin.__class__.__name__ + ) + if plugin_name not in DEFAULT_PLUGINS: try: - if (importlib_resources.files(plugin.__name__) / "static").is_dir(): - static_path = str( - importlib_resources.files(plugin.__name__) / "static" - ) - if (importlib_resources.files(plugin.__name__) / "templates").is_dir(): + if (importlib_resources.files(plugin_name) / "static").is_dir(): + static_path = str(importlib_resources.files(plugin_name) / "static") + if (importlib_resources.files(plugin_name) / "templates").is_dir(): templates_path = str( - importlib_resources.files(plugin.__name__) / "templates" + importlib_resources.files(plugin_name) / "templates" ) except (TypeError, ModuleNotFoundError): # Caused by --plugins_dir= plugins pass plugin_info = { - "name": plugin.__name__, + "name": plugin_name, "static_path": static_path, "templates_path": templates_path, "hooks": [h.name for h in pm.get_hookcallers(plugin)], diff --git a/docs/internals.rst b/docs/internals.rst index 09fb7572..09d45c90 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1093,7 +1093,7 @@ Example usage: if not datasette.in_client(): return Response.text( "Only available via internal client requests", - status=403 + status=403, ) ... diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index e4fad500..fc1aa6f6 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -283,13 +283,12 @@ Here's a test for that plugin that mocks the HTTPX outbound request: Registering a plugin for the duration of a test ----------------------------------------------- -When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``pm.register()`` and ``pm.unregister()`` like this: +When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``datasette.pm.register()`` and ``datasette.pm.unregister()`` like this: .. 
code-block:: python from datasette import hookimpl from datasette.app import Datasette - from datasette.plugins import pm import pytest @@ -305,14 +304,14 @@ When writing tests for plugins you may find it useful to register a test plugin (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() try: # The test implementation goes here - datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") response = await datasette.client.get("/error") assert response.status_code == 500 finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") To reuse the same temporary plugin in multiple tests, you can register it inside a fixture in your ``conftest.py`` file like this: diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 734a427d..863d2529 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -11,7 +11,6 @@ These tests verify: import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette.resources import TableResource from datasette import hookimpl @@ -67,7 +66,7 @@ async def test_allowed_resources_global_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the new allowed_resources() method @@ -87,7 +86,7 @@ async def test_allowed_resources_global_allow(test_ds): assert ("production", "orders") in table_set finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -106,7 +105,7 @@ async def test_allowed_specific_resource(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -130,7 +129,7 @@ async def test_allowed_specific_resource(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -148,7 +147,7 @@ async def test_allowed_resources_include_reasons(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use allowed_resources with include_reasons to get debugging info @@ -170,7 +169,7 @@ async def test_allowed_resources_include_reasons(test_ds): assert "analyst access" in reasons_text finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -190,7 +189,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "bob", "role": "analyst"} @@ -219,7 +218,7 @@ async def test_child_deny_overrides_parent_allow(test_ds): ) finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -239,7 +238,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "carol"} @@ -264,7 +263,7 @@ async def test_child_allow_overrides_parent_deny(test_ds): ) finally: - 
pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -288,7 +287,7 @@ async def test_sql_does_filtering_not_python(test_ds): return PermissionSQL(sql=sql) plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: actor = {"id": "dave"} @@ -314,4 +313,4 @@ async def test_sql_does_filtering_not_python(test_ds): assert tables[0].child == "users" finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") diff --git a/tests/test_allowed_resources.py b/tests/test_allowed_resources.py index cecffbe2..0cd48ea9 100644 --- a/tests/test_allowed_resources.py +++ b/tests/test_allowed_resources.py @@ -8,7 +8,6 @@ based on permission rules from plugins and configuration. import pytest import pytest_asyncio from datasette.app import Datasette -from datasette.plugins import pm from datasette.permissions import PermissionSQL from datasette import hookimpl @@ -62,7 +61,7 @@ async def test_tables_endpoint_global_access(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: # Use the allowed_resources API directly @@ -87,7 +86,7 @@ async def test_tables_endpoint_global_access(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -102,7 +101,7 @@ async def test_tables_endpoint_database_restriction(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -130,7 +129,7 @@ async def test_tables_endpoint_database_restriction(test_ds): # Note: default_permissions.py provides default allows, so we just check analytics are present finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -149,7 +148,7 @@ async def test_tables_endpoint_table_exception(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "carol"}) @@ -172,7 +171,7 @@ async def test_tables_endpoint_table_exception(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -191,7 +190,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources( @@ -214,7 +213,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds): assert "analytics/sensitive" not in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -257,7 +256,7 @@ async def test_tables_endpoint_specific_table_only(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "dave"}) @@ -280,7 +279,7 @@ 
async def test_tables_endpoint_specific_table_only(test_ds): assert "production/orders" in table_names finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio @@ -295,7 +294,7 @@ async def test_tables_endpoint_empty_result(test_ds): return None plugin = PermissionRulesPlugin(rules_callback) - pm.register(plugin, name="test_plugin") + test_ds.pm.register(plugin, name="test_plugin") try: page = await test_ds.allowed_resources("view-table", {"id": "blocked"}) @@ -311,7 +310,7 @@ async def test_tables_endpoint_empty_result(test_ds): assert len(result) == 0 finally: - pm.unregister(plugin, name="test_plugin") + test_ds.pm.unregister(plugin, name="test_plugin") @pytest.mark.asyncio diff --git a/tests/test_docs_plugins.py b/tests/test_docs_plugins.py index 92b4514c..c51858d3 100644 --- a/tests/test_docs_plugins.py +++ b/tests/test_docs_plugins.py @@ -2,7 +2,6 @@ # -- start datasette_with_plugin_fixture -- from datasette import hookimpl from datasette.app import Datasette -from datasette.plugins import pm import pytest import pytest_asyncio @@ -18,11 +17,12 @@ async def datasette_with_plugin(): (r"^/error$", lambda: 1 / 0), ] - pm.register(TestPlugin(), name="undo") + datasette = Datasette() + datasette.pm.register(TestPlugin(), name="undo") try: - yield Datasette() + yield datasette finally: - pm.unregister(name="undo") + datasette.pm.unregister(name="undo") # -- end datasette_with_plugin_fixture -- diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index b254c5e4..326fcdc0 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -239,7 +239,6 @@ async def test_in_client_returns_false_outside_request(datasette): async def test_in_client_returns_true_inside_request(): """Test that datasette.in_client() returns True inside a client request""" from datasette import hookimpl, Response - from datasette.plugins import pm class TestPlugin: __name__ = "test_in_client_plugin" @@ -255,10 +254,10 @@ async def test_in_client_returns_true_inside_request(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_plugin") + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_plugin") try: - ds = Datasette() - await ds.invoke_startup() # Outside of a client request, should be False assert ds.in_client() is False @@ -271,14 +270,13 @@ async def test_in_client_returns_true_inside_request(): # After the request, should be False again assert ds.in_client() is False finally: - pm.unregister(name="test_in_client_plugin") + ds.pm.unregister(name="test_in_client_plugin") @pytest.mark.asyncio async def test_in_client_with_skip_permission_checks(): """Test that in_client() works regardless of skip_permission_checks value""" from datasette import hookimpl - from datasette.plugins import pm from datasette.utils.asgi import Response in_client_values = [] @@ -296,10 +294,10 @@ async def test_in_client_with_skip_permission_checks(): (r"^/-/test-in-client$", test_view), ] - pm.register(TestPlugin(), name="test_in_client_skip_plugin") + ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) + await ds.invoke_startup() + ds.pm.register(TestPlugin(), name="test_in_client_skip_plugin") try: - ds = Datasette(config={"databases": {"test_db": {"allow": {"id": "admin"}}}}) - await ds.invoke_startup() # Request without skip_permission_checks await 
ds.client.get("/-/test-in-client") @@ -312,4 +310,4 @@ async def test_in_client_with_skip_permission_checks(): ), f"Expected 2 values, got {len(in_client_values)}" assert all(in_client_values), f"Expected all True, got {in_client_values}" finally: - pm.unregister(name="test_in_client_skip_plugin") + ds.pm.unregister(name="test_in_client_skip_plugin") diff --git a/tests/test_permission_endpoints.py b/tests/test_permission_endpoints.py index d7b7bf07..84f3370f 100644 --- a/tests/test_permission_endpoints.py +++ b/tests/test_permission_endpoints.py @@ -439,7 +439,6 @@ async def test_execute_sql_requires_view_database(): be able to execute SQL on that database. """ from datasette.permissions import PermissionSQL - from datasette.plugins import pm from datasette import hookimpl class TestPermissionPlugin: @@ -464,11 +463,12 @@ async def test_execute_sql_requires_view_database(): return [] plugin = TestPermissionPlugin() - pm.register(plugin, name="test_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="test_plugin") try: - ds = Datasette() - await ds.invoke_startup() ds.add_memory_database("secret") await ds.refresh_schemas() @@ -498,4 +498,4 @@ async def test_execute_sql_requires_view_database(): f"but got {response.status_code}" ) finally: - pm.unregister(plugin) + ds.pm.unregister(plugin) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 4a8c60d7..42995c0d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -691,7 +691,7 @@ async def test_hook_permission_resources_sql(): await ds.invoke_startup() collected = [] - for block in pm.hook.permission_resources_sql( + for block in ds.pm.hook.permission_resources_sql( datasette=ds, actor={"id": "alice"}, action="view-table", @@ -1161,12 +1161,12 @@ async def test_hook_filters_from_request(ds_client): if request.args.get("_nothing"): return FilterArguments(["1 = 0"], human_descriptions=["NOTHING"]) - pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") + ds_client.ds.pm.register(ReturnNothingPlugin(), name="ReturnNothingPlugin") response = await ds_client.get("/fixtures/facetable?_nothing=1") assert "0 rows\n where NOTHING" in response.text json_response = await ds_client.get("/fixtures/facetable.json?_nothing=1") assert json_response.json()["rows"] == [] - pm.unregister(name="ReturnNothingPlugin") + ds_client.ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1327,7 +1327,7 @@ async def test_hook_actors_from_ids(): return inner try: - pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") + ds.pm.register(ActorsFromIdsPlugin(), name="ActorsFromIdsPlugin") actors2 = await ds.actors_from_ids(["3", "5", "7"]) assert actors2 == { "3": {"id": "3", "name": "Cate Blanchett"}, @@ -1335,7 +1335,7 @@ async def test_hook_actors_from_ids(): "7": {"id": "7", "name": "Sarah Paulson"}, } finally: - pm.unregister(name="ReturnNothingPlugin") + ds.pm.unregister(name="ReturnNothingPlugin") @pytest.mark.asyncio @@ -1350,14 +1350,14 @@ async def test_plugin_is_installed(): return {} try: - pm.register(DummyPlugin(), name="DummyPlugin") + datasette.pm.register(DummyPlugin(), name="DummyPlugin") response = await datasette.client.get("/-/plugins.json") assert response.status_code == 200 installed_plugins = {p["name"] for p in response.json()} assert "DummyPlugin" in installed_plugins finally: - pm.unregister(name="DummyPlugin") + datasette.pm.unregister(name="DummyPlugin") @pytest.mark.asyncio @@ -1384,7 +1384,7 @@ async def 
test_hook_jinja2_environment_from_request(tmpdir): datasette = Datasette(memory=True) try: - pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") + datasette.pm.register(EnvironmentPlugin(), name="EnvironmentPlugin") response = await datasette.client.get("/") assert response.status_code == 200 assert "Hello museums!" not in response.text @@ -1395,7 +1395,7 @@ async def test_hook_jinja2_environment_from_request(tmpdir): assert response2.status_code == 200 assert "Hello museums!" in response2.text finally: - pm.unregister(name="EnvironmentPlugin") + datasette.pm.unregister(name="EnvironmentPlugin") class SlotPlugin: @@ -1433,48 +1433,48 @@ class SlotPlugin: @pytest.mark.asyncio async def test_hook_top_homepage(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/?z=foo") assert response.status_code == 200 assert "Xtop_homepage:foo" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_database(): + datasette = Datasette(memory=True) try: - pm.register(SlotPlugin(), name="SlotPlugin") - datasette = Datasette(memory=True) + datasette.pm.register(SlotPlugin(), name="SlotPlugin") response = await datasette.client.get("/_memory?z=bar") assert response.status_code == 200 assert "Xtop_database:_memory:bar" in response.text finally: - pm.unregister(name="SlotPlugin") + datasette.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_table(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facetable?z=baz") assert response.status_code == 200 assert "Xtop_table:fixtures:facetable:baz" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio async def test_hook_top_row(ds_client): try: - pm.register(SlotPlugin(), name="SlotPlugin") + ds_client.ds.pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/facet_cities/1?z=bax") assert response.status_code == 200 assert "Xtop_row:fixtures:facet_cities:San Francisco:bax" in response.text finally: - pm.unregister(name="SlotPlugin") + ds_client.ds.pm.unregister(name="SlotPlugin") @pytest.mark.asyncio diff --git a/tests/test_restriction_sql.py b/tests/test_restriction_sql.py index 7d6d8a5a..f23eb839 100644 --- a/tests/test_restriction_sql.py +++ b/tests/test_restriction_sql.py @@ -13,7 +13,6 @@ async def test_multiple_restriction_sources_intersect(): provide restriction_sql - both must pass for access to be granted. 
""" from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -29,11 +28,12 @@ async def test_multiple_restriction_sources_intersect(): return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_intersect") db2 = ds.add_memory_database("db2_multi_intersect") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -55,7 +55,7 @@ async def test_multiple_restriction_sources_intersect(): assert ("db1_multi_intersect", "t1") in resources assert ("db2_multi_intersect", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") @pytest.mark.asyncio @@ -265,7 +265,6 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() provide restriction_sql - both must pass for access to be granted. """ from datasette import hookimpl - from datasette.plugins import pm class RestrictivePlugin: __name__ = "RestrictivePlugin" @@ -281,11 +280,12 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() return None plugin = RestrictivePlugin() - pm.register(plugin, name="restrictive_plugin") + + ds = Datasette() + await ds.invoke_startup() + ds.pm.register(plugin, name="restrictive_plugin") try: - ds = Datasette() - await ds.invoke_startup() db1 = ds.add_memory_database("db1_multi_restrictions") db2 = ds.add_memory_database("db2_multi_restrictions") await db1.execute_write("CREATE TABLE t1 (id INTEGER)") @@ -312,4 +312,4 @@ async def test_permission_resources_sql_multiple_restriction_sources_intersect() assert ("db1_multi_restrictions", "t1") in resources assert ("db2_multi_restrictions", "t1") not in resources finally: - pm.unregister(name="restrictive_plugin") + ds.pm.unregister(name="restrictive_plugin") From 93b455239a4063c80d52da795db700c6a88e4d16 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:40:24 -0800 Subject: [PATCH 33/42] Release notes for 1.0a22, closes #2596 --- docs/changelog.rst | 9 +++++++++ docs/internals.rst | 2 ++ 2 files changed, 11 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 66d46bce..feba9390 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,15 @@ Changelog ========= +.. _v1_0_a22: + +1.0a22 (2025-11-13) +------------------- + +- ``datasette serve --default-deny`` option for running Datasette configured to :ref:`deny all permissions by default `. (:issue:`2592`) +- ``datasette.is_client()`` method for detecting if code is :ref:`executing inside a datasette.client request `. (:issue:`2594`) +- ``datasette.pm`` property can now be used to :ref:`register and unregister plugins in tests `. (:issue:`2595`) + .. _v1_0_a21: 1.0a21 (2025-11-05) diff --git a/docs/internals.rst b/docs/internals.rst index 09d45c90..cfd78593 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1077,6 +1077,8 @@ This parameter works with all HTTP methods (``get``, ``post``, ``put``, ``patch` Use ``skip_permission_checks=True`` with caution. It completely bypasses Datasette's permission system and should only be used in trusted plugin code or internal operations where you need guaranteed access to resources. +.. 
_internals_datasette_is_client: + Detecting internal client requests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 2125115cd9b609def872cd8051912ac80179f510 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Nov 2025 10:41:02 -0800 Subject: [PATCH 34/42] Release 1.0a22 Refs #2592, #2594, #2595, #2596 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 01f00fcd..d0ff6ab1 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a21" +__version__ = "1.0a22" __version_info__ = tuple(__version__.split(".")) From 68f1179bac991b5e37b99a5482c40134f317c04f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Nov 2025 17:12:52 -0800 Subject: [PATCH 35/42] Fix for text None shown on /-/actions, closes #2599 --- datasette/templates/debug_actions.html | 2 +- tests/test_html.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/datasette/templates/debug_actions.html b/datasette/templates/debug_actions.html index 6dd5ac0e..0ef7b329 100644 --- a/datasette/templates/debug_actions.html +++ b/datasette/templates/debug_actions.html @@ -31,7 +31,7 @@

- + diff --git a/tests/test_html.py b/tests/test_html.py index 35b839ec..7b667301 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1194,6 +1194,21 @@ async def test_actions_page(ds_client): ds_client.ds.root_enabled = original_root_enabled +@pytest.mark.asyncio +async def test_actions_page_does_not_display_none_string(ds_client): + """Ensure the Resource column doesn't display the string 'None' for null values.""" + # https://github.com/simonw/datasette/issues/2599 + original_root_enabled = ds_client.ds.root_enabled + try: + ds_client.ds.root_enabled = True + cookies = {"ds_actor": ds_client.actor_cookie({"id": "root"})} + response = await ds_client.get("/-/actions", cookies=cookies) + assert response.status_code == 200 + assert "None" not in response.text + finally: + ds_client.ds.root_enabled = original_root_enabled + + @pytest.mark.asyncio async def test_permission_debug_tabs_with_query_string(ds_client): """Test that navigation tabs persist query strings across Check, Allowed, and Rules pages""" From c6c2a238c3e890384eef6bf9bca062fd784d9157 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 16:22:42 -0800 Subject: [PATCH 36/42] Fix for stale internal database bug, closes #2605 --- datasette/utils/internal_db.py | 3 +++ tests/test_internal_db.py | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index a3afbab2..587ea7b1 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,6 +116,9 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): + conn.execute( + "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] + ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) diff --git a/tests/test_internal_db.py b/tests/test_internal_db.py index 59516225..7a0d1630 100644 --- a/tests/test_internal_db.py +++ b/tests/test_internal_db.py @@ -91,3 +91,51 @@ async def test_internal_foreign_key_references(ds_client): ) await internal_db.execute_fn(inner) + + +@pytest.mark.asyncio +async def test_stale_catalog_entry_database_fix(tmp_path): + """ + Test for https://github.com/simonw/datasette/issues/2605 + + When the internal database persists across restarts and has entries in + catalog_databases for databases that no longer exist, accessing the + index page should not cause a 500 error (KeyError). 
+ """ + from datasette.app import Datasette + + internal_db_path = str(tmp_path / "internal.db") + data_db_path = str(tmp_path / "data.db") + + # Create a data database file + import sqlite3 + + conn = sqlite3.connect(data_db_path) + conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY)") + conn.close() + + # First Datasette instance: with the data database and persistent internal db + ds1 = Datasette(files=[data_db_path], internal=internal_db_path) + await ds1.invoke_startup() + + # Access the index page to populate the internal catalog + response = await ds1.client.get("/") + assert "data" in ds1.databases + assert response.status_code == 200 + + # Second Datasette instance: reusing internal.db but WITHOUT the data database + # This simulates restarting Datasette after removing a database + ds2 = Datasette(internal=internal_db_path) + await ds2.invoke_startup() + + # The database is not in ds2.databases + assert "data" not in ds2.databases + + # Accessing the index page should NOT cause a 500 error + # This is the bug: it currently raises KeyError when trying to + # access ds.databases["data"] for the stale catalog entry + response = await ds2.client.get("/") + assert response.status_code == 200, ( + f"Index page should return 200, not {response.status_code}. " + "This fails due to stale catalog entries causing KeyError." + ) From 170b3ff61c1c7bc49b999ecbe43853af9727f2f1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:00:13 -0800 Subject: [PATCH 37/42] Better fix for stale catalog_databases, closes #2606 Refs 2605 --- datasette/app.py | 9 +++++++++ datasette/utils/internal_db.py | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2d8283a4..b9955925 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -606,6 +606,15 @@ class Datasette: "select database_name, schema_version from catalog_databases" ) } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() + ) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) for database_name, db in self.databases.items(): schema_version = (await db.execute("PRAGMA schema_version")).first()[0] # Compare schema versions to see if we should skip it diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 587ea7b1..a3afbab2 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -116,9 +116,6 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): - conn.execute( - "DELETE FROM catalog_databases WHERE database_name = ?", [database_name] - ) conn.execute( "DELETE FROM catalog_tables WHERE database_name = ?", [database_name] ) From 0a924524be06a331f20d2e1314ec82370995630b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:11:31 -0800 Subject: [PATCH 38/42] Split default_permissions.py into a package (#2603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Split default_permissions.py into a package, refs #2602 * Remove unused is_resource_allowed() method, improve test coverage - Remove dead code: is_resource_allowed() method was never called - Change isinstance check to assertion with error message - Add test cases for table-level restrictions in restrictions_allow_action() - Coverage for 
restrictions.py improved from 79% to 99% 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * Additional permission test for gap spotted by coverage --- datasette/default_permissions.py | 494 ------------------ datasette/default_permissions/__init__.py | 59 +++ datasette/default_permissions/config.py | 442 ++++++++++++++++ datasette/default_permissions/defaults.py | 70 +++ datasette/default_permissions/helpers.py | 85 +++ datasette/default_permissions/restrictions.py | 195 +++++++ datasette/default_permissions/root.py | 29 + datasette/default_permissions/tokens.py | 95 ++++ tests/test_permissions.py | 59 +++ 9 files changed, 1034 insertions(+), 494 deletions(-) delete mode 100644 datasette/default_permissions.py create mode 100644 datasette/default_permissions/__init__.py create mode 100644 datasette/default_permissions/config.py create mode 100644 datasette/default_permissions/defaults.py create mode 100644 datasette/default_permissions/helpers.py create mode 100644 datasette/default_permissions/restrictions.py create mode 100644 datasette/default_permissions/root.py create mode 100644 datasette/default_permissions/tokens.py diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py deleted file mode 100644 index 12e6c1ef..00000000 --- a/datasette/default_permissions.py +++ /dev/null @@ -1,494 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from datasette.app import Datasette - -from datasette import hookimpl -from datasette.permissions import PermissionSQL -from datasette.utils import actor_matches_allow -import itsdangerous -import time - - -@hookimpl(specname="permission_resources_sql") -async def actor_restrictions_sql(datasette, actor, action): - """Handle actor restriction-based permission rules (_r key).""" - if not actor: - return None - - restrictions = actor.get("_r") if isinstance(actor, dict) else None - if restrictions is None: - return [] - - # Check if this action appears in restrictions (with abbreviations) - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - # Check if globally allowed in restrictions - global_actions = restrictions.get("a", []) - is_globally_allowed = action_checks.intersection(global_actions) - - if is_globally_allowed: - # Globally allowed - no restriction filtering needed - return [] - - # Not globally allowed - build restriction_sql that lists allowlisted resources - restriction_selects = [] - restriction_params = {} - param_counter = 0 - - # Add database-level allowlisted resources - db_restrictions = restrictions.get("d", {}) - for db_name, db_actions in db_restrictions.items(): - if action_checks.intersection(db_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, NULL AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - - # Add table-level allowlisted resources - resource_restrictions = restrictions.get("r", {}) - for db_name, tables in resource_restrictions.items(): - for table_name, table_actions in tables.items(): - if action_checks.intersection(table_actions): - prefix = f"restr_{param_counter}" - param_counter += 1 - restriction_selects.append( - f"SELECT :{prefix}_parent AS parent, :{prefix}_child AS child" - ) - restriction_params[f"{prefix}_parent"] = db_name - restriction_params[f"{prefix}_child"] = table_name - - if not 
restriction_selects: - # Action not in allowlist - return empty restriction (INTERSECT will return no results) - return [ - PermissionSQL( - params={"deny": f"actor restrictions: {action} not in allowlist"}, - restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", # Empty set - ) - ] - - # Build restriction SQL that returns allowed (parent, child) pairs - restriction_sql = "\nUNION ALL\n".join(restriction_selects) - - # Return restriction-only PermissionSQL (sql=None means no permission rules) - # The restriction_sql does the actual filtering via INTERSECT - return [ - PermissionSQL( - params=restriction_params, - restriction_sql=restriction_sql, - ) - ] - - -@hookimpl(specname="permission_resources_sql") -async def root_user_permissions_sql(datasette, actor, action): - """Grant root user full permissions when enabled.""" - if datasette.root_enabled and actor and actor.get("id") == "root": - # Add a single global-level allow rule (NULL, NULL) for root - # This allows root to access everything by default, but database-level - # and table-level deny rules in config can still block specific resources - return PermissionSQL.allow(reason="root user") - return None - - -@hookimpl(specname="permission_resources_sql") -async def config_permissions_sql(datasette, actor, action): - """Apply config-based permission rules from datasette.yaml.""" - config = datasette.config or {} - - def evaluate(allow_block): - if allow_block is None: - return None - return actor_matches_allow(actor, allow_block) - - has_restrictions = actor and "_r" in actor if actor else False - restrictions = actor.get("_r", {}) if actor else {} - - action_obj = datasette.actions.get(action) - action_checks = {action} - if action_obj and action_obj.abbr: - action_checks.add(action_obj.abbr) - - restricted_databases: set[str] = set() - restricted_tables: set[tuple[str, str]] = set() - if has_restrictions: - restricted_databases = { - db_name - for db_name, db_actions in (restrictions.get("d") or {}).items() - if action_checks.intersection(db_actions) - } - restricted_tables = { - (db_name, table_name) - for db_name, tables in (restrictions.get("r") or {}).items() - for table_name, table_actions in tables.items() - if action_checks.intersection(table_actions) - } - # Tables implicitly reference their parent databases - restricted_databases.update(db for db, _ in restricted_tables) - - def is_in_restriction_allowlist(parent, child, action_name): - """Check if a resource is in the actor's restriction allowlist for this action""" - if not has_restrictions: - return True # No restrictions, all resources allowed - - # Check global allowlist - if action_checks.intersection(restrictions.get("a", [])): - return True - - # Check database-level allowlist - if parent and action_checks.intersection( - restrictions.get("d", {}).get(parent, []) - ): - return True - - # Check table-level allowlist - if parent: - table_restrictions = (restrictions.get("r", {}) or {}).get(parent, {}) - if child: - table_actions = table_restrictions.get(child, []) - if action_checks.intersection(table_actions): - return True - else: - # Parent query should proceed if any child in this database is allowlisted - for table_actions in table_restrictions.values(): - if action_checks.intersection(table_actions): - return True - - # Parent/child both None: include if any restrictions exist for this action - if parent is None and child is None: - if action_checks.intersection(restrictions.get("a", [])): - return True - if restricted_databases: - return True - if 
restricted_tables: - return True - - return False - - rows = [] - - def add_row(parent, child, result, scope): - if result is None: - return - rows.append( - ( - parent, - child, - bool(result), - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - - def add_row_allow_block(parent, child, allow_block, scope): - """For 'allow' blocks, always add a row if the block exists - deny if no match""" - if allow_block is None: - return - - # If actor has restrictions and this resource is NOT in allowlist, skip this config rule - # Restrictions act as a gating filter - config cannot grant access to restricted-out resources - if not is_in_restriction_allowlist(parent, child, action): - return - - result = evaluate(allow_block) - bool_result = bool(result) - # If result is None (no match) or False, treat as deny - rows.append( - ( - parent, - child, - bool_result, # None becomes False, False stays False, True stays True - f"config {'allow' if result else 'deny'} {scope}", - ) - ) - if has_restrictions and not bool_result and child is None: - reason = f"config deny {scope} (restriction gate)" - if parent is None: - # Root-level deny: add more specific denies for restricted resources - if action_obj and action_obj.takes_parent: - for db_name in restricted_databases: - rows.append((db_name, None, 0, reason)) - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - rows.append((db_name, table_name, 0, reason)) - else: - # Database-level deny: add child-level denies for restricted tables - if action_obj and action_obj.takes_child: - for db_name, table_name in restricted_tables: - if db_name == parent: - rows.append((db_name, table_name, 0, reason)) - - root_perm = (config.get("permissions") or {}).get(action) - add_row(None, None, evaluate(root_perm), f"permissions for {action}") - - for db_name, db_config in (config.get("databases") or {}).items(): - db_perm = (db_config.get("permissions") or {}).get(action) - add_row( - db_name, None, evaluate(db_perm), f"permissions for {action} on {db_name}" - ) - - for table_name, table_config in (db_config.get("tables") or {}).items(): - table_perm = (table_config.get("permissions") or {}).get(action) - add_row( - db_name, - table_name, - evaluate(table_perm), - f"permissions for {action} on {db_name}/{table_name}", - ) - - if action == "view-table": - table_allow = (table_config or {}).get("allow") - add_row_allow_block( - db_name, - table_name, - table_allow, - f"allow for {action} on {db_name}/{table_name}", - ) - - for query_name, query_config in (db_config.get("queries") or {}).items(): - # query_config can be a string (just SQL) or a dict (with SQL and options) - if isinstance(query_config, dict): - query_perm = (query_config.get("permissions") or {}).get(action) - add_row( - db_name, - query_name, - evaluate(query_perm), - f"permissions for {action} on {db_name}/{query_name}", - ) - if action == "view-query": - query_allow = query_config.get("allow") - add_row_allow_block( - db_name, - query_name, - query_allow, - f"allow for {action} on {db_name}/{query_name}", - ) - - if action == "view-database": - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "execute-sql": - db_allow_sql = db_config.get("allow_sql") - add_row_allow_block(db_name, None, db_allow_sql, f"allow_sql for {db_name}") - - if action == "view-table": - # Database-level allow block affects all tables in that database - db_allow = db_config.get("allow") - 
add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - if action == "view-query": - # Database-level allow block affects all queries in that database - db_allow = db_config.get("allow") - add_row_allow_block( - db_name, None, db_allow, f"allow for {action} on {db_name}" - ) - - # Root-level allow block applies to all view-* actions - if action == "view-instance": - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-instance") - - if action == "view-database": - # Root-level allow block also applies to view-database - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-database") - - if action == "view-table": - # Root-level allow block also applies to view-table - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-table") - - if action == "view-query": - # Root-level allow block also applies to view-query - allow_block = config.get("allow") - add_row_allow_block(None, None, allow_block, "allow for view-query") - - if action == "execute-sql": - allow_sql = config.get("allow_sql") - add_row_allow_block(None, None, allow_sql, "allow_sql") - - if not rows: - return [] - - parts = [] - params = {} - for idx, (parent, child, allow, reason) in enumerate(rows): - key = f"cfg_{idx}" - parts.append( - f"SELECT :{key}_parent AS parent, :{key}_child AS child, :{key}_allow AS allow, :{key}_reason AS reason" - ) - params[f"{key}_parent"] = parent - params[f"{key}_child"] = child - params[f"{key}_allow"] = 1 if allow else 0 - params[f"{key}_reason"] = reason - - sql = "\nUNION ALL\n".join(parts) - return [PermissionSQL(sql=sql, params=params)] - - -@hookimpl(specname="permission_resources_sql") -async def default_allow_sql_check(datasette, actor, action): - """Enforce default_allow_sql setting for execute-sql action.""" - if action == "execute-sql" and not datasette.setting("default_allow_sql"): - return PermissionSQL.deny(reason="default_allow_sql is false") - return None - - -@hookimpl(specname="permission_resources_sql") -async def default_action_permissions_sql(datasette, actor, action): - """Apply default allow rules for standard view/execute actions. - - With the INTERSECT-based restriction approach, these defaults are always generated - and then filtered by restriction_sql if the actor has restrictions. - """ - # Skip default allow rules if default_deny is enabled - if datasette.default_deny: - return None - - default_allow_actions = { - "view-instance", - "view-database", - "view-database-download", - "view-table", - "view-query", - "execute-sql", - } - if action in default_allow_actions: - reason = f"default allow for {action}".replace("'", "''") - return PermissionSQL.allow(reason=reason) - - return None - - -def restrictions_allow_action( - datasette: "Datasette", - restrictions: dict, - action: str, - resource: str | tuple[str, str], -): - """ - Check if actor restrictions allow the requested action against the requested resource. - - Restrictions work on an exact-match basis: if an actor has view-table permission, - they can view tables, but NOT automatically view-instance or view-database. - Each permission is checked independently without implication logic. - """ - # Does this action have an abbreviation? 
- to_check = {action} - action_obj = datasette.actions.get(action) - if action_obj and action_obj.abbr: - to_check.add(action_obj.abbr) - - # Check if restrictions explicitly allow this action - # Restrictions can be at three levels: - # - "a": global (any resource) - # - "d": per-database - # - "r": per-table/resource - - # Check global level (any resource) - all_allowed = restrictions.get("a") - if all_allowed is not None: - assert isinstance(all_allowed, list) - if to_check.intersection(all_allowed): - return True - - # Check database level - if resource: - if isinstance(resource, str): - database_name = resource - else: - database_name = resource[0] - database_allowed = restrictions.get("d", {}).get(database_name) - if database_allowed is not None: - assert isinstance(database_allowed, list) - if to_check.intersection(database_allowed): - return True - - # Check table/resource level - if resource is not None and not isinstance(resource, str) and len(resource) == 2: - database, table = resource - table_allowed = restrictions.get("r", {}).get(database, {}).get(table) - if table_allowed is not None: - assert isinstance(table_allowed, list) - if to_check.intersection(table_allowed): - return True - - # This action is not explicitly allowed, so reject it - return False - - -@hookimpl -def actor_from_request(datasette, request): - prefix = "dstok_" - if not datasette.setting("allow_signed_tokens"): - return None - max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") - authorization = request.headers.get("authorization") - if not authorization: - return None - if not authorization.startswith("Bearer "): - return None - token = authorization[len("Bearer ") :] - if not token.startswith(prefix): - return None - token = token[len(prefix) :] - try: - decoded = datasette.unsign(token, namespace="token") - except itsdangerous.BadSignature: - return None - if "t" not in decoded: - # Missing timestamp - return None - created = decoded["t"] - if not isinstance(created, int): - # Invalid timestamp - return None - duration = decoded.get("d") - if duration is not None and not isinstance(duration, int): - # Invalid duration - return None - if (duration is None and max_signed_tokens_ttl) or ( - duration is not None - and max_signed_tokens_ttl - and duration > max_signed_tokens_ttl - ): - duration = max_signed_tokens_ttl - if duration: - if time.time() - created > duration: - # Expired - return None - actor = {"id": decoded["a"], "token": "dstok"} - if "_r" in decoded: - actor["_r"] = decoded["_r"] - if duration: - actor["token_expires"] = created + duration - return actor - - -@hookimpl -def skip_csrf(scope): - # Skip CSRF check for requests with content-type: application/json - if scope["type"] == "http": - headers = scope.get("headers") or {} - if dict(headers).get(b"content-type") == b"application/json": - return True - - -@hookimpl -def canned_queries(datasette, database, actor): - """Return canned queries from datasette configuration.""" - queries = ( - ((datasette.config or {}).get("databases") or {}).get(database) or {} - ).get("queries") or {} - return queries diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py new file mode 100644 index 00000000..4c82d705 --- /dev/null +++ b/datasette/default_permissions/__init__.py @@ -0,0 +1,59 @@ +""" +Default permission implementations for Datasette. + +This module provides the built-in permission checking logic through implementations +of the permission_resources_sql hook. 
The hooks are organized by their purpose: + +1. Actor Restrictions - Enforces _r allowlists embedded in actor tokens +2. Root User - Grants full access when --root flag is used +3. Config Rules - Applies permissions from datasette.yaml +4. Default Settings - Enforces default_allow_sql and default view permissions + +IMPORTANT: These hooks return PermissionSQL objects that are combined using SQL +UNION/INTERSECT operations. The order of evaluation is: + - restriction_sql fields are INTERSECTed (all must match) + - Regular sql fields are UNIONed and evaluated with cascading priority +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl + +# Re-export all hooks and public utilities +from .restrictions import ( + actor_restrictions_sql, + restrictions_allow_action, + ActorRestrictions, +) +from .root import root_user_permissions_sql +from .config import config_permissions_sql +from .defaults import ( + default_allow_sql_check, + default_action_permissions_sql, + DEFAULT_ALLOW_ACTIONS, +) +from .tokens import actor_from_signed_api_token + + +@hookimpl +def skip_csrf(scope) -> Optional[bool]: + """Skip CSRF check for JSON content-type requests.""" + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True + return None + + +@hookimpl +def canned_queries(datasette: "Datasette", database: str, actor) -> dict: + """Return canned queries defined in datasette.yaml configuration.""" + queries = ( + ((datasette.config or {}).get("databases") or {}).get(database) or {} + ).get("queries") or {} + return queries diff --git a/datasette/default_permissions/config.py b/datasette/default_permissions/config.py new file mode 100644 index 00000000..aab87c1c --- /dev/null +++ b/datasette/default_permissions/config.py @@ -0,0 +1,442 @@ +""" +Config-based permission handling for Datasette. + +Applies permission rules from datasette.yaml configuration. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL +from datasette.utils import actor_matches_allow + +from .helpers import PermissionRowCollector, get_action_name_variants + + +class ConfigPermissionProcessor: + """ + Processes permission rules from datasette.yaml configuration. 
+ + Configuration structure: + + permissions: # Root-level permissions block + view-instance: + id: admin + + databases: + mydb: + permissions: # Database-level permissions + view-database: + id: admin + allow: # Database-level allow block (for view-*) + id: viewer + allow_sql: # execute-sql allow block + id: analyst + tables: + users: + permissions: # Table-level permissions + view-table: + id: admin + allow: # Table-level allow block + id: viewer + queries: + my_query: + permissions: # Query-level permissions + view-query: + id: admin + allow: # Query-level allow block + id: viewer + """ + + def __init__( + self, + datasette: "Datasette", + actor: Optional[dict], + action: str, + ): + self.datasette = datasette + self.actor = actor + self.action = action + self.config = datasette.config or {} + self.collector = PermissionRowCollector(prefix="cfg") + + # Pre-compute action variants + self.action_checks = get_action_name_variants(datasette, action) + self.action_obj = datasette.actions.get(action) + + # Parse restrictions if present + self.has_restrictions = actor and "_r" in actor if actor else False + self.restrictions = actor.get("_r", {}) if actor else {} + + # Pre-compute restriction info for efficiency + self.restricted_databases: Set[str] = set() + self.restricted_tables: Set[Tuple[str, str]] = set() + + if self.has_restrictions: + self.restricted_databases = { + db_name + for db_name, db_actions in (self.restrictions.get("d") or {}).items() + if self.action_checks.intersection(db_actions) + } + self.restricted_tables = { + (db_name, table_name) + for db_name, tables in (self.restrictions.get("r") or {}).items() + for table_name, table_actions in tables.items() + if self.action_checks.intersection(table_actions) + } + # Tables implicitly reference their parent databases + self.restricted_databases.update(db for db, _ in self.restricted_tables) + + def evaluate_allow_block(self, allow_block: Any) -> Optional[bool]: + """Evaluate an allow block against the current actor.""" + if allow_block is None: + return None + return actor_matches_allow(self.actor, allow_block) + + def is_in_restriction_allowlist( + self, + parent: Optional[str], + child: Optional[str], + ) -> bool: + """Check if resource is allowed by actor restrictions.""" + if not self.has_restrictions: + return True # No restrictions, all resources allowed + + # Check global allowlist + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + + # Check database-level allowlist + if parent and self.action_checks.intersection( + self.restrictions.get("d", {}).get(parent, []) + ): + return True + + # Check table-level allowlist + if parent: + table_restrictions = (self.restrictions.get("r", {}) or {}).get(parent, {}) + if child: + table_actions = table_restrictions.get(child, []) + if self.action_checks.intersection(table_actions): + return True + else: + # Parent query should proceed if any child in this database is allowlisted + for table_actions in table_restrictions.values(): + if self.action_checks.intersection(table_actions): + return True + + # Parent/child both None: include if any restrictions exist for this action + if parent is None and child is None: + if self.action_checks.intersection(self.restrictions.get("a", [])): + return True + if self.restricted_databases: + return True + if self.restricted_tables: + return True + + return False + + def add_permissions_rule( + self, + parent: Optional[str], + child: Optional[str], + permissions_block: Optional[dict], + scope_desc: str, + ) -> None: 
+ """Add a rule from a permissions:{action} block.""" + if permissions_block is None: + return + + action_allow_block = permissions_block.get(self.action) + result = self.evaluate_allow_block(action_allow_block) + + self.collector.add( + parent=parent, + child=child, + allow=result, + reason=f"config {'allow' if result else 'deny'} {scope_desc}", + if_not_none=True, + ) + + def add_allow_block_rule( + self, + parent: Optional[str], + child: Optional[str], + allow_block: Any, + scope_desc: str, + ) -> None: + """ + Add rules from an allow:{} block. + + For allow blocks, if the block exists but doesn't match the actor, + this is treated as a deny. We also handle the restriction-gate logic. + """ + if allow_block is None: + return + + # Skip if resource is not in restriction allowlist + if not self.is_in_restriction_allowlist(parent, child): + return + + result = self.evaluate_allow_block(allow_block) + bool_result = bool(result) + + self.collector.add( + parent, + child, + bool_result, + f"config {'allow' if result else 'deny'} {scope_desc}", + ) + + # Handle restriction-gate: add explicit denies for restricted resources + self._add_restriction_gate_denies(parent, child, bool_result, scope_desc) + + def _add_restriction_gate_denies( + self, + parent: Optional[str], + child: Optional[str], + is_allowed: bool, + scope_desc: str, + ) -> None: + """ + When a config rule denies at a higher level, add explicit denies + for restricted resources to prevent child-level allows from + incorrectly granting access. + """ + if is_allowed or child is not None or not self.has_restrictions: + return + + if not self.action_obj: + return + + reason = f"config deny {scope_desc} (restriction gate)" + + if parent is None: + # Root-level deny: add denies for all restricted resources + if self.action_obj.takes_parent: + for db_name in self.restricted_databases: + self.collector.add(db_name, None, False, reason) + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + self.collector.add(db_name, table_name, False, reason) + else: + # Database-level deny: add denies for tables in that database + if self.action_obj.takes_child: + for db_name, table_name in self.restricted_tables: + if db_name == parent: + self.collector.add(db_name, table_name, False, reason) + + def process(self) -> Optional[PermissionSQL]: + """Process all config rules and return combined PermissionSQL.""" + self._process_root_permissions() + self._process_databases() + self._process_root_allow_blocks() + + return self.collector.to_permission_sql() + + def _process_root_permissions(self) -> None: + """Process root-level permissions block.""" + root_perms = self.config.get("permissions") or {} + self.add_permissions_rule( + None, + None, + root_perms, + f"permissions for {self.action}", + ) + + def _process_databases(self) -> None: + """Process database-level and nested configurations.""" + databases = self.config.get("databases") or {} + + for db_name, db_config in databases.items(): + self._process_database(db_name, db_config or {}) + + def _process_database(self, db_name: str, db_config: dict) -> None: + """Process a single database's configuration.""" + # Database-level permissions block + db_perms = db_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + None, + db_perms, + f"permissions for {self.action} on {db_name}", + ) + + # Process tables + for table_name, table_config in (db_config.get("tables") or {}).items(): + self._process_table(db_name, table_name, table_config or {}) + + # Process 
queries + for query_name, query_config in (db_config.get("queries") or {}).items(): + self._process_query(db_name, query_name, query_config) + + # Database-level allow blocks + self._process_database_allow_blocks(db_name, db_config) + + def _process_table( + self, + db_name: str, + table_name: str, + table_config: dict, + ) -> None: + """Process a single table's configuration.""" + # Table-level permissions block + table_perms = table_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + table_name, + table_perms, + f"permissions for {self.action} on {db_name}/{table_name}", + ) + + # Table-level allow block (for view-table) + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + table_name, + table_config.get("allow"), + f"allow for {self.action} on {db_name}/{table_name}", + ) + + def _process_query( + self, + db_name: str, + query_name: str, + query_config: Any, + ) -> None: + """Process a single query's configuration.""" + # Query config can be a string (just SQL) or dict + if not isinstance(query_config, dict): + return + + # Query-level permissions block + query_perms = query_config.get("permissions") or {} + self.add_permissions_rule( + db_name, + query_name, + query_perms, + f"permissions for {self.action} on {db_name}/{query_name}", + ) + + # Query-level allow block (for view-query) + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + query_name, + query_config.get("allow"), + f"allow for {self.action} on {db_name}/{query_name}", + ) + + def _process_database_allow_blocks( + self, + db_name: str, + db_config: dict, + ) -> None: + """Process database-level allow/allow_sql blocks.""" + # view-database allow block + if self.action == "view-database": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # execute-sql allow_sql block + if self.action == "execute-sql": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow_sql"), + f"allow_sql for {db_name}", + ) + + # view-table uses database-level allow for inheritance + if self.action == "view-table": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + # view-query uses database-level allow for inheritance + if self.action == "view-query": + self.add_allow_block_rule( + db_name, + None, + db_config.get("allow"), + f"allow for {self.action} on {db_name}", + ) + + def _process_root_allow_blocks(self) -> None: + """Process root-level allow/allow_sql blocks.""" + root_allow = self.config.get("allow") + + if self.action == "view-instance": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-instance", + ) + + if self.action == "view-database": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-database", + ) + + if self.action == "view-table": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-table", + ) + + if self.action == "view-query": + self.add_allow_block_rule( + None, + None, + root_allow, + "allow for view-query", + ) + + if self.action == "execute-sql": + self.add_allow_block_rule( + None, + None, + self.config.get("allow_sql"), + "allow_sql", + ) + + +@hookimpl(specname="permission_resources_sql") +async def config_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Apply permission rules from datasette.yaml configuration. 
+ + This processes: + - permissions: blocks at root, database, table, and query levels + - allow: blocks for view-* actions + - allow_sql: blocks for execute-sql action + """ + processor = ConfigPermissionProcessor(datasette, actor, action) + result = processor.process() + + if result is None: + return [] + + return [result] diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py new file mode 100644 index 00000000..f5a6a270 --- /dev/null +++ b/datasette/default_permissions/defaults.py @@ -0,0 +1,70 @@ +""" +Default permission settings for Datasette. + +Provides default allow rules for standard view/execute actions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +# Actions that are allowed by default (unless --default-deny is used) +DEFAULT_ALLOW_ACTIONS = frozenset( + { + "view-instance", + "view-database", + "view-database-download", + "view-table", + "view-query", + "execute-sql", + } +) + + +@hookimpl(specname="permission_resources_sql") +async def default_allow_sql_check( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Enforce the default_allow_sql setting. + + When default_allow_sql is false (the default), execute-sql is denied + unless explicitly allowed by config or other rules. + """ + if action == "execute-sql": + if not datasette.setting("default_allow_sql"): + return PermissionSQL.deny(reason="default_allow_sql is false") + + return None + + +@hookimpl(specname="permission_resources_sql") +async def default_action_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[PermissionSQL]: + """ + Provide default allow rules for standard view/execute actions. + + These defaults are skipped when datasette is started with --default-deny. + The restriction_sql mechanism (from actor_restrictions_sql) will still + filter these results if the actor has restrictions. + """ + if datasette.default_deny: + return None + + if action in DEFAULT_ALLOW_ACTIONS: + reason = f"default allow for {action}".replace("'", "''") + return PermissionSQL.allow(reason=reason) + + return None diff --git a/datasette/default_permissions/helpers.py b/datasette/default_permissions/helpers.py new file mode 100644 index 00000000..47e03569 --- /dev/null +++ b/datasette/default_permissions/helpers.py @@ -0,0 +1,85 @@ +""" +Shared helper utilities for default permission implementations. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette.permissions import PermissionSQL + + +def get_action_name_variants(datasette: "Datasette", action: str) -> Set[str]: + """ + Get all name variants for an action (full name and abbreviation). 
+ + Example: + get_action_name_variants(ds, "view-table") -> {"view-table", "vt"} + """ + variants = {action} + action_obj = datasette.actions.get(action) + if action_obj and action_obj.abbr: + variants.add(action_obj.abbr) + return variants + + +def action_in_list(datasette: "Datasette", action: str, action_list: list) -> bool: + """Check if an action (or its abbreviation) is in a list.""" + return bool(get_action_name_variants(datasette, action).intersection(action_list)) + + +@dataclass +class PermissionRow: + """A single permission rule row.""" + + parent: Optional[str] + child: Optional[str] + allow: bool + reason: str + + +class PermissionRowCollector: + """Collects permission rows and converts them to PermissionSQL.""" + + def __init__(self, prefix: str = "row"): + self.rows: List[PermissionRow] = [] + self.prefix = prefix + + def add( + self, + parent: Optional[str], + child: Optional[str], + allow: Optional[bool], + reason: str, + if_not_none: bool = False, + ) -> None: + """Add a permission row. If if_not_none=True, only add if allow is not None.""" + if if_not_none and allow is None: + return + self.rows.append(PermissionRow(parent, child, allow, reason)) + + def to_permission_sql(self) -> Optional[PermissionSQL]: + """Convert collected rows to a PermissionSQL object.""" + if not self.rows: + return None + + parts = [] + params = {} + + for idx, row in enumerate(self.rows): + key = f"{self.prefix}_{idx}" + parts.append( + f"SELECT :{key}_parent AS parent, :{key}_child AS child, " + f":{key}_allow AS allow, :{key}_reason AS reason" + ) + params[f"{key}_parent"] = row.parent + params[f"{key}_child"] = row.child + params[f"{key}_allow"] = 1 if row.allow else 0 + params[f"{key}_reason"] = row.reason + + sql = "\nUNION ALL\n".join(parts) + return PermissionSQL(sql=sql, params=params) diff --git a/datasette/default_permissions/restrictions.py b/datasette/default_permissions/restrictions.py new file mode 100644 index 00000000..a22cd7e5 --- /dev/null +++ b/datasette/default_permissions/restrictions.py @@ -0,0 +1,195 @@ +""" +Actor restriction handling for Datasette permissions. + +This module handles the _r (restrictions) key in actor dictionaries, which +contains allowlists of resources the actor can access. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional, Set, Tuple + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + +from .helpers import action_in_list, get_action_name_variants + + +@dataclass +class ActorRestrictions: + """Parsed actor restrictions from the _r key.""" + + global_actions: List[str] # _r.a - globally allowed actions + database_actions: dict # _r.d - {db_name: [actions]} + table_actions: dict # _r.r - {db_name: {table: [actions]}} + + @classmethod + def from_actor(cls, actor: Optional[dict]) -> Optional["ActorRestrictions"]: + """Parse restrictions from actor dict. 
Returns None if no restrictions.""" + if not actor: + return None + assert isinstance(actor, dict), "actor must be a dictionary" + + restrictions = actor.get("_r") + if restrictions is None: + return None + + return cls( + global_actions=restrictions.get("a", []), + database_actions=restrictions.get("d", {}), + table_actions=restrictions.get("r", {}), + ) + + def is_action_globally_allowed(self, datasette: "Datasette", action: str) -> bool: + """Check if action is in the global allowlist.""" + return action_in_list(datasette, action, self.global_actions) + + def get_allowed_databases(self, datasette: "Datasette", action: str) -> Set[str]: + """Get database names where this action is allowed.""" + allowed = set() + for db_name, db_actions in self.database_actions.items(): + if action_in_list(datasette, action, db_actions): + allowed.add(db_name) + return allowed + + def get_allowed_tables( + self, datasette: "Datasette", action: str + ) -> Set[Tuple[str, str]]: + """Get (database, table) pairs where this action is allowed.""" + allowed = set() + for db_name, tables in self.table_actions.items(): + for table_name, table_actions in tables.items(): + if action_in_list(datasette, action, table_actions): + allowed.add((db_name, table_name)) + return allowed + + +@hookimpl(specname="permission_resources_sql") +async def actor_restrictions_sql( + datasette: "Datasette", + actor: Optional[dict], + action: str, +) -> Optional[List[PermissionSQL]]: + """ + Handle actor restriction-based permission rules. + + When an actor has an "_r" key, it contains an allowlist of resources they + can access. This function returns restriction_sql that filters the final + results to only include resources in that allowlist. + + The _r structure: + { + "a": ["vi", "pd"], # Global actions allowed + "d": {"mydb": ["vt", "es"]}, # Database-level actions + "r": {"mydb": {"users": ["vt"]}} # Table-level actions + } + """ + if not actor: + return None + + restrictions = ActorRestrictions.from_actor(actor) + + if restrictions is None: + # No restrictions - all resources allowed + return [] + + # If globally allowed, no filtering needed + if restrictions.is_action_globally_allowed(datasette, action): + return [] + + # Build restriction SQL + allowed_dbs = restrictions.get_allowed_databases(datasette, action) + allowed_tables = restrictions.get_allowed_tables(datasette, action) + + # If nothing is allowed for this action, return empty-set restriction + if not allowed_dbs and not allowed_tables: + return [ + PermissionSQL( + params={"deny": f"actor restrictions: {action} not in allowlist"}, + restriction_sql="SELECT NULL AS parent, NULL AS child WHERE 0", + ) + ] + + # Build UNION of allowed resources + selects = [] + params = {} + counter = 0 + + # Database-level entries (parent, NULL) - allows all children + for db_name in allowed_dbs: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, NULL AS child") + params[f"{key}_parent"] = db_name + + # Table-level entries (parent, child) + for db_name, table_name in allowed_tables: + key = f"restr_{counter}" + counter += 1 + selects.append(f"SELECT :{key}_parent AS parent, :{key}_child AS child") + params[f"{key}_parent"] = db_name + params[f"{key}_child"] = table_name + + restriction_sql = "\nUNION ALL\n".join(selects) + + return [PermissionSQL(params=params, restriction_sql=restriction_sql)] + + +def restrictions_allow_action( + datasette: "Datasette", + restrictions: dict, + action: str, + resource: Optional[str | Tuple[str, str]], +) -> 
bool: + """ + Check if restrictions allow the requested action on the requested resource. + + This is a synchronous utility function for use by other code that needs + to quickly check restriction allowlists. + + Args: + datasette: The Datasette instance + restrictions: The _r dict from an actor + action: The action name to check + resource: None for global, str for database, (db, table) tuple for table + + Returns: + True if allowed, False if denied + """ + # Does this action have an abbreviation? + to_check = get_action_name_variants(datasette, action) + + # Check global level (any resource) + all_allowed = restrictions.get("a") + if all_allowed is not None: + assert isinstance(all_allowed, list) + if to_check.intersection(all_allowed): + return True + + # Check database level + if resource: + if isinstance(resource, str): + database_name = resource + else: + database_name = resource[0] + database_allowed = restrictions.get("d", {}).get(database_name) + if database_allowed is not None: + assert isinstance(database_allowed, list) + if to_check.intersection(database_allowed): + return True + + # Check table/resource level + if resource is not None and not isinstance(resource, str) and len(resource) == 2: + database, table = resource + table_allowed = restrictions.get("r", {}).get(database, {}).get(table) + if table_allowed is not None: + assert isinstance(table_allowed, list) + if to_check.intersection(table_allowed): + return True + + # This action is not explicitly allowed, so reject it + return False diff --git a/datasette/default_permissions/root.py b/datasette/default_permissions/root.py new file mode 100644 index 00000000..4931f7ff --- /dev/null +++ b/datasette/default_permissions/root.py @@ -0,0 +1,29 @@ +""" +Root user permission handling for Datasette. + +Grants full permissions to the root user when --root flag is used. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +from datasette import hookimpl +from datasette.permissions import PermissionSQL + + +@hookimpl(specname="permission_resources_sql") +async def root_user_permissions_sql( + datasette: "Datasette", + actor: Optional[dict], +) -> Optional[PermissionSQL]: + """ + Grant root user full permissions when --root flag is used. + """ + if not datasette.root_enabled: + return None + if actor is not None and actor.get("id") == "root": + return PermissionSQL.allow(reason="root user") diff --git a/datasette/default_permissions/tokens.py b/datasette/default_permissions/tokens.py new file mode 100644 index 00000000..474b0c23 --- /dev/null +++ b/datasette/default_permissions/tokens.py @@ -0,0 +1,95 @@ +""" +Token authentication for Datasette. + +Handles signed API tokens (dstok_ prefix). +""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from datasette.app import Datasette + +import itsdangerous + +from datasette import hookimpl + + +@hookimpl(specname="actor_from_request") +def actor_from_signed_api_token(datasette: "Datasette", request) -> Optional[dict]: + """ + Authenticate requests using signed API tokens (dstok_ prefix). 
+ + Token structure (signed JSON): + { + "a": "actor_id", # Actor ID + "t": 1234567890, # Timestamp (Unix epoch) + "d": 3600, # Optional: Duration in seconds + "_r": {...} # Optional: Restrictions + } + """ + prefix = "dstok_" + + # Check if tokens are enabled + if not datasette.setting("allow_signed_tokens"): + return None + + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") + + # Get authorization header + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + + # Remove prefix and verify signature + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + + # Validate timestamp + if "t" not in decoded: + return None + created = decoded["t"] + if not isinstance(created, int): + return None + + # Handle duration/expiry + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + return None + + # Apply max TTL if configured + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + + # Check expiry + if duration: + if time.time() - created > duration: + return None + + # Build actor dict + actor = {"id": decoded["a"], "token": "dstok"} + + # Copy restrictions if present + if "_r" in decoded: + actor["_r"] = decoded["_r"] + + # Add expiry timestamp if applicable + if duration: + actor["token_expires"] = created + duration + + return actor diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 6def3840..e2dd92b8 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1323,6 +1323,20 @@ async def test_actor_restrictions( ("dbname2", "tablename"), False, ), + # Table-level restriction allows access to that specific table + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "tablename"), + True, + ), + # But not to a different table in the same database + ( + {"r": {"dbname": {"tablename": ["view-table"]}}}, + "view-table", + ("dbname", "other_table"), + False, + ), ), ) async def test_restrictions_allow_action(restrictions, action, resource, expected): @@ -1653,3 +1667,48 @@ async def test_permission_check_view_requires_debug_permission(): data = response.json() assert data["action"] == "view-instance" assert data["allowed"] is True + + +@pytest.mark.asyncio +async def test_root_allow_block_with_table_restricted_actor(): + """ + Test that root-level allow: blocks are processed for actors with + table-level restrictions. + + This covers the case in config.py is_in_restriction_allowlist() where + parent=None, child=None and actor has table restrictions but not global. 
+ """ + from datasette.resources import TableResource + + # Config with root-level allow block that denies non-admin users + ds = Datasette( + config={ + "allow": {"id": "admin"}, # Root-level allow block + } + ) + await ds.invoke_startup() + db = ds.add_memory_database("mydb") + await db.execute_write("create table t1 (id integer primary key)") + await ds.client.get("/") # Trigger catalog refresh + + # Actor with table-level restrictions only (not global) + actor = {"id": "user", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + + # The root-level allow: {id: admin} should be processed and deny this user + # because they're not "admin", even though they have table restrictions + result = await ds.allowed( + action="view-table", + resource=TableResource("mydb", "t1"), + actor=actor, + ) + # Should be False because root allow: {id: admin} denies non-admin users + assert result is False + + # But admin with same restrictions should be allowed + admin_actor = {"id": "admin", "_r": {"r": {"mydb": {"t1": ["view-table"]}}}} + result = await ds.allowed( + action="view-table", + resource=TableResource("mydb", "t1"), + actor=admin_actor, + ) + assert result is True From 3eca3ad6d45c94da16a09b51a648052bbeeeaf2f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:16:39 -0800 Subject: [PATCH 39/42] Better recipe for 'just docs' --- Justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Justfile b/Justfile index abb134a6..a47662c3 100644 --- a/Justfile +++ b/Justfile @@ -29,7 +29,7 @@ export DATASETTE_SECRET := "not_a_secret" # Serve live docs on localhost:8000 @docs: cog blacken-docs - uv sync --extra docs && cd docs && uv run make livehtml + uv run --extra docs make -C docs livehtml # Build docs as static HTML @docs-build: cog blacken-docs From 03ab3592083c6677bde58f1bd20002963c980344 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:19:48 -0800 Subject: [PATCH 40/42] tool.uv.package = true --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4f487458..8ec1c6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,3 +93,6 @@ datasette = ["templates/*.html"] [tool.setuptools.dynamic] version = {attr = "datasette.version.__version__"} + +[tool.uv] +package = true From 2ca00b6c75b165c3318d06e6dc6eb228b9b60338 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Dec 2025 19:20:43 -0800 Subject: [PATCH 41/42] Release 1.0a23 Refs #2605, #2599 --- datasette/version.py | 2 +- docs/changelog.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index d0ff6ab1..fff37a72 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a22" +__version__ = "1.0a23" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index feba9390..feba7e86 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. _v1_0_a23: + +1.0a23 (2025-12-02) +------------------- + +- Fix for bug where a stale database entry in ``internal.db`` could cause a 500 error on the homepage. (:issue:`2605`) +- Cosmetic improvement to ``/-/actions`` page. (:issue:`2599`) + .. 
_v1_0_a22: 1.0a22 (2025-11-13) From 1d4448fc5603f479f11b37b9da0ee11c2b1a19e4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 4 Dec 2025 21:36:39 -0800 Subject: [PATCH 42/42] Use subtests in tests/test_docs.py (#2609) Closes #2608 --- pyproject.toml | 2 +- tests/test_docs.py | 53 +++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ec1c6b7..f3053447 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ docs = [ "ruamel.yaml", ] test = [ - "pytest>=5.2.2", + "pytest>=9", "pytest-xdist>=2.2.1", "pytest-asyncio>=1.2.0", "beautifulsoup4>=4.8.1", diff --git a/tests/test_docs.py b/tests/test_docs.py index 953224dd..b94a6f23 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -28,9 +28,10 @@ def settings_headings(): return get_headings((docs_path / "settings.rst").read_text(), "~") -@pytest.mark.parametrize("setting", app.SETTINGS) -def test_settings_are_documented(settings_headings, setting): - assert setting.name in settings_headings +def test_settings_are_documented(settings_headings, subtests): + for setting in app.SETTINGS: + with subtests.test(setting=setting.name): + assert setting.name in settings_headings @pytest.fixture(scope="session") @@ -38,21 +39,21 @@ def plugin_hooks_content(): return (docs_path / "plugin_hooks.rst").read_text() -@pytest.mark.parametrize( - "plugin", [name for name in dir(app.pm.hook) if not name.startswith("_")] -) -def test_plugin_hooks_are_documented(plugin, plugin_hooks_content): +def test_plugin_hooks_are_documented(plugin_hooks_content, subtests): headings = set() headings.update(get_headings(plugin_hooks_content, "-")) headings.update(get_headings(plugin_hooks_content, "~")) - assert plugin in headings - hook_caller = getattr(app.pm.hook, plugin) - arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] - # Check for plugin_name(arg1, arg2, arg3) - expected = f"{plugin}({', '.join(arg_names)})" - assert ( - expected in plugin_hooks_content - ), f"Missing from plugin hook documentation: {expected}" + plugins = [name for name in dir(app.pm.hook) if not name.startswith("_")] + for plugin in plugins: + with subtests.test(plugin=plugin): + assert plugin in headings + hook_caller = getattr(app.pm.hook, plugin) + arg_names = [a for a in hook_caller.spec.argnames if a != "__multicall__"] + # Check for plugin_name(arg1, arg2, arg3) + expected = f"{plugin}({', '.join(arg_names)})" + assert ( + expected in plugin_hooks_content + ), f"Missing from plugin hook documentation: {expected}" @pytest.fixture(scope="session") @@ -68,9 +69,11 @@ def documented_views(): return view_labels -@pytest.mark.parametrize("view_class", [v for v in dir(app) if v.endswith("View")]) -def test_view_classes_are_documented(documented_views, view_class): - assert view_class in documented_views +def test_view_classes_are_documented(documented_views, subtests): + view_classes = [v for v in dir(app) if v.endswith("View")] + for view_class in view_classes: + with subtests.test(view_class=view_class): + assert view_class in documented_views @pytest.fixture(scope="session") @@ -85,9 +88,10 @@ def documented_table_filters(): } -@pytest.mark.parametrize("filter", [f.key for f in Filters._filters]) -def test_table_filters_are_documented(documented_table_filters, filter): - assert filter in documented_table_filters +def test_table_filters_are_documented(documented_table_filters, subtests): + for f in Filters._filters: + with subtests.test(filter=f.key): + 
assert f.key in documented_table_filters @pytest.fixture(scope="session") @@ -101,9 +105,10 @@ def documented_fns(): } -@pytest.mark.parametrize("fn", utils.functions_marked_as_documented) -def test_functions_marked_with_documented_are_documented(documented_fns, fn): - assert fn.__name__ in documented_fns +def test_functions_marked_with_documented_are_documented(documented_fns, subtests): + for fn in utils.functions_marked_as_documented: + with subtests.test(fn=fn.__name__): + assert fn.__name__ in documented_fns def test_rst_heading_underlines_match_title_length():
{{ action.name }} {% if action.abbr %}{{ action.abbr }}{% endif %} {{ action.description or "" }}{{ action.resource_class }}{% if action.resource_class %}{{ action.resource_class }}{% endif %} {% if action.takes_parent %}✓{% endif %} {% if action.takes_child %}✓{% endif %} {% if action.also_requires %}{{ action.also_requires }}{% endif %}