From 75298db4ae305d14e9aa0099aad3fc8090e8f15d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 17 Oct 2025 17:04:05 -0700 Subject: [PATCH 1/2] Optimize database page table listing to avoid scanning all tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructured the table listing logic in get_tables() to check permissions via SQL first using the permission_resources_sql plugin hook, then only fetch table counts for tables that will actually be displayed. Previous implementation called table_counts() for all tables before checking permissions, which defeated the purpose of the optimization - it still required scanning every table just to list them. Changes: - Modified Database.table_counts() to accept optional 'tables' parameter that allows selective counting while preserving caching for immutable DBs - Rewrote get_tables() in database view to query catalog_tables for table names first (cheap operation), use resolve_permissions_from_catalog to check permissions in bulk, then only call table_counts() with the subset of allowed tables - Fixed bug in default_permissions.py where query_config could be a string instead of dict, causing AttributeError - Correctly handles table-level 'allow: True' blocks that should bypass database-level restrictions when determining privacy status All 177 permission tests passing. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- datasette/database.py | 24 ++++- datasette/default_permissions.py | 4 +- datasette/views/database.py | 169 ++++++++++++++++++++++++++----- 3 files changed, 169 insertions(+), 28 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index b74f02bb..54a81a67 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -374,12 +374,25 @@ class Database: self.cached_size = Path(self.path).stat().st_size return self.cached_size - async def table_counts(self, limit=10): + async def table_counts(self, limit=10, tables=None): + # Determine which tables we need counts for + if tables is None: + tables_to_count = await self.table_names() + else: + tables_to_count = tables + + # If we have cached counts for immutable database, use them if not self.is_mutable and self.cached_table_counts is not None: - return self.cached_table_counts + # Return only the requested tables from cache + return { + table: self.cached_table_counts.get(table) + for table in tables_to_count + if table in self.cached_table_counts + } + # Try to get counts for each table, $limit timeout for each count counts = {} - for table in await self.table_names(): + for table in tables_to_count: try: table_count = ( await self.execute( @@ -392,8 +405,11 @@ class Database: # QueryInterrupted - so we catch that too: except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError): counts[table] = None - if not self.is_mutable: + + # Only cache if we counted all tables + if tables is None and not self.is_mutable: self._cached_table_counts = counts + return counts @property diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index a9534cab..abad3787 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -263,6 +263,9 @@ async def _config_permission_rules(datasette, actor, action) -> list[PluginSQL]: ) for query_name, query_config in (db_config.get("queries") or {}).items(): + # query_config can be a string (just SQL) or a dict + if isinstance(query_config, str): + continue query_perm = (query_config.get("permissions") or {}).get(action) add_row( db_name, @@ -325,7 +328,6 @@ async def _config_permission_rules(datasette, actor, action) -> list[PluginSQL]: params[f"{key}_reason"] = reason sql = "\nUNION ALL\n".join(parts) - print(sql, params) return [PluginSQL(source="config_permissions", sql=sql, params=params)] diff --git a/datasette/views/database.py b/datasette/views/database.py index 33ee07b3..7312442b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -273,34 +273,157 @@ class QueryContext: async def get_tables(datasette, request, db): tables = [] database = db.name - table_counts = await db.table_counts(100) hidden_table_names = set(await db.hidden_table_names()) all_foreign_keys = await db.get_all_foreign_keys() - for table in table_counts: - table_visible, table_private = await datasette.check_visibility( - request.actor, - permissions=[ - ("view-table", (database, table)), - ("view-database", database), - "view-instance", - ], + # Use the new SQL-based permission system to check all tables at once + from datasette.utils.permissions import resolve_permissions_from_catalog, PluginSQL + from datasette.plugins import pm + from datasette.utils import await_me_maybe + + # Get all table names from catalog (cheap operation, no scanning) + internal_db = datasette.get_internal_database() + table_names_result = await internal_db.execute( + "SELECT table_name FROM catalog_tables WHERE database_name = ?", [database] + ) + table_names = [row["table_name"] for row in table_names_result.rows] + + if table_names: + # Use catalog_tables for candidate SQL to query all tables in this database at once + candidate_sql = "SELECT :database AS parent, table_name AS child FROM catalog_tables WHERE database_name = :database" + candidate_params = {"database": database} + + # Get plugin SQL blocks for view-table permission + plugins = [] + for block in pm.hook.permission_resources_sql( + datasette=datasette, + actor=request.actor, + action="view-table", + ): + block = await await_me_maybe(block) + if block is None: + continue + if isinstance(block, (list, tuple)): + candidates = block + else: + candidates = [block] + for candidate in candidates: + if candidate is None: + continue + if not isinstance(candidate, PluginSQL): + continue + plugins.append(candidate) + + # Resolve permissions for all tables at once + if isinstance(request.actor, dict): + actor_id = request.actor.get("id") + elif request.actor: + actor_id = request.actor + else: + actor_id = None + internal_db = datasette.get_internal_database() + permission_results = await resolve_permissions_from_catalog( + internal_db, + actor=str(actor_id) if actor_id is not None else "", + plugins=plugins, + action="view-table", + candidate_sql=candidate_sql, + candidate_params=candidate_params, + implicit_deny=True, ) - if not table_visible: - continue + + # Create a lookup dict for allowed tables and their privacy status + allowed_tables = {} + for result in permission_results: + table_name = result["child"] + if result["allow"] == 1: + allowed_tables[table_name] = result + + # Check which tables are visible to anonymous users (for determining "private" status) + # A table is visible to anonymous users if BOTH view-database AND view-table pass + anon_allowed_tables = set() + if request.actor: + # Check if anonymous users can view the database + anon_can_view_database = await datasette.permission_allowed( + None, "view-database", database + ) + + # Generate new plugin SQL blocks for anonymous user to check table permissions + anon_plugins = [] + for block in pm.hook.permission_resources_sql( + datasette=datasette, + actor="", + action="view-table", + ): + block = await await_me_maybe(block) + if block is None: + continue + if isinstance(block, (list, tuple)): + candidates = block + else: + candidates = [block] + for candidate in candidates: + if candidate is None: + continue + if not isinstance(candidate, PluginSQL): + continue + anon_plugins.append(candidate) + + anon_permission_results = await resolve_permissions_from_catalog( + internal_db, + actor="", + plugins=anon_plugins, + action="view-table", + candidate_sql=candidate_sql, + candidate_params=candidate_params, + implicit_deny=True, + ) + + # A table is not private if anonymous users can view it + # This requires passing BOTH view-table AND view-database checks + # UNLESS the table has an explicit allow block that overrides database restrictions + # We approximate this by checking if the permission result has a specific "config allow" reason + # which indicates an explicit table-level allow block + for result in anon_permission_results: + if result["allow"] == 1: + # Check if this is from an explicit table-level allow block + # or if the anonymous user can also view the database + reason = result.get("reason", "") + has_explicit_table_allow = ( + "config allow allow for view-table" in reason + ) + if has_explicit_table_allow or anon_can_view_database: + anon_allowed_tables.add(result["child"]) + else: + allowed_tables = {} + anon_allowed_tables = set() + + # Build the tables list for allowed tables only + # Only get table counts for the tables we're actually going to display + allowed_table_names = list(allowed_tables.keys()) + + # Get counts only for allowed tables (uses caching mechanism) + if allowed_table_names: + table_counts = await db.table_counts(limit=10, tables=allowed_table_names) + else: + table_counts = {} + + for table in allowed_table_names: + # Determine if table is private (not visible to anonymous users) + table_private = bool(request.actor and table not in anon_allowed_tables) + table_columns = await db.table_columns(table) - tables.append( - { - "name": table, - "columns": table_columns, - "primary_keys": await db.primary_keys(table), - "count": table_counts[table], - "hidden": table in hidden_table_names, - "fts_table": await db.fts_table(table), - "foreign_keys": all_foreign_keys[table], - "private": table_private, - } - ) + table_dict = { + "name": table, + "columns": table_columns, + "primary_keys": await db.primary_keys(table), + "count": table_counts.get(table), + "hidden": table in hidden_table_names, + "fts_table": await db.fts_table(table), + "foreign_keys": all_foreign_keys.get(table, {}), + "private": table_private, + } + tables.append(table_dict) tables.sort(key=lambda t: (t["hidden"], t["name"])) return tables From cb6ffca471b5ad08081b34540f3accfcc2951cc0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 23 Oct 2025 08:39:53 -0700 Subject: [PATCH 2/2] New navigation-search feature, new /-/tables.json endpoint Closes #2523 Also added allow() method with keyword-only arguments, closes #2519 --- datasette/app.py | 170 ++++++++++- datasette/static/navigation-search.js | 401 ++++++++++++++++++++++++++ datasette/templates/base.html | 2 + datasette/views/special.py | 37 +++ docs/introspection.rst | 34 +++ 5 files changed, 642 insertions(+), 2 deletions(-) create mode 100644 datasette/static/navigation-search.js diff --git a/datasette/app.py b/datasette/app.py index 6c7026a8..2658d848 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -52,6 +52,7 @@ from .views.special import ( AllowedResourcesView, PermissionRulesView, PermissionCheckView, + TablesSearchView, ) from .views.table import ( TableInsertView, @@ -1069,8 +1070,161 @@ class Datasette: ) return sql, params - async def permission_allowed_2( - self, actor, action, resource=None, *, default=DEFAULT_NOT_SET + async def get_allowed_tables( + self, + actor, + database: Optional[str] = None, + extra_sql: str = "", + extra_params: Optional[dict] = None, + ): + """ + Get list of tables the actor is allowed to view. + + Args: + actor: The actor dict (or None for anonymous) + database: Optional database name to filter by + extra_sql: Optional extra SQL to add to the WHERE clause + extra_params: Optional parameters for the extra SQL + + Returns: + List of dicts with keys: database, table, resource + """ + from datasette.utils.permissions import resolve_permissions_from_catalog + + await self.refresh_schemas() + internal_db = self.get_internal_database() + + # Build the candidate SQL query + where_clauses = [] + params = extra_params.copy() if extra_params else {} + + if database: + where_clauses.append("database_name = :database") + params["database"] = database + + if extra_sql: + where_clauses.append(f"({extra_sql})") + + where_sql = " AND ".join(where_clauses) if where_clauses else "1=1" + + candidate_sql = f""" + SELECT database_name AS parent, table_name AS child + FROM catalog_tables + WHERE {where_sql} + """ + + # Collect plugin SQL blocks for view-table permission + table_plugins = [] + for block in pm.hook.permission_resources_sql( + datasette=self, + actor=actor, + action="view-table", + ): + block = await await_me_maybe(block) + if block is None: + continue + if isinstance(block, (list, tuple)): + candidates = block + else: + candidates = [block] + for candidate in candidates: + if candidate is None: + continue + if not isinstance(candidate, PluginSQL): + continue + table_plugins.append(candidate) + + # Collect plugin SQL blocks for view-database permission + db_plugins = [] + for block in pm.hook.permission_resources_sql( + datasette=self, + actor=actor, + action="view-database", + ): + block = await await_me_maybe(block) + if block is None: + continue + if isinstance(block, (list, tuple)): + candidates = block + else: + candidates = [block] + for candidate in candidates: + if candidate is None: + continue + if not isinstance(candidate, PluginSQL): + continue + db_plugins.append(candidate) + + # Get actor_id for resolve_permissions_from_catalog + if isinstance(actor, dict): + actor_id = actor.get("id") + elif actor: + actor_id = actor + else: + actor_id = None + + actor_str = str(actor_id) if actor_id is not None else "" + + # Resolve permissions for all matching tables + table_permission_results = await resolve_permissions_from_catalog( + internal_db, + actor=actor_str, + plugins=table_plugins, + action="view-table", + candidate_sql=candidate_sql, + candidate_params=params, + implicit_deny=True, + ) + + # Get unique database names from table results + database_names = list( + set(r["parent"] for r in table_permission_results if r["allow"] == 1) + ) + + # Check view-database permissions for those databases + if database_names: + # Build placeholders and params dict for database check + placeholders = ",".join(f":db{i}" for i in range(len(database_names))) + db_params = {f"db{i}": db_name for i, db_name in enumerate(database_names)} + + db_candidate_sql = f""" + SELECT database_name AS parent, NULL AS child + FROM catalog_databases + WHERE database_name IN ({placeholders}) + """ + db_permission_results = await resolve_permissions_from_catalog( + internal_db, + actor=actor_str, + plugins=db_plugins, + action="view-database", + candidate_sql=db_candidate_sql, + candidate_params=db_params, + implicit_deny=True, + ) + + # Create set of allowed databases + allowed_databases = { + r["parent"] for r in db_permission_results if r["allow"] == 1 + } + else: + allowed_databases = set() + + # Filter to only tables in allowed databases + allowed = [] + for result in table_permission_results: + if result["allow"] == 1 and result["parent"] in allowed_databases: + allowed.append( + { + "database": result["parent"], + "table": result["child"], + "resource": result["resource"], + } + ) + + return allowed + + async def allowed( + self, *, actor, action, resource=None, default=DEFAULT_NOT_SET ): """Permission check backed by permission_resources_sql rules.""" @@ -1178,6 +1332,14 @@ class Datasette: return result + async def permission_allowed_2( + self, actor, action, resource=None, *, default=DEFAULT_NOT_SET + ): + """Legacy method that delegates to allowed().""" + return await self.allowed( + actor=actor, action=action, resource=resource, default=default + ) + async def ensure_permissions( self, actor: dict, @@ -1754,6 +1916,10 @@ class Datasette: AllowDebugView.as_view(self), r"/-/allow-debug$", ) + add_route( + TablesSearchView.as_view(self), + r"/-/tables(\.(?Pjson))?$", + ) add_route( wrap_view(PatternPortfolioView, self), r"/-/patterns$", diff --git a/datasette/static/navigation-search.js b/datasette/static/navigation-search.js new file mode 100644 index 00000000..202839d5 --- /dev/null +++ b/datasette/static/navigation-search.js @@ -0,0 +1,401 @@ +class NavigationSearch extends HTMLElement { + constructor() { + super(); + this.attachShadow({ mode: 'open' }); + this.selectedIndex = -1; + this.matches = []; + this.debounceTimer = null; + + this.render(); + this.setupEventListeners(); + } + + render() { + this.shadowRoot.innerHTML = ` + + + +
+
+ +
+
+
+ ↑ ↓ Navigate + Enter Select + Esc Close +
+
+
+ `; + } + + setupEventListeners() { + const dialog = this.shadowRoot.querySelector('dialog'); + const input = this.shadowRoot.querySelector('.search-input'); + const resultsContainer = this.shadowRoot.querySelector('.results-container'); + + // Global keyboard listener for "/" + document.addEventListener('keydown', (e) => { + if (e.key === '/' && !this.isInputFocused() && !dialog.open) { + e.preventDefault(); + this.openMenu(); + } + }); + + // Input event + input.addEventListener('input', (e) => { + this.handleSearch(e.target.value); + }); + + // Keyboard navigation + input.addEventListener('keydown', (e) => { + if (e.key === 'ArrowDown') { + e.preventDefault(); + this.moveSelection(1); + } else if (e.key === 'ArrowUp') { + e.preventDefault(); + this.moveSelection(-1); + } else if (e.key === 'Enter') { + e.preventDefault(); + this.selectCurrentItem(); + } else if (e.key === 'Escape') { + this.closeMenu(); + } + }); + + // Click on result item + resultsContainer.addEventListener('click', (e) => { + const item = e.target.closest('.result-item'); + if (item) { + const index = parseInt(item.dataset.index); + this.selectItem(index); + } + }); + + // Close on backdrop click + dialog.addEventListener('click', (e) => { + if (e.target === dialog) { + this.closeMenu(); + } + }); + + // Initial load + this.loadInitialData(); + } + + isInputFocused() { + const activeElement = document.activeElement; + return activeElement && ( + activeElement.tagName === 'INPUT' || + activeElement.tagName === 'TEXTAREA' || + activeElement.isContentEditable + ); + } + + loadInitialData() { + const itemsAttr = this.getAttribute('items'); + if (itemsAttr) { + try { + this.allItems = JSON.parse(itemsAttr); + this.matches = this.allItems; + } catch (e) { + console.error('Failed to parse items attribute:', e); + this.allItems = []; + this.matches = []; + } + } + } + + handleSearch(query) { + clearTimeout(this.debounceTimer); + + this.debounceTimer = setTimeout(() => { + const url = this.getAttribute('url'); + + if (url) { + // Fetch from API + this.fetchResults(url, query); + } else { + // Filter local items + this.filterLocalItems(query); + } + }, 200); + } + + async fetchResults(url, query) { + try { + const searchUrl = `${url}?q=${encodeURIComponent(query)}`; + const response = await fetch(searchUrl); + const data = await response.json(); + this.matches = data.matches || []; + this.selectedIndex = this.matches.length > 0 ? 0 : -1; + this.renderResults(); + } catch (e) { + console.error('Failed to fetch search results:', e); + this.matches = []; + this.renderResults(); + } + } + + filterLocalItems(query) { + if (!query.trim()) { + this.matches = []; + } else { + const lowerQuery = query.toLowerCase(); + this.matches = (this.allItems || []).filter(item => + item.name.toLowerCase().includes(lowerQuery) || + item.url.toLowerCase().includes(lowerQuery) + ); + } + this.selectedIndex = this.matches.length > 0 ? 0 : -1; + this.renderResults(); + } + + renderResults() { + const container = this.shadowRoot.querySelector('.results-container'); + const input = this.shadowRoot.querySelector('.search-input'); + + if (this.matches.length === 0) { + const message = input.value.trim() ? 'No results found' : 'Start typing to search...'; + container.innerHTML = `
${message}
`; + return; + } + + container.innerHTML = this.matches.map((match, index) => ` +
+
+
${this.escapeHtml(match.name)}
+
${this.escapeHtml(match.url)}
+
+
+ `).join(''); + + // Scroll selected item into view + if (this.selectedIndex >= 0) { + const selectedItem = container.children[this.selectedIndex]; + if (selectedItem) { + selectedItem.scrollIntoView({ block: 'nearest' }); + } + } + } + + moveSelection(direction) { + const newIndex = this.selectedIndex + direction; + if (newIndex >= 0 && newIndex < this.matches.length) { + this.selectedIndex = newIndex; + this.renderResults(); + } + } + + selectCurrentItem() { + if (this.selectedIndex >= 0 && this.selectedIndex < this.matches.length) { + this.selectItem(this.selectedIndex); + } + } + + selectItem(index) { + const match = this.matches[index]; + if (match) { + // Dispatch custom event + this.dispatchEvent(new CustomEvent('select', { + detail: match, + bubbles: true, + composed: true + })); + + // Navigate to URL + window.location.href = match.url; + + this.closeMenu(); + } + } + + openMenu() { + const dialog = this.shadowRoot.querySelector('dialog'); + const input = this.shadowRoot.querySelector('.search-input'); + + dialog.showModal(); + input.value = ''; + input.focus(); + + // Reset state - start with no items shown + this.matches = []; + this.selectedIndex = -1; + this.renderResults(); + } + + closeMenu() { + const dialog = this.shadowRoot.querySelector('dialog'); + dialog.close(); + } + + escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; + } +} + +// Register the custom element +customElements.define('navigation-search', NavigationSearch); \ No newline at end of file diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 0b2def5a..0d89e11c 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -72,5 +72,7 @@ {% endfor %} {% if select_templates %}{% endif %} + + diff --git a/datasette/views/special.py b/datasette/views/special.py index 7e5ce517..bba44a45 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -776,6 +776,43 @@ class CreateTokenView(BaseView): return await self.render(["create_token.html"], request, context) +class TablesSearchView(BaseView): + name = "tables_search" + has_json_alternate = False + + async def get(self, request): + # Get the search query parameter + query = request.args.get("q", "").strip() + + if not query: + return Response.json({"matches": []}) + + # Use the new get_allowed_tables() method with search + extra_sql = "table_name LIKE :search" + extra_params = {"search": f"%{query}%"} + + allowed_tables = await self.ds.get_allowed_tables( + actor=request.actor, extra_sql=extra_sql, extra_params=extra_params + ) + + # Format the response + matches = [] + for item in allowed_tables: + database = item["database"] + table = item["table"] + matches.append( + { + "url": self.ds.urls.table(database, table), + "name": f"{database}: {table}", + } + ) + + response = Response.json({"matches": matches}) + if self.ds.cors: + add_cors_headers(response.headers) + return response + + class ApiExplorerView(BaseView): name = "api_explorer" has_json_alternate = False diff --git a/docs/introspection.rst b/docs/introspection.rst index ff78ec78..0328120a 100644 --- a/docs/introspection.rst +++ b/docs/introspection.rst @@ -198,3 +198,37 @@ Shows the currently authenticated actor. Useful for debugging Datasette authenti ----------- The debug tool at ``/-/messages`` can be used to set flash messages to try out that feature. See :ref:`datasette_add_message` for details of this feature. + +.. _TablesSearchView: + +/-/tables.json +-------------- + +The ``/-/tables.json`` endpoint provides a JSON API for searching tables that the current user has permission to access. + +Pass a ``?q=`` query parameter with your search term to find matching tables. The search matches against table names using a case-insensitive substring match. + +This endpoint returns JSON only and respects the current user's permissions - only tables they are allowed to view will be included in the results. + +Example request: + +``/-/tables.json?q=users`` + +Example response: + +.. code-block:: json + + { + "matches": [ + { + "url": "/mydb/users", + "name": "mydb: users" + }, + { + "url": "/otherdb/users_archive", + "name": "otherdb: users_archive" + } + ] + } + +If no search query is provided, the endpoint returns an empty matches array.