Add keyset pagination to allowed_resources() (#2562)

* Add keyset pagination to allowed_resources()

This replaces the unbounded list return with PaginatedResources,
which supports efficient keyset pagination for handling thousands
of resources.

Closes #2560

Changes:
- allowed_resources() now returns PaginatedResources instead of list
- Added limit (1-1000, default 100) and next (keyset token) parameters
- Added include_reasons parameter (replaces allowed_resources_with_reasons)
- Removed allowed_resources_with_reasons() method entirely
- Added PaginatedResources.all() async generator for automatic pagination
- Uses tilde-encoding for tokens (matching table pagination)
- Updated all callers to use .resources accessor
- Updated documentation with new API and examples

The PaginatedResources object has:
- resources: List of Resource objects for current page
- next: Token for next page (None if no more results)
- all(): Async generator that yields all resources across pages

Example usage:
    page = await ds.allowed_resources("view-table", actor, limit=100)
    for table in page.resources:
        print(table.child)

    # Iterate all pages automatically
    async for table in page.all():
        print(table.child)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Simon Willison 2025-10-31 14:50:46 -07:00 committed by GitHub
commit 400fa08e4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 366 additions and 223 deletions

View file

@ -71,6 +71,7 @@ from .url_builder import Urls
from .database import Database, QueryInterrupted
from .utils import (
PaginatedResources,
PrefixedUrlString,
SPATIALITE_FUNCTIONS,
StartupError,
@ -91,6 +92,7 @@ from .utils import (
resolve_env_secrets,
resolve_routes,
tilde_decode,
tilde_encode,
to_css_class,
urlsafe_components,
redact_keys,
@ -1147,104 +1149,147 @@ class Datasette:
*,
parent: str | None = None,
include_is_private: bool = False,
) -> list["Resource"]:
include_reasons: bool = False,
limit: int = 100,
next: str | None = None,
) -> PaginatedResources:
"""
Return all resources the actor can access for the given action.
Return paginated resources the actor can access for the given action.
Uses SQL to filter resources based on cascading permission rules.
Returns instances of the appropriate Resource subclass.
Uses SQL with keyset pagination to efficiently filter resources.
Returns PaginatedResources with list of Resource instances and pagination metadata.
Args:
action: The action name (e.g., "view-table")
actor: The actor dict (or None for unauthenticated)
parent: Optional parent filter (e.g., database name) to limit results
include_is_private: If True, adds a .private attribute to each Resource
include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons
limit: Maximum number of results to return (1-1000, default 100)
next: Keyset token from previous page for pagination
Returns:
PaginatedResources with:
- resources: List of Resource objects for this page
- next: Token for next page (None if no more results)
Example:
# Get all tables
tables = await datasette.allowed_resources("view-table", actor)
for table in tables:
# Get first page of tables
page = await datasette.allowed_resources("view-table", actor, limit=50)
for table in page.resources:
print(f"{table.parent}/{table.child}")
# Get tables for specific database with private flag
tables = await datasette.allowed_resources(
"view-table", actor, parent="mydb", include_is_private=True
# Get next page
if page.next:
next_page = await datasette.allowed_resources(
"view-table", actor, limit=50, next=page.next
)
# With reasons for debugging
page = await datasette.allowed_resources(
"view-table", actor, include_reasons=True
)
for table in tables:
if table.private:
print(f"{table.child} is private")
for table in page.resources:
print(f"{table.child}: {table.reasons}")
# Iterate through all results with async generator
page = await datasette.allowed_resources("view-table", actor)
async for table in page.all():
print(table.child)
"""
action_obj = self.actions.get(action)
if not action_obj:
raise ValueError(f"Unknown action: {action}")
# Validate and cap limit
limit = min(max(1, limit), 1000)
# Get base SQL query
query, params = await self.allowed_resources_sql(
action=action,
actor=actor,
parent=parent,
include_is_private=include_is_private,
)
result = await self.get_internal_database().execute(query, params)
# Instantiate the appropriate Resource subclass for each row
# Add keyset pagination WHERE clause if next token provided
if next:
try:
components = urlsafe_components(next)
if len(components) >= 2:
last_parent, last_child = components[0], components[1]
# Keyset condition: (parent > last) OR (parent = last AND child > last)
keyset_where = """
(parent > :keyset_parent OR
(parent = :keyset_parent AND child > :keyset_child))
"""
# Wrap original query and add keyset filter
query = f"SELECT * FROM ({query}) WHERE {keyset_where}"
params["keyset_parent"] = last_parent
params["keyset_child"] = last_child
except (ValueError, KeyError):
# Invalid token - ignore and start from beginning
pass
# Add LIMIT (fetch limit+1 to detect if there are more results)
# Note: query from allowed_resources_sql() already includes ORDER BY parent, child
query = f"{query} LIMIT :limit"
params["limit"] = limit + 1
# Execute query
result = await self.get_internal_database().execute(query, params)
rows = list(result.rows)
# Check if truncated (got more than limit rows)
truncated = len(rows) > limit
if truncated:
rows = rows[:limit] # Remove the extra row
# Build Resource objects with optional attributes
resources = []
for row in result.rows:
# row[0]=parent, row[1]=child, row[2]=reason (ignored), row[3]=is_private (if requested)
for row in rows:
# row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested)
resource = self.resource_for_action(action, parent=row[0], child=row[1])
# Add reasons if requested
if include_reasons:
reason_json = row[2]
try:
reasons_array = (
json.loads(reason_json) if isinstance(reason_json, str) else []
)
resource.reasons = [r for r in reasons_array if r is not None]
except (json.JSONDecodeError, TypeError):
resource.reasons = [reason_json] if reason_json else []
# Add private flag if requested
if include_is_private:
resource.private = bool(row[3])
resources.append(resource)
return resources
# Generate next token if there are more results
next_token = None
if truncated and resources:
last_resource = resources[-1]
# Use tilde-encoding like table pagination
next_token = "{},{}".format(
tilde_encode(str(last_resource.parent)),
tilde_encode(str(last_resource.child)),
)
async def allowed_resources_with_reasons(
self,
action: str,
actor: dict | None = None,
) -> list["AllowedResource"]:
"""
Return allowed resources with permission reasons for debugging.
Uses SQL to filter resources and includes the reason each was allowed.
Returns list of AllowedResource named tuples with (resource, reason).
Example:
debug_info = await datasette.allowed_resources_with_reasons("view-table", actor)
for allowed in debug_info:
print(f"{allowed.resource}: {allowed.reason}")
"""
from datasette.permissions import AllowedResource
action_obj = self.actions.get(action)
if not action_obj:
raise ValueError(f"Unknown action: {action}")
query, params = await self.allowed_resources_sql(action=action, actor=actor)
result = await self.get_internal_database().execute(query, params)
resources = []
for row in result.rows:
resource = self.resource_for_action(action, parent=row[0], child=row[1])
reason_json = row[2]
# Parse JSON array of reasons and filter out nulls
try:
import json
reasons_array = (
json.loads(reason_json) if isinstance(reason_json, str) else []
)
reasons_filtered = [r for r in reasons_array if r is not None]
# Store as list for multiple reasons, or keep empty list
reason = reasons_filtered
except (json.JSONDecodeError, TypeError):
# Fallback for backward compatibility
reason = [reason_json] if reason_json else []
resources.append(AllowedResource(resource=resource, reason=reason))
return resources
return PaginatedResources(
resources=resources,
next=next_token,
_datasette=self,
_action=action,
_actor=actor,
_parent=parent,
_include_is_private=include_is_private,
_include_reasons=include_reasons,
_limit=limit,
)
async def allowed(
self,

View file

@ -16,6 +16,10 @@ class Resource(ABC):
name: str = None # e.g., "table", "database", "model"
parent_name: str | None = None # e.g., "database" for tables
# Instance-level optional extra attributes
reasons: list[str] | None = None
include_reasons: bool | None = None
def __init__(self, parent: str | None = None, child: str | None = None):
"""
Create a resource instance.

View file

@ -4,6 +4,7 @@ import aiofiles
import click
from collections import OrderedDict, namedtuple, Counter
import copy
import dataclasses
import base64
import hashlib
import inspect
@ -27,6 +28,58 @@ from .sqlite import sqlite3, supports_table_xinfo
if typing.TYPE_CHECKING:
from datasette.database import Database
from datasette.permissions import Resource
@dataclasses.dataclass
class PaginatedResources:
"""Paginated results from allowed_resources query."""
resources: List["Resource"]
next: str | None # Keyset token for next page (None if no more results)
_datasette: typing.Any = dataclasses.field(default=None, repr=False)
_action: str = dataclasses.field(default=None, repr=False)
_actor: typing.Any = dataclasses.field(default=None, repr=False)
_parent: str | None = dataclasses.field(default=None, repr=False)
_include_is_private: bool = dataclasses.field(default=False, repr=False)
_include_reasons: bool = dataclasses.field(default=False, repr=False)
_limit: int = dataclasses.field(default=100, repr=False)
async def all(self):
"""
Async generator that yields all resources across all pages.
Automatically handles pagination under the hood. This is useful when you need
to iterate through all results without manually managing pagination tokens.
Yields:
Resource objects one at a time
Example:
page = await datasette.allowed_resources("view-table", actor)
async for table in page.all():
print(f"{table.parent}/{table.child}")
"""
# Yield all resources from current page
for resource in self.resources:
yield resource
# Continue fetching subsequent pages if there are more
next_token = self.next
while next_token:
page = await self._datasette.allowed_resources(
self._action,
self._actor,
parent=self._parent,
include_is_private=self._include_is_private,
include_reasons=self._include_reasons,
limit=self._limit,
next=next_token,
)
for resource in page.resources:
yield resource
next_token = page.next
# From https://www.sqlite.org/lang_keywords.html
reserved_words = set(

View file

@ -70,12 +70,15 @@ class DatabaseView(View):
metadata = await datasette.get_database_metadata(database)
# Get all tables/views this actor can see in bulk with private flag
allowed_tables = await datasette.allowed_resources(
"view-table", request.actor, parent=database, include_is_private=True
allowed_tables_page = await datasette.allowed_resources(
"view-table",
request.actor,
parent=database,
include_is_private=True,
limit=1000,
)
# Create lookup dict for quick access
allowed_dict = {r.child: r for r in allowed_tables}
allowed_dict = {r.child: r for r in allowed_tables_page.resources}
# Filter to just views
view_names_set = set(await db.view_names())
@ -88,14 +91,18 @@ class DatabaseView(View):
tables = await get_tables(datasette, request, db, allowed_dict)
# Get allowed queries using the new permission system
allowed_query_resources = await datasette.allowed_resources(
"view-query", request.actor, parent=database, include_is_private=True
allowed_query_page = await datasette.allowed_resources(
"view-query",
request.actor,
parent=database,
include_is_private=True,
limit=1000,
)
# Build canned_queries list by looking up each allowed query
all_queries = await datasette.get_canned_queries(database, request.actor)
canned_queries = []
for query_resource in allowed_query_resources:
for query_resource in allowed_query_page.resources:
query_name = query_resource.child
if query_name in all_queries:
canned_queries.append(
@ -509,12 +516,15 @@ class QueryView(View):
database = db.name
# Get all tables/views this actor can see in bulk with private flag
allowed_tables = await datasette.allowed_resources(
"view-table", request.actor, parent=database, include_is_private=True
allowed_tables_page = await datasette.allowed_resources(
"view-table",
request.actor,
parent=database,
include_is_private=True,
limit=1000,
)
# Create lookup dict for quick access
allowed_dict = {r.child: r for r in allowed_tables}
allowed_dict = {r.child: r for r in allowed_tables_page.resources}
# Are we a canned query?
canned_query = None

View file

@ -28,17 +28,18 @@ class IndexView(BaseView):
await self.ds.ensure_permission(action="view-instance", actor=request.actor)
# Get all allowed databases and tables in bulk
allowed_databases = await self.ds.allowed_resources(
db_page = await self.ds.allowed_resources(
"view-database", request.actor, include_is_private=True
)
allowed_databases = [r async for r in db_page.all()]
allowed_db_dict = {r.parent: r for r in allowed_databases}
allowed_tables = await self.ds.allowed_resources(
# Group tables by database
tables_by_db = {}
table_page = await self.ds.allowed_resources(
"view-table", request.actor, include_is_private=True
)
# Group by database
tables_by_db = {}
for t in allowed_tables:
async for t in table_page.all():
if t.parent not in tables_by_db:
tables_by_db[t.parent] = {}
tables_by_db[t.parent][t.child] = t

View file

@ -268,19 +268,38 @@ class AllowedResourcesView(BaseView):
offset = (page - 1) * page_size
# Use the simplified allowed_resources method
# If user has debug permission, use the with_reasons variant
# Collect all resources with optional reasons for debugging
try:
if has_debug_permission:
allowed_resources = await self.ds.allowed_resources_with_reasons(
action=action,
actor=actor,
)
else:
allowed_resources = await self.ds.allowed_resources(
action=action,
actor=actor,
parent=parent_filter,
)
allowed_rows = []
result = await self.ds.allowed_resources(
action=action,
actor=actor,
parent=parent_filter,
include_reasons=has_debug_permission,
)
async for resource in result.all():
parent_val = resource.parent
child_val = resource.child
# Build resource path
if parent_val is None:
resource_path = "/"
elif child_val is None:
resource_path = f"/{parent_val}"
else:
resource_path = f"/{parent_val}/{child_val}"
row = {
"parent": parent_val,
"child": child_val,
"resource": resource_path,
}
# Add reason if we have it (from include_reasons=True)
if has_debug_permission and hasattr(resource, "reasons"):
row["reason"] = resource.reasons
allowed_rows.append(row)
except Exception:
# If catalog tables don't exist yet, return empty results
return (
@ -295,46 +314,6 @@ class AllowedResourcesView(BaseView):
200,
)
# Convert to list of dicts with resource path
allowed_rows = []
for item in allowed_resources:
# Extract resource and reason depending on what we got back
if has_debug_permission:
# allowed_resources_with_reasons returns AllowedResource(resource, reason)
resource = item.resource
reason = item.reason
else:
# allowed_resources returns plain Resource objects
resource = item
reason = None
parent_val = resource.parent
child_val = resource.child
# Apply parent filter if needed (when using with_reasons, we need to filter manually)
if parent_filter is not None and parent_val != parent_filter:
continue
# Build resource path
if parent_val is None:
resource_path = "/"
elif child_val is None:
resource_path = f"/{parent_val}"
else:
resource_path = f"/{parent_val}/{child_val}"
row = {
"parent": parent_val,
"child": child_val,
"resource": resource_path,
}
# Add reason if we have it (it's already a list from allowed_resources_with_reasons)
if reason is not None:
row["reason"] = reason
allowed_rows.append(row)
# Apply child filter if specified
if child_filter is not None:
allowed_rows = [row for row in allowed_rows if row["child"] == child_filter]
@ -652,10 +631,11 @@ class CreateTokenView(BaseView):
async def shared(self, request):
self.check_permission(request)
# Build list of databases and tables the user has permission to view
allowed_databases = await self.ds.allowed_resources(
"view-database", request.actor
)
allowed_tables = await self.ds.allowed_resources("view-table", request.actor)
db_page = await self.ds.allowed_resources("view-database", request.actor)
allowed_databases = [r async for r in db_page.all()]
table_page = await self.ds.allowed_resources("view-table", request.actor)
allowed_tables = [r async for r in table_page.all()]
# Build database -> tables mapping
database_with_tables = []