mirror of
https://github.com/simonw/datasette.git
synced 2025-12-10 16:51:24 +01:00
Add keyset pagination to allowed_resources() (#2562)
* Add keyset pagination to allowed_resources()

This replaces the unbounded list return with PaginatedResources, which supports efficient keyset pagination for handling thousands of resources.

Closes #2560

Changes:
- allowed_resources() now returns PaginatedResources instead of list
- Added limit (1-1000, default 100) and next (keyset token) parameters
- Added include_reasons parameter (replaces allowed_resources_with_reasons)
- Removed allowed_resources_with_reasons() method entirely
- PaginatedResources.all() async generator for automatic pagination
- Uses tilde-encoding for tokens (matching table pagination)
- Updated all callers to use .resources accessor
- Updated documentation with new API and examples

The PaginatedResources object has:
- resources: List of Resource objects for current page
- next: Token for next page (None if no more results)
- all(): Async generator that yields all resources across pages

Example usage:

    page = await ds.allowed_resources("view-table", actor, limit=100)
    for table in page.resources:
        print(table.child)

    # Iterate all pages automatically
    async for table in page.all():
        print(table.child)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
b7ef968c6f
commit
400fa08e4c
10 changed files with 366 additions and 223 deletions
175
datasette/app.py
175
datasette/app.py
|
|
@ -71,6 +71,7 @@ from .url_builder import Urls
|
|||
from .database import Database, QueryInterrupted
|
||||
|
||||
from .utils import (
|
||||
PaginatedResources,
|
||||
PrefixedUrlString,
|
||||
SPATIALITE_FUNCTIONS,
|
||||
StartupError,
|
||||
|
|
@ -91,6 +92,7 @@ from .utils import (
|
|||
resolve_env_secrets,
|
||||
resolve_routes,
|
||||
tilde_decode,
|
||||
tilde_encode,
|
||||
to_css_class,
|
||||
urlsafe_components,
|
||||
redact_keys,
|
||||
|
|
@ -1147,104 +1149,147 @@ class Datasette:
|
|||
*,
|
||||
parent: str | None = None,
|
||||
include_is_private: bool = False,
|
||||
) -> list["Resource"]:
|
||||
include_reasons: bool = False,
|
||||
limit: int = 100,
|
||||
next: str | None = None,
|
||||
) -> PaginatedResources:
|
||||
"""
|
||||
Return all resources the actor can access for the given action.
|
||||
Return paginated resources the actor can access for the given action.
|
||||
|
||||
Uses SQL to filter resources based on cascading permission rules.
|
||||
Returns instances of the appropriate Resource subclass.
|
||||
Uses SQL with keyset pagination to efficiently filter resources.
|
||||
Returns PaginatedResources with list of Resource instances and pagination metadata.
|
||||
|
||||
Args:
|
||||
action: The action name (e.g., "view-table")
|
||||
actor: The actor dict (or None for unauthenticated)
|
||||
parent: Optional parent filter (e.g., database name) to limit results
|
||||
include_is_private: If True, adds a .private attribute to each Resource
|
||||
include_reasons: If True, adds a .reasons attribute with List[str] of permission reasons
|
||||
limit: Maximum number of results to return (1-1000, default 100)
|
||||
next: Keyset token from previous page for pagination
|
||||
|
||||
Returns:
|
||||
PaginatedResources with:
|
||||
- resources: List of Resource objects for this page
|
||||
- next: Token for next page (None if no more results)
|
||||
|
||||
Example:
|
||||
# Get all tables
|
||||
tables = await datasette.allowed_resources("view-table", actor)
|
||||
for table in tables:
|
||||
# Get first page of tables
|
||||
page = await datasette.allowed_resources("view-table", actor, limit=50)
|
||||
for table in page.resources:
|
||||
print(f"{table.parent}/{table.child}")
|
||||
|
||||
# Get tables for specific database with private flag
|
||||
tables = await datasette.allowed_resources(
|
||||
"view-table", actor, parent="mydb", include_is_private=True
|
||||
# Get next page
|
||||
if page.next:
|
||||
next_page = await datasette.allowed_resources(
|
||||
"view-table", actor, limit=50, next=page.next
|
||||
)
|
||||
|
||||
# With reasons for debugging
|
||||
page = await datasette.allowed_resources(
|
||||
"view-table", actor, include_reasons=True
|
||||
)
|
||||
for table in tables:
|
||||
if table.private:
|
||||
print(f"{table.child} is private")
|
||||
for table in page.resources:
|
||||
print(f"{table.child}: {table.reasons}")
|
||||
|
||||
# Iterate through all results with async generator
|
||||
page = await datasette.allowed_resources("view-table", actor)
|
||||
async for table in page.all():
|
||||
print(table.child)
|
||||
"""
|
||||
|
||||
action_obj = self.actions.get(action)
|
||||
if not action_obj:
|
||||
raise ValueError(f"Unknown action: {action}")
|
||||
|
||||
# Validate and cap limit
|
||||
limit = min(max(1, limit), 1000)
|
||||
|
||||
# Get base SQL query
|
||||
query, params = await self.allowed_resources_sql(
|
||||
action=action,
|
||||
actor=actor,
|
||||
parent=parent,
|
||||
include_is_private=include_is_private,
|
||||
)
|
||||
result = await self.get_internal_database().execute(query, params)
|
||||
|
||||
# Instantiate the appropriate Resource subclass for each row
|
||||
# Add keyset pagination WHERE clause if next token provided
|
||||
if next:
|
||||
try:
|
||||
components = urlsafe_components(next)
|
||||
if len(components) >= 2:
|
||||
last_parent, last_child = components[0], components[1]
|
||||
# Keyset condition: (parent > last) OR (parent = last AND child > last)
|
||||
keyset_where = """
|
||||
(parent > :keyset_parent OR
|
||||
(parent = :keyset_parent AND child > :keyset_child))
|
||||
"""
|
||||
# Wrap original query and add keyset filter
|
||||
query = f"SELECT * FROM ({query}) WHERE {keyset_where}"
|
||||
params["keyset_parent"] = last_parent
|
||||
params["keyset_child"] = last_child
|
||||
except (ValueError, KeyError):
|
||||
# Invalid token - ignore and start from beginning
|
||||
pass
|
||||
|
||||
# Add LIMIT (fetch limit+1 to detect if there are more results)
|
||||
# Note: query from allowed_resources_sql() already includes ORDER BY parent, child
|
||||
query = f"{query} LIMIT :limit"
|
||||
params["limit"] = limit + 1
|
||||
|
||||
# Execute query
|
||||
result = await self.get_internal_database().execute(query, params)
|
||||
rows = list(result.rows)
|
||||
|
||||
# Check if truncated (got more than limit rows)
|
||||
truncated = len(rows) > limit
|
||||
if truncated:
|
||||
rows = rows[:limit] # Remove the extra row
|
||||
|
||||
# Build Resource objects with optional attributes
|
||||
resources = []
|
||||
for row in result.rows:
|
||||
# row[0]=parent, row[1]=child, row[2]=reason (ignored), row[3]=is_private (if requested)
|
||||
for row in rows:
|
||||
# row[0]=parent, row[1]=child, row[2]=reason, row[3]=is_private (if requested)
|
||||
resource = self.resource_for_action(action, parent=row[0], child=row[1])
|
||||
|
||||
# Add reasons if requested
|
||||
if include_reasons:
|
||||
reason_json = row[2]
|
||||
try:
|
||||
reasons_array = (
|
||||
json.loads(reason_json) if isinstance(reason_json, str) else []
|
||||
)
|
||||
resource.reasons = [r for r in reasons_array if r is not None]
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
resource.reasons = [reason_json] if reason_json else []
|
||||
|
||||
# Add private flag if requested
|
||||
if include_is_private:
|
||||
resource.private = bool(row[3])
|
||||
|
||||
resources.append(resource)
|
||||
|
||||
return resources
|
||||
# Generate next token if there are more results
|
||||
next_token = None
|
||||
if truncated and resources:
|
||||
last_resource = resources[-1]
|
||||
# Use tilde-encoding like table pagination
|
||||
next_token = "{},{}".format(
|
||||
tilde_encode(str(last_resource.parent)),
|
||||
tilde_encode(str(last_resource.child)),
|
||||
)
|
||||
|
||||
async def allowed_resources_with_reasons(
|
||||
self,
|
||||
action: str,
|
||||
actor: dict | None = None,
|
||||
) -> list["AllowedResource"]:
|
||||
"""
|
||||
Return allowed resources with permission reasons for debugging.
|
||||
|
||||
Uses SQL to filter resources and includes the reason each was allowed.
|
||||
Returns list of AllowedResource named tuples with (resource, reason).
|
||||
|
||||
Example:
|
||||
debug_info = await datasette.allowed_resources_with_reasons("view-table", actor)
|
||||
for allowed in debug_info:
|
||||
print(f"{allowed.resource}: {allowed.reason}")
|
||||
"""
|
||||
from datasette.permissions import AllowedResource
|
||||
|
||||
action_obj = self.actions.get(action)
|
||||
if not action_obj:
|
||||
raise ValueError(f"Unknown action: {action}")
|
||||
|
||||
query, params = await self.allowed_resources_sql(action=action, actor=actor)
|
||||
result = await self.get_internal_database().execute(query, params)
|
||||
|
||||
resources = []
|
||||
for row in result.rows:
|
||||
resource = self.resource_for_action(action, parent=row[0], child=row[1])
|
||||
reason_json = row[2]
|
||||
|
||||
# Parse JSON array of reasons and filter out nulls
|
||||
try:
|
||||
import json
|
||||
|
||||
reasons_array = (
|
||||
json.loads(reason_json) if isinstance(reason_json, str) else []
|
||||
)
|
||||
reasons_filtered = [r for r in reasons_array if r is not None]
|
||||
# Store as list for multiple reasons, or keep empty list
|
||||
reason = reasons_filtered
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
# Fallback for backward compatibility
|
||||
reason = [reason_json] if reason_json else []
|
||||
|
||||
resources.append(AllowedResource(resource=resource, reason=reason))
|
||||
|
||||
return resources
|
||||
return PaginatedResources(
|
||||
resources=resources,
|
||||
next=next_token,
|
||||
_datasette=self,
|
||||
_action=action,
|
||||
_actor=actor,
|
||||
_parent=parent,
|
||||
_include_is_private=include_is_private,
|
||||
_include_reasons=include_reasons,
|
||||
_limit=limit,
|
||||
)
|
||||
|
||||
async def allowed(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@ class Resource(ABC):
|
|||
name: str = None # e.g., "table", "database", "model"
|
||||
parent_name: str | None = None # e.g., "database" for tables
|
||||
|
||||
# Instance-level optional extra attributes
|
||||
reasons: list[str] | None = None
|
||||
include_reasons: bool | None = None
|
||||
|
||||
def __init__(self, parent: str | None = None, child: str | None = None):
|
||||
"""
|
||||
Create a resource instance.
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import aiofiles
|
|||
import click
|
||||
from collections import OrderedDict, namedtuple, Counter
|
||||
import copy
|
||||
import dataclasses
|
||||
import base64
|
||||
import hashlib
|
||||
import inspect
|
||||
|
|
@ -27,6 +28,58 @@ from .sqlite import sqlite3, supports_table_xinfo
|
|||
|
||||
if typing.TYPE_CHECKING:
|
||||
from datasette.database import Database
|
||||
from datasette.permissions import Resource
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PaginatedResources:
|
||||
"""Paginated results from allowed_resources query."""
|
||||
|
||||
resources: List["Resource"]
|
||||
next: str | None # Keyset token for next page (None if no more results)
|
||||
_datasette: typing.Any = dataclasses.field(default=None, repr=False)
|
||||
_action: str = dataclasses.field(default=None, repr=False)
|
||||
_actor: typing.Any = dataclasses.field(default=None, repr=False)
|
||||
_parent: str | None = dataclasses.field(default=None, repr=False)
|
||||
_include_is_private: bool = dataclasses.field(default=False, repr=False)
|
||||
_include_reasons: bool = dataclasses.field(default=False, repr=False)
|
||||
_limit: int = dataclasses.field(default=100, repr=False)
|
||||
|
||||
async def all(self):
|
||||
"""
|
||||
Async generator that yields all resources across all pages.
|
||||
|
||||
Automatically handles pagination under the hood. This is useful when you need
|
||||
to iterate through all results without manually managing pagination tokens.
|
||||
|
||||
Yields:
|
||||
Resource objects one at a time
|
||||
|
||||
Example:
|
||||
page = await datasette.allowed_resources("view-table", actor)
|
||||
async for table in page.all():
|
||||
print(f"{table.parent}/{table.child}")
|
||||
"""
|
||||
# Yield all resources from current page
|
||||
for resource in self.resources:
|
||||
yield resource
|
||||
|
||||
# Continue fetching subsequent pages if there are more
|
||||
next_token = self.next
|
||||
while next_token:
|
||||
page = await self._datasette.allowed_resources(
|
||||
self._action,
|
||||
self._actor,
|
||||
parent=self._parent,
|
||||
include_is_private=self._include_is_private,
|
||||
include_reasons=self._include_reasons,
|
||||
limit=self._limit,
|
||||
next=next_token,
|
||||
)
|
||||
for resource in page.resources:
|
||||
yield resource
|
||||
next_token = page.next
|
||||
|
||||
|
||||
# From https://www.sqlite.org/lang_keywords.html
|
||||
reserved_words = set(
|
||||
|
|
|
|||
|
|
@ -70,12 +70,15 @@ class DatabaseView(View):
|
|||
metadata = await datasette.get_database_metadata(database)
|
||||
|
||||
# Get all tables/views this actor can see in bulk with private flag
|
||||
|
||||
allowed_tables = await datasette.allowed_resources(
|
||||
"view-table", request.actor, parent=database, include_is_private=True
|
||||
allowed_tables_page = await datasette.allowed_resources(
|
||||
"view-table",
|
||||
request.actor,
|
||||
parent=database,
|
||||
include_is_private=True,
|
||||
limit=1000,
|
||||
)
|
||||
# Create lookup dict for quick access
|
||||
allowed_dict = {r.child: r for r in allowed_tables}
|
||||
allowed_dict = {r.child: r for r in allowed_tables_page.resources}
|
||||
|
||||
# Filter to just views
|
||||
view_names_set = set(await db.view_names())
|
||||
|
|
@ -88,14 +91,18 @@ class DatabaseView(View):
|
|||
tables = await get_tables(datasette, request, db, allowed_dict)
|
||||
|
||||
# Get allowed queries using the new permission system
|
||||
allowed_query_resources = await datasette.allowed_resources(
|
||||
"view-query", request.actor, parent=database, include_is_private=True
|
||||
allowed_query_page = await datasette.allowed_resources(
|
||||
"view-query",
|
||||
request.actor,
|
||||
parent=database,
|
||||
include_is_private=True,
|
||||
limit=1000,
|
||||
)
|
||||
|
||||
# Build canned_queries list by looking up each allowed query
|
||||
all_queries = await datasette.get_canned_queries(database, request.actor)
|
||||
canned_queries = []
|
||||
for query_resource in allowed_query_resources:
|
||||
for query_resource in allowed_query_page.resources:
|
||||
query_name = query_resource.child
|
||||
if query_name in all_queries:
|
||||
canned_queries.append(
|
||||
|
|
@ -509,12 +516,15 @@ class QueryView(View):
|
|||
database = db.name
|
||||
|
||||
# Get all tables/views this actor can see in bulk with private flag
|
||||
|
||||
allowed_tables = await datasette.allowed_resources(
|
||||
"view-table", request.actor, parent=database, include_is_private=True
|
||||
allowed_tables_page = await datasette.allowed_resources(
|
||||
"view-table",
|
||||
request.actor,
|
||||
parent=database,
|
||||
include_is_private=True,
|
||||
limit=1000,
|
||||
)
|
||||
# Create lookup dict for quick access
|
||||
allowed_dict = {r.child: r for r in allowed_tables}
|
||||
allowed_dict = {r.child: r for r in allowed_tables_page.resources}
|
||||
|
||||
# Are we a canned query?
|
||||
canned_query = None
|
||||
|
|
|
|||
|
|
@ -28,17 +28,18 @@ class IndexView(BaseView):
|
|||
await self.ds.ensure_permission(action="view-instance", actor=request.actor)
|
||||
|
||||
# Get all allowed databases and tables in bulk
|
||||
allowed_databases = await self.ds.allowed_resources(
|
||||
db_page = await self.ds.allowed_resources(
|
||||
"view-database", request.actor, include_is_private=True
|
||||
)
|
||||
allowed_databases = [r async for r in db_page.all()]
|
||||
allowed_db_dict = {r.parent: r for r in allowed_databases}
|
||||
|
||||
allowed_tables = await self.ds.allowed_resources(
|
||||
# Group tables by database
|
||||
tables_by_db = {}
|
||||
table_page = await self.ds.allowed_resources(
|
||||
"view-table", request.actor, include_is_private=True
|
||||
)
|
||||
# Group by database
|
||||
tables_by_db = {}
|
||||
for t in allowed_tables:
|
||||
async for t in table_page.all():
|
||||
if t.parent not in tables_by_db:
|
||||
tables_by_db[t.parent] = {}
|
||||
tables_by_db[t.parent][t.child] = t
|
||||
|
|
|
|||
|
|
@ -268,19 +268,38 @@ class AllowedResourcesView(BaseView):
|
|||
offset = (page - 1) * page_size
|
||||
|
||||
# Use the simplified allowed_resources method
|
||||
# If user has debug permission, use the with_reasons variant
|
||||
# Collect all resources with optional reasons for debugging
|
||||
try:
|
||||
if has_debug_permission:
|
||||
allowed_resources = await self.ds.allowed_resources_with_reasons(
|
||||
action=action,
|
||||
actor=actor,
|
||||
)
|
||||
else:
|
||||
allowed_resources = await self.ds.allowed_resources(
|
||||
action=action,
|
||||
actor=actor,
|
||||
parent=parent_filter,
|
||||
)
|
||||
allowed_rows = []
|
||||
result = await self.ds.allowed_resources(
|
||||
action=action,
|
||||
actor=actor,
|
||||
parent=parent_filter,
|
||||
include_reasons=has_debug_permission,
|
||||
)
|
||||
async for resource in result.all():
|
||||
parent_val = resource.parent
|
||||
child_val = resource.child
|
||||
|
||||
# Build resource path
|
||||
if parent_val is None:
|
||||
resource_path = "/"
|
||||
elif child_val is None:
|
||||
resource_path = f"/{parent_val}"
|
||||
else:
|
||||
resource_path = f"/{parent_val}/{child_val}"
|
||||
|
||||
row = {
|
||||
"parent": parent_val,
|
||||
"child": child_val,
|
||||
"resource": resource_path,
|
||||
}
|
||||
|
||||
# Add reason if we have it (from include_reasons=True)
|
||||
if has_debug_permission and hasattr(resource, "reasons"):
|
||||
row["reason"] = resource.reasons
|
||||
|
||||
allowed_rows.append(row)
|
||||
except Exception:
|
||||
# If catalog tables don't exist yet, return empty results
|
||||
return (
|
||||
|
|
@ -295,46 +314,6 @@ class AllowedResourcesView(BaseView):
|
|||
200,
|
||||
)
|
||||
|
||||
# Convert to list of dicts with resource path
|
||||
allowed_rows = []
|
||||
for item in allowed_resources:
|
||||
# Extract resource and reason depending on what we got back
|
||||
if has_debug_permission:
|
||||
# allowed_resources_with_reasons returns AllowedResource(resource, reason)
|
||||
resource = item.resource
|
||||
reason = item.reason
|
||||
else:
|
||||
# allowed_resources returns plain Resource objects
|
||||
resource = item
|
||||
reason = None
|
||||
|
||||
parent_val = resource.parent
|
||||
child_val = resource.child
|
||||
|
||||
# Apply parent filter if needed (when using with_reasons, we need to filter manually)
|
||||
if parent_filter is not None and parent_val != parent_filter:
|
||||
continue
|
||||
|
||||
# Build resource path
|
||||
if parent_val is None:
|
||||
resource_path = "/"
|
||||
elif child_val is None:
|
||||
resource_path = f"/{parent_val}"
|
||||
else:
|
||||
resource_path = f"/{parent_val}/{child_val}"
|
||||
|
||||
row = {
|
||||
"parent": parent_val,
|
||||
"child": child_val,
|
||||
"resource": resource_path,
|
||||
}
|
||||
|
||||
# Add reason if we have it (it's already a list from allowed_resources_with_reasons)
|
||||
if reason is not None:
|
||||
row["reason"] = reason
|
||||
|
||||
allowed_rows.append(row)
|
||||
|
||||
# Apply child filter if specified
|
||||
if child_filter is not None:
|
||||
allowed_rows = [row for row in allowed_rows if row["child"] == child_filter]
|
||||
|
|
@ -652,10 +631,11 @@ class CreateTokenView(BaseView):
|
|||
async def shared(self, request):
|
||||
self.check_permission(request)
|
||||
# Build list of databases and tables the user has permission to view
|
||||
allowed_databases = await self.ds.allowed_resources(
|
||||
"view-database", request.actor
|
||||
)
|
||||
allowed_tables = await self.ds.allowed_resources("view-table", request.actor)
|
||||
db_page = await self.ds.allowed_resources("view-database", request.actor)
|
||||
allowed_databases = [r async for r in db_page.all()]
|
||||
|
||||
table_page = await self.ds.allowed_resources("view-table", request.actor)
|
||||
allowed_tables = [r async for r in table_page.all()]
|
||||
|
||||
# Build database -> tables mapping
|
||||
database_with_tables = []
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue