Add keyset pagination to allowed_resources() (#2562)

* Add keyset pagination to allowed_resources()

This replaces the unbounded list return with PaginatedResources,
which supports efficient keyset pagination for handling thousands
of resources.

Closes #2560

Changes:
- allowed_resources() now returns a PaginatedResources object instead of a list
- Added limit (1-1000, default 100) and next (keyset token) parameters
- Added include_reasons parameter (replaces allowed_resources_with_reasons)
- Removed allowed_resources_with_reasons() method entirely
- Added PaginatedResources.all() async generator for automatic pagination
- Uses tilde-encoding for tokens (matching table pagination)
- Updated all callers to use .resources accessor
- Updated documentation with new API and examples

The PaginatedResources object has:
- resources: List of Resource objects for current page
- next: Token for next page (None if no more results)
- all(): Async generator that yields all resources across pages

Example usage:
    page = await ds.allowed_resources("view-table", actor, limit=100)
    for table in page.resources:
        print(table.child)

    # Iterate all pages automatically
    async for table in page.all():
        print(table.child)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Simon Willison 2025-10-31 14:50:46 -07:00 committed by GitHub
commit 400fa08e4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 366 additions and 223 deletions

View file

@ -2,9 +2,9 @@
Tests for the new Resource-based permission system.
These tests verify:
1. The new Datasette.allowed_resources() method
1. The new Datasette.allowed_resources() method (with pagination)
2. The new Datasette.allowed() method
3. The new Datasette.allowed_resources_with_reasons() method
3. The include_reasons parameter for debugging
4. That SQL does the heavy lifting (no Python filtering)
"""
@ -71,7 +71,8 @@ async def test_allowed_resources_global_allow(test_ds):
try:
# Use the new allowed_resources() method
tables = await test_ds.allowed_resources("view-table", {"id": "alice"})
result = await test_ds.allowed_resources("view-table", {"id": "alice"})
tables = result.resources
# Alice should see all tables
assert len(tables) == 5
@ -133,9 +134,7 @@ async def test_allowed_specific_resource(test_ds):
@pytest.mark.asyncio
async def test_allowed_resources_with_reasons(test_ds):
"""Test allowed_resources_with_reasons() exposes debugging info"""
async def test_allowed_resources_include_reasons(test_ds):
def rules_callback(datasette, actor, action):
if actor and actor.get("role") == "analyst":
sql = """
@ -152,21 +151,22 @@ async def test_allowed_resources_with_reasons(test_ds):
pm.register(plugin, name="test_plugin")
try:
# Use allowed_resources_with_reasons to get debugging info
allowed = await test_ds.allowed_resources_with_reasons(
"view-table", {"id": "bob", "role": "analyst"}
# Use allowed_resources with include_reasons to get debugging info
result = await test_ds.allowed_resources(
"view-table", {"id": "bob", "role": "analyst"}, include_reasons=True
)
allowed = result.resources
# Should get analytics tables except sensitive
assert len(allowed) >= 2 # At least users and events
# Check we can access both resource and reason
for item in allowed:
assert isinstance(item.resource, TableResource)
assert isinstance(item.reason, list)
if item.resource.parent == "analytics":
for resource in allowed:
assert isinstance(resource, TableResource)
assert isinstance(resource.reasons, list)
if resource.parent == "analytics":
# Should mention parent-level reason in at least one of the reasons
reasons_text = " ".join(item.reason).lower()
reasons_text = " ".join(resource.reasons).lower()
assert "analyst access" in reasons_text
finally:
@ -194,7 +194,8 @@ async def test_child_deny_overrides_parent_allow(test_ds):
try:
actor = {"id": "bob", "role": "analyst"}
tables = await test_ds.allowed_resources("view-table", actor)
result = await test_ds.allowed_resources("view-table", actor)
tables = result.resources
# Should see analytics tables except sensitive
analytics_tables = [t for t in tables if t.parent == "analytics"]
@ -242,7 +243,8 @@ async def test_child_allow_overrides_parent_deny(test_ds):
try:
actor = {"id": "carol"}
tables = await test_ds.allowed_resources("view-table", actor)
result = await test_ds.allowed_resources("view-table", actor)
tables = result.resources
# Should only see production.orders
production_tables = [t for t in tables if t.parent == "production"]
@ -305,7 +307,8 @@ async def test_sql_does_filtering_not_python(test_ds):
)
# allowed_resources() should also use SQL filtering
tables = await test_ds.allowed_resources("view-table", actor)
result = await test_ds.allowed_resources("view-table", actor)
tables = result.resources
assert len(tables) == 1
assert tables[0].parent == "analytics"
assert tables[0].child == "users"

View file

@ -66,7 +66,7 @@ async def test_tables_endpoint_global_access(test_ds):
try:
# Use the allowed_resources API directly
tables = await test_ds.allowed_resources("view-table", {"id": "alice"})
page = await test_ds.allowed_resources("view-table", {"id": "alice"})
# Convert to the format the endpoint returns
result = [
@ -74,7 +74,7 @@ async def test_tables_endpoint_global_access(test_ds):
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
# Alice should see all tables
@ -105,7 +105,7 @@ async def test_tables_endpoint_database_restriction(test_ds):
pm.register(plugin, name="test_plugin")
try:
tables = await test_ds.allowed_resources(
page = await test_ds.allowed_resources(
"view-table", {"id": "bob", "role": "analyst"}
)
result = [
@ -113,7 +113,7 @@ async def test_tables_endpoint_database_restriction(test_ds):
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
# Bob should only see analytics tables
@ -152,13 +152,13 @@ async def test_tables_endpoint_table_exception(test_ds):
pm.register(plugin, name="test_plugin")
try:
tables = await test_ds.allowed_resources("view-table", {"id": "carol"})
page = await test_ds.allowed_resources("view-table", {"id": "carol"})
result = [
{
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
# Carol should see analytics.users but not other analytics tables
@ -194,7 +194,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds):
pm.register(plugin, name="test_plugin")
try:
tables = await test_ds.allowed_resources(
page = await test_ds.allowed_resources(
"view-table", {"id": "bob", "role": "analyst"}
)
result = [
@ -202,7 +202,7 @@ async def test_tables_endpoint_deny_overrides_allow(test_ds):
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
analytics_tables = [m for m in result if m["name"].startswith("analytics/")]
@ -230,10 +230,10 @@ async def test_tables_endpoint_no_permissions():
await ds._refresh_schemas()
# Unknown actor with no custom permissions
tables = await ds.allowed_resources("view-table", {"id": "unknown"})
page = await ds.allowed_resources("view-table", {"id": "unknown"})
result = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in tables
for t in page.resources
]
# Should see tables (due to default_permissions.py providing default allow)
@ -260,13 +260,13 @@ async def test_tables_endpoint_specific_table_only(test_ds):
pm.register(plugin, name="test_plugin")
try:
tables = await test_ds.allowed_resources("view-table", {"id": "dave"})
page = await test_ds.allowed_resources("view-table", {"id": "dave"})
result = [
{
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
# Should see only the two specifically allowed tables
@ -298,13 +298,13 @@ async def test_tables_endpoint_empty_result(test_ds):
pm.register(plugin, name="test_plugin")
try:
tables = await test_ds.allowed_resources("view-table", {"id": "blocked"})
page = await test_ds.allowed_resources("view-table", {"id": "blocked"})
result = [
{
"name": f"{t.parent}/{t.child}",
"url": test_ds.urls.table(t.parent, t.child),
}
for t in tables
for t in page.resources
]
# Global deny should block access to all tables
@ -328,11 +328,11 @@ async def test_tables_endpoint_no_query_returns_all():
await ds._refresh_schemas()
# Get all tables without query
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
# Should return all tables with truncated: false
assert len(all_tables) >= 3
table_names = {f"{t.parent}/{t.child}" for t in all_tables}
assert len(page.resources) >= 3
table_names = {f"{t.parent}/{t.child}" for t in page.resources}
assert "test_db/users" in table_names
assert "test_db/posts" in table_names
assert "test_db/comments" in table_names
@ -350,12 +350,13 @@ async def test_tables_endpoint_truncation():
await db.execute_write(f"CREATE TABLE table_{i:03d} (id INTEGER)")
await ds._refresh_schemas()
# Get all tables - should be truncated
all_tables = await ds.allowed_resources("view-table", None)
big_db_tables = [t for t in all_tables if t.parent == "big_db"]
# Get all tables - should be paginated with limit=100 by default
page = await ds.allowed_resources("view-table", None)
big_db_tables = [t for t in page.resources if t.parent == "big_db"]
# Should have exactly 105 tables in the database
assert len(big_db_tables) == 105
# Should have exactly 100 tables in first page (default limit)
assert len(big_db_tables) == 100
assert page.next is not None # More results available
@pytest.mark.asyncio
@ -374,10 +375,10 @@ async def test_tables_endpoint_search_single_term():
await ds._refresh_schemas()
# Get all tables in the new format
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
matches = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in all_tables
for t in page.resources
]
# Filter for "user" (extract table name from "db/table")
@ -411,10 +412,10 @@ async def test_tables_endpoint_search_multiple_terms():
await ds._refresh_schemas()
# Get all tables in the new format
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
matches = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in all_tables
for t in page.resources
]
# Filter for "user profile" (two terms, extract table name from "db/table")
@ -453,10 +454,10 @@ async def test_tables_endpoint_search_ordering():
await ds._refresh_schemas()
# Get all tables in the new format
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
matches = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in all_tables
for t in page.resources
]
# Filter for "user" and sort by table name length
@ -490,10 +491,10 @@ async def test_tables_endpoint_search_case_insensitive():
await ds._refresh_schemas()
# Get all tables in the new format
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
matches = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in all_tables
for t in page.resources
]
# Filter for "user" (lowercase) should match all case variants
@ -525,10 +526,10 @@ async def test_tables_endpoint_search_no_matches():
await ds._refresh_schemas()
# Get all tables in the new format
all_tables = await ds.allowed_resources("view-table", None)
page = await ds.allowed_resources("view-table", None)
matches = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in all_tables
for t in page.resources
]
# Filter for "zzz" which doesn't exist
@ -563,10 +564,10 @@ async def test_tables_endpoint_config_database_allow():
await ds._refresh_schemas()
# Root user should see restricted_db tables
root_tables = await ds.allowed_resources("view-table", {"id": "root"})
root_page = await ds.allowed_resources("view-table", {"id": "root"})
root_list = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in root_tables
for t in root_page.resources
]
restricted_tables_root = [
m for m in root_list if m["name"].startswith("restricted_db/")
@ -577,10 +578,10 @@ async def test_tables_endpoint_config_database_allow():
assert "restricted_db/posts" in table_names
# Alice should NOT see restricted_db tables
alice_tables = await ds.allowed_resources("view-table", {"id": "alice"})
alice_page = await ds.allowed_resources("view-table", {"id": "alice"})
alice_list = [
{"name": f"{t.parent}/{t.child}", "url": ds.urls.table(t.parent, t.child)}
for t in alice_tables
for t in alice_page.resources
]
restricted_tables_alice = [
m for m in alice_list if m["name"].startswith("restricted_db/")

View file

@ -1327,14 +1327,14 @@ async def test_actor_restrictions_filters_allowed_resources(perms_ds):
actor = {"id": "user", "_r": {"r": {"perms_ds_one": {"t1": ["vt"]}}}}
# Should only return t1
allowed_tables = await perms_ds.allowed_resources("view-table", actor)
assert len(allowed_tables) == 1
assert allowed_tables[0].parent == "perms_ds_one"
assert allowed_tables[0].child == "t1"
page = await perms_ds.allowed_resources("view-table", actor)
assert len(page.resources) == 1
assert page.resources[0].parent == "perms_ds_one"
assert page.resources[0].child == "t1"
# Database listing should be empty (no view-database permission)
allowed_dbs = await perms_ds.allowed_resources("view-database", actor)
assert len(allowed_dbs) == 0
db_page = await perms_ds.allowed_resources("view-database", actor)
assert len(db_page.resources) == 0
@pytest.mark.asyncio
@ -1343,12 +1343,10 @@ async def test_actor_restrictions_database_level(perms_ds):
actor = {"id": "user", "_r": {"d": {"perms_ds_one": ["vt"]}}}
allowed_tables = await perms_ds.allowed_resources(
"view-table", actor, parent="perms_ds_one"
)
page = await perms_ds.allowed_resources("view-table", actor, parent="perms_ds_one")
# Should return all tables in perms_ds_one
table_names = {r.child for r in allowed_tables}
table_names = {r.child for r in page.resources}
assert "t1" in table_names
assert "t2" in table_names
assert "v1" in table_names # views too
@ -1360,11 +1358,11 @@ async def test_actor_restrictions_global_level(perms_ds):
actor = {"id": "user", "_r": {"a": ["vt"]}}
allowed_tables = await perms_ds.allowed_resources("view-table", actor)
page = await perms_ds.allowed_resources("view-table", actor)
# Should return all tables in all databases
assert len(allowed_tables) > 0
dbs = {r.parent for r in allowed_tables}
assert len(page.resources) > 0
dbs = {r.parent for r in page.resources}
assert "perms_ds_one" in dbs
assert "perms_ds_two" in dbs
@ -1430,8 +1428,8 @@ async def test_actor_restrictions_view_instance_only(perms_ds):
data = response.json()
# The instance is visible but databases list should be empty or minimal
# Actually, let's check via allowed_resources
allowed_dbs = await perms_ds.allowed_resources("view-database", actor)
assert len(allowed_dbs) == 0
page = await perms_ds.allowed_resources("view-database", actor)
assert len(page.resources) == 0
@pytest.mark.asyncio
@ -1441,11 +1439,11 @@ async def test_actor_restrictions_empty_allowlist(perms_ds):
actor = {"id": "user", "_r": {}}
# No actions in allowlist, so everything should be denied
allowed_tables = await perms_ds.allowed_resources("view-table", actor)
assert len(allowed_tables) == 0
page1 = await perms_ds.allowed_resources("view-table", actor)
assert len(page1.resources) == 0
allowed_dbs = await perms_ds.allowed_resources("view-database", actor)
assert len(allowed_dbs) == 0
page2 = await perms_ds.allowed_resources("view-database", actor)
assert len(page2.resources) == 0
result = await perms_ds.allowed(action="view-instance", actor=actor)
assert result is False