Fix startup hook to fire after metadata and schema tables are populated (#2666)

* Fix startup hook to fire after metadata and schema tables are populated

Previously, the startup() plugin hook fired before internal database
tables were populated from metadata.yaml and before catalog schema
tables were filled. This meant plugins couldn't read or modify metadata
during startup. Now invoke_startup() calls _refresh_schemas() before
firing startup hooks, ensuring metadata and catalog tables are available.

* Fix startup hook to fire after metadata and schema tables are populated

Previously, the startup() plugin hook fired before internal database
tables were populated from metadata.yaml and before catalog schema
tables were filled. This meant plugins couldn't read or modify metadata
during startup. Now invoke_startup() calls _refresh_schemas() before
firing startup hooks, ensuring metadata and catalog tables are available.

Updated test_tracer to reflect that internal DB creation SQL now runs
during startup rather than during the first traced request.

* Move check_databases before invoke_startup in CLI serve

Since invoke_startup now calls _refresh_schemas() which queries each
database, the spatialite connection check must run first to provide
the friendly error message instead of a raw OperationalError.

https://claude.ai/code/session_01KL4t5FZYb32rZY7xaqrrZU
This commit is contained in:
Simon Willison 2026-03-16 17:56:40 -07:00 committed by GitHub
commit 7f93353549
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 35 additions and 22 deletions

View file

@ -696,6 +696,8 @@ class Datasette:
env=self._jinja_env, datasette=self
):
await await_me_maybe(hook)
# Ensure internal tables and metadata are populated before startup hooks
await self._refresh_schemas()
for hook in pm.hook.startup(datasette=self):
await await_me_maybe(hook)
self._startup_invoked = True

View file

@ -661,15 +661,16 @@ def serve(
# Private utility mechanism for writing unit tests
return ds
# Run async soundness checks before startup hooks, since invoke_startup
# now populates internal tables which requires querying each database
run_sync(lambda: check_databases(ds))
# Run the "startup" plugin hooks
try:
run_sync(ds.invoke_startup)
except StartupError as e:
raise click.ClickException(e.args[0])
# Run async soundness checks - but only if we're not under pytest
run_sync(lambda: check_databases(ds))
if headers and not get:
raise click.ClickException("--headers can only be used with --get")

View file

@ -127,6 +127,18 @@ def startup(datasette):
internal_db = datasette.get_internal_database()
result = await internal_db.execute("select 1 + 1")
datasette._startup_hook_calculation = result.first()[0]
# Check that metadata tables have been populated before startup fires
metadata_rows = await internal_db.execute(
"select key, value from metadata_instance"
)
datasette._startup_metadata_keys = [row["key"] for row in metadata_rows]
# Check that catalog/schema tables have been populated before startup fires
catalog_rows = await internal_db.execute(
"select database_name from catalog_databases"
)
datasette._startup_catalog_databases = [
row["database_name"] for row in catalog_rows
]
return inner

View file

@ -862,6 +862,18 @@ async def test_hook_startup(ds_client):
assert 2 == ds_client.ds._startup_hook_calculation
@pytest.mark.asyncio
async def test_hook_startup_metadata_available(ds_client):
# Metadata from metadata.yaml should be populated before startup() fires
assert "title" in ds_client.ds._startup_metadata_keys
@pytest.mark.asyncio
async def test_hook_startup_catalog_populated(ds_client):
# Internal catalog tables should be populated before startup() fires
assert "fixtures" in ds_client.ds._startup_catalog_databases
@pytest.mark.asyncio
async def test_hook_canned_queries(ds_client):
queries = (await ds_client.get("/fixtures.json")).json()["queries"]

View file

@ -32,25 +32,11 @@ def test_trace(trace_debug):
assert isinstance(trace.get("params"), (list, dict, None.__class__))
sqls = [trace["sql"] for trace in traces if "sql" in trace]
# There should be a mix of different types of SQL statement
expected = (
"CREATE TABLE ",
"PRAGMA ",
"INSERT OR REPLACE INTO ",
"INSERT INTO",
"select ",
)
for prefix in expected:
assert any(
sql.startswith(prefix) for sql in sqls
), "No trace beginning with: {}".format(prefix)
# Should be at least one executescript
assert any(trace for trace in traces if trace.get("executescript"))
# And at least one executemany
execute_manys = [trace for trace in traces if trace.get("executemany")]
assert execute_manys
assert all(isinstance(trace["count"], int) for trace in execute_manys)
# There should be SQL statements from request handling in the trace.
# Note: CREATE TABLE, INSERT OR REPLACE, executescript, and executemany
# are not expected here because internal tables are now created and
# populated during invoke_startup(), before the request is traced.
assert any(sql.startswith("select ") for sql in sqls), "No select statements traced"
def test_trace_silently_fails_for_large_page():