From 7f93353549a330f2c3d76ee5844dd4087db3efcb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 16 Mar 2026 17:56:40 -0700 Subject: [PATCH] Fix startup hook to fire after metadata and schema tables are populated (#2666) * Fix startup hook to fire after metadata and schema tables are populated Previously, the startup() plugin hook fired before internal database tables were populated from metadata.yaml and before catalog schema tables were filled. This meant plugins couldn't read or modify metadata during startup. Now invoke_startup() calls _refresh_schemas() before firing startup hooks, ensuring metadata and catalog tables are available. * Fix startup hook to fire after metadata and schema tables are populated Previously, the startup() plugin hook fired before internal database tables were populated from metadata.yaml and before catalog schema tables were filled. This meant plugins couldn't read or modify metadata during startup. Now invoke_startup() calls _refresh_schemas() before firing startup hooks, ensuring metadata and catalog tables are available. Updated test_tracer to reflect that internal DB creation SQL now runs during startup rather than during the first traced request. * Move check_databases before invoke_startup in CLI serve Since invoke_startup now calls _refresh_schemas() which queries each database, the spatialite connection check must run first to provide the friendly error message instead of a raw OperationalError. 
https://claude.ai/code/session_01KL4t5FZYb32rZY7xaqrrZU --- datasette/app.py | 2 ++ datasette/cli.py | 7 ++++--- tests/plugins/my_plugin_2.py | 12 ++++++++++++ tests/test_plugins.py | 12 ++++++++++++ tests/test_tracer.py | 24 +++++------------------- 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2df6e4e8..f0349895 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -696,6 +696,8 @@ class Datasette: env=self._jinja_env, datasette=self ): await await_me_maybe(hook) + # Ensure internal tables and metadata are populated before startup hooks + await self._refresh_schemas() for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) self._startup_invoked = True diff --git a/datasette/cli.py b/datasette/cli.py index db777fe8..32a4d898 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -661,15 +661,16 @@ def serve( # Private utility mechanism for writing unit tests return ds + # Run async soundness checks before startup hooks, since invoke_startup + # now populates internal tables which requires querying each database + run_sync(lambda: check_databases(ds)) + # Run the "startup" plugin hooks try: run_sync(ds.invoke_startup) except StartupError as e: raise click.ClickException(e.args[0]) - # Run async soundness checks - but only if we're not under pytest - run_sync(lambda: check_databases(ds)) - if headers and not get: raise click.ClickException("--headers can only be used with --get") diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index 35775ef9..9e8d9b2b 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -127,6 +127,18 @@ def startup(datasette): internal_db = datasette.get_internal_database() result = await internal_db.execute("select 1 + 1") datasette._startup_hook_calculation = result.first()[0] + # Check that metadata tables have been populated before startup fires + metadata_rows = await internal_db.execute( + "select key, value from 
metadata_instance" + ) + datasette._startup_metadata_keys = [row["key"] for row in metadata_rows] + # Check that catalog/schema tables have been populated before startup fires + catalog_rows = await internal_db.execute( + "select database_name from catalog_databases" + ) + datasette._startup_catalog_databases = [ + row["database_name"] for row in catalog_rows + ] return inner diff --git a/tests/test_plugins.py b/tests/test_plugins.py index fa9d1a1f..f2a47ab4 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -862,6 +862,18 @@ async def test_hook_startup(ds_client): assert 2 == ds_client.ds._startup_hook_calculation +@pytest.mark.asyncio +async def test_hook_startup_metadata_available(ds_client): + # Metadata from metadata.yaml should be populated before startup() fires + assert "title" in ds_client.ds._startup_metadata_keys + + +@pytest.mark.asyncio +async def test_hook_startup_catalog_populated(ds_client): + # Internal catalog tables should be populated before startup() fires + assert "fixtures" in ds_client.ds._startup_catalog_databases + + @pytest.mark.asyncio async def test_hook_canned_queries(ds_client): queries = (await ds_client.get("/fixtures.json")).json()["queries"] diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 1e0d7001..6cc80fc4 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -32,25 +32,11 @@ def test_trace(trace_debug): assert isinstance(trace.get("params"), (list, dict, None.__class__)) sqls = [trace["sql"] for trace in traces if "sql" in trace] - # There should be a mix of different types of SQL statement - expected = ( - "CREATE TABLE ", - "PRAGMA ", - "INSERT OR REPLACE INTO ", - "INSERT INTO", - "select ", - ) - for prefix in expected: - assert any( - sql.startswith(prefix) for sql in sqls - ), "No trace beginning with: {}".format(prefix) - - # Should be at least one executescript - assert any(trace for trace in traces if trace.get("executescript")) - # And at least one executemany - execute_manys = 
[trace for trace in traces if trace.get("executemany")] - assert execute_manys - assert all(isinstance(trace["count"], int) for trace in execute_manys) + # There should be SQL statements from request handling in the trace. + # Note: CREATE TABLE, INSERT OR REPLACE, executescript, and executemany + # are not expected here because internal tables are now created and + # populated during invoke_startup(), before the request is traced. + assert any(sql.startswith("select ") for sql in sqls), "No select statements traced" def test_trace_silently_fails_for_large_page():