Remove existing stale catalog_ tables, refs #2723

Now if there are any existing stale records in internal.db
those will be removed as well.
This commit is contained in:
Simon Willison 2026-05-20 12:39:54 -07:00
commit 54b272baf6
2 changed files with 64 additions and 11 deletions

View file

@ -614,22 +614,30 @@ class Datasette:
"select database_name, schema_version from catalog_databases"
)
}
# Delete stale entries for databases that are no longer attached
stale_databases = set(current_schema_versions.keys()) - set(
self.databases.keys()
catalog_table_names = (
"catalog_columns",
"catalog_foreign_keys",
"catalog_indexes",
"catalog_views",
"catalog_tables",
"catalog_databases",
)
# Delete stale entries for databases that are no longer attached
catalog_database_names = set(current_schema_versions.keys())
for table in catalog_table_names[:-1]:
catalog_database_names.update(
row["database_name"]
for row in await internal_db.execute(
"select distinct database_name from {}".format(table)
)
if row["database_name"] is not None
)
stale_databases = catalog_database_names - set(self.databases.keys())
if stale_databases:
def delete_stale_database_catalog(conn):
for stale_db_name in stale_databases:
for table in (
"catalog_columns",
"catalog_foreign_keys",
"catalog_indexes",
"catalog_views",
"catalog_tables",
"catalog_databases",
):
for table in catalog_table_names:
conn.execute(
"DELETE FROM {} WHERE database_name = ?".format(table),
[stale_db_name],

View file

@ -187,3 +187,48 @@ async def test_stale_catalog_child_entries_removed_for_missing_database(tmp_path
assert [tuple(row) for row in catalog_tables.rows] == [("alpha", "alpha_table")]
ds2.close()
@pytest.mark.asyncio
async def test_orphan_stale_catalog_child_entries_removed(tmp_path):
from datasette.app import Datasette
import sqlite3
internal_db_path = str(tmp_path / "internal.db")
alpha_db_path = str(tmp_path / "alpha.db")
conn = sqlite3.connect(alpha_db_path)
conn.execute("CREATE TABLE alpha_table (id INTEGER PRIMARY KEY)")
conn.close()
ds1 = Datasette(files=[alpha_db_path], internal=internal_db_path)
await ds1.invoke_startup()
ds1.close()
# Simulate the state left behind by old cleanup code: the parent database
# row was deleted, but child catalog rows survived because foreign key
# enforcement is not enabled for these internal catalog writes.
conn = sqlite3.connect(internal_db_path)
conn.execute("DELETE FROM catalog_databases WHERE database_name = 'fixtures'")
conn.execute("""
INSERT INTO catalog_tables (database_name, table_name, rootpage, sql)
VALUES ('fixtures', 'stale_table', 1, 'CREATE TABLE stale_table (id INTEGER)')
""")
conn.commit()
conn.close()
ds2 = Datasette(files=[alpha_db_path], internal=internal_db_path)
await ds2.invoke_startup()
catalog_tables = await ds2.get_internal_database().execute("""
SELECT database_name, table_name
FROM catalog_tables
ORDER BY database_name, table_name
""")
assert [tuple(row) for row in catalog_tables.rows] == [("alpha", "alpha_table")]
response = await ds2.client.get("/-/tables.json")
assert response.status_code == 200
ds2.close()