Warn once when writing to temporary internal database

The internal database defaults to a temporary file that is deleted on
shutdown. Plugin writes that expect persistence currently vanish silently.

Detect such writes via an `internal_ephemeral_writes()` context manager
in `datasette.database`: Datasette wraps `_refresh_schemas` (its own
catalog/metadata writes) with the marker, and `Database.execute_write_fn`
prints a one-time stderr warning when a write hits a `is_temp_disk`
database without the marker set. The warning recommends restarting with
`--internal path/to/file.db` for persistence.
This commit is contained in:
Claude 2026-05-06 01:03:01 +00:00
commit 2e014cda80
No known key found for this signature in database
4 changed files with 156 additions and 37 deletions

View file

@ -73,7 +73,7 @@ from .views.table import (
from .views.row import RowView, RowDeleteView, RowUpdateView
from .renderer import json_renderer
from .url_builder import Urls
from .database import Database, QueryInterrupted
from .database import Database, QueryInterrupted, internal_ephemeral_writes
from .utils import (
PaginatedResources,
@ -380,6 +380,7 @@ class Datasette:
)
self.internal_db_created = False
self._warned_internal_temp_write = False
if internal is None:
self._internal_database = Database(self, is_temp_disk=True)
else:
@ -604,43 +605,44 @@ class Datasette:
async def _refresh_schemas(self):
internal_db = self.get_internal_database()
if not self.internal_db_created:
await init_internal_db(internal_db)
await self.apply_metadata_json()
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
for row in await internal_db.execute(
"select database_name, schema_version from catalog_databases"
with internal_ephemeral_writes():
if not self.internal_db_created:
await init_internal_db(internal_db)
await self.apply_metadata_json()
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
for row in await internal_db.execute(
"select database_name, schema_version from catalog_databases"
)
}
# Delete stale entries for databases that are no longer attached
stale_databases = set(current_schema_versions.keys()) - set(
self.databases.keys()
)
}
# Delete stale entries for databases that are no longer attached
stale_databases = set(current_schema_versions.keys()) - set(
self.databases.keys()
)
for stale_db_name in stale_databases:
await internal_db.execute_write(
"DELETE FROM catalog_databases WHERE database_name = ?",
[stale_db_name],
)
for database_name, db in self.databases.items():
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
# Compare schema versions to see if we should skip it
if schema_version == current_schema_versions.get(database_name):
continue
placeholders = "(?, ?, ?, ?)"
values = [database_name, str(db.path), db.is_memory, schema_version]
if db.path is None:
placeholders = "(?, null, ?, ?)"
values = [database_name, db.is_memory, schema_version]
await internal_db.execute_write(
"""
INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version)
VALUES {}
""".format(placeholders),
values,
)
await populate_schema_tables(internal_db, db)
for stale_db_name in stale_databases:
await internal_db.execute_write(
"DELETE FROM catalog_databases WHERE database_name = ?",
[stale_db_name],
)
for database_name, db in self.databases.items():
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
# Compare schema versions to see if we should skip it
if schema_version == current_schema_versions.get(database_name):
continue
placeholders = "(?, ?, ?, ?)"
values = [database_name, str(db.path), db.is_memory, schema_version]
if db.path is None:
placeholders = "(?, null, ?, ?)"
values = [database_name, db.is_memory, schema_version]
await internal_db.execute_write(
"""
INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version)
VALUES {}
""".format(placeholders),
values,
)
await populate_schema_tables(internal_db, db)
@property
def urls(self):

View file

@ -1,6 +1,8 @@
import asyncio
import atexit
from collections import namedtuple
import contextlib
import contextvars
import inspect
import os
from pathlib import Path
@ -13,6 +15,24 @@ import threading
import uuid
from .tracer import trace
_internal_ephemeral_write = contextvars.ContextVar(
"datasette_internal_ephemeral_write", default=False
)
@contextlib.contextmanager
def internal_ephemeral_writes():
"""Mark internal-database writes inside this block as ephemeral catalog
writes that Datasette recreates on startup. Suppresses the temp-internal-DB
warning for the duration of the block. Plugins should NOT use this their
writes are exactly what the warning is meant to catch."""
token = _internal_ephemeral_write.set(True)
try:
yield
finally:
_internal_ephemeral_write.reset(token)
from .utils import (
call_with_supported_arguments,
detect_fts,
@ -289,6 +309,18 @@ class Database:
async def execute_write_fn(self, fn, block=True, transaction=True, request=None):
self._check_not_closed()
if (
self.is_temp_disk
and not self.ds._warned_internal_temp_write
and not _internal_ephemeral_write.get()
):
self.ds._warned_internal_temp_write = True
print(
"Datasette warning: write to the internal database, which is a "
"temporary file that will be deleted on shutdown. If this data "
"should persist, restart Datasette with --internal path/to/file.db",
file=sys.stderr,
)
pending_events = []
def track_event(event):

View file

@ -2037,6 +2037,8 @@ Plugin authors are asked to practice good etiquette when using the internal data
3. Use temporary tables or shared in-memory attached databases when possible.
4. Avoid implementing features that could expose private data stored in the internal database by other plugins.
If a plugin writes to the default temporary internal database, Datasette prints a one-time warning to standard error noting that the data will be lost on shutdown and recommending the ``--internal path/to/file.db`` flag for persistence. Datasette suppresses this warning for its own catalog and metadata writes, which are recreated on every startup.
.. _internals_internal_schema:
Internal database schema

View file

@ -0,0 +1,83 @@
import pytest
from datasette.app import Datasette
@pytest.mark.asyncio
async def test_no_warning_during_startup(tmp_path, capfd):
"""Catalog/metadata writes during _refresh_schemas must not warn."""
data_db = tmp_path / "data.db"
import sqlite3
conn = sqlite3.connect(str(data_db))
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
conn.close()
ds = Datasette(files=[str(data_db)])
await ds.invoke_startup()
# Trigger the schema refresh / index hit so all catalog writes happen.
response = await ds.client.get("/")
assert response.status_code == 200
captured = capfd.readouterr()
assert "internal database" not in captured.err
assert ds._warned_internal_temp_write is False
@pytest.mark.asyncio
async def test_warning_fires_once_on_plugin_style_write(tmp_path, capfd):
"""A write to the temp internal DB outside the ephemeral context must
print the warning to stderr exactly once."""
data_db = tmp_path / "data.db"
import sqlite3
conn = sqlite3.connect(str(data_db))
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
conn.close()
ds = Datasette(files=[str(data_db)])
await ds.invoke_startup()
await ds.client.get("/")
capfd.readouterr() # discard startup output
internal_db = ds.get_internal_database()
await internal_db.execute_write(
"CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)"
)
first = capfd.readouterr()
assert "internal database" in first.err
assert "--internal" in first.err
assert ds._warned_internal_temp_write is True
# A second plugin write must not warn again.
await internal_db.execute_write(
"INSERT INTO plugin_table (k) VALUES (?)", ["v"]
)
second = capfd.readouterr()
assert second.err == ""
@pytest.mark.asyncio
async def test_no_warning_with_persistent_internal(tmp_path, capfd):
"""When --internal is given, is_temp_disk is False so no warning fires."""
data_db = tmp_path / "data.db"
import sqlite3
conn = sqlite3.connect(str(data_db))
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
conn.close()
ds = Datasette(
files=[str(data_db)],
internal=str(tmp_path / "internal.db"),
)
await ds.invoke_startup()
await ds.client.get("/")
internal_db = ds.get_internal_database()
await internal_db.execute_write(
"CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)"
)
captured = capfd.readouterr()
assert "internal database" not in captured.err
assert ds._warned_internal_temp_write is False