diff --git a/datasette/app.py b/datasette/app.py index 358081ef..dcbfa2ad 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -73,7 +73,7 @@ from .views.table import ( from .views.row import RowView, RowDeleteView, RowUpdateView from .renderer import json_renderer from .url_builder import Urls -from .database import Database, QueryInterrupted +from .database import Database, QueryInterrupted, internal_ephemeral_writes from .utils import ( PaginatedResources, @@ -380,6 +380,7 @@ class Datasette: ) self.internal_db_created = False + self._warned_internal_temp_write = False if internal is None: self._internal_database = Database(self, is_temp_disk=True) else: @@ -604,43 +605,44 @@ class Datasette: async def _refresh_schemas(self): internal_db = self.get_internal_database() - if not self.internal_db_created: - await init_internal_db(internal_db) - await self.apply_metadata_json() - self.internal_db_created = True - current_schema_versions = { - row["database_name"]: row["schema_version"] - for row in await internal_db.execute( - "select database_name, schema_version from catalog_databases" + with internal_ephemeral_writes(): + if not self.internal_db_created: + await init_internal_db(internal_db) + await self.apply_metadata_json() + self.internal_db_created = True + current_schema_versions = { + row["database_name"]: row["schema_version"] + for row in await internal_db.execute( + "select database_name, schema_version from catalog_databases" + ) + } + # Delete stale entries for databases that are no longer attached + stale_databases = set(current_schema_versions.keys()) - set( + self.databases.keys() ) - } - # Delete stale entries for databases that are no longer attached - stale_databases = set(current_schema_versions.keys()) - set( - self.databases.keys() - ) - for stale_db_name in stale_databases: - await internal_db.execute_write( - "DELETE FROM catalog_databases WHERE database_name = ?", - [stale_db_name], - ) - for database_name, db in self.databases.items(): - schema_version = (await db.execute("PRAGMA schema_version")).first()[0] - # Compare schema versions to see if we should skip it - if schema_version == current_schema_versions.get(database_name): - continue - placeholders = "(?, ?, ?, ?)" - values = [database_name, str(db.path), db.is_memory, schema_version] - if db.path is None: - placeholders = "(?, null, ?, ?)" - values = [database_name, db.is_memory, schema_version] - await internal_db.execute_write( - """ - INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) - VALUES {} - """.format(placeholders), - values, - ) - await populate_schema_tables(internal_db, db) + for stale_db_name in stale_databases: + await internal_db.execute_write( + "DELETE FROM catalog_databases WHERE database_name = ?", + [stale_db_name], + ) + for database_name, db in self.databases.items(): + schema_version = (await db.execute("PRAGMA schema_version")).first()[0] + # Compare schema versions to see if we should skip it + if schema_version == current_schema_versions.get(database_name): + continue + placeholders = "(?, ?, ?, ?)" + values = [database_name, str(db.path), db.is_memory, schema_version] + if db.path is None: + placeholders = "(?, null, ?, ?)" + values = [database_name, db.is_memory, schema_version] + await internal_db.execute_write( + """ + INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version) + VALUES {} + """.format(placeholders), + values, + ) + await populate_schema_tables(internal_db, db) @property def urls(self): diff --git a/datasette/database.py b/datasette/database.py index e3c4bfec..3f13198d 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,6 +1,8 @@ import asyncio import atexit from collections import namedtuple +import contextlib +import contextvars import inspect import os from pathlib import Path @@ -13,6 +15,24 @@ import threading import uuid from .tracer import trace + + +_internal_ephemeral_write = contextvars.ContextVar( + "datasette_internal_ephemeral_write", default=False +) + + +@contextlib.contextmanager +def internal_ephemeral_writes(): + """Mark internal-database writes inside this block as ephemeral catalog + writes that Datasette recreates on startup. Suppresses the temp-internal-DB + warning for the duration of the block. Plugins should NOT use this — their + writes are exactly what the warning is meant to catch.""" + token = _internal_ephemeral_write.set(True) + try: + yield + finally: + _internal_ephemeral_write.reset(token) from .utils import ( call_with_supported_arguments, detect_fts, @@ -289,6 +309,18 @@ class Database: async def execute_write_fn(self, fn, block=True, transaction=True, request=None): self._check_not_closed() + if ( + self.is_temp_disk + and not self.ds._warned_internal_temp_write + and not _internal_ephemeral_write.get() + ): + self.ds._warned_internal_temp_write = True + print( + "Datasette warning: write to the internal database, which is a " + "temporary file that will be deleted on shutdown. If this data " + "should persist, restart Datasette with --internal path/to/file.db", + file=sys.stderr, + ) pending_events = [] def track_event(event): diff --git a/docs/internals.rst b/docs/internals.rst index e0123a7b..fa2b2345 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2037,6 +2037,8 @@ Plugin authors are asked to practice good etiquette when using the internal data 3. Use temporary tables or shared in-memory attached databases when possible. 4. Avoid implementing features that could expose private data stored in the internal database by other plugins. +If a plugin writes to the default temporary internal database, Datasette prints a one-time warning to standard error noting that the data will be lost on shutdown and recommending the ``--internal path/to/file.db`` flag for persistence. Datasette suppresses this warning for its own catalog and metadata writes, which are recreated on every startup. + .. _internals_internal_schema: Internal database schema diff --git a/tests/test_internal_db_warning.py b/tests/test_internal_db_warning.py new file mode 100644 index 00000000..23365ae8 --- /dev/null +++ b/tests/test_internal_db_warning.py @@ -0,0 +1,83 @@ +import pytest + +from datasette.app import Datasette + + +@pytest.mark.asyncio +async def test_no_warning_during_startup(tmp_path, capfd): + """Catalog/metadata writes during _refresh_schemas must not warn.""" + data_db = tmp_path / "data.db" + import sqlite3 + + conn = sqlite3.connect(str(data_db)) + conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)") + conn.close() + + ds = Datasette(files=[str(data_db)]) + await ds.invoke_startup() + # Trigger the schema refresh / index hit so all catalog writes happen. + response = await ds.client.get("/") + assert response.status_code == 200 + + captured = capfd.readouterr() + assert "internal database" not in captured.err + assert ds._warned_internal_temp_write is False + + +@pytest.mark.asyncio +async def test_warning_fires_once_on_plugin_style_write(tmp_path, capfd): + """A write to the temp internal DB outside the ephemeral context must + print the warning to stderr exactly once.""" + data_db = tmp_path / "data.db" + import sqlite3 + + conn = sqlite3.connect(str(data_db)) + conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)") + conn.close() + + ds = Datasette(files=[str(data_db)]) + await ds.invoke_startup() + await ds.client.get("/") + capfd.readouterr() # discard startup output + + internal_db = ds.get_internal_database() + await internal_db.execute_write( + "CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)" + ) + first = capfd.readouterr() + assert "internal database" in first.err + assert "--internal" in first.err + assert ds._warned_internal_temp_write is True + + # A second plugin write must not warn again. + await internal_db.execute_write( + "INSERT INTO plugin_table (k) VALUES (?)", ["v"] + ) + second = capfd.readouterr() + assert second.err == "" + + +@pytest.mark.asyncio +async def test_no_warning_with_persistent_internal(tmp_path, capfd): + """When --internal is given, is_temp_disk is False so no warning fires.""" + data_db = tmp_path / "data.db" + import sqlite3 + + conn = sqlite3.connect(str(data_db)) + conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)") + conn.close() + + ds = Datasette( + files=[str(data_db)], + internal=str(tmp_path / "internal.db"), + ) + await ds.invoke_startup() + await ds.client.get("/") + + internal_db = ds.get_internal_database() + await internal_db.execute_write( + "CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)" + ) + captured = capfd.readouterr() + assert "internal database" not in captured.err + assert ds._warned_internal_temp_write is False