mirror of
https://github.com/simonw/datasette.git
synced 2026-05-31 14:16:59 +02:00
Warn once when writing to temporary internal database
The internal database defaults to a temporary file that is deleted on shutdown. Plugin writes that expect persistence currently vanish silently. Detect such writes via an `internal_ephemeral_writes()` context manager in `datasette.database`: Datasette wraps `_refresh_schemas` (its own catalog/metadata writes) with the marker, and `Database.execute_write_fn` prints a one-time stderr warning when a write hits a `is_temp_disk` database without the marker set. The warning recommends restarting with `--internal path/to/file.db` for persistence.
This commit is contained in:
parent
aa84fe008d
commit
2e014cda80
4 changed files with 156 additions and 37 deletions
|
|
@ -73,7 +73,7 @@ from .views.table import (
|
|||
from .views.row import RowView, RowDeleteView, RowUpdateView
|
||||
from .renderer import json_renderer
|
||||
from .url_builder import Urls
|
||||
from .database import Database, QueryInterrupted
|
||||
from .database import Database, QueryInterrupted, internal_ephemeral_writes
|
||||
|
||||
from .utils import (
|
||||
PaginatedResources,
|
||||
|
|
@ -380,6 +380,7 @@ class Datasette:
|
|||
)
|
||||
|
||||
self.internal_db_created = False
|
||||
self._warned_internal_temp_write = False
|
||||
if internal is None:
|
||||
self._internal_database = Database(self, is_temp_disk=True)
|
||||
else:
|
||||
|
|
@ -604,43 +605,44 @@ class Datasette:
|
|||
|
||||
async def _refresh_schemas(self):
|
||||
internal_db = self.get_internal_database()
|
||||
if not self.internal_db_created:
|
||||
await init_internal_db(internal_db)
|
||||
await self.apply_metadata_json()
|
||||
self.internal_db_created = True
|
||||
current_schema_versions = {
|
||||
row["database_name"]: row["schema_version"]
|
||||
for row in await internal_db.execute(
|
||||
"select database_name, schema_version from catalog_databases"
|
||||
with internal_ephemeral_writes():
|
||||
if not self.internal_db_created:
|
||||
await init_internal_db(internal_db)
|
||||
await self.apply_metadata_json()
|
||||
self.internal_db_created = True
|
||||
current_schema_versions = {
|
||||
row["database_name"]: row["schema_version"]
|
||||
for row in await internal_db.execute(
|
||||
"select database_name, schema_version from catalog_databases"
|
||||
)
|
||||
}
|
||||
# Delete stale entries for databases that are no longer attached
|
||||
stale_databases = set(current_schema_versions.keys()) - set(
|
||||
self.databases.keys()
|
||||
)
|
||||
}
|
||||
# Delete stale entries for databases that are no longer attached
|
||||
stale_databases = set(current_schema_versions.keys()) - set(
|
||||
self.databases.keys()
|
||||
)
|
||||
for stale_db_name in stale_databases:
|
||||
await internal_db.execute_write(
|
||||
"DELETE FROM catalog_databases WHERE database_name = ?",
|
||||
[stale_db_name],
|
||||
)
|
||||
for database_name, db in self.databases.items():
|
||||
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
|
||||
# Compare schema versions to see if we should skip it
|
||||
if schema_version == current_schema_versions.get(database_name):
|
||||
continue
|
||||
placeholders = "(?, ?, ?, ?)"
|
||||
values = [database_name, str(db.path), db.is_memory, schema_version]
|
||||
if db.path is None:
|
||||
placeholders = "(?, null, ?, ?)"
|
||||
values = [database_name, db.is_memory, schema_version]
|
||||
await internal_db.execute_write(
|
||||
"""
|
||||
INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version)
|
||||
VALUES {}
|
||||
""".format(placeholders),
|
||||
values,
|
||||
)
|
||||
await populate_schema_tables(internal_db, db)
|
||||
for stale_db_name in stale_databases:
|
||||
await internal_db.execute_write(
|
||||
"DELETE FROM catalog_databases WHERE database_name = ?",
|
||||
[stale_db_name],
|
||||
)
|
||||
for database_name, db in self.databases.items():
|
||||
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
|
||||
# Compare schema versions to see if we should skip it
|
||||
if schema_version == current_schema_versions.get(database_name):
|
||||
continue
|
||||
placeholders = "(?, ?, ?, ?)"
|
||||
values = [database_name, str(db.path), db.is_memory, schema_version]
|
||||
if db.path is None:
|
||||
placeholders = "(?, null, ?, ?)"
|
||||
values = [database_name, db.is_memory, schema_version]
|
||||
await internal_db.execute_write(
|
||||
"""
|
||||
INSERT OR REPLACE INTO catalog_databases (database_name, path, is_memory, schema_version)
|
||||
VALUES {}
|
||||
""".format(placeholders),
|
||||
values,
|
||||
)
|
||||
await populate_schema_tables(internal_db, db)
|
||||
|
||||
@property
|
||||
def urls(self):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import asyncio
|
||||
import atexit
|
||||
from collections import namedtuple
|
||||
import contextlib
|
||||
import contextvars
|
||||
import inspect
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
|
@ -13,6 +15,24 @@ import threading
|
|||
import uuid
|
||||
|
||||
from .tracer import trace
|
||||
|
||||
|
||||
_internal_ephemeral_write = contextvars.ContextVar(
|
||||
"datasette_internal_ephemeral_write", default=False
|
||||
)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def internal_ephemeral_writes():
|
||||
"""Mark internal-database writes inside this block as ephemeral catalog
|
||||
writes that Datasette recreates on startup. Suppresses the temp-internal-DB
|
||||
warning for the duration of the block. Plugins should NOT use this — their
|
||||
writes are exactly what the warning is meant to catch."""
|
||||
token = _internal_ephemeral_write.set(True)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
_internal_ephemeral_write.reset(token)
|
||||
from .utils import (
|
||||
call_with_supported_arguments,
|
||||
detect_fts,
|
||||
|
|
@ -289,6 +309,18 @@ class Database:
|
|||
|
||||
async def execute_write_fn(self, fn, block=True, transaction=True, request=None):
|
||||
self._check_not_closed()
|
||||
if (
|
||||
self.is_temp_disk
|
||||
and not self.ds._warned_internal_temp_write
|
||||
and not _internal_ephemeral_write.get()
|
||||
):
|
||||
self.ds._warned_internal_temp_write = True
|
||||
print(
|
||||
"Datasette warning: write to the internal database, which is a "
|
||||
"temporary file that will be deleted on shutdown. If this data "
|
||||
"should persist, restart Datasette with --internal path/to/file.db",
|
||||
file=sys.stderr,
|
||||
)
|
||||
pending_events = []
|
||||
|
||||
def track_event(event):
|
||||
|
|
|
|||
|
|
@ -2037,6 +2037,8 @@ Plugin authors are asked to practice good etiquette when using the internal data
|
|||
3. Use temporary tables or shared in-memory attached databases when possible.
|
||||
4. Avoid implementing features that could expose private data stored in the internal database by other plugins.
|
||||
|
||||
If a plugin writes to the default temporary internal database, Datasette prints a one-time warning to standard error noting that the data will be lost on shutdown and recommending the ``--internal path/to/file.db`` flag for persistence. Datasette suppresses this warning for its own catalog and metadata writes, which are recreated on every startup.
|
||||
|
||||
.. _internals_internal_schema:
|
||||
|
||||
Internal database schema
|
||||
|
|
|
|||
83
tests/test_internal_db_warning.py
Normal file
83
tests/test_internal_db_warning.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import pytest
|
||||
|
||||
from datasette.app import Datasette
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_warning_during_startup(tmp_path, capfd):
|
||||
"""Catalog/metadata writes during _refresh_schemas must not warn."""
|
||||
data_db = tmp_path / "data.db"
|
||||
import sqlite3
|
||||
|
||||
conn = sqlite3.connect(str(data_db))
|
||||
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
|
||||
conn.close()
|
||||
|
||||
ds = Datasette(files=[str(data_db)])
|
||||
await ds.invoke_startup()
|
||||
# Trigger the schema refresh / index hit so all catalog writes happen.
|
||||
response = await ds.client.get("/")
|
||||
assert response.status_code == 200
|
||||
|
||||
captured = capfd.readouterr()
|
||||
assert "internal database" not in captured.err
|
||||
assert ds._warned_internal_temp_write is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_warning_fires_once_on_plugin_style_write(tmp_path, capfd):
|
||||
"""A write to the temp internal DB outside the ephemeral context must
|
||||
print the warning to stderr exactly once."""
|
||||
data_db = tmp_path / "data.db"
|
||||
import sqlite3
|
||||
|
||||
conn = sqlite3.connect(str(data_db))
|
||||
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
|
||||
conn.close()
|
||||
|
||||
ds = Datasette(files=[str(data_db)])
|
||||
await ds.invoke_startup()
|
||||
await ds.client.get("/")
|
||||
capfd.readouterr() # discard startup output
|
||||
|
||||
internal_db = ds.get_internal_database()
|
||||
await internal_db.execute_write(
|
||||
"CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)"
|
||||
)
|
||||
first = capfd.readouterr()
|
||||
assert "internal database" in first.err
|
||||
assert "--internal" in first.err
|
||||
assert ds._warned_internal_temp_write is True
|
||||
|
||||
# A second plugin write must not warn again.
|
||||
await internal_db.execute_write(
|
||||
"INSERT INTO plugin_table (k) VALUES (?)", ["v"]
|
||||
)
|
||||
second = capfd.readouterr()
|
||||
assert second.err == ""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_warning_with_persistent_internal(tmp_path, capfd):
|
||||
"""When --internal is given, is_temp_disk is False so no warning fires."""
|
||||
data_db = tmp_path / "data.db"
|
||||
import sqlite3
|
||||
|
||||
conn = sqlite3.connect(str(data_db))
|
||||
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
|
||||
conn.close()
|
||||
|
||||
ds = Datasette(
|
||||
files=[str(data_db)],
|
||||
internal=str(tmp_path / "internal.db"),
|
||||
)
|
||||
await ds.invoke_startup()
|
||||
await ds.client.get("/")
|
||||
|
||||
internal_db = ds.get_internal_database()
|
||||
await internal_db.execute_write(
|
||||
"CREATE TABLE IF NOT EXISTS plugin_table (k TEXT)"
|
||||
)
|
||||
captured = capfd.readouterr()
|
||||
assert "internal database" not in captured.err
|
||||
assert ds._warned_internal_temp_write is False
|
||||
Loading…
Add table
Add a link
Reference in a new issue