From fc1794719a99812103aa27ad5bf46b4449828642 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 30 Mar 2026 21:03:21 -0700 Subject: [PATCH] Database(is_temp_disk=True) option, used for internal database (#2684) Closes #2683 * Add is_temp_disk option to Database for temp file-backed databases Replace the default in-memory internal database with a temporary file-backed database using WAL mode. This fixes concurrent read/write locking errors that occur with named in-memory SQLite databases. The new is_temp_disk parameter on Database creates a temp file via tempfile.mkstemp, connects to it as a regular file-based database with WAL mode enabled, and cleans it up on close() and via atexit. https://claude.ai/code/session_01TteLrUjpDcARjnP1GMRqz2 --- datasette/app.py | 2 +- datasette/database.py | 37 ++++++++++++++++++++++++++++++-- docs/internals.rst | 14 ++++++++++-- tests/test_internals_database.py | 29 +++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 5 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 4c98e521..ed62c528 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -381,7 +381,7 @@ class Datasette: self.internal_db_created = False if internal is None: - self._internal_database = Database(self, memory_name=secrets.token_hex()) + self._internal_database = Database(self, is_temp_disk=True) else: self._internal_database = Database(self, path=internal, mode="rwc") self._internal_database.name = INTERNAL_DB_NAME diff --git a/datasette/database.py b/datasette/database.py index ffbbebba..8b824462 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,10 +1,13 @@ import asyncio +import atexit from collections import namedtuple +import os from pathlib import Path import janus import queue import sqlite_utils import sys +import tempfile import threading import uuid @@ -43,6 +46,7 @@ class Database: is_memory=False, memory_name=None, mode=None, + is_temp_disk=False, ): self.name = None self._thread_local_id = f"x{self._thread_local_id_counter}" @@ -53,8 +57,19 @@ class Database: self.is_mutable = is_mutable self.is_memory = is_memory self.memory_name = memory_name + self.is_temp_disk = is_temp_disk if memory_name is not None: self.is_memory = True + if is_temp_disk: + fd, temp_path = tempfile.mkstemp(suffix=".db", prefix="datasette_temp_") + os.close(fd) + self.path = temp_path + self.is_mutable = True + self.mode = "rwc" + self._wal_enabled = False + atexit.register(self._cleanup_temp_file) + else: + self._wal_enabled = False self.cached_hash = None self.cached_size = None self._cached_table_counts = None @@ -65,7 +80,8 @@ class Database: self._write_connection = None # This is used to track all file connections so they can be closed self._all_file_connections = [] - self.mode = mode + if not is_temp_disk: + self.mode = mode @property def cached_table_counts(self): @@ -86,6 +102,8 @@ class Database: return md5_not_usedforsecurity(self.name)[:6] def suggest_name(self): + if self.is_temp_disk: + return "_temp_disk" if self.path: return Path(self.path).stem elif self.memory_name: @@ -124,12 +142,25 @@ class Database: f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs ) self._all_file_connections.append(conn) + if self.is_temp_disk and not self._wal_enabled: + conn.execute("PRAGMA journal_mode=WAL") + self._wal_enabled = True return conn def close(self): # Close all connections - useful to avoid running out of file handles in tests for connection in self._all_file_connections: connection.close() + if self.is_temp_disk: + self._cleanup_temp_file() + + def _cleanup_temp_file(self): + if self.is_temp_disk and self.path: + for suffix in ("", "-wal", "-shm"): + try: + os.unlink(self.path + suffix) + except OSError: + pass async def execute_write(self, sql, params=None, block=True, request=None): def _inner(conn): @@ -405,7 +436,7 @@ class Database: def hash(self): if self.cached_hash is not None: return self.cached_hash - elif self.is_mutable or self.is_memory: + elif self.is_mutable or self.is_memory or self.is_temp_disk: return None elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): self.cached_hash = self.ds.inspect_data[self.name]["hash"] @@ -704,6 +735,8 @@ class Database: tags.append("mutable") if self.is_memory: tags.append("memory") + if self.is_temp_disk: + tags.append("temp_disk") if self.hash: tags.append(f"hash={self.hash}") if self.size is not None: diff --git a/docs/internals.rst b/docs/internals.rst index 367ec223..06a6b348 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1552,8 +1552,8 @@ Instances of the ``Database`` class can be used to execute queries against attac .. _database_constructor: -Database(ds, path=None, is_mutable=True, is_memory=False, memory_name=None) ---------------------------------------------------------------------------- +Database(ds, path=None, is_mutable=True, is_memory=False, memory_name=None, is_temp_disk=False) +----------------------------------------------------------------------------------------------- The ``Database()`` constructor can be used by plugins, in conjunction with :ref:`datasette_add_database`, to create and register new databases. @@ -1574,6 +1574,13 @@ The arguments are as follows: ``memory_name`` - string or ``None`` Use this to create a named in-memory database. Unlike regular memory databases these can be accessed by multiple threads and will persist an changes made to them for the lifetime of the Datasette server process. +``is_temp_disk`` - boolean + Set this to ``True`` to create a temporary file-backed database. This creates a SQLite database in a temporary file on disk (using Python's ``tempfile.mkstemp()``) with WAL mode enabled for better concurrent read/write performance. The temporary file is automatically cleaned up when the database is closed or when the process exits. + + Unlike named in-memory databases (``memory_name``), temporary disk databases support concurrent readers and writers without locking errors, because WAL mode allows readers and writers to operate simultaneously. This makes them suitable for use cases like the internal database where concurrent access is common. + + When ``is_temp_disk=True``, the ``path``, ``is_mutable``, and ``mode`` parameters are set automatically and should not be provided. + The first argument is the ``datasette`` instance you are attaching to, the second is a ``path=``, then ``is_mutable`` and ``is_memory`` are both optional arguments. .. _database_hash: @@ -1825,6 +1832,9 @@ The ``Database`` class also provides properties and methods for introspecting th ``db.is_memory`` - boolean Is this database an in-memory database? +``db.is_temp_disk`` - boolean + Is this database a temporary file-backed database? See :ref:`database_constructor` for details. Temporary disk databases report ``hash`` as ``None`` but have real values for ``size`` and ``mtime_ns`` since they are backed by a file on disk. + ``await db.attached_databases()`` - list of named tuples Returns a list of additional databases that have been connected to this database using the SQLite ATTACH command. Each named tuple has fields ``seq``, ``name`` and ``file``. diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 5e3459cd..9a83dd4f 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -767,3 +767,32 @@ async def test_replace_database(tmpdir): db2 = datasette.get_database("data1") count = (await db2.execute("select count(*) from t")).first()[0] assert count == 1 + + +@pytest.mark.parametrize( + "kwargs,expected_repr", + [ + ({"is_memory": True}, ""), + ({"memory_name": "my_mem"}, ""), + ( + {"is_memory": True, "is_mutable": False}, + "", + ), + ], + ids=["memory", "named_memory", "immutable_memory"], +) +def test_repr(app_client, kwargs, expected_repr): + db = Database(app_client.ds, **kwargs) + db.name = "test_db" + assert repr(db) == expected_repr + + +def test_repr_temp_disk(app_client): + db = Database(app_client.ds, is_temp_disk=True) + db.name = "test_db" + r = repr(db) + assert r.startswith("") + assert isinstance(db.size, int) + assert isinstance(db.mtime_ns, int) + db.close()