Database(is_temp_disk=True) option, used for internal database (#2684)

Closes #2683

* Add is_temp_disk option to Database for temp file-backed databases

Replace the default in-memory internal database with a temporary
file-backed database using WAL mode. This fixes concurrent read/write
locking errors that occur with named in-memory SQLite databases.

The new is_temp_disk parameter on Database creates a temp file via
tempfile.mkstemp, connects to it as a regular file-based database
with WAL mode enabled, and cleans it up on close() and via atexit.

https://claude.ai/code/session_01TteLrUjpDcARjnP1GMRqz2
This commit is contained in:
Simon Willison 2026-03-30 21:03:21 -07:00 committed by GitHub
commit fc1794719a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 77 additions and 5 deletions

View file

@ -381,7 +381,7 @@ class Datasette:
self.internal_db_created = False
if internal is None:
self._internal_database = Database(self, memory_name=secrets.token_hex())
self._internal_database = Database(self, is_temp_disk=True)
else:
self._internal_database = Database(self, path=internal, mode="rwc")
self._internal_database.name = INTERNAL_DB_NAME

View file

@ -1,10 +1,13 @@
import asyncio
import atexit
from collections import namedtuple
import os
from pathlib import Path
import janus
import queue
import sqlite_utils
import sys
import tempfile
import threading
import uuid
@ -43,6 +46,7 @@ class Database:
is_memory=False,
memory_name=None,
mode=None,
is_temp_disk=False,
):
self.name = None
self._thread_local_id = f"x{self._thread_local_id_counter}"
@ -53,8 +57,19 @@ class Database:
self.is_mutable = is_mutable
self.is_memory = is_memory
self.memory_name = memory_name
self.is_temp_disk = is_temp_disk
if memory_name is not None:
self.is_memory = True
if is_temp_disk:
fd, temp_path = tempfile.mkstemp(suffix=".db", prefix="datasette_temp_")
os.close(fd)
self.path = temp_path
self.is_mutable = True
self.mode = "rwc"
self._wal_enabled = False
atexit.register(self._cleanup_temp_file)
else:
self._wal_enabled = False
self.cached_hash = None
self.cached_size = None
self._cached_table_counts = None
@ -65,7 +80,8 @@ class Database:
self._write_connection = None
# This is used to track all file connections so they can be closed
self._all_file_connections = []
self.mode = mode
if not is_temp_disk:
self.mode = mode
@property
def cached_table_counts(self):
@ -86,6 +102,8 @@ class Database:
return md5_not_usedforsecurity(self.name)[:6]
def suggest_name(self):
if self.is_temp_disk:
return "_temp_disk"
if self.path:
return Path(self.path).stem
elif self.memory_name:
@ -124,12 +142,25 @@ class Database:
f"file:{self.path}{qs}", uri=True, check_same_thread=False, **extra_kwargs
)
self._all_file_connections.append(conn)
if self.is_temp_disk and not self._wal_enabled:
conn.execute("PRAGMA journal_mode=WAL")
self._wal_enabled = True
return conn
def close(self):
    """Close every tracked file connection and release temp-disk resources.

    Useful to avoid running out of file handles in tests. For a temporary
    disk database this also deletes the backing file (and its WAL/SHM
    sidecars) and removes the exit-time cleanup hook, so a closed database
    is not kept alive by the ``atexit`` registry until the process ends.
    Calling ``close()`` more than once is safe.
    """
    for connection in self._all_file_connections:
        connection.close()
    # Forget the closed handles so a repeated close() does not revisit them
    # and so the connection objects can be garbage collected.
    self._all_file_connections.clear()
    if self.is_temp_disk:
        self._cleanup_temp_file()
        # The constructor registered this bound method with atexit; once the
        # file is gone the hook only pins this instance in memory.
        # unregister() matches by equality and is a no-op if not registered.
        atexit.unregister(self._cleanup_temp_file)
def _cleanup_temp_file(self):
    """Best-effort removal of the temp database file and its sidecar files.

    Does nothing unless this is a temporary disk database with a path set.
    Files that are already gone (or cannot be removed) are skipped
    silently, so this is safe to call repeatedly and at interpreter exit.
    """
    if not (self.is_temp_disk and self.path):
        return
    base_path = self.path
    # The main database file plus the "-wal" and "-shm" companions that
    # can sit next to it once WAL journal mode has been switched on.
    for candidate in (base_path + "", base_path + "-wal", base_path + "-shm"):
        try:
            os.unlink(candidate)
        except OSError:
            # Deliberately ignored: cleanup must never raise.
            pass
async def execute_write(self, sql, params=None, block=True, request=None):
def _inner(conn):
@ -405,7 +436,7 @@ class Database:
def hash(self):
if self.cached_hash is not None:
return self.cached_hash
elif self.is_mutable or self.is_memory:
elif self.is_mutable or self.is_memory or self.is_temp_disk:
return None
elif self.ds.inspect_data and self.ds.inspect_data.get(self.name):
self.cached_hash = self.ds.inspect_data[self.name]["hash"]
@ -704,6 +735,8 @@ class Database:
tags.append("mutable")
if self.is_memory:
tags.append("memory")
if self.is_temp_disk:
tags.append("temp_disk")
if self.hash:
tags.append(f"hash={self.hash}")
if self.size is not None:

View file

@ -1552,8 +1552,8 @@ Instances of the ``Database`` class can be used to execute queries against attac
.. _database_constructor:
Database(ds, path=None, is_mutable=True, is_memory=False, memory_name=None)
---------------------------------------------------------------------------
Database(ds, path=None, is_mutable=True, is_memory=False, memory_name=None, is_temp_disk=False)
-----------------------------------------------------------------------------------------------
The ``Database()`` constructor can be used by plugins, in conjunction with :ref:`datasette_add_database`, to create and register new databases.
@ -1574,6 +1574,13 @@ The arguments are as follows:
``memory_name`` - string or ``None``
Use this to create a named in-memory database. Unlike regular memory databases these can be accessed by multiple threads and will persist any changes made to them for the lifetime of the Datasette server process.
``is_temp_disk`` - boolean
Set this to ``True`` to create a temporary file-backed database. This creates a SQLite database in a temporary file on disk (using Python's ``tempfile.mkstemp()``) with WAL mode enabled for better concurrent read/write performance. The temporary file is automatically cleaned up when the database is closed or when the process exits.
Unlike named in-memory databases (``memory_name``), temporary disk databases support concurrent readers and writers without locking errors, because WAL mode allows readers and writers to operate simultaneously. This makes them suitable for use cases like the internal database where concurrent access is common.
When ``is_temp_disk=True``, the ``path``, ``is_mutable``, and ``mode`` parameters are set automatically and should not be provided.
The first argument is the ``datasette`` instance you are attaching to, the second is a ``path=``, then ``is_mutable`` and ``is_memory`` are both optional arguments.
.. _database_hash:
@ -1825,6 +1832,9 @@ The ``Database`` class also provides properties and methods for introspecting th
``db.is_memory`` - boolean
Is this database an in-memory database?
``db.is_temp_disk`` - boolean
Is this database a temporary file-backed database? See :ref:`database_constructor` for details. Temporary disk databases report ``hash`` as ``None`` but have real values for ``size`` and ``mtime_ns`` since they are backed by a file on disk.
``await db.attached_databases()`` - list of named tuples
Returns a list of additional databases that have been connected to this database using the SQLite ATTACH command. Each named tuple has fields ``seq``, ``name`` and ``file``.

View file

@ -767,3 +767,32 @@ async def test_replace_database(tmpdir):
db2 = datasette.get_database("data1")
count = (await db2.execute("select count(*) from t")).first()[0]
assert count == 1
@pytest.mark.parametrize(
    "kwargs,expected_repr",
    [
        pytest.param(
            {"is_memory": True},
            "<Database: test_db (mutable, memory, size=0)>",
            id="memory",
        ),
        pytest.param(
            {"memory_name": "my_mem"},
            "<Database: test_db (mutable, memory, size=0)>",
            id="named_memory",
        ),
        pytest.param(
            {"is_memory": True, "is_mutable": False},
            "<Database: test_db (memory, size=0)>",
            id="immutable_memory",
        ),
    ],
)
def test_repr(app_client, kwargs, expected_repr):
    """repr() of an in-memory Database lists its name, tags and size."""
    database = Database(app_client.ds, **kwargs)
    # The name is normally assigned when the database is registered; set it
    # by hand so the repr is predictable.
    database.name = "test_db"
    assert repr(database) == expected_repr
def test_repr_temp_disk(app_client):
    """repr() of a temp-disk Database shows the temp_disk tag and a real size.

    The exact size is not pinned, so only the prefix and suffix of the repr
    are checked, together with the types of ``size`` and ``mtime_ns``.
    """
    db = Database(app_client.ds, is_temp_disk=True)
    # Close in a finally block so the temp file and any open connections are
    # released even when one of the assertions below fails.
    try:
        db.name = "test_db"
        r = repr(db)
        assert r.startswith("<Database: test_db (mutable, temp_disk, size=")
        assert r.endswith(")>")
        assert isinstance(db.size, int)
        assert isinstance(db.mtime_ns, int)
    finally:
        db.close()