From dabf8e4199cd4598697e538c495cc66aa429a262 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:08:46 -0700 Subject: [PATCH 01/12] Database.close() shuts down write thread and raises DatasetteClosedError After this commit, Database.close() sends a sentinel to the write queue so the background write thread exits cleanly, closes cached read/write connections, and marks the instance closed. Subsequent calls to execute*() raise DatasetteClosedError. close() remains idempotent and one-way. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- datasette/database.py | 79 +++++++++++++++++++++++++++++++- docs/internals.rst | 6 ++- tests/test_internals_database.py | 56 ++++++++++++++++++++++ 3 files changed, 138 insertions(+), 3 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 7364ff7f..e3c4bfec 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -34,6 +34,13 @@ connections = threading.local() AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) +class DatasetteClosedError(RuntimeError): + """Raised when using a Datasette or Database instance after close().""" + + +_SHUTDOWN = object() + + class Database: # For table counts stop at this many rows: count_limit = 10000 @@ -76,6 +83,7 @@ class Database: self._cached_table_counts = None self._write_thread = None self._write_queue = None + self._closed = False # These are used when in non-threaded mode: self._read_connection = None self._write_connection = None @@ -84,6 +92,12 @@ class Database: if not is_temp_disk: self.mode = mode + def _check_not_closed(self): + if self._closed: + raise DatasetteClosedError( + "Database {!r} has been closed".format(self.name) + ) + @property def cached_table_counts(self): if self._cached_table_counts is not None: @@ -149,9 +163,53 @@ class Database: return conn def close(self): - # Close all connections - useful to avoid running out of file handles in tests + """Release all resources held by this database. + + Idempotent. After close() further calls to execute()/execute_fn()/ + execute_write()/execute_write_fn() raise DatasetteClosedError. + """ + if self._closed: + return + self._closed = True + # Shut down the write thread, if any, via a sentinel. The thread + # drains any writes already queued before the sentinel and then + # closes its own write connection and returns. + write_thread = self._write_thread + if write_thread is not None and self._write_queue is not None: + self._write_queue.put(_SHUTDOWN) + write_thread.join(timeout=10) + if write_thread.is_alive(): + sys.stderr.write( + "Datasette: write thread for {!r} did not exit within 10s\n".format( + self.name + ) + ) + sys.stderr.flush() + # Close anything still tracked in _all_file_connections for connection in self._all_file_connections: - connection.close() + try: + connection.close() + except Exception: + pass + self._all_file_connections = [] + # Drop per-thread cached read connections we can reach + try: + delattr(connections, self._thread_local_id) + except AttributeError: + pass + # Close non-threaded-mode cached connections if still open + if self._read_connection is not None: + try: + self._read_connection.close() + except Exception: + pass + self._read_connection = None + if self._write_connection is not None: + try: + self._write_connection.close() + except Exception: + pass + self._write_connection = None if self.is_temp_disk: self._cleanup_temp_file() @@ -164,6 +222,8 @@ class Database: pass async def execute_write(self, sql, params=None, block=True, request=None): + self._check_not_closed() + def _inner(conn): return conn.execute(sql, params or []) @@ -172,6 +232,8 @@ class Database: return results async def execute_write_script(self, sql, block=True, request=None): + self._check_not_closed() + def _inner(conn): return conn.executescript(sql) @@ -182,6 +244,8 @@ class Database: return results async def execute_write_many(self, sql, params_seq, block=True, request=None): + self._check_not_closed() + def _inner(conn): count = 0 @@ -203,6 +267,7 @@ class Database: return results async def execute_isolated_fn(self, fn): + self._check_not_closed() # Open a new connection just for the duration of this function # blocking the write queue to avoid any writes occurring during it if self.ds.executor is None: @@ -223,6 +288,7 @@ class Database: return await self._send_to_write_thread(fn, isolated_connection=True) async def execute_write_fn(self, fn, block=True, transaction=True, request=None): + self._check_not_closed() pending_events = [] def track_event(event): @@ -334,6 +400,13 @@ class Database: conn_exception = e while True: task = self._write_queue.get() + if task is _SHUTDOWN: + if conn is not None: + try: + conn.close() + except Exception: + pass + return if conn_exception is not None: result = conn_exception else: @@ -366,6 +439,7 @@ class Database: task.reply_queue.sync_q.put(result) async def execute_fn(self, fn): + self._check_not_closed() if self.ds.executor is None: # non-threaded mode if self._read_connection is None: @@ -396,6 +470,7 @@ class Database: log_sql_errors=True, ): """Executes sql against db_name in a thread""" + self._check_not_closed() page_size = page_size or self.ds.page_size def sql_operation_in_thread(conn): diff --git a/docs/internals.rst b/docs/internals.rst index ba9d3131..53c20106 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1830,7 +1830,11 @@ The return value of the function will be returned by this method. Any exceptions db.close() ---------- -Closes all of the open connections to file-backed databases. This is mainly intended to be used by large test suites, to avoid hitting limits on the number of open files. +Release all resources held by this ``Database`` instance. This shuts down the background write thread (if one was started by a previous call to :ref:`database_execute_write_fn` or similar), closes the write connection, and closes any cached read connections. + +After ``db.close()`` has been called, any further call to :ref:`database_execute`, :ref:`database_execute_fn`, :ref:`database_execute_write`, :ref:`database_execute_write_fn`, :ref:`database_execute_write_many`, :ref:`database_execute_write_script` or :ref:`database_execute_isolated_fn` will raise a ``datasette.database.DatasetteClosedError`` exception. + +``close()`` is idempotent — calling it a second time is a no-op. It is one-way: a closed ``Database`` cannot be reopened. .. _internals_database_introspection: diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 0d565d61..8ff74a83 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -4,6 +4,7 @@ Tests for the datasette.database.Database class from datasette.app import Datasette from datasette.database import Database, Results, MultipleValues +from datasette.database import DatasetteClosedError from datasette.utils.sqlite import sqlite3, sqlite_version from datasette.utils import Column import pytest @@ -833,3 +834,58 @@ def test_repr_temp_disk(app_client): assert isinstance(db.size, int) assert isinstance(db.mtime_ns, int) db.close() + + +@pytest.mark.asyncio +async def test_database_close_shuts_down_write_thread(tmpdir): + path = str(tmpdir / "dbclose.db") + conn = sqlite3.connect(path) + conn.execute("create table t (id integer primary key)") + conn.close() + ds = Datasette([path]) + db = ds.get_database("dbclose") + # Trigger write thread creation + await db.execute_write("insert into t (id) values (1)") + assert db._write_thread is not None + assert db._write_thread.is_alive() + db.close() + # Wait briefly for the thread to exit — the sentinel should cause it to return. + db._write_thread.join(timeout=5) + assert not db._write_thread.is_alive() + ds._internal_database.close() + + +@pytest.mark.asyncio +async def test_database_close_raises_on_further_use(tmpdir): + path = str(tmpdir / "closed.db") + conn = sqlite3.connect(path) + conn.execute("create table t (id integer primary key)") + conn.close() + ds = Datasette([path]) + db = ds.get_database("closed") + await db.execute("select 1") + db.close() + with pytest.raises(DatasetteClosedError): + await db.execute("select 1") + with pytest.raises(DatasetteClosedError): + await db.execute_write("insert into t (id) values (1)") + with pytest.raises(DatasetteClosedError): + await db.execute_fn(lambda conn: conn.execute("select 1").fetchone()) + with pytest.raises(DatasetteClosedError): + await db.execute_write_fn(lambda conn: conn.execute("select 1")) + ds._internal_database.close() + + +@pytest.mark.asyncio +async def test_database_close_is_idempotent(tmpdir): + path = str(tmpdir / "idemp.db") + conn = sqlite3.connect(path) + conn.execute("create table t (id integer primary key)") + conn.close() + ds = Datasette([path]) + db = ds.get_database("idemp") + await db.execute_write("insert into t (id) values (1)") + db.close() + # Second call should be a no-op, not raise + db.close() + ds._internal_database.close() From 290f27158f1b53a9a90a36dbfb271ffbb6eef310 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:10:18 -0700 Subject: [PATCH 02/12] Datasette.close() closes databases, shuts down executor, unlinks temp file Datasette.close() iterates over every attached Database (including the internal database), calls Database.close() on each, then shuts down the ThreadPoolExecutor. Exceptions raised by one Database don't prevent the others from being closed; the first exception is re-raised afterwards. Idempotent. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- datasette/app.py | 28 +++++++++++++++ docs/internals.rst | 13 +++++++ tests/test_internals_datasette.py | 59 +++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index 0f417ec9..367f38f9 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -326,6 +326,7 @@ class Datasette: default_deny=False, ): self._startup_invoked = False + self._closed = False assert config_dir is None or isinstance( config_dir, Path ), "config_dir= should be a pathlib.Path" @@ -834,6 +835,33 @@ class Datasette: new_databases.pop(name) self.databases = new_databases + def close(self): + """Release all resources held by this Datasette instance. + + Closes every attached Database (including the internal database), + shuts down the executor, and unlinks the temporary file used for + the internal database if one was created. Idempotent and one-way. + """ + if self._closed: + return + self._closed = True + first_exception = None + dbs = list(self.databases.values()) + [self._internal_database] + for db in dbs: + try: + db.close() + except Exception as e: + if first_exception is None: + first_exception = e + if self.executor is not None: + try: + self.executor.shutdown(wait=True, cancel_futures=True) + except Exception as e: + if first_exception is None: + first_exception = e + if first_exception is not None: + raise first_exception + def setting(self, key): return self._settings.get(key, None) diff --git a/docs/internals.rst b/docs/internals.rst index 53c20106..2710345b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1079,6 +1079,19 @@ The ``name`` and ``route`` parameters are optional and work the same way as they This removes a database that has been previously added. ``name=`` is the unique name of that database. +.. _datasette_close: + +.close() +-------- + +Release all resources held by this ``Datasette`` instance. This calls :ref:`database_close` on every attached database (including the internal database), shuts down the thread pool executor used to run SQL queries, and unlinks the temporary file used to back the internal database if one was created. + +``close()`` is synchronous, idempotent and one-way: after a call to ``close()`` any attempt to use the Datasette instance to execute SQL will raise a ``datasette.database.DatasetteClosedError`` exception. A closed ``Datasette`` cannot be reopened — callers that need a fresh instance should construct a new one. + +If a call to ``Database.close()`` on one of the attached databases raises an exception, ``Datasette.close()`` will continue trying to close the remaining databases and will re-raise the first exception after every database has been processed. + +When Datasette is being served over ASGI the ``close()`` method is wired up to the lifespan shutdown event, so resources are released cleanly on ``SIGTERM`` / ``SIGINT``. + .. _datasette_track_event: await .track_event(event) diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index ec0180a7..5f773658 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -3,8 +3,10 @@ Tests for the datasette.app.Datasette class """ import dataclasses +import os from datasette import Context from datasette.app import Datasette, Database, ResourcesSQL +from datasette.database import DatasetteClosedError from datasette.resources import DatabaseResource from itsdangerous import BadSignature import pytest @@ -213,3 +215,60 @@ async def test_allowed_resources_sql(datasette): assert isinstance(result, ResourcesSQL) assert "all_rules AS" in result.sql assert result.params["action"] == "view-table" + + +@pytest.mark.asyncio +async def test_datasette_close_closes_all_databases_and_executor(): + ds = Datasette(memory=True) + await ds.invoke_startup() + # Confirm internal DB has write machinery running + assert ds._internal_database._write_thread is not None + assert ds._internal_database._write_thread.is_alive() + temp_path = ds._internal_database.path + assert os.path.exists(temp_path) + executor = ds.executor + ds.close() + # Executor is shut down + assert executor._shutdown + # All attached Database instances are closed + for db in ds.databases.values(): + assert db._closed + assert ds._internal_database._closed + # Temp internal DB file is unlinked + assert not os.path.exists(temp_path) + + +@pytest.mark.asyncio +async def test_datasette_close_is_idempotent(): + ds = Datasette(memory=True) + await ds.invoke_startup() + ds.close() + # Second call should be a no-op + ds.close() + + +@pytest.mark.asyncio +async def test_datasette_close_raises_on_use(): + ds = Datasette(memory=True) + await ds.invoke_startup() + ds.close() + with pytest.raises(DatasetteClosedError): + await ds.get_internal_database().execute("select 1") + + +@pytest.mark.asyncio +async def test_datasette_close_continues_past_db_error(): + # If one Database raises during close(), the others still get closed. + ds = Datasette(memory=True) + await ds.invoke_startup() + + class Boom(Database): + def close(self): + raise RuntimeError("boom") + + bad = ds.add_database(Boom(ds, is_memory=True), name="bad") + good = ds.add_database(Database(ds, is_memory=True), name="good") + with pytest.raises(RuntimeError, match="boom"): + ds.close() + assert good._closed + assert ds._internal_database._closed From d72dd3537850988fb24cd53d50c690dc7acb4332 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:11:02 -0700 Subject: [PATCH 03/12] Wire Datasette.close into ASGI lifespan shutdown AsgiLifespan now receives an on_shutdown callback that invokes Datasette.close(), so resources are released cleanly when the ASGI server delivers a lifespan.shutdown message (SIGTERM / SIGINT for uvicorn). Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- datasette/app.py | 5 ++++- tests/test_internals_datasette.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 367f38f9..358081ef 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2338,10 +2338,13 @@ class Datasette: if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) + async def _close_on_shutdown(): + self.close() + asgi = CrossOriginProtectionMiddleware(DatasetteRouter(self, routes), self) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) - asgi = AsgiLifespan(asgi) + asgi = AsgiLifespan(asgi, on_shutdown=[_close_on_shutdown]) asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 5f773658..11463eda 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -256,6 +256,29 @@ async def test_datasette_close_raises_on_use(): await ds.get_internal_database().execute("select 1") +@pytest.mark.asyncio +async def test_asgi_lifespan_shutdown_closes_datasette(): + ds = Datasette(memory=True) + app = ds.app() + # Drive an ASGI lifespan: startup, then shutdown. + messages_sent = [] + inbox = [ + {"type": "lifespan.startup"}, + {"type": "lifespan.shutdown"}, + ] + + async def receive(): + return inbox.pop(0) + + async def send(message): + messages_sent.append(message) + + await app({"type": "lifespan"}, receive, send) + assert {"type": "lifespan.startup.complete"} in messages_sent + assert {"type": "lifespan.shutdown.complete"} in messages_sent + assert ds._closed + + @pytest.mark.asyncio async def test_datasette_close_continues_past_db_error(): # If one Database raises during close(), the others still get closed. From 34cc320eabb09d7d62f8a6045b868c746adfe9d2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:15:50 -0700 Subject: [PATCH 04/12] Pytest auto-close plugin for Datasette instances Installs a pytest11 entry point so that every Datasette() constructed inside a pytest_runtest_call phase is auto-closed at the end of the test. Fixture-scoped instances are untouched. Opt out via the datasette_autoclose = false ini option. This gives large test suites a safety net against FD exhaustion and leaked write threads from the now-default temp-disk internal database without requiring every existing test to be rewritten. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- datasette/_pytest_plugin.py | 78 ++++++++++++++++++++++ docs/testing_plugins.rst | 16 +++++ pyproject.toml | 3 + tests/test_pytest_autoclose_plugin.py | 93 +++++++++++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 datasette/_pytest_plugin.py create mode 100644 tests/test_pytest_autoclose_plugin.py diff --git a/datasette/_pytest_plugin.py b/datasette/_pytest_plugin.py new file mode 100644 index 00000000..3f6c0d96 --- /dev/null +++ b/datasette/_pytest_plugin.py @@ -0,0 +1,78 @@ +""" +Pytest plugin that automatically closes any Datasette instances constructed +inside a test body. Fixture-scoped instances survive. + +Registered as a pytest11 entry point in pyproject.toml so that downstream +projects using Datasette get the same FD-safety net for their own tests. + +Opt out by setting ``datasette_autoclose = false`` in pytest.ini (or the +equivalent ini file). +""" + +from __future__ import annotations + +import contextvars +import weakref + +import pytest + +from datasette.app import Datasette + +_active_instances: contextvars.ContextVar[list | None] = contextvars.ContextVar( + "datasette_active_instances", default=None +) + +_original_init = Datasette.__init__ + + +def _tracking_init(self, *args, **kwargs): + _original_init(self, *args, **kwargs) + instances = _active_instances.get() + if instances is not None: + instances.append(weakref.ref(self)) + + +Datasette.__init__ = _tracking_init + + +def pytest_addoption(parser): + parser.addini( + "datasette_autoclose", + help=( + "Automatically close Datasette instances created inside test " + "bodies (default: true)." + ), + default="true", + ) + + +def _enabled(config) -> bool: + value = config.getini("datasette_autoclose") + if isinstance(value, bool): + return value + return str(value).strip().lower() not in ("false", "0", "no", "off") + + +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_call(item): + if not _enabled(item.config): + yield + return + refs: list[weakref.ref] = [] + token = _active_instances.set(refs) + try: + yield + finally: + _active_instances.reset(token) + for ref in reversed(refs): + ds = ref() + if ds is None: + continue + try: + ds.close() + except Exception as e: + item.warn( + pytest.PytestUnraisableExceptionWarning( + f"Error closing Datasette instance: {e!r}" + ) + ) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 1b10c132..070ab6cf 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -82,6 +82,22 @@ This method registers any :ref:`plugin_hook_startup` or :ref:`plugin_hook_prepar If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - Datasette automatically calls ``invoke_startup()`` the first time it handles a request. +.. _testing_plugins_autoclose: + +Automatic cleanup of Datasette instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Installing Datasette also installs a small pytest plugin that automatically calls :ref:`datasette_close` on any ``Datasette()`` instance constructed inside the body of a test function. This helps prevent large test suites from running out of file descriptors or leaking background threads from the hundreds of instances they may build up across a session. + +Instances created inside a pytest fixture are **not** closed by this plugin — pytest fixtures often create a single ``Datasette`` that is shared across many tests, and closing it automatically would break those tests. If you need a per-test instance and want to share it between multiple tests, create it inside a fixture rather than at the top level of a test function. + +If you need to opt out of this behavior, add the following to your ``pytest.ini`` (or equivalent): + +.. code-block:: ini + + [pytest] + datasette_autoclose = false + .. _testing_datasette_client: Using datasette.client in tests diff --git a/pyproject.toml b/pyproject.toml index a0ee050c..4a4ed75e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,9 @@ CI = "https://github.com/simonw/datasette/actions?query=workflow%3ATest" [project.scripts] datasette = "datasette.cli:cli" +[project.entry-points.pytest11] +datasette = "datasette._pytest_plugin" + [dependency-groups] dev = [ "pytest>=9", diff --git a/tests/test_pytest_autoclose_plugin.py b/tests/test_pytest_autoclose_plugin.py new file mode 100644 index 00000000..78154ef5 --- /dev/null +++ b/tests/test_pytest_autoclose_plugin.py @@ -0,0 +1,93 @@ +""" +Tests for datasette._pytest_plugin — the pytest plugin that auto-closes +Datasette instances constructed inside test bodies. + +These tests drive a real pytest session in a subprocess so the plugin +operates exactly as it would for a downstream consumer. +""" + +import subprocess +import sys +import textwrap +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).parent.parent + + +def _run_pytest(tmp_path: Path) -> subprocess.CompletedProcess: + return subprocess.run( + [sys.executable, "-m", "pytest", "-v", str(tmp_path)], + cwd=str(tmp_path), + capture_output=True, + text=True, + ) + + +def test_auto_close_of_instances_made_in_test_body(tmp_path): + # Two ordered tests: + # test_a makes a Datasette() and stashes a hard reference + # test_b asserts that the hard-reffed instance was closed by the plugin + (tmp_path / "test_sample.py").write_text(textwrap.dedent(""" + from datasette.app import Datasette + + _stash = {} + + def test_a(): + ds = Datasette(memory=True) + _stash["ds"] = ds + assert ds._closed is False + + def test_b(): + assert _stash["ds"]._closed is True + """)) + result = _run_pytest(tmp_path) + assert result.returncode == 0, result.stdout + result.stderr + + +def test_fixture_scoped_instance_is_not_closed(tmp_path): + # A module-scoped fixture instance must survive across tests in the module. + (tmp_path / "test_fixture.py").write_text(textwrap.dedent(""" + import pytest + from datasette.app import Datasette + + @pytest.fixture(scope="module") + def ds(): + return Datasette(memory=True) + + def test_first(ds): + assert ds._closed is False + + def test_second(ds): + # Still alive because the plugin only tracks instances + # constructed during pytest_runtest_call, not during fixture + # setup. + assert ds._closed is False + """)) + result = _run_pytest(tmp_path) + assert result.returncode == 0, result.stdout + result.stderr + + +def test_opt_out_via_ini(tmp_path): + # datasette_autoclose = false should leave instances untouched. + (tmp_path / "pytest.ini").write_text(textwrap.dedent(""" + [pytest] + datasette_autoclose = false + """).strip()) + (tmp_path / "test_optout.py").write_text(textwrap.dedent(""" + from datasette.app import Datasette + + _stash = {} + + def test_a(): + ds = Datasette(memory=True) + _stash["ds"] = ds + + def test_b(): + # Opt-out: plugin must not have closed it. + assert _stash["ds"]._closed is False + _stash["ds"].close() + """)) + result = _run_pytest(tmp_path) + assert result.returncode == 0, result.stdout + result.stderr From c0153386ef20126a289da96204718570d571b4b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:18:05 -0700 Subject: [PATCH 05/12] FD-leak regression test for Datasette.close() Creates and disposes 50 Datasette instances in a loop and asserts that the number of open file descriptors and live threads does not grow, exercising the full close() path end to end. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 1 + tests/test_fd_leak.py | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/test_fd_leak.py diff --git a/pyproject.toml b/pyproject.toml index 4a4ed75e..e6007afd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dev = [ "myst-parser", "sphinx-markdown-builder", "ruamel.yaml", + "psutil>=5.9", ] [project.optional-dependencies] diff --git a/tests/test_fd_leak.py b/tests/test_fd_leak.py new file mode 100644 index 00000000..926722a1 --- /dev/null +++ b/tests/test_fd_leak.py @@ -0,0 +1,56 @@ +""" +Regression test for https://github.com/simonw/datasette/issues/2692 — +confirm that creating and closing Datasette instances in a loop does not +leak open file descriptors. + +Each Datasette() with is_temp_disk internal DB opens a temp file and a +write thread with its own SQLite connection. Without Datasette.close() +nothing unwinds this state, and a large pytest run exhausts the process +FD limit. +""" + +import asyncio +import threading + +import pytest + +try: + import psutil +except ImportError: # pragma: no cover + psutil = None + +from datasette.app import Datasette + + +def _count_open_files(): + return len(psutil.Process().open_files()) + + +def _count_threads(): + return threading.active_count() + + +@pytest.mark.skipif(psutil is None, reason="psutil not installed") +def test_close_releases_file_descriptors(): + # Warm-up so Python/library caches don't skew the baseline + ds = Datasette(memory=True) + asyncio.run(ds.invoke_startup()) + ds.close() + + baseline_fds = _count_open_files() + baseline_threads = _count_threads() + + for _ in range(50): + ds = Datasette(memory=True) + asyncio.run(ds.invoke_startup()) + ds.close() + + after_fds = _count_open_files() + after_threads = _count_threads() + + assert ( + after_fds - baseline_fds <= 2 + ), f"Leaked FDs: baseline={baseline_fds}, after=50 iterations={after_fds}" + assert ( + after_threads - baseline_threads <= 2 + ), f"Leaked threads: baseline={baseline_threads}, after={after_threads}" From d23b32c3e57469f0c0de149aee8594205dfdb319 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:25:58 -0700 Subject: [PATCH 06/12] Call ds.close() in more places in tests Refs #2692 --- tests/test_api_write.py | 7 +------ tests/test_column_types.py | 10 ++-------- tests/test_docs_plugins.py | 1 + tests/test_write_wrapper.py | 1 + 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 9ba08848..64f91701 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -22,12 +22,7 @@ def ds_write(tmp_path_factory): ds = Datasette([db_path], immutables=[db_path_immutable]) ds.root_enabled = True yield ds - # Close both setup connections plus any Datasette-managed connections. - db1.close() - db2.close() - for database in ds.databases.values(): - if not database.is_memory: - database.close() + ds.close() def write_token(ds, actor_id="root", permissions=None): diff --git a/tests/test_column_types.py b/tests/test_column_types.py index 6e89acb9..d77f2cf5 100644 --- a/tests/test_column_types.py +++ b/tests/test_column_types.py @@ -52,10 +52,7 @@ def ds_ct(tmp_path_factory): ) ds.root_enabled = True yield ds - db.close() - for database in ds.databases.values(): - if not database.is_memory: - database.close() + ds.close() @pytest.fixture @@ -95,10 +92,7 @@ def ds_ct_editor_permission(tmp_path_factory): ) ds.root_enabled = True yield ds - db.close() - for database in ds.databases.values(): - if not database.is_memory: - database.close() + ds.close() def write_token(ds, actor_id="root", permissions=None): diff --git a/tests/test_docs_plugins.py b/tests/test_docs_plugins.py index c51858d3..613160ac 100644 --- a/tests/test_docs_plugins.py +++ b/tests/test_docs_plugins.py @@ -23,6 +23,7 @@ async def datasette_with_plugin(): yield datasette finally: datasette.pm.unregister(name="undo") + datasette.close() # -- end datasette_with_plugin_fixture -- diff --git a/tests/test_write_wrapper.py b/tests/test_write_wrapper.py index c2ceb344..48c964b4 100644 --- a/tests/test_write_wrapper.py +++ b/tests/test_write_wrapper.py @@ -505,6 +505,7 @@ def ds_with_event_tracking(tmp_path): ds.track_event = recording_track_event yield ds + ds.close() @pytest.mark.asyncio From df96e12737454077c707f469506be0ee96091965 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:32:19 -0700 Subject: [PATCH 07/12] Auto-close Datasette instances from function-scoped fixtures too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plugin now tracks instances across the full test protocol (setup, call, teardown) and closes all of them at the end — including ones created inside function-scoped pytest fixtures. Session-, module-, class- and package-scoped fixtures are still exempted by subtracting any instances their setup adds from the tracking list. This makes downstream projects like datasette-alerts work at low FD limits without every fixture needing an explicit ds.close() call. Refs #2692 See https://github.com/simonw/datasette/issues/2692#issuecomment-4265072230 Co-Authored-By: Claude Opus 4.7 (1M context) --- datasette/_pytest_plugin.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/datasette/_pytest_plugin.py b/datasette/_pytest_plugin.py index 3f6c0d96..5fb6b473 100644 --- a/datasette/_pytest_plugin.py +++ b/datasette/_pytest_plugin.py @@ -1,6 +1,9 @@ """ Pytest plugin that automatically closes any Datasette instances constructed -inside a test body. Fixture-scoped instances survive. +during a pytest test — both in the test body and in function-scoped +fixtures. Instances constructed by session-, module-, class- or package- +scoped fixtures are left alone, because other tests in the session will +still want to use them. Registered as a pytest11 entry point in pyproject.toml so that downstream projects using Datasette get the same FD-safety net for their own tests. @@ -40,7 +43,7 @@ def pytest_addoption(parser): "datasette_autoclose", help=( "Automatically close Datasette instances created inside test " - "bodies (default: true)." + "bodies and function-scoped fixtures (default: true)." ), default="true", ) @@ -54,7 +57,8 @@ def _enabled(config) -> bool: @pytest.hookimpl(hookwrapper=True) -def pytest_runtest_call(item): +def pytest_runtest_protocol(item, nextitem): + """Track Datasette instances across setup, call and teardown; close at end.""" if not _enabled(item.config): yield return @@ -76,3 +80,29 @@ def pytest_runtest_call(item): f"Error closing Datasette instance: {e!r}" ) ) + + +@pytest.hookimpl(hookwrapper=True) +def pytest_fixture_setup(fixturedef, request): + """Exempt instances created by non-function-scoped fixtures. + + Session-, module-, class- and package-scoped fixtures produce Datasette + instances that must survive beyond the current test — other tests in + the session will still use them. When such a fixture creates one or + more Datasette instances during its setup, we snapshot the tracking + list before the fixture runs and subtract off any instances that were + added during its setup, so they don't get closed at test teardown. + """ + refs = _active_instances.get() + if refs is None: + yield + return + before_ids = {id(ref) for ref in refs} + yield + if fixturedef.scope != "function": + new_refs = [ref for ref in refs if id(ref) not in before_ids] + for new_ref in new_refs: + try: + refs.remove(new_ref) + except ValueError: + pass From ede942a32e65191ccf554d481987f2d42f4a9a92 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:34:48 -0700 Subject: [PATCH 08/12] Fix ruff lints in close-related tests Drop unused `bad = ...` assignment and unused `import pytest`. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_internals_datasette.py | 2 +- tests/test_pytest_autoclose_plugin.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 11463eda..d58c9a29 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -289,7 +289,7 @@ async def test_datasette_close_continues_past_db_error(): def close(self): raise RuntimeError("boom") - bad = ds.add_database(Boom(ds, is_memory=True), name="bad") + ds.add_database(Boom(ds, is_memory=True), name="bad") good = ds.add_database(Database(ds, is_memory=True), name="good") with pytest.raises(RuntimeError, match="boom"): ds.close() diff --git a/tests/test_pytest_autoclose_plugin.py b/tests/test_pytest_autoclose_plugin.py index 78154ef5..3af1aace 100644 --- a/tests/test_pytest_autoclose_plugin.py +++ b/tests/test_pytest_autoclose_plugin.py @@ -11,8 +11,6 @@ import sys import textwrap from pathlib import Path -import pytest - REPO_ROOT = Path(__file__).parent.parent From 03eeeb9d92e3821611931d9fa259811d95b646e8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:38:08 -0700 Subject: [PATCH 09/12] Docs: auto-close plugin now handles function-scoped fixtures Describe the updated scoping rule: instances from test bodies and function-scoped fixtures are closed automatically; session-, module-, class- and package-scoped fixtures are exempt. Refs #2692 --- docs/testing_plugins.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 070ab6cf..b82a6e0c 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -87,9 +87,18 @@ If you are using ``await datasette.client.get()`` and similar methods then you d Automatic cleanup of Datasette instances ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Installing Datasette also installs a small pytest plugin that automatically calls :ref:`datasette_close` on any ``Datasette()`` instance constructed inside the body of a test function. This helps prevent large test suites from running out of file descriptors or leaking background threads from the hundreds of instances they may build up across a session. +Installing Datasette also installs a small pytest plugin that automatically calls :ref:`datasette_close` on any ``Datasette()`` instance constructed during a test. This helps prevent large test suites from running out of file descriptors or leaking background threads from the hundreds of instances they may build up across a session. -Instances created inside a pytest fixture are **not** closed by this plugin — pytest fixtures often create a single ``Datasette`` that is shared across many tests, and closing it automatically would break those tests. If you need a per-test instance and want to share it between multiple tests, create it inside a fixture rather than at the top level of a test function. +The plugin closes: + +- Instances created in the body of a test function. +- Instances created inside **function-scoped** pytest fixtures (the default scope — ``@pytest.fixture`` with no ``scope=`` argument, or ``scope="function"``). + +The plugin deliberately does **not** close: + +- Instances created inside higher-scoped fixtures (``scope="session"``, ``"module"``, ``"class"`` or ``"package"``). Those fixtures are typically designed to produce a single ``Datasette`` that is shared across many tests, and closing it automatically would break the tests that run after the first. + +In practice this means downstream projects rarely need to call ``ds.close()`` themselves — function-scoped fixtures and inline test code are both covered automatically, while long-lived shared fixtures keep working as before. If you need to opt out of this behavior, add the following to your ``pytest.ini`` (or equivalent): From c9a7dc9be21f586e86ae09e3e55376c5f9a5fd25 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:40:51 -0700 Subject: [PATCH 10/12] Declare ds_client as session-scoped so auto-close plugin spares it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ds_client already caches a single Datasette for the whole session via a module-level _ds_client global, so the declared fixture scope should match. With function scope the auto-close plugin correctly closes it after the first test that uses it, which then breaks every subsequent test that reuses the cached (now-closed) instance — as seen in the CI coverage job, which runs serially rather than under pytest-xdist. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3a3203fd..171a5433 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,7 +53,7 @@ def bare_ds(): return Datasette(memory=True) -@pytest_asyncio.fixture +@pytest_asyncio.fixture(scope="session") async def ds_client(): from datasette.app import Datasette from datasette.database import Database From b3001c1e5a5d5d5b2a04daf4a7445023f24806d5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:41:58 -0700 Subject: [PATCH 11/12] Drop redundant _ds_client global now that ds_client is session-scoped Session-scoped fixtures are cached per worker by pytest itself, so the manual _ds_client module global is no longer needed. Refs #2692 Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/conftest.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 171a5433..5f1cc587 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,9 +28,6 @@ UNDOCUMENTED_PERMISSIONS = { "view_document", } -_ds_client = None - - def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs): start = time.time() while time.time() - start < timeout: @@ -60,10 +57,6 @@ async def ds_client(): from .fixtures import CONFIG, METADATA, PLUGINS_DIR import secrets - global _ds_client - if _ds_client is not None: - return _ds_client - ds = Datasette( metadata=METADATA, config=CONFIG, @@ -95,8 +88,7 @@ async def ds_client(): await db.execute_write_fn(prepare) await ds.invoke_startup() - _ds_client = ds.client - return _ds_client + return ds.client def pytest_report_header(config): From 630e557cdb7cce7ac05b4b3f8067990211e5477c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 16 Apr 2026 20:44:21 -0700 Subject: [PATCH 12/12] Ran black --- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index 5f1cc587..4ea89458 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,6 +28,7 @@ UNDOCUMENTED_PERMISSIONS = { "view_document", } + def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs): start = time.time() while time.time() - start < timeout: