From 1af1041f91a9b91b321078d354132d1df5204660 Mon Sep 17 00:00:00 2001 From: Robert Christie Date: Thu, 3 Feb 2022 01:58:35 +0000 Subject: [PATCH 0001/1218] Jinja template_name should use "/" even on Windows (#1617) Closes #1545. Thanks, Robert Christie --- datasette/app.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 09d7d034..7bdf076c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1212,9 +1212,10 @@ class DatasetteRouter: else: # Is there a pages/* template matching this path? route_path = request.scope.get("route_path", request.scope["path"]) - template_path = os.path.join("pages", *route_path.split("/")) + ".html" + # Jinja requires template names to use "/" even on Windows + template_name = "pages" + route_path + ".html" try: - template = self.ds.jinja_env.select_template([template_path]) + template = self.ds.jinja_env.select_template([template_name]) except TemplateNotFound: template = None if template is None: From ac239d34ab2de6987afac43f5d38b576b26e9457 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 4 Feb 2022 20:45:13 -0800 Subject: [PATCH 0002/1218] Refactor test_trace into separate test module, refs #1576 --- tests/test_api.py | 51 ------------------------------------------ tests/test_tracer.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 51 deletions(-) create mode 100644 tests/test_tracer.py diff --git a/tests/test_api.py b/tests/test_api.py index 9741ffc5..57471af2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -911,57 +911,6 @@ def test_config_force_https_urls(): assert client.ds._last_request.scheme == "https" -@pytest.mark.parametrize("trace_debug", (True, False)) -def test_trace(trace_debug): - with make_app_client(settings={"trace_debug": trace_debug}) as client: - response = client.get("/fixtures/simple_primary_key.json?_trace=1") - assert response.status == 200 - - data = response.json - if not trace_debug: - assert "_trace" not in data - return - - assert "_trace" in data - trace_info = data["_trace"] - assert isinstance(trace_info["request_duration_ms"], float) - assert isinstance(trace_info["sum_trace_duration_ms"], float) - assert isinstance(trace_info["num_traces"], int) - assert isinstance(trace_info["traces"], list) - traces = trace_info["traces"] - assert len(traces) == trace_info["num_traces"] - for trace in traces: - assert isinstance(trace["type"], str) - assert isinstance(trace["start"], float) - assert isinstance(trace["end"], float) - assert trace["duration_ms"] == (trace["end"] - trace["start"]) * 1000 - assert isinstance(trace["traceback"], list) - assert isinstance(trace["database"], str) - assert isinstance(trace["sql"], str) - assert isinstance(trace.get("params"), (list, dict, None.__class__)) - - sqls = [trace["sql"] for trace in traces if "sql" in trace] - # There should be a mix of different types of SQL statement - expected = ( - "CREATE TABLE ", - "PRAGMA ", - "INSERT OR REPLACE INTO ", - "INSERT INTO", - "select ", - ) - for prefix in expected: - assert any( - sql.startswith(prefix) for sql in sqls - ), "No trace beginning with: {}".format(prefix) - - # Should be at least one executescript - assert any(trace for trace in traces if trace.get("executescript")) - # And at least one executemany - execute_manys = [trace for trace in traces if trace.get("executemany")] - assert execute_manys - assert all(isinstance(trace["count"], int) for trace in execute_manys) - - @pytest.mark.parametrize( "path,status_code", [ diff --git a/tests/test_tracer.py b/tests/test_tracer.py new file mode 100644 index 00000000..20a4427e --- /dev/null +++ b/tests/test_tracer.py @@ -0,0 +1,53 @@ +import pytest +from .fixtures import make_app_client + + +@pytest.mark.parametrize("trace_debug", (True, False)) +def test_trace(trace_debug): + with make_app_client(settings={"trace_debug": trace_debug}) as client: + response = client.get("/fixtures/simple_primary_key.json?_trace=1") + assert response.status == 200 + + data = response.json + if not trace_debug: + assert "_trace" not in data + return + + assert "_trace" in data + trace_info = data["_trace"] + assert isinstance(trace_info["request_duration_ms"], float) + assert isinstance(trace_info["sum_trace_duration_ms"], float) + assert isinstance(trace_info["num_traces"], int) + assert isinstance(trace_info["traces"], list) + traces = trace_info["traces"] + assert len(traces) == trace_info["num_traces"] + for trace in traces: + assert isinstance(trace["type"], str) + assert isinstance(trace["start"], float) + assert isinstance(trace["end"], float) + assert trace["duration_ms"] == (trace["end"] - trace["start"]) * 1000 + assert isinstance(trace["traceback"], list) + assert isinstance(trace["database"], str) + assert isinstance(trace["sql"], str) + assert isinstance(trace.get("params"), (list, dict, None.__class__)) + + sqls = [trace["sql"] for trace in traces if "sql" in trace] + # There should be a mix of different types of SQL statement + expected = ( + "CREATE TABLE ", + "PRAGMA ", + "INSERT OR REPLACE INTO ", + "INSERT INTO", + "select ", + ) + for prefix in expected: + assert any( + sql.startswith(prefix) for sql in sqls + ), "No trace beginning with: {}".format(prefix) + + # Should be at least one executescript + assert any(trace for trace in traces if trace.get("executescript")) + # And at least one executemany + execute_manys = [trace for trace in traces if trace.get("executemany")] + assert execute_manys + assert all(isinstance(trace["count"], int) for trace in execute_manys) From da53e0360da4771ffb56a8e3eb3f7476f3168299 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 4 Feb 2022 21:19:49 -0800 Subject: [PATCH 0003/1218] tracer.trace_child_tasks() for asyncio.gather tracing Also added documentation for datasette.tracer module. Closes #1576 --- datasette/tracer.py | 20 +++++++---- docs/internals.rst | 71 ++++++++++++++++++++++++++++++++++++++ tests/plugins/my_plugin.py | 12 +++++++ tests/test_tracer.py | 15 ++++++++ 4 files changed, 111 insertions(+), 7 deletions(-) diff --git a/datasette/tracer.py b/datasette/tracer.py index 6703f060..fc7338b0 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -1,5 +1,6 @@ import asyncio from contextlib import contextmanager +from contextvars import ContextVar from markupsafe import escape import time import json @@ -9,20 +10,25 @@ tracers = {} TRACE_RESERVED_KEYS = {"type", "start", "end", "duration_ms", "traceback"} - -# asyncio.current_task was introduced in Python 3.7: -for obj in (asyncio, asyncio.Task): - current_task = getattr(obj, "current_task", None) - if current_task is not None: - break +trace_task_id = ContextVar("trace_task_id", default=None) def get_task_id(): + current = trace_task_id.get(None) + if current is not None: + return current try: loop = asyncio.get_event_loop() except RuntimeError: return None - return id(current_task(loop=loop)) + return id(asyncio.current_task(loop=loop)) + + +@contextmanager +def trace_child_tasks(): + token = trace_task_id.set(get_task_id()) + yield + trace_task_id.reset(token) @contextmanager diff --git a/docs/internals.rst b/docs/internals.rst index 6a5666fd..a5dbdfb4 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -864,3 +864,74 @@ parse_metadata(content) This function accepts a string containing either JSON or YAML, expected to be of the format described in :ref:`metadata`. It returns a nested Python dictionary representing the parsed data from that string. If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception. + +.. _internals_tracer + +datasette.tracer +================ + +Running Datasette with ``--setting trace_debug 1`` enables trace debug output, which can then be viewed by adding ``?_trace=1`` to the query string for any page. + +You can see an example of this at the bottom of `latest.datasette.io/fixtures/facetable?_trace=1 `__. The JSON output shows full details of every SQL query that was executed to generate the page. + +The `datasette-pretty-traces `__ plugin can be installed to provide a more readable display of this information. You can see `a demo of that here `__. + +You can add your own custom traces to the JSON output using the ``trace()`` context manager. This takes a string that identifies the type of trace being recorded, and records any keyword arguments as additional JSON keys on the resulting trace object. + +The start and end time, duration and a traceback of where the trace was executed will be automatically attached to the JSON object. + +This example uses trace to record the start, end and duration of any HTTP GET requests made using the function: + +.. code-block:: python + + from datasette.tracer import trace + import httpx + + async def fetch_url(url): + with trace("fetch-url", url=url): + async with httpx.AsyncClient() as client: + return await client.get(url) + +.. _internals_tracer_trace_child_tasks + +Tracing child tasks +------------------- + +If your code uses a mechanism such as ``asyncio.gather()`` to execute code in additional tasks you may find that some of the traces are missing from the display. + +You can use the ``trace_child_tasks()`` context manager to ensure these child tasks are correctly handled. + +.. code-block:: python + + from datasette import tracer + + with tracer.trace_child_tasks(): + results = await asyncio.gather( + # ... async tasks here + ) + +This example uses the :ref:`register_routes() ` plugin hook to add a page at ``/parallel-queries`` which executes two SQL queries in parallel using ``asyncio.gather()`` and returns their results. + +.. code-block:: python + + from datasette import hookimpl + from datasette import tracer + + @hookimpl + def register_routes(): + + async def parallel_queries(datasette): + db = datasette.get_database() + with tracer.trace_child_tasks(): + one, two = await asyncio.gather( + db.execute("select 1"), + db.execute("select 2"), + ) + return Response.json({"one": one.single_value(), "two": two.single_value()}) + + return [ + (r"/parallel-queries$", parallel_queries), + ] + + +Adding ``?_trace=1`` will show that the trace covers both of those child tasks. diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 75c76ea8..610cea17 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -1,5 +1,7 @@ +import asyncio from datasette import hookimpl from datasette.facets import Facet +from datasette import tracer from datasette.utils import path_with_added_args from datasette.utils.asgi import asgi_send_json, Response import base64 @@ -270,6 +272,15 @@ def register_routes(): def asgi_scope(scope): return Response.json(scope, default=repr) + async def parallel_queries(datasette): + db = datasette.get_database() + with tracer.trace_child_tasks(): + one, two = await asyncio.gather( + db.execute("select coalesce(sleep(0.1), 1)"), + db.execute("select coalesce(sleep(0.1), 2)"), + ) + return Response.json({"one": one.single_value(), "two": two.single_value()}) + return [ (r"/one/$", one), (r"/two/(?P.*)$", two), @@ -281,6 +292,7 @@ def register_routes(): (r"/add-message/$", add_message), (r"/render-message/$", render_message), (r"/asgi-scope$", asgi_scope), + (r"/parallel-queries$", parallel_queries), ] diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 20a4427e..ceadee50 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -51,3 +51,18 @@ def test_trace(trace_debug): execute_manys = [trace for trace in traces if trace.get("executemany")] assert execute_manys assert all(isinstance(trace["count"], int) for trace in execute_manys) + + +def test_trace_parallel_queries(): + with make_app_client(settings={"trace_debug": True}) as client: + response = client.get("/parallel-queries?_trace=1") + assert response.status == 200 + + data = response.json + assert data["one"] == 1 + assert data["two"] == 2 + trace_info = data["_trace"] + traces = [trace for trace in trace_info["traces"] if "sql" in trace] + one, two = traces + # "two" should have started before "one" ended + assert two["start"] < one["end"] From 1c6b297e3ec288cf1f838796df499a9c21c31664 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 4 Feb 2022 21:28:35 -0800 Subject: [PATCH 0004/1218] Link to datasette.tracer from trace_debug docs, refs #1576 --- docs/settings.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/settings.rst b/docs/settings.rst index 7cc4bae0..da06d6a0 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -302,6 +302,8 @@ Some examples: * https://latest.datasette.io/?_trace=1 * https://latest.datasette.io/fixtures/roadside_attractions?_trace=1 +See :ref:`internals_tracer` for details on how to hook into this mechanism as a plugin author. + .. _setting_base_url: base_url From d25b55ab5e4d7368d374ea752b2232755869d40d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 5 Feb 2022 22:32:23 -0800 Subject: [PATCH 0005/1218] Fixed rST warnings --- docs/internals.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index a5dbdfb4..0b010295 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -865,7 +865,7 @@ This function accepts a string containing either JSON or YAML, expected to be of If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception. -.. _internals_tracer +.. _internals_tracer: datasette.tracer ================ @@ -892,7 +892,7 @@ This example uses trace to record the start, end and duration of any HTTP GET re async with httpx.AsyncClient() as client: return await client.get(url) -.. _internals_tracer_trace_child_tasks +.. _internals_tracer_trace_child_tasks: Tracing child tasks ------------------- From 8a25ea9bcae7ae4c9a4bd99f90c955828ff5676d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 5 Feb 2022 22:34:33 -0800 Subject: [PATCH 0006/1218] Implemented import shortcuts, closes #957 --- datasette/__init__.py | 2 ++ docs/internals.rst | 15 +++++++++++++++ docs/plugin_hooks.rst | 4 +++- tests/plugins/my_plugin.py | 9 +++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/datasette/__init__.py b/datasette/__init__.py index 0e59760a..faa36051 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -1,3 +1,5 @@ from datasette.version import __version_info__, __version__ # noqa +from datasette.utils.asgi import Forbidden, NotFound, Response # noqa +from datasette.utils import actor_matches_allow # noqa from .hookspecs import hookimpl # noqa from .hookspecs import hookspec # noqa diff --git a/docs/internals.rst b/docs/internals.rst index 0b010295..632f7d7a 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -935,3 +935,18 @@ This example uses the :ref:`register_routes() ` plugin h Adding ``?_trace=1`` will show that the trace covers both of those child tasks. + +.. _internals_shortcuts: + +Import shortcuts +================ + +The following commonly used symbols can be imported directly from the ``datasette`` module: + +.. code-block:: python + + from datasette import Response + from datasette import Forbidden + from datasette import NotFound + from datasette import hookimpl + from datasette import actor_matches_allow diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 88e1def0..1308b704 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -542,7 +542,7 @@ Return a list of ``(regex, view_function)`` pairs, something like this: .. code-block:: python - from datasette.utils.asgi import Response + from datasette import Response import html @@ -582,6 +582,8 @@ The view function can be a regular function or an ``async def`` function, depend The function can either return a :ref:`internals_response` or it can return nothing and instead respond directly to the request using the ASGI ``send`` function (for advanced uses only). +It can also rase the ``datasette.NotFound`` exception to return a 404 not found error, or the ``datasette.Forbidden`` exception for a 403 forbidden. + See :ref:`writing_plugins_designing_urls` for tips on designing the URL routes used by your plugin. Examples: `datasette-auth-github `__, `datasette-psutil `__ diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 610cea17..1c9b0575 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -300,6 +300,15 @@ def register_routes(): def startup(datasette): datasette._startup_hook_fired = True + # And test some import shortcuts too + from datasette import Response + from datasette import Forbidden + from datasette import NotFound + from datasette import hookimpl + from datasette import actor_matches_allow + + _ = (Response, Forbidden, NotFound, hookimpl, actor_matches_allow) + @hookimpl def canned_queries(datasette, database, actor): From 9b83ff2ee4d3cb5bfc5cb09a3ec99819ac214434 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 5 Feb 2022 22:46:33 -0800 Subject: [PATCH 0007/1218] Fixed spelling of "raise" --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 1308b704..a63d441e 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -582,7 +582,7 @@ The view function can be a regular function or an ``async def`` function, depend The function can either return a :ref:`internals_response` or it can return nothing and instead respond directly to the request using the ASGI ``send`` function (for advanced uses only). -It can also rase the ``datasette.NotFound`` exception to return a 404 not found error, or the ``datasette.Forbidden`` exception for a 403 forbidden. +It can also raise the ``datasette.NotFound`` exception to return a 404 not found error, or the ``datasette.Forbidden`` exception for a 403 forbidden. See :ref:`writing_plugins_designing_urls` for tips on designing the URL routes used by your plugin. From d9b508ffaa91f9f1840b366f5d282712d445f16b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 6 Feb 2022 22:30:00 -0800 Subject: [PATCH 0008/1218] @documented decorator plus unit test plus sphinx.ext.autodoc New mechanism for marking datasette.utils functions that should be covered by the documentation, then testing that they have indeed been documented. Also enabled sphinx.ext.autodoc which can now be used to embed the documented versions of those functions. Refs #1176 --- datasette/utils/__init__.py | 16 ++++++++++++++-- docs/conf.py | 2 +- docs/internals.rst | 11 +++++++++++ tests/test_docs.py | 18 +++++++++++++++++- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index dc4e1c99..610e916f 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -12,6 +12,7 @@ import os import re import shlex import tempfile +import typing import time import types import shutil @@ -59,8 +60,17 @@ Column = namedtuple( "Column", ("cid", "name", "type", "notnull", "default_value", "is_pk", "hidden") ) +functions_marked_as_documented = [] -async def await_me_maybe(value): + +def documented(fn): + functions_marked_as_documented.append(fn) + return fn + + +@documented +async def await_me_maybe(value: typing.Any) -> typing.Any: + "If value is callable, call it. If awaitable, await it. Otherwise return it." if callable(value): value = value() if asyncio.iscoroutine(value): @@ -915,7 +925,9 @@ class BadMetadataError(Exception): pass -def parse_metadata(content): +@documented +def parse_metadata(content: str) -> dict: + "Detects if content is JSON or YAML and parses it appropriately." # content can be JSON or YAML try: return json.loads(content) diff --git a/docs/conf.py b/docs/conf.py index 89009ea9..d114bc52 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.extlinks"] +extensions = ["sphinx.ext.extlinks", "sphinx.ext.autodoc"] extlinks = { "issue": ("https://github.com/simonw/datasette/issues/%s", "#"), diff --git a/docs/internals.rst b/docs/internals.rst index 632f7d7a..12ef5c54 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -865,6 +865,17 @@ This function accepts a string containing either JSON or YAML, expected to be of If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception. +.. autofunction:: datasette.utils.parse_metadata + +.. _internals_utils_await_me_maybe: + +await_me_maybe(value) +--------------------- + +Utility function for calling ``await`` on a return value if it is awaitable, otherwise returning the value. This is used by Datasette to support plugin hooks that can optionally return awaitable functions. Read more about this function in `The โ€œawait me maybeโ€ pattern for Python asyncio `__. + +.. autofunction:: datasette.utils.await_me_maybe + .. _internals_tracer: datasette.tracer diff --git a/tests/test_docs.py b/tests/test_docs.py index 0d17b8e3..cd5a6c13 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -2,7 +2,7 @@ Tests to ensure certain things are documented. """ from click.testing import CliRunner -from datasette import app +from datasette import app, utils from datasette.cli import cli from datasette.filters import Filters from pathlib import Path @@ -86,3 +86,19 @@ def documented_table_filters(): @pytest.mark.parametrize("filter", [f.key for f in Filters._filters]) def test_table_filters_are_documented(documented_table_filters, filter): assert filter in documented_table_filters + + +@pytest.fixture(scope="session") +def documented_fns(): + internals_rst = (docs_path / "internals.rst").read_text() + # Any line that starts .. _internals_utils_X + lines = internals_rst.split("\n") + prefix = ".. _internals_utils_" + return { + line.split(prefix)[1].split(":")[0] for line in lines if line.startswith(prefix) + } + + +@pytest.mark.parametrize("fn", utils.functions_marked_as_documented) +def test_functions_marked_with_documented_are_documented(documented_fns, fn): + assert fn.__name__ in documented_fns From fdce6f29e19c3c6b477b72f86e187abee9627b92 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 6 Feb 2022 22:38:27 -0800 Subject: [PATCH 0009/1218] Reconfigure ReadTheDocs, refs #1176 --- .readthedocs.yaml | 8 ++++++-- docs/readthedocs-requirements.txt | 1 - 2 files changed, 6 insertions(+), 3 deletions(-) delete mode 100644 docs/readthedocs-requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 70db5313..60b73b30 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,5 +9,9 @@ sphinx: configuration: docs/conf.py python: - install: - - requirements: docs/readthedocs-requirements.txt + version: "3.9" + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/docs/readthedocs-requirements.txt b/docs/readthedocs-requirements.txt deleted file mode 100644 index db1851ad..00000000 --- a/docs/readthedocs-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -docutils<0.19 From 03305ea183b1534bc4cef3a721fe5f3700273b84 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 6 Feb 2022 22:40:47 -0800 Subject: [PATCH 0010/1218] Remove python.version, refs #1176 --- .readthedocs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 60b73b30..e157fb9c 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,6 @@ sphinx: configuration: docs/conf.py python: - version: "3.9" install: - method: pip path: . From 0cd982fc6af45b60e0c9306516dd412ae948c89b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Feb 2022 15:28:46 -0800 Subject: [PATCH 0011/1218] De-duplicate 'datasette db.db db.db', closes #1632 Refs https://github.com/simonw/datasette-publish-fly/pull/12 --- datasette/cli.py | 3 +++ tests/test_cli.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/datasette/cli.py b/datasette/cli.py index 9d1b5ee5..61e7ce91 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -549,6 +549,9 @@ def serve( ) ) + # De-duplicate files so 'datasette db.db db.db' only attaches one /db + files = list(dict.fromkeys(files)) + try: ds = Datasette(files, **kwargs) except SpatialiteNotFound: diff --git a/tests/test_cli.py b/tests/test_cli.py index bbc5df30..3fbfdee2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -257,6 +257,7 @@ def test_serve_create(ensure_eventloop, tmpdir): def test_serve_duplicate_database_names(ensure_eventloop, tmpdir): + "'datasette db.db nested/db.db' should attach two databases, /db and /db_2" runner = CliRunner() db_1_path = str(tmpdir / "db.db") nested = tmpdir / "nested" @@ -270,6 +271,17 @@ def test_serve_duplicate_database_names(ensure_eventloop, tmpdir): assert {db["name"] for db in databases} == {"db", "db_2"} +def test_serve_deduplicate_same_database_path(ensure_eventloop, tmpdir): + "'datasette db.db db.db' should only attach one database, /db" + runner = CliRunner() + db_path = str(tmpdir / "db.db") + sqlite3.connect(db_path).execute("vacuum") + result = runner.invoke(cli, [db_path, db_path, "--get", "/-/databases.json"]) + assert result.exit_code == 0, result.output + databases = json.loads(result.output) + assert {db["name"] for db in databases} == {"db"} + + @pytest.mark.parametrize( "filename", ["test-database (1).sqlite", "database (1).sqlite"] ) From fa5fc327adbbf70656ac533912f3fc0526a3873d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Feb 2022 15:32:54 -0800 Subject: [PATCH 0012/1218] Release 0.60.2 Refs #1632 --- datasette/version.py | 2 +- docs/changelog.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index a4e340b3..91224615 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.60" +__version__ = "0.60.2" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index d7e2af39..c58c8444 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,20 @@ Changelog ========= +.. _v0_60.2: + +0.60.2 (2022-02-07) +------------------- + +- Fixed a bug where Datasette would open the same file twice with two different database names if you ran ``datasette file.db file.db``. (:issue:`1632`) + +.. _v0_60.1: + +0.60.1 (2022-01-20) +------------------- + +- Fixed a bug where installation on Python 3.6 stopped working due to a change to an underlying dependency. This release can now be installed on Python 3.6, but is the last release of Datasette that will support anything less than Python 3.7. (:issue:`1609`) + .. _v0_60: 0.60 (2022-01-13) From 5bfd001b55357106dba090c83a1c88912a004665 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Feb 2022 15:42:37 -0800 Subject: [PATCH 0013/1218] Use de-dupe idiom that works with Python 3.6, refs #1632 --- datasette/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index 61e7ce91..a8da0741 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -550,7 +550,9 @@ def serve( ) # De-duplicate files so 'datasette db.db db.db' only attaches one /db - files = list(dict.fromkeys(files)) + files_seen = set() + deduped_files = [f for f in files if f not in files_seen and not files_seen.add(f)] + files = deduped_files try: ds = Datasette(files, **kwargs) From 1b2f0ab6bbc9274dac1ba5fe126b1d6b8587ea96 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Feb 2022 15:43:20 -0800 Subject: [PATCH 0014/1218] Revert "Use de-dupe idiom that works with Python 3.6, refs #1632" This reverts commit 5bfd001b55357106dba090c83a1c88912a004665. No need for this on the main branch because it doesn't support Python 3.6 any more. --- datasette/cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index a8da0741..61e7ce91 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -550,9 +550,7 @@ def serve( ) # De-duplicate files so 'datasette db.db db.db' only attaches one /db - files_seen = set() - deduped_files = [f for f in files if f not in files_seen and not files_seen.add(f)] - files = deduped_files + files = list(dict.fromkeys(files)) try: ds = Datasette(files, **kwargs) From 458f03ad3a454d271f47a643f4530bd8b60ddb76 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 8 Feb 2022 22:32:19 -0800 Subject: [PATCH 0015/1218] More SpatiaLite details on /-/versions, closes #1607 --- datasette/app.py | 12 ++++++++++++ datasette/utils/__init__.py | 32 ++++++++++++++++++++++++++++++++ tests/test_spatialite.py | 21 +++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 tests/test_spatialite.py diff --git a/datasette/app.py b/datasette/app.py index 7bdf076c..8c5480cf 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -46,6 +46,7 @@ from .database import Database, QueryInterrupted from .utils import ( PrefixedUrlString, + SPATIALITE_FUNCTIONS, StartupError, add_cors_headers, async_call_with_supported_arguments, @@ -724,6 +725,17 @@ class Datasette: sqlite_extensions[extension] = None except Exception: pass + # More details on SpatiaLite + if "spatialite" in sqlite_extensions: + spatialite_details = {} + for fn in SPATIALITE_FUNCTIONS: + try: + result = conn.execute("select {}()".format(fn)) + spatialite_details[fn] = result.fetchone()[0] + except Exception as e: + spatialite_details[fn] = {"error": str(e)} + sqlite_extensions["spatialite"] = spatialite_details + # Figure out supported FTS versions fts_versions = [] for fts in ("FTS5", "FTS4", "FTS3"): diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 610e916f..e17b4d7f 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -52,9 +52,41 @@ SPATIALITE_PATHS = ( "/usr/local/lib/mod_spatialite.dylib", "/usr/local/lib/mod_spatialite.so", ) +# Used to display /-/versions.json SpatiaLite information +SPATIALITE_FUNCTIONS = ( + "spatialite_version", + "spatialite_target_cpu", + "check_strict_sql_quoting", + "freexl_version", + "proj_version", + "geos_version", + "rttopo_version", + "libxml2_version", + "HasIconv", + "HasMathSQL", + "HasGeoCallbacks", + "HasProj", + "HasProj6", + "HasGeos", + "HasGeosAdvanced", + "HasGeosTrunk", + "HasGeosReentrant", + "HasGeosOnlyReentrant", + "HasMiniZip", + "HasRtTopo", + "HasLibXML2", + "HasEpsg", + "HasFreeXL", + "HasGeoPackage", + "HasGCP", + "HasTopology", + "HasKNN", + "HasRouting", +) # Length of hash subset used in hashed URLs: HASH_LENGTH = 7 + # Can replace this with Column from sqlite_utils when I add that dependency Column = namedtuple( "Column", ("cid", "name", "type", "notnull", "default_value", "is_pk", "hidden") diff --git a/tests/test_spatialite.py b/tests/test_spatialite.py new file mode 100644 index 00000000..8b98c5d6 --- /dev/null +++ b/tests/test_spatialite.py @@ -0,0 +1,21 @@ +from datasette.app import Datasette +from datasette.utils import find_spatialite, SpatialiteNotFound, SPATIALITE_FUNCTIONS +import pytest + + +def has_spatialite(): + try: + find_spatialite() + return True + except SpatialiteNotFound: + return False + + +@pytest.mark.asyncio +@pytest.mark.skipif(not has_spatialite(), reason="Requires SpatiaLite") +async def test_spatialite_version_info(): + ds = Datasette(sqlite_extensions=["spatialite"]) + response = await ds.client.get("/-/versions.json") + assert response.status_code == 200 + spatialite = response.json()["sqlite"]["extensions"]["spatialite"] + assert set(SPATIALITE_FUNCTIONS) == set(spatialite) From 7d24fd405f3c60e4c852c5d746c91aa2ba23cf5b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Feb 2022 09:47:54 -0800 Subject: [PATCH 0016/1218] datasette-auth-passwords is now an example of register_commands Refs https://github.com/simonw/datasette-auth-passwords/issues/19 --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index a63d441e..92cf662f 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -633,7 +633,7 @@ Note that ``register_commands()`` plugins cannot used with the :ref:`--plugins-d pip install -e path/to/my/datasette-plugin -Example: `datasette-verify `_ +Examples: `datasette-auth-passwords `__, `datasette-verify `__ .. _plugin_register_facet_classes: From dd94157f8958bdfe9f45575add934ccf1aba6d63 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 27 Feb 2022 10:04:03 -0800 Subject: [PATCH 0017/1218] Link to tutorials from documentation index page --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index acca943f..a2888822 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,7 +25,7 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover `Explore a demo `__, watch `a presentation about the project `__ or :ref:`getting_started_glitch`. -More examples: https://datasette.io/examples +Interested in learning Datasette? Start with `the official tutorials `__. Support questions, feedback? Join our `GitHub Discussions forum `__. From 5010d1359b9e9db90a5a69a3ca22d12862893e00 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 5 Mar 2022 11:45:04 -0800 Subject: [PATCH 0018/1218] Fix for test failure caused by SQLite 3.37.0+, closes #1647 --- datasette/templates/_table.html | 2 +- tests/test_internals_database.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/datasette/templates/_table.html b/datasette/templates/_table.html index d91a1a57..5332f831 100644 --- a/datasette/templates/_table.html +++ b/datasette/templates/_table.html @@ -4,7 +4,7 @@ {% for column in display_columns %} - + {% if not column.sortable %} {{ column.name }} {% else %} diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index bcecb486..31538a24 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -279,7 +279,15 @@ async def test_table_columns(db, table, expected): @pytest.mark.asyncio async def test_table_column_details(db, table, expected): columns = await db.table_column_details(table) - assert columns == expected + # Convert "type" to lowercase before comparison + # https://github.com/simonw/datasette/issues/1647 + compare_columns = [ + Column( + c.cid, c.name, c.type.lower(), c.notnull, c.default_value, c.is_pk, c.hidden + ) + for c in columns + ] + assert compare_columns == expected @pytest.mark.asyncio From a22ec96c3ac555337eb49121450723a273fb52d1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 5 Mar 2022 17:29:53 -0800 Subject: [PATCH 0019/1218] Update pytest-asyncio requirement from <0.17,>=0.10 to >=0.10,<0.19 (#1631) Updates the requirements on [pytest-asyncio](https://github.com/pytest-dev/pytest-asyncio) to permit the latest version. - [Release notes](https://github.com/pytest-dev/pytest-asyncio/releases) - [Commits](https://github.com/pytest-dev/pytest-asyncio/compare/v0.10.0...v0.18.0) --- updated-dependencies: - dependency-name: pytest-asyncio dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6accaa30..6a097e0f 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ setup( "test": [ "pytest>=5.2.2,<6.3.0", "pytest-xdist>=2.2.1,<2.6", - "pytest-asyncio>=0.10,<0.17", + "pytest-asyncio>=0.10,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", "black==22.1.0", "pytest-timeout>=1.4.2,<2.1", From b21839dd1a005f6269c4e9a9f763195fe7aa9c86 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 5 Mar 2022 17:30:05 -0800 Subject: [PATCH 0020/1218] Update pytest requirement from <6.3.0,>=5.2.2 to >=5.2.2,<7.1.0 (#1629) Updates the requirements on [pytest](https://github.com/pytest-dev/pytest) to permit the latest version. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/5.2.2...7.0.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6a097e0f..b9db0700 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ setup( extras_require={ "docs": ["sphinx_rtd_theme", "sphinx-autobuild", "codespell"], "test": [ - "pytest>=5.2.2,<6.3.0", + "pytest>=5.2.2,<7.1.0", "pytest-xdist>=2.2.1,<2.6", "pytest-asyncio>=0.10,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", From 73f2d25f70d741c6b53f7312674c91f0aec83e17 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 5 Mar 2022 17:30:27 -0800 Subject: [PATCH 0021/1218] Update asgiref requirement from <3.5.0,>=3.2.10 to >=3.2.10,<3.6.0 (#1610) Updates the requirements on [asgiref](https://github.com/django/asgiref) to permit the latest version. - [Release notes](https://github.com/django/asgiref/releases) - [Changelog](https://github.com/django/asgiref/blob/main/CHANGELOG.txt) - [Commits](https://github.com/django/asgiref/compare/3.2.10...3.5.0) --- updated-dependencies: - dependency-name: asgiref dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b9db0700..b13f7496 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( include_package_data=True, python_requires=">=3.7", install_requires=[ - "asgiref>=3.2.10,<3.5.0", + "asgiref>=3.2.10,<3.6.0", "click>=7.1.1,<8.1.0", "click-default-group~=1.2.2", "Jinja2>=2.10.3,<3.1.0", From 7b78279b93b6e7a5fce6b53e5a85ca421a801496 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 5 Mar 2022 17:41:49 -0800 Subject: [PATCH 0022/1218] Update pytest-timeout requirement from <2.1,>=1.4.2 to >=1.4.2,<2.2 (#1602) Updates the requirements on [pytest-timeout](https://github.com/pytest-dev/pytest-timeout) to permit the latest version. - [Release notes](https://github.com/pytest-dev/pytest-timeout/releases) - [Commits](https://github.com/pytest-dev/pytest-timeout/compare/1.4.2...2.1.0) --- updated-dependencies: - dependency-name: pytest-timeout dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b13f7496..8e69c2f5 100644 --- a/setup.py +++ b/setup.py @@ -72,7 +72,7 @@ setup( "pytest-asyncio>=0.10,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", "black==22.1.0", - "pytest-timeout>=1.4.2,<2.1", + "pytest-timeout>=1.4.2,<2.2", "trustme>=0.7,<0.10", "cogapp>=3.3.0", ], From 0499f174c063283aa9b589d475a32077aaf7adc5 Mon Sep 17 00:00:00 2001 From: David Larlet <3556+davidbgk@users.noreply.github.com> Date: Sat, 5 Mar 2022 20:58:31 -0500 Subject: [PATCH 0023/1218] Typo in docs about default redirect status code (#1589) --- docs/custom_templates.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 3e4eb633..97dea2af 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -428,7 +428,7 @@ You can use the ``custom_redirect(location)`` function to redirect users to anot Now requests to ``http://localhost:8001/datasette`` will result in a redirect. -These redirects are served with a ``301 Found`` status code by default. You can send a ``301 Moved Permanently`` code by passing ``301`` as the second argument to the function: +These redirects are served with a ``302 Found`` status code by default. You can send a ``301 Moved Permanently`` code by passing ``301`` as the second argument to the function: .. code-block:: jinja From de810f49cc57a4f88e4a1553d26c579253ce4531 Mon Sep 17 00:00:00 2001 From: Dan Peterson Date: Sun, 6 Mar 2022 15:39:15 -0400 Subject: [PATCH 0024/1218] Add /opt/homebrew to where spatialite extension can be found (#1649) Helps homebrew on Apple Silicon setups find spatialite without needing a full path. Similar to #1114 Thanks, @danp --- datasette/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index e17b4d7f..133b9bc7 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -51,6 +51,7 @@ SPATIALITE_PATHS = ( "/usr/lib/x86_64-linux-gnu/mod_spatialite.so", "/usr/local/lib/mod_spatialite.dylib", "/usr/local/lib/mod_spatialite.so", + "/opt/homebrew/lib/mod_spatialite.dylib", ) # Used to display /-/versions.json SpatiaLite information SPATIALITE_FUNCTIONS = ( From 1baa030eca375f839f3471237547ab403523e643 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 07:38:29 -0800 Subject: [PATCH 0025/1218] Switch to dash encoding for table/database/row-pk in paths * Dash encoding functions, tests and docs, refs #1439 * dash encoding is now like percent encoding but with dashes * Use dash-encoding for row PKs and ?_next=, refs #1439 * Use dash encoding for table names, refs #1439 * Use dash encoding for database names, too, refs #1439 See also https://simonwillison.net/2022/Mar/5/dash-encoding/ --- datasette/url_builder.py | 10 ++++---- datasette/utils/__init__.py | 41 ++++++++++++++++++++++++++--- datasette/views/base.py | 24 ++++++++--------- datasette/views/table.py | 9 ++++--- docs/internals.rst | 26 +++++++++++++++++++ tests/fixtures.py | 1 + tests/test_api.py | 19 +++++++++++--- tests/test_cli.py | 5 ++-- tests/test_html.py | 50 ++++++++++++++++++++++++------------ tests/test_internals_urls.py | 2 +- tests/test_table_api.py | 7 +++-- tests/test_table_html.py | 12 ++++++--- tests/test_utils.py | 20 ++++++++++++++- 13 files changed, 173 insertions(+), 53 deletions(-) diff --git a/datasette/url_builder.py b/datasette/url_builder.py index 2bcda869..eebfe31e 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -1,4 +1,4 @@ -from .utils import path_with_format, HASH_LENGTH, PrefixedUrlString +from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString import urllib @@ -31,20 +31,20 @@ class Urls: db = self.ds.databases[database] if self.ds.setting("hash_urls") and db.hash: path = self.path( - f"{urllib.parse.quote(database)}-{db.hash[:HASH_LENGTH]}", format=format + f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format ) else: - path = self.path(urllib.parse.quote(database), format=format) + path = self.path(dash_encode(database), format=format) return path def table(self, database, table, format=None): - path = f"{self.database(database)}/{urllib.parse.quote_plus(table)}" + path = f"{self.database(database)}/{dash_encode(table)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) def query(self, database, query, format=None): - path = f"{self.database(database)}/{urllib.parse.quote_plus(query)}" + path = f"{self.database(database)}/{dash_encode(query)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 133b9bc7..79feeef6 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -112,12 +112,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any: def urlsafe_components(token): - """Splits token on commas and URL decodes each component""" - return [urllib.parse.unquote_plus(b) for b in token.split(",")] + """Splits token on commas and dash-decodes each component""" + return [dash_decode(b) for b in token.split(",")] def path_from_row_pks(row, pks, use_rowid, quote=True): - """Generate an optionally URL-quoted unique identifier + """Generate an optionally dash-quoted unique identifier for a row from its primary keys.""" if use_rowid: bits = [row["rowid"]] @@ -126,7 +126,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True): row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks ] if quote: - bits = [urllib.parse.quote_plus(str(bit)) for bit in bits] + bits = [dash_encode(str(bit)) for bit in bits] else: bits = [str(bit) for bit in bits] @@ -1140,3 +1140,36 @@ def add_cors_headers(headers): headers["Access-Control-Allow-Origin"] = "*" headers["Access-Control-Allow-Headers"] = "Authorization" headers["Access-Control-Expose-Headers"] = "Link" + + +_DASH_ENCODING_SAFE = frozenset( + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"abcdefghijklmnopqrstuvwxyz" + b"0123456789_" + # This is the same as Python percent-encoding but I removed + # '.' and '-' and '~' +) + + +class DashEncoder(dict): + # Keeps a cache internally, via __missing__ + def __missing__(self, b): + # Handle a cache miss, store encoded string in cache and return. + res = chr(b) if b in _DASH_ENCODING_SAFE else "-{:02X}".format(b) + self[b] = res + return res + + +_dash_encoder = DashEncoder().__getitem__ + + +@documented +def dash_encode(s: str) -> str: + "Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``" + return "".join(_dash_encoder(char) for char in s.encode("utf-8")) + + +@documented +def dash_decode(s: str) -> str: + "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``" + return urllib.parse.unquote(s.replace("-", "%")) diff --git a/datasette/views/base.py b/datasette/views/base.py index c74d6141..7cd385b7 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -17,6 +17,8 @@ from datasette.utils import ( InvalidSql, LimitedWriter, call_with_supported_arguments, + dash_decode, + dash_encode, path_from_row_pks, path_with_added_args, path_with_removed_args, @@ -203,17 +205,17 @@ class DataView(BaseView): async def resolve_db_name(self, request, db_name, **kwargs): hash = None name = None - db_name = urllib.parse.unquote_plus(db_name) - if db_name not in self.ds.databases and "-" in db_name: + decoded_name = dash_decode(db_name) + if decoded_name not in self.ds.databases and "-" in db_name: # No matching DB found, maybe it's a name-hash? name_bit, hash_bit = db_name.rsplit("-", 1) - if name_bit not in self.ds.databases: + if dash_decode(name_bit) not in self.ds.databases: raise NotFound(f"Database not found: {name}") else: - name = name_bit + name = dash_decode(name_bit) hash = hash_bit else: - name = db_name + name = decoded_name try: db = self.ds.databases[name] @@ -233,9 +235,7 @@ class DataView(BaseView): return await db.table_exists(t) table, _format = await resolve_table_and_format( - table_and_format=urllib.parse.unquote_plus( - kwargs["table_and_format"] - ), + table_and_format=dash_decode(kwargs["table_and_format"]), table_exists=async_table_exists, allowed_formats=self.ds.renderers.keys(), ) @@ -243,11 +243,11 @@ class DataView(BaseView): if _format: kwargs["as_format"] = f".{_format}" elif kwargs.get("table"): - kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"]) + kwargs["table"] = dash_decode(kwargs["table"]) should_redirect = self.ds.urls.path(f"{name}-{expected}") if kwargs.get("table"): - should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"]) + should_redirect += "/" + dash_encode(kwargs["table"]) if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] if kwargs.get("as_format"): @@ -467,7 +467,7 @@ class DataView(BaseView): return await db.table_exists(t) table, _ext_format = await resolve_table_and_format( - table_and_format=urllib.parse.unquote_plus(args["table_and_format"]), + table_and_format=dash_decode(args["table_and_format"]), table_exists=async_table_exists, allowed_formats=self.ds.renderers.keys(), ) @@ -475,7 +475,7 @@ class DataView(BaseView): args["table"] = table del args["table_and_format"] elif "table" in args: - args["table"] = urllib.parse.unquote_plus(args["table"]) + args["table"] = dash_decode(args["table"]) return _format, args async def view_get(self, request, database, hash, correct_hash_provided, **kwargs): diff --git a/datasette/views/table.py b/datasette/views/table.py index be9e9c3b..1d81755e 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -12,6 +12,7 @@ from datasette.utils import ( MultiParams, append_querystring, compound_keys_after_sql, + dash_encode, escape_sqlite, filters_should_redirect, is_url, @@ -142,7 +143,7 @@ class RowTableShared(DataView): '{flat_pks}'.format( base_url=base_url, database=database, - table=urllib.parse.quote_plus(table), + table=dash_encode(table), flat_pks=str(markupsafe.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) @@ -199,8 +200,8 @@ class RowTableShared(DataView): link_template.format( database=database, base_url=base_url, - table=urllib.parse.quote_plus(other_table), - link_id=urllib.parse.quote_plus(str(value)), + table=dash_encode(other_table), + link_id=dash_encode(str(value)), id=str(markupsafe.escape(value)), label=str(markupsafe.escape(label)) or "-", ) @@ -765,7 +766,7 @@ class TableView(RowTableShared): if prefix is None: prefix = "$null" else: - prefix = urllib.parse.quote_plus(str(prefix)) + prefix = dash_encode(str(prefix)) next_value = f"{prefix},{next_value}" added_args = {"_next": next_value} if sort: diff --git a/docs/internals.rst b/docs/internals.rst index 12ef5c54..d035e1f1 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -876,6 +876,32 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth .. autofunction:: datasette.utils.await_me_maybe +.. _internals_dash_encoding: + +Dash encoding +------------- + +Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them. + +Dash encoding uses the same algorithm as `URL percent-encoding `__, but with the ``-`` hyphen character used in place of ``%``. + +Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_`` will be replaced by the numeric equivalent preceded by a hyphen. For example: + +- ``/`` becomes ``-2F`` +- ``.`` becomes ``-2E`` +- ``%`` becomes ``-25`` +- ``-`` becomes ``-2D`` +- Space character becomes ``-20`` +- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary`` + +.. _internals_utils_dash_encode: + +.. autofunction:: datasette.utils.dash_encode + +.. _internals_utils_dash_decode: + +.. autofunction:: datasette.utils.dash_decode + .. _internals_tracer: datasette.tracer diff --git a/tests/fixtures.py b/tests/fixtures.py index 26f0cf7b..11f09c41 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -406,6 +406,7 @@ CREATE TABLE compound_primary_key ( ); INSERT INTO compound_primary_key VALUES ('a', 'b', 'c'); +INSERT INTO compound_primary_key VALUES ('a/b', '.c-d', 'c'); CREATE TABLE compound_three_primary_keys ( pk1 varchar(30), diff --git a/tests/test_api.py b/tests/test_api.py index 57471af2..dd916cf0 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -143,7 +143,7 @@ def test_database_page(app_client): "name": "compound_primary_key", "columns": ["pk1", "pk2", "content"], "primary_keys": ["pk1", "pk2"], - "count": 1, + "count": 2, "hidden": False, "fts_table": None, "foreign_keys": {"incoming": [], "outgoing": []}, @@ -942,7 +942,7 @@ def test_cors(app_client_with_cors, path, status_code): ) def test_database_with_space_in_name(app_client_two_attached_databases, path): response = app_client_two_attached_databases.get( - "/extra database" + path, follow_redirects=True + "/extra-20database" + path, follow_redirects=True ) assert response.status == 200 @@ -953,7 +953,7 @@ def test_common_prefix_database_names(app_client_conflicting_database_names): d["name"] for d in app_client_conflicting_database_names.get("/-/databases.json").json ] - for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-bar.json")): + for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-2Dbar.json")): data = app_client_conflicting_database_names.get(path).json assert db_name == data["database"] @@ -992,3 +992,16 @@ async def test_hidden_sqlite_stat1_table(): data = (await ds.client.get("/db.json?_show_hidden=1")).json() tables = [(t["name"], t["hidden"]) for t in data["tables"]] assert tables == [("normal", False), ("sqlite_stat1", True)] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d")) +async def test_dash_encoded_database_names(db_name): + ds = Datasette() + ds.add_memory_database(db_name) + response = await ds.client.get("/.json") + assert db_name in response.json().keys() + path = response.json()[db_name]["path"] + # And the JSON for that database + response2 = await ds.client.get(path + ".json") + assert response2.status_code == 200 diff --git a/tests/test_cli.py b/tests/test_cli.py index 3fbfdee2..e30c2ad3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,6 +9,7 @@ from datasette.app import SETTINGS from datasette.plugins import DEFAULT_PLUGINS from datasette.cli import cli, serve from datasette.version import __version__ +from datasette.utils import dash_encode from datasette.utils.sqlite import sqlite3 from click.testing import CliRunner import io @@ -294,12 +295,12 @@ def test_weird_database_names(ensure_eventloop, tmpdir, filename): assert result1.exit_code == 0, result1.output filename_no_stem = filename.rsplit(".", 1)[0] expected_link = '{}'.format( - urllib.parse.quote(filename_no_stem), filename_no_stem + dash_encode(filename_no_stem), filename_no_stem ) assert expected_link in result1.output # Now try hitting that database page result2 = runner.invoke( - cli, [db_path, "--get", "/{}".format(urllib.parse.quote(filename_no_stem))] + cli, [db_path, "--get", "/{}".format(dash_encode(filename_no_stem))] ) assert result2.exit_code == 0, result2.output diff --git a/tests/test_html.py b/tests/test_html.py index d5f4250d..b4a12b8a 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -29,7 +29,7 @@ def test_homepage(app_client_two_attached_databases): ) # Should be two attached databases assert [ - {"href": r"/extra%20database", "text": "extra database"}, + {"href": r"/extra-20database", "text": "extra database"}, {"href": "/fixtures", "text": "fixtures"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] # Database should show count text and attached tables @@ -44,8 +44,8 @@ def test_homepage(app_client_two_attached_databases): {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ - {"href": r"/extra%20database/searchable", "text": "searchable"}, - {"href": r"/extra%20database/searchable_view", "text": "searchable_view"}, + {"href": r"/extra-20database/searchable", "text": "searchable"}, + {"href": r"/extra-20database/searchable_view", "text": "searchable_view"}, ] == table_links @@ -140,7 +140,7 @@ def test_database_page(app_client): assert queries_ul is not None assert [ ( - "/fixtures/%F0%9D%90%9C%F0%9D%90%A2%F0%9D%90%AD%F0%9D%90%A2%F0%9D%90%9E%F0%9D%90%AC", + "/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC", "๐œ๐ข๐ญ๐ข๐ž๐ฌ", ), ("/fixtures/from_async_hook", "from_async_hook"), @@ -193,11 +193,11 @@ def test_row_redirects_with_url_hash(app_client_with_hash): def test_row_strange_table_name_with_url_hash(app_client_with_hash): - response = app_client_with_hash.get("/fixtures/table%2Fwith%2Fslashes.csv/3") + response = app_client_with_hash.get("/fixtures/table-2Fwith-2Fslashes-2Ecsv/3") assert response.status == 302 - assert response.headers["Location"].endswith("/table%2Fwith%2Fslashes.csv/3") + assert response.headers["Location"].endswith("/table-2Fwith-2Fslashes-2Ecsv/3") response = app_client_with_hash.get( - "/fixtures/table%2Fwith%2Fslashes.csv/3", follow_redirects=True + "/fixtures/table-2Fwith-2Fslashes-2Ecsv/3", follow_redirects=True ) assert response.status == 200 @@ -345,20 +345,38 @@ def test_row_links_from_other_tables(app_client, path, expected_text, expected_l assert link == expected_link -def test_row_html_compound_primary_key(app_client): - response = app_client.get("/fixtures/compound_primary_key/a,b") +@pytest.mark.parametrize( + "path,expected", + ( + ( + "/fixtures/compound_primary_key/a,b", + [ + [ + 'a', + 'b', + 'c', + ] + ], + ), + ( + "/fixtures/compound_primary_key/a-2Fb,-2Ec-2Dd", + [ + [ + 'a/b', + '.c-d', + 'c', + ] + ], + ), + ), +) +def test_row_html_compound_primary_key(app_client, path, expected): + response = app_client.get(path) assert response.status == 200 table = Soup(response.body, "html.parser").find("table") assert ["pk1", "pk2", "content"] == [ th.string.strip() for th in table.select("thead th") ] - expected = [ - [ - 'a', - 'b', - 'c', - ] - ] assert expected == [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") ] diff --git a/tests/test_internals_urls.py b/tests/test_internals_urls.py index e486e4c9..16515ad6 100644 --- a/tests/test_internals_urls.py +++ b/tests/test_internals_urls.py @@ -121,7 +121,7 @@ def test_database(ds, base_url, format, expected): ("/", "name", None, "/_memory/name"), ("/prefix/", "name", None, "/prefix/_memory/name"), ("/", "name", "json", "/_memory/name.json"), - ("/", "name.json", "json", "/_memory/name.json?_format=json"), + ("/", "name.json", "json", "/_memory/name-2Ejson.json"), ], ) def test_table_and_query(ds, base_url, name, format, expected): diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 6a6daed5..cc38d392 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -136,7 +136,10 @@ def test_table_shape_object(app_client): def test_table_shape_object_compound_primary_key(app_client): response = app_client.get("/fixtures/compound_primary_key.json?_shape=object") - assert {"a,b": {"pk1": "a", "pk2": "b", "content": "c"}} == response.json + assert response.json == { + "a,b": {"pk1": "a", "pk2": "b", "content": "c"}, + "a-2Fb,-2Ec-2Dd": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, + } def test_table_with_slashes_in_name(app_client): @@ -308,7 +311,7 @@ def test_sortable(app_client, query_string, sort_key, human_description_en): path = response.json["next_url"] if path: path = path.replace("http://localhost", "") - assert 5 == page + assert page == 5 expected = list(generate_sortable_rows(201)) expected.sort(key=sort_key) assert [r["content"] for r in expected] == [r["content"] for r in fetched] diff --git a/tests/test_table_html.py b/tests/test_table_html.py index 021268c3..77d97d80 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -563,11 +563,17 @@ def test_table_html_compound_primary_key(app_client): 'a', 'b', 'c', - ] + ], + [ + 'a/b,.c-d', + 'a/b', + '.c-d', + 'c', + ], ] - assert expected == [ + assert [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") - ] + ] == expected def test_table_html_foreign_key_links(app_client): diff --git a/tests/test_utils.py b/tests/test_utils.py index e7d67045..1c3ab495 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -93,7 +93,7 @@ def test_path_with_replaced_args(path, args, expected): "row,pks,expected_path", [ ({"A": "foo", "B": "bar"}, ["A", "B"], "foo,bar"), - ({"A": "f,o", "B": "bar"}, ["A", "B"], "f%2Co,bar"), + ({"A": "f,o", "B": "bar"}, ["A", "B"], "f-2Co,bar"), ({"A": 123}, ["A"], "123"), ( utils.CustomRow( @@ -646,3 +646,21 @@ async def test_derive_named_parameters(sql, expected): db = ds.get_database("_memory") params = await utils.derive_named_parameters(db, sql) assert params == expected + + +@pytest.mark.parametrize( + "original,expected", + ( + ("abc", "abc"), + ("/foo/bar", "-2Ffoo-2Fbar"), + ("/-/bar", "-2F-2D-2Fbar"), + ("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"), + (r"%~-/", "-25-7E-2D-2F"), + ("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"), + ), +) +def test_dash_encoding(original, expected): + actual = utils.dash_encode(original) + assert actual == expected + # And test round-trip + assert original == utils.dash_decode(actual) From 644d25d1de78a36b105cca479e7b3e4375a6eadc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 08:01:03 -0800 Subject: [PATCH 0026/1218] Redirect old % URLs to new - encoded URLs, closes #1650 Refs #1439 --- datasette/app.py | 7 +++++++ tests/test_html.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index 8c5480cf..2907d90e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1211,6 +1211,13 @@ class DatasetteRouter: return await self.handle_404(request, send) async def handle_404(self, request, send, exception=None): + # If path contains % encoding, redirect to dash encoding + if "%" in request.path: + # Try the same path but with "%" replaced by "-" + # and "-" replaced with "-2D" + new_path = request.path.replace("-", "-2D").replace("%", "-") + await asgi_send_redirect(send, new_path) + return # If URL has a trailing slash, redirect to URL without it path = request.scope.get( "raw_path", request.scope["path"].encode("utf8") diff --git a/tests/test_html.py b/tests/test_html.py index b4a12b8a..3e24009e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -952,3 +952,9 @@ def test_no_alternate_url_json(app_client, path): assert ( ' Date: Mon, 7 Mar 2022 08:09:15 -0800 Subject: [PATCH 0027/1218] asyncio_mode = strict to avoid pytest warnings --- pytest.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/pytest.ini b/pytest.ini index d702ce5f..559e518c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,3 +8,4 @@ filterwarnings= ignore:.*current_task.*:PendingDeprecationWarning markers = serial: tests to avoid using with pytest-xdist +asyncio_mode = strict From 020effe47bf89f35182960a9645f2383a42ebd54 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 08:18:07 -0800 Subject: [PATCH 0028/1218] Preserve query string in % to - redirects, refs #1650 --- datasette/app.py | 2 ++ tests/test_html.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2907d90e..7abccc05 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1216,6 +1216,8 @@ class DatasetteRouter: # Try the same path but with "%" replaced by "-" # and "-" replaced with "-2D" new_path = request.path.replace("-", "-2D").replace("%", "-") + if request.query_string: + new_path += "?{}".format(request.query_string) await asgi_send_redirect(send, new_path) return # If URL has a trailing slash, redirect to URL without it diff --git a/tests/test_html.py b/tests/test_html.py index 3e24009e..de703284 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -954,7 +954,18 @@ def test_no_alternate_url_json(app_client, path): ) -def test_redirect_percent_encoding_to_dash_encoding(app_client): - response = app_client.get("/fivethirtyeight/twitter-ratio%2Fsenators") +@pytest.mark.parametrize( + "path,expected", + ( + ( + "/fivethirtyeight/twitter-ratio%2Fsenators", + "/fivethirtyeight/twitter-2Dratio-2Fsenators", + ), + # query string should be preserved + ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"), + ), +) +def test_redirect_percent_encoding_to_dash_encoding(app_client, path, expected): + response = app_client.get(path) assert response.status == 302 - assert response.headers["location"] == "/fivethirtyeight/twitter-2Dratio-2Fsenators" + assert response.headers["location"] == expected From c85d669de387b40e667fd6942c6cc1c15b4f5964 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 11:26:08 -0800 Subject: [PATCH 0029/1218] Fix bug with percentage redirects, close #1650 --- datasette/utils/__init__.py | 7 ++++++- tests/test_html.py | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 79feeef6..e7c9fb1c 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -10,6 +10,7 @@ import markupsafe import mergedeep import os import re +import secrets import shlex import tempfile import typing @@ -1172,4 +1173,8 @@ def dash_encode(s: str) -> str: @documented def dash_decode(s: str) -> str: "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``" - return urllib.parse.unquote(s.replace("-", "%")) + # Avoid accidentally decoding a %2f style sequence + temp = secrets.token_hex(16) + s = s.replace("%", temp) + decoded = urllib.parse.unquote(s.replace("-", "%")) + return decoded.replace(temp, "%") diff --git a/tests/test_html.py b/tests/test_html.py index de703284..55d78c05 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -961,6 +961,10 @@ def test_no_alternate_url_json(app_client, path): "/fivethirtyeight/twitter-ratio%2Fsenators", "/fivethirtyeight/twitter-2Dratio-2Fsenators", ), + ( + "/fixtures/table%2Fwith%2Fslashes", + "/fixtures/table-2Fwith-2Fslashes", + ), # query string should be preserved ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"), ), From bb499942c15c4e2cfa4b6afab8f8debe5948c009 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 11:33:31 -0800 Subject: [PATCH 0030/1218] Fixed tests for urlsafe_components, refs #1650 --- tests/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1c3ab495..ff4f649a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,8 +19,8 @@ from unittest.mock import patch ("foo", ["foo"]), ("foo,bar", ["foo", "bar"]), ("123,433,112", ["123", "433", "112"]), - ("123%2C433,112", ["123,433", "112"]), - ("123%2F433%2F112", ["123/433/112"]), + ("123-2C433,112", ["123,433", "112"]), + ("123-2F433-2F112", ["123/433/112"]), ], ) def test_urlsafe_components(path, expected): From c5791156d92615f25696ba93dae5bb2dcc192c98 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 14:04:10 -0800 Subject: [PATCH 0031/1218] Code of conduct, refs #1654 --- CODE_OF_CONDUCT.md | 128 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..14d4c567 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`swillison+datasette-code-of-conduct@gmail.com`. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. From 239aed182053903ed69108776b6864d42bfe1eb4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 08:36:35 -0700 Subject: [PATCH 0032/1218] Revert "Code of conduct, refs #1654" This reverts commit c5791156d92615f25696ba93dae5bb2dcc192c98. Refs #1658 --- CODE_OF_CONDUCT.md | 128 --------------------------------------------- 1 file changed, 128 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 14d4c567..00000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,128 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. - -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. - -## Our Standards - -Examples of behavior that contributes to a positive environment for our -community include: - -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -* Focusing on what is best not just for us as individuals, but for the - overall community - -Examples of unacceptable behavior include: - -* The use of sexualized language or imagery, and sexual attention or - advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email - address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Enforcement Responsibilities - -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. - -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. - -## Scope - -This Code of Conduct applies within all community spaces, and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -`swillison+datasette-code-of-conduct@gmail.com`. -All complaints will be reviewed and investigated promptly and fairly. - -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. - -## Enforcement Guidelines - -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: - -### 1. Correction - -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. - -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. - -### 2. Warning - -**Community Impact**: A violation through a single incident or series -of actions. - -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period of time. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. - -### 3. Temporary Ban - -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. - -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period of time. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. -Violating these terms may lead to a permanent ban. - -### 4. Permanent Ban - -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. - -**Consequence**: A permanent ban from any sort of public interaction within -the community. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. - -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. From 5a353a32b9c4d75acbe3193fd72f735a8e78516a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 08:37:14 -0700 Subject: [PATCH 0033/1218] Revert "Fixed tests for urlsafe_components, refs #1650" This reverts commit bb499942c15c4e2cfa4b6afab8f8debe5948c009. Refs #1658 --- tests/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index ff4f649a..1c3ab495 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,8 +19,8 @@ from unittest.mock import patch ("foo", ["foo"]), ("foo,bar", ["foo", "bar"]), ("123,433,112", ["123", "433", "112"]), - ("123-2C433,112", ["123,433", "112"]), - ("123-2F433-2F112", ["123/433/112"]), + ("123%2C433,112", ["123,433", "112"]), + ("123%2F433%2F112", ["123/433/112"]), ], ) def test_urlsafe_components(path, expected): From 77e718c3ffb30473759a8b1ed347f73cb2ff5cfe Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 08:37:31 -0700 Subject: [PATCH 0034/1218] Revert "Fix bug with percentage redirects, close #1650" This reverts commit c85d669de387b40e667fd6942c6cc1c15b4f5964. Refs #1658 --- datasette/utils/__init__.py | 7 +------ tests/test_html.py | 4 ---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index e7c9fb1c..79feeef6 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -10,7 +10,6 @@ import markupsafe import mergedeep import os import re -import secrets import shlex import tempfile import typing @@ -1173,8 +1172,4 @@ def dash_encode(s: str) -> str: @documented def dash_decode(s: str) -> str: "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``" - # Avoid accidentally decoding a %2f style sequence - temp = secrets.token_hex(16) - s = s.replace("%", temp) - decoded = urllib.parse.unquote(s.replace("-", "%")) - return decoded.replace(temp, "%") + return urllib.parse.unquote(s.replace("-", "%")) diff --git a/tests/test_html.py b/tests/test_html.py index 55d78c05..de703284 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -961,10 +961,6 @@ def test_no_alternate_url_json(app_client, path): "/fivethirtyeight/twitter-ratio%2Fsenators", "/fivethirtyeight/twitter-2Dratio-2Fsenators", ), - ( - "/fixtures/table%2Fwith%2Fslashes", - "/fixtures/table-2Fwith-2Fslashes", - ), # query string should be preserved ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"), ), From 645381a5ed23c016281e8c6c7d141518f91b67e5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 08:36:35 -0700 Subject: [PATCH 0035/1218] Add code of conduct again Refs #1658 --- CODE_OF_CONDUCT.md | 128 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..14d4c567 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`swillison+datasette-code-of-conduct@gmail.com`. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. From c10cd48baf106659bf3f129ad7bfb2226be73821 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 11:56:59 -0800 Subject: [PATCH 0036/1218] Min pytest-asyncio of 0.17 So that the asyncio_mode in pytest.ini does not produce a warning on older versions of that library. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8e69c2f5..e70839d6 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ setup( "test": [ "pytest>=5.2.2,<7.1.0", "pytest-xdist>=2.2.1,<2.6", - "pytest-asyncio>=0.10,<0.19", + "pytest-asyncio>=0.17,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", "black==22.1.0", "pytest-timeout>=1.4.2,<2.2", From a35393b29cfb5b8abdc6a94e577af1c9a5c13652 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 11:01:57 -0700 Subject: [PATCH 0037/1218] Tilde encoding (#1659) Closes #1657 Refs #1439 --- datasette/app.py | 11 +++++---- datasette/url_builder.py | 10 ++++---- datasette/utils/__init__.py | 37 ++++++++++++++++------------- datasette/views/base.py | 25 +++++++++++--------- datasette/views/table.py | 14 +++++++---- docs/csv_export.rst | 18 --------------- docs/internals.rst | 34 +++++++++++++-------------- tests/test_api.py | 17 ++++---------- tests/test_cli.py | 6 ++--- tests/test_html.py | 45 +++++++++++++++++++++--------------- tests/test_internals_urls.py | 2 +- tests/test_table_api.py | 9 +++++--- tests/test_table_html.py | 2 +- tests/test_utils.py | 36 +++++++++-------------------- 14 files changed, 125 insertions(+), 141 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 7abccc05..b39ef7cd 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1211,11 +1211,14 @@ class DatasetteRouter: return await self.handle_404(request, send) async def handle_404(self, request, send, exception=None): - # If path contains % encoding, redirect to dash encoding + # If path contains % encoding, redirect to tilde encoding if "%" in request.path: - # Try the same path but with "%" replaced by "-" - # and "-" replaced with "-2D" - new_path = request.path.replace("-", "-2D").replace("%", "-") + # Try the same path but with "%" replaced by "~" + # and "~" replaced with "~7E" + # and "." replaced with "~2E" + new_path = ( + request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E") + ) if request.query_string: new_path += "?{}".format(request.query_string) await asgi_send_redirect(send, new_path) diff --git a/datasette/url_builder.py b/datasette/url_builder.py index eebfe31e..9f072462 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -1,4 +1,4 @@ -from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString +from .utils import tilde_encode, path_with_format, HASH_LENGTH, PrefixedUrlString import urllib @@ -31,20 +31,20 @@ class Urls: db = self.ds.databases[database] if self.ds.setting("hash_urls") and db.hash: path = self.path( - f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format + f"{tilde_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format ) else: - path = self.path(dash_encode(database), format=format) + path = self.path(tilde_encode(database), format=format) return path def table(self, database, table, format=None): - path = f"{self.database(database)}/{dash_encode(table)}" + path = f"{self.database(database)}/{tilde_encode(table)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) def query(self, database, query, format=None): - path = f"{self.database(database)}/{dash_encode(query)}" + path = f"{self.database(database)}/{tilde_encode(query)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 79feeef6..bd591459 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -15,6 +15,7 @@ import tempfile import typing import time import types +import secrets import shutil import urllib import yaml @@ -112,12 +113,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any: def urlsafe_components(token): - """Splits token on commas and dash-decodes each component""" - return [dash_decode(b) for b in token.split(",")] + """Splits token on commas and tilde-decodes each component""" + return [tilde_decode(b) for b in token.split(",")] def path_from_row_pks(row, pks, use_rowid, quote=True): - """Generate an optionally dash-quoted unique identifier + """Generate an optionally tilde-encoded unique identifier for a row from its primary keys.""" if use_rowid: bits = [row["rowid"]] @@ -126,7 +127,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True): row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks ] if quote: - bits = [dash_encode(str(bit)) for bit in bits] + bits = [tilde_encode(str(bit)) for bit in bits] else: bits = [str(bit) for bit in bits] @@ -1142,34 +1143,38 @@ def add_cors_headers(headers): headers["Access-Control-Expose-Headers"] = "Link" -_DASH_ENCODING_SAFE = frozenset( +_TILDE_ENCODING_SAFE = frozenset( b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" b"abcdefghijklmnopqrstuvwxyz" - b"0123456789_" + b"0123456789_-" # This is the same as Python percent-encoding but I removed - # '.' and '-' and '~' + # '.' and '~' ) -class DashEncoder(dict): +class TildeEncoder(dict): # Keeps a cache internally, via __missing__ def __missing__(self, b): # Handle a cache miss, store encoded string in cache and return. - res = chr(b) if b in _DASH_ENCODING_SAFE else "-{:02X}".format(b) + res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b) self[b] = res return res -_dash_encoder = DashEncoder().__getitem__ +_tilde_encoder = TildeEncoder().__getitem__ @documented -def dash_encode(s: str) -> str: - "Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``" - return "".join(_dash_encoder(char) for char in s.encode("utf-8")) +def tilde_encode(s: str) -> str: + "Returns tilde-encoded string - for example ``/foo/bar`` -> ``~2Ffoo~2Fbar``" + return "".join(_tilde_encoder(char) for char in s.encode("utf-8")) @documented -def dash_decode(s: str) -> str: - "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``" - return urllib.parse.unquote(s.replace("-", "%")) +def tilde_decode(s: str) -> str: + "Decodes a tilde-encoded string, so ``~2Ffoo~2Fbar`` -> ``/foo/bar``" + # Avoid accidentally decoding a %2f style sequence + temp = secrets.token_hex(16) + s = s.replace("%", temp) + decoded = urllib.parse.unquote(s.replace("~", "%")) + return decoded.replace(temp, "%") diff --git a/datasette/views/base.py b/datasette/views/base.py index 7cd385b7..1c0c3f9b 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -10,6 +10,7 @@ import pint from datasette import __version__ from datasette.database import QueryInterrupted +from datasette.utils.asgi import Request from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -17,8 +18,8 @@ from datasette.utils import ( InvalidSql, LimitedWriter, call_with_supported_arguments, - dash_decode, - dash_encode, + tilde_decode, + tilde_encode, path_from_row_pks, path_with_added_args, path_with_removed_args, @@ -205,14 +206,14 @@ class DataView(BaseView): async def resolve_db_name(self, request, db_name, **kwargs): hash = None name = None - decoded_name = dash_decode(db_name) + decoded_name = tilde_decode(db_name) if decoded_name not in self.ds.databases and "-" in db_name: # No matching DB found, maybe it's a name-hash? name_bit, hash_bit = db_name.rsplit("-", 1) - if dash_decode(name_bit) not in self.ds.databases: + if tilde_decode(name_bit) not in self.ds.databases: raise NotFound(f"Database not found: {name}") else: - name = dash_decode(name_bit) + name = tilde_decode(name_bit) hash = hash_bit else: name = decoded_name @@ -235,7 +236,7 @@ class DataView(BaseView): return await db.table_exists(t) table, _format = await resolve_table_and_format( - table_and_format=dash_decode(kwargs["table_and_format"]), + table_and_format=tilde_decode(kwargs["table_and_format"]), table_exists=async_table_exists, allowed_formats=self.ds.renderers.keys(), ) @@ -243,11 +244,11 @@ class DataView(BaseView): if _format: kwargs["as_format"] = f".{_format}" elif kwargs.get("table"): - kwargs["table"] = dash_decode(kwargs["table"]) + kwargs["table"] = tilde_decode(kwargs["table"]) should_redirect = self.ds.urls.path(f"{name}-{expected}") if kwargs.get("table"): - should_redirect += "/" + dash_encode(kwargs["table"]) + should_redirect += "/" + tilde_encode(kwargs["table"]) if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] if kwargs.get("as_format"): @@ -291,6 +292,7 @@ class DataView(BaseView): if not request.args.get(key) ] if extra_parameters: + # Replace request object with a new one with modified scope if not request.query_string: new_query_string = "&".join(extra_parameters) else: @@ -300,7 +302,8 @@ class DataView(BaseView): new_scope = dict( request.scope, query_string=new_query_string.encode("latin-1") ) - request.scope = new_scope + receive = request.receive + request = Request(new_scope, receive) if stream: # Some quick soundness checks if not self.ds.setting("allow_csv_stream"): @@ -467,7 +470,7 @@ class DataView(BaseView): return await db.table_exists(t) table, _ext_format = await resolve_table_and_format( - table_and_format=dash_decode(args["table_and_format"]), + table_and_format=tilde_decode(args["table_and_format"]), table_exists=async_table_exists, allowed_formats=self.ds.renderers.keys(), ) @@ -475,7 +478,7 @@ class DataView(BaseView): args["table"] = table del args["table_and_format"] elif "table" in args: - args["table"] = dash_decode(args["table"]) + args["table"] = tilde_decode(args["table"]) return _format, args async def view_get(self, request, database, hash, correct_hash_provided, **kwargs): diff --git a/datasette/views/table.py b/datasette/views/table.py index 1d81755e..72b8e9a4 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -12,7 +12,8 @@ from datasette.utils import ( MultiParams, append_querystring, compound_keys_after_sql, - dash_encode, + tilde_decode, + tilde_encode, escape_sqlite, filters_should_redirect, is_url, @@ -143,7 +144,7 @@ class RowTableShared(DataView): '{flat_pks}'.format( base_url=base_url, database=database, - table=dash_encode(table), + table=tilde_encode(table), flat_pks=str(markupsafe.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) @@ -200,8 +201,8 @@ class RowTableShared(DataView): link_template.format( database=database, base_url=base_url, - table=dash_encode(other_table), - link_id=dash_encode(str(value)), + table=tilde_encode(other_table), + link_id=tilde_encode(str(value)), id=str(markupsafe.escape(value)), label=str(markupsafe.escape(label)) or "-", ) @@ -346,6 +347,8 @@ class TableView(RowTableShared): write=bool(canned_query.get("write")), ) + table = tilde_decode(table) + db = self.ds.databases[database] is_view = bool(await db.get_view_definition(table)) table_exists = bool(await db.table_exists(table)) @@ -766,7 +769,7 @@ class TableView(RowTableShared): if prefix is None: prefix = "$null" else: - prefix = dash_encode(str(prefix)) + prefix = tilde_encode(str(prefix)) next_value = f"{prefix},{next_value}" added_args = {"_next": next_value} if sort: @@ -938,6 +941,7 @@ class RowView(RowTableShared): name = "row" async def data(self, request, database, hash, table, pk_path, default_labels=False): + table = tilde_decode(table) await self.check_permissions( request, [ diff --git a/docs/csv_export.rst b/docs/csv_export.rst index b1cc673c..023fa05e 100644 --- a/docs/csv_export.rst +++ b/docs/csv_export.rst @@ -59,21 +59,3 @@ truncation error message. You can increase or remove this limit using the :ref:`setting_max_csv_mb` config setting. You can also disable the CSV export feature entirely using :ref:`setting_allow_csv_stream`. - -A note on URLs --------------- - -The default URL for the CSV representation of a table is that table with -``.csv`` appended to it: - -* https://latest.datasette.io/fixtures/facetable - HTML interface -* https://latest.datasette.io/fixtures/facetable.csv - CSV export -* https://latest.datasette.io/fixtures/facetable.json - JSON API - -This pattern doesn't work for tables with names that already end in ``.csv`` or -``.json``. For those tables, you can instead use the ``_format=`` query string -parameter: - -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv - HTML interface -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=csv - CSV export -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=json - JSON API diff --git a/docs/internals.rst b/docs/internals.rst index d035e1f1..3d223603 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -545,7 +545,7 @@ These functions can be accessed via the ``{{ urls }}`` object in Datasette templ facetable table pragma_cache_size query -Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is usually the path with ``.json`` added on the end, but it may use ``?_format=json`` in cases where the path already includes ``.json``, for example a URL to a table named ``table.json``. +Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is the path with ``.json`` added on the end. These methods each return a ``datasette.utils.PrefixedUrlString`` object, which is a subclass of the Python ``str`` type. This allows the logic that considers the ``base_url`` setting to detect if that prefix has already been applied to the path. @@ -876,31 +876,31 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth .. autofunction:: datasette.utils.await_me_maybe -.. _internals_dash_encoding: +.. _internals_tilde_encoding: -Dash encoding -------------- +Tilde encoding +-------------- -Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them. +Datasette uses a custom encoding scheme in some places, called **tilde encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them. -Dash encoding uses the same algorithm as `URL percent-encoding `__, but with the ``-`` hyphen character used in place of ``%``. +Tilde encoding uses the same algorithm as `URL percent-encoding `__, but with the ``~`` tilde character used in place of ``%``. -Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_`` will be replaced by the numeric equivalent preceded by a hyphen. For example: +Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example: -- ``/`` becomes ``-2F`` -- ``.`` becomes ``-2E`` -- ``%`` becomes ``-25`` -- ``-`` becomes ``-2D`` -- Space character becomes ``-20`` -- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary`` +- ``/`` becomes ``~2F`` +- ``.`` becomes ``~2E`` +- ``%`` becomes ``~25`` +- ``~`` becomes ``~7E`` +- Space character becomes ``~20`` +- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary`` -.. _internals_utils_dash_encode: +.. _internals_utils_tilde_encode: -.. autofunction:: datasette.utils.dash_encode +.. autofunction:: datasette.utils.tilde_encode -.. _internals_utils_dash_decode: +.. _internals_utils_tilde_decode: -.. autofunction:: datasette.utils.dash_decode +.. autofunction:: datasette.utils.tilde_decode .. _internals_tracer: diff --git a/tests/test_api.py b/tests/test_api.py index dd916cf0..87d91e56 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -679,18 +679,9 @@ def test_row(app_client): assert [{"id": "1", "content": "hello"}] == response.json["rows"] -def test_row_format_in_querystring(app_client): - # regression test for https://github.com/simonw/datasette/issues/563 - response = app_client.get( - "/fixtures/simple_primary_key/1?_format=json&_shape=objects" - ) - assert response.status == 200 - assert [{"id": "1", "content": "hello"}] == response.json["rows"] - - def test_row_strange_table_name(app_client): response = app_client.get( - "/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects" + "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3.json?_shape=objects" ) assert response.status == 200 assert [{"pk": "3", "content": "hey"}] == response.json["rows"] @@ -942,7 +933,7 @@ def test_cors(app_client_with_cors, path, status_code): ) def test_database_with_space_in_name(app_client_two_attached_databases, path): response = app_client_two_attached_databases.get( - "/extra-20database" + path, follow_redirects=True + "/extra~20database" + path, follow_redirects=True ) assert response.status == 200 @@ -953,7 +944,7 @@ def test_common_prefix_database_names(app_client_conflicting_database_names): d["name"] for d in app_client_conflicting_database_names.get("/-/databases.json").json ] - for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-2Dbar.json")): + for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-bar.json")): data = app_client_conflicting_database_names.get(path).json assert db_name == data["database"] @@ -996,7 +987,7 @@ async def test_hidden_sqlite_stat1_table(): @pytest.mark.asyncio @pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d")) -async def test_dash_encoded_database_names(db_name): +async def test_tilde_encoded_database_names(db_name): ds = Datasette() ds.add_memory_database(db_name) response = await ds.client.get("/.json") diff --git a/tests/test_cli.py b/tests/test_cli.py index e30c2ad3..5afe72c1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,7 @@ from datasette.app import SETTINGS from datasette.plugins import DEFAULT_PLUGINS from datasette.cli import cli, serve from datasette.version import __version__ -from datasette.utils import dash_encode +from datasette.utils import tilde_encode from datasette.utils.sqlite import sqlite3 from click.testing import CliRunner import io @@ -295,12 +295,12 @@ def test_weird_database_names(ensure_eventloop, tmpdir, filename): assert result1.exit_code == 0, result1.output filename_no_stem = filename.rsplit(".", 1)[0] expected_link = '{}'.format( - dash_encode(filename_no_stem), filename_no_stem + tilde_encode(filename_no_stem), filename_no_stem ) assert expected_link in result1.output # Now try hitting that database page result2 = runner.invoke( - cli, [db_path, "--get", "/{}".format(dash_encode(filename_no_stem))] + cli, [db_path, "--get", "/{}".format(tilde_encode(filename_no_stem))] ) assert result2.exit_code == 0, result2.output diff --git a/tests/test_html.py b/tests/test_html.py index de703284..76a8423a 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -29,7 +29,7 @@ def test_homepage(app_client_two_attached_databases): ) # Should be two attached databases assert [ - {"href": r"/extra-20database", "text": "extra database"}, + {"href": "/extra~20database", "text": "extra database"}, {"href": "/fixtures", "text": "fixtures"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] # Database should show count text and attached tables @@ -44,8 +44,8 @@ def test_homepage(app_client_two_attached_databases): {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ - {"href": r"/extra-20database/searchable", "text": "searchable"}, - {"href": r"/extra-20database/searchable_view", "text": "searchable_view"}, + {"href": r"/extra~20database/searchable", "text": "searchable"}, + {"href": r"/extra~20database/searchable_view", "text": "searchable_view"}, ] == table_links @@ -139,15 +139,15 @@ def test_database_page(app_client): queries_ul = soup.find("h2", text="Queries").find_next_sibling("ul") assert queries_ul is not None assert [ - ( - "/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC", - "๐œ๐ข๐ญ๐ข๐ž๐ฌ", - ), ("/fixtures/from_async_hook", "from_async_hook"), ("/fixtures/from_hook", "from_hook"), ("/fixtures/magic_parameters", "magic_parameters"), ("/fixtures/neighborhood_search#fragment-goes-here", "Search neighborhoods"), ("/fixtures/pragma_cache_size", "pragma_cache_size"), + ( + "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC", + "๐œ๐ข๐ญ๐ข๐ž๐ฌ", + ), ] == sorted( [(a["href"], a.text) for a in queries_ul.find_all("a")], key=lambda p: p[0] ) @@ -193,11 +193,11 @@ def test_row_redirects_with_url_hash(app_client_with_hash): def test_row_strange_table_name_with_url_hash(app_client_with_hash): - response = app_client_with_hash.get("/fixtures/table-2Fwith-2Fslashes-2Ecsv/3") + response = app_client_with_hash.get("/fixtures/table~2Fwith~2Fslashes~2Ecsv/3") assert response.status == 302 - assert response.headers["Location"].endswith("/table-2Fwith-2Fslashes-2Ecsv/3") + assert response.headers["Location"].endswith("/table~2Fwith~2Fslashes~2Ecsv/3") response = app_client_with_hash.get( - "/fixtures/table-2Fwith-2Fslashes-2Ecsv/3", follow_redirects=True + "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3", follow_redirects=True ) assert response.status == 200 @@ -229,7 +229,7 @@ def test_row_page_does_not_truncate(): ["query", "db-fixtures", "query-neighborhood_search"], ), ( - "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table~2Fwith~2Fslashes~2Ecsv", ["table", "db-fixtures", "table-tablewithslashescsv-fa7563"], ), ( @@ -255,7 +255,7 @@ def test_css_classes_on_body(app_client, path, expected_classes): "table-fixtures-simple_primary_key.html, *table.html", ), ( - "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table~2Fwith~2Fslashes~2Ecsv", "table-fixtures-tablewithslashescsv-fa7563.html, *table.html", ), ( @@ -359,7 +359,7 @@ def test_row_links_from_other_tables(app_client, path, expected_text, expected_l ], ), ( - "/fixtures/compound_primary_key/a-2Fb,-2Ec-2Dd", + "/fixtures/compound_primary_key/a~2Fb,~2Ec~2Dd", [ [ 'a/b', @@ -816,7 +816,8 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix): ), ("/fixtures/pragma_cache_size", None), ( - "/fixtures/๐œ๐ข๐ญ๐ข๐ž๐ฌ", + # /fixtures/๐œ๐ข๐ญ๐ข๐ž๐ฌ + "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC", "/fixtures?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B", ), ("/fixtures/magic_parameters", None), @@ -824,6 +825,7 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix): ) def test_edit_sql_link_on_canned_queries(app_client, path, expected): response = app_client.get(path) + assert response.status == 200 expected_link = f'Edit SQL' if expected: assert expected_link in response.text @@ -898,8 +900,8 @@ def test_trace_correctly_escaped(app_client): # Table page ("/fixtures/facetable", "http://localhost/fixtures/facetable.json"), ( - "/fixtures/table%2Fwith%2Fslashes.csv", - "http://localhost/fixtures/table%2Fwith%2Fslashes.csv?_format=json", + "/fixtures/table~2Fwith~2Fslashes~2Ecsv", + "http://localhost/fixtures/table~2Fwith~2Fslashes~2Ecsv.json", ), # Row page ( @@ -930,6 +932,7 @@ def test_trace_correctly_escaped(app_client): ) def test_alternate_url_json(app_client, path, expected): response = app_client.get(path) + assert response.status == 200 link = response.headers["link"] assert link == '{}; rel="alternate"; type="application/json+datasette"'.format( expected @@ -959,13 +962,17 @@ def test_no_alternate_url_json(app_client, path): ( ( "/fivethirtyeight/twitter-ratio%2Fsenators", - "/fivethirtyeight/twitter-2Dratio-2Fsenators", + "/fivethirtyeight/twitter-ratio~2Fsenators", + ), + ( + "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table~2Fwith~2Fslashes~2Ecsv", ), # query string should be preserved - ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"), + ("/foo/bar%2Fbaz?id=5", "/foo/bar~2Fbaz?id=5"), ), ) -def test_redirect_percent_encoding_to_dash_encoding(app_client, path, expected): +def test_redirect_percent_encoding_to_tilde_encoding(app_client, path, expected): response = app_client.get(path) assert response.status == 302 assert response.headers["location"] == expected diff --git a/tests/test_internals_urls.py b/tests/test_internals_urls.py index 16515ad6..4307789c 100644 --- a/tests/test_internals_urls.py +++ b/tests/test_internals_urls.py @@ -121,7 +121,7 @@ def test_database(ds, base_url, format, expected): ("/", "name", None, "/_memory/name"), ("/prefix/", "name", None, "/prefix/_memory/name"), ("/", "name", "json", "/_memory/name.json"), - ("/", "name.json", "json", "/_memory/name-2Ejson.json"), + ("/", "name.json", "json", "/_memory/name~2Ejson.json"), ], ) def test_table_and_query(ds, base_url, name, format, expected): diff --git a/tests/test_table_api.py b/tests/test_table_api.py index cc38d392..3ab369b3 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -138,13 +138,13 @@ def test_table_shape_object_compound_primary_key(app_client): response = app_client.get("/fixtures/compound_primary_key.json?_shape=object") assert response.json == { "a,b": {"pk1": "a", "pk2": "b", "content": "c"}, - "a-2Fb,-2Ec-2Dd": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, + "a~2Fb,~2Ec-d": {"pk1": "a/b", "pk2": ".c-d", "content": "c"}, } def test_table_with_slashes_in_name(app_client): response = app_client.get( - "/fixtures/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json" + "/fixtures/table~2Fwith~2Fslashes~2Ecsv.json?_shape=objects" ) assert response.status == 200 data = response.json @@ -1032,7 +1032,10 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client): def test_custom_query_with_unicode_characters(app_client): - response = app_client.get("/fixtures/๐œ๐ข๐ญ๐ข๐ž๐ฌ.json?_shape=array") + # /fixtures/๐œ๐ข๐ญ๐ข๐ž๐ฌ.json + response = app_client.get( + "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC.json?_shape=array" + ) assert [{"id": 1, "name": "San Francisco"}] == response.json diff --git a/tests/test_table_html.py b/tests/test_table_html.py index 77d97d80..d40f017a 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -565,7 +565,7 @@ def test_table_html_compound_primary_key(app_client): 'c', ], [ - 'a/b,.c-d', + 'a/b,.c-d', 'a/b', '.c-d', 'c', diff --git a/tests/test_utils.py b/tests/test_utils.py index 1c3ab495..790aadc7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,8 +19,8 @@ from unittest.mock import patch ("foo", ["foo"]), ("foo,bar", ["foo", "bar"]), ("123,433,112", ["123", "433", "112"]), - ("123%2C433,112", ["123,433", "112"]), - ("123%2F433%2F112", ["123/433/112"]), + ("123~2C433,112", ["123,433", "112"]), + ("123~2F433~2F112", ["123/433/112"]), ], ) def test_urlsafe_components(path, expected): @@ -93,7 +93,7 @@ def test_path_with_replaced_args(path, args, expected): "row,pks,expected_path", [ ({"A": "foo", "B": "bar"}, ["A", "B"], "foo,bar"), - ({"A": "f,o", "B": "bar"}, ["A", "B"], "f-2Co,bar"), + ({"A": "f,o", "B": "bar"}, ["A", "B"], "f~2Co,bar"), ({"A": 123}, ["A"], "123"), ( utils.CustomRow( @@ -393,9 +393,7 @@ def test_table_columns(): ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"), ("/foo/bar", "json", {}, "/foo/bar.json"), ("/foo/bar", "csv", {}, "/foo/bar.csv"), - ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"), ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"), - ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"), ( "/sf-trees/Street_Tree_List?_search=cherry&_size=1000", "csv", @@ -410,18 +408,6 @@ def test_path_with_format(path, format, extra_qs, expected): assert expected == actual -def test_path_with_format_replace_format(): - request = Request.fake("/foo/bar.csv") - assert ( - utils.path_with_format(request=request, format="blob") - == "/foo/bar.csv?_format=blob" - ) - assert ( - utils.path_with_format(request=request, format="blob", replace_format="csv") - == "/foo/bar.blob" - ) - - @pytest.mark.parametrize( "bytes,expected", [ @@ -652,15 +638,15 @@ async def test_derive_named_parameters(sql, expected): "original,expected", ( ("abc", "abc"), - ("/foo/bar", "-2Ffoo-2Fbar"), - ("/-/bar", "-2F-2D-2Fbar"), - ("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"), - (r"%~-/", "-25-7E-2D-2F"), - ("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"), + ("/foo/bar", "~2Ffoo~2Fbar"), + ("/-/bar", "~2F-~2Fbar"), + ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"), + (r"%~-/", "~25~7E-~2F"), + ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"), ), ) -def test_dash_encoding(original, expected): - actual = utils.dash_encode(original) +def test_tilde_encoding(original, expected): + actual = utils.tilde_encode(original) assert actual == expected # And test round-trip - assert original == utils.dash_decode(actual) + assert original == utils.tilde_decode(actual) From 77a904fea14f743560af9cc668146339bdbbd0a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 15 Mar 2022 11:03:01 -0700 Subject: [PATCH 0038/1218] Update pytest requirement from <7.1.0,>=5.2.2 to >=5.2.2,<7.2.0 (#1656) Updates the requirements on [pytest](https://github.com/pytest-dev/pytest) to permit the latest version. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/5.2.2...7.1.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e70839d6..4b58b8c4 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ setup( extras_require={ "docs": ["sphinx_rtd_theme", "sphinx-autobuild", "codespell"], "test": [ - "pytest>=5.2.2,<7.1.0", + "pytest>=5.2.2,<7.2.0", "pytest-xdist>=2.2.1,<2.6", "pytest-asyncio>=0.17,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", From 30e5f0e67c38054a8087a2a4eae3fc4d1779af90 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 17 Mar 2022 14:30:02 -0700 Subject: [PATCH 0039/1218] Documented internals used by datasette-hashed-urls Closes #1663 --- docs/internals.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 3d223603..117cb95c 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -217,12 +217,18 @@ You can create your own instance of this - for example to help write tests for a } }) +Constructor parameters include: + +- ``files=[...]`` - a list of database files to open +- ``immutables=[...]`` - a list of database files to open in immutable mode +- ``metadata={...}`` - a dictionary of :ref:`metadata` + .. _datasette_databases: .databases ---------- -Property exposing an ordered dictionary of databases currently connected to Datasette. +Property exposing a ``collections.OrderedDict`` of databases currently connected to Datasette. The dictionary keys are the name of the database that is used in the URL - e.g. ``/fixtures`` would have a key of ``"fixtures"``. The values are :ref:`internals_database` instances. @@ -582,6 +588,13 @@ The arguments are as follows: The first argument is the ``datasette`` instance you are attaching to, the second is a ``path=``, then ``is_mutable`` and ``is_memory`` are both optional arguments. +.. _database_hash: + +db.hash +------- + +If the database was opened in immutable mode, this property returns the 64 character SHA-256 hash of the database contents as a string. Otherwise it returns ``None``. + .. _database_execute: await db.execute(sql, ...) From d4f60c2388c01ddce1b16f95c16d310e037c9912 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 17:12:03 -0700 Subject: [PATCH 0040/1218] Remove hashed URL mode Also simplified how view class routing works. Refs #1661 --- datasette/app.py | 2 +- datasette/views/base.py | 153 ++++++----------------------------- datasette/views/database.py | 19 +++-- datasette/views/index.py | 3 +- datasette/views/special.py | 3 +- datasette/views/table.py | 34 ++++---- tests/fixtures.py | 6 -- tests/test_api.py | 29 ------- tests/test_custom_pages.py | 42 +++++----- tests/test_html.py | 28 ------- tests/test_internals_urls.py | 18 ----- tests/test_table_api.py | 8 -- 12 files changed, 79 insertions(+), 266 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index b39ef7cd..3099ada7 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1097,7 +1097,7 @@ class Datasette: ) add_route( TableView.as_view(self), - r"/(?P[^/]+)/(?P[^/]+?$)", + r"/(?P[^/]+)/(?P[^\/\.]+)(\.[a-zA-Z0-9_]+)?$", ) add_route( RowView.as_view(self), diff --git a/datasette/views/base.py b/datasette/views/base.py index 1c0c3f9b..e31beb19 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -122,11 +122,11 @@ class BaseView: async def delete(self, request, *args, **kwargs): return Response.text("Method not allowed", status=405) - async def dispatch_request(self, request, *args, **kwargs): + async def dispatch_request(self, request): if self.ds: await self.ds.refresh_schemas() handler = getattr(self, request.method.lower(), None) - return await handler(request, *args, **kwargs) + return await handler(request) async def render(self, templates, request, context=None): context = context or {} @@ -169,9 +169,7 @@ class BaseView: def as_view(cls, *class_args, **class_kwargs): async def view(request, send): self = view.view_class(*class_args, **class_kwargs) - return await self.dispatch_request( - request, **request.scope["url_route"]["kwargs"] - ) + return await self.dispatch_request(request) view.view_class = cls view.__doc__ = cls.__doc__ @@ -200,90 +198,14 @@ class DataView(BaseView): add_cors_headers(r.headers) return r - async def data(self, request, database, hash, **kwargs): + async def data(self, request): raise NotImplementedError - async def resolve_db_name(self, request, db_name, **kwargs): - hash = None - name = None - decoded_name = tilde_decode(db_name) - if decoded_name not in self.ds.databases and "-" in db_name: - # No matching DB found, maybe it's a name-hash? - name_bit, hash_bit = db_name.rsplit("-", 1) - if tilde_decode(name_bit) not in self.ds.databases: - raise NotFound(f"Database not found: {name}") - else: - name = tilde_decode(name_bit) - hash = hash_bit - else: - name = decoded_name - - try: - db = self.ds.databases[name] - except KeyError: - raise NotFound(f"Database not found: {name}") - - # Verify the hash - expected = "000" - if db.hash is not None: - expected = db.hash[:HASH_LENGTH] - correct_hash_provided = expected == hash - - if not correct_hash_provided: - if "table_and_format" in kwargs: - - async def async_table_exists(t): - return await db.table_exists(t) - - table, _format = await resolve_table_and_format( - table_and_format=tilde_decode(kwargs["table_and_format"]), - table_exists=async_table_exists, - allowed_formats=self.ds.renderers.keys(), - ) - kwargs["table"] = table - if _format: - kwargs["as_format"] = f".{_format}" - elif kwargs.get("table"): - kwargs["table"] = tilde_decode(kwargs["table"]) - - should_redirect = self.ds.urls.path(f"{name}-{expected}") - if kwargs.get("table"): - should_redirect += "/" + tilde_encode(kwargs["table"]) - if kwargs.get("pk_path"): - should_redirect += "/" + kwargs["pk_path"] - if kwargs.get("as_format"): - should_redirect += kwargs["as_format"] - if kwargs.get("as_db"): - should_redirect += kwargs["as_db"] - - if ( - (self.ds.setting("hash_urls") or "_hash" in request.args) - and - # Redirect only if database is immutable - not self.ds.databases[name].is_mutable - ): - return name, expected, correct_hash_provided, should_redirect - - return name, expected, correct_hash_provided, None - def get_templates(self, database, table=None): assert NotImplemented - async def get(self, request, db_name, **kwargs): - ( - database, - hash, - correct_hash_provided, - should_redirect, - ) = await self.resolve_db_name(request, db_name, **kwargs) - if should_redirect: - return self.redirect(request, should_redirect, remove_args={"_hash"}) - - return await self.view_get( - request, database, hash, correct_hash_provided, **kwargs - ) - - async def as_csv(self, request, database, hash, **kwargs): + async def as_csv(self, request, database): + kwargs = {} stream = request.args.get("_stream") # Do not calculate facets or counts: extra_parameters = [ @@ -313,9 +235,7 @@ class DataView(BaseView): kwargs["_size"] = "max" # Fetch the first page try: - response_or_template_contexts = await self.data( - request, database, hash, **kwargs - ) + response_or_template_contexts = await self.data(request) if isinstance(response_or_template_contexts, Response): return response_or_template_contexts elif len(response_or_template_contexts) == 4: @@ -367,10 +287,11 @@ class DataView(BaseView): next = None while first or (next and stream): try: + kwargs = {} if next: kwargs["_next"] = next if not first: - data, _, _ = await self.data(request, database, hash, **kwargs) + data, _, _ = await self.data(request, **kwargs) if first: if request.args.get("_header") != "off": await writer.writerow(headings) @@ -445,60 +366,39 @@ class DataView(BaseView): if not trace: content_type = "text/csv; charset=utf-8" disposition = 'attachment; filename="{}.csv"'.format( - kwargs.get("table", database) + request.url_vars.get("table", database) ) headers["content-disposition"] = disposition return AsgiStream(stream_fn, headers=headers, content_type=content_type) - async def get_format(self, request, database, args): - """Determine the format of the response from the request, from URL - parameters or from a file extension. - - `args` is a dict of the path components parsed from the URL by the router. - """ - # If ?_format= is provided, use that as the format - _format = request.args.get("_format", None) - if not _format: - _format = (args.pop("as_format", None) or "").lstrip(".") + def get_format(self, request): + # Format is the bit from the path following the ., if one exists + last_path_component = request.path.split("/")[-1] + if "." in last_path_component: + return last_path_component.split(".")[-1] else: - args.pop("as_format", None) - if "table_and_format" in args: - db = self.ds.databases[database] + return None - async def async_table_exists(t): - return await db.table_exists(t) - - table, _ext_format = await resolve_table_and_format( - table_and_format=tilde_decode(args["table_and_format"]), - table_exists=async_table_exists, - allowed_formats=self.ds.renderers.keys(), - ) - _format = _format or _ext_format - args["table"] = table - del args["table_and_format"] - elif "table" in args: - args["table"] = tilde_decode(args["table"]) - return _format, args - - async def view_get(self, request, database, hash, correct_hash_provided, **kwargs): - _format, kwargs = await self.get_format(request, database, kwargs) + async def get(self, request): + db_name = request.url_vars["db_name"] + database = tilde_decode(db_name) + _format = self.get_format(request) + data_kwargs = {} if _format == "csv": - return await self.as_csv(request, database, hash, **kwargs) + return await self.as_csv(request, database) if _format is None: # HTML views default to expanding all foreign key labels - kwargs["default_labels"] = True + data_kwargs["default_labels"] = True extra_template_data = {} start = time.perf_counter() status_code = None templates = [] try: - response_or_template_contexts = await self.data( - request, database, hash, **kwargs - ) + response_or_template_contexts = await self.data(request, **data_kwargs) if isinstance(response_or_template_contexts, Response): return response_or_template_contexts # If it has four items, it includes an HTTP status code @@ -650,10 +550,7 @@ class DataView(BaseView): ttl = request.args.get("_ttl", None) if ttl is None or not ttl.isdigit(): - if correct_hash_provided: - ttl = self.ds.setting("default_cache_ttl_hashed") - else: - ttl = self.ds.setting("default_cache_ttl") + ttl = self.ds.setting("default_cache_ttl") return self.set_response_headers(r, ttl) diff --git a/datasette/views/database.py b/datasette/views/database.py index e26706e7..48635e01 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -12,6 +12,7 @@ from datasette.utils import ( await_me_maybe, check_visibility, derive_named_parameters, + tilde_decode, to_css_class, validate_sql_select, is_url, @@ -21,7 +22,7 @@ from datasette.utils import ( sqlite3, InvalidSql, ) -from datasette.utils.asgi import AsgiFileDownload, Response, Forbidden +from datasette.utils.asgi import AsgiFileDownload, NotFound, Response, Forbidden from datasette.plugins import pm from .base import DatasetteError, DataView @@ -30,7 +31,8 @@ from .base import DatasetteError, DataView class DatabaseView(DataView): name = "database" - async def data(self, request, database, hash, default_labels=False, _size=None): + async def data(self, request, default_labels=False, _size=None): + database = tilde_decode(request.url_vars["db_name"]) await self.check_permissions( request, [ @@ -45,10 +47,13 @@ class DatabaseView(DataView): sql = request.args.get("sql") validate_sql_select(sql) return await QueryView(self.ds).data( - request, database, hash, sql, _size=_size, metadata=metadata + request, sql, _size=_size, metadata=metadata ) - db = self.ds.databases[database] + try: + db = self.ds.databases[database] + except KeyError: + raise NotFound("Database not found: {}".format(database)) table_counts = await db.table_counts(5) hidden_table_names = set(await db.hidden_table_names()) @@ -156,7 +161,8 @@ class DatabaseView(DataView): class DatabaseDownload(DataView): name = "database_download" - async def view_get(self, request, database, hash, correct_hash_present, **kwargs): + async def get(self, request): + database = tilde_decode(request.url_vars["db_name"]) await self.check_permissions( request, [ @@ -191,8 +197,6 @@ class QueryView(DataView): async def data( self, request, - database, - hash, sql, editable=True, canned_query=None, @@ -201,6 +205,7 @@ class QueryView(DataView): named_parameters=None, write=False, ): + database = tilde_decode(request.url_vars["db_name"]) params = {key: request.args.get(key) for key in request.args} if "sql" in params: params.pop("sql") diff --git a/datasette/views/index.py b/datasette/views/index.py index 18454759..311a49db 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -18,7 +18,8 @@ COUNT_DB_SIZE_LIMIT = 100 * 1024 * 1024 class IndexView(BaseView): name = "index" - async def get(self, request, as_format): + async def get(self, request): + as_format = request.url_vars["as_format"] await self.check_permission(request, "view-instance") databases = [] for name, db in self.ds.databases.items(): diff --git a/datasette/views/special.py b/datasette/views/special.py index cdd530f0..c7b5061f 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -14,7 +14,8 @@ class JsonDataView(BaseView): self.data_callback = data_callback self.needs_request = needs_request - async def get(self, request, as_format): + async def get(self, request): + as_format = request.url_vars["as_format"] await self.check_permission(request, "view-instance") if self.needs_request: data = self.data_callback(request) diff --git a/datasette/views/table.py b/datasette/views/table.py index 72b8e9a4..8bdc7417 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -271,20 +271,18 @@ class RowTableShared(DataView): class TableView(RowTableShared): name = "table" - async def post(self, request, db_name, table_and_format): + async def post(self, request): + db_name = tilde_decode(request.url_vars["db_name"]) + table = tilde_decode(request.url_vars["table"]) # Handle POST to a canned query - canned_query = await self.ds.get_canned_query( - db_name, table_and_format, request.actor - ) + canned_query = await self.ds.get_canned_query(db_name, table, request.actor) assert canned_query, "You may only POST to a canned query" return await QueryView(self.ds).data( request, - db_name, - None, canned_query["sql"], metadata=canned_query, editable=False, - canned_query=table_and_format, + canned_query=table, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) @@ -325,20 +323,22 @@ class TableView(RowTableShared): async def data( self, request, - database, - hash, - table, default_labels=False, _next=None, _size=None, ): + database = tilde_decode(request.url_vars["db_name"]) + table = tilde_decode(request.url_vars["table"]) + try: + db = self.ds.databases[database] + except KeyError: + raise NotFound("Database not found: {}".format(database)) + # If this is a canned query, not a table, then dispatch to QueryView instead canned_query = await self.ds.get_canned_query(database, table, request.actor) if canned_query: return await QueryView(self.ds).data( request, - database, - hash, canned_query["sql"], metadata=canned_query, editable=False, @@ -347,9 +347,6 @@ class TableView(RowTableShared): write=bool(canned_query.get("write")), ) - table = tilde_decode(table) - - db = self.ds.databases[database] is_view = bool(await db.get_view_definition(table)) table_exists = bool(await db.table_exists(table)) @@ -940,8 +937,9 @@ async def _sql_params_pks(db, table, pk_values): class RowView(RowTableShared): name = "row" - async def data(self, request, database, hash, table, pk_path, default_labels=False): - table = tilde_decode(table) + async def data(self, request, default_labels=False): + database = tilde_decode(request.url_vars["db_name"]) + table = tilde_decode(request.url_vars["table"]) await self.check_permissions( request, [ @@ -950,7 +948,7 @@ class RowView(RowTableShared): "view-instance", ], ) - pk_values = urlsafe_components(pk_path) + pk_values = urlsafe_components(request.url_vars["pk_path"]) db = self.ds.databases[database] sql, params, pks = await _sql_params_pks(db, table, pk_values) results = await db.execute(sql, params, truncate=True) diff --git a/tests/fixtures.py b/tests/fixtures.py index 11f09c41..342a3020 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -214,12 +214,6 @@ def app_client_two_attached_databases_one_immutable(): yield client -@pytest.fixture(scope="session") -def app_client_with_hash(): - with make_app_client(settings={"hash_urls": True}, is_immutable=True) as client: - yield client - - @pytest.fixture(scope="session") def app_client_with_trace(): with make_app_client(settings={"trace_debug": True}, is_immutable=True) as client: diff --git a/tests/test_api.py b/tests/test_api.py index 87d91e56..46e41afb 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -825,35 +825,6 @@ def test_config_redirects_to_settings(app_client, path, expected_redirect): assert response.headers["Location"] == expected_redirect -@pytest.mark.parametrize( - "path,expected_redirect", - [ - ("/fixtures/facetable.json?_hash=1", "/fixtures-HASH/facetable.json"), - ( - "/fixtures/facetable.json?city_id=1&_hash=1", - "/fixtures-HASH/facetable.json?city_id=1", - ), - ], -) -def test_hash_parameter( - app_client_two_attached_databases_one_immutable, path, expected_redirect -): - # First get the current hash for the fixtures database - current_hash = app_client_two_attached_databases_one_immutable.ds.databases[ - "fixtures" - ].hash[:7] - response = app_client_two_attached_databases_one_immutable.get(path) - assert response.status == 302 - location = response.headers["Location"] - assert expected_redirect.replace("HASH", current_hash) == location - - -def test_hash_parameter_ignored_for_mutable_databases(app_client): - path = "/fixtures/facetable.json?_hash=1" - response = app_client.get(path) - assert response.status == 200 - - test_json_columns_default_expected = [ {"intval": 1, "strval": "s", "floatval": 0.5, "jsonval": '{"foo": "bar"}'} ] diff --git a/tests/test_custom_pages.py b/tests/test_custom_pages.py index 66b7437a..f2cfe394 100644 --- a/tests/test_custom_pages.py +++ b/tests/test_custom_pages.py @@ -21,61 +21,61 @@ def custom_pages_client_with_base_url(): def test_custom_pages_view_name(custom_pages_client): response = custom_pages_client.get("/about") - assert 200 == response.status - assert "ABOUT! view_name:page" == response.text + assert response.status == 200 + assert response.text == "ABOUT! view_name:page" def test_request_is_available(custom_pages_client): response = custom_pages_client.get("/request") - assert 200 == response.status - assert "path:/request" == response.text + assert response.status == 200 + assert response.text == "path:/request" def test_custom_pages_with_base_url(custom_pages_client_with_base_url): response = custom_pages_client_with_base_url.get("/prefix/request") - assert 200 == response.status - assert "path:/prefix/request" == response.text + assert response.status == 200 + assert response.text == "path:/prefix/request" def test_custom_pages_nested(custom_pages_client): response = custom_pages_client.get("/nested/nest") - assert 200 == response.status - assert "Nest!" == response.text + assert response.status == 200 + assert response.text == "Nest!" response = custom_pages_client.get("/nested/nest2") - assert 404 == response.status + assert response.status == 404 def test_custom_status(custom_pages_client): response = custom_pages_client.get("/202") - assert 202 == response.status - assert "202!" == response.text + assert response.status == 202 + assert response.text == "202!" def test_custom_headers(custom_pages_client): response = custom_pages_client.get("/headers") - assert 200 == response.status - assert "foo" == response.headers["x-this-is-foo"] - assert "bar" == response.headers["x-this-is-bar"] - assert "FOOBAR" == response.text + assert response.status == 200 + assert response.headers["x-this-is-foo"] == "foo" + assert response.headers["x-this-is-bar"] == "bar" + assert response.text == "FOOBAR" def test_custom_content_type(custom_pages_client): response = custom_pages_client.get("/atom") - assert 200 == response.status + assert response.status == 200 assert response.headers["content-type"] == "application/xml" - assert "" == response.text + assert response.text == "" def test_redirect(custom_pages_client): response = custom_pages_client.get("/redirect") - assert 302 == response.status - assert "/example" == response.headers["Location"] + assert response.status == 302 + assert response.headers["Location"] == "/example" def test_redirect2(custom_pages_client): response = custom_pages_client.get("/redirect2") - assert 301 == response.status - assert "/example" == response.headers["Location"] + assert response.status == 301 + assert response.headers["Location"] == "/example" @pytest.mark.parametrize( diff --git a/tests/test_html.py b/tests/test_html.py index 76a8423a..6e4c22b1 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -5,7 +5,6 @@ from .fixtures import ( # noqa app_client_base_url_prefix, app_client_shorter_time_limit, app_client_two_attached_databases, - app_client_with_hash, make_app_client, METADATA, ) @@ -101,13 +100,6 @@ def test_not_allowed_methods(): assert response.status == 405 -def test_database_page_redirects_with_url_hash(app_client_with_hash): - response = app_client_with_hash.get("/fixtures") - assert response.status == 302 - response = app_client_with_hash.get("/fixtures", follow_redirects=True) - assert "fixtures" in response.text - - def test_database_page(app_client): response = app_client.get("/fixtures") soup = Soup(response.body, "html.parser") @@ -182,26 +174,6 @@ def test_sql_time_limit(app_client_shorter_time_limit): assert expected_html_fragment in response.text -def test_row_redirects_with_url_hash(app_client_with_hash): - response = app_client_with_hash.get("/fixtures/simple_primary_key/1") - assert response.status == 302 - assert response.headers["Location"].endswith("/1") - response = app_client_with_hash.get( - "/fixtures/simple_primary_key/1", follow_redirects=True - ) - assert response.status == 200 - - -def test_row_strange_table_name_with_url_hash(app_client_with_hash): - response = app_client_with_hash.get("/fixtures/table~2Fwith~2Fslashes~2Ecsv/3") - assert response.status == 302 - assert response.headers["Location"].endswith("/table~2Fwith~2Fslashes~2Ecsv/3") - response = app_client_with_hash.get( - "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3", follow_redirects=True - ) - assert response.status == 200 - - def test_row_page_does_not_truncate(): with make_app_client(settings={"truncate_cells_html": 5}) as client: response = client.get("/fixtures/facetable/1") diff --git a/tests/test_internals_urls.py b/tests/test_internals_urls.py index 4307789c..d60aafcf 100644 --- a/tests/test_internals_urls.py +++ b/tests/test_internals_urls.py @@ -1,6 +1,5 @@ from datasette.app import Datasette from datasette.utils import PrefixedUrlString -from .fixtures import app_client_with_hash import pytest @@ -147,20 +146,3 @@ def test_row(ds, base_url, format, expected): actual = ds.urls.row("_memory", "facetable", "1", format=format) assert actual == expected assert isinstance(actual, PrefixedUrlString) - - -@pytest.mark.parametrize("base_url", ["/", "/prefix/"]) -def test_database_hashed(app_client_with_hash, base_url): - ds = app_client_with_hash.ds - original_base_url = ds._settings["base_url"] - try: - ds._settings["base_url"] = base_url - db_hash = ds.get_database("fixtures").hash - assert len(db_hash) == 64 - expected = f"{base_url}fixtures-{db_hash[:7]}" - assert ds.urls.database("fixtures") == expected - assert ds.urls.table("fixtures", "name") == expected + "/name" - assert ds.urls.query("fixtures", "name") == expected + "/name" - finally: - # Reset this since fixture is shared with other tests - ds._settings["base_url"] = original_base_url diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 3ab369b3..3d0a7fbd 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -2,7 +2,6 @@ from datasette.utils import detect_json1 from datasette.utils.sqlite import sqlite_version from .fixtures import ( # noqa app_client, - app_client_with_hash, app_client_with_trace, app_client_returned_rows_matches_page_size, generate_compound_rows, @@ -41,13 +40,6 @@ def test_table_not_exists_json(app_client): } == app_client.get("/fixtures/blah.json").json -def test_jsono_redirects_to_shape_objects(app_client_with_hash): - response_1 = app_client_with_hash.get("/fixtures/simple_primary_key.jsono") - response = app_client_with_hash.get(response_1.headers["Location"]) - assert response.status == 302 - assert response.headers["Location"].endswith("?_shape=objects") - - def test_table_shape_arrays(app_client): response = app_client.get("/fixtures/simple_primary_key.json?_shape=arrays") assert [ From 8658c66438ec71edc7e9adc495f4692b937a0f57 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 17:19:31 -0700 Subject: [PATCH 0041/1218] Show error if --setting hash_urls 1 used, refs #1661 --- datasette/app.py | 15 ++++++++++----- datasette/cli.py | 21 ++++++++++++++++++--- tests/test_cli.py | 7 +++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 3099ada7..c1c0663d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -118,11 +118,6 @@ SETTINGS = ( 50, "Time limit for calculating a suggested facet", ), - Setting( - "hash_urls", - False, - "Include DB file contents hash in URLs, for far-future caching", - ), Setting( "allow_facet", True, @@ -177,6 +172,16 @@ SETTINGS = ( ), Setting("base_url", "/", "Datasette URLs should use this base path"), ) +OBSOLETE_SETTINGS = { + option.name: option + for option in ( + Setting( + "hash_urls", + False, + "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead", + ), + ) +} DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} diff --git a/datasette/cli.py b/datasette/cli.py index 61e7ce91..b94ac192 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -12,7 +12,14 @@ from subprocess import call import sys from runpy import run_module import webbrowser -from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, SQLITE_LIMIT_ATTACHED, pm +from .app import ( + OBSOLETE_SETTINGS, + Datasette, + DEFAULT_SETTINGS, + SETTINGS, + SQLITE_LIMIT_ATTACHED, + pm, +) from .utils import ( StartupError, check_connection, @@ -50,8 +57,12 @@ class Config(click.ParamType): return name, value = config.split(":", 1) if name not in DEFAULT_SETTINGS: + if name in OBSOLETE_SETTINGS: + msg = OBSOLETE_SETTINGS[name].help + else: + msg = f"{name} is not a valid option (--help-settings to see all)" self.fail( - f"{name} is not a valid option (--help-settings to see all)", + msg, param, ctx, ) @@ -83,8 +94,12 @@ class Setting(CompositeParamType): def convert(self, config, param, ctx): name, value = config if name not in DEFAULT_SETTINGS: + if name in OBSOLETE_SETTINGS: + msg = OBSOLETE_SETTINGS[name].help + else: + msg = f"{name} is not a valid option (--help-settings to see all)" self.fail( - f"{name} is not a valid option (--help-settings to see all)", + msg, param, ctx, ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 5afe72c1..89e8d044 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -310,3 +310,10 @@ def test_help_settings(): result = runner.invoke(cli, ["--help-settings"]) for setting in SETTINGS: assert setting.name in result.output + + +def test_help_error_on_hash_urls_setting(): + runner = CliRunner() + result = runner.invoke(cli, ["--setting", "hash_urls", 1]) + assert result.exit_code == 2 + assert 'The hash_urls setting has been removed' in result.output From 9979dcd07f9921ac30c4c0b5ea60d09cd1e10556 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 17:25:14 -0700 Subject: [PATCH 0042/1218] Also remove default_cache_ttl_hashed setting, refs #1661 --- datasette/app.py | 17 +++-------------- datasette/cli.py | 16 ++++++++-------- datasette/url_builder.py | 9 +-------- tests/test_api.py | 2 -- tests/test_cli.py | 7 ++++--- 5 files changed, 16 insertions(+), 35 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index c1c0663d..f52e3283 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -134,11 +134,6 @@ SETTINGS = ( 5, "Default HTTP cache TTL (used in Cache-Control: max-age= header)", ), - Setting( - "default_cache_ttl_hashed", - 365 * 24 * 60 * 60, - "Default HTTP cache TTL for hashed URL pages", - ), Setting("cache_size_kb", 0, "SQLite cache size in KB (0 == use SQLite default)"), Setting( "allow_csv_stream", @@ -172,17 +167,11 @@ SETTINGS = ( ), Setting("base_url", "/", "Datasette URLs should use this base path"), ) +_HASH_URLS_REMOVED = "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead" OBSOLETE_SETTINGS = { - option.name: option - for option in ( - Setting( - "hash_urls", - False, - "The hash_urls setting has been removed, try the datasette-hashed-urls plugin instead", - ), - ) + "hash_urls": _HASH_URLS_REMOVED, + "default_cache_ttl_hashed": _HASH_URLS_REMOVED, } - DEFAULT_SETTINGS = {option.name: option.default for option in SETTINGS} FAVICON_PATH = app_root / "datasette" / "static" / "favicon.png" diff --git a/datasette/cli.py b/datasette/cli.py index b94ac192..3c6e1b2c 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -57,10 +57,10 @@ class Config(click.ParamType): return name, value = config.split(":", 1) if name not in DEFAULT_SETTINGS: - if name in OBSOLETE_SETTINGS: - msg = OBSOLETE_SETTINGS[name].help - else: - msg = f"{name} is not a valid option (--help-settings to see all)" + msg = ( + OBSOLETE_SETTINGS.get(name) + or f"{name} is not a valid option (--help-settings to see all)" + ) self.fail( msg, param, @@ -94,10 +94,10 @@ class Setting(CompositeParamType): def convert(self, config, param, ctx): name, value = config if name not in DEFAULT_SETTINGS: - if name in OBSOLETE_SETTINGS: - msg = OBSOLETE_SETTINGS[name].help - else: - msg = f"{name} is not a valid option (--help-settings to see all)" + msg = ( + OBSOLETE_SETTINGS.get(name) + or f"{name} is not a valid option (--help-settings to see all)" + ) self.fail( msg, param, diff --git a/datasette/url_builder.py b/datasette/url_builder.py index 9f072462..498ec85d 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -28,14 +28,7 @@ class Urls: return self.path("-/logout") def database(self, database, format=None): - db = self.ds.databases[database] - if self.ds.setting("hash_urls") and db.hash: - path = self.path( - f"{tilde_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format - ) - else: - path = self.path(tilde_encode(database), format=format) - return path + return self.path(tilde_encode(database), format=format) def table(self, database, table, format=None): path = f"{self.database(database)}/{tilde_encode(table)}" diff --git a/tests/test_api.py b/tests/test_api.py index 46e41afb..d3c94023 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -798,14 +798,12 @@ def test_settings_json(app_client): "allow_facet": True, "suggest_facets": True, "default_cache_ttl": 5, - "default_cache_ttl_hashed": 365 * 24 * 60 * 60, "num_sql_threads": 1, "cache_size_kb": 0, "allow_csv_stream": True, "max_csv_mb": 100, "truncate_cells_html": 2048, "force_https_urls": False, - "hash_urls": False, "template_debug": False, "trace_debug": False, "base_url": "/", diff --git a/tests/test_cli.py b/tests/test_cli.py index 89e8d044..dca65f26 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -312,8 +312,9 @@ def test_help_settings(): assert setting.name in result.output -def test_help_error_on_hash_urls_setting(): +@pytest.mark.parametrize("setting", ("hash_urls", "default_cache_ttl_hashed")) +def test_help_error_on_hash_urls_setting(setting): runner = CliRunner() - result = runner.invoke(cli, ["--setting", "hash_urls", 1]) + result = runner.invoke(cli, ["--setting", setting, 1]) assert result.exit_code == 2 - assert 'The hash_urls setting has been removed' in result.output + assert "The hash_urls setting has been removed" in result.output From 32963018e7edfab1233de7c7076c428d0e5c7813 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 17:33:06 -0700 Subject: [PATCH 0043/1218] Updated documentation to remove hash_urls, refs #1661 --- docs/cli-reference.rst | 4 ---- docs/performance.rst | 18 +++++++++++------- docs/settings.rst | 27 --------------------------- 3 files changed, 11 insertions(+), 38 deletions(-) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 155a005d..69670d8a 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -142,8 +142,6 @@ datasette serve --help-settings (default=200) facet_suggest_time_limit_ms Time limit for calculating a suggested facet (default=50) - hash_urls Include DB file contents hash in URLs, for far- - future caching (default=False) allow_facet Allow users to specify columns to facet using ?_facet= parameter (default=True) allow_download Allow users to download the original SQLite @@ -152,8 +150,6 @@ datasette serve --help-settings (default=True) default_cache_ttl Default HTTP cache TTL (used in Cache-Control: max-age= header) (default=5) - default_cache_ttl_hashed Default HTTP cache TTL for hashed URL pages - (default=31536000) cache_size_kb SQLite cache size in KB (0 == use SQLite default) (default=0) allow_csv_stream Allow .csv?_stream=1 to download all rows diff --git a/docs/performance.rst b/docs/performance.rst index bcf3208e..d37f1804 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -60,18 +60,22 @@ The :ref:`setting_default_cache_ttl` setting sets the default HTTP cache TTL for You can also change the cache timeout on a per-request basis using the ``?_ttl=10`` query string parameter. This can be useful when you are working with the Datasette JSON API - you may decide that a specific query can be cached for a longer time, or maybe you need to set ``?_ttl=0`` for some requests for example if you are running a SQL ``order by random()`` query. -Hashed URL mode ---------------- +datasette-hashed-urls +--------------------- -When you open a database file in immutable mode using the ``-i`` option, Datasette calculates a SHA-256 hash of the contents of that file on startup. This content hash can then optionally be used to create URLs that are guaranteed to change if the contents of the file changes in the future. This results in URLs that can then be cached indefinitely by both browsers and caching proxies - an enormous potential performance optimization. +If you open a database file in immutable mode using the ``-i`` option, you can be assured that the content of that database will not change for the lifetime of the Datasette server. -You can enable these hashed URLs in two ways: using the :ref:`setting_hash_urls` configuration setting (which affects all requests to Datasette) or via the ``?_hash=1`` query string parameter (which only applies to the current request). +The `datasette-hashed-urls plugin `__ implements an optimization where your database is served with part of the SHA-256 hash of the database contents baked into the URL. -With hashed URLs enabled, any request to e.g. ``/mydatabase/mytable`` will 302 redirect to ``mydatabase-455fe3a/mytable``. The URL containing the hash will be served with a very long cache expire header - configured using :ref:`setting_default_cache_ttl_hashed` which defaults to 365 days. +A database at ``/fixtures`` will instead be served at ``/fixtures-aa7318b``, and a year-long cache expiry header will be returned with those pages. -Since these responses are cached for a long time, you may wish to build API clients against the non-hashed version of these URLs. These 302 redirects are served extremely quickly, so this should still be a performant way to work against the Datasette API. +This will then be cached by both browsers and caching proxies such as Cloudflare or Fastly, providing a potentially significant performance boost. -If you run Datasette behind an `HTTP/2 server push `__ aware proxy such as Cloudflare Datasette will serve the 302 redirects in such a way that the redirected page will be efficiently "pushed" to the browser as part of the response, without the browser needing to make a second HTTP request to fetch the redirected resource. +To install the plugin, run the following:: + + datasette install datasette-hashed-urls .. note:: + Prior to Datasette 0.61 hashed URL mode was a core Datasette feature, enabled using the ``hash_urls`` setting. This implementation has now been removed in favor of the ``datasette-hashed-urls`` plugin. + Prior to Datasette 0.28 hashed URL mode was the default behaviour for Datasette, since all database files were assumed to be immutable and unchanging. From 0.28 onwards the default has been to treat database files as mutable unless explicitly configured otherwise. diff --git a/docs/settings.rst b/docs/settings.rst index da06d6a0..60c4b36d 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -178,17 +178,6 @@ Default HTTP caching max-age header in seconds, used for ``Cache-Control: max-ag datasette mydatabase.db --setting default_cache_ttl 60 -.. _setting_default_cache_ttl_hashed: - -default_cache_ttl_hashed -~~~~~~~~~~~~~~~~~~~~~~~~ - -Default HTTP caching max-age for responses served using using the :ref:`hashed-urls mechanism `. Defaults to 365 days (31536000 seconds). - -:: - - datasette mydatabase.db --setting default_cache_ttl_hashed 10000 - .. _setting_cache_size_kb: cache_size_kb @@ -251,22 +240,6 @@ HTTP but is served to the outside world via a proxy that enables HTTPS. datasette mydatabase.db --setting force_https_urls 1 -.. _setting_hash_urls: - -hash_urls -~~~~~~~~~ - -When enabled, this setting causes Datasette to append a content hash of the -database file to the URL path for every table and query within that database. - -When combined with far-future expire headers this ensures that queries can be -cached forever, safe in the knowledge that any modifications to the database -itself will result in new, uncached URL paths. - -:: - - datasette mydatabase.db --setting hash_urls 1 - .. _setting_template_debug: template_debug From 4e47a2d894b96854348343374c8e97c9d7055cf6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 18:37:54 -0700 Subject: [PATCH 0044/1218] Fixed bug where tables with a column called n caused 500 errors Closes #1228 --- datasette/facets.py | 6 +++--- tests/fixtures.py | 33 ++++++++++++++++---------------- tests/test_api.py | 1 + tests/test_csv.py | 32 +++++++++++++++---------------- tests/test_internals_database.py | 10 ++++++++++ tests/test_plugins.py | 6 ++++-- tests/test_table_api.py | 8 ++++++++ 7 files changed, 59 insertions(+), 37 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index a1bb4a5f..b15a758c 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -151,10 +151,10 @@ class ColumnFacet(Facet): if column in already_enabled: continue suggested_facet_sql = """ - select {column}, count(*) as n from ( + select {column} as value, count(*) as n from ( {sql} - ) where {column} is not null - group by {column} + ) where value is not null + group by value limit {limit} """.format( column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 diff --git a/tests/fixtures.py b/tests/fixtures.py index 342a3020..e0e4ec7b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -564,26 +564,27 @@ CREATE TABLE facetable ( tags text, complex_array text, distinct_some_null, + n text, FOREIGN KEY ("_city_id") REFERENCES [facet_cities](id) ); INSERT INTO facetable - (created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null) + (created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null, n) VALUES - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]', 'one'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]', 'two'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]', null), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]', null), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]', null), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]', null), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]', null), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]', null), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]', null), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]', null), - ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]', null), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]', null), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]', null), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]', null), - ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]', null) + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]', 'one', 'n1'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]', 'two', 'n2'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]', null, null), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]', null, null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]', null, null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]', null, null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]', null, null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]', null, null), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]', null, null), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]', null, null), + ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]', null, null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]', null, null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]', null, null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]', null, null), + ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]', null, null) ; CREATE TABLE binary_data ( diff --git a/tests/test_api.py b/tests/test_api.py index d3c94023..421bb1fe 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -210,6 +210,7 @@ def test_database_page(app_client): "tags", "complex_array", "distinct_some_null", + "n", ], "primary_keys": ["pk"], "count": 15, diff --git a/tests/test_csv.py b/tests/test_csv.py index 8749cd8b..7fc25a09 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -24,22 +24,22 @@ world ) EXPECTED_TABLE_WITH_LABELS_CSV = """ -pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags,complex_array,distinct_some_null -1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]",one -2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[],two -3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[], -4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[], -5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[], -6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[], -7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[], -8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[], -9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[], -10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[], -11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[], -12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[], -13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[], -14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[], -15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[], +pk,created,planet_int,on_earth,state,_city_id,_city_id_label,_neighborhood,tags,complex_array,distinct_some_null,n +1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]",one,n1 +2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[],two,n2 +3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[],, +4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[],, +5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[],, +6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[],, +7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[],, +8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[],, +9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[],, +10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[],, +11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[],, +12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[],, +13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[],, +14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[],, +15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[],, """.lstrip().replace( "\n", "\r\n" ) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 31538a24..551f67e1 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -86,6 +86,7 @@ async def test_table_exists(db, tables, exists): "tags", "complex_array", "distinct_some_null", + "n", ], ), ( @@ -204,6 +205,15 @@ async def test_table_columns(db, table, expected): is_pk=0, hidden=0, ), + Column( + cid=10, + name="n", + type="text", + notnull=0, + default_value=None, + is_pk=0, + hidden=0, + ), ], ), ( diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 656f39e4..15bde962 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -442,6 +442,7 @@ def test_hook_register_output_renderer_all_parameters(app_client): "tags", "complex_array", "distinct_some_null", + "n", ], "rows": [ "", @@ -460,7 +461,7 @@ def test_hook_register_output_renderer_all_parameters(app_client): "", "", ], - "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null from facetable order by pk limit 51", + "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null, n from facetable order by pk limit 51", "query_name": None, "database": "fixtures", "table": "facetable", @@ -531,8 +532,9 @@ def test_hook_register_output_renderer_can_render(app_client): "tags", "complex_array", "distinct_some_null", + "n", ], - "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null from facetable order by pk limit 51", + "sql": "select pk, created, planet_int, on_earth, state, _city_id, _neighborhood, tags, complex_array, distinct_some_null, n from facetable order by pk limit 51", "query_name": None, "database": "fixtures", "table": "facetable", diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 3d0a7fbd..9db383c3 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -532,6 +532,7 @@ def test_table_filter_json_arraycontains(app_client): '["tag1", "tag2"]', '[{"foo": "bar"}]', "one", + "n1", ], [ 2, @@ -544,6 +545,7 @@ def test_table_filter_json_arraycontains(app_client): '["tag1", "tag3"]', "[]", "two", + "n2", ], ] @@ -565,6 +567,7 @@ def test_table_filter_json_arraynotcontains(app_client): '["tag1", "tag2"]', '[{"foo": "bar"}]', "one", + "n1", ] ] @@ -585,6 +588,7 @@ def test_table_filter_extra_where(app_client): '["tag1", "tag3"]', "[]", "two", + "n2", ] ] == response.json["rows"] @@ -958,6 +962,7 @@ def test_expand_labels(app_client): "tags": '["tag1", "tag3"]', "complex_array": "[]", "distinct_some_null": "two", + "n": "n2", }, "13": { "pk": 13, @@ -970,6 +975,7 @@ def test_expand_labels(app_client): "tags": "[]", "complex_array": "[]", "distinct_some_null": None, + "n": None, }, } == response.json @@ -1161,6 +1167,7 @@ def test_generated_columns_are_visible_in_datasette(): "tags", "complex_array", "distinct_some_null", + "n", ], ), ( @@ -1188,6 +1195,7 @@ def test_generated_columns_are_visible_in_datasette(): "tags", "complex_array", "distinct_some_null", + "n", ], ), ( From 711767bcd3c1e76a0861fe7f24069ff1c8efc97a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Mar 2022 21:03:08 -0700 Subject: [PATCH 0045/1218] Refactored URL routing to add tests, closes #1666 Refs #1660 --- datasette/app.py | 54 ++++++++++++++++++++----------------- datasette/utils/__init__.py | 8 ++++++ tests/test_routes.py | 34 +++++++++++++++++++++++ 3 files changed, 72 insertions(+), 24 deletions(-) create mode 100644 tests/test_routes.py diff --git a/datasette/app.py b/datasette/app.py index f52e3283..8987112c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -60,6 +60,7 @@ from .utils import ( module_from_path, parse_metadata, resolve_env_secrets, + resolve_routes, to_css_class, ) from .utils.asgi import ( @@ -974,8 +975,7 @@ class Datasette: output.append(script) return output - def app(self): - """Returns an ASGI app function that serves the whole of Datasette""" + def _routes(self): routes = [] for routes_to_add in pm.hook.register_routes(datasette=self): @@ -1099,6 +1099,15 @@ class Datasette: + renderer_regex + r")?$", ) + return [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + def app(self): + """Returns an ASGI app function that serves the whole of Datasette""" + routes = self._routes() self._register_custom_units() async def setup_db(): @@ -1129,12 +1138,7 @@ class Datasette: class DatasetteRouter: def __init__(self, datasette, routes): self.ds = datasette - routes = routes or [] - self.routes = [ - # Compile any strings to regular expressions - ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) - for pattern, view in routes - ] + self.routes = routes or [] # Build a list of pages/blah/{name}.html matching expressions pattern_templates = [ filepath @@ -1187,22 +1191,24 @@ class DatasetteRouter: break scope_modifications["actor"] = actor or default_actor scope = dict(scope, **scope_modifications) - for regex, view in self.routes: - match = regex.match(path) - if match is not None: - new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - request.scope = new_scope - try: - response = await view(request, send) - if response: - self.ds._write_messages_to_response(request, response) - await response.asgi_send(send) - return - except NotFound as exception: - return await self.handle_404(request, send, exception) - except Exception as exception: - return await self.handle_500(request, send, exception) - return await self.handle_404(request, send) + + match, view = resolve_routes(self.routes, path) + + if match is None: + return await self.handle_404(request, send) + + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + request.scope = new_scope + try: + response = await view(request, send) + if response: + self.ds._write_messages_to_response(request, response) + await response.asgi_send(send) + return + except NotFound as exception: + return await self.handle_404(request, send, exception) + except Exception as exception: + return await self.handle_500(request, send, exception) async def handle_404(self, request, send, exception=None): # If path contains % encoding, redirect to tilde encoding diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index bd591459..ccdf8ad4 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1178,3 +1178,11 @@ def tilde_decode(s: str) -> str: s = s.replace("%", temp) decoded = urllib.parse.unquote(s.replace("~", "%")) return decoded.replace(temp, "%") + + +def resolve_routes(routes, path): + for regex, view in routes: + match = regex.match(path) + if match is not None: + return match, view + return None, None diff --git a/tests/test_routes.py b/tests/test_routes.py new file mode 100644 index 00000000..a1960f14 --- /dev/null +++ b/tests/test_routes.py @@ -0,0 +1,34 @@ +from datasette.app import Datasette +from datasette.utils import resolve_routes +import pytest + + +@pytest.fixture(scope="session") +def routes(): + ds = Datasette() + return ds._routes() + + +@pytest.mark.parametrize( + "path,expected", + ( + ("/", "IndexView"), + ("/foo", "DatabaseView"), + ("/foo.csv", "DatabaseView"), + ("/foo.json", "DatabaseView"), + ("/foo.humbug", "DatabaseView"), + ("/foo/humbug", "TableView"), + ("/foo/humbug.json", "TableView"), + ("/foo/humbug.blah", "TableView"), + ("/foo/humbug/1", "RowView"), + ("/foo/humbug/1.json", "RowView"), + ("/-/metadata.json", "JsonDataView"), + ("/-/metadata", "JsonDataView"), + ), +) +def test_routes(routes, path, expected): + match, view = resolve_routes(routes, path) + if expected is None: + assert match is None + else: + assert view.view_class.__name__ == expected From 764738dfcb16cd98b0987d443f59d5baa9d3c332 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 09:30:22 -0700 Subject: [PATCH 0046/1218] test_routes also now asserts matches, refs #1666 --- tests/test_routes.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/tests/test_routes.py b/tests/test_routes.py index a1960f14..6718c232 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -10,25 +10,34 @@ def routes(): @pytest.mark.parametrize( - "path,expected", + "path,expected_class,expected_matches", ( - ("/", "IndexView"), - ("/foo", "DatabaseView"), - ("/foo.csv", "DatabaseView"), - ("/foo.json", "DatabaseView"), - ("/foo.humbug", "DatabaseView"), - ("/foo/humbug", "TableView"), - ("/foo/humbug.json", "TableView"), - ("/foo/humbug.blah", "TableView"), - ("/foo/humbug/1", "RowView"), - ("/foo/humbug/1.json", "RowView"), - ("/-/metadata.json", "JsonDataView"), - ("/-/metadata", "JsonDataView"), + ("/", "IndexView", {"as_format": ""}), + ("/foo", "DatabaseView", {"as_format": None, "db_name": "foo"}), + ("/foo.csv", "DatabaseView", {"as_format": ".csv", "db_name": "foo"}), + ("/foo.json", "DatabaseView", {"as_format": ".json", "db_name": "foo"}), + ("/foo.humbug", "DatabaseView", {"as_format": None, "db_name": "foo.humbug"}), + ("/foo/humbug", "TableView", {"db_name": "foo", "table": "humbug"}), + ("/foo/humbug.json", "TableView", {"db_name": "foo", "table": "humbug"}), + ("/foo/humbug.blah", "TableView", {"db_name": "foo", "table": "humbug"}), + ( + "/foo/humbug/1", + "RowView", + {"as_format": None, "db_name": "foo", "pk_path": "1", "table": "humbug"}, + ), + ( + "/foo/humbug/1.json", + "RowView", + {"as_format": ".json", "db_name": "foo", "pk_path": "1", "table": "humbug"}, + ), + ("/-/metadata.json", "JsonDataView", {"as_format": ".json"}), + ("/-/metadata", "JsonDataView", {"as_format": ""}), ), ) -def test_routes(routes, path, expected): +def test_routes(routes, path, expected_class, expected_matches): match, view = resolve_routes(routes, path) - if expected is None: + if expected_class is None: assert match is None else: - assert view.view_class.__name__ == expected + assert view.view_class.__name__ == expected_class + assert match.groupdict() == expected_matches From 61419388c134001118aaf7dfb913562d467d7913 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 09:52:08 -0700 Subject: [PATCH 0047/1218] Rename route match groups for consistency, refs #1667, #1660 --- datasette/app.py | 28 ++++++++++++---------------- datasette/blob_renderer.py | 4 ++-- datasette/views/base.py | 2 +- datasette/views/database.py | 6 +++--- datasette/views/index.py | 2 +- datasette/views/special.py | 2 +- datasette/views/table.py | 8 ++++---- tests/test_routes.py | 24 ++++++++++++------------ 8 files changed, 36 insertions(+), 40 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8987112c..5259c50c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -988,7 +988,7 @@ class Datasette: # Generate a regex snippet to match all registered renderer file extensions renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - add_route(IndexView.as_view(self), r"/(?P(\.jsono?)?$)") + add_route(IndexView.as_view(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") @@ -1020,21 +1020,21 @@ class Datasette: ) add_route( JsonDataView.as_view(self, "metadata.json", lambda: self.metadata()), - r"/-/metadata(?P(\.json)?)$", + r"/-/metadata(?P(\.json)?)$", ) add_route( JsonDataView.as_view(self, "versions.json", self._versions), - r"/-/versions(?P(\.json)?)$", + r"/-/versions(?P(\.json)?)$", ) add_route( JsonDataView.as_view( self, "plugins.json", self._plugins, needs_request=True ), - r"/-/plugins(?P(\.json)?)$", + r"/-/plugins(?P(\.json)?)$", ) add_route( JsonDataView.as_view(self, "settings.json", lambda: self._settings), - r"/-/settings(?P(\.json)?)$", + r"/-/settings(?P(\.json)?)$", ) add_route( permanent_redirect("/-/settings.json"), @@ -1046,15 +1046,15 @@ class Datasette: ) add_route( JsonDataView.as_view(self, "threads.json", self._threads), - r"/-/threads(?P(\.json)?)$", + r"/-/threads(?P(\.json)?)$", ) add_route( JsonDataView.as_view(self, "databases.json", self._connected_databases), - r"/-/databases(?P(\.json)?)$", + r"/-/databases(?P(\.json)?)$", ) add_route( JsonDataView.as_view(self, "actor.json", self._actor, needs_request=True), - r"/-/actor(?P(\.json)?)$", + r"/-/actor(?P(\.json)?)$", ) add_route( AuthTokenView.as_view(self), @@ -1080,22 +1080,18 @@ class Datasette: PatternPortfolioView.as_view(self), r"/-/patterns$", ) - add_route( - DatabaseDownload.as_view(self), r"/(?P[^/]+?)(?P\.db)$" - ) + add_route(DatabaseDownload.as_view(self), r"/(?P[^/]+?)\.db$") add_route( DatabaseView.as_view(self), - r"/(?P[^/]+?)(?P" - + renderer_regex - + r"|.jsono|\.csv)?$", + r"/(?P[^/]+?)(?P" + renderer_regex + r"|.jsono|\.csv)?$", ) add_route( TableView.as_view(self), - r"/(?P[^/]+)/(?P
[^\/\.]+)(\.[a-zA-Z0-9_]+)?$", + r"/(?P[^/]+)/(?P
[^\/\.]+)(\.[a-zA-Z0-9_]+)?$", ) add_route( RowView.as_view(self), - r"/(?P[^/]+)/(?P
[^/]+?)/(?P[^/]+?)(?P" + r"/(?P[^/]+)/(?P
[^/]+?)/(?P[^/]+?)(?P" + renderer_regex + r")?$", ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py index 217b3638..4d8c6bea 100644 --- a/datasette/blob_renderer.py +++ b/datasette/blob_renderer.py @@ -34,8 +34,8 @@ async def render_blob(datasette, database, rows, columns, request, table, view_n filename_bits = [] if table: filename_bits.append(to_css_class(table)) - if "pk_path" in request.url_vars: - filename_bits.append(request.url_vars["pk_path"]) + if "pks" in request.url_vars: + filename_bits.append(request.url_vars["pks"]) filename_bits.append(to_css_class(blob_column)) if blob_hash: filename_bits.append(blob_hash[:6]) diff --git a/datasette/views/base.py b/datasette/views/base.py index e31beb19..0bbf98bb 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -381,7 +381,7 @@ class DataView(BaseView): return None async def get(self, request): - db_name = request.url_vars["db_name"] + db_name = request.url_vars["database"] database = tilde_decode(db_name) _format = self.get_format(request) data_kwargs = {} diff --git a/datasette/views/database.py b/datasette/views/database.py index 48635e01..93bd1011 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -32,7 +32,7 @@ class DatabaseView(DataView): name = "database" async def data(self, request, default_labels=False, _size=None): - database = tilde_decode(request.url_vars["db_name"]) + database = tilde_decode(request.url_vars["database"]) await self.check_permissions( request, [ @@ -162,7 +162,7 @@ class DatabaseDownload(DataView): name = "database_download" async def get(self, request): - database = tilde_decode(request.url_vars["db_name"]) + database = tilde_decode(request.url_vars["database"]) await self.check_permissions( request, [ @@ -205,7 +205,7 @@ class QueryView(DataView): named_parameters=None, write=False, ): - database = tilde_decode(request.url_vars["db_name"]) + database = tilde_decode(request.url_vars["database"]) params = {key: request.args.get(key) for key in request.args} if "sql" in params: params.pop("sql") diff --git a/datasette/views/index.py b/datasette/views/index.py index 311a49db..f5e31181 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -19,7 +19,7 @@ class IndexView(BaseView): name = "index" async def get(self, request): - as_format = request.url_vars["as_format"] + as_format = request.url_vars["format"] await self.check_permission(request, "view-instance") databases = [] for name, db in self.ds.databases.items(): diff --git a/datasette/views/special.py b/datasette/views/special.py index c7b5061f..395ee587 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -15,7 +15,7 @@ class JsonDataView(BaseView): self.needs_request = needs_request async def get(self, request): - as_format = request.url_vars["as_format"] + as_format = request.url_vars["format"] await self.check_permission(request, "view-instance") if self.needs_request: data = self.data_callback(request) diff --git a/datasette/views/table.py b/datasette/views/table.py index 8bdc7417..ea4f24b7 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -272,7 +272,7 @@ class TableView(RowTableShared): name = "table" async def post(self, request): - db_name = tilde_decode(request.url_vars["db_name"]) + db_name = tilde_decode(request.url_vars["database"]) table = tilde_decode(request.url_vars["table"]) # Handle POST to a canned query canned_query = await self.ds.get_canned_query(db_name, table, request.actor) @@ -327,7 +327,7 @@ class TableView(RowTableShared): _next=None, _size=None, ): - database = tilde_decode(request.url_vars["db_name"]) + database = tilde_decode(request.url_vars["database"]) table = tilde_decode(request.url_vars["table"]) try: db = self.ds.databases[database] @@ -938,7 +938,7 @@ class RowView(RowTableShared): name = "row" async def data(self, request, default_labels=False): - database = tilde_decode(request.url_vars["db_name"]) + database = tilde_decode(request.url_vars["database"]) table = tilde_decode(request.url_vars["table"]) await self.check_permissions( request, @@ -948,7 +948,7 @@ class RowView(RowTableShared): "view-instance", ], ) - pk_values = urlsafe_components(request.url_vars["pk_path"]) + pk_values = urlsafe_components(request.url_vars["pks"]) db = self.ds.databases[database] sql, params, pks = await _sql_params_pks(db, table, pk_values) results = await db.execute(sql, params, truncate=True) diff --git a/tests/test_routes.py b/tests/test_routes.py index 6718c232..349ac302 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -12,26 +12,26 @@ def routes(): @pytest.mark.parametrize( "path,expected_class,expected_matches", ( - ("/", "IndexView", {"as_format": ""}), - ("/foo", "DatabaseView", {"as_format": None, "db_name": "foo"}), - ("/foo.csv", "DatabaseView", {"as_format": ".csv", "db_name": "foo"}), - ("/foo.json", "DatabaseView", {"as_format": ".json", "db_name": "foo"}), - ("/foo.humbug", "DatabaseView", {"as_format": None, "db_name": "foo.humbug"}), - ("/foo/humbug", "TableView", {"db_name": "foo", "table": "humbug"}), - ("/foo/humbug.json", "TableView", {"db_name": "foo", "table": "humbug"}), - ("/foo/humbug.blah", "TableView", {"db_name": "foo", "table": "humbug"}), + ("/", "IndexView", {"format": ""}), + ("/foo", "DatabaseView", {"format": None, "database": "foo"}), + ("/foo.csv", "DatabaseView", {"format": ".csv", "database": "foo"}), + ("/foo.json", "DatabaseView", {"format": ".json", "database": "foo"}), + ("/foo.humbug", "DatabaseView", {"format": None, "database": "foo.humbug"}), + ("/foo/humbug", "TableView", {"database": "foo", "table": "humbug"}), + ("/foo/humbug.json", "TableView", {"database": "foo", "table": "humbug"}), + ("/foo/humbug.blah", "TableView", {"database": "foo", "table": "humbug"}), ( "/foo/humbug/1", "RowView", - {"as_format": None, "db_name": "foo", "pk_path": "1", "table": "humbug"}, + {"format": None, "database": "foo", "pks": "1", "table": "humbug"}, ), ( "/foo/humbug/1.json", "RowView", - {"as_format": ".json", "db_name": "foo", "pk_path": "1", "table": "humbug"}, + {"format": ".json", "database": "foo", "pks": "1", "table": "humbug"}, ), - ("/-/metadata.json", "JsonDataView", {"as_format": ".json"}), - ("/-/metadata", "JsonDataView", {"as_format": ""}), + ("/-/metadata.json", "JsonDataView", {"format": ".json"}), + ("/-/metadata", "JsonDataView", {"format": ""}), ), ) def test_routes(routes, path, expected_class, expected_matches): From b9c2b1cfc8692b9700416db98721fa3ec982f6be Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 13:29:10 -0700 Subject: [PATCH 0048/1218] Consistent treatment of format in route capturing, refs #1667 Also refs #1660 --- datasette/app.py | 30 ++++++++++++------------------ tests/test_api.py | 4 ++-- tests/test_routes.py | 32 ++++++++++++++++++++++---------- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5259c50c..edef34e9 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -985,10 +985,7 @@ class Datasette: def add_route(view, regex): routes.append((regex, view)) - # Generate a regex snippet to match all registered renderer file extensions - renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - - add_route(IndexView.as_view(self), r"/(?P(\.jsono?)?$)") + add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") @@ -1020,21 +1017,21 @@ class Datasette: ) add_route( JsonDataView.as_view(self, "metadata.json", lambda: self.metadata()), - r"/-/metadata(?P(\.json)?)$", + r"/-/metadata(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "versions.json", self._versions), - r"/-/versions(?P(\.json)?)$", + r"/-/versions(\.(?Pjson))?$", ) add_route( JsonDataView.as_view( self, "plugins.json", self._plugins, needs_request=True ), - r"/-/plugins(?P(\.json)?)$", + r"/-/plugins(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "settings.json", lambda: self._settings), - r"/-/settings(?P(\.json)?)$", + r"/-/settings(\.(?Pjson))?$", ) add_route( permanent_redirect("/-/settings.json"), @@ -1046,15 +1043,15 @@ class Datasette: ) add_route( JsonDataView.as_view(self, "threads.json", self._threads), - r"/-/threads(?P(\.json)?)$", + r"/-/threads(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "databases.json", self._connected_databases), - r"/-/databases(?P(\.json)?)$", + r"/-/databases(\.(?Pjson))?$", ) add_route( JsonDataView.as_view(self, "actor.json", self._actor, needs_request=True), - r"/-/actor(?P(\.json)?)$", + r"/-/actor(\.(?Pjson))?$", ) add_route( AuthTokenView.as_view(self), @@ -1080,20 +1077,17 @@ class Datasette: PatternPortfolioView.as_view(self), r"/-/patterns$", ) - add_route(DatabaseDownload.as_view(self), r"/(?P[^/]+?)\.db$") + add_route(DatabaseDownload.as_view(self), r"/(?P[^\/\.]+)\.db$") add_route( - DatabaseView.as_view(self), - r"/(?P[^/]+?)(?P" + renderer_regex + r"|.jsono|\.csv)?$", + DatabaseView.as_view(self), r"/(?P[^\/\.]+)(\.(?P\w+))?$" ) add_route( TableView.as_view(self), - r"/(?P[^/]+)/(?P
[^\/\.]+)(\.[a-zA-Z0-9_]+)?$", + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)(\.(?P\w+))?$", ) add_route( RowView.as_view(self), - r"/(?P[^/]+)/(?P
[^/]+?)/(?P[^/]+?)(?P" - + renderer_regex - + r")?$", + r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)(\.(?P\w+))?$", ) return [ # Compile any strings to regular expressions diff --git a/tests/test_api.py b/tests/test_api.py index 421bb1fe..253c1718 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -629,8 +629,8 @@ def test_old_memory_urls_redirect(app_client_no_files, path, expected_redirect): def test_database_page_for_database_with_dot_in_name(app_client_with_dot): - response = app_client_with_dot.get("/fixtures.dot.json") - assert 200 == response.status + response = app_client_with_dot.get("/fixtures~2Edot.json") + assert response.status == 200 def test_custom_sql(app_client): diff --git a/tests/test_routes.py b/tests/test_routes.py index 349ac302..1fa55018 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -12,14 +12,26 @@ def routes(): @pytest.mark.parametrize( "path,expected_class,expected_matches", ( - ("/", "IndexView", {"format": ""}), + ("/", "IndexView", {"format": None}), ("/foo", "DatabaseView", {"format": None, "database": "foo"}), - ("/foo.csv", "DatabaseView", {"format": ".csv", "database": "foo"}), - ("/foo.json", "DatabaseView", {"format": ".json", "database": "foo"}), - ("/foo.humbug", "DatabaseView", {"format": None, "database": "foo.humbug"}), - ("/foo/humbug", "TableView", {"database": "foo", "table": "humbug"}), - ("/foo/humbug.json", "TableView", {"database": "foo", "table": "humbug"}), - ("/foo/humbug.blah", "TableView", {"database": "foo", "table": "humbug"}), + ("/foo.csv", "DatabaseView", {"format": "csv", "database": "foo"}), + ("/foo.json", "DatabaseView", {"format": "json", "database": "foo"}), + ("/foo.humbug", "DatabaseView", {"format": "humbug", "database": "foo"}), + ( + "/foo/humbug", + "TableView", + {"database": "foo", "table": "humbug", "format": None}, + ), + ( + "/foo/humbug.json", + "TableView", + {"database": "foo", "table": "humbug", "format": "json"}, + ), + ( + "/foo/humbug.blah", + "TableView", + {"database": "foo", "table": "humbug", "format": "blah"}, + ), ( "/foo/humbug/1", "RowView", @@ -28,10 +40,10 @@ def routes(): ( "/foo/humbug/1.json", "RowView", - {"format": ".json", "database": "foo", "pks": "1", "table": "humbug"}, + {"format": "json", "database": "foo", "pks": "1", "table": "humbug"}, ), - ("/-/metadata.json", "JsonDataView", {"format": ".json"}), - ("/-/metadata", "JsonDataView", {"format": ""}), + ("/-/metadata.json", "JsonDataView", {"format": "json"}), + ("/-/metadata", "JsonDataView", {"format": None}), ), ) def test_routes(routes, path, expected_class, expected_matches): From 798f075ef9b98819fdb564f9f79c78975a0f71e8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 13:32:29 -0700 Subject: [PATCH 0049/1218] Read format from route captures, closes #1667 Refs #1660 --- datasette/utils/__init__.py | 20 -------------------- datasette/views/base.py | 12 +----------- tests/test_utils.py | 25 ------------------------- 3 files changed, 1 insertion(+), 56 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index ccdf8ad4..c89b9d23 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -731,26 +731,6 @@ def module_from_path(path, name): return mod -async def resolve_table_and_format( - table_and_format, table_exists, allowed_formats=None -): - if allowed_formats is None: - allowed_formats = [] - if "." in table_and_format: - # Check if a table exists with this exact name - it_exists = await table_exists(table_and_format) - if it_exists: - return table_and_format, None - - # Check if table ends with a known format - formats = list(allowed_formats) + ["csv", "jsono"] - for _format in formats: - if table_and_format.endswith(f".{_format}"): - table = table_and_format[: -(len(_format) + 1)] - return table, _format - return table_and_format, None - - def path_with_format( *, request=None, path=None, format=None, extra_qs=None, replace_format=None ): diff --git a/datasette/views/base.py b/datasette/views/base.py index 0bbf98bb..24e97d95 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -19,12 +19,10 @@ from datasette.utils import ( LimitedWriter, call_with_supported_arguments, tilde_decode, - tilde_encode, path_from_row_pks, path_with_added_args, path_with_removed_args, path_with_format, - resolve_table_and_format, sqlite3, HASH_LENGTH, ) @@ -372,18 +370,10 @@ class DataView(BaseView): return AsgiStream(stream_fn, headers=headers, content_type=content_type) - def get_format(self, request): - # Format is the bit from the path following the ., if one exists - last_path_component = request.path.split("/")[-1] - if "." in last_path_component: - return last_path_component.split(".")[-1] - else: - return None - async def get(self, request): db_name = request.url_vars["database"] database = tilde_decode(db_name) - _format = self.get_format(request) + _format = request.url_vars["format"] data_kwargs = {} if _format == "csv": diff --git a/tests/test_utils.py b/tests/test_utils.py index 790aadc7..7b41a87f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -351,31 +351,6 @@ def test_compound_keys_after_sql(): ) -async def table_exists(table): - return table == "exists.csv" - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "table_and_format,expected_table,expected_format", - [ - ("blah", "blah", None), - ("blah.csv", "blah", "csv"), - ("blah.json", "blah", "json"), - ("blah.baz", "blah.baz", None), - ("exists.csv", "exists.csv", None), - ], -) -async def test_resolve_table_and_format( - table_and_format, expected_table, expected_format -): - actual_table, actual_format = await utils.resolve_table_and_format( - table_and_format, table_exists, ["json"] - ) - assert expected_table == actual_table - assert expected_format == actual_format - - def test_table_columns(): conn = sqlite3.connect(":memory:") conn.executescript( From 7a6654a253dee243518dc542ce4c06dbb0d0801d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 17:11:17 -0700 Subject: [PATCH 0050/1218] Databases can now have a .route separate from their .name, refs #1668 --- datasette/app.py | 13 ++++++-- datasette/database.py | 1 + datasette/views/base.py | 12 +++++-- datasette/views/database.py | 18 ++++++----- datasette/views/table.py | 29 ++++++++++++----- docs/internals.rst | 11 ++++--- tests/test_internals_datasette.py | 1 + tests/test_routes.py | 52 ++++++++++++++++++++++++++++++- 8 files changed, 111 insertions(+), 26 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index edef34e9..5c8101a3 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -388,13 +388,18 @@ class Datasette: def unsign(self, signed, namespace="default"): return URLSafeSerializer(self._secret, namespace).loads(signed) - def get_database(self, name=None): + def get_database(self, name=None, route=None): + if route is not None: + matches = [db for db in self.databases.values() if db.route == route] + if not matches: + raise KeyError + return matches[0] if name is None: - # Return first no-_schemas database + # Return first database that isn't "_internal" name = [key for key in self.databases.keys() if key != "_internal"][0] return self.databases[name] - def add_database(self, db, name=None): + def add_database(self, db, name=None, route=None): new_databases = self.databases.copy() if name is None: # Pick a unique name for this database @@ -407,6 +412,7 @@ class Datasette: name = "{}_{}".format(suggestion, i) i += 1 db.name = name + db.route = route or name new_databases[name] = db # don't mutate! that causes race conditions with live import self.databases = new_databases @@ -693,6 +699,7 @@ class Datasette: return [ { "name": d.name, + "route": d.route, "path": d.path, "size": d.size, "is_mutable": d.is_mutable, diff --git a/datasette/database.py b/datasette/database.py index 6ce87215..ba594a8c 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -31,6 +31,7 @@ class Database: self, ds, path=None, is_mutable=False, is_memory=False, memory_name=None ): self.name = None + self.route = None self.ds = ds self.path = path self.is_mutable = is_mutable diff --git a/datasette/views/base.py b/datasette/views/base.py index 24e97d95..afa9eaa6 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -371,13 +371,19 @@ class DataView(BaseView): return AsgiStream(stream_fn, headers=headers, content_type=content_type) async def get(self, request): - db_name = request.url_vars["database"] - database = tilde_decode(db_name) + database_route = tilde_decode(request.url_vars["database"]) + + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name + _format = request.url_vars["format"] data_kwargs = {} if _format == "csv": - return await self.as_csv(request, database) + return await self.as_csv(request, database_route) if _format is None: # HTML views default to expanding all foreign key labels diff --git a/datasette/views/database.py b/datasette/views/database.py index 93bd1011..2563c5b2 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -32,7 +32,13 @@ class DatabaseView(DataView): name = "database" async def data(self, request, default_labels=False, _size=None): - database = tilde_decode(request.url_vars["database"]) + database_route = tilde_decode(request.url_vars["database"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name + await self.check_permissions( request, [ @@ -50,11 +56,6 @@ class DatabaseView(DataView): request, sql, _size=_size, metadata=metadata ) - try: - db = self.ds.databases[database] - except KeyError: - raise NotFound("Database not found: {}".format(database)) - table_counts = await db.table_counts(5) hidden_table_names = set(await db.hidden_table_names()) all_foreign_keys = await db.get_all_foreign_keys() @@ -171,9 +172,10 @@ class DatabaseDownload(DataView): "view-instance", ], ) - if database not in self.ds.databases: + try: + db = self.ds.get_database(route=database) + except KeyError: raise DatasetteError("Invalid database", status=404) - db = self.ds.databases[database] if db.is_memory: raise DatasetteError("Cannot download in-memory databases", status=404) if not self.ds.setting("allow_download") or db.is_mutable: diff --git a/datasette/views/table.py b/datasette/views/table.py index ea4f24b7..7fa1da3a 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -272,10 +272,15 @@ class TableView(RowTableShared): name = "table" async def post(self, request): - db_name = tilde_decode(request.url_vars["database"]) + database_route = tilde_decode(request.url_vars["database"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name table = tilde_decode(request.url_vars["table"]) # Handle POST to a canned query - canned_query = await self.ds.get_canned_query(db_name, table, request.actor) + canned_query = await self.ds.get_canned_query(database, table, request.actor) assert canned_query, "You may only POST to a canned query" return await QueryView(self.ds).data( request, @@ -327,12 +332,13 @@ class TableView(RowTableShared): _next=None, _size=None, ): - database = tilde_decode(request.url_vars["database"]) + database_route = tilde_decode(request.url_vars["database"]) table = tilde_decode(request.url_vars["table"]) try: - db = self.ds.databases[database] + db = self.ds.get_database(route=database_route) except KeyError: - raise NotFound("Database not found: {}".format(database)) + raise NotFound("Database not found: {}".format(database_route)) + database = db.name # If this is a canned query, not a table, then dispatch to QueryView instead canned_query = await self.ds.get_canned_query(database, table, request.actor) @@ -938,8 +944,13 @@ class RowView(RowTableShared): name = "row" async def data(self, request, default_labels=False): - database = tilde_decode(request.url_vars["database"]) + database_route = tilde_decode(request.url_vars["database"]) table = tilde_decode(request.url_vars["table"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name await self.check_permissions( request, [ @@ -949,7 +960,11 @@ class RowView(RowTableShared): ], ) pk_values = urlsafe_components(request.url_vars["pks"]) - db = self.ds.databases[database] + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name sql, params, pks = await _sql_params_pks(db, table, pk_values) results = await db.execute(sql, params, truncate=True) columns = [r[0] for r in results.description] diff --git a/docs/internals.rst b/docs/internals.rst index 117cb95c..323256c7 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -307,14 +307,17 @@ Returns the specified database object. Raises a ``KeyError`` if the database doe .. _datasette_add_database: -.add_database(db, name=None) ----------------------------- +.add_database(db, name=None, route=None) +---------------------------------------- ``db`` - datasette.database.Database instance The database to be attached. ``name`` - string, optional - The name to be used for this database - this will be used in the URL path, e.g. ``/dbname``. If not specified Datasette will pick one based on the filename or memory name. + The name to be used for this database . If not specified Datasette will pick one based on the filename or memory name. + +``route`` - string, optional + This will be used in the URL path. If not specified, it will default to the same thing as the ``name``. The ``datasette.add_database(db)`` method lets you add a new database to the current Datasette instance. @@ -371,7 +374,7 @@ Using either of these pattern will result in the in-memory database being served ``name`` - string The name of the database to be removed. -This removes a database that has been previously added. ``name=`` is the unique name of that database, used in its URL path. +This removes a database that has been previously added. ``name=`` is the unique name of that database. .. _datasette_sign: diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index adf84be9..cc200a2d 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -55,6 +55,7 @@ async def test_datasette_constructor(): assert databases == [ { "name": "_memory", + "route": "_memory", "path": None, "size": 0, "is_mutable": False, diff --git a/tests/test_routes.py b/tests/test_routes.py index 1fa55018..dd3bc644 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -1,6 +1,7 @@ -from datasette.app import Datasette +from datasette.app import Datasette, Database from datasette.utils import resolve_routes import pytest +import pytest_asyncio @pytest.fixture(scope="session") @@ -53,3 +54,52 @@ def test_routes(routes, path, expected_class, expected_matches): else: assert view.view_class.__name__ == expected_class assert match.groupdict() == expected_matches + + +@pytest_asyncio.fixture +async def ds_with_route(): + ds = Datasette() + ds.remove_database("_memory") + db = Database(ds, is_memory=True, memory_name="route-name-db") + ds.add_database(db, name="name", route="route-name") + await db.execute_write_script( + """ + create table if not exists t (id integer primary key); + insert or replace into t (id) values (1); + """ + ) + return ds + + +@pytest.mark.asyncio +async def test_db_with_route_databases(ds_with_route): + response = await ds_with_route.client.get("/-/databases.json") + assert response.json()[0] == { + "name": "name", + "route": "route-name", + "path": None, + "size": 0, + "is_mutable": True, + "is_memory": True, + "hash": None, + } + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "path,expected_status", + ( + ("/", 200), + ("/name", 404), + ("/name/t", 404), + ("/name/t/1", 404), + ("/route-name", 200), + ("/route-name/t", 200), + ("/route-name/t/1", 200), + ), +) +async def test_db_with_route_that_does_not_match_name( + ds_with_route, path, expected_status +): + response = await ds_with_route.client.get(path) + assert response.status_code == expected_status From e10da9af3595c0a4e09c6f370103571aa4ea106e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 17:21:56 -0700 Subject: [PATCH 0051/1218] alternative-route demo, refs #1668 --- .github/workflows/deploy-latest.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 1ae96e89..92aa1c6b 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -42,6 +42,17 @@ jobs: sphinx-build -b xml . _build sphinx-to-sqlite ../docs.db _build cd .. + - name: Set up the alternate-route demo + run: | + echo ' + from datasette import hookimpl + + @hookimpl + def startup(datasette): + db = datasette.get_database("fixtures2") + db.route = "alternative-route" + ' > plugins/alternative_route.py + cp fixtures.db fixtures2.db - name: Set up Cloud Run uses: google-github-actions/setup-gcloud@master with: @@ -54,7 +65,7 @@ jobs: gcloud config set project datasette-222320 export SUFFIX="-${GITHUB_REF#refs/heads/}" export SUFFIX=${SUFFIX#-main} - datasette publish cloudrun fixtures.db extra_database.db \ + datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \ -m fixtures.json \ --plugins-dir=plugins \ --branch=$GITHUB_SHA \ From cdbae2b93f441653616dd889644c63e4150ceec1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 17:31:23 -0700 Subject: [PATCH 0052/1218] Fixed internal links to respect db.route, refs #1668 --- datasette/url_builder.py | 3 ++- datasette/views/table.py | 5 ++--- tests/test_routes.py | 22 +++++++++++++--------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/datasette/url_builder.py b/datasette/url_builder.py index 498ec85d..574bf3c1 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -28,7 +28,8 @@ class Urls: return self.path("-/logout") def database(self, database, format=None): - return self.path(tilde_encode(database), format=format) + db = self.ds.get_database(database) + return self.path(tilde_encode(db.route), format=format) def table(self, database, table, format=None): path = f"{self.database(database)}/{tilde_encode(table)}" diff --git a/datasette/views/table.py b/datasette/views/table.py index 7fa1da3a..8745c28a 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -141,10 +141,9 @@ class RowTableShared(DataView): "is_special_link_column": is_special_link_column, "raw": pk_path, "value": markupsafe.Markup( - '{flat_pks}'.format( + '{flat_pks}'.format( base_url=base_url, - database=database, - table=tilde_encode(table), + table_path=self.ds.urls.table(database, table), flat_pks=str(markupsafe.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) diff --git a/tests/test_routes.py b/tests/test_routes.py index dd3bc644..211b77b5 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -61,7 +61,7 @@ async def ds_with_route(): ds = Datasette() ds.remove_database("_memory") db = Database(ds, is_memory=True, memory_name="route-name-db") - ds.add_database(db, name="name", route="route-name") + ds.add_database(db, name="original-name", route="custom-route-name") await db.execute_write_script( """ create table if not exists t (id integer primary key); @@ -75,8 +75,8 @@ async def ds_with_route(): async def test_db_with_route_databases(ds_with_route): response = await ds_with_route.client.get("/-/databases.json") assert response.json()[0] == { - "name": "name", - "route": "route-name", + "name": "original-name", + "route": "custom-route-name", "path": None, "size": 0, "is_mutable": True, @@ -90,12 +90,12 @@ async def test_db_with_route_databases(ds_with_route): "path,expected_status", ( ("/", 200), - ("/name", 404), - ("/name/t", 404), - ("/name/t/1", 404), - ("/route-name", 200), - ("/route-name/t", 200), - ("/route-name/t/1", 200), + ("/original-name", 404), + ("/original-name/t", 404), + ("/original-name/t/1", 404), + ("/custom-route-name", 200), + ("/custom-route-name/t", 200), + ("/custom-route-name/t/1", 200), ), ) async def test_db_with_route_that_does_not_match_name( @@ -103,3 +103,7 @@ async def test_db_with_route_that_does_not_match_name( ): response = await ds_with_route.client.get(path) assert response.status_code == expected_status + # There should be links to custom-route-name but none to original-name + if response.status_code == 200: + assert "/custom-route-name" in response.text + assert "/original-name" not in response.text From 5471e3c4914837de957e206d8fb80c9ec383bc2e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 18:14:40 -0700 Subject: [PATCH 0053/1218] Release 0.61a0 Refs #957, #1533, #1545, #1576, #1577, #1587, #1601, #1603, #1607, #1612, #1621, #1649, #1654, #1657, #1661, #1668 --- datasette/version.py | 2 +- docs/changelog.rst | 29 +++++++++++++++++++++++++++-- docs/performance.rst | 2 ++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/datasette/version.py b/datasette/version.py index 91224615..ccc1e04b 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.60.2" +__version__ = "0.61a0" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index c58c8444..0f3d3aff 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,14 +4,39 @@ Changelog ========= -.. _v0_60.2: +.. _v0_61_a0: + +0.61a0 (2022-03-19) +------------------- + +- Removed hashed URL mode from Datasette. The new ``datasette-hashed-urls`` plugin can be used to achieve the same result, see :ref:`performance_hashed_urls` for details. (:issue:`1661`) +- Databases can now have a custom path within the Datasette instance that is indpendent of the database name, using the ``db.route`` property. (:issue:`1668`) +- URLs within Datasette now use a different encoding scheme for tables or databases that include "special" characters outside of the range of ``a-zA-Z0-9_-``. This scheme is explained here: :ref:`internals_tilde_encoding`. (:issue:`1657`) +- Table and row HTML pages now include a ```` element and return a ``Link: URL; rel="alternate"; type="application/json+datasette"`` HTTP header pointing to the JSON version of those pages. (:issue:`1533`) +- ``Access-Control-Expose-Headers: Link`` is now added to the CORS headers, allowing remote JavaScript to access that header. +- Canned queries are now shown at the top of the database page, directly below the SQL editor. Previously they were shown at the bottom, below the list of tables. (:issue:`1612`) +- Datasette now has a default favicon. (:issue:`1603`) +- ``sqlite_stat`` tables are now hidden by default. (:issue:`1587`) +- SpatiaLite tables ``data_licenses``, ``KNN`` and ``KNN2`` are now hidden by default. (:issue:`1601`) +- Python 3.6 is no longer supported. (:issue:`1577`) +- Tests now run against Python 3.11-dev. (:issue:`1621`) +- Fixed bug where :ref:`custom pages ` did not work on Windows. Thanks, Robert Christie. (:issue:`1545`) +- SQL query tracing mechanism now works for queries executed in ``asyncio`` sub-tasks, such as those created by ``asyncio.gather()``. (:issue:`1576`) +- :ref:`internals_tracer` mechanism is now documented. +- Common Datasette symbols can now be imported directly from the top-level ``datasette`` package, see :ref:`internals_shortcuts`. Those symbols are ``Response``, ``Forbidden``, ``NotFound``, ``hookimpl``, ``actor_matches_allow``. (:issue:`957`) +- ``/-/versions`` page now returns additional details for libraries used by SpatiaLite. (:issue:`1607`) +- Documentation now links to the `Datasette Tutorials `__. +- Datasette will now also look for SpatiaLite in ``/opt/homebrew`` - thanks, Dan Peterson. (`#1649 `__) +- Datasette is now covered by a `Code of Conduct `__. (:issue:`1654`) + +.. _v0_60_2: 0.60.2 (2022-02-07) ------------------- - Fixed a bug where Datasette would open the same file twice with two different database names if you ran ``datasette file.db file.db``. (:issue:`1632`) -.. _v0_60.1: +.. _v0_60_1: 0.60.1 (2022-01-20) ------------------- diff --git a/docs/performance.rst b/docs/performance.rst index d37f1804..89bbf5ae 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -60,6 +60,8 @@ The :ref:`setting_default_cache_ttl` setting sets the default HTTP cache TTL for You can also change the cache timeout on a per-request basis using the ``?_ttl=10`` query string parameter. This can be useful when you are working with the Datasette JSON API - you may decide that a specific query can be cached for a longer time, or maybe you need to set ``?_ttl=0`` for some requests for example if you are running a SQL ``order by random()`` query. +.. _performance_hashed_urls: + datasette-hashed-urls --------------------- From cb4854a435cc1418665edec2a73664ad74a32017 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 18:17:58 -0700 Subject: [PATCH 0054/1218] Fixed typo --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 0f3d3aff..9f5a143c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,7 +10,7 @@ Changelog ------------------- - Removed hashed URL mode from Datasette. The new ``datasette-hashed-urls`` plugin can be used to achieve the same result, see :ref:`performance_hashed_urls` for details. (:issue:`1661`) -- Databases can now have a custom path within the Datasette instance that is indpendent of the database name, using the ``db.route`` property. (:issue:`1668`) +- Databases can now have a custom path within the Datasette instance that is independent of the database name, using the ``db.route`` property. (:issue:`1668`) - URLs within Datasette now use a different encoding scheme for tables or databases that include "special" characters outside of the range of ``a-zA-Z0-9_-``. This scheme is explained here: :ref:`internals_tilde_encoding`. (:issue:`1657`) - Table and row HTML pages now include a ```` element and return a ``Link: URL; rel="alternate"; type="application/json+datasette"`` HTTP header pointing to the JSON version of those pages. (:issue:`1533`) - ``Access-Control-Expose-Headers: Link`` is now added to the CORS headers, allowing remote JavaScript to access that header. From 4a4164b81191dec35e423486a208b05a9edc65e4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 19 Mar 2022 18:23:03 -0700 Subject: [PATCH 0055/1218] Added another note to the 0.61a0 release notes, refs #1228 --- docs/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9f5a143c..05ad85f2 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -28,6 +28,7 @@ Changelog - Documentation now links to the `Datasette Tutorials `__. - Datasette will now also look for SpatiaLite in ``/opt/homebrew`` - thanks, Dan Peterson. (`#1649 `__) - Datasette is now covered by a `Code of Conduct `__. (:issue:`1654`) +- Fixed error caused when a table had a column named ``n``. (:issue:`1228`) .. _v0_60_2: From e627510b760198ccedba9e5af47a771e847785c9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 10:13:16 -0700 Subject: [PATCH 0056/1218] BaseView.check_permissions is now datasette.ensure_permissions, closes #1675 Refs #1660 --- datasette/app.py | 35 +++++++++++++++++++++++++++++++++++ datasette/views/base.py | 26 -------------------------- datasette/views/database.py | 12 ++++++------ datasette/views/table.py | 8 ++++---- docs/internals.rst | 26 ++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 36 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5c8101a3..9e509e96 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,4 +1,5 @@ import asyncio +from typing import Sequence, Union, Tuple import asgi_csrf import collections import datetime @@ -628,6 +629,40 @@ class Datasette: ) return result + async def ensure_permissions( + self, + actor: dict, + permissions: Sequence[Union[Tuple[str, Union[str, Tuple[str, str]]], str]], + ): + """ + permissions is a list of (action, resource) tuples or 'action' strings + + Raises datasette.Forbidden() if any of the checks fail + """ + for permission in permissions: + if isinstance(permission, str): + action = permission + resource = None + elif isinstance(permission, (tuple, list)) and len(permission) == 2: + action, resource = permission + else: + assert ( + False + ), "permission should be string or tuple of two items: {}".format( + repr(permission) + ) + ok = await self.permission_allowed( + actor, + action, + resource=resource, + default=None, + ) + if ok is not None: + if ok: + return + else: + raise Forbidden(action) + async def execute( self, db_name, diff --git a/datasette/views/base.py b/datasette/views/base.py index afa9eaa6..d1e684a2 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -76,32 +76,6 @@ class BaseView: if not ok: raise Forbidden(action) - async def check_permissions(self, request, permissions): - """permissions is a list of (action, resource) tuples or 'action' strings""" - for permission in permissions: - if isinstance(permission, str): - action = permission - resource = None - elif isinstance(permission, (tuple, list)) and len(permission) == 2: - action, resource = permission - else: - assert ( - False - ), "permission should be string or tuple of two items: {}".format( - repr(permission) - ) - ok = await self.ds.permission_allowed( - request.actor, - action, - resource=resource, - default=None, - ) - if ok is not None: - if ok: - return - else: - raise Forbidden(action) - def database_color(self, database): return "ff0000" diff --git a/datasette/views/database.py b/datasette/views/database.py index 2563c5b2..69ed1233 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -39,8 +39,8 @@ class DatabaseView(DataView): raise NotFound("Database not found: {}".format(database_route)) database = db.name - await self.check_permissions( - request, + await self.ds.ensure_permissions( + request.actor, [ ("view-database", database), "view-instance", @@ -164,8 +164,8 @@ class DatabaseDownload(DataView): async def get(self, request): database = tilde_decode(request.url_vars["database"]) - await self.check_permissions( - request, + await self.ds.ensure_permissions( + request.actor, [ ("view-database-download", database), ("view-database", database), @@ -217,8 +217,8 @@ class QueryView(DataView): private = False if canned_query: # Respect canned query permissions - await self.check_permissions( - request, + await self.ds.ensure_permissions( + request.actor, [ ("view-query", (database, canned_query)), ("view-database", database), diff --git a/datasette/views/table.py b/datasette/views/table.py index 8745c28a..84169820 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -360,8 +360,8 @@ class TableView(RowTableShared): raise NotFound(f"Table not found: {table}") # Ensure user has permission to view this table - await self.check_permissions( - request, + await self.ds.ensure_permissions( + request.actor, [ ("view-table", (database, table)), ("view-database", database), @@ -950,8 +950,8 @@ class RowView(RowTableShared): except KeyError: raise NotFound("Database not found: {}".format(database_route)) database = db.name - await self.check_permissions( - request, + await self.ds.ensure_permissions( + request.actor, [ ("view-table", (database, table)), ("view-database", database), diff --git a/docs/internals.rst b/docs/internals.rst index 323256c7..12adde00 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -295,6 +295,32 @@ If neither ``metadata.json`` nor any of the plugins provide an answer to the per See :ref:`permissions` for a full list of permission actions included in Datasette core. +.. _datasette_permission_allowed: + +await .ensure_permissions(actor, permissions) +--------------------------------------------- + +``actor`` - dictionary + The authenticated actor. This is usually ``request.actor``. + +``permissions`` - list + A list of permissions to check. Each permission in that list can be a string ``action`` name or a 2-tuple of ``(action, resource)``. + +This method allows multiple permissions to be checked at onced. It raises a ``datasette.Forbidden`` exception if any of the checks are denied before one of them is explicitly granted. + +This is useful when you need to check multiple permissions at once. For example, an actor should be able to view a table if either one of the following checks returns ``True`` or not a single one of them returns ``False``: + +.. code-block:: python + + await self.ds.ensure_permissions( + request.actor, + [ + ("view-table", (database, table)), + ("view-database", database), + "view-instance", + ] + ) + .. _datasette_get_database: .get_database(name) From dfafce6d962d615d98a7080e546c7b3662ae7d34 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 11:37:27 -0700 Subject: [PATCH 0057/1218] Display no-opinion permission checks on /-/permissions --- datasette/templates/permissions_debug.html | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datasette/templates/permissions_debug.html b/datasette/templates/permissions_debug.html index d898ea8c..db709c14 100644 --- a/datasette/templates/permissions_debug.html +++ b/datasette/templates/permissions_debug.html @@ -10,6 +10,9 @@ .check-result-false { color: red; } +.check-result-no-opinion { + color: #aaa; +} .check h2 { font-size: 1em } @@ -38,6 +41,8 @@ {{ check.when }} {% if check.result %} โœ“ + {% elif check.result is none %} + none {% else %} โœ— {% endif %} From 194e4f6c3fffde69eb196f8535ca45386b40ec2d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 11:41:56 -0700 Subject: [PATCH 0058/1218] Removed check_permission() from BaseView, closes #1677 Refs #1660 --- datasette/app.py | 1 + datasette/views/base.py | 10 ---------- datasette/views/database.py | 2 +- datasette/views/index.py | 2 +- datasette/views/special.py | 10 +++++----- tests/test_permissions.py | 13 ++++++++----- 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 9e509e96..22ae211f 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -639,6 +639,7 @@ class Datasette: Raises datasette.Forbidden() if any of the checks fail """ + assert actor is None or isinstance(actor, dict) for permission in permissions: if isinstance(permission, str): action = permission diff --git a/datasette/views/base.py b/datasette/views/base.py index d1e684a2..221e1882 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -66,16 +66,6 @@ class BaseView: response.body = b"" return response - async def check_permission(self, request, action, resource=None): - ok = await self.ds.permission_allowed( - request.actor, - action, - resource=resource, - default=True, - ) - if not ok: - raise Forbidden(action) - def database_color(self, database): return "ff0000" diff --git a/datasette/views/database.py b/datasette/views/database.py index 69ed1233..31a1839f 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -229,7 +229,7 @@ class QueryView(DataView): None, "view-query", (database, canned_query), default=True ) else: - await self.check_permission(request, "execute-sql", database) + await self.ds.ensure_permissions(request.actor, [("execute-sql", database)]) # Extract any :named parameters named_parameters = named_parameters or await derive_named_parameters( diff --git a/datasette/views/index.py b/datasette/views/index.py index f5e31181..1c391e26 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -20,7 +20,7 @@ class IndexView(BaseView): async def get(self, request): as_format = request.url_vars["format"] - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) databases = [] for name, db in self.ds.databases.items(): visible, database_private = await check_visibility( diff --git a/datasette/views/special.py b/datasette/views/special.py index 395ee587..dd834528 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -16,7 +16,7 @@ class JsonDataView(BaseView): async def get(self, request): as_format = request.url_vars["format"] - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) if self.needs_request: data = self.data_callback(request) else: @@ -47,7 +47,7 @@ class PatternPortfolioView(BaseView): has_json_alternate = False async def get(self, request): - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) return await self.render(["patterns.html"], request=request) @@ -95,7 +95,7 @@ class PermissionsDebugView(BaseView): has_json_alternate = False async def get(self, request): - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) if not await self.ds.permission_allowed(request.actor, "permissions-debug"): raise Forbidden("Permission denied") return await self.render( @@ -146,11 +146,11 @@ class MessagesDebugView(BaseView): has_json_alternate = False async def get(self, request): - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) return await self.render(["messages_debug.html"], request) async def post(self, request): - await self.check_permission(request, "view-instance") + await self.ds.ensure_permissions(request.actor, ["view-instance"]) post = await request.post_vars() message = post.get("message", "") message_type = post.get("message_type") or "INFO" diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 788523b0..f4169dbe 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -321,17 +321,20 @@ def test_permissions_debug(app_client): checks = [ { "action": div.select_one(".check-action").text, - "result": bool(div.select(".check-result-true")), + # True = green tick, False = red cross, None = gray None + "result": None + if div.select(".check-result-no-opinion") + else bool(div.select(".check-result-true")), "used_default": bool(div.select(".check-used-default")), } for div in check_divs ] - assert [ + assert checks == [ {"action": "permissions-debug", "result": True, "used_default": False}, - {"action": "view-instance", "result": True, "used_default": True}, + {"action": "view-instance", "result": None, "used_default": True}, {"action": "permissions-debug", "result": False, "used_default": True}, - {"action": "view-instance", "result": True, "used_default": True}, - ] == checks + {"action": "view-instance", "result": None, "used_default": True}, + ] @pytest.mark.parametrize( From 1a7750eb29fd15dd2eea3b9f6e33028ce441b143 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 12:01:37 -0700 Subject: [PATCH 0059/1218] Documented datasette.check_visibility() method, closes #1678 --- datasette/app.py | 18 ++++++++++++++++++ datasette/utils/__init__.py | 19 ------------------- datasette/views/database.py | 10 +++------- datasette/views/index.py | 11 ++++------- docs/internals.rst | 28 +++++++++++++++++++++++++++- 5 files changed, 52 insertions(+), 34 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 22ae211f..c9eede26 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -664,6 +664,24 @@ class Datasette: else: raise Forbidden(action) + async def check_visibility(self, actor, action, resource): + """Returns (visible, private) - visible = can you see it, private = can others see it too""" + visible = await self.permission_allowed( + actor, + action, + resource=resource, + default=True, + ) + if not visible: + return False, False + private = not await self.permission_allowed( + None, + action, + resource=resource, + default=True, + ) + return visible, private + async def execute( self, db_name, diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index c89b9d23..cd8e3d61 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1002,25 +1002,6 @@ def actor_matches_allow(actor, allow): return False -async def check_visibility(datasette, actor, action, resource, default=True): - """Returns (visible, private) - visible = can you see it, private = can others see it too""" - visible = await datasette.permission_allowed( - actor, - action, - resource=resource, - default=default, - ) - if not visible: - return False, False - private = not await datasette.permission_allowed( - None, - action, - resource=resource, - default=default, - ) - return visible, private - - def resolve_env_secrets(config, environ): """Create copy that recursively replaces {"$env": "NAME"} with values from environ""" if isinstance(config, dict): diff --git a/datasette/views/database.py b/datasette/views/database.py index 31a1839f..103bd575 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -10,7 +10,6 @@ import markupsafe from datasette.utils import ( add_cors_headers, await_me_maybe, - check_visibility, derive_named_parameters, tilde_decode, to_css_class, @@ -62,8 +61,7 @@ class DatabaseView(DataView): views = [] for view_name in await db.view_names(): - visible, private = await check_visibility( - self.ds, + visible, private = await self.ds.check_visibility( request.actor, "view-table", (database, view_name), @@ -78,8 +76,7 @@ class DatabaseView(DataView): tables = [] for table in table_counts: - visible, private = await check_visibility( - self.ds, + visible, private = await self.ds.check_visibility( request.actor, "view-table", (database, table), @@ -105,8 +102,7 @@ class DatabaseView(DataView): for query in ( await self.ds.get_canned_queries(database, request.actor) ).values(): - visible, private = await check_visibility( - self.ds, + visible, private = await self.ds.check_visibility( request.actor, "view-query", (database, query["name"]), diff --git a/datasette/views/index.py b/datasette/views/index.py index 1c391e26..aec78814 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -1,7 +1,7 @@ import hashlib import json -from datasette.utils import add_cors_headers, check_visibility, CustomJSONEncoder +from datasette.utils import add_cors_headers, CustomJSONEncoder from datasette.utils.asgi import Response from datasette.version import __version__ @@ -23,8 +23,7 @@ class IndexView(BaseView): await self.ds.ensure_permissions(request.actor, ["view-instance"]) databases = [] for name, db in self.ds.databases.items(): - visible, database_private = await check_visibility( - self.ds, + visible, database_private = await self.ds.check_visibility( request.actor, "view-database", name, @@ -36,8 +35,7 @@ class IndexView(BaseView): views = [] for view_name in await db.view_names(): - visible, private = await check_visibility( - self.ds, + visible, private = await self.ds.check_visibility( request.actor, "view-table", (name, view_name), @@ -55,8 +53,7 @@ class IndexView(BaseView): tables = {} for table in table_names: - visible, private = await check_visibility( - self.ds, + visible, private = await self.ds.check_visibility( request.actor, "view-table", (name, table), diff --git a/docs/internals.rst b/docs/internals.rst index 12adde00..f9a24fea 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -295,7 +295,7 @@ If neither ``metadata.json`` nor any of the plugins provide an answer to the per See :ref:`permissions` for a full list of permission actions included in Datasette core. -.. _datasette_permission_allowed: +.. _datasette_ensure_permissions: await .ensure_permissions(actor, permissions) --------------------------------------------- @@ -321,6 +321,32 @@ This is useful when you need to check multiple permissions at once. For example, ] ) +.. _datasette_check_visibilty: + +await .check_visibility(actor, action, resource=None) +----------------------------------------------------- + +``actor`` - dictionary + The authenticated actor. This is usually ``request.actor``. + +``action`` - string + The name of the action that is being permission checked. + +``resource`` - string or tuple, optional + The resource, e.g. the name of the database, or a tuple of two strings containing the name of the database and the name of the table. Only some permissions apply to a resource. + +This convenience method can be used to answer the question "should this item be considered private, in that it is visible to me but it is not visible to anonymous users?" + +It returns a tuple of two booleans, ``(visible, private)``. ``visible`` indicates if the actor can see this resource. ``private`` will be ``True`` if an anonymous user would not be able to view the resource. + +This example checks if the user can access a specific table, and sets ``private`` so that a padlock icon can later be displayed: + +.. code-block:: python + + visible, private = await self.ds.check_visibility( + request.actor, "view-table", (database, table) + ) + .. _datasette_get_database: .get_database(name) From 72bfd75fb7241893c931348e6aca712edc67ab04 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 14:55:50 -0700 Subject: [PATCH 0060/1218] Drop n=1 threshold down to <= 20ms, closes #1679 --- datasette/utils/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index cd8e3d61..9109f823 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -182,15 +182,16 @@ class CustomJSONEncoder(json.JSONEncoder): def sqlite_timelimit(conn, ms): deadline = time.perf_counter() + (ms / 1000) # n is the number of SQLite virtual machine instructions that will be - # executed between each check. It's hard to know what to pick here. - # After some experimentation, I've decided to go with 1000 by default and - # 1 for time limits that are less than 50ms + # executed between each check. It takes about 0.08ms to execute 1000. + # https://github.com/simonw/datasette/issues/1679 n = 1000 - if ms < 50: + if ms <= 20: + # This mainly happens while executing our test suite n = 1 def handler(): if time.perf_counter() >= deadline: + # Returning 1 terminates the query with an error return 1 conn.set_progress_handler(handler, n) From 12f3ca79956ed9003c874f67748432adcacc6fd2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 18:42:03 -0700 Subject: [PATCH 0061/1218] google-github-actions/setup-gcloud@v0 --- .github/workflows/deploy-latest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 92aa1c6b..a61f6629 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: "3.10" - uses: actions/cache@v2 name: Configure pip caching with: @@ -54,7 +54,7 @@ jobs: ' > plugins/alternative_route.py cp fixtures.db fixtures2.db - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@master + uses: google-github-actions/setup-gcloud@v0 with: version: '275.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} From c4c9dbd0386e46d2bf199f0ed34e4895c98cb78c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 22 Mar 2022 09:49:26 -0700 Subject: [PATCH 0062/1218] google-github-actions/setup-gcloud@v0 --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3cfc67da..3e4f8146 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -85,7 +85,7 @@ jobs: sphinx-to-sqlite ../docs.db _build cd .. - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@master + uses: google-github-actions/setup-gcloud@v0 with: version: '275.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} From d7c793d7998388d915f8d270079c68a77a785051 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 23 Mar 2022 11:12:26 -0700 Subject: [PATCH 0063/1218] Release 0.61 Refs #957, #1228, #1533, #1545, #1576, #1577, #1587, #1601, #1603, #1607, #1612, #1621, #1649, #1654, #1657, #1661, #1668, #1675, #1678 --- datasette/version.py | 2 +- docs/changelog.rst | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/datasette/version.py b/datasette/version.py index ccc1e04b..f9b10696 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.61a0" +__version__ = "0.61" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 05ad85f2..d2de8da1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,30 +4,36 @@ Changelog ========= -.. _v0_61_a0: +.. _v0_61: -0.61a0 (2022-03-19) -------------------- +0.61 (2022-03-23) +----------------- +In preparation for Datasette 1.0, this release includes two potentially backwards-incompatible changes. Hashed URL mode has been moved to a separate plugin, and the way Datasette generates URLs to databases and tables with special characters in their name such as ``/`` and ``.`` has changed. + +Datasette also now requires Python 3.7 or higher. + +- URLs within Datasette now use a different encoding scheme for tables or databases that include "special" characters outside of the range of ``a-zA-Z0-9_-``. This scheme is explained here: :ref:`internals_tilde_encoding`. (:issue:`1657`) - Removed hashed URL mode from Datasette. The new ``datasette-hashed-urls`` plugin can be used to achieve the same result, see :ref:`performance_hashed_urls` for details. (:issue:`1661`) - Databases can now have a custom path within the Datasette instance that is independent of the database name, using the ``db.route`` property. (:issue:`1668`) -- URLs within Datasette now use a different encoding scheme for tables or databases that include "special" characters outside of the range of ``a-zA-Z0-9_-``. This scheme is explained here: :ref:`internals_tilde_encoding`. (:issue:`1657`) +- Datasette is now covered by a `Code of Conduct `__. (:issue:`1654`) +- Python 3.6 is no longer supported. (:issue:`1577`) +- Tests now run against Python 3.11-dev. (:issue:`1621`) +- New :ref:`datasette.ensure_permissions(actor, permissions) ` internal method for checking multiple permissions at once. (:issue:`1675`) +- New :ref:`datasette.check_visibility(actor, action, resource=None) ` internal method for checking if a user can see a resource that would otherwise be invisible to unauthenticated users. (:issue:`1678`) - Table and row HTML pages now include a ```` element and return a ``Link: URL; rel="alternate"; type="application/json+datasette"`` HTTP header pointing to the JSON version of those pages. (:issue:`1533`) - ``Access-Control-Expose-Headers: Link`` is now added to the CORS headers, allowing remote JavaScript to access that header. - Canned queries are now shown at the top of the database page, directly below the SQL editor. Previously they were shown at the bottom, below the list of tables. (:issue:`1612`) - Datasette now has a default favicon. (:issue:`1603`) - ``sqlite_stat`` tables are now hidden by default. (:issue:`1587`) - SpatiaLite tables ``data_licenses``, ``KNN`` and ``KNN2`` are now hidden by default. (:issue:`1601`) -- Python 3.6 is no longer supported. (:issue:`1577`) -- Tests now run against Python 3.11-dev. (:issue:`1621`) -- Fixed bug where :ref:`custom pages ` did not work on Windows. Thanks, Robert Christie. (:issue:`1545`) - SQL query tracing mechanism now works for queries executed in ``asyncio`` sub-tasks, such as those created by ``asyncio.gather()``. (:issue:`1576`) - :ref:`internals_tracer` mechanism is now documented. - Common Datasette symbols can now be imported directly from the top-level ``datasette`` package, see :ref:`internals_shortcuts`. Those symbols are ``Response``, ``Forbidden``, ``NotFound``, ``hookimpl``, ``actor_matches_allow``. (:issue:`957`) - ``/-/versions`` page now returns additional details for libraries used by SpatiaLite. (:issue:`1607`) - Documentation now links to the `Datasette Tutorials `__. - Datasette will now also look for SpatiaLite in ``/opt/homebrew`` - thanks, Dan Peterson. (`#1649 `__) -- Datasette is now covered by a `Code of Conduct `__. (:issue:`1654`) +- Fixed bug where :ref:`custom pages ` did not work on Windows. Thanks, Robert Christie. (:issue:`1545`) - Fixed error caused when a table had a column named ``n``. (:issue:`1228`) .. _v0_60_2: From 0159662ab8ccb363c59647861360e0cb7a6f930d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 23 Mar 2022 11:48:10 -0700 Subject: [PATCH 0064/1218] Fix for bug running ?sql= against databases with a different route, closes #1682 --- datasette/views/database.py | 7 ++++++- tests/test_routes.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index 103bd575..bdd433cc 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -203,7 +203,12 @@ class QueryView(DataView): named_parameters=None, write=False, ): - database = tilde_decode(request.url_vars["database"]) + database_route = tilde_decode(request.url_vars["database"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name params = {key: request.args.get(key) for key in request.args} if "sql" in params: params.pop("sql") diff --git a/tests/test_routes.py b/tests/test_routes.py index 211b77b5..5ae55d21 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -94,6 +94,7 @@ async def test_db_with_route_databases(ds_with_route): ("/original-name/t", 404), ("/original-name/t/1", 404), ("/custom-route-name", 200), + ("/custom-route-name?sql=select+id+from+t", 200), ("/custom-route-name/t", 200), ("/custom-route-name/t/1", 200), ), From d431a9055e977aefe48689a2e5866ea8d3558a6c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 23 Mar 2022 11:54:10 -0700 Subject: [PATCH 0065/1218] Release 0.61.1 Refs #1682 Refs https://github.com/simonw/datasette-hashed-urls/issues/13 --- datasette/version.py | 2 +- docs/changelog.rst | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index f9b10696..02451a1e 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.61" +__version__ = "0.61.1" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index d2de8da1..03cf62b6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,13 @@ Changelog ========= +.. _v0_61_1: + +0.61.1 (2022-03-23) +------------------- + +- Fixed a bug where databases with a different route from their name (as used by the `datasette-hashed-urls plugin `__) returned errors when executing custom SQL queries. (:issue:`1682`) + .. _v0_61: 0.61 (2022-03-23) From c496f2b663ff0cef908ffaaa68b8cb63111fb5f2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 24 Mar 2022 12:16:19 -0700 Subject: [PATCH 0066/1218] Don't show facet in cog menu if not allow_facet, closes #1683 --- datasette/static/table.js | 10 ++++++++-- datasette/templates/table.html | 1 + datasette/views/table.py | 3 +++ tests/test_table_html.py | 14 ++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/datasette/static/table.js b/datasette/static/table.js index 3c88cc40..096a27ac 100644 --- a/datasette/static/table.js +++ b/datasette/static/table.js @@ -128,7 +128,8 @@ var DROPDOWN_ICON_SVG = ` el.dataset.column); @@ -137,7 +138,12 @@ var DROPDOWN_ICON_SVG = ` + `_, `datasette-publish-vercel `_ @@ -400,7 +422,9 @@ If the value matches that pattern, the plugin returns an HTML link element: if not isinstance(value, str): return None stripped = value.strip() - if not stripped.startswith("{") and stripped.endswith("}"): + if not stripped.startswith("{") and stripped.endswith( + "}" + ): return None try: data = json.loads(value) @@ -412,14 +436,18 @@ If the value matches that pattern, the plugin returns an HTML link element: return None href = data["href"] if not ( - href.startswith("/") or href.startswith("http://") + href.startswith("/") + or href.startswith("http://") or href.startswith("https://") ): return None - return markupsafe.Markup('{label}'.format( - href=markupsafe.escape(data["href"]), - label=markupsafe.escape(data["label"] or "") or " " - )) + return markupsafe.Markup( + '{label}'.format( + href=markupsafe.escape(data["href"]), + label=markupsafe.escape(data["label"] or "") + or " ", + ) + ) Examples: `datasette-render-binary `_, `datasette-render-markdown `__, `datasette-json-html `__ @@ -516,7 +544,7 @@ Here is a more complex example: return Response( "\n".join(lines), content_type="text/plain; charset=utf-8", - headers={"x-sqlite-version": result.first()[0]} + headers={"x-sqlite-version": result.first()[0]}, ) And here is an example ``can_render`` function which returns ``True`` only if the query results contain the columns ``atom_id``, ``atom_title`` and ``atom_updated``: @@ -524,7 +552,11 @@ And here is an example ``can_render`` function which returns ``True`` only if th .. code-block:: python def can_render_demo(columns): - return {"atom_id", "atom_title", "atom_updated"}.issubset(columns) + return { + "atom_id", + "atom_title", + "atom_updated", + }.issubset(columns) Examples: `datasette-atom `_, `datasette-ics `_, `datasette-geojson `__ @@ -548,16 +580,14 @@ Return a list of ``(regex, view_function)`` pairs, something like this: async def hello_from(request): name = request.url_vars["name"] - return Response.html("Hello from {}".format( - html.escape(name) - )) + return Response.html( + "Hello from {}".format(html.escape(name)) + ) @hookimpl def register_routes(): - return [ - (r"^/hello-from/(?P.*)$", hello_from) - ] + return [(r"^/hello-from/(?P.*)$", hello_from)] The view functions can take a number of different optional arguments. The corresponding argument will be passed to your function depending on its named parameters - a form of dependency injection. @@ -606,10 +636,13 @@ This example registers a new ``datasette verify file1.db file2.db`` command that import click import sqlite3 + @hookimpl def register_commands(cli): @cli.command() - @click.argument("files", type=click.Path(exists=True), nargs=-1) + @click.argument( + "files", type=click.Path(exists=True), nargs=-1 + ) def verify(files): "Verify that files can be opened by Datasette" for file in files: @@ -617,7 +650,9 @@ This example registers a new ``datasette verify file1.db file2.db`` command that try: conn.execute("select * from sqlite_master") except sqlite3.DatabaseError: - raise click.ClickException("Invalid database: {}".format(file)) + raise click.ClickException( + "Invalid database: {}".format(file) + ) The new command can then be executed like so:: @@ -656,15 +691,18 @@ Each Facet subclass implements a new type of facet operation. The class should l async def suggest(self): # Use self.sql and self.params to suggest some facets suggested_facets = [] - suggested_facets.append({ - "name": column, # Or other unique name - # Construct the URL that will enable this facet: - "toggle_url": self.ds.absolute_url( - self.request, path_with_added_args( - self.request, {"_facet": column} - ) - ), - }) + suggested_facets.append( + { + "name": column, # Or other unique name + # Construct the URL that will enable this facet: + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet": column} + ), + ), + } + ) return suggested_facets async def facet_results(self): @@ -678,18 +716,25 @@ Each Facet subclass implements a new type of facet operation. The class should l try: facet_results_values = [] # More calculations... - facet_results_values.append({ - "value": value, - "label": label, - "count": count, - "toggle_url": self.ds.absolute_url(self.request, toggle_path), - "selected": selected, - }) - facet_results.append({ - "name": column, - "results": facet_results_values, - "truncated": len(facet_rows_results) > facet_size, - }) + facet_results_values.append( + { + "value": value, + "label": label, + "count": count, + "toggle_url": self.ds.absolute_url( + self.request, toggle_path + ), + "selected": selected, + } + ) + facet_results.append( + { + "name": column, + "results": facet_results_values, + "truncated": len(facet_rows_results) + > facet_size, + } + ) except QueryInterrupted: facets_timed_out.append(column) @@ -728,21 +773,33 @@ This example plugin adds a ``x-databases`` HTTP header listing the currently att def asgi_wrapper(datasette): def wrap_with_databases_header(app): @wraps(app) - async def add_x_databases_header(scope, receive, send): + async def add_x_databases_header( + scope, receive, send + ): async def wrapped_send(event): if event["type"] == "http.response.start": - original_headers = event.get("headers") or [] + original_headers = ( + event.get("headers") or [] + ) event = { "type": event["type"], "status": event["status"], - "headers": original_headers + [ - [b"x-databases", - ", ".join(datasette.databases.keys()).encode("utf-8")] + "headers": original_headers + + [ + [ + b"x-databases", + ", ".join( + datasette.databases.keys() + ).encode("utf-8"), + ] ], } await send(event) + await app(scope, receive, wrapped_send) + return add_x_databases_header + return wrap_with_databases_header Examples: `datasette-cors `__, `datasette-pyinstrument `__ @@ -759,7 +816,9 @@ This hook fires when the Datasette application server first starts up. You can i @hookimpl def startup(datasette): config = datasette.plugin_config("my-plugin") or {} - assert "required-setting" in config, "my-plugin requires setting required-setting" + assert ( + "required-setting" in config + ), "my-plugin requires setting required-setting" Or you can return an async function which will be awaited on startup. Use this option if you need to make any database queries: @@ -770,9 +829,12 @@ Or you can return an async function which will be awaited on startup. Use this o async def inner(): db = datasette.get_database() if "my_table" not in await db.table_names(): - await db.execute_write(""" + await db.execute_write( + """ create table my_table (mycol text) - """) + """ + ) + return inner Potential use-cases: @@ -815,6 +877,7 @@ Ues this hook to return a dictionary of additional :ref:`canned query `__ @@ -888,9 +960,12 @@ Here's an example that authenticates the actor based on an incoming API key: SECRET_KEY = "this-is-a-secret" + @hookimpl def actor_from_request(datasette, request): - authorization = request.headers.get("authorization") or "" + authorization = ( + request.headers.get("authorization") or "" + ) expected = "Bearer {}".format(SECRET_KEY) if secrets.compare_digest(authorization, expected): @@ -906,6 +981,7 @@ Instead of returning a dictionary, this function can return an awaitable functio from datasette import hookimpl + @hookimpl def actor_from_request(datasette, request): async def inner(): @@ -914,7 +990,8 @@ Instead of returning a dictionary, this function can return an awaitable functio return None # Look up ?_token=xxx in sessions table result = await datasette.get_database().execute( - "select count(*) from sessions where token = ?", [token] + "select count(*) from sessions where token = ?", + [token], ) if result.first()[0]: return {"token": token} @@ -952,7 +1029,7 @@ The hook should return an instance of ``datasette.filters.FilterArguments`` whic where_clauses=["id > :max_id"], params={"max_id": 5}, human_descriptions=["max_id is greater than 5"], - extra_context={} + extra_context={}, ) The arguments to the ``FilterArguments`` class constructor are as follows: @@ -973,10 +1050,13 @@ This example plugin causes 0 results to be returned if ``?_nothing=1`` is added from datasette import hookimpl from datasette.filters import FilterArguments + @hookimpl def filters_from_request(self, request): if request.args.get("_nothing"): - return FilterArguments(["1 = 0"], human_descriptions=["NOTHING"]) + return FilterArguments( + ["1 = 0"], human_descriptions=["NOTHING"] + ) Example: `datasette-leaflet-freedraw `_ @@ -1006,6 +1086,7 @@ Here's an example plugin which randomly selects if a permission should be allowe from datasette import hookimpl import random + @hookimpl def permission_allowed(action): if action != "view-instance": @@ -1024,11 +1105,16 @@ Here's an example that allows users to view the ``admin_log`` table only if thei async def inner(): if action == "execute-sql" and resource == "staff": return False - if action == "view-table" and resource == ("staff", "admin_log"): + if action == "view-table" and resource == ( + "staff", + "admin_log", + ): if not actor: return False user_id = actor["id"] - return await datasette.get_database("staff").execute( + return await datasette.get_database( + "staff" + ).execute( "select count(*) from admin_users where user_id = :user_id", {"user_id": user_id}, ) @@ -1059,18 +1145,21 @@ This example registers two new magic parameters: ``:_request_http_version`` retu from uuid import uuid4 + def uuid(key, request): if key == "new": return str(uuid4()) else: raise KeyError + def request(key, request): if key == "http_version": return request.scope["http_version"] else: raise KeyError + @hookimpl def register_magic_parameters(datasette): return [ @@ -1103,9 +1192,12 @@ This example returns a redirect to a ``/-/login`` page: from datasette import hookimpl from urllib.parse import urlencode + @hookimpl def forbidden(request, message): - return Response.redirect("/-/login?=" + urlencode({"message": message})) + return Response.redirect( + "/-/login?=" + urlencode({"message": message}) + ) The function can alternatively return an awaitable function if it needs to make any asynchronous method calls. This example renders a template: @@ -1114,10 +1206,15 @@ The function can alternatively return an awaitable function if it needs to make from datasette import hookimpl from datasette.utils.asgi import Response + @hookimpl def forbidden(datasette): async def inner(): - return Response.html(await datasette.render_template("forbidden.html")) + return Response.html( + await datasette.render_template( + "forbidden.html" + ) + ) return inner @@ -1147,11 +1244,17 @@ This example adds a new menu item but only if the signed in user is ``"root"``: from datasette import hookimpl + @hookimpl def menu_links(datasette, actor): if actor and actor.get("id") == "root": return [ - {"href": datasette.urls.path("/-/edit-schema"), "label": "Edit schema"}, + { + "href": datasette.urls.path( + "/-/edit-schema" + ), + "label": "Edit schema", + }, ] Using :ref:`internals_datasette_urls` here ensures that links in the menu will take the :ref:`setting_base_url` setting into account. @@ -1188,13 +1291,20 @@ This example adds a new table action if the signed in user is ``"root"``: from datasette import hookimpl + @hookimpl def table_actions(datasette, actor): if actor and actor.get("id") == "root": - return [{ - "href": datasette.urls.path("/-/edit-schema/{}/{}".format(database, table)), - "label": "Edit schema for this table", - }] + return [ + { + "href": datasette.urls.path( + "/-/edit-schema/{}/{}".format( + database, table + ) + ), + "label": "Edit schema for this table", + } + ] Example: `datasette-graphql `_ @@ -1238,6 +1348,7 @@ This example will disable CSRF protection for that specific URL path: from datasette import hookimpl + @hookimpl def skip_csrf(scope): return scope["path"] == "/submit-comment" @@ -1278,7 +1389,9 @@ This hook is responsible for returning a dictionary corresponding to Datasette : "description": get_instance_description(datasette), "databases": [], } - for db_name, db_data_dict in get_my_database_meta(datasette, database, table, key): + for db_name, db_data_dict in get_my_database_meta( + datasette, database, table, key + ): metadata["databases"][db_name] = db_data_dict # whatever we return here will be merged with any other plugins using this hook and # will be overwritten by a local metadata.yaml if one exists! From 498e1536f5f3e69c50934c0c031055e0af770bf6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 24 Apr 2022 09:08:56 -0700 Subject: [PATCH 0085/1218] One more blacken-docs test, refs #1718 --- docs/testing_plugins.rst | 45 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 8e4e3f91..6361d744 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -19,7 +19,10 @@ If you use the template described in :ref:`writing_plugins_cookiecutter` your pl response = await datasette.client.get("/-/plugins.json") assert response.status_code == 200 installed_plugins = {p["name"] for p in response.json()} - assert "datasette-plugin-template-demo" in installed_plugins + assert ( + "datasette-plugin-template-demo" + in installed_plugins + ) This test uses the :ref:`internals_datasette_client` object to exercise a test instance of Datasette. ``datasette.client`` is a wrapper around the `HTTPX `__ Python library which can imitate HTTP requests using ASGI. This is the recommended way to write tests against a Datasette instance. @@ -37,9 +40,7 @@ If you are building an installable package you can add them as test dependencies setup( name="datasette-my-plugin", # ... - extras_require={ - "test": ["pytest", "pytest-asyncio"] - }, + extras_require={"test": ["pytest", "pytest-asyncio"]}, tests_require=["datasette-my-plugin[test]"], ) @@ -87,31 +88,34 @@ Here's an example that uses the `sqlite-utils library Date: Sun, 24 Apr 2022 09:17:59 -0700 Subject: [PATCH 0086/1218] Finished applying blacken-docs, closes #1718 --- .github/workflows/test.yml | 3 +-- docs/testing_plugins.rst | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 38b62995..8d916e49 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,5 @@ jobs: cog --check docs/*.rst - name: Check if blacken-docs needs to be run run: | + # This fails on syntax errors, or a diff was applied blacken-docs -l 60 docs/*.rst - # This fails if a diff was generated: - git diff-index --quiet HEAD -- diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 6361d744..1bbaaac1 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -118,7 +118,9 @@ Here's an example that uses the `sqlite-utils library Date: Sun, 24 Apr 2022 09:59:20 -0700 Subject: [PATCH 0087/1218] Cosmetic tweaks after blacken-docs, refs #1718 --- docs/plugin_hooks.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index ace206b7..4560ec9a 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -162,9 +162,8 @@ And here's an example which adds a ``sql_first(sql_query)`` function which execu or database or next(iter(datasette.databases.keys())) ) - return (await datasette.execute(dbname, sql)).rows[ - 0 - ][0] + result = await datasette.execute(dbname, sql) + return result.rows[0][0] return {"sql_first": sql_first} @@ -422,8 +421,8 @@ If the value matches that pattern, the plugin returns an HTML link element: if not isinstance(value, str): return None stripped = value.strip() - if not stripped.startswith("{") and stripped.endswith( - "}" + if not ( + stripped.startswith("{") and stripped.endswith("}") ): return None try: From 579f59dcec43a91dd7d404e00b87a00afd8515f2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 Apr 2022 11:33:35 -0700 Subject: [PATCH 0088/1218] Refactor to remove RowTableShared class, closes #1719 Refs #1715 --- datasette/app.py | 3 +- datasette/views/row.py | 142 +++++++++++ datasette/views/table.py | 497 +++++++++++++++------------------------ 3 files changed, 328 insertions(+), 314 deletions(-) create mode 100644 datasette/views/row.py diff --git a/datasette/app.py b/datasette/app.py index c9eede26..d269372c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -40,7 +40,8 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import RowView, TableView +from .views.table import TableView +from .views.row import RowView from .renderer import json_renderer from .url_builder import Urls from .database import Database, QueryInterrupted diff --git a/datasette/views/row.py b/datasette/views/row.py new file mode 100644 index 00000000..b1c7362d --- /dev/null +++ b/datasette/views/row.py @@ -0,0 +1,142 @@ +from datasette.utils.asgi import NotFound +from datasette.database import QueryInterrupted +from .base import DataView +from datasette.utils import ( + tilde_decode, + urlsafe_components, + to_css_class, + escape_sqlite, +) +from .table import _sql_params_pks, display_columns_and_rows + + +class RowView(DataView): + name = "row" + + async def data(self, request, default_labels=False): + database_route = tilde_decode(request.url_vars["database"]) + table = tilde_decode(request.url_vars["table"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name + await self.ds.ensure_permissions( + request.actor, + [ + ("view-table", (database, table)), + ("view-database", database), + "view-instance", + ], + ) + pk_values = urlsafe_components(request.url_vars["pks"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database = db.name + sql, params, pks = await _sql_params_pks(db, table, pk_values) + results = await db.execute(sql, params, truncate=True) + columns = [r[0] for r in results.description] + rows = list(results.rows) + if not rows: + raise NotFound(f"Record not found: {pk_values}") + + async def template_data(): + display_columns, display_rows = await display_columns_and_rows( + self.ds, + database, + table, + results.description, + rows, + link_column=False, + truncate_cells=0, + ) + for column in display_columns: + column["sortable"] = False + return { + "foreign_key_tables": await self.foreign_key_tables( + database, table, pk_values + ), + "display_columns": display_columns, + "display_rows": display_rows, + "custom_table_templates": [ + f"_table-{to_css_class(database)}-{to_css_class(table)}.html", + f"_table-row-{to_css_class(database)}-{to_css_class(table)}.html", + "_table.html", + ], + "metadata": (self.ds.metadata("databases") or {}) + .get(database, {}) + .get("tables", {}) + .get(table, {}), + } + + data = { + "database": database, + "table": table, + "rows": rows, + "columns": columns, + "primary_keys": pks, + "primary_key_values": pk_values, + "units": self.ds.table_metadata(database, table).get("units", {}), + } + + if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): + data["foreign_key_tables"] = await self.foreign_key_tables( + database, table, pk_values + ) + + return ( + data, + template_data, + ( + f"row-{to_css_class(database)}-{to_css_class(table)}.html", + "row.html", + ), + ) + + async def foreign_key_tables(self, database, table, pk_values): + if len(pk_values) != 1: + return [] + db = self.ds.databases[database] + all_foreign_keys = await db.get_all_foreign_keys() + foreign_keys = all_foreign_keys[table]["incoming"] + if len(foreign_keys) == 0: + return [] + + sql = "select " + ", ".join( + [ + "(select count(*) from {table} where {column}=:id)".format( + table=escape_sqlite(fk["other_table"]), + column=escape_sqlite(fk["other_column"]), + ) + for fk in foreign_keys + ] + ) + try: + rows = list(await db.execute(sql, {"id": pk_values[0]})) + except QueryInterrupted: + # Almost certainly hit the timeout + return [] + + foreign_table_counts = dict( + zip( + [(fk["other_table"], fk["other_column"]) for fk in foreign_keys], + list(rows[0]), + ) + ) + foreign_key_tables = [] + for fk in foreign_keys: + count = ( + foreign_table_counts.get((fk["other_table"], fk["other_column"])) or 0 + ) + key = fk["other_column"] + if key.startswith("_"): + key += "__exact" + link = "{}?{}={}".format( + self.ds.urls.table(database, fk["other_table"]), + key, + ",".join(pk_values), + ) + foreign_key_tables.append({**fk, **{"count": count, "link": link}}) + return foreign_key_tables diff --git a/datasette/views/table.py b/datasette/views/table.py index dc85165e..37fb2ebb 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1,4 +1,3 @@ -import urllib import itertools import json @@ -9,7 +8,6 @@ from datasette.database import QueryInterrupted from datasette.utils import ( await_me_maybe, CustomRow, - MultiParams, append_querystring, compound_keys_after_sql, format_bytes, @@ -21,7 +19,6 @@ from datasette.utils import ( is_url, path_from_row_pks, path_with_added_args, - path_with_format, path_with_removed_args, path_with_replaced_args, to_css_class, @@ -68,7 +65,9 @@ class Row: return json.dumps(d, default=repr, indent=2) -class RowTableShared(DataView): +class TableView(DataView): + name = "table" + async def sortable_columns_for_table(self, database, table, use_rowid): db = self.ds.databases[database] table_metadata = self.ds.table_metadata(database, table) @@ -89,193 +88,6 @@ class RowTableShared(DataView): expandables.append((fk, label_column)) return expandables - async def display_columns_and_rows( - self, database, table, description, rows, link_column=False, truncate_cells=0 - ): - """Returns columns, rows for specified table - including fancy foreign key treatment""" - db = self.ds.databases[database] - table_metadata = self.ds.table_metadata(database, table) - column_descriptions = table_metadata.get("columns") or {} - column_details = {col.name: col for col in await db.table_column_details(table)} - sortable_columns = await self.sortable_columns_for_table(database, table, True) - pks = await db.primary_keys(table) - pks_for_display = pks - if not pks_for_display: - pks_for_display = ["rowid"] - - columns = [] - for r in description: - if r[0] == "rowid" and "rowid" not in column_details: - type_ = "integer" - notnull = 0 - else: - type_ = column_details[r[0]].type - notnull = column_details[r[0]].notnull - columns.append( - { - "name": r[0], - "sortable": r[0] in sortable_columns, - "is_pk": r[0] in pks_for_display, - "type": type_, - "notnull": notnull, - "description": column_descriptions.get(r[0]), - } - ) - - column_to_foreign_key_table = { - fk["column"]: fk["other_table"] - for fk in await db.foreign_keys_for_table(table) - } - - cell_rows = [] - base_url = self.ds.setting("base_url") - for row in rows: - cells = [] - # Unless we are a view, the first column is a link - either to the rowid - # or to the simple or compound primary key - if link_column: - is_special_link_column = len(pks) != 1 - pk_path = path_from_row_pks(row, pks, not pks, False) - cells.append( - { - "column": pks[0] if len(pks) == 1 else "Link", - "value_type": "pk", - "is_special_link_column": is_special_link_column, - "raw": pk_path, - "value": markupsafe.Markup( - '{flat_pks}'.format( - base_url=base_url, - table_path=self.ds.urls.table(database, table), - flat_pks=str(markupsafe.escape(pk_path)), - flat_pks_quoted=path_from_row_pks(row, pks, not pks), - ) - ), - } - ) - - for value, column_dict in zip(row, columns): - column = column_dict["name"] - if link_column and len(pks) == 1 and column == pks[0]: - # If there's a simple primary key, don't repeat the value as it's - # already shown in the link column. - continue - - # First let the plugins have a go - # pylint: disable=no-member - plugin_display_value = None - for candidate in pm.hook.render_cell( - value=value, - column=column, - table=table, - database=database, - datasette=self.ds, - ): - candidate = await await_me_maybe(candidate) - if candidate is not None: - plugin_display_value = candidate - break - if plugin_display_value: - display_value = plugin_display_value - elif isinstance(value, bytes): - formatted = format_bytes(len(value)) - display_value = markupsafe.Markup( - '<Binary: {:,} byte{}>'.format( - self.ds.urls.row_blob( - database, - table, - path_from_row_pks(row, pks, not pks), - column, - ), - ' title="{}"'.format(formatted) - if "bytes" not in formatted - else "", - len(value), - "" if len(value) == 1 else "s", - ) - ) - elif isinstance(value, dict): - # It's an expanded foreign key - display link to other row - label = value["label"] - value = value["value"] - # The table we link to depends on the column - other_table = column_to_foreign_key_table[column] - link_template = ( - LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE - ) - display_value = markupsafe.Markup( - link_template.format( - database=database, - base_url=base_url, - table=tilde_encode(other_table), - link_id=tilde_encode(str(value)), - id=str(markupsafe.escape(value)), - label=str(markupsafe.escape(label)) or "-", - ) - ) - elif value in ("", None): - display_value = markupsafe.Markup(" ") - elif is_url(str(value).strip()): - display_value = markupsafe.Markup( - '{url}'.format( - url=markupsafe.escape(value.strip()) - ) - ) - elif column in table_metadata.get("units", {}) and value != "": - # Interpret units using pint - value = value * ureg(table_metadata["units"][column]) - # Pint uses floating point which sometimes introduces errors in the compact - # representation, which we have to round off to avoid ugliness. In the vast - # majority of cases this rounding will be inconsequential. I hope. - value = round(value.to_compact(), 6) - display_value = markupsafe.Markup( - f"{value:~P}".replace(" ", " ") - ) - else: - display_value = str(value) - if truncate_cells and len(display_value) > truncate_cells: - display_value = display_value[:truncate_cells] + "\u2026" - - cells.append( - { - "column": column, - "value": display_value, - "raw": value, - "value_type": "none" - if value is None - else str(type(value).__name__), - } - ) - cell_rows.append(Row(cells)) - - if link_column: - # Add the link column header. - # If it's a simple primary key, we have to remove and re-add that column name at - # the beginning of the header row. - first_column = None - if len(pks) == 1: - columns = [col for col in columns if col["name"] != pks[0]] - first_column = { - "name": pks[0], - "sortable": len(pks) == 1, - "is_pk": True, - "type": column_details[pks[0]].type, - "notnull": column_details[pks[0]].notnull, - } - else: - first_column = { - "name": "Link", - "sortable": False, - "is_pk": False, - "type": "", - "notnull": 0, - } - columns = [first_column] + columns - return columns, cell_rows - - -class TableView(RowTableShared): - name = "table" - async def post(self, request): database_route = tilde_decode(request.url_vars["database"]) try: @@ -807,13 +619,17 @@ class TableView(RowTableShared): async def extra_template(): nonlocal sort - display_columns, display_rows = await self.display_columns_and_rows( + display_columns, display_rows = await display_columns_and_rows( + self.ds, database, table, results.description, rows, link_column=not is_view, truncate_cells=self.ds.setting("truncate_cells_html"), + sortable_columns=await self.sortable_columns_for_table( + database, table, use_rowid=True + ), ) metadata = ( (self.ds.metadata("databases") or {}) @@ -948,132 +764,187 @@ async def _sql_params_pks(db, table, pk_values): return sql, params, pks -class RowView(RowTableShared): - name = "row" +async def display_columns_and_rows( + datasette, + database, + table, + description, + rows, + link_column=False, + truncate_cells=0, + sortable_columns=None, +): + """Returns columns, rows for specified table - including fancy foreign key treatment""" + sortable_columns = sortable_columns or set() + db = datasette.databases[database] + table_metadata = datasette.table_metadata(database, table) + column_descriptions = table_metadata.get("columns") or {} + column_details = {col.name: col for col in await db.table_column_details(table)} + pks = await db.primary_keys(table) + pks_for_display = pks + if not pks_for_display: + pks_for_display = ["rowid"] - async def data(self, request, default_labels=False): - database_route = tilde_decode(request.url_vars["database"]) - table = tilde_decode(request.url_vars["table"]) - try: - db = self.ds.get_database(route=database_route) - except KeyError: - raise NotFound("Database not found: {}".format(database_route)) - database = db.name - await self.ds.ensure_permissions( - request.actor, - [ - ("view-table", (database, table)), - ("view-database", database), - "view-instance", - ], - ) - pk_values = urlsafe_components(request.url_vars["pks"]) - try: - db = self.ds.get_database(route=database_route) - except KeyError: - raise NotFound("Database not found: {}".format(database_route)) - database = db.name - sql, params, pks = await _sql_params_pks(db, table, pk_values) - results = await db.execute(sql, params, truncate=True) - columns = [r[0] for r in results.description] - rows = list(results.rows) - if not rows: - raise NotFound(f"Record not found: {pk_values}") - - async def template_data(): - display_columns, display_rows = await self.display_columns_and_rows( - database, - table, - results.description, - rows, - link_column=False, - truncate_cells=0, - ) - for column in display_columns: - column["sortable"] = False - return { - "foreign_key_tables": await self.foreign_key_tables( - database, table, pk_values - ), - "display_columns": display_columns, - "display_rows": display_rows, - "custom_table_templates": [ - f"_table-{to_css_class(database)}-{to_css_class(table)}.html", - f"_table-row-{to_css_class(database)}-{to_css_class(table)}.html", - "_table.html", - ], - "metadata": (self.ds.metadata("databases") or {}) - .get(database, {}) - .get("tables", {}) - .get(table, {}), + columns = [] + for r in description: + if r[0] == "rowid" and "rowid" not in column_details: + type_ = "integer" + notnull = 0 + else: + type_ = column_details[r[0]].type + notnull = column_details[r[0]].notnull + columns.append( + { + "name": r[0], + "sortable": r[0] in sortable_columns, + "is_pk": r[0] in pks_for_display, + "type": type_, + "notnull": notnull, + "description": column_descriptions.get(r[0]), } - - data = { - "database": database, - "table": table, - "rows": rows, - "columns": columns, - "primary_keys": pks, - "primary_key_values": pk_values, - "units": self.ds.table_metadata(database, table).get("units", {}), - } - - if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): - data["foreign_key_tables"] = await self.foreign_key_tables( - database, table, pk_values - ) - - return ( - data, - template_data, - ( - f"row-{to_css_class(database)}-{to_css_class(table)}.html", - "row.html", - ), ) - async def foreign_key_tables(self, database, table, pk_values): - if len(pk_values) != 1: - return [] - db = self.ds.databases[database] - all_foreign_keys = await db.get_all_foreign_keys() - foreign_keys = all_foreign_keys[table]["incoming"] - if len(foreign_keys) == 0: - return [] + column_to_foreign_key_table = { + fk["column"]: fk["other_table"] for fk in await db.foreign_keys_for_table(table) + } - sql = "select " + ", ".join( - [ - "(select count(*) from {table} where {column}=:id)".format( - table=escape_sqlite(fk["other_table"]), - column=escape_sqlite(fk["other_column"]), + cell_rows = [] + base_url = datasette.setting("base_url") + for row in rows: + cells = [] + # Unless we are a view, the first column is a link - either to the rowid + # or to the simple or compound primary key + if link_column: + is_special_link_column = len(pks) != 1 + pk_path = path_from_row_pks(row, pks, not pks, False) + cells.append( + { + "column": pks[0] if len(pks) == 1 else "Link", + "value_type": "pk", + "is_special_link_column": is_special_link_column, + "raw": pk_path, + "value": markupsafe.Markup( + '{flat_pks}'.format( + base_url=base_url, + table_path=datasette.urls.table(database, table), + flat_pks=str(markupsafe.escape(pk_path)), + flat_pks_quoted=path_from_row_pks(row, pks, not pks), + ) + ), + } + ) + + for value, column_dict in zip(row, columns): + column = column_dict["name"] + if link_column and len(pks) == 1 and column == pks[0]: + # If there's a simple primary key, don't repeat the value as it's + # already shown in the link column. + continue + + # First let the plugins have a go + # pylint: disable=no-member + plugin_display_value = None + for candidate in pm.hook.render_cell( + value=value, + column=column, + table=table, + database=database, + datasette=datasette, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break + if plugin_display_value: + display_value = plugin_display_value + elif isinstance(value, bytes): + formatted = format_bytes(len(value)) + display_value = markupsafe.Markup( + '<Binary: {:,} byte{}>'.format( + datasette.urls.row_blob( + database, + table, + path_from_row_pks(row, pks, not pks), + column, + ), + ' title="{}"'.format(formatted) + if "bytes" not in formatted + else "", + len(value), + "" if len(value) == 1 else "s", + ) ) - for fk in foreign_keys - ] - ) - try: - rows = list(await db.execute(sql, {"id": pk_values[0]})) - except QueryInterrupted: - # Almost certainly hit the timeout - return [] + elif isinstance(value, dict): + # It's an expanded foreign key - display link to other row + label = value["label"] + value = value["value"] + # The table we link to depends on the column + other_table = column_to_foreign_key_table[column] + link_template = LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE + display_value = markupsafe.Markup( + link_template.format( + database=database, + base_url=base_url, + table=tilde_encode(other_table), + link_id=tilde_encode(str(value)), + id=str(markupsafe.escape(value)), + label=str(markupsafe.escape(label)) or "-", + ) + ) + elif value in ("", None): + display_value = markupsafe.Markup(" ") + elif is_url(str(value).strip()): + display_value = markupsafe.Markup( + '{url}'.format( + url=markupsafe.escape(value.strip()) + ) + ) + elif column in table_metadata.get("units", {}) and value != "": + # Interpret units using pint + value = value * ureg(table_metadata["units"][column]) + # Pint uses floating point which sometimes introduces errors in the compact + # representation, which we have to round off to avoid ugliness. In the vast + # majority of cases this rounding will be inconsequential. I hope. + value = round(value.to_compact(), 6) + display_value = markupsafe.Markup(f"{value:~P}".replace(" ", " ")) + else: + display_value = str(value) + if truncate_cells and len(display_value) > truncate_cells: + display_value = display_value[:truncate_cells] + "\u2026" - foreign_table_counts = dict( - zip( - [(fk["other_table"], fk["other_column"]) for fk in foreign_keys], - list(rows[0]), + cells.append( + { + "column": column, + "value": display_value, + "raw": value, + "value_type": "none" + if value is None + else str(type(value).__name__), + } ) - ) - foreign_key_tables = [] - for fk in foreign_keys: - count = ( - foreign_table_counts.get((fk["other_table"], fk["other_column"])) or 0 - ) - key = fk["other_column"] - if key.startswith("_"): - key += "__exact" - link = "{}?{}={}".format( - self.ds.urls.table(database, fk["other_table"]), - key, - ",".join(pk_values), - ) - foreign_key_tables.append({**fk, **{"count": count, "link": link}}) - return foreign_key_tables + cell_rows.append(Row(cells)) + + if link_column: + # Add the link column header. + # If it's a simple primary key, we have to remove and re-add that column name at + # the beginning of the header row. + first_column = None + if len(pks) == 1: + columns = [col for col in columns if col["name"] != pks[0]] + first_column = { + "name": pks[0], + "sortable": len(pks) == 1, + "is_pk": True, + "type": column_details[pks[0]].type, + "notnull": column_details[pks[0]].notnull, + } + else: + first_column = { + "name": "Link", + "sortable": False, + "is_pk": False, + "type": "", + "notnull": 0, + } + columns = [first_column] + columns + return columns, cell_rows From c101f0efeec4f6e49298a542c5e2b59236cfa0ff Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 Apr 2022 15:34:29 -0700 Subject: [PATCH 0089/1218] datasette-total-page-time example of asgi_wrapper --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 4560ec9a..3c9ae2e2 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -801,7 +801,7 @@ This example plugin adds a ``x-databases`` HTTP header listing the currently att return wrap_with_databases_header -Examples: `datasette-cors `__, `datasette-pyinstrument `__ +Examples: `datasette-cors `__, `datasette-pyinstrument `__, `datasette-total-page-time `__ .. _plugin_hook_startup: From 8a0c38f0b89543e652a968a90d480859cb102510 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 Apr 2022 13:56:27 -0700 Subject: [PATCH 0090/1218] Rename database->database_name and table-> table_name, refs #1715 --- datasette/views/table.py | 143 +++++++++++++++++++++------------------ 1 file changed, 76 insertions(+), 67 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 37fb2ebb..d66adb82 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -68,22 +68,22 @@ class Row: class TableView(DataView): name = "table" - async def sortable_columns_for_table(self, database, table, use_rowid): - db = self.ds.databases[database] - table_metadata = self.ds.table_metadata(database, table) + async def sortable_columns_for_table(self, database_name, table_name, use_rowid): + db = self.ds.databases[database_name] + table_metadata = self.ds.table_metadata(database_name, table_name) if "sortable_columns" in table_metadata: sortable_columns = set(table_metadata["sortable_columns"]) else: - sortable_columns = set(await db.table_columns(table)) + sortable_columns = set(await db.table_columns(table_name)) if use_rowid: sortable_columns.add("rowid") return sortable_columns - async def expandable_columns(self, database, table): + async def expandable_columns(self, database_name, table_name): # Returns list of (fk_dict, label_column-or-None) pairs for that table expandables = [] - db = self.ds.databases[database] - for fk in await db.foreign_keys_for_table(table): + db = self.ds.databases[database_name] + for fk in await db.foreign_keys_for_table(table_name): label_column = await db.label_column_for_table(fk["other_table"]) expandables.append((fk, label_column)) return expandables @@ -94,17 +94,19 @@ class TableView(DataView): db = self.ds.get_database(route=database_route) except KeyError: raise NotFound("Database not found: {}".format(database_route)) - database = db.name - table = tilde_decode(request.url_vars["table"]) + database_name = db.name + table_name = tilde_decode(request.url_vars["table"]) # Handle POST to a canned query - canned_query = await self.ds.get_canned_query(database, table, request.actor) + canned_query = await self.ds.get_canned_query( + database_name, table_name, request.actor + ) assert canned_query, "You may only POST to a canned query" return await QueryView(self.ds).data( request, canned_query["sql"], metadata=canned_query, editable=False, - canned_query=table, + canned_query=table_name, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) @@ -150,45 +152,47 @@ class TableView(DataView): _size=None, ): database_route = tilde_decode(request.url_vars["database"]) - table = tilde_decode(request.url_vars["table"]) + table_name = tilde_decode(request.url_vars["table"]) try: db = self.ds.get_database(route=database_route) except KeyError: raise NotFound("Database not found: {}".format(database_route)) - database = db.name + database_name = db.name # If this is a canned query, not a table, then dispatch to QueryView instead - canned_query = await self.ds.get_canned_query(database, table, request.actor) + canned_query = await self.ds.get_canned_query( + database_name, table_name, request.actor + ) if canned_query: return await QueryView(self.ds).data( request, canned_query["sql"], metadata=canned_query, editable=False, - canned_query=table, + canned_query=table_name, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) - is_view = bool(await db.get_view_definition(table)) - table_exists = bool(await db.table_exists(table)) + is_view = bool(await db.get_view_definition(table_name)) + table_exists = bool(await db.table_exists(table_name)) # If table or view not found, return 404 if not is_view and not table_exists: - raise NotFound(f"Table not found: {table}") + raise NotFound(f"Table not found: {table_name}") # Ensure user has permission to view this table await self.ds.ensure_permissions( request.actor, [ - ("view-table", (database, table)), - ("view-database", database), + ("view-table", (database_name, table_name)), + ("view-database", database_name), "view-instance", ], ) private = not await self.ds.permission_allowed( - None, "view-table", (database, table), default=True + None, "view-table", (database_name, table_name), default=True ) # Handle ?_filter_column and redirect, if present @@ -216,8 +220,8 @@ class TableView(DataView): ) # Introspect columns and primary keys for table - pks = await db.primary_keys(table) - table_columns = await db.table_columns(table) + pks = await db.primary_keys(table_name) + table_columns = await db.table_columns(table_name) # Take ?_col= and ?_nocol= into account specified_columns = await self.columns_to_select(table_columns, pks, request) @@ -248,7 +252,7 @@ class TableView(DataView): nocount = True nofacet = True - table_metadata = self.ds.table_metadata(database, table) + table_metadata = self.ds.table_metadata(database_name, table_name) units = table_metadata.get("units", {}) # Arguments that start with _ and don't contain a __ are @@ -262,7 +266,7 @@ class TableView(DataView): # Build where clauses from query string arguments filters = Filters(sorted(filter_args), units, ureg) - where_clauses, params = filters.build_where_clauses(table) + where_clauses, params = filters.build_where_clauses(table_name) # Execute filters_from_request plugin hooks - including the default # ones that live in datasette/filters.py @@ -271,8 +275,8 @@ class TableView(DataView): for hook in pm.hook.filters_from_request( request=request, - table=table, - database=database, + table=table_name, + database=database_name, datasette=self.ds, ): filter_arguments = await await_me_maybe(hook) @@ -284,7 +288,7 @@ class TableView(DataView): # Deal with custom sort orders sortable_columns = await self.sortable_columns_for_table( - database, table, use_rowid + database_name, table_name, use_rowid ) sort = request.args.get("_sort") sort_desc = request.args.get("_sort_desc") @@ -309,7 +313,7 @@ class TableView(DataView): order_by = f"{escape_sqlite(sort_desc)} desc" from_sql = "from {table_name} {where}".format( - table_name=escape_sqlite(table), + table_name=escape_sqlite(table_name), where=("where {} ".format(" and ".join(where_clauses))) if where_clauses else "", @@ -422,7 +426,7 @@ class TableView(DataView): sql_no_order_no_limit = ( "select {select_all_columns} from {table_name} {where}".format( select_all_columns=select_all_columns, - table_name=escape_sqlite(table), + table_name=escape_sqlite(table_name), where=where_clause, ) ) @@ -430,7 +434,7 @@ class TableView(DataView): # This is the SQL that populates the main table on the page sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format( select_specified_columns=select_specified_columns, - table_name=escape_sqlite(table), + table_name=escape_sqlite(table_name), where=where_clause, order_by=order_by, page_size=page_size + 1, @@ -448,13 +452,13 @@ class TableView(DataView): if ( not db.is_mutable and self.ds.inspect_data - and count_sql == f"select count(*) from {table} " + and count_sql == f"select count(*) from {table_name} " ): # We can use a previously cached table row count try: - filtered_table_rows_count = self.ds.inspect_data[database]["tables"][ - table - ]["count"] + filtered_table_rows_count = self.ds.inspect_data[database_name][ + "tables" + ][table_name]["count"] except KeyError: pass @@ -484,10 +488,10 @@ class TableView(DataView): klass( self.ds, request, - database, + database_name, sql=sql_no_order_no_limit, params=params, - table=table, + table=table_name, metadata=table_metadata, row_count=filtered_table_rows_count, ) @@ -527,7 +531,7 @@ class TableView(DataView): # Expand labeled columns if requested expanded_columns = [] - expandable_columns = await self.expandable_columns(database, table) + expandable_columns = await self.expandable_columns(database_name, table_name) columns_to_expand = None try: all_labels = value_as_boolean(request.args.get("_labels", "")) @@ -554,7 +558,9 @@ class TableView(DataView): values = [row[column_index] for row in rows] # Expand them expanded_labels.update( - await self.ds.expand_foreign_keys(database, table, column, values) + await self.ds.expand_foreign_keys( + database_name, table_name, column, values + ) ) if expanded_labels: # Rewrite the rows @@ -621,21 +627,21 @@ class TableView(DataView): display_columns, display_rows = await display_columns_and_rows( self.ds, - database, - table, + database_name, + table_name, results.description, rows, link_column=not is_view, truncate_cells=self.ds.setting("truncate_cells_html"), sortable_columns=await self.sortable_columns_for_table( - database, table, use_rowid=True + database_name, table_name, use_rowid=True ), ) metadata = ( (self.ds.metadata("databases") or {}) - .get(database, {}) + .get(database_name, {}) .get("tables", {}) - .get(table, {}) + .get(table_name, {}) ) self.ds.update_with_inherited_metadata(metadata) @@ -661,8 +667,8 @@ class TableView(DataView): links = [] for hook in pm.hook.table_actions( datasette=self.ds, - table=table, - database=database, + table=table_name, + database=database_name, actor=request.actor, request=request, ): @@ -703,13 +709,13 @@ class TableView(DataView): "sort_desc": sort_desc, "disable_sort": is_view, "custom_table_templates": [ - f"_table-{to_css_class(database)}-{to_css_class(table)}.html", - f"_table-table-{to_css_class(database)}-{to_css_class(table)}.html", + f"_table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", + f"_table-table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", "_table.html", ], "metadata": metadata, - "view_definition": await db.get_view_definition(table), - "table_definition": await db.get_table_definition(table), + "view_definition": await db.get_view_definition(table_name), + "table_definition": await db.get_table_definition(table_name), "datasette_allow_facet": "true" if self.ds.setting("allow_facet") else "false", @@ -719,8 +725,8 @@ class TableView(DataView): return ( { - "database": database, - "table": table, + "database": database_name, + "table": table_name, "is_view": is_view, "human_description_en": human_description_en, "rows": rows[:page_size], @@ -738,12 +744,12 @@ class TableView(DataView): "next_url": next_url, "private": private, "allow_execute_sql": await self.ds.permission_allowed( - request.actor, "execute-sql", database, default=True + request.actor, "execute-sql", database_name, default=True ), }, extra_template, ( - f"table-{to_css_class(database)}-{to_css_class(table)}.html", + f"table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", "table.html", ), ) @@ -766,8 +772,8 @@ async def _sql_params_pks(db, table, pk_values): async def display_columns_and_rows( datasette, - database, - table, + database_name, + table_name, description, rows, link_column=False, @@ -776,11 +782,13 @@ async def display_columns_and_rows( ): """Returns columns, rows for specified table - including fancy foreign key treatment""" sortable_columns = sortable_columns or set() - db = datasette.databases[database] - table_metadata = datasette.table_metadata(database, table) + db = datasette.databases[database_name] + table_metadata = datasette.table_metadata(database_name, table_name) column_descriptions = table_metadata.get("columns") or {} - column_details = {col.name: col for col in await db.table_column_details(table)} - pks = await db.primary_keys(table) + column_details = { + col.name: col for col in await db.table_column_details(table_name) + } + pks = await db.primary_keys(table_name) pks_for_display = pks if not pks_for_display: pks_for_display = ["rowid"] @@ -805,7 +813,8 @@ async def display_columns_and_rows( ) column_to_foreign_key_table = { - fk["column"]: fk["other_table"] for fk in await db.foreign_keys_for_table(table) + fk["column"]: fk["other_table"] + for fk in await db.foreign_keys_for_table(table_name) } cell_rows = [] @@ -826,7 +835,7 @@ async def display_columns_and_rows( "value": markupsafe.Markup( '{flat_pks}'.format( base_url=base_url, - table_path=datasette.urls.table(database, table), + table_path=datasette.urls.table(database_name, table_name), flat_pks=str(markupsafe.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) @@ -847,8 +856,8 @@ async def display_columns_and_rows( for candidate in pm.hook.render_cell( value=value, column=column, - table=table, - database=database, + table=table_name, + database=database_name, datasette=datasette, ): candidate = await await_me_maybe(candidate) @@ -862,8 +871,8 @@ async def display_columns_and_rows( display_value = markupsafe.Markup( '<Binary: {:,} byte{}>'.format( datasette.urls.row_blob( - database, - table, + database_name, + table_name, path_from_row_pks(row, pks, not pks), column, ), @@ -883,7 +892,7 @@ async def display_columns_and_rows( link_template = LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE display_value = markupsafe.Markup( link_template.format( - database=database, + database=database_name, base_url=base_url, table=tilde_encode(other_table), link_id=tilde_encode(str(value)), From 942411ef946e9a34a2094944d3423cddad27efd3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 Apr 2022 15:48:56 -0700 Subject: [PATCH 0091/1218] Execute some TableView queries in parallel Use ?_noparallel=1 to opt out (undocumented, useful for benchmark comparisons) Refs #1723, #1715 --- datasette/views/table.py | 91 +++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index d66adb82..23289b29 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1,3 +1,4 @@ +import asyncio import itertools import json @@ -5,6 +6,7 @@ import markupsafe from datasette.plugins import pm from datasette.database import QueryInterrupted +from datasette import tracer from datasette.utils import ( await_me_maybe, CustomRow, @@ -150,6 +152,16 @@ class TableView(DataView): default_labels=False, _next=None, _size=None, + ): + with tracer.trace_child_tasks(): + return await self._data_traced(request, default_labels, _next, _size) + + async def _data_traced( + self, + request, + default_labels=False, + _next=None, + _size=None, ): database_route = tilde_decode(request.url_vars["database"]) table_name = tilde_decode(request.url_vars["table"]) @@ -159,6 +171,20 @@ class TableView(DataView): raise NotFound("Database not found: {}".format(database_route)) database_name = db.name + # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather + async def _gather_parallel(*args): + return await asyncio.gather(*args) + + async def _gather_sequential(*args): + results = [] + for fn in args: + results.append(await fn) + return results + + gather = ( + _gather_sequential if request.args.get("_noparallel") else _gather_parallel + ) + # If this is a canned query, not a table, then dispatch to QueryView instead canned_query = await self.ds.get_canned_query( database_name, table_name, request.actor @@ -174,8 +200,12 @@ class TableView(DataView): write=bool(canned_query.get("write")), ) - is_view = bool(await db.get_view_definition(table_name)) - table_exists = bool(await db.table_exists(table_name)) + is_view, table_exists = map( + bool, + await gather( + db.get_view_definition(table_name), db.table_exists(table_name) + ), + ) # If table or view not found, return 404 if not is_view and not table_exists: @@ -497,33 +527,44 @@ class TableView(DataView): ) ) - if not nofacet: - for facet in facet_instances: - ( + async def execute_facets(): + if not nofacet: + # Run them in parallel + facet_awaitables = [facet.facet_results() for facet in facet_instances] + facet_awaitable_results = await gather(*facet_awaitables) + for ( instance_facet_results, instance_facets_timed_out, - ) = await facet.facet_results() - for facet_info in instance_facet_results: - base_key = facet_info["name"] - key = base_key - i = 1 - while key in facet_results: - i += 1 - key = f"{base_key}_{i}" - facet_results[key] = facet_info - facets_timed_out.extend(instance_facets_timed_out) + ) in facet_awaitable_results: + for facet_info in instance_facet_results: + base_key = facet_info["name"] + key = base_key + i = 1 + while key in facet_results: + i += 1 + key = f"{base_key}_{i}" + facet_results[key] = facet_info + facets_timed_out.extend(instance_facets_timed_out) - # Calculate suggested facets suggested_facets = [] - if ( - self.ds.setting("suggest_facets") - and self.ds.setting("allow_facet") - and not _next - and not nofacet - and not nosuggest - ): - for facet in facet_instances: - suggested_facets.extend(await facet.suggest()) + + async def execute_suggested_facets(): + # Calculate suggested facets + if ( + self.ds.setting("suggest_facets") + and self.ds.setting("allow_facet") + and not _next + and not nofacet + and not nosuggest + ): + # Run them in parallel + facet_suggest_awaitables = [ + facet.suggest() for facet in facet_instances + ] + for suggest_result in await gather(*facet_suggest_awaitables): + suggested_facets.extend(suggest_result) + + await gather(execute_facets(), execute_suggested_facets()) # Figure out columns and rows for the query columns = [r[0] for r in results.description] From 94a3171b01fde5c52697aeeff052e3ad4bab5391 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 Apr 2022 13:29:11 -0700 Subject: [PATCH 0092/1218] .plugin_config() can return None --- docs/internals.rst | 4 ++++ docs/writing_plugins.rst | 2 ++ 2 files changed, 6 insertions(+) diff --git a/docs/internals.rst b/docs/internals.rst index aad608dc..18822d47 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -288,6 +288,10 @@ All databases are listed, irrespective of user permissions. This means that the This method lets you read plugin configuration values that were set in ``metadata.json``. See :ref:`writing_plugins_configuration` for full details of how this method should be used. +The return value will be the value from the configuration file - usually a dictionary. + +If the plugin is not configured the return value will be ``None``. + .. _datasette_render_template: await .render_template(template, context=None, request=None) diff --git a/docs/writing_plugins.rst b/docs/writing_plugins.rst index 89f7f5eb..9aee70f6 100644 --- a/docs/writing_plugins.rst +++ b/docs/writing_plugins.rst @@ -182,6 +182,8 @@ When you are writing plugins, you can access plugin configuration like this usin This will return the ``{"latitude_column": "lat", "longitude_column": "lng"}`` in the above example. +If there is no configuration for that plugin, the method will return ``None``. + If it cannot find the requested configuration at the table layer, it will fall back to the database layer and then the root layer. For example, a user may have set the plugin configuration option like so:: { From 4afc1afc721ac0d14f58b0f8339c1bf431d5313c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 12:13:11 -0700 Subject: [PATCH 0093/1218] Depend on click-default-group-wheel>=1.2.2 Refs #1733 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7f0562fd..fcb43aa1 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ setup( install_requires=[ "asgiref>=3.2.10,<3.6.0", "click>=7.1.1,<8.2.0", - "click-default-group~=1.2.2", + "click-default-group-wheel>=1.2.2", "Jinja2>=2.10.3,<3.1.0", "hupper~=1.9", "httpx>=0.20", From 7e03394734307a5761e4c98d902b6a8cab188562 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 12:20:14 -0700 Subject: [PATCH 0094/1218] Optional uvicorn import for Pyodide, refs #1733 --- datasette/app.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index d269372c..a5330458 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -26,7 +26,6 @@ from itsdangerous import URLSafeSerializer from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader from jinja2.environment import Template from jinja2.exceptions import TemplateNotFound -import uvicorn from .views.base import DatasetteError, ureg from .views.database import DatabaseDownload, DatabaseView @@ -806,6 +805,15 @@ class Datasette: datasette_version = {"version": __version__} if self.version_note: datasette_version["note"] = self.version_note + + try: + # Optional import to avoid breaking Pyodide + # https://github.com/simonw/datasette/issues/1733#issuecomment-1115268245 + import uvicorn + + uvicorn_version = uvicorn.__version__ + except ImportError: + uvicorn_version = None info = { "python": { "version": ".".join(map(str, sys.version_info[:3])), @@ -813,7 +821,7 @@ class Datasette: }, "datasette": datasette_version, "asgi": "3.0", - "uvicorn": uvicorn.__version__, + "uvicorn": uvicorn_version, "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, From 687907aa2b1bde4de6ae7155b0e2a949ca015ca9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 12:39:06 -0700 Subject: [PATCH 0095/1218] Remove python-baseconv dependency, refs #1733, closes #1734 --- datasette/actor_auth_cookie.py | 2 +- datasette/utils/baseconv.py | 59 ++++++++++++++++++++++++++++++++++ docs/authentication.rst | 4 +-- setup.py | 1 - tests/test_auth.py | 2 +- 5 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 datasette/utils/baseconv.py diff --git a/datasette/actor_auth_cookie.py b/datasette/actor_auth_cookie.py index 15ecd331..368213af 100644 --- a/datasette/actor_auth_cookie.py +++ b/datasette/actor_auth_cookie.py @@ -1,6 +1,6 @@ from datasette import hookimpl from itsdangerous import BadSignature -import baseconv +from datasette.utils import baseconv import time diff --git a/datasette/utils/baseconv.py b/datasette/utils/baseconv.py new file mode 100644 index 00000000..27e4fb00 --- /dev/null +++ b/datasette/utils/baseconv.py @@ -0,0 +1,59 @@ +""" +Convert numbers from base 10 integers to base X strings and back again. + +Sample usage: + +>>> base20 = BaseConverter('0123456789abcdefghij') +>>> base20.from_decimal(1234) +'31e' +>>> base20.to_decimal('31e') +1234 + +Originally shared here: https://www.djangosnippets.org/snippets/1431/ +""" + + +class BaseConverter(object): + decimal_digits = "0123456789" + + def __init__(self, digits): + self.digits = digits + + def from_decimal(self, i): + return self.convert(i, self.decimal_digits, self.digits) + + def to_decimal(self, s): + return int(self.convert(s, self.digits, self.decimal_digits)) + + def convert(number, fromdigits, todigits): + # Based on http://code.activestate.com/recipes/111286/ + if str(number)[0] == "-": + number = str(number)[1:] + neg = 1 + else: + neg = 0 + + # make an integer out of the number + x = 0 + for digit in str(number): + x = x * len(fromdigits) + fromdigits.index(digit) + + # create the result in base 'len(todigits)' + if x == 0: + res = todigits[0] + else: + res = "" + while x > 0: + digit = x % len(todigits) + res = todigits[digit] + res + x = int(x / len(todigits)) + if neg: + res = "-" + res + return res + + convert = staticmethod(convert) + + +bin = BaseConverter("01") +hexconv = BaseConverter("0123456789ABCDEF") +base62 = BaseConverter("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz") diff --git a/docs/authentication.rst b/docs/authentication.rst index 24960733..685dab15 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -401,12 +401,12 @@ Including an expiry time ``ds_actor`` cookies can optionally include a signed expiry timestamp, after which the cookies will no longer be valid. Authentication plugins may chose to use this mechanism to limit the lifetime of the cookie. For example, if a plugin implements single-sign-on against another source it may decide to set short-lived cookies so that if the user is removed from the SSO system their existing Datasette cookies will stop working shortly afterwards. -To include an expiry, add a ``"e"`` key to the cookie value containing a `base62-encoded integer `__ representing the timestamp when the cookie should expire. For example, here's how to set a cookie that expires after 24 hours: +To include an expiry, add a ``"e"`` key to the cookie value containing a base62-encoded integer representing the timestamp when the cookie should expire. For example, here's how to set a cookie that expires after 24 hours: .. code-block:: python import time - import baseconv + from datasette.utils import baseconv expires_at = int(time.time()) + (24 * 60 * 60) diff --git a/setup.py b/setup.py index fcb43aa1..ca449f02 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,6 @@ setup( "PyYAML>=5.3,<7.0", "mergedeep>=1.1.1,<1.4.0", "itsdangerous>=1.1,<3.0", - "python-baseconv==1.2.2", ], entry_points=""" [console_scripts] diff --git a/tests/test_auth.py b/tests/test_auth.py index 974f89ea..4ef35a76 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,5 +1,5 @@ from .fixtures import app_client -import baseconv +from datasette.utils import baseconv import pytest import time From a29c1277896b6a7905ef5441c42a37bc15f67599 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 12:44:09 -0700 Subject: [PATCH 0096/1218] Rename to_decimal/from_decimal to decode/encode, refs #1734 --- datasette/utils/baseconv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasette/utils/baseconv.py b/datasette/utils/baseconv.py index 27e4fb00..c4b64908 100644 --- a/datasette/utils/baseconv.py +++ b/datasette/utils/baseconv.py @@ -19,10 +19,10 @@ class BaseConverter(object): def __init__(self, digits): self.digits = digits - def from_decimal(self, i): + def encode(self, i): return self.convert(i, self.decimal_digits, self.digits) - def to_decimal(self, s): + def decode(self, s): return int(self.convert(s, self.digits, self.decimal_digits)) def convert(number, fromdigits, todigits): From 3f00a29141bdea5be747f6d1c93871ccdb792167 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 13:15:27 -0700 Subject: [PATCH 0097/1218] Clean up compatibility with Pyodide (#1736) * Optional uvicorn import for Pyodide, refs #1733 * --setting num_sql_threads 0 to disable threading, refs #1735 --- datasette/app.py | 11 ++++++++--- datasette/database.py | 19 +++++++++++++++++++ docs/settings.rst | 2 ++ tests/test_internals_datasette.py | 14 +++++++++++++- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index a5330458..b7b84371 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -288,9 +288,12 @@ class Datasette: self._settings = dict(DEFAULT_SETTINGS, **(settings or {})) self.renderers = {} # File extension -> (renderer, can_render) functions self.version_note = version_note - self.executor = futures.ThreadPoolExecutor( - max_workers=self.setting("num_sql_threads") - ) + if self.setting("num_sql_threads") == 0: + self.executor = None + else: + self.executor = futures.ThreadPoolExecutor( + max_workers=self.setting("num_sql_threads") + ) self.max_returned_rows = self.setting("max_returned_rows") self.sql_time_limit_ms = self.setting("sql_time_limit_ms") self.page_size = self.setting("default_page_size") @@ -862,6 +865,8 @@ class Datasette: ] def _threads(self): + if self.setting("num_sql_threads") == 0: + return {"num_threads": 0, "threads": []} threads = list(threading.enumerate()) d = { "num_threads": len(threads), diff --git a/datasette/database.py b/datasette/database.py index ba594a8c..44d32667 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -45,6 +45,9 @@ class Database: self._cached_table_counts = None self._write_thread = None self._write_queue = None + # These are used when in non-threaded mode: + self._read_connection = None + self._write_connection = None if not self.is_mutable and not self.is_memory: p = Path(path) self.hash = inspect_hash(p) @@ -134,6 +137,14 @@ class Database: return results async def execute_write_fn(self, fn, block=True): + if self.ds.executor is None: + # non-threaded mode + if self._write_connection is None: + self._write_connection = self.connect(write=True) + self.ds._prepare_connection(self._write_connection, self.name) + return fn(self._write_connection) + + # threaded mode task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io") if self._write_queue is None: self._write_queue = queue.Queue() @@ -177,6 +188,14 @@ class Database: task.reply_queue.sync_q.put(result) async def execute_fn(self, fn): + if self.ds.executor is None: + # non-threaded mode + if self._read_connection is None: + self._read_connection = self.connect() + self.ds._prepare_connection(self._read_connection, self.name) + return fn(self._read_connection) + + # threaded mode def in_thread(): conn = getattr(connections, self.name, None) if not conn: diff --git a/docs/settings.rst b/docs/settings.rst index 60c4b36d..8437fb04 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -107,6 +107,8 @@ Maximum number of threads in the thread pool Datasette uses to execute SQLite qu datasette mydatabase.db --setting num_sql_threads 10 +Setting this to 0 turns off threaded SQL queries entirely - useful for environments that do not support threading such as `Pyodide `__. + .. _setting_allow_facet: allow_facet diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index cc200a2d..1dc14cab 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -1,7 +1,7 @@ """ Tests for the datasette.app.Datasette class """ -from datasette.app import Datasette +from datasette.app import Datasette, Database from itsdangerous import BadSignature from .fixtures import app_client import pytest @@ -63,3 +63,15 @@ async def test_datasette_constructor(): "hash": None, } ] + + +@pytest.mark.asyncio +async def test_num_sql_threads_zero(): + ds = Datasette([], memory=True, settings={"num_sql_threads": 0}) + db = ds.add_database(Database(ds, memory_name="test_num_sql_threads_zero")) + await db.execute_write("create table t(id integer primary key)") + await db.execute_write("insert into t (id) values (1)") + response = await ds.client.get("/-/threads.json") + assert response.json() == {"num_threads": 0, "threads": []} + response2 = await ds.client.get("/test_num_sql_threads_zero/t.json?_shape=array") + assert response2.json() == [{"id": 1}] From 943aa2e1f7341cb51e60332cde46bde650c64217 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 14:38:34 -0700 Subject: [PATCH 0098/1218] Release 0.62a0 Refs #1683, #1701, #1712, #1717, #1718, #1733 --- datasette/version.py | 2 +- docs/changelog.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 02451a1e..cf18c441 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.61.1" +__version__ = "0.62a0" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 03cf62b6..74814fcb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,20 @@ Changelog ========= +.. _v0_62a0: + +0.62a0 (2022-05-02) +------------------- + +- Datasette now runs some SQL queries in parallel. This has limited impact on performance, see `this research issue `__ for details. +- Datasette should now be compatible with Pyodide. (:issue:`1733`) +- ``datasette publish cloudrun`` has a new ``--timeout`` option which can be used to increase the time limit applied by the Google Cloud build environment. Thanks, Tim Sherratt. (`#1717 `__) +- Spaces in database names are now encoded as ``+`` rather than ``~20``. (:issue:`1701`) +- ```` is now displayed as ```` and is accompanied by tooltip showing "2.3MB". (:issue:`1712`) +- Don't show the facet option in the cog menu if faceting is not allowed. (:issue:`1683`) +- Code examples in the documentation are now all formatted using Black. (:issue:`1718`) +- ``Request.fake()`` method is now documented, see :ref:`internals_request`. + .. _v0_61_1: 0.61.1 (2022-03-23) From 847d6b1aac38c3e776e8c600eed07ba4c9ac9942 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 16:32:24 -0700 Subject: [PATCH 0099/1218] Test wheel against Pyodide, refs #1737, #1733 --- .github/workflows/test-pyodide.yml | 28 ++++++++++++++++++ test-in-pyodide-with-shot-scraper.sh | 43 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 .github/workflows/test-pyodide.yml create mode 100755 test-in-pyodide-with-shot-scraper.sh diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml new file mode 100644 index 00000000..3715d055 --- /dev/null +++ b/.github/workflows/test-pyodide.yml @@ -0,0 +1,28 @@ +name: Test in Pyodide with shot-scraper + +on: + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + cache: 'pip' + cache-dependency-path: '**/setup.py' + - name: Cache Playwright browsers + uses: actions/cache@v2 + with: + path: ~/.cache/ms-playwright/ + key: ${{ runner.os }}-browsers + - name: Install Playwright dependencies + run: | + pip install shot-scraper + shot-scraper install + - name: Run test + run: | + ./test-in-pyodide-with-shot-scraper.sh diff --git a/test-in-pyodide-with-shot-scraper.sh b/test-in-pyodide-with-shot-scraper.sh new file mode 100755 index 00000000..0f29c0e0 --- /dev/null +++ b/test-in-pyodide-with-shot-scraper.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Build the wheel +python3 -m build + +# Find name of wheel +wheel=$(basename $(ls dist/*.whl)) +# strip off the dist/ + + +# Create a blank index page +echo ' + +' > dist/index.html + +# Run a server for that dist/ folder +cd dist +python3 -m http.server 8529 & +cd .. + +shot-scraper javascript http://localhost:8529/ " +async () => { + let pyodide = await loadPyodide(); + await pyodide.loadPackage(['micropip', 'ssl', 'setuptools']); + let output = await pyodide.runPythonAsync(\` + import micropip + await micropip.install('h11==0.12.0') + await micropip.install('http://localhost:8529/$wheel') + import ssl + import setuptools + from datasette.app import Datasette + ds = Datasette(memory=True, settings={'num_sql_threads': 0}) + (await ds.client.get('/_memory.json?sql=select+55+as+itworks&_shape=array')).text + \`); + if (JSON.parse(output)[0].itworks != 55) { + throw 'Got ' + output + ', expected itworks: 55'; + } + return 'Test passed!'; +} +" + +# Shut down the server +pkill -f 'http.server 8529' From c0cbcf2aba0d8393ba464acc515803ebf2eeda12 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 16:36:58 -0700 Subject: [PATCH 0100/1218] Tweaks to test scripts, refs #1737 --- .github/workflows/test-pyodide.yml | 2 +- test-in-pyodide-with-shot-scraper.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml index 3715d055..beb6a5fb 100644 --- a/.github/workflows/test-pyodide.yml +++ b/.github/workflows/test-pyodide.yml @@ -21,7 +21,7 @@ jobs: key: ${{ runner.os }}-browsers - name: Install Playwright dependencies run: | - pip install shot-scraper + pip install shot-scraper build shot-scraper install - name: Run test run: | diff --git a/test-in-pyodide-with-shot-scraper.sh b/test-in-pyodide-with-shot-scraper.sh index 0f29c0e0..e5df7398 100755 --- a/test-in-pyodide-with-shot-scraper.sh +++ b/test-in-pyodide-with-shot-scraper.sh @@ -1,12 +1,12 @@ #!/bin/bash +set -e +# So the script fails if there are any errors # Build the wheel python3 -m build -# Find name of wheel +# Find name of wheel, strip off the dist/ wheel=$(basename $(ls dist/*.whl)) -# strip off the dist/ - # Create a blank index page echo ' From d60f163528f466b1127b2935c3b6869c34fd6545 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 2 May 2022 16:40:49 -0700 Subject: [PATCH 0101/1218] Run on push and PR, closes #1737 --- .github/workflows/test-pyodide.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml index beb6a5fb..1b75aade 100644 --- a/.github/workflows/test-pyodide.yml +++ b/.github/workflows/test-pyodide.yml @@ -1,6 +1,8 @@ name: Test in Pyodide with shot-scraper on: + push: + pull_request: workflow_dispatch: jobs: From 280ff372ab30df244f6c54f6f3002da57334b3d7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 3 May 2022 07:59:18 -0700 Subject: [PATCH 0102/1218] ETag support for .db downloads, closes #1739 --- datasette/utils/testing.py | 20 ++++++++++++++++++-- datasette/views/database.py | 7 +++++++ tests/test_html.py | 10 +++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py index 94750b1f..640c94e6 100644 --- a/datasette/utils/testing.py +++ b/datasette/utils/testing.py @@ -55,10 +55,21 @@ class TestClient: @async_to_sync async def get( - self, path, follow_redirects=False, redirect_count=0, method="GET", cookies=None + self, + path, + follow_redirects=False, + redirect_count=0, + method="GET", + cookies=None, + if_none_match=None, ): return await self._request( - path, follow_redirects, redirect_count, method, cookies + path=path, + follow_redirects=follow_redirects, + redirect_count=redirect_count, + method=method, + cookies=cookies, + if_none_match=if_none_match, ) @async_to_sync @@ -110,6 +121,7 @@ class TestClient: headers=None, post_body=None, content_type=None, + if_none_match=None, ): return await self._request( path, @@ -120,6 +132,7 @@ class TestClient: headers=headers, post_body=post_body, content_type=content_type, + if_none_match=if_none_match, ) async def _request( @@ -132,10 +145,13 @@ class TestClient: headers=None, post_body=None, content_type=None, + if_none_match=None, ): headers = headers or {} if content_type: headers["content-type"] = content_type + if if_none_match: + headers["if-none-match"] = if_none_match httpx_response = await self.ds.client.request( method, path, diff --git a/datasette/views/database.py b/datasette/views/database.py index 9a8aca32..bc08ba05 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -183,6 +183,13 @@ class DatabaseDownload(DataView): headers = {} if self.ds.cors: add_cors_headers(headers) + if db.hash: + etag = '"{}"'.format(db.hash) + headers["Etag"] = etag + # Has user seen this already? + if_none_match = request.headers.get("if-none-match") + if if_none_match and if_none_match == etag: + return Response("", status=304) headers["Transfer-Encoding"] = "chunked" return AsgiFileDownload( filepath, diff --git a/tests/test_html.py b/tests/test_html.py index 42f1a3ee..409fec68 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -401,7 +401,7 @@ def test_database_download_for_immutable(): assert len(soup.findAll("a", {"href": re.compile(r"\.db$")})) # Check we can actually download it download_response = client.get("/fixtures.db") - assert 200 == download_response.status + assert download_response.status == 200 # Check the content-length header exists assert "content-length" in download_response.headers content_length = download_response.headers["content-length"] @@ -413,6 +413,14 @@ def test_database_download_for_immutable(): == 'attachment; filename="fixtures.db"' ) assert download_response.headers["transfer-encoding"] == "chunked" + # ETag header should be present and match db.hash + assert "etag" in download_response.headers + etag = download_response.headers["etag"] + assert etag == '"{}"'.format(client.ds.databases["fixtures"].hash) + # Try a second download with If-None-Match: current-etag + download_response2 = client.get("/fixtures.db", if_none_match=etag) + assert download_response2.body == b"" + assert download_response2.status == 304 def test_database_download_disallowed_for_mutable(app_client): From a5acfff4bd364d30ce8878e19f9839890371ef14 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 16 May 2022 17:06:40 -0700 Subject: [PATCH 0103/1218] Empty Datasette([]) list is no longer required --- docs/testing_plugins.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 1bbaaac1..41046bfb 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -15,7 +15,7 @@ If you use the template described in :ref:`writing_plugins_cookiecutter` your pl @pytest.mark.asyncio async def test_plugin_is_installed(): - datasette = Datasette([], memory=True) + datasette = Datasette(memory=True) response = await datasette.client.get("/-/plugins.json") assert response.status_code == 200 installed_plugins = {p["name"] for p in response.json()} From 3508bf7875f8d62b2725222f3b07747974d54b97 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 May 2022 12:40:05 -0700 Subject: [PATCH 0104/1218] --nolock mode to ignore locked files, closes #1744 --- datasette/app.py | 2 ++ datasette/cli.py | 7 +++++++ datasette/database.py | 2 ++ docs/cli-reference.rst | 1 + docs/getting_started.rst | 4 +++- 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index b7b84371..f43700d4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -213,6 +213,7 @@ class Datasette: config_dir=None, pdb=False, crossdb=False, + nolock=False, ): assert config_dir is None or isinstance( config_dir, Path @@ -238,6 +239,7 @@ class Datasette: self.databases = collections.OrderedDict() self._refresh_schemas_lock = asyncio.Lock() self.crossdb = crossdb + self.nolock = nolock if memory or crossdb or not self.files: self.add_database(Database(self, is_memory=True), name="_memory") # memory_name is a random string so that each Datasette instance gets its own diff --git a/datasette/cli.py b/datasette/cli.py index 3c6e1b2c..8781747c 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -452,6 +452,11 @@ def uninstall(packages, yes): is_flag=True, help="Enable cross-database joins using the /_memory database", ) +@click.option( + "--nolock", + is_flag=True, + help="Ignore locking, open locked files in read-only mode", +) @click.option( "--ssl-keyfile", help="SSL key file", @@ -486,6 +491,7 @@ def serve( open_browser, create, crossdb, + nolock, ssl_keyfile, ssl_certfile, return_instance=False, @@ -545,6 +551,7 @@ def serve( version_note=version_note, pdb=pdb, crossdb=crossdb, + nolock=nolock, ) # if files is a single directory, use that as config_dir= diff --git a/datasette/database.py b/datasette/database.py index 44d32667..fa558045 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -89,6 +89,8 @@ class Database: # mode=ro or immutable=1? if self.is_mutable: qs = "?mode=ro" + if self.ds.nolock: + qs += "&nolock=1" else: qs = "?immutable=1" assert not (write and not self.is_mutable) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 2a6fbfc8..1c1aff15 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -115,6 +115,7 @@ datasette serve --help --create Create database files if they do not exist --crossdb Enable cross-database joins using the /_memory database + --nolock Ignore locking, open locked files in read-only mode --ssl-keyfile TEXT SSL key file --ssl-certfile TEXT SSL certificate file --help Show this message and exit. diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 3e357afb..502a9e5a 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -56,7 +56,9 @@ like so: :: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History + datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock + +The `--nolock` option ignores any file locks. This is safe as Datasette will open the file in read-only mode. Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: From 5555bc8aef043f75d2200f66de90c54aeeaa08c3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 May 2022 12:43:44 -0700 Subject: [PATCH 0105/1218] How to run cog, closes #1745 --- docs/contributing.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/contributing.rst b/docs/contributing.rst index c193ba49..bddceafe 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -211,6 +211,17 @@ For added productivity, you can use use `sphinx-autobuild `__. + +To update these pages, run the following command:: + + cog -r docs/*.rst + .. _contributing_continuous_deployment: Continuously deployed demo instances From b393e164dc9e962702546d6f1ad9c857b5788dc0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 May 2022 12:45:28 -0700 Subject: [PATCH 0106/1218] ReST fix --- docs/getting_started.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 502a9e5a..af3a1385 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -58,7 +58,7 @@ like so: datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock -The `--nolock` option ignores any file locks. This is safe as Datasette will open the file in read-only mode. +The ``--nolock`` option ignores any file locks. This is safe as Datasette will open the file in read-only mode. Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: From 7d1e004ff679b3fb4dca36d1d751a1ad16688fe6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 May 2022 12:59:28 -0700 Subject: [PATCH 0107/1218] Fix test I broke in #1744 --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index dca65f26..d0f6e26c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -150,6 +150,7 @@ def test_metadata_yaml(): help_settings=False, pdb=False, crossdb=False, + nolock=False, open_browser=False, create=False, ssl_keyfile=None, From 0e2f6f1f82f4445a63f1251470a7778a34f5c8b9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 18 May 2022 17:37:46 -0700 Subject: [PATCH 0108/1218] datasette-copyable is an example of register_output_renderer --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 3c9ae2e2..c0d88964 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -557,7 +557,7 @@ And here is an example ``can_render`` function which returns ``True`` only if th "atom_updated", }.issubset(columns) -Examples: `datasette-atom `_, `datasette-ics `_, `datasette-geojson `__ +Examples: `datasette-atom `_, `datasette-ics `_, `datasette-geojson `__, `datasette-copyable `__ .. _plugin_register_routes: From 18a6e05887abf1ac946a6e0d36ce662dfd8aeff1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 May 2022 12:05:33 -0700 Subject: [PATCH 0109/1218] Added "follow a tutorial" to getting started docs Closes #1747 --- docs/getting_started.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index af3a1385..00b753a9 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -1,6 +1,8 @@ Getting started =============== +.. _getting_started_demo: + Play with a live demo --------------------- @@ -9,6 +11,16 @@ The best way to experience Datasette for the first time is with a demo: * `global-power-plants.datasettes.com `__ provides a searchable database of power plants around the world, using data from the `World Resources Institude `__ rendered using the `datasette-cluster-map `__ plugin. * `fivethirtyeight.datasettes.com `__ shows Datasette running against over 400 datasets imported from the `FiveThirtyEight GitHub repository `__. +.. _getting_started_tutorial: + +Follow a tutorial +----------------- + +Datasette has several `tutorials `__ to help you get started with the tool. Try one of the following: + +- `Exploring a database with Datasette `__ shows how to use the Datasette web interface to explore a new database. +- `Learn SQL with Datasette `__ introduces SQL, and shows how to use that query language to ask questions of your data. + .. _getting_started_glitch: Try Datasette without installing anything using Glitch From 1465fea4798599eccfe7e8f012bd8d9adfac3039 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 May 2022 12:11:08 -0700 Subject: [PATCH 0110/1218] sphinx-copybutton for docs, closes #1748 --- docs/conf.py | 2 +- setup.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d114bc52..351cb1b1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.extlinks", "sphinx.ext.autodoc"] +extensions = ["sphinx.ext.extlinks", "sphinx.ext.autodoc", "sphinx_copybutton"] extlinks = { "issue": ("https://github.com/simonw/datasette/issues/%s", "#"), diff --git a/setup.py b/setup.py index ca449f02..aad05840 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,13 @@ setup( """, setup_requires=["pytest-runner"], extras_require={ - "docs": ["sphinx_rtd_theme", "sphinx-autobuild", "codespell", "blacken-docs"], + "docs": [ + "sphinx_rtd_theme", + "sphinx-autobuild", + "codespell", + "blacken-docs", + "sphinx-copybutton", + ], "test": [ "pytest>=5.2.2,<7.2.0", "pytest-xdist>=2.2.1,<2.6", From 1d33fd03b3c211e0f48a8f3bde83880af89e4e69 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 May 2022 13:34:51 -0700 Subject: [PATCH 0111/1218] Switch docs theme to Furo, refs #1746 --- docs/_static/css/custom.css | 7 ++-- .../layout.html => _static/js/custom.js} | 34 ------------------- docs/_templates/base.html | 6 ++++ docs/_templates/sidebar/brand.html | 16 +++++++++ docs/_templates/sidebar/navigation.html | 11 ++++++ docs/conf.py | 24 +++---------- docs/installation.rst | 1 + docs/plugin_hooks.rst | 1 + setup.py | 2 +- 9 files changed, 45 insertions(+), 57 deletions(-) rename docs/{_templates/layout.html => _static/js/custom.js} (55%) create mode 100644 docs/_templates/base.html create mode 100644 docs/_templates/sidebar/brand.html create mode 100644 docs/_templates/sidebar/navigation.html diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index 4dabb725..0a6f8799 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -1,7 +1,8 @@ a.external { overflow-wrap: anywhere; } - -div .wy-side-nav-search > div.version { - color: rgba(0,0,0,0.75); +body[data-theme="dark"] .sidebar-logo-container { + background-color: white; + padding: 5px; + opacity: 0.6; } diff --git a/docs/_templates/layout.html b/docs/_static/js/custom.js similarity index 55% rename from docs/_templates/layout.html rename to docs/_static/js/custom.js index 785cdc7c..efca33ed 100644 --- a/docs/_templates/layout.html +++ b/docs/_static/js/custom.js @@ -1,35 +1,3 @@ -{%- extends "!layout.html" %} - -{% block htmltitle %} -{{ super() }} - -{% endblock %} - -{% block sidebartitle %} - - - - - -{% if theme_display_version %} - {%- set nav_version = version %} - {% if READTHEDOCS and current_version %} - {%- set nav_version = current_version %} - {% endif %} - {% if nav_version %} -
- {{ nav_version }} -
- {% endif %} -{% endif %} - -{% include "searchbox.html" %} - -{% endblock %} - -{% block footer %} -{{ super() }} - -{% endblock %} diff --git a/docs/_templates/base.html b/docs/_templates/base.html new file mode 100644 index 00000000..969de5ab --- /dev/null +++ b/docs/_templates/base.html @@ -0,0 +1,6 @@ +{%- extends "!base.html" %} + +{% block site_meta %} +{{ super() }} + +{% endblock %} diff --git a/docs/_templates/sidebar/brand.html b/docs/_templates/sidebar/brand.html new file mode 100644 index 00000000..8be9e8ee --- /dev/null +++ b/docs/_templates/sidebar/brand.html @@ -0,0 +1,16 @@ + diff --git a/docs/_templates/sidebar/navigation.html b/docs/_templates/sidebar/navigation.html new file mode 100644 index 00000000..c460a17e --- /dev/null +++ b/docs/_templates/sidebar/navigation.html @@ -0,0 +1,11 @@ + \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 351cb1b1..25d2acfe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -90,18 +90,15 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" +html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { - "logo_only": True, - "style_nav_header_background": "white", - "prev_next_buttons_location": "both", + "sidebar_hide_name": True, } - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". @@ -112,20 +109,9 @@ html_logo = "datasette-logo.svg" html_css_files = [ "css/custom.css", ] - - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - "**": [ - "relations.html", # needs 'show_related': True theme option to display - "searchbox.html", - ] -} - +html_js_files = [ + "js/custom.js" +] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/installation.rst b/docs/installation.rst index e8bef9cd..a4757736 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -13,6 +13,7 @@ If you want to start making contributions to the Datasette project by installing .. contents:: :local: + :class: this-will-duplicate-information-and-it-is-still-useful-here .. _installation_basic: diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index c0d88964..7d10fe37 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -20,6 +20,7 @@ For example, you can implement the ``render_cell`` plugin hook like this even th .. contents:: List of plugin hooks :local: + :class: this-will-duplicate-information-and-it-is-still-useful-here .. _plugin_hook_prepare_connection: diff --git a/setup.py b/setup.py index aad05840..d3fcdbd1 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ setup( setup_requires=["pytest-runner"], extras_require={ "docs": [ - "sphinx_rtd_theme", + "furo==2022.4.7", "sphinx-autobuild", "codespell", "blacken-docs", From 4446075334ea7231beb56b630bc7ec363afc2d08 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 May 2022 13:44:23 -0700 Subject: [PATCH 0112/1218] Append warning to the write element, refs #1746 --- docs/_static/js/custom.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/_static/js/custom.js b/docs/_static/js/custom.js index efca33ed..91c3e306 100644 --- a/docs/_static/js/custom.js +++ b/docs/_static/js/custom.js @@ -17,11 +17,7 @@ jQuery(function ($) { ` ); warning.find("a").attr("href", stableUrl); - var body = $("div.body"); - if (!body.length) { - body = $("div.document"); - } - body.prepend(warning); + $("article[role=main]").prepend(warning); } }); }); From b010af7bb85856aeb44f69e7e980f617c1fc0db1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 20 May 2022 15:23:09 -0700 Subject: [PATCH 0113/1218] Updated copyright years in documentation footer --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 25d2acfe..7ffeedd0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,7 +51,7 @@ master_doc = "index" # General information about the project. project = "Datasette" -copyright = "2017-2021, Simon Willison" +copyright = "2017-2022, Simon Willison" author = "Simon Willison" # Disable -- turning into โ€“ From adedd85b68ec66e03b97fb62ff4da8987734436e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 28 May 2022 18:42:31 -0700 Subject: [PATCH 0114/1218] Clarify that request.headers names are converted to lowercase --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 18822d47..da135282 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -26,7 +26,7 @@ The request object is passed to various plugin hooks. It represents an incoming The request scheme - usually ``https`` or ``http``. ``.headers`` - dictionary (str -> str) - A dictionary of incoming HTTP request headers. + A dictionary of incoming HTTP request headers. Header names have been converted to lowercase. ``.cookies`` - dictionary (str -> str) A dictionary of incoming cookies From 8dd816bc76937f1e37f86acce10dc2cb4fa31e52 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 30 May 2022 15:42:38 -0700 Subject: [PATCH 0115/1218] Applied Black --- docs/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7ffeedd0..4ef6b768 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -109,9 +109,7 @@ html_logo = "datasette-logo.svg" html_css_files = [ "css/custom.css", ] -html_js_files = [ - "js/custom.js" -] +html_js_files = ["js/custom.js"] # -- Options for HTMLHelp output ------------------------------------------ From 2e9751672d4fe329b3c359d5b7b1992283185820 Mon Sep 17 00:00:00 2001 From: Naveen <172697+naveensrinivasan@users.noreply.github.com> Date: Tue, 31 May 2022 14:28:40 -0500 Subject: [PATCH 0116/1218] chore: Set permissions for GitHub actions (#1740) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restrict the GitHub token permissions only to the required ones; this way, even if the attackers will succeed in compromising your workflow, they wonโ€™t be able to do much. - Included permissions for the action. https://github.com/ossf/scorecard/blob/main/docs/checks.md#token-permissions https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#permissions https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs [Keeping your GitHub Actions and workflows secure Part 1: Preventing pwn requests](https://securitylab.github.com/research/github-actions-preventing-pwn-requests/) Signed-off-by: naveen <172697+naveensrinivasan@users.noreply.github.com> --- .github/workflows/deploy-latest.yml | 3 +++ .github/workflows/prettier.yml | 3 +++ .github/workflows/publish.yml | 3 +++ .github/workflows/push_docker_tag.yml | 3 +++ .github/workflows/spellcheck.yml | 3 +++ .github/workflows/test-coverage.yml | 3 +++ .github/workflows/test-pyodide.yml | 3 +++ .github/workflows/test.yml | 3 +++ .github/workflows/tmate-mac.yml | 3 +++ .github/workflows/tmate.yml | 3 +++ 10 files changed, 30 insertions(+) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index a61f6629..2b94a7f1 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -5,6 +5,9 @@ on: branches: - main +permissions: + contents: read + jobs: deploy: runs-on: ubuntu-latest diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml index 9dfe7ee0..ded41040 100644 --- a/.github/workflows/prettier.yml +++ b/.github/workflows/prettier.yml @@ -2,6 +2,9 @@ name: Check JavaScript for conformance with Prettier on: [push] +permissions: + contents: read + jobs: prettier: runs-on: ubuntu-latest diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3e4f8146..9ef09d2e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,6 +4,9 @@ on: release: types: [created] +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/push_docker_tag.yml b/.github/workflows/push_docker_tag.yml index 9a3969f0..afe8d6b2 100644 --- a/.github/workflows/push_docker_tag.yml +++ b/.github/workflows/push_docker_tag.yml @@ -6,6 +6,9 @@ on: version_tag: description: Tag to build and push +permissions: + contents: read + jobs: deploy_docker: runs-on: ubuntu-latest diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 2e24d3eb..a2621ecc 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -2,6 +2,9 @@ name: Check spelling in documentation on: [push, pull_request] +permissions: + contents: read + jobs: spellcheck: runs-on: ubuntu-latest diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 1d1cf332..bd720664 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -7,6 +7,9 @@ on: pull_request: branches: - main +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/test-pyodide.yml b/.github/workflows/test-pyodide.yml index 1b75aade..bc9593a8 100644 --- a/.github/workflows/test-pyodide.yml +++ b/.github/workflows/test-pyodide.yml @@ -5,6 +5,9 @@ on: pull_request: workflow_dispatch: +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8d916e49..90b6555e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,6 +2,9 @@ name: Test on: [push, pull_request] +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/tmate-mac.yml b/.github/workflows/tmate-mac.yml index 46be117e..fcee0f21 100644 --- a/.github/workflows/tmate-mac.yml +++ b/.github/workflows/tmate-mac.yml @@ -3,6 +3,9 @@ name: tmate session mac on: workflow_dispatch: +permissions: + contents: read + jobs: build: runs-on: macos-latest diff --git a/.github/workflows/tmate.yml b/.github/workflows/tmate.yml index 02e7bd33..9792245d 100644 --- a/.github/workflows/tmate.yml +++ b/.github/workflows/tmate.yml @@ -3,6 +3,9 @@ name: tmate session on: workflow_dispatch: +permissions: + contents: read + jobs: build: runs-on: ubuntu-latest From e780b2f5d662ef3579d801d33567440055d4e84d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 20 Jun 2022 10:54:23 -0700 Subject: [PATCH 0117/1218] Trying out one-sentence-per-line As suggested here: https://sive.rs/1s Markdown and reStructuredText will display this as if it is a single paragraph, even though the sentences themselves are separated by newlines. This could result in more useful diffs. Trying it out on this page first. --- docs/facets.rst | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/docs/facets.rst b/docs/facets.rst index 0228aa84..2a2eb039 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -3,7 +3,9 @@ Facets ====== -Datasette facets can be used to add a faceted browse interface to any database table. With facets, tables are displayed along with a summary showing the most common values in specified columns. These values can be selected to further filter the table. +Datasette facets can be used to add a faceted browse interface to any database table. +With facets, tables are displayed along with a summary showing the most common values in specified columns. +These values can be selected to further filter the table. .. image:: facets.png @@ -12,11 +14,13 @@ Facets can be specified in two ways: using query string parameters, or in ``meta Facets in query strings ----------------------- -To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. For example, if you want to turn on facets for the ``city_id`` and ``state`` columns, construct a URL that looks like this:: +To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. +For example, if you want to turn on facets for the ``city_id`` and ``state`` columns, construct a URL that looks like this:: /dbname/tablename?_facet=state&_facet=city_id -This works for both the HTML interface and the ``.json`` view. When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this: +This works for both the HTML interface and the ``.json`` view. +When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this: .. code-block:: json @@ -86,7 +90,8 @@ This works for both the HTML interface and the ``.json`` view. When enabled, fac If Datasette detects that a column is a foreign key, the ``"label"`` property will be automatically derived from the detected label column on the referenced table. -The default number of facet results returned is 30, controlled by the :ref:`setting_default_facet_size` setting. You can increase this on an individual page by adding ``?_facet_size=100`` to the query string, up to a maximum of :ref:`setting_max_returned_rows` (which defaults to 1000). +The default number of facet results returned is 30, controlled by the :ref:`setting_default_facet_size` setting. +You can increase this on an individual page by adding ``?_facet_size=100`` to the query string, up to a maximum of :ref:`setting_max_returned_rows` (which defaults to 1000). .. _facets_metadata: @@ -137,12 +142,14 @@ For the currently filtered data are there any columns which, if applied as a fac * Will return less unique options than the total number of filtered rows * And the query used to evaluate this criteria can be completed in under 50ms -That last point is particularly important: Datasette runs a query for every column that is displayed on a page, which could get expensive - so to avoid slow load times it sets a time limit of just 50ms for each of those queries. This means suggested facets are unlikely to appear for tables with millions of records in them. +That last point is particularly important: Datasette runs a query for every column that is displayed on a page, which could get expensive - so to avoid slow load times it sets a time limit of just 50ms for each of those queries. +This means suggested facets are unlikely to appear for tables with millions of records in them. Speeding up facets with indexes ------------------------------- -The performance of facets can be greatly improved by adding indexes on the columns you wish to facet by. Adding indexes can be performed using the ``sqlite3`` command-line utility. Here's how to add an index on the ``state`` column in a table called ``Food_Trucks``:: +The performance of facets can be greatly improved by adding indexes on the columns you wish to facet by. +Adding indexes can be performed using the ``sqlite3`` command-line utility. Here's how to add an index on the ``state`` column in a table called ``Food_Trucks``:: $ sqlite3 mydatabase.db SQLite version 3.19.3 2017-06-27 16:48:08 @@ -169,6 +176,7 @@ Example here: `latest.datasette.io/fixtures/facetable?_facet_array=tags `__ From 00e59ec461dc0150772b999c7cc15fcb9b507d58 Mon Sep 17 00:00:00 2001 From: "M. Nasimul Haque" Date: Mon, 20 Jun 2022 19:05:44 +0100 Subject: [PATCH 0118/1218] Extract facet pieces of table.html into included templates Thanks, @nsmgr8 --- datasette/templates/_facet_results.html | 28 ++++++++++++++++++ datasette/templates/_suggested_facets.html | 3 ++ datasette/templates/table.html | 33 ++-------------------- 3 files changed, 33 insertions(+), 31 deletions(-) create mode 100644 datasette/templates/_facet_results.html create mode 100644 datasette/templates/_suggested_facets.html diff --git a/datasette/templates/_facet_results.html b/datasette/templates/_facet_results.html new file mode 100644 index 00000000..d0cbcf77 --- /dev/null +++ b/datasette/templates/_facet_results.html @@ -0,0 +1,28 @@ +
+ {% for facet_info in sorted_facet_results %} +
+

+ {{ facet_info.name }}{% if facet_info.type != "column" %} ({{ facet_info.type }}){% endif %} + {% if facet_info.truncated %}>{% endif %}{{ facet_info.results|length }} + + {% if facet_info.hideable %} + + {% endif %} +

+
    + {% for facet_value in facet_info.results %} + {% if not facet_value.selected %} +
  • {{ (facet_value.label | string()) or "-" }} {{ "{:,}".format(facet_value.count) }}
  • + {% else %} +
  • {{ facet_value.label or "-" }} · {{ "{:,}".format(facet_value.count) }}
  • + {% endif %} + {% endfor %} + {% if facet_info.truncated %} +
  • {% if request.args._facet_size != "max" -%} + โ€ฆ{% else -%}โ€ฆ{% endif %} +
  • + {% endif %} +
+
+ {% endfor %} +
diff --git a/datasette/templates/_suggested_facets.html b/datasette/templates/_suggested_facets.html new file mode 100644 index 00000000..ec98fb36 --- /dev/null +++ b/datasette/templates/_suggested_facets.html @@ -0,0 +1,3 @@ +

+ Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if facet.type %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %} +

diff --git a/datasette/templates/table.html b/datasette/templates/table.html index a9e88330..a86398ea 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -142,9 +142,7 @@ {% if suggested_facets %} -

- Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if facet.type %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %} -

+ {% include "_suggested_facets.html" %} {% endif %} {% if facets_timed_out %} @@ -152,34 +150,7 @@ {% endif %} {% if facet_results %} -
- {% for facet_info in sorted_facet_results %} -
-

- {{ facet_info.name }}{% if facet_info.type != "column" %} ({{ facet_info.type }}){% endif %} - {% if facet_info.truncated %}>{% endif %}{{ facet_info.results|length }} - - {% if facet_info.hideable %} - - {% endif %} -

-
    - {% for facet_value in facet_info.results %} - {% if not facet_value.selected %} -
  • {{ (facet_value.label | string()) or "-" }} {{ "{:,}".format(facet_value.count) }}
  • - {% else %} -
  • {{ facet_value.label or "-" }} · {{ "{:,}".format(facet_value.count) }}
  • - {% endif %} - {% endfor %} - {% if facet_info.truncated %} -
  • {% if request.args._facet_size != "max" -%} - โ€ฆ{% else -%}โ€ฆ{% endif %} -
  • - {% endif %} -
-
- {% endfor %} -
+ {% include "_facet_results.html" %} {% endif %} {% include custom_table_templates %} From 9f1eb0d4eac483b953392157bd9fd6cc4df37de7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jun 2022 10:40:24 -0700 Subject: [PATCH 0119/1218] Bump black from 22.1.0 to 22.6.0 (#1763) Bumps [black](https://github.com/psf/black) from 22.1.0 to 22.6.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.1.0...22.6.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d3fcdbd1..29cb77bf 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setup( "pytest-xdist>=2.2.1,<2.6", "pytest-asyncio>=0.17,<0.19", "beautifulsoup4>=4.8.1,<4.12.0", - "black==22.1.0", + "black==22.6.0", "blacken-docs==1.12.1", "pytest-timeout>=1.4.2,<2.2", "trustme>=0.7,<0.10", From 6373bb341457e5becfd5b67792ac2c8b9ed7c384 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 7 Jul 2022 09:30:49 -0700 Subject: [PATCH 0120/1218] Expose current SQLite row to render_cell hook, closes #1300 --- datasette/hookspecs.py | 2 +- datasette/views/database.py | 1 + datasette/views/table.py | 1 + docs/plugin_hooks.rst | 9 ++++++--- tests/plugins/my_plugin.py | 3 ++- tests/test_plugins.py | 5 +++-- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 8f4fecab..c84db0a3 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -60,7 +60,7 @@ def publish_subcommand(publish): @hookspec -def render_cell(value, column, table, database, datasette): +def render_cell(row, value, column, table, database, datasette): """Customize rendering of HTML table cell values""" diff --git a/datasette/views/database.py b/datasette/views/database.py index bc08ba05..42058752 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -375,6 +375,7 @@ class QueryView(DataView): # pylint: disable=no-member plugin_display_value = None for candidate in pm.hook.render_cell( + row=row, value=value, column=column, table=None, diff --git a/datasette/views/table.py b/datasette/views/table.py index 23289b29..cd4be823 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -895,6 +895,7 @@ async def display_columns_and_rows( # pylint: disable=no-member plugin_display_value = None for candidate in pm.hook.render_cell( + row=row, value=value, column=column, table=table_name, diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 7d10fe37..f5c3ee83 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -373,12 +373,15 @@ Examples: `datasette-publish-fly Date: Sat, 9 Jul 2022 10:25:37 -0700 Subject: [PATCH 0121/1218] More than 90 plugins now --- docs/writing_plugins.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/writing_plugins.rst b/docs/writing_plugins.rst index 9aee70f6..01ee8c90 100644 --- a/docs/writing_plugins.rst +++ b/docs/writing_plugins.rst @@ -5,7 +5,7 @@ Writing plugins You can write one-off plugins that apply to just one Datasette instance, or you can write plugins which can be installed using ``pip`` and can be shipped to the Python Package Index (`PyPI `__) for other people to install. -Want to start by looking at an example? The `Datasette plugins directory `__ lists more than 50 open source plugins with code you can explore. The :ref:`plugin hooks ` page includes links to example plugins for each of the documented hooks. +Want to start by looking at an example? The `Datasette plugins directory `__ lists more than 90 open source plugins with code you can explore. The :ref:`plugin hooks ` page includes links to example plugins for each of the documented hooks. .. _writing_plugins_one_off: From 5d76c1f81b2d978f48b85c70d041a2142cf8ee26 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 14 Jul 2022 15:03:33 -0700 Subject: [PATCH 0122/1218] Discord badge Refs https://github.com/simonw/datasette.io/issues/112 --- README.md | 1 + docs/index.rst | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 557d9290..c57ee604 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) [![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) +[![discord](https://img.shields.io/discord/823971286308356157?label=Discord)](https://discord.gg/ktd74dm5mw) *An open source multi-tool for exploring and publishing data* diff --git a/docs/index.rst b/docs/index.rst index a2888822..62ed70f8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,6 +16,8 @@ datasette| :target: https://github.com/simonw/datasette/blob/main/LICENSE .. |docker: datasette| image:: https://img.shields.io/badge/docker-datasette-blue :target: https://hub.docker.com/r/datasetteproject/datasette +.. |discord| image:: https://img.shields.io/discord/823971286308356157?label=Discord + :target: https://discord.gg/ktd74dm5mw *An open source multi-tool for exploring and publishing data* From c133545fe9c7ac2d509e55bf4bf6164bfbe892ad Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 14 Jul 2022 15:04:38 -0700 Subject: [PATCH 0123/1218] Make discord badge lowercase Refs https://github.com/simonw/datasette.io/issues/112 --- README.md | 2 +- docs/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c57ee604..032180aa 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](https://docs.datasette.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/main/LICENSE) [![docker: datasette](https://img.shields.io/badge/docker-datasette-blue)](https://hub.docker.com/r/datasetteproject/datasette) -[![discord](https://img.shields.io/discord/823971286308356157?label=Discord)](https://discord.gg/ktd74dm5mw) +[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://discord.gg/ktd74dm5mw) *An open source multi-tool for exploring and publishing data* diff --git a/docs/index.rst b/docs/index.rst index 62ed70f8..051898b1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,7 +16,7 @@ datasette| :target: https://github.com/simonw/datasette/blob/main/LICENSE .. |docker: datasette| image:: https://img.shields.io/badge/docker-datasette-blue :target: https://hub.docker.com/r/datasetteproject/datasette -.. |discord| image:: https://img.shields.io/discord/823971286308356157?label=Discord +.. |discord| image:: https://img.shields.io/discord/823971286308356157?label=discord :target: https://discord.gg/ktd74dm5mw *An open source multi-tool for exploring and publishing data* From 950cc7677f65aa2543067b3bbfc2b6acb98b62c8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 14 Jul 2022 15:18:28 -0700 Subject: [PATCH 0124/1218] Fix missing Discord image Refs https://github.com/simonw/datasette.io/issues/112 --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 051898b1..efe196b3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,7 +2,7 @@ Datasette ========= |PyPI| |Changelog| |Python 3.x| |Tests| |License| |docker: -datasette| +datasette| |discord| .. |PyPI| image:: https://img.shields.io/pypi/v/datasette.svg :target: https://pypi.org/project/datasette/ From 8188f55efc0fcca1be692b0d0c875f2d1ee99f17 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 15:24:16 -0700 Subject: [PATCH 0125/1218] Rename handle_500 to handle_exception, refs #1770 --- datasette/app.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index f43700d4..43e60dbc 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1275,7 +1275,7 @@ class DatasetteRouter: except NotFound as exception: return await self.handle_404(request, send, exception) except Exception as exception: - return await self.handle_500(request, send, exception) + return await self.handle_exception(request, send, exception) async def handle_404(self, request, send, exception=None): # If path contains % encoding, redirect to tilde encoding @@ -1354,7 +1354,7 @@ class DatasetteRouter: view_name="page", ) except NotFoundExplicit as e: - await self.handle_500(request, send, e) + await self.handle_exception(request, send, e) return # Pull content-type out into separate parameter content_type = "text/html; charset=utf-8" @@ -1369,9 +1369,9 @@ class DatasetteRouter: content_type=content_type, ) else: - await self.handle_500(request, send, exception or NotFound("404")) + await self.handle_exception(request, send, exception or NotFound("404")) - async def handle_500(self, request, send, exception): + async def handle_exception(self, request, send, exception): if self.ds.pdb: import pdb From c09c53f3455a7b9574cf7695478f2b87d20897db Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 16:24:39 -0700 Subject: [PATCH 0126/1218] New handle_exception plugin hook, refs #1770 Also refs: - https://github.com/simonw/datasette-sentry/issues/1 - https://github.com/simonw/datasette-show-errors/issues/2 --- datasette/app.py | 97 +++++++++-------------------------- datasette/forbidden.py | 20 ++++++++ datasette/handle_exception.py | 74 ++++++++++++++++++++++++++ datasette/hookspecs.py | 5 ++ datasette/plugins.py | 2 + docs/plugin_hooks.rst | 78 ++++++++++++++++++++-------- tests/fixtures.py | 1 + tests/plugins/my_plugin_2.py | 18 +++++++ tests/test_permissions.py | 1 + tests/test_plugins.py | 14 +++++ 10 files changed, 215 insertions(+), 95 deletions(-) create mode 100644 datasette/forbidden.py create mode 100644 datasette/handle_exception.py diff --git a/datasette/app.py b/datasette/app.py index 43e60dbc..edd05bb3 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -16,7 +16,6 @@ import re import secrets import sys import threading -import traceback import urllib.parse from concurrent import futures from pathlib import Path @@ -27,7 +26,7 @@ from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader from jinja2.environment import Template from jinja2.exceptions import TemplateNotFound -from .views.base import DatasetteError, ureg +from .views.base import ureg from .views.database import DatabaseDownload, DatabaseView from .views.index import IndexView from .views.special import ( @@ -49,7 +48,6 @@ from .utils import ( PrefixedUrlString, SPATIALITE_FUNCTIONS, StartupError, - add_cors_headers, async_call_with_supported_arguments, await_me_maybe, call_with_supported_arguments, @@ -87,11 +85,6 @@ from .tracer import AsgiTracer from .plugins import pm, DEFAULT_PLUGINS, get_plugins from .version import __version__ -try: - import rich -except ImportError: - rich = None - app_root = Path(__file__).parent.parent # https://github.com/simonw/datasette/issues/283#issuecomment-781591015 @@ -1274,6 +1267,16 @@ class DatasetteRouter: return except NotFound as exception: return await self.handle_404(request, send, exception) + except Forbidden as exception: + # Try the forbidden() plugin hook + for custom_response in pm.hook.forbidden( + datasette=self.ds, request=request, message=exception.args[0] + ): + custom_response = await await_me_maybe(custom_response) + assert ( + custom_response + ), "Default forbidden() hook should have been called" + return await custom_response.asgi_send(send) except Exception as exception: return await self.handle_exception(request, send, exception) @@ -1372,72 +1375,20 @@ class DatasetteRouter: await self.handle_exception(request, send, exception or NotFound("404")) async def handle_exception(self, request, send, exception): - if self.ds.pdb: - import pdb + responses = [] + for hook in pm.hook.handle_exception( + datasette=self.ds, + request=request, + exception=exception, + ): + response = await await_me_maybe(hook) + if response is not None: + responses.append(response) - pdb.post_mortem(exception.__traceback__) - - if rich is not None: - rich.get_console().print_exception(show_locals=True) - - title = None - if isinstance(exception, Forbidden): - status = 403 - info = {} - message = exception.args[0] - # Try the forbidden() plugin hook - for custom_response in pm.hook.forbidden( - datasette=self.ds, request=request, message=message - ): - custom_response = await await_me_maybe(custom_response) - if custom_response is not None: - await custom_response.asgi_send(send) - return - elif isinstance(exception, Base400): - status = exception.status - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.message_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = [f"{status}.html", "error.html"] - info.update( - { - "ok": False, - "error": message, - "status": status, - "title": title, - } - ) - headers = {} - if self.ds.cors: - add_cors_headers(headers) - if request.path.split("?")[0].endswith(".json"): - await asgi_send_json(send, info, status=status, headers=headers) - else: - template = self.ds.jinja_env.select_template(templates) - await asgi_send_html( - send, - await template.render_async( - dict( - info, - urls=self.ds.urls, - app_css_hash=self.ds.app_css_hash(), - menu_links=lambda: [], - ) - ), - status=status, - headers=headers, - ) + assert responses, "Default exception handler should have returned something" + # Even if there are multiple responses use just the first one + response = responses[0] + await response.asgi_send(send) _cleaner_task_str_re = re.compile(r"\S*site-packages/") diff --git a/datasette/forbidden.py b/datasette/forbidden.py new file mode 100644 index 00000000..156a44d4 --- /dev/null +++ b/datasette/forbidden.py @@ -0,0 +1,20 @@ +from os import stat +from datasette import hookimpl, Response + + +@hookimpl(trylast=True) +def forbidden(datasette, request, message): + async def inner(): + return Response.html( + await datasette.render_template( + "error.html", + { + "title": "Forbidden", + "error": message, + }, + request=request, + ), + status=403, + ) + + return inner diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py new file mode 100644 index 00000000..8b7e83e3 --- /dev/null +++ b/datasette/handle_exception.py @@ -0,0 +1,74 @@ +from datasette import hookimpl, Response +from .utils import await_me_maybe, add_cors_headers +from .utils.asgi import ( + Base400, + Forbidden, +) +from .views.base import DatasetteError +from markupsafe import Markup +import pdb +import traceback +from .plugins import pm + +try: + import rich +except ImportError: + rich = None + + +@hookimpl(trylast=True) +def handle_exception(datasette, request, exception): + async def inner(): + if datasette.pdb: + pdb.post_mortem(exception.__traceback__) + + if rich is not None: + rich.get_console().print_exception(show_locals=True) + + title = None + if isinstance(exception, Base400): + status = exception.status + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.message_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = [f"{status}.html", "error.html"] + info.update( + { + "ok": False, + "error": message, + "status": status, + "title": title, + } + ) + headers = {} + if datasette.cors: + add_cors_headers(headers) + if request.path.split("?")[0].endswith(".json"): + return Response.json(info, status=status, headers=headers) + else: + template = datasette.jinja_env.select_template(templates) + return Response.html( + await template.render_async( + dict( + info, + urls=datasette.urls, + app_css_hash=datasette.app_css_hash(), + menu_links=lambda: [], + ) + ), + status=status, + headers=headers, + ) + + return inner diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index c84db0a3..a5fb536f 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -138,3 +138,8 @@ def database_actions(datasette, actor, database, request): @hookspec def skip_csrf(datasette, scope): """Mechanism for skipping CSRF checks for certain requests""" + + +@hookspec +def handle_exception(datasette, request, exception): + """Handle an uncaught exception. Can return a Response or None.""" diff --git a/datasette/plugins.py b/datasette/plugins.py index 76b46a47..fef0c8e9 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -15,6 +15,8 @@ DEFAULT_PLUGINS = ( "datasette.default_magic_parameters", "datasette.blob_renderer", "datasette.default_menu_links", + "datasette.handle_exception", + "datasette.forbidden", ) pm = pluggy.PluginManager("datasette") diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index f5c3ee83..6020a941 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -107,8 +107,8 @@ Extra template variables that should be made available in the rendered template ``view_name`` - string The name of the view being displayed. (``index``, ``database``, ``table``, and ``row`` are the most important ones.) -``request`` - object or None - The current HTTP :ref:`internals_request`. This can be ``None`` if the request object is not available. +``request`` - :ref:`internals_request` or None + The current HTTP request. This can be ``None`` if the request object is not available. ``datasette`` - :ref:`internals_datasette` You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)`` @@ -504,7 +504,7 @@ When a request is received, the ``"render"`` callback function is called with ze The table or view, if one is being rendered. ``request`` - :ref:`internals_request` - The incoming HTTP request. + The current HTTP request. ``view_name`` - string The name of the current view being called. ``index``, ``database``, ``table``, and ``row`` are the most important ones. @@ -599,8 +599,8 @@ The optional view function arguments are as follows: ``datasette`` - :ref:`internals_datasette` You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. -``request`` - Request object - The current HTTP :ref:`internals_request`. +``request`` - :ref:`internals_request` + The current HTTP request. ``scope`` - dictionary The incoming ASGI scope dictionary. @@ -947,8 +947,8 @@ actor_from_request(datasette, request) ``datasette`` - :ref:`internals_datasette` You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. -``request`` - object - The current HTTP :ref:`internals_request`. +``request`` - :ref:`internals_request` + The current HTTP request. This is part of Datasette's :ref:`authentication and permissions system `. The function should attempt to authenticate an actor (either a user or an API actor of some sort) based on information in the request. @@ -1010,8 +1010,8 @@ Example: `datasette-auth-tokens `__ and then renders a custom error page: + +.. code-block:: python + + from datasette import hookimpl, Response + import sentry_sdk + + + @hookimpl + def handle_exception(datasette, exception): + sentry_sdk.capture_exception(exception) + async def inner(): + return Response.html( + await datasette.render_template("custom_error.html", request=request) + ) + return inner + .. _plugin_hook_menu_links: menu_links(datasette, actor, request) @@ -1232,8 +1266,8 @@ menu_links(datasette, actor, request) ``actor`` - dictionary or None The currently authenticated :ref:`actor `. -``request`` - object or None - The current HTTP :ref:`internals_request`. This can be ``None`` if the request object is not available. +``request`` - :ref:`internals_request` + The current HTTP request. This can be ``None`` if the request object is not available. This hook allows additional items to be included in the menu displayed by Datasette's top right menu icon. @@ -1281,8 +1315,8 @@ table_actions(datasette, actor, database, table, request) ``table`` - string The name of the table. -``request`` - object - The current HTTP :ref:`internals_request`. This can be ``None`` if the request object is not available. +``request`` - :ref:`internals_request` + The current HTTP request. This can be ``None`` if the request object is not available. This hook allows table actions to be displayed in a menu accessed via an action icon at the top of the table page. It should return a list of ``{"href": "...", "label": "..."}`` menu items. @@ -1325,8 +1359,8 @@ database_actions(datasette, actor, database, request) ``database`` - string The name of the database. -``request`` - object - The current HTTP :ref:`internals_request`. +``request`` - :ref:`internals_request` + The current HTTP request. This hook is similar to :ref:`plugin_hook_table_actions` but populates an actions menu on the database page. diff --git a/tests/fixtures.py b/tests/fixtures.py index e0e4ec7b..c145ac78 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -68,6 +68,7 @@ EXPECTED_PLUGINS = [ "canned_queries", "extra_js_urls", "extra_template_vars", + "handle_exception", "menu_links", "permission_allowed", "register_routes", diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index f5ce36b3..4df02343 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -185,3 +185,21 @@ def register_routes(datasette): # Also serves to demonstrate over-ride of default paths: (r"/(?P[^/]+)/(?P[^/]+?$)", new_table), ] + + +@hookimpl +def handle_exception(datasette, request, exception): + datasette._exception_hook_fired = (request, exception) + if request.args.get("_custom_error"): + return Response.text("_custom_error") + elif request.args.get("_custom_error_async"): + + async def inner(): + return Response.text("_custom_error_async") + + return inner + + +@hookimpl(specname="register_routes") +def register_triger_error(): + return ((r"/trigger-error", lambda: 1 / 0),) diff --git a/tests/test_permissions.py b/tests/test_permissions.py index f4169dbe..2a519e76 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -332,6 +332,7 @@ def test_permissions_debug(app_client): assert checks == [ {"action": "permissions-debug", "result": True, "used_default": False}, {"action": "view-instance", "result": None, "used_default": True}, + {"action": "debug-menu", "result": False, "used_default": True}, {"action": "permissions-debug", "result": False, "used_default": True}, {"action": "view-instance", "result": None, "used_default": True}, ] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 4a7ad7c6..948a40b8 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -824,6 +824,20 @@ def test_hook_forbidden(restore_working_directory): assert "view-database" == client.ds._last_forbidden_message +def test_hook_handle_exception(app_client): + app_client.get("/trigger-error?x=123") + assert hasattr(app_client.ds, "_exception_hook_fired") + request, exception = app_client.ds._exception_hook_fired + assert request.url == "http://localhost/trigger-error?x=123" + assert isinstance(exception, ZeroDivisionError) + + +@pytest.mark.parametrize("param", ("_custom_error", "_custom_error_async")) +def test_hook_handle_exception_custom_response(app_client, param): + response = app_client.get("/trigger-error?{}=1".format(param)) + assert response.text == param + + def test_hook_menu_links(app_client): def get_menu_links(html): soup = Soup(html, "html.parser") From 58fd1e33ec7ac5ed85431d5c86d60600cd5280fb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 16:30:58 -0700 Subject: [PATCH 0127/1218] Hint that you can render templates for these hooks, refs #1770 --- docs/plugin_hooks.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 6020a941..b4869606 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1176,7 +1176,7 @@ forbidden(datasette, request, message) -------------------------------------- ``datasette`` - :ref:`internals_datasette` - You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to render templates or execute SQL queries. ``request`` - :ref:`internals_request` The current HTTP request. @@ -1224,7 +1224,7 @@ handle_exception(datasette, request, exception) ----------------------------------------------- ``datasette`` - :ref:`internals_datasette` - You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. + You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to render templates or execute SQL queries. ``request`` - :ref:`internals_request` The current HTTP request. From e543a095cc4c1ca895b082cfd1263ca25203a7c0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 17:57:41 -0700 Subject: [PATCH 0128/1218] Updated default plugins in docs, refs #1770 --- docs/plugins.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/plugins.rst b/docs/plugins.rst index f2ed02f7..29078054 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -172,6 +172,24 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "filters_from_request" ] }, + { + "name": "datasette.forbidden", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "forbidden" + ] + }, + { + "name": "datasette.handle_exception", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "handle_exception" + ] + }, { "name": "datasette.publish.cloudrun", "static": false, From 6d5e1955470424cf4faf5d35788d328ebdd6d463 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 17:59:20 -0700 Subject: [PATCH 0129/1218] Release 0.62a1 Refs #1300, #1739, #1744, #1746, #1748, #1759, #1770 --- datasette/version.py | 2 +- docs/changelog.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index cf18c441..86f4cf7e 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.62a0" +__version__ = "0.62a1" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 74814fcb..3f105811 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,20 @@ Changelog ========= +.. _v0_62a1: + +0.62a1 (2022-07-17) +------------------- + +- New plugin hook: :ref:`handle_exception() `, for custom handling of exceptions caught by Datasette. (:issue:`1770`) +- The :ref:`render_cell() ` plugin hook is now also passed a ``row`` argument, representing the ``sqlite3.Row`` object that is being rendered. (:issue:`1300`) +- New ``--nolock`` option for ignoring file locks when opening read-only databases. (:issue:`1744`) +- Documentation now uses the `Furo `__ Sphinx theme. (:issue:`1746`) +- Datasette now has a `Discord community `__. +- Database file downloads now implement conditional GET using ETags. (:issue:`1739`) +- Examples in the documentation now include a copy-to-clipboard button. (:issue:`1748`) +- HTML for facet results and suggested results has been extracted out into new templates ``_facet_results.html`` and ``_suggested_facets.html``. Thanks, M. Nasimul Haque. (`#1759 `__) + .. _v0_62a0: 0.62a0 (2022-05-02) From ed1ebc0f1d4153e3e0934f2af19f82e5fdf137d3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jul 2022 18:03:33 -0700 Subject: [PATCH 0130/1218] Run blacken-docs, refs #1770 --- docs/plugin_hooks.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index b4869606..aec1df56 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1213,7 +1213,9 @@ The function can alternatively return an awaitable function if it needs to make def forbidden(datasette): async def inner(): return Response.html( - await datasette.render_template("render_message.html", request=request) + await datasette.render_template( + "render_message.html", request=request + ) ) return inner @@ -1249,10 +1251,14 @@ This example logs an error to `Sentry `__ and then renders a @hookimpl def handle_exception(datasette, exception): sentry_sdk.capture_exception(exception) + async def inner(): return Response.html( - await datasette.render_template("custom_error.html", request=request) + await datasette.render_template( + "custom_error.html", request=request + ) ) + return inner .. _plugin_hook_menu_links: From ea6161f8475d9fa41c4879049511c58f692cce04 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 17 Jul 2022 18:06:26 -0700 Subject: [PATCH 0131/1218] Bump furo from 2022.4.7 to 2022.6.21 (#1760) Bumps [furo](https://github.com/pradyunsg/furo) from 2022.4.7 to 2022.6.21. - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2022.04.07...2022.06.21) --- updated-dependencies: - dependency-name: furo dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 29cb77bf..558b5c87 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ setup( setup_requires=["pytest-runner"], extras_require={ "docs": [ - "furo==2022.4.7", + "furo==2022.6.21", "sphinx-autobuild", "codespell", "blacken-docs", From 22354c48ce4d514d7a1b321e5651c7f1340e3f5e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 17 Jul 2022 18:06:37 -0700 Subject: [PATCH 0132/1218] Update pytest-asyncio requirement from <0.19,>=0.17 to >=0.17,<0.20 (#1769) Updates the requirements on [pytest-asyncio](https://github.com/pytest-dev/pytest-asyncio) to permit the latest version. - [Release notes](https://github.com/pytest-dev/pytest-asyncio/releases) - [Changelog](https://github.com/pytest-dev/pytest-asyncio/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-asyncio/compare/v0.17.0...v0.19.0) --- updated-dependencies: - dependency-name: pytest-asyncio dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 558b5c87..a1c51d0b 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setup( "test": [ "pytest>=5.2.2,<7.2.0", "pytest-xdist>=2.2.1,<2.6", - "pytest-asyncio>=0.17,<0.19", + "pytest-asyncio>=0.17,<0.20", "beautifulsoup4>=4.8.1,<4.12.0", "black==22.6.0", "blacken-docs==1.12.1", From 01369176b0a8943ab45292ffc6f9c929b80a00e8 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Sun, 17 Jul 2022 21:12:45 -0400 Subject: [PATCH 0133/1218] Keep track of datasette.config_dir (#1766) Thanks, @eyeseast - closes #1764 --- datasette/app.py | 1 + tests/test_config_dir.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index edd05bb3..1a9afc10 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -211,6 +211,7 @@ class Datasette: assert config_dir is None or isinstance( config_dir, Path ), "config_dir= should be a pathlib.Path" + self.config_dir = config_dir self.pdb = pdb self._secret = secret or secrets.token_hex(32) self.files = tuple(files or []) + tuple(immutables or []) diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index 015c6ace..fe927c42 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -1,4 +1,5 @@ import json +import pathlib import pytest from datasette.app import Datasette @@ -150,3 +151,11 @@ def test_metadata_yaml(tmp_path_factory, filename): response = client.get("/-/metadata.json") assert 200 == response.status assert {"title": "Title from metadata"} == response.json + + +def test_store_config_dir(config_dir_client): + ds = config_dir_client.ds + + assert hasattr(ds, "config_dir") + assert ds.config_dir is not None + assert isinstance(ds.config_dir, pathlib.Path) From 7af67b54b7d9bca43e948510fc62f6db2b748fa8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 18 Jul 2022 14:31:09 -0700 Subject: [PATCH 0134/1218] How to register temporary plugins in tests, closes #903 --- docs/testing_plugins.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 41046bfb..d02003a9 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -219,3 +219,39 @@ Here's a test for that plugin that mocks the HTTPX outbound request: assert ( outbound_request.url == "https://www.example.com/" ) + +.. _testing_plugins_register_in_test: + +Registering a plugin for the duration of a test +----------------------------------------------- + +When writing tests for plugins you may find it useful to register a test plugin just for the duration of a single test. You can do this using ``pm.register()`` and ``pm.unregister()`` like this: + +.. code-block:: python + + from datasette import hookimpl + from datasette.app import Datasette + from datasette.plugins import pm + import pytest + + + @pytest.mark.asyncio + async def test_using_test_plugin(): + class TestPlugin: + __name__ = "TestPlugin" + + # Use hookimpl and method names to register hooks + @hookimpl + def register_routes(self): + return [ + (r"^/error$", lambda: 1/0), + ] + + pm.register(TestPlugin(), name="undo") + try: + # The test implementation goes here + datasette = Datasette() + response = await datasette.client.get("/error") + assert response.status_code == 500 + finally: + pm.unregister(name="undo") From bca2d95d0228f80a108e13408f8e72b2c06c2c7b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Aug 2022 16:38:02 -0700 Subject: [PATCH 0135/1218] Configure readthedocs/readthedocs-preview --- .github/workflows/documentation-links.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/documentation-links.yml diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml new file mode 100644 index 00000000..e7062a46 --- /dev/null +++ b/.github/workflows/documentation-links.yml @@ -0,0 +1,16 @@ +name: Read the Docs Pull Request Preview +on: + pull_request_target: + types: + - opened + +permissions: + pull-requests: write + +jobs: + documentation-links: + runs-on: ubuntu-latest + steps: + - uses: readthedocs/readthedocs-preview@main + with: + project-slug: "datasette" From 8cfc72336878dd846d149658e99cc598e835b661 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 9 Aug 2022 11:21:53 -0700 Subject: [PATCH 0136/1218] Ran blacken-docs --- docs/testing_plugins.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index d02003a9..992b4b0e 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -244,7 +244,7 @@ When writing tests for plugins you may find it useful to register a test plugin @hookimpl def register_routes(self): return [ - (r"^/error$", lambda: 1/0), + (r"^/error$", lambda: 1 / 0), ] pm.register(TestPlugin(), name="undo") From 05d9c682689a0f1d23cbb502e027364ab3363910 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:16:53 -0700 Subject: [PATCH 0137/1218] Promote Discord more in the README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 032180aa..7ebbca57 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover * Comprehensive documentation:ย https://docs.datasette.io/ * Examples: https://datasette.io/examples * Live demo of current main: https://latest.datasette.io/ -* Support questions, feedback? Join our [GitHub Discussions forum](https://github.com/simonw/datasette/discussions) +* Questions, feedback or want to talk about the project? Join our [Discord](https://discord.gg/ktd74dm5mw) Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. From db00c00f6397287749331e8042fe998ee7f3b919 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:19:30 -0700 Subject: [PATCH 0138/1218] Promote Datasette Lite in the README, refs #1781 --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ebbca57..1af20129 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover * Latest [Datasette News](https://datasette.io/news) * Comprehensive documentation:ย https://docs.datasette.io/ * Examples: https://datasette.io/examples -* Live demo of current main: https://latest.datasette.io/ +* Live demo of current `main` branch: https://latest.datasette.io/ * Questions, feedback or want to talk about the project? Join our [Discord](https://discord.gg/ktd74dm5mw) Want to stay up-to-date with the project? Subscribe to the [Datasette newsletter](https://datasette.substack.com/) for tips, tricks and news on what's new in the Datasette ecosystem. @@ -85,3 +85,7 @@ Or: This will create a docker image containing both the datasette application and the specified SQLite database files. It will then deploy that image to Heroku or Cloud Run and give you a URL to access the resulting website and API. See [Publishing data](https://docs.datasette.io/en/stable/publish.html) in the documentation for more details. + +## Datasette Lite + +[Datasette Lite](https://lite.datasette.io/) is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. Read more about that in the [Datasette Lite documentation](https://github.com/simonw/datasette-lite/blob/main/README.md). From 8eb699de7becdefc6d72555d9fb17c9f06235dc4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:24:39 -0700 Subject: [PATCH 0139/1218] Datasette Lite in Getting Started docs, closes #1781 --- docs/getting_started.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 00b753a9..571540cf 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -21,6 +21,17 @@ Datasette has several `tutorials `__ to help you - `Exploring a database with Datasette `__ shows how to use the Datasette web interface to explore a new database. - `Learn SQL with Datasette `__ introduces SQL, and shows how to use that query language to ask questions of your data. +.. _getting_started_datasette_lite: + +Datasette in your browser with Datasette Lite +--------------------------------------------- + +`Datasette Lite `__ is Datasette packaged using WebAssembly so that it runs entirely in your browser, no Python web application server required. + +You can pass a URL to a CSV, SQLite or raw SQL file directly to Datasette Lite to explore that data in your browser. + +This `example link `__ opens Datasette Lite and loads the SQL Murder Mystery example database from `Northwestern University Knight Lab `__. + .. _getting_started_glitch: Try Datasette without installing anything using Glitch From df4fd2d7ddca8956d8a51c72ce007b8c75227f32 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:44:02 -0700 Subject: [PATCH 0140/1218] _sort= works even if sort column not selected, closes #1773 --- datasette/views/table.py | 22 +++++++++++++++++++++- tests/test_table_api.py | 2 ++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index cd4be823..94d2673b 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -630,7 +630,27 @@ class TableView(DataView): next_value = path_from_row_pks(rows[-2], pks, use_rowid) # If there's a sort or sort_desc, add that value as a prefix if (sort or sort_desc) and not is_view: - prefix = rows[-2][sort or sort_desc] + try: + prefix = rows[-2][sort or sort_desc] + except IndexError: + # sort/sort_desc column missing from SELECT - look up value by PK instead + prefix_where_clause = " and ".join( + "[{}] = :pk{}".format(pk, i) for i, pk in enumerate(pks) + ) + prefix_lookup_sql = "select [{}] from [{}] where {}".format( + sort or sort_desc, table_name, prefix_where_clause + ) + prefix = ( + await db.execute( + prefix_lookup_sql, + { + **{ + "pk{}".format(i): rows[-2][pk] + for i, pk in enumerate(pks) + } + }, + ) + ).single_value() if isinstance(prefix, dict) and "value" in prefix: prefix = prefix["value"] if prefix is None: diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 9db383c3..e56a72b5 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -288,6 +288,8 @@ def test_paginate_compound_keys_with_extra_filters(app_client): ), # text column contains '$null' - ensure it doesn't confuse pagination: ("_sort=text", lambda row: row["text"], "sorted by text"), + # Still works if sort column removed using _col= + ("_sort=text&_col=content", lambda row: row["text"], "sorted by text"), ], ) def test_sortable(app_client, query_string, sort_key, human_description_en): From 668415df9f6334bd255c22ab02018bed5bc14edd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:47:17 -0700 Subject: [PATCH 0141/1218] Upgrade Docker baes to 3.10.6-slim-bullseye - refs #1768 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 42f5529b..ee7ed957 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9.7-slim-bullseye as build +FROM python:3.10.6-slim-bullseye as build # Version of Datasette to install, e.g. 0.55 # docker build . -t datasette --build-arg VERSION=0.55 From 080d4b3e065d78faf977c6ded6ead31aae24e2ae Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 08:49:14 -0700 Subject: [PATCH 0142/1218] Switch to python:3.10.6-slim-bullseye for datasette publish - refs #1768 --- datasette/utils/__init__.py | 2 +- demos/apache-proxy/Dockerfile | 2 +- docs/publish.rst | 2 +- tests/test_package.py | 2 +- tests/test_publish_cloudrun.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 77768112..d148cc2c 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -390,7 +390,7 @@ def make_dockerfile( "SQLITE_EXTENSIONS" ] = "/usr/lib/x86_64-linux-gnu/mod_spatialite.so" return """ -FROM python:3.8 +FROM python:3.10.6-slim-bullseye COPY . /app WORKDIR /app {apt_get_extras} diff --git a/demos/apache-proxy/Dockerfile b/demos/apache-proxy/Dockerfile index 6c921963..70b33bec 100644 --- a/demos/apache-proxy/Dockerfile +++ b/demos/apache-proxy/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9.7-slim-bullseye +FROM python:3.10.6-slim-bullseye RUN apt-get update && \ apt-get install -y apache2 supervisor && \ diff --git a/docs/publish.rst b/docs/publish.rst index 166f2883..9c7c99cc 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -144,7 +144,7 @@ Here's example output for the package command:: $ datasette package parlgov.db --extra-options="--setting sql_time_limit_ms 2500" Sending build context to Docker daemon 4.459MB - Step 1/7 : FROM python:3 + Step 1/7 : FROM python:3.10.6-slim-bullseye ---> 79e1dc9af1c1 Step 2/7 : COPY . /app ---> Using cache diff --git a/tests/test_package.py b/tests/test_package.py index 02ed1775..ac15e61e 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -12,7 +12,7 @@ class CaptureDockerfile: EXPECTED_DOCKERFILE = """ -FROM python:3.8 +FROM python:3.10.6-slim-bullseye COPY . /app WORKDIR /app diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 3427f4f7..60079ab3 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -223,7 +223,7 @@ def test_publish_cloudrun_plugin_secrets( ) expected = textwrap.dedent( r""" - FROM python:3.8 + FROM python:3.10.6-slim-bullseye COPY . /app WORKDIR /app @@ -290,7 +290,7 @@ def test_publish_cloudrun_apt_get_install( ) expected = textwrap.dedent( r""" - FROM python:3.8 + FROM python:3.10.6-slim-bullseye COPY . /app WORKDIR /app From 1563c22a8c65e6cff5194aa07df54d0ab8d4eecb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 09:13:12 -0700 Subject: [PATCH 0143/1218] Don't duplicate _sort_desc, refs #1738 --- datasette/views/table.py | 2 +- tests/test_table_html.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 94d2673b..49c30c9c 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -710,7 +710,7 @@ class TableView(DataView): for key in request.args: if ( key.startswith("_") - and key not in ("_sort", "_search", "_next") + and key not in ("_sort", "_sort_desc", "_search", "_next") and "__" not in key ): for value in request.args.getlist(key): diff --git a/tests/test_table_html.py b/tests/test_table_html.py index d3cb3e17..f3808ea3 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -828,6 +828,7 @@ def test_other_hidden_form_fields(app_client, path, expected_hidden): [ ("/fixtures/searchable?_search=terry", []), ("/fixtures/searchable?_sort=text2", []), + ("/fixtures/searchable?_sort_desc=text2", []), ("/fixtures/searchable?_sort=text2&_where=1", [("_where", "1")]), ], ) From c1396bf86033a7bd99fa0c0431f585475391a11a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 09:34:31 -0700 Subject: [PATCH 0144/1218] Don't allow canned write queries on immutable DBs, closes #1728 --- datasette/templates/query.html | 6 ++++- datasette/views/database.py | 4 ++++ tests/test_canned_queries.py | 40 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 8c920527..cee779fc 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -28,6 +28,10 @@ {% block content %} +{% if canned_write and db_is_immutable %} +

This query cannot be executed because the database is immutable.

+{% endif %} +

{{ metadata.title or database }}{% if canned_query and not metadata.title %}: {{ canned_query }}{% endif %}{% if private %} ๐Ÿ”’{% endif %}

{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} @@ -61,7 +65,7 @@

{% if not hide_sql %}{% endif %} {% if canned_write %}{% endif %} - + {{ show_hide_hidden }} {% if canned_query and edit_sql_url %}Edit SQL{% endif %}

diff --git a/datasette/views/database.py b/datasette/views/database.py index 42058752..77632b9d 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -273,6 +273,9 @@ class QueryView(DataView): # Execute query - as write or as read if write: if request.method == "POST": + # If database is immutable, return an error + if not db.is_mutable: + raise Forbidden("Database is immutable") body = await request.post_body() body = body.decode("utf-8").strip() if body.startswith("{") and body.endswith("}"): @@ -326,6 +329,7 @@ class QueryView(DataView): async def extra_template(): return { "request": request, + "db_is_immutable": not db.is_mutable, "path_with_added_args": path_with_added_args, "path_with_removed_args": path_with_removed_args, "named_parameter_values": named_parameter_values, diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index 5abffdcc..976aa0db 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -53,6 +53,26 @@ def canned_write_client(tmpdir): yield client +@pytest.fixture +def canned_write_immutable_client(): + with make_app_client( + is_immutable=True, + metadata={ + "databases": { + "fixtures": { + "queries": { + "add": { + "sql": "insert into sortable (text) values (:text)", + "write": True, + }, + } + } + } + }, + ) as client: + yield client + + def test_canned_query_with_named_parameter(app_client): response = app_client.get("/fixtures/neighborhood_search.json?text=town") assert [ @@ -373,3 +393,23 @@ def test_canned_write_custom_template(canned_write_client): response.headers["link"] == 'http://localhost/data/update_name.json; rel="alternate"; type="application/json+datasette"' ) + + +def test_canned_write_query_disabled_for_immutable_database( + canned_write_immutable_client, +): + response = canned_write_immutable_client.get("/fixtures/add") + assert response.status == 200 + assert ( + "This query cannot be executed because the database is immutable." + in response.text + ) + assert '' in response.text + # Submitting form should get a forbidden error + response = canned_write_immutable_client.post( + "/fixtures/add", + {"text": "text"}, + csrftoken_from=True, + ) + assert response.status == 403 + assert "Database is immutable" in response.text From 82167105ee699c850cc106ea927de1ad09276cfe Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 10:07:30 -0700 Subject: [PATCH 0145/1218] --min-instances and --max-instances Cloud Run publish options, closes #1779 --- datasette/publish/cloudrun.py | 26 +++++++++++++++++--- docs/cli-reference.rst | 2 ++ tests/test_publish_cloudrun.py | 43 ++++++++++++++++++++++++---------- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index 50b2b2fd..77274eb0 100644 --- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -52,6 +52,16 @@ def publish_subcommand(publish): multiple=True, help="Additional packages to apt-get install", ) + @click.option( + "--max-instances", + type=int, + help="Maximum Cloud Run instances", + ) + @click.option( + "--min-instances", + type=int, + help="Minimum Cloud Run instances", + ) def cloudrun( files, metadata, @@ -79,6 +89,8 @@ def publish_subcommand(publish): cpu, timeout, apt_get_extras, + max_instances, + min_instances, ): "Publish databases to Datasette running on Cloud Run" fail_if_publish_binary_not_installed( @@ -168,12 +180,20 @@ def publish_subcommand(publish): ), shell=True, ) + extra_deploy_options = [] + for option, value in ( + ("--memory", memory), + ("--cpu", cpu), + ("--max-instances", max_instances), + ("--min-instances", min_instances), + ): + if value: + extra_deploy_options.append("{} {}".format(option, value)) check_call( - "gcloud run deploy --allow-unauthenticated --platform=managed --image {} {}{}{}".format( + "gcloud run deploy --allow-unauthenticated --platform=managed --image {} {}{}".format( image_id, service, - " --memory {}".format(memory) if memory else "", - " --cpu {}".format(cpu) if cpu else "", + " " + " ".join(extra_deploy_options) if extra_deploy_options else "", ), shell=True, ) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 1c1aff15..415af13c 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -251,6 +251,8 @@ datasette publish cloudrun --help --cpu [1|2|4] Number of vCPUs to allocate in Cloud Run --timeout INTEGER Build timeout in seconds --apt-get-install TEXT Additional packages to apt-get install + --max-instances INTEGER Maximum Cloud Run instances + --min-instances INTEGER Minimum Cloud Run instances --help Show this message and exit. diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 60079ab3..e64534d2 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -105,19 +105,36 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory): @mock.patch("datasette.publish.cloudrun.check_output") @mock.patch("datasette.publish.cloudrun.check_call") @pytest.mark.parametrize( - "memory,cpu,timeout,expected_gcloud_args", + "memory,cpu,timeout,min_instances,max_instances,expected_gcloud_args", [ - ["1Gi", None, None, "--memory 1Gi"], - ["2G", None, None, "--memory 2G"], - ["256Mi", None, None, "--memory 256Mi"], - ["4", None, None, None], - ["GB", None, None, None], - [None, 1, None, "--cpu 1"], - [None, 2, None, "--cpu 2"], - [None, 3, None, None], - [None, 4, None, "--cpu 4"], - ["2G", 4, None, "--memory 2G --cpu 4"], - [None, None, 1800, "--timeout 1800"], + ["1Gi", None, None, None, None, "--memory 1Gi"], + ["2G", None, None, None, None, "--memory 2G"], + ["256Mi", None, None, None, None, "--memory 256Mi"], + [ + "4", + None, + None, + None, + None, + None, + ], + [ + "GB", + None, + None, + None, + None, + None, + ], + [None, 1, None, None, None, "--cpu 1"], + [None, 2, None, None, None, "--cpu 2"], + [None, 3, None, None, None, None], + [None, 4, None, None, None, "--cpu 4"], + ["2G", 4, None, None, None, "--memory 2G --cpu 4"], + [None, None, 1800, None, None, "--timeout 1800"], + [None, None, None, 2, None, "--min-instances 2"], + [None, None, None, 2, 4, "--min-instances 2 --max-instances 4"], + [None, 2, None, None, 4, "--cpu 2 --max-instances 4"], ], ) def test_publish_cloudrun_memory_cpu( @@ -127,6 +144,8 @@ def test_publish_cloudrun_memory_cpu( memory, cpu, timeout, + min_instances, + max_instances, expected_gcloud_args, tmp_path_factory, ): From 5e6c5c9e3191a80f17a91c5205d9d69efdebb73f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 10:18:47 -0700 Subject: [PATCH 0146/1218] Document datasette.config_dir, refs #1766 --- docs/internals.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/internals.rst b/docs/internals.rst index da135282..20797e98 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -260,6 +260,7 @@ Constructor parameters include: - ``files=[...]`` - a list of database files to open - ``immutables=[...]`` - a list of database files to open in immutable mode - ``metadata={...}`` - a dictionary of :ref:`metadata` +- ``config_dir=...`` - the :ref:`configuration directory ` to use, stored in ``datasette.config_dir`` .. _datasette_databases: From 815162cf029fab9f1c9308c1d6ecdba7ee369ebe Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 10:32:42 -0700 Subject: [PATCH 0147/1218] Release 0.62 Refs #903, #1300, #1683, #1701, #1712, #1717, #1718, #1728, #1733, #1738, #1739, #1744, #1746, #1748, #1759, #1766, #1768, #1770, #1773, #1779 Closes #1782 --- datasette/version.py | 2 +- docs/changelog.rst | 53 ++++++++++++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/datasette/version.py b/datasette/version.py index 86f4cf7e..0453346c 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.62a1" +__version__ = "0.62" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 3f105811..1225c63f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,33 +4,52 @@ Changelog ========= -.. _v0_62a1: +.. _v0_62: -0.62a1 (2022-07-17) +0.62 (2022-08-14) ------------------- +Datasette can now run entirely in your browser using WebAssembly. Try out `Datasette Lite `__, take a look `at the code `__ or read more about it in `Datasette Lite: a server-side Python web application running in a browser `__. + +Datasette now has a `Discord community `__ for questions and discussions about Datasette and its ecosystem of projects. + +Features +~~~~~~~~ + +- Datasette is now compatible with `Pyodide `__. This is the enabling technology behind `Datasette Lite `__. (:issue:`1733`) +- Database file downloads now implement conditional GET using ETags. (:issue:`1739`) +- HTML for facet results and suggested results has been extracted out into new templates ``_facet_results.html`` and ``_suggested_facets.html``. Thanks, M. Nasimul Haque. (`#1759 `__) +- Datasette now runs some SQL queries in parallel. This has limited impact on performance, see `this research issue `__ for details. +- New ``--nolock`` option for ignoring file locks when opening read-only databases. (:issue:`1744`) +- Spaces in the database names in URLs are now encoded as ``+`` rather than ``~20``. (:issue:`1701`) +- ```` is now displayed as ```` and is accompanied by tooltip showing "2.3MB". (:issue:`1712`) +- The base Docker image used by ``datasette publish cloudrun``, ``datasette package`` and the `official Datasette image `__ has been upgraded to ``3.10.6-slim-bullseye``. (:issue:`1768`) +- Canned writable queries against immutable databases now show a warning message. (:issue:`1728`) +- ``datasette publish cloudrun`` has a new ``--timeout`` option which can be used to increase the time limit applied by the Google Cloud build environment. Thanks, Tim Sherratt. (`#1717 `__) +- ``datasette publish cloudrun`` has new ``--min-instances`` and ``--max-instances`` options. (:issue:`1779`) + +Plugin hooks +~~~~~~~~~~~~ + - New plugin hook: :ref:`handle_exception() `, for custom handling of exceptions caught by Datasette. (:issue:`1770`) - The :ref:`render_cell() ` plugin hook is now also passed a ``row`` argument, representing the ``sqlite3.Row`` object that is being rendered. (:issue:`1300`) -- New ``--nolock`` option for ignoring file locks when opening read-only databases. (:issue:`1744`) -- Documentation now uses the `Furo `__ Sphinx theme. (:issue:`1746`) -- Datasette now has a `Discord community `__. -- Database file downloads now implement conditional GET using ETags. (:issue:`1739`) -- Examples in the documentation now include a copy-to-clipboard button. (:issue:`1748`) -- HTML for facet results and suggested results has been extracted out into new templates ``_facet_results.html`` and ``_suggested_facets.html``. Thanks, M. Nasimul Haque. (`#1759 `__) +- The :ref:`configuration directory ` is now stored in ``datasette.config_dir``, making it available to plugins. Thanks, Chris Amico. (`#1766 `__) -.. _v0_62a0: +Bug fixes +~~~~~~~~~ -0.62a0 (2022-05-02) -------------------- - -- Datasette now runs some SQL queries in parallel. This has limited impact on performance, see `this research issue `__ for details. -- Datasette should now be compatible with Pyodide. (:issue:`1733`) -- ``datasette publish cloudrun`` has a new ``--timeout`` option which can be used to increase the time limit applied by the Google Cloud build environment. Thanks, Tim Sherratt. (`#1717 `__) -- Spaces in database names are now encoded as ``+`` rather than ``~20``. (:issue:`1701`) -- ```` is now displayed as ```` and is accompanied by tooltip showing "2.3MB". (:issue:`1712`) - Don't show the facet option in the cog menu if faceting is not allowed. (:issue:`1683`) +- ``?_sort`` and ``?_sort_desc`` now work if the column that is being sorted has been excluded from the query using ``?_col=`` or ``?_nocol=``. (:issue:`1773`) +- Fixed bug where ``?_sort_desc`` was duplicated in the URL every time the Apply button was clicked. (:issue:`1738`) + +Documentation +~~~~~~~~~~~~~ + +- Examples in the documentation now include a copy-to-clipboard button. (:issue:`1748`) +- Documentation now uses the `Furo `__ Sphinx theme. (:issue:`1746`) - Code examples in the documentation are now all formatted using Black. (:issue:`1718`) - ``Request.fake()`` method is now documented, see :ref:`internals_request`. +- New documentation for plugin authors: :ref:`testing_plugins_register_in_test`. (:issue:`903`) .. _v0_61_1: From a107e3a028923c1ab3911c0f880011283f93f368 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Aug 2022 16:07:46 -0700 Subject: [PATCH 0148/1218] datasette-sentry is an example of handle_exception --- docs/plugin_hooks.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index aec1df56..c6f35d06 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1261,6 +1261,8 @@ This example logs an error to `Sentry `__ and then renders a return inner +Example: `datasette-sentry `_ + .. _plugin_hook_menu_links: menu_links(datasette, actor, request) From 481eb96d85291cdfa5767a83884a1525dfc382d8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 15 Aug 2022 13:17:28 -0700 Subject: [PATCH 0149/1218] https://datasette.io/tutorials/clean-data tutorial Refs #1783 --- docs/getting_started.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 571540cf..a9eaa404 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -20,6 +20,7 @@ Datasette has several `tutorials `__ to help you - `Exploring a database with Datasette `__ shows how to use the Datasette web interface to explore a new database. - `Learn SQL with Datasette `__ introduces SQL, and shows how to use that query language to ask questions of your data. +- `Cleaning data with sqlite-utils and Datasette `__ guides you through using `sqlite-utils `__ to turn a CSV file into a database that you can explore using Datasette. .. _getting_started_datasette_lite: From a3e6f1b16757fb2d39e7ddba4e09eda2362508bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 18 Aug 2022 09:06:02 -0700 Subject: [PATCH 0150/1218] Increase height of non-JS textarea to fit query Closes #1786 --- datasette/templates/query.html | 3 ++- tests/test_html.py | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datasette/templates/query.html b/datasette/templates/query.html index cee779fc..a35e3afe 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -45,7 +45,8 @@ {% endif %} {% if not hide_sql %} {% if editable and allow_execute_sql %} -

+

{% else %}
{% if query %}{{ query.sql }}{% endif %}
{% endif %} diff --git a/tests/test_html.py b/tests/test_html.py index 409fec68..be21bd84 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -695,10 +695,8 @@ def test_query_error(app_client): response = app_client.get("/fixtures?sql=select+*+from+notatable") html = response.text assert '

no such table: notatable

' in html - assert ( - '' - in html - ) + assert '" in html assert "0 results" not in html From 09a41662e70b788469157bb58ed9ca4acdf2f904 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 18 Aug 2022 09:10:48 -0700 Subject: [PATCH 0151/1218] Fix typo --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index c6f35d06..30bd75b7 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -874,7 +874,7 @@ canned_queries(datasette, database, actor) ``actor`` - dictionary or None The currently authenticated :ref:`actor `. -Ues this hook to return a dictionary of additional :ref:`canned query ` definitions for the specified database. The return value should be the same shape as the JSON described in the :ref:`canned query ` documentation. +Use this hook to return a dictionary of additional :ref:`canned query ` definitions for the specified database. The return value should be the same shape as the JSON described in the :ref:`canned query ` documentation. .. code-block:: python From 6c0ba7c00c2ae3ecbb5309efa59079cea1c850b3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 18 Aug 2022 14:52:04 -0700 Subject: [PATCH 0152/1218] Improved CLI reference documentation, refs #1787 --- datasette/cli.py | 2 +- docs/changelog.rst | 2 +- docs/cli-reference.rst | 325 ++++++++++++++++++++++++++++++--------- docs/getting_started.rst | 50 ------ docs/index.rst | 2 +- docs/publish.rst | 2 + 6 files changed, 259 insertions(+), 124 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 8781747c..f2a03d53 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -282,7 +282,7 @@ def package( port, **extra_metadata, ): - """Package specified SQLite files into a new datasette Docker container""" + """Package SQLite files into a Datasette Docker container""" if not shutil.which("docker"): click.secho( ' The package command requires "docker" to be installed and configured ', diff --git a/docs/changelog.rst b/docs/changelog.rst index 1225c63f..f9dcc980 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -621,7 +621,7 @@ See also `Datasette 0.49: The annotated release notes `__ for conversations about the project that go beyond just bug reports and issues. - Datasette can now be installed on macOS using Homebrew! Run ``brew install simonw/datasette/datasette``. See :ref:`installation_homebrew`. (:issue:`335`) - Two new commands: ``datasette install name-of-plugin`` and ``datasette uninstall name-of-plugin``. These are equivalent to ``pip install`` and ``pip uninstall`` but automatically run in the same virtual environment as Datasette, so users don't have to figure out where that virtual environment is - useful for installations created using Homebrew or ``pipx``. See :ref:`plugins_installing`. (:issue:`925`) -- A new command-line option, ``datasette --get``, accepts a path to a URL within the Datasette instance. It will run that request through Datasette (without starting a web server) and print out the response. See :ref:`getting_started_datasette_get` for an example. (:issue:`926`) +- A new command-line option, ``datasette --get``, accepts a path to a URL within the Datasette instance. It will run that request through Datasette (without starting a web server) and print out the response. See :ref:`cli_datasette_get` for an example. (:issue:`926`) .. _v0_46: diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 415af13c..a1e56774 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -4,44 +4,34 @@ CLI reference =============== -This page lists the ``--help`` for every ``datasette`` CLI command. +The ``datasette`` CLI tool provides a number of commands. + +Running ``datasette`` without specifying a command runs the default command, ``datasette serve``. See :ref:`cli_help_serve___help` for the full list of options for that command. .. [[[cog from datasette import cli from click.testing import CliRunner import textwrap - commands = [ - ["--help"], - ["serve", "--help"], - ["serve", "--help-settings"], - ["plugins", "--help"], - ["publish", "--help"], - ["publish", "cloudrun", "--help"], - ["publish", "heroku", "--help"], - ["package", "--help"], - ["inspect", "--help"], - ["install", "--help"], - ["uninstall", "--help"], - ] - cog.out("\n") - for command in commands: - title = "datasette " + " ".join(command) - ref = "_cli_help_" + ("_".join(command).replace("-", "_")) - cog.out(".. {}:\n\n".format(ref)) - cog.out(title + "\n") - cog.out(("=" * len(title)) + "\n\n") + def help(args): + title = "datasette " + " ".join(args) cog.out("::\n\n") - result = CliRunner().invoke(cli.cli, command) + result = CliRunner().invoke(cli.cli, args) output = result.output.replace("Usage: cli ", "Usage: datasette ") cog.out(textwrap.indent(output, ' ')) cog.out("\n\n") .. ]]] +.. [[[end]]] .. _cli_help___help: datasette --help ================ +Running ``datasette --help`` shows a list of all of the available commands. + +.. [[[cog + help(["--help"]) +.. ]]] :: Usage: datasette [OPTIONS] COMMAND [ARGS]... @@ -59,17 +49,34 @@ datasette --help serve* Serve up specified SQLite database files with a web UI inspect Generate JSON summary of provided database files install Install plugins and packages from PyPI into the same... - package Package specified SQLite files into a new datasette Docker... + package Package SQLite files into a Datasette Docker container plugins List currently installed plugins publish Publish specified SQLite database files to the internet along... uninstall Uninstall plugins and Python packages from the Datasette... +.. [[[end]]] + +Additional commands added by plugins that use the :ref:`plugin_hook_register_commands` hook will be listed here as well. + .. _cli_help_serve___help: -datasette serve --help -====================== +datasette serve +=============== +This command starts the Datasette web application running on your machine:: + + datasette serve mydatabase.db + +Or since this is the default command you can run this instead:: + + datasette mydatabase.db + +Once started you can access it at ``http://localhost:8001`` + +.. [[[cog + help(["serve", "--help"]) +.. ]]] :: Usage: datasette serve [OPTIONS] [FILES]... @@ -121,11 +128,75 @@ datasette serve --help --help Show this message and exit. +.. [[[end]]] + + +.. _cli_datasette_get: + +datasette --get +--------------- + +The ``--get`` option to ``datasette serve`` (or just ``datasette``) specifies the path to a page within Datasette and causes Datasette to output the content from that path without starting the web server. + +This means that all of Datasette's functionality can be accessed directly from the command-line. + +For example:: + + $ datasette --get '/-/versions.json' | jq . + { + "python": { + "version": "3.8.5", + "full": "3.8.5 (default, Jul 21 2020, 10:48:26) \n[Clang 11.0.3 (clang-1103.0.32.62)]" + }, + "datasette": { + "version": "0.46+15.g222a84a.dirty" + }, + "asgi": "3.0", + "uvicorn": "0.11.8", + "sqlite": { + "version": "3.32.3", + "fts_versions": [ + "FTS5", + "FTS4", + "FTS3" + ], + "extensions": { + "json1": null + }, + "compile_options": [ + "COMPILER=clang-11.0.3", + "ENABLE_COLUMN_METADATA", + "ENABLE_FTS3", + "ENABLE_FTS3_PARENTHESIS", + "ENABLE_FTS4", + "ENABLE_FTS5", + "ENABLE_GEOPOLY", + "ENABLE_JSON1", + "ENABLE_PREUPDATE_HOOK", + "ENABLE_RTREE", + "ENABLE_SESSION", + "MAX_VARIABLE_NUMBER=250000", + "THREADSAFE=1" + ] + } + } + +The exit code will be 0 if the request succeeds and 1 if the request produced an HTTP status code other than 200 - e.g. a 404 or 500 error. + +This lets you use ``datasette --get /`` to run tests against a Datasette application in a continuous integration environment such as GitHub Actions. + .. _cli_help_serve___help_settings: datasette serve --help-settings -=============================== +------------------------------- +This command outputs all of the available Datasette :ref:`settings `. + +These can be passed to ``datasette serve`` using ``datasette serve --setting name value``. + +.. [[[cog + help(["--help-settings"]) +.. ]]] :: Settings: @@ -170,11 +241,18 @@ datasette serve --help-settings +.. [[[end]]] + .. _cli_help_plugins___help: -datasette plugins --help -======================== +datasette plugins +================= +Output JSON showing all currently installed plugins, their versions, whether they include static files or templates and which :ref:`plugin_hooks` they use. + +.. [[[cog + help(["plugins", "--help"]) +.. ]]] :: Usage: datasette plugins [OPTIONS] @@ -187,11 +265,110 @@ datasette plugins --help --help Show this message and exit. +.. [[[end]]] + +Example output: + +.. code-block:: json + + [ + { + "name": "datasette-geojson", + "static": false, + "templates": false, + "version": "0.3.1", + "hooks": [ + "register_output_renderer" + ] + }, + { + "name": "datasette-geojson-map", + "static": true, + "templates": false, + "version": "0.4.0", + "hooks": [ + "extra_body_script", + "extra_css_urls", + "extra_js_urls" + ] + }, + { + "name": "datasette-leaflet", + "static": true, + "templates": false, + "version": "0.2.2", + "hooks": [ + "extra_body_script", + "extra_template_vars" + ] + } + ] + + +.. _cli_help_install___help: + +datasette install +================= + +Install new Datasette plugins. This command works like ``pip install`` but ensures that your plugins will be installed into the same environment as Datasette. + +This command:: + + datasette install datasette-cluster-map + +Would install the `datasette-cluster-map `__ plugin. + +.. [[[cog + help(["install", "--help"]) +.. ]]] +:: + + Usage: datasette install [OPTIONS] PACKAGES... + + Install plugins and packages from PyPI into the same environment as Datasette + + Options: + -U, --upgrade Upgrade packages to latest version + --help Show this message and exit. + + +.. [[[end]]] + +.. _cli_help_uninstall___help: + +datasette uninstall +=================== + +Uninstall one or more plugins. + +.. [[[cog + help(["uninstall", "--help"]) +.. ]]] +:: + + Usage: datasette uninstall [OPTIONS] PACKAGES... + + Uninstall plugins and Python packages from the Datasette environment + + Options: + -y, --yes Don't ask for confirmation + --help Show this message and exit. + + +.. [[[end]]] + .. _cli_help_publish___help: -datasette publish --help -======================== +datasette publish +================= +Shows a list of available deployment targets for :ref:`publishing data ` with Datasette. + +Additional deployment targets can be added by plugins that use the :ref:`plugin_hook_publish_subcommand` hook. + +.. [[[cog + help(["publish", "--help"]) +.. ]]] :: Usage: datasette publish [OPTIONS] COMMAND [ARGS]... @@ -207,11 +384,19 @@ datasette publish --help heroku Publish databases to Datasette running on Heroku +.. [[[end]]] + + .. _cli_help_publish_cloudrun___help: -datasette publish cloudrun --help -================================= +datasette publish cloudrun +========================== +See :ref:`publish_cloud_run`. + +.. [[[cog + help(["publish", "cloudrun", "--help"]) +.. ]]] :: Usage: datasette publish cloudrun [OPTIONS] [FILES]... @@ -256,11 +441,19 @@ datasette publish cloudrun --help --help Show this message and exit. +.. [[[end]]] + + .. _cli_help_publish_heroku___help: -datasette publish heroku --help -=============================== +datasette publish heroku +======================== +See :ref:`publish_heroku`. + +.. [[[cog + help(["publish", "heroku", "--help"]) +.. ]]] :: Usage: datasette publish heroku [OPTIONS] [FILES]... @@ -297,16 +490,23 @@ datasette publish heroku --help --help Show this message and exit. +.. [[[end]]] + .. _cli_help_package___help: -datasette package --help -======================== +datasette package +================= +Package SQLite files into a Datasette Docker container, see :ref:`cli_package`. + +.. [[[cog + help(["package", "--help"]) +.. ]]] :: Usage: datasette package [OPTIONS] FILES... - Package specified SQLite files into a new datasette Docker container + Package SQLite files into a Datasette Docker container Options: -t, --tag TEXT Name for the resulting Docker container, can @@ -335,11 +535,26 @@ datasette package --help --help Show this message and exit. +.. [[[end]]] + + .. _cli_help_inspect___help: -datasette inspect --help -======================== +datasette inspect +================= +Outputs JSON representing introspected data about one or more SQLite database files. + +If you are opening an immutable database, you can pass this file to the ``--inspect-data`` option to improve Datasette's performance by allowing it to skip running row counts against the database when it first starts running:: + + datasette inspect mydatabase.db > inspect-data.json + datasette serve -i mydatabase.db --inspect-file inspect-data.json + +This performance optimization is used automatically by some of the ``datasette publish`` commands. You are unlikely to need to apply this optimization manually. + +.. [[[cog + help(["inspect", "--help"]) +.. ]]] :: Usage: datasette inspect [OPTIONS] [FILES]... @@ -355,36 +570,4 @@ datasette inspect --help --help Show this message and exit. -.. _cli_help_install___help: - -datasette install --help -======================== - -:: - - Usage: datasette install [OPTIONS] PACKAGES... - - Install plugins and packages from PyPI into the same environment as Datasette - - Options: - -U, --upgrade Upgrade packages to latest version - --help Show this message and exit. - - -.. _cli_help_uninstall___help: - -datasette uninstall --help -========================== - -:: - - Usage: datasette uninstall [OPTIONS] PACKAGES... - - Uninstall plugins and Python packages from the Datasette environment - - Options: - -y, --yes Don't ask for confirmation - --help Show this message and exit. - - .. [[[end]]] diff --git a/docs/getting_started.rst b/docs/getting_started.rst index a9eaa404..6515ef8d 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -138,53 +138,3 @@ JSON in a more convenient format: } ] } - -.. _getting_started_datasette_get: - -datasette --get ---------------- - -The ``--get`` option can specify the path to a page within Datasette and cause Datasette to output the content from that path without starting the web server. This means that all of Datasette's functionality can be accessed directly from the command-line. For example:: - - $ datasette --get '/-/versions.json' | jq . - { - "python": { - "version": "3.8.5", - "full": "3.8.5 (default, Jul 21 2020, 10:48:26) \n[Clang 11.0.3 (clang-1103.0.32.62)]" - }, - "datasette": { - "version": "0.46+15.g222a84a.dirty" - }, - "asgi": "3.0", - "uvicorn": "0.11.8", - "sqlite": { - "version": "3.32.3", - "fts_versions": [ - "FTS5", - "FTS4", - "FTS3" - ], - "extensions": { - "json1": null - }, - "compile_options": [ - "COMPILER=clang-11.0.3", - "ENABLE_COLUMN_METADATA", - "ENABLE_FTS3", - "ENABLE_FTS3_PARENTHESIS", - "ENABLE_FTS4", - "ENABLE_FTS5", - "ENABLE_GEOPOLY", - "ENABLE_JSON1", - "ENABLE_PREUPDATE_HOOK", - "ENABLE_RTREE", - "ENABLE_SESSION", - "MAX_VARIABLE_NUMBER=250000", - "THREADSAFE=1" - ] - } - } - -The exit code will be 0 if the request succeeds and 1 if the request produced an HTTP status code other than 200 - e.g. a 404 or 500 error. This means you can use ``datasette --get /`` to run tests against a Datasette application in a continuous integration environment such as GitHub Actions. - -Running ``datasette`` without specifying a command runs the default command, ``datasette serve``. See :ref:`cli_help_serve___help` for the full list of options for that command. diff --git a/docs/index.rst b/docs/index.rst index efe196b3..5a9cc7ed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,6 +40,7 @@ Contents getting_started installation ecosystem + cli-reference pages publish deploying @@ -61,6 +62,5 @@ Contents plugin_hooks testing_plugins internals - cli-reference contributing changelog diff --git a/docs/publish.rst b/docs/publish.rst index 9c7c99cc..dd8566ed 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -56,6 +56,8 @@ Cloud Run provides a URL on the ``.run.app`` domain, but you can also point your See :ref:`cli_help_publish_cloudrun___help` for the full list of options for this command. +.. _publish_heroku: + Publishing to Heroku -------------------- From aff3df03d4fe0806ce432d1818f6643cdb2a854e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 18 Aug 2022 14:55:08 -0700 Subject: [PATCH 0153/1218] Ignore ro which stands for read only Refs #1787 where it caused tests to break --- docs/codespell-ignore-words.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/codespell-ignore-words.txt b/docs/codespell-ignore-words.txt index a625cde5..d6744d05 100644 --- a/docs/codespell-ignore-words.txt +++ b/docs/codespell-ignore-words.txt @@ -1 +1 @@ -AddWordsToIgnoreHere +ro From 0d9d33955b503c88a2c712144d97f094baa5d46d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 18 Aug 2022 16:06:12 -0700 Subject: [PATCH 0154/1218] Clarify you can publish multiple files, closes #1788 --- docs/publish.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/publish.rst b/docs/publish.rst index dd8566ed..d817ed31 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -31,7 +31,7 @@ Publishing to Google Cloud Run You will first need to install and configure the Google Cloud CLI tools by following `these instructions `__. -You can then publish a database to Google Cloud Run using the following command:: +You can then publish one or more SQLite database files to Google Cloud Run using the following command:: datasette publish cloudrun mydatabase.db --service=my-database @@ -63,7 +63,7 @@ Publishing to Heroku To publish your data using `Heroku `__, first create an account there and install and configure the `Heroku CLI tool `_. -You can publish a database to Heroku using the following command:: +You can publish one or more databases to Heroku using the following command:: datasette publish heroku mydatabase.db @@ -138,7 +138,7 @@ If a plugin has any :ref:`plugins_configuration_secret` you can use the ``--plug datasette package ================= -If you have docker installed (e.g. using `Docker for Mac `_) you can use the ``datasette package`` command to create a new Docker image in your local repository containing the datasette app bundled together with your selected SQLite databases:: +If you have docker installed (e.g. using `Docker for Mac `_) you can use the ``datasette package`` command to create a new Docker image in your local repository containing the datasette app bundled together with one or more SQLite databases:: datasette package mydatabase.db From 663ac431fe7202c85967568d82b2034f92b9aa43 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sat, 20 Aug 2022 02:04:16 +0200 Subject: [PATCH 0155/1218] Use Read the Docs action v1 (#1778) Read the Docs repository was renamed from `readthedocs/readthedocs-preview` to `readthedocs/actions/`. Now, the `preview` action is under `readthedocs/actions/preview` and is tagged as `v1` --- .github/workflows/documentation-links.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml index e7062a46..a54bd83a 100644 --- a/.github/workflows/documentation-links.yml +++ b/.github/workflows/documentation-links.yml @@ -11,6 +11,6 @@ jobs: documentation-links: runs-on: ubuntu-latest steps: - - uses: readthedocs/readthedocs-preview@main + - uses: readthedocs/actions/preview@v1 with: project-slug: "datasette" From 1d64c9a8dac45b9a3452acf8e76dfadea2b0bc49 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Tue, 23 Aug 2022 11:34:30 -0700 Subject: [PATCH 0156/1218] Add new entrypoint option to --load-extensions. (#1789) Thanks, @asg017 --- .gitignore | 6 ++++ datasette/app.py | 8 ++++- datasette/cli.py | 4 ++- datasette/utils/__init__.py | 11 ++++++ tests/ext.c | 48 ++++++++++++++++++++++++++ tests/test_load_extensions.py | 65 +++++++++++++++++++++++++++++++++++ 6 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 tests/ext.c create mode 100644 tests/test_load_extensions.py diff --git a/.gitignore b/.gitignore index 066009f0..277ff653 100644 --- a/.gitignore +++ b/.gitignore @@ -118,3 +118,9 @@ ENV/ .DS_Store node_modules .*.swp + +# In case someone compiled tests/ext.c for test_load_extensions, don't +# include it in source control. +tests/*.dylib +tests/*.so +tests/*.dll \ No newline at end of file diff --git a/datasette/app.py b/datasette/app.py index 1a9afc10..bb9232c9 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -559,7 +559,13 @@ class Datasette: if self.sqlite_extensions: conn.enable_load_extension(True) for extension in self.sqlite_extensions: - conn.execute("SELECT load_extension(?)", [extension]) + # "extension" is either a string path to the extension + # or a 2-item tuple that specifies which entrypoint to load. + if isinstance(extension, tuple): + path, entrypoint = extension + conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) + else: + conn.execute("SELECT load_extension(?)", [extension]) if self.setting("cache_size_kb"): conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}") # pylint: disable=no-member diff --git a/datasette/cli.py b/datasette/cli.py index f2a03d53..6eb42712 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -21,6 +21,7 @@ from .app import ( pm, ) from .utils import ( + LoadExtension, StartupError, check_connection, find_spatialite, @@ -128,9 +129,10 @@ def sqlite_extensions(fn): return click.option( "sqlite_extensions", "--load-extension", + type=LoadExtension(), envvar="SQLITE_EXTENSIONS", multiple=True, - help="Path to a SQLite extension to load", + help="Path to a SQLite extension to load, and optional entrypoint", )(fn) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index d148cc2c..0fc87d51 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -833,6 +833,17 @@ class StaticMount(click.ParamType): self.fail(f"{value} is not a valid directory path", param, ctx) return path, dirpath +# The --load-extension parameter can optionally include a specific entrypoint. +# This is done by appending ":entrypoint_name" after supplying the path to the extension +class LoadExtension(click.ParamType): + name = "path:entrypoint?" + + def convert(self, value, param, ctx): + if ":" not in value: + return value + path, entrypoint = value.split(":", 1) + return path, entrypoint + def format_bytes(bytes): current = float(bytes) diff --git a/tests/ext.c b/tests/ext.c new file mode 100644 index 00000000..5fe970d9 --- /dev/null +++ b/tests/ext.c @@ -0,0 +1,48 @@ +/* +** This file implements a SQLite extension with multiple entrypoints. +** +** The default entrypoint, sqlite3_ext_init, has a single function "a". +** The 1st alternate entrypoint, sqlite3_ext_b_init, has a single function "b". +** The 2nd alternate entrypoint, sqlite3_ext_c_init, has a single function "c". +** +** Compiling instructions: +** https://www.sqlite.org/loadext.html#compiling_a_loadable_extension +** +*/ + +#include "sqlite3ext.h" + +SQLITE_EXTENSION_INIT1 + +// SQL function that returns back the value supplied during sqlite3_create_function() +static void func(sqlite3_context *context, int argc, sqlite3_value **argv) { + sqlite3_result_text(context, (char *) sqlite3_user_data(context), -1, SQLITE_STATIC); +} + + +// The default entrypoint, since it matches the "ext.dylib"/"ext.so" name +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_ext_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) { + SQLITE_EXTENSION_INIT2(pApi); + return sqlite3_create_function(db, "a", 0, 0, "a", func, 0, 0); +} + +// Alternate entrypoint #1 +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_ext_b_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) { + SQLITE_EXTENSION_INIT2(pApi); + return sqlite3_create_function(db, "b", 0, 0, "b", func, 0, 0); +} + +// Alternate entrypoint #2 +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_ext_c_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) { + SQLITE_EXTENSION_INIT2(pApi); + return sqlite3_create_function(db, "c", 0, 0, "c", func, 0, 0); +} diff --git a/tests/test_load_extensions.py b/tests/test_load_extensions.py new file mode 100644 index 00000000..360bc8f3 --- /dev/null +++ b/tests/test_load_extensions.py @@ -0,0 +1,65 @@ +from datasette.app import Datasette +import pytest +from pathlib import Path + +# not necessarily a full path - the full compiled path looks like "ext.dylib" +# or another suffix, but sqlite will, under the hood, decide which file +# extension to use based on the operating system (apple=dylib, windows=dll etc) +# this resolves to "./ext", which is enough for SQLite to calculate the rest +COMPILED_EXTENSION_PATH = str(Path(__file__).parent / "ext") + +# See if ext.c has been compiled, based off the different possible suffixes. +def has_compiled_ext(): + for ext in ["dylib", "so", "dll"]: + path = Path(__file__).parent / f"ext.{ext}" + if path.is_file(): + return True + return False + + +@pytest.mark.asyncio +@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c") +async def test_load_extension_default_entrypoint(): + + # The default entrypoint only loads a() and NOT b() or c(), so those + # should fail. + ds = Datasette(sqlite_extensions=[COMPILED_EXTENSION_PATH]) + + response = await ds.client.get("/_memory.json?sql=select+a()") + assert response.status_code == 200 + assert response.json()["rows"][0][0] == "a" + + response = await ds.client.get("/_memory.json?sql=select+b()") + assert response.status_code == 400 + assert response.json()["error"] == "no such function: b" + + response = await ds.client.get("/_memory.json?sql=select+c()") + assert response.status_code == 400 + assert response.json()["error"] == "no such function: c" + + +@pytest.mark.asyncio +@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c") +async def test_load_extension_multiple_entrypoints(): + + # Load in the default entrypoint and the other 2 custom entrypoints, now + # all a(), b(), and c() should run successfully. + ds = Datasette( + sqlite_extensions=[ + COMPILED_EXTENSION_PATH, + (COMPILED_EXTENSION_PATH, "sqlite3_ext_b_init"), + (COMPILED_EXTENSION_PATH, "sqlite3_ext_c_init"), + ] + ) + + response = await ds.client.get("/_memory.json?sql=select+a()") + assert response.status_code == 200 + assert response.json()["rows"][0][0] == "a" + + response = await ds.client.get("/_memory.json?sql=select+b()") + assert response.status_code == 200 + assert response.json()["rows"][0][0] == "b" + + response = await ds.client.get("/_memory.json?sql=select+c()") + assert response.status_code == 200 + assert response.json()["rows"][0][0] == "c" From fd1086c6867f3e3582b1eca456e4ea95f6cecf8b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 23 Aug 2022 11:35:41 -0700 Subject: [PATCH 0157/1218] Applied Black, refs #1789 --- datasette/app.py | 4 ++-- datasette/utils/__init__.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index bb9232c9..f2a6763a 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -559,8 +559,8 @@ class Datasette: if self.sqlite_extensions: conn.enable_load_extension(True) for extension in self.sqlite_extensions: - # "extension" is either a string path to the extension - # or a 2-item tuple that specifies which entrypoint to load. + # "extension" is either a string path to the extension + # or a 2-item tuple that specifies which entrypoint to load. if isinstance(extension, tuple): path, entrypoint = extension conn.execute("SELECT load_extension(?, ?)", [path, entrypoint]) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 0fc87d51..bbaa0510 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -833,6 +833,7 @@ class StaticMount(click.ParamType): self.fail(f"{value} is not a valid directory path", param, ctx) return path, dirpath + # The --load-extension parameter can optionally include a specific entrypoint. # This is done by appending ":entrypoint_name" after supplying the path to the extension class LoadExtension(click.ParamType): From 456dc155d491a009942ace71a4e1827cddc6b93d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 23 Aug 2022 11:40:36 -0700 Subject: [PATCH 0158/1218] Ran cog, refs #1789 --- docs/cli-reference.rst | 95 +++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index a1e56774..f8419d58 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -84,48 +84,53 @@ Once started you can access it at ``http://localhost:8001`` Serve up specified SQLite database files with a web UI Options: - -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means - only connections from the local machine will be - allowed. Use 0.0.0.0 to listen to all IPs and allow - access from other machines. - -p, --port INTEGER RANGE Port for server, defaults to 8001. Use -p 0 to - automatically assign an available port. - [0<=x<=65535] - --uds TEXT Bind to a Unix domain socket - --reload Automatically reload if code or metadata change - detected - useful for development - --cors Enable CORS by serving Access-Control-Allow-Origin: - * - --load-extension TEXT Path to a SQLite extension to load - --inspect-file TEXT Path to JSON file created using "datasette inspect" - -m, --metadata FILENAME Path to JSON/YAML file containing license/source - metadata - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static MOUNT:DIRECTORY Serve static files from this directory at /MOUNT/... - --memory Make /_memory database available - --config CONFIG Deprecated: set config option using - configname:value. Use --setting instead. - --setting SETTING... Setting, see - docs.datasette.io/en/stable/settings.html - --secret TEXT Secret used for signing secure values, such as - signed cookies - --root Output URL that sets a cookie authenticating the - root user - --get TEXT Run an HTTP GET request against this path, print - results and exit - --version-note TEXT Additional note to show on /-/versions - --help-settings Show available settings - --pdb Launch debugger on any errors - -o, --open Open Datasette in your web browser - --create Create database files if they do not exist - --crossdb Enable cross-database joins using the /_memory - database - --nolock Ignore locking, open locked files in read-only mode - --ssl-keyfile TEXT SSL key file - --ssl-certfile TEXT SSL certificate file - --help Show this message and exit. + -i, --immutable PATH Database files to open in immutable mode + -h, --host TEXT Host for server. Defaults to 127.0.0.1 which + means only connections from the local machine + will be allowed. Use 0.0.0.0 to listen to all + IPs and allow access from other machines. + -p, --port INTEGER RANGE Port for server, defaults to 8001. Use -p 0 to + automatically assign an available port. + [0<=x<=65535] + --uds TEXT Bind to a Unix domain socket + --reload Automatically reload if code or metadata + change detected - useful for development + --cors Enable CORS by serving Access-Control-Allow- + Origin: * + --load-extension PATH:ENTRYPOINT? + Path to a SQLite extension to load, and + optional entrypoint + --inspect-file TEXT Path to JSON file created using "datasette + inspect" + -m, --metadata FILENAME Path to JSON/YAML file containing + license/source metadata + --template-dir DIRECTORY Path to directory containing custom templates + --plugins-dir DIRECTORY Path to directory containing custom plugins + --static MOUNT:DIRECTORY Serve static files from this directory at + /MOUNT/... + --memory Make /_memory database available + --config CONFIG Deprecated: set config option using + configname:value. Use --setting instead. + --setting SETTING... Setting, see + docs.datasette.io/en/stable/settings.html + --secret TEXT Secret used for signing secure values, such as + signed cookies + --root Output URL that sets a cookie authenticating + the root user + --get TEXT Run an HTTP GET request against this path, + print results and exit + --version-note TEXT Additional note to show on /-/versions + --help-settings Show available settings + --pdb Launch debugger on any errors + -o, --open Open Datasette in your web browser + --create Create database files if they do not exist + --crossdb Enable cross-database joins using the /_memory + database + --nolock Ignore locking, open locked files in read-only + mode + --ssl-keyfile TEXT SSL key file + --ssl-certfile TEXT SSL certificate file + --help Show this message and exit. .. [[[end]]] @@ -566,8 +571,10 @@ This performance optimization is used automatically by some of the ``datasette p Options: --inspect-file TEXT - --load-extension TEXT Path to a SQLite extension to load - --help Show this message and exit. + --load-extension PATH:ENTRYPOINT? + Path to a SQLite extension to load, and + optional entrypoint + --help Show this message and exit. .. [[[end]]] From ba35105eee2d3ba620e4f230028a02b2e2571df2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 23 Aug 2022 17:11:45 -0700 Subject: [PATCH 0159/1218] Test `--load-extension` in GitHub Actions (#1792) * Run the --load-extension test, refs #1789 * Ran cog, refs #1789 --- .github/workflows/test.yml | 3 +++ tests/test_api.py | 2 +- tests/test_html.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 90b6555e..e38d5ee9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,6 +24,9 @@ jobs: key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} restore-keys: | ${{ runner.os }}-pip- + - name: Build extension for --load-extension test + run: |- + (cd tests && gcc ext.c -fPIC -shared -o ext.so) - name: Install dependencies run: | pip install -e '.[test]' diff --git a/tests/test_api.py b/tests/test_api.py index 253c1718..f6db2f9d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -36,7 +36,7 @@ def test_homepage(app_client): # 4 hidden FTS tables + no_primary_key (hidden in metadata) assert d["hidden_tables_count"] == 6 # 201 in no_primary_key, plus 6 in other hidden tables: - assert d["hidden_table_rows_sum"] == 207 + assert d["hidden_table_rows_sum"] == 207, response.json assert d["views_count"] == 4 diff --git a/tests/test_html.py b/tests/test_html.py index be21bd84..d6e969ad 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -115,7 +115,7 @@ def test_database_page(app_client): assert fragment in response.text # And views - views_ul = soup.find("h2", text="Views").find_next_sibling("ul") + views_ul = soup.find("h2", string="Views").find_next_sibling("ul") assert views_ul is not None assert [ ("/fixtures/paginated_view", "paginated_view"), @@ -128,7 +128,7 @@ def test_database_page(app_client): ] == sorted([(a["href"], a.text) for a in views_ul.find_all("a")]) # And a list of canned queries - queries_ul = soup.find("h2", text="Queries").find_next_sibling("ul") + queries_ul = soup.find("h2", string="Queries").find_next_sibling("ul") assert queries_ul is not None assert [ ("/fixtures/from_async_hook", "from_async_hook"), From 51030df1869b3b574dd3584d1563415776b9cd4e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 11:35:40 -0700 Subject: [PATCH 0160/1218] Don't use upper bound dependencies any more See https://iscinumpy.dev/post/bound-version-constraints/ for the rationale behind this change. Closes #1800 --- setup.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index a1c51d0b..b2e50b38 100644 --- a/setup.py +++ b/setup.py @@ -42,21 +42,21 @@ setup( include_package_data=True, python_requires=">=3.7", install_requires=[ - "asgiref>=3.2.10,<3.6.0", - "click>=7.1.1,<8.2.0", + "asgiref>=3.2.10", + "click>=7.1.1", "click-default-group-wheel>=1.2.2", - "Jinja2>=2.10.3,<3.1.0", - "hupper~=1.9", + "Jinja2>=2.10.3", + "hupper>=1.9", "httpx>=0.20", - "pint~=0.9", - "pluggy>=1.0,<1.1", - "uvicorn~=0.11", - "aiofiles>=0.4,<0.9", - "janus>=0.6.2,<1.1", + "pint>=0.9", + "pluggy>=1.0", + "uvicorn>=0.11", + "aiofiles>=0.4", + "janus>=0.6.2", "asgi-csrf>=0.9", - "PyYAML>=5.3,<7.0", - "mergedeep>=1.1.1,<1.4.0", - "itsdangerous>=1.1,<3.0", + "PyYAML>=5.3", + "mergedeep>=1.1.1", + "itsdangerous>=1.1", ], entry_points=""" [console_scripts] @@ -72,14 +72,14 @@ setup( "sphinx-copybutton", ], "test": [ - "pytest>=5.2.2,<7.2.0", - "pytest-xdist>=2.2.1,<2.6", - "pytest-asyncio>=0.17,<0.20", - "beautifulsoup4>=4.8.1,<4.12.0", + "pytest>=5.2.2", + "pytest-xdist>=2.2.1", + "pytest-asyncio>=0.17", + "beautifulsoup4>=4.8.1", "black==22.6.0", "blacken-docs==1.12.1", - "pytest-timeout>=1.4.2,<2.2", - "trustme>=0.7,<0.10", + "pytest-timeout>=1.4.2", + "trustme>=0.7", "cogapp>=3.3.0", ], "rich": ["rich"], From 294ecd45f7801971dbeef383d0c5456ee95ab839 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Sep 2022 11:51:51 -0700 Subject: [PATCH 0161/1218] Bump black from 22.6.0 to 22.8.0 (#1797) Bumps [black](https://github.com/psf/black) from 22.6.0 to 22.8.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.6.0...22.8.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b2e50b38..92fa60d0 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setup( "pytest-xdist>=2.2.1", "pytest-asyncio>=0.17", "beautifulsoup4>=4.8.1", - "black==22.6.0", + "black==22.8.0", "blacken-docs==1.12.1", "pytest-timeout>=1.4.2", "trustme>=0.7", From b91e17280c05bbb9cf97432081bdcea8665879f9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 16:50:53 -0700 Subject: [PATCH 0162/1218] Run tests in serial, refs #1802 --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e38d5ee9..9c8c48ef 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,8 +33,7 @@ jobs: pip freeze - name: Run tests run: | - pytest -n auto -m "not serial" - pytest -m "serial" + pytest - name: Check if cog needs to be run run: | cog --check docs/*.rst From b2b901e8c4b939e50ee1117ffcd2881ed8a8e3bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 17:05:23 -0700 Subject: [PATCH 0163/1218] Skip SpatiaLite test if no conn.enable_load_extension() Ran into this problem while working on #1802 --- tests/test_spatialite.py | 2 ++ tests/utils.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tests/test_spatialite.py b/tests/test_spatialite.py index 8b98c5d6..c07a30e8 100644 --- a/tests/test_spatialite.py +++ b/tests/test_spatialite.py @@ -1,5 +1,6 @@ from datasette.app import Datasette from datasette.utils import find_spatialite, SpatialiteNotFound, SPATIALITE_FUNCTIONS +from .utils import has_load_extension import pytest @@ -13,6 +14,7 @@ def has_spatialite(): @pytest.mark.asyncio @pytest.mark.skipif(not has_spatialite(), reason="Requires SpatiaLite") +@pytest.mark.skipif(not has_load_extension(), reason="Requires enable_load_extension") async def test_spatialite_version_info(): ds = Datasette(sqlite_extensions=["spatialite"]) response = await ds.client.get("/-/versions.json") diff --git a/tests/utils.py b/tests/utils.py index 972300db..191ead9b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,6 @@ +from datasette.utils.sqlite import sqlite3 + + def assert_footer_links(soup): footer_links = soup.find("footer").findAll("a") assert 4 == len(footer_links) @@ -22,3 +25,8 @@ def inner_html(soup): # This includes the parent tag - so remove that inner_html = html.split(">", 1)[1].rsplit("<", 1)[0] return inner_html.strip() + + +def has_load_extension(): + conn = sqlite3.connect(":memory:") + return hasattr(conn, "enable_load_extension") From 1c29b925d300d1ee17047504473f2517767aa05b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 17:10:52 -0700 Subject: [PATCH 0164/1218] Run tests in serial again Because this didn't fix the issue I'm seeing in #1802 Revert "Run tests in serial, refs #1802" This reverts commit b91e17280c05bbb9cf97432081bdcea8665879f9. --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9c8c48ef..e38d5ee9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,8 @@ jobs: pip freeze - name: Run tests run: | - pytest + pytest -n auto -m "not serial" + pytest -m "serial" - name: Check if cog needs to be run run: | cog --check docs/*.rst From 64288d827f7ff97f825e10f714da3f781ecf9345 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 17:40:19 -0700 Subject: [PATCH 0165/1218] Workaround for test failure: RuntimeError: There is no current event loop (#1803) * Remove ensure_eventloop hack * Hack to recover from intermittent RuntimeError calling asyncio.Lock() --- datasette/app.py | 10 +++++++++- tests/test_cli.py | 27 ++++++++++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index f2a6763a..c6bbdaf0 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -231,7 +231,15 @@ class Datasette: self.inspect_data = inspect_data self.immutables = set(immutables or []) self.databases = collections.OrderedDict() - self._refresh_schemas_lock = asyncio.Lock() + try: + self._refresh_schemas_lock = asyncio.Lock() + except RuntimeError as rex: + # Workaround for intermittent test failure, see: + # https://github.com/simonw/datasette/issues/1802 + if "There is no current event loop in thread" in str(rex): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + self._refresh_schemas_lock = asyncio.Lock() self.crossdb = crossdb self.nolock = nolock if memory or crossdb or not self.files: diff --git a/tests/test_cli.py b/tests/test_cli.py index d0f6e26c..f0d28037 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,13 +22,6 @@ from unittest import mock import urllib -@pytest.fixture -def ensure_eventloop(): - # Workaround for "Event loop is closed" error - if asyncio.get_event_loop().is_closed(): - asyncio.set_event_loop(asyncio.new_event_loop()) - - def test_inspect_cli(app_client): runner = CliRunner() result = runner.invoke(cli, ["inspect", "fixtures.db"]) @@ -72,7 +65,7 @@ def test_serve_with_inspect_file_prepopulates_table_counts_cache(): ), ) def test_spatialite_error_if_attempt_to_open_spatialite( - ensure_eventloop, spatialite_paths, should_suggest_load_extension + spatialite_paths, should_suggest_load_extension ): with mock.patch("datasette.utils.SPATIALITE_PATHS", spatialite_paths): runner = CliRunner() @@ -199,14 +192,14 @@ def test_version(): @pytest.mark.parametrize("invalid_port", ["-1", "0.5", "dog", "65536"]) -def test_serve_invalid_ports(ensure_eventloop, invalid_port): +def test_serve_invalid_ports(invalid_port): runner = CliRunner(mix_stderr=False) result = runner.invoke(cli, ["--port", invalid_port]) assert result.exit_code == 2 assert "Invalid value for '-p'" in result.stderr -def test_setting(ensure_eventloop): +def test_setting(): runner = CliRunner() result = runner.invoke( cli, ["--setting", "default_page_size", "5", "--get", "/-/settings.json"] @@ -215,14 +208,14 @@ def test_setting(ensure_eventloop): assert json.loads(result.output)["default_page_size"] == 5 -def test_setting_type_validation(ensure_eventloop): +def test_setting_type_validation(): runner = CliRunner(mix_stderr=False) result = runner.invoke(cli, ["--setting", "default_page_size", "dog"]) assert result.exit_code == 2 assert '"default_page_size" should be an integer' in result.stderr -def test_config_deprecated(ensure_eventloop): +def test_config_deprecated(): # The --config option should show a deprecation message runner = CliRunner(mix_stderr=False) result = runner.invoke( @@ -233,14 +226,14 @@ def test_config_deprecated(ensure_eventloop): assert "will be deprecated in" in result.stderr -def test_sql_errors_logged_to_stderr(ensure_eventloop): +def test_sql_errors_logged_to_stderr(): runner = CliRunner(mix_stderr=False) result = runner.invoke(cli, ["--get", "/_memory.json?sql=select+blah"]) assert result.exit_code == 1 assert "sql = 'select blah', params = {}: no such column: blah\n" in result.stderr -def test_serve_create(ensure_eventloop, tmpdir): +def test_serve_create(tmpdir): runner = CliRunner() db_path = tmpdir / "does_not_exist_yet.db" assert not db_path.exists() @@ -258,7 +251,7 @@ def test_serve_create(ensure_eventloop, tmpdir): assert db_path.exists() -def test_serve_duplicate_database_names(ensure_eventloop, tmpdir): +def test_serve_duplicate_database_names(tmpdir): "'datasette db.db nested/db.db' should attach two databases, /db and /db_2" runner = CliRunner() db_1_path = str(tmpdir / "db.db") @@ -273,7 +266,7 @@ def test_serve_duplicate_database_names(ensure_eventloop, tmpdir): assert {db["name"] for db in databases} == {"db", "db_2"} -def test_serve_deduplicate_same_database_path(ensure_eventloop, tmpdir): +def test_serve_deduplicate_same_database_path(tmpdir): "'datasette db.db db.db' should only attach one database, /db" runner = CliRunner() db_path = str(tmpdir / "db.db") @@ -287,7 +280,7 @@ def test_serve_deduplicate_same_database_path(ensure_eventloop, tmpdir): @pytest.mark.parametrize( "filename", ["test-database (1).sqlite", "database (1).sqlite"] ) -def test_weird_database_names(ensure_eventloop, tmpdir, filename): +def test_weird_database_names(tmpdir, filename): # https://github.com/simonw/datasette/issues/1181 runner = CliRunner() db_path = str(tmpdir / filename) From c9d1943aede436fa3413fd49bc56335cbda4ad07 Mon Sep 17 00:00:00 2001 From: Daniel Rech Date: Tue, 6 Sep 2022 02:45:41 +0200 Subject: [PATCH 0166/1218] Fix word break in facets by adding ul.tight-bullets li word-break: break-all (#1794) Thanks, @dmr --- datasette/static/app.css | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/static/app.css b/datasette/static/app.css index af3e14d5..712b9925 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -260,6 +260,7 @@ ul.bullets li { ul.tight-bullets li { list-style-type: disc; margin-bottom: 0; + word-break: break-all; } a.not-underlined { text-decoration: none; From d80775a48d20917633792fdc9525f075d3bc2c7a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Sep 2022 17:44:44 -0700 Subject: [PATCH 0167/1218] Raise error if it's not about loops, refs #1802 --- datasette/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index c6bbdaf0..aeb81687 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -240,6 +240,8 @@ class Datasette: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) self._refresh_schemas_lock = asyncio.Lock() + else: + raise self.crossdb = crossdb self.nolock = nolock if memory or crossdb or not self.files: From 8430c3bc7dd22b173c1a8c6cd7180e3b31240cd1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 08:59:19 -0700 Subject: [PATCH 0168/1218] table facet_size in metadata, refs #1804 --- datasette/facets.py | 14 +++++++++++--- tests/test_facets.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index b15a758c..e70d42df 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -102,11 +102,19 @@ class Facet: def get_facet_size(self): facet_size = self.ds.setting("default_facet_size") max_returned_rows = self.ds.setting("max_returned_rows") + table_facet_size = None + if self.table: + tables_metadata = self.ds.metadata("tables", database=self.database) or {} + table_metadata = tables_metadata.get(self.table) or {} + if table_metadata: + table_facet_size = table_metadata.get("facet_size") custom_facet_size = self.request.args.get("_facet_size") - if custom_facet_size == "max": - facet_size = max_returned_rows - elif custom_facet_size and custom_facet_size.isdigit(): + if custom_facet_size and custom_facet_size.isdigit(): facet_size = int(custom_facet_size) + elif table_facet_size: + facet_size = table_facet_size + if facet_size == "max": + facet_size = max_returned_rows return min(facet_size, max_returned_rows) async def suggest(self): diff --git a/tests/test_facets.py b/tests/test_facets.py index c28dc43c..cbee23b0 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -581,6 +581,23 @@ async def test_facet_size(): ) data5 = response5.json() assert len(data5["facet_results"]["city"]["results"]) == 20 + # Now try messing with facet_size in the table metadata + ds._metadata_local = { + "databases": { + "test_facet_size": {"tables": {"neighbourhoods": {"facet_size": 6}}} + } + } + response6 = await ds.client.get("/test_facet_size/neighbourhoods.json?_facet=city") + data6 = response6.json() + assert len(data6["facet_results"]["city"]["results"]) == 6 + # Setting it to max bumps it up to 50 again + ds._metadata_local["databases"]["test_facet_size"]["tables"]["neighbourhoods"][ + "facet_size" + ] = "max" + data7 = ( + await ds.client.get("/test_facet_size/neighbourhoods.json?_facet=city") + ).json() + assert len(data7["facet_results"]["city"]["results"]) == 20 def test_other_types_of_facet_in_metadata(): From 303c6c733d95a6133558ec1b468f5bea5827d0d2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 11:05:00 -0700 Subject: [PATCH 0169/1218] Fix for incorrectly handled _facet_size=max, refs #1804 --- datasette/facets.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index e70d42df..7fb0c68b 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -109,12 +109,19 @@ class Facet: if table_metadata: table_facet_size = table_metadata.get("facet_size") custom_facet_size = self.request.args.get("_facet_size") - if custom_facet_size and custom_facet_size.isdigit(): - facet_size = int(custom_facet_size) - elif table_facet_size: - facet_size = table_facet_size - if facet_size == "max": - facet_size = max_returned_rows + if custom_facet_size: + if custom_facet_size == "max": + facet_size = max_returned_rows + elif custom_facet_size.isdigit(): + facet_size = int(custom_facet_size) + else: + # Invalid value, ignore it + custom_facet_size = None + if table_facet_size and not custom_facet_size: + if table_facet_size == "max": + facet_size = max_returned_rows + else: + facet_size = table_facet_size return min(facet_size, max_returned_rows) async def suggest(self): From 0a7815d2038255a0834c955066a2a16c01f707b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 11:06:49 -0700 Subject: [PATCH 0170/1218] Documentation for facet_size in metadata, closes #1804 --- docs/facets.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/facets.rst b/docs/facets.rst index 2a2eb039..6c9d99bd 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -129,6 +129,22 @@ You can specify :ref:`array ` or :ref:`date ] } +You can change the default facet size (the number of results shown for each facet) for a table using ``facet_size``: + +.. code-block:: json + + { + "databases": { + "sf-trees": { + "tables": { + "Street_Tree_List": { + "facets": ["qLegalStatus"], + "facet_size": 10 + } + } + } + } + } Suggested facets ---------------- From d0476897e10249bb4867473722270d02491c2c1f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 11:24:30 -0700 Subject: [PATCH 0171/1218] Fixed Sphinx warning about language = None --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 4ef6b768..8965974a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -71,7 +71,7 @@ release = "" # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. From ff9c87197dde8b09f9787ee878804cb6842ea5dc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 11:26:21 -0700 Subject: [PATCH 0172/1218] Fixed Sphinx warnings on cli-reference page --- docs/cli-reference.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index f8419d58..4a8465cb 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -14,7 +14,7 @@ Running ``datasette`` without specifying a command runs the default command, ``d import textwrap def help(args): title = "datasette " + " ".join(args) - cog.out("::\n\n") + cog.out("\n::\n\n") result = CliRunner().invoke(cli.cli, args) output = result.output.replace("Usage: cli ", "Usage: datasette ") cog.out(textwrap.indent(output, ' ')) @@ -32,6 +32,7 @@ Running ``datasette --help`` shows a list of all of the available commands. .. [[[cog help(["--help"]) .. ]]] + :: Usage: datasette [OPTIONS] COMMAND [ARGS]... @@ -77,6 +78,7 @@ Once started you can access it at ``http://localhost:8001`` .. [[[cog help(["serve", "--help"]) .. ]]] + :: Usage: datasette serve [OPTIONS] [FILES]... @@ -202,6 +204,7 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam .. [[[cog help(["--help-settings"]) .. ]]] + :: Settings: @@ -258,6 +261,7 @@ Output JSON showing all currently installed plugins, their versions, whether the .. [[[cog help(["plugins", "--help"]) .. ]]] + :: Usage: datasette plugins [OPTIONS] @@ -326,6 +330,7 @@ Would install the `datasette-cluster-map Date: Tue, 6 Sep 2022 16:50:43 -0700 Subject: [PATCH 0173/1218] truncate_cells_html now affects URLs too, refs #1805 --- datasette/utils/__init__.py | 10 ++++++++++ datasette/views/database.py | 11 ++++++++--- datasette/views/table.py | 8 ++++++-- tests/fixtures.py | 9 +++++---- tests/test_api.py | 2 +- tests/test_table_api.py | 11 +++++++---- tests/test_table_html.py | 11 +++++++++++ tests/test_utils.py | 20 ++++++++++++++++++++ 8 files changed, 68 insertions(+), 14 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index bbaa0510..2bdea673 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1167,3 +1167,13 @@ def resolve_routes(routes, path): if match is not None: return match, view return None, None + + +def truncate_url(url, length): + if (not length) or (len(url) <= length): + return url + bits = url.rsplit(".", 1) + if len(bits) == 2 and 1 <= len(bits[1]) <= 4 and "/" not in bits[1]: + rest, ext = bits + return rest[: length - 1 - len(ext)] + "โ€ฆ." + ext + return url[: length - 1] + "โ€ฆ" diff --git a/datasette/views/database.py b/datasette/views/database.py index 77632b9d..fc344245 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -20,6 +20,7 @@ from datasette.utils import ( path_with_format, path_with_removed_args, sqlite3, + truncate_url, InvalidSql, ) from datasette.utils.asgi import AsgiFileDownload, NotFound, Response, Forbidden @@ -371,6 +372,7 @@ class QueryView(DataView): async def extra_template(): display_rows = [] + truncate_cells = self.ds.setting("truncate_cells_html") for row in results.rows if results else []: display_row = [] for column, value in zip(results.columns, row): @@ -396,9 +398,12 @@ class QueryView(DataView): if value in ("", None): display_value = Markup(" ") elif is_url(str(display_value).strip()): - display_value = Markup( - '{url}'.format( - url=escape(value.strip()) + display_value = markupsafe.Markup( + '{truncated_url}'.format( + url=markupsafe.escape(value.strip()), + truncated_url=markupsafe.escape( + truncate_url(value.strip(), truncate_cells) + ), ) ) elif isinstance(display_value, bytes): diff --git a/datasette/views/table.py b/datasette/views/table.py index 49c30c9c..60c092f9 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -24,6 +24,7 @@ from datasette.utils import ( path_with_removed_args, path_with_replaced_args, to_css_class, + truncate_url, urlsafe_components, value_as_boolean, ) @@ -966,8 +967,11 @@ async def display_columns_and_rows( display_value = markupsafe.Markup(" ") elif is_url(str(value).strip()): display_value = markupsafe.Markup( - '{url}'.format( - url=markupsafe.escape(value.strip()) + '{truncated_url}'.format( + url=markupsafe.escape(value.strip()), + truncated_url=markupsafe.escape( + truncate_url(value.strip(), truncate_cells) + ), ) ) elif column in table_metadata.get("units", {}) and value != "": diff --git a/tests/fixtures.py b/tests/fixtures.py index c145ac78..82d8452e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -598,23 +598,24 @@ CREATE TABLE roadside_attractions ( pk integer primary key, name text, address text, + url text, latitude real, longitude real ); INSERT INTO roadside_attractions VALUES ( - 1, "The Mystery Spot", "465 Mystery Spot Road, Santa Cruz, CA 95065", + 1, "The Mystery Spot", "465 Mystery Spot Road, Santa Cruz, CA 95065", "https://www.mysteryspot.com/", 37.0167, -122.0024 ); INSERT INTO roadside_attractions VALUES ( - 2, "Winchester Mystery House", "525 South Winchester Boulevard, San Jose, CA 95128", + 2, "Winchester Mystery House", "525 South Winchester Boulevard, San Jose, CA 95128", "https://winchestermysteryhouse.com/", 37.3184, -121.9511 ); INSERT INTO roadside_attractions VALUES ( - 3, "Burlingame Museum of PEZ Memorabilia", "214 California Drive, Burlingame, CA 94010", + 3, "Burlingame Museum of PEZ Memorabilia", "214 California Drive, Burlingame, CA 94010", null, 37.5793, -122.3442 ); INSERT INTO roadside_attractions VALUES ( - 4, "Bigfoot Discovery Museum", "5497 Highway 9, Felton, CA 95018", + 4, "Bigfoot Discovery Museum", "5497 Highway 9, Felton, CA 95018", "https://www.bigfootdiscoveryproject.com/", 37.0414, -122.0725 ); diff --git a/tests/test_api.py b/tests/test_api.py index f6db2f9d..7a2bf91f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -339,7 +339,7 @@ def test_database_page(app_client): }, { "name": "roadside_attractions", - "columns": ["pk", "name", "address", "latitude", "longitude"], + "columns": ["pk", "name", "address", "url", "latitude", "longitude"], "primary_keys": ["pk"], "count": 4, "hidden": False, diff --git a/tests/test_table_api.py b/tests/test_table_api.py index e56a72b5..0db04434 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -615,11 +615,12 @@ def test_table_through(app_client): response = app_client.get( '/fixtures/roadside_attractions.json?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}' ) - assert [ + assert response.json["rows"] == [ [ 3, "Burlingame Museum of PEZ Memorabilia", "214 California Drive, Burlingame, CA 94010", + None, 37.5793, -122.3442, ], @@ -627,13 +628,15 @@ def test_table_through(app_client): 4, "Bigfoot Discovery Museum", "5497 Highway 9, Felton, CA 95018", + "https://www.bigfootdiscoveryproject.com/", 37.0414, -122.0725, ], - ] == response.json["rows"] + ] + assert ( - 'where roadside_attraction_characteristics.characteristic_id = "1"' - == response.json["human_description_en"] + response.json["human_description_en"] + == 'where roadside_attraction_characteristics.characteristic_id = "1"' ) diff --git a/tests/test_table_html.py b/tests/test_table_html.py index f3808ea3..8e37468f 100644 --- a/tests/test_table_html.py +++ b/tests/test_table_html.py @@ -69,6 +69,17 @@ def test_table_cell_truncation(): td.string for td in table.findAll("td", {"class": "col-neighborhood-b352a7"}) ] + # URLs should be truncated too + response2 = client.get("/fixtures/roadside_attractions") + assert response2.status == 200 + table = Soup(response2.body, "html.parser").find("table") + tds = table.findAll("td", {"class": "col-url"}) + assert [str(td) for td in tds] == [ + '
', + '', + '', + '', + ] def test_add_filter_redirects(app_client): diff --git a/tests/test_utils.py b/tests/test_utils.py index df788767..d71a612d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -626,3 +626,23 @@ def test_tilde_encoding(original, expected): assert actual == expected # And test round-trip assert original == utils.tilde_decode(actual) + + +@pytest.mark.parametrize( + "url,length,expected", + ( + ("https://example.com/", 5, "httpโ€ฆ"), + ("https://example.com/foo/bar", 15, "https://examplโ€ฆ"), + ("https://example.com/foo/bar/baz.jpg", 30, "https://example.com/foo/baโ€ฆ.jpg"), + # Extensions longer than 4 characters are not treated specially: + ("https://example.com/foo/bar/baz.jpeg2", 30, "https://example.com/foo/bar/bโ€ฆ"), + ( + "https://example.com/foo/bar/baz.jpeg2", + None, + "https://example.com/foo/bar/baz.jpeg2", + ), + ), +) +def test_truncate_url(url, length, expected): + actual = utils.truncate_url(url, length) + assert actual == expected From 5aa359b86907d11b3ee601510775a85a90224da8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 16:58:30 -0700 Subject: [PATCH 0174/1218] Apply cell truncation on query page too, refs #1805 --- datasette/views/database.py | 7 ++++++- tests/test_html.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index fc344245..affbc540 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -428,7 +428,12 @@ class QueryView(DataView): "" if len(value) == 1 else "s", ) ) - + else: + display_value = str(value) + if truncate_cells and len(display_value) > truncate_cells: + display_value = ( + display_value[:truncate_cells] + "\u2026" + ) display_row.append(display_value) display_rows.append(display_row) diff --git a/tests/test_html.py b/tests/test_html.py index d6e969ad..bf915247 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -186,6 +186,25 @@ def test_row_page_does_not_truncate(): ] +def test_query_page_truncates(): + with make_app_client(settings={"truncate_cells_html": 5}) as client: + response = client.get( + "/fixtures?" + + urllib.parse.urlencode( + { + "sql": "select 'this is longer than 5' as a, 'https://example.com/' as b" + } + ) + ) + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + tds = table.findAll("td") + assert [str(td) for td in tds] == [ + '', + '', + ] + + @pytest.mark.parametrize( "path,expected_classes", [ From bf8d84af5422606597be893cedd375020cb2b369 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 6 Sep 2022 20:34:59 -0700 Subject: [PATCH 0175/1218] word-wrap: anywhere on links in cells, refs #1805 --- datasette/static/app.css | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/static/app.css b/datasette/static/app.css index 712b9925..08b724f6 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -446,6 +446,7 @@ th { } table a:link { text-decoration: none; + word-wrap: anywhere; } .rows-and-columns td:before { display: block; From fb7e70d5e72a951efe4b29ad999d8915c032d021 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 9 Sep 2022 09:19:20 -0700 Subject: [PATCH 0176/1218] Database(is_mutable=) now defaults to True, closes #1808 Refs https://github.com/simonw/datasette-upload-dbs/issues/6 --- datasette/database.py | 3 +-- docs/internals.rst | 9 +++++---- tests/test_internals_database.py | 1 + tests/test_internals_datasette.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index fa558045..44467370 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -28,7 +28,7 @@ AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) class Database: def __init__( - self, ds, path=None, is_mutable=False, is_memory=False, memory_name=None + self, ds, path=None, is_mutable=True, is_memory=False, memory_name=None ): self.name = None self.route = None @@ -39,7 +39,6 @@ class Database: self.memory_name = memory_name if memory_name is not None: self.is_memory = True - self.is_mutable = True self.hash = None self.cached_size = None self._cached_table_counts = None diff --git a/docs/internals.rst b/docs/internals.rst index 20797e98..adeec1d8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -426,12 +426,13 @@ The ``db`` parameter should be an instance of the ``datasette.database.Database` Database( datasette, path="path/to/my-new-database.db", - is_mutable=True, ) ) This will add a mutable database and serve it at ``/my-new-database``. +Use ``is_mutable=False`` to add an immutable database. + ``.add_database()`` returns the Database instance, with its name set as the ``database.name`` attribute. Any time you are working with a newly added database you should use the return value of ``.add_database()``, for example: .. code-block:: python @@ -671,8 +672,8 @@ Instances of the ``Database`` class can be used to execute queries against attac .. _database_constructor: -Database(ds, path=None, is_mutable=False, is_memory=False, memory_name=None) ----------------------------------------------------------------------------- +Database(ds, path=None, is_mutable=True, is_memory=False, memory_name=None) +--------------------------------------------------------------------------- The ``Database()`` constructor can be used by plugins, in conjunction with :ref:`datasette_add_database`, to create and register new databases. @@ -685,7 +686,7 @@ The arguments are as follows: Path to a SQLite database file on disk. ``is_mutable`` - boolean - Set this to ``True`` if it is possible that updates will be made to that database - otherwise Datasette will open it in immutable mode and any changes could cause undesired behavior. + Set this to ``False`` to cause Datasette to open the file in immutable mode. ``is_memory`` - boolean Use this to create non-shared memory connections. diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 551f67e1..9e81c1d6 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -499,6 +499,7 @@ def test_mtime_ns_is_none_for_memory(app_client): def test_is_mutable(app_client): + assert Database(app_client.ds, is_memory=True).is_mutable is True assert Database(app_client.ds, is_memory=True, is_mutable=True).is_mutable is True assert Database(app_client.ds, is_memory=True, is_mutable=False).is_mutable is False diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 1dc14cab..249920fe 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -58,7 +58,7 @@ async def test_datasette_constructor(): "route": "_memory", "path": None, "size": 0, - "is_mutable": False, + "is_mutable": True, "is_memory": True, "hash": None, } From 610425460b519e9c16d386cb81aa081c9d730ef0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 10 Sep 2022 14:24:26 -0700 Subject: [PATCH 0177/1218] Add --nolock to the README Chrome demo Refs #1744 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1af20129..af95b85e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ This will start a web server on port 8001 - visit http://localhost:8001/ to acce Use Chrome on OS X? You can run datasette against your browser history like so: - datasette ~/Library/Application\ Support/Google/Chrome/Default/History + datasette ~/Library/Application\ Support/Google/Chrome/Default/History --nolock Now visiting http://localhost:8001/History/downloads will show you a web interface to browse your downloads data: From b40872f5e5ae5dad331c58f75451e2d206565196 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 14 Sep 2022 14:31:54 -0700 Subject: [PATCH 0178/1218] prepare_jinja2_environment(datasette) argument, refs #1809 --- datasette/app.py | 2 +- datasette/hookspecs.py | 2 +- docs/plugin_hooks.rst | 9 +++++++-- tests/plugins/my_plugin.py | 3 ++- tests/test_plugins.py | 5 +++-- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index aeb81687..db686670 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -345,7 +345,7 @@ class Datasette: self.jinja_env.filters["escape_sqlite"] = escape_sqlite self.jinja_env.filters["to_css_class"] = to_css_class # pylint: disable=no-member - pm.hook.prepare_jinja2_environment(env=self.jinja_env) + pm.hook.prepare_jinja2_environment(env=self.jinja_env, datasette=self) self._register_renderers() self._permission_checks = collections.deque(maxlen=200) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index a5fb536f..34e19664 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -26,7 +26,7 @@ def prepare_connection(conn, database, datasette): @hookspec -def prepare_jinja2_environment(env): +def prepare_jinja2_environment(env, datasette): """Modify Jinja2 template environment e.g. register custom template tags""" diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 30bd75b7..62ec5c90 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -61,12 +61,15 @@ Examples: `datasette-jellyfish `_, for @@ -85,6 +88,8 @@ You can now use this filter in your custom templates like so:: Table name: {{ table|uppercase }} +Examples: `datasette-edit-templates `_ + .. _plugin_hook_extra_template_vars: extra_template_vars(template, database, table, columns, view_name, request, datasette) diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 53613b7d..d49a7a34 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -142,8 +142,9 @@ def extra_template_vars( @hookimpl -def prepare_jinja2_environment(env): +def prepare_jinja2_environment(env, datasette): env.filters["format_numeric"] = lambda s: f"{float(s):,.0f}" + env.filters["to_hello"] = lambda s: datasette._HELLO @hookimpl diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 948a40b8..590d88f6 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -545,11 +545,12 @@ def test_hook_register_output_renderer_can_render(app_client): @pytest.mark.asyncio async def test_hook_prepare_jinja2_environment(app_client): + app_client.ds._HELLO = "HI" template = app_client.ds.jinja_env.from_string( - "Hello there, {{ a|format_numeric }}", {"a": 3412341} + "Hello there, {{ a|format_numeric }}, {{ a|to_hello }}", {"a": 3412341} ) rendered = await app_client.ds.render_template(template) - assert "Hello there, 3,412,341" == rendered + assert "Hello there, 3,412,341, HI" == rendered def test_hook_publish_subcommand(): From 2ebcffe2226ece2a5a86722790d486a480338632 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Sep 2022 12:50:52 -0700 Subject: [PATCH 0179/1218] Bump furo from 2022.6.21 to 2022.9.15 (#1812) Bumps [furo](https://github.com/pradyunsg/furo) from 2022.6.21 to 2022.9.15. - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2022.06.21...2022.09.15) --- updated-dependencies: - dependency-name: furo dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 92fa60d0..afcba1f0 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ setup( setup_requires=["pytest-runner"], extras_require={ "docs": [ - "furo==2022.6.21", + "furo==2022.9.15", "sphinx-autobuild", "codespell", "blacken-docs", From ddc999ad1296e8c69cffede3e367dda059b8adad Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 16 Sep 2022 20:38:15 -0700 Subject: [PATCH 0180/1218] Async support for prepare_jinja2_environment, closes #1809 --- datasette/app.py | 22 ++++++++++++++--- datasette/utils/testing.py | 1 + docs/plugin_hooks.rst | 2 ++ docs/testing_plugins.rst | 30 ++++++++++++++++++++++++ tests/fixtures.py | 1 + tests/plugins/my_plugin.py | 10 ++++++-- tests/plugins/my_plugin_2.py | 6 +++++ tests/test_internals_datasette_client.py | 6 +++-- tests/test_plugins.py | 6 +++-- tests/test_routes.py | 1 + 10 files changed, 76 insertions(+), 9 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index db686670..ea3e7b43 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -208,6 +208,7 @@ class Datasette: crossdb=False, nolock=False, ): + self._startup_invoked = False assert config_dir is None or isinstance( config_dir, Path ), "config_dir= should be a pathlib.Path" @@ -344,9 +345,6 @@ class Datasette: self.jinja_env.filters["quote_plus"] = urllib.parse.quote_plus self.jinja_env.filters["escape_sqlite"] = escape_sqlite self.jinja_env.filters["to_css_class"] = to_css_class - # pylint: disable=no-member - pm.hook.prepare_jinja2_environment(env=self.jinja_env, datasette=self) - self._register_renderers() self._permission_checks = collections.deque(maxlen=200) self._root_token = secrets.token_hex(32) @@ -389,8 +387,16 @@ class Datasette: return Urls(self) async def invoke_startup(self): + # This must be called for Datasette to be in a usable state + if self._startup_invoked: + return + for hook in pm.hook.prepare_jinja2_environment( + env=self.jinja_env, datasette=self + ): + await await_me_maybe(hook) for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) + self._startup_invoked = True def sign(self, value, namespace="default"): return URLSafeSerializer(self._secret, namespace).dumps(value) @@ -933,6 +939,8 @@ class Datasette: async def render_template( self, templates, context=None, request=None, view_name=None ): + if not self._startup_invoked: + raise Exception("render_template() called before await ds.invoke_startup()") context = context or {} if isinstance(templates, Template): template = templates @@ -1495,34 +1503,42 @@ class DatasetteClient: return path async def get(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.get(self._fix(path), **kwargs) async def options(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.options(self._fix(path), **kwargs) async def head(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.head(self._fix(path), **kwargs) async def post(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.post(self._fix(path), **kwargs) async def put(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.put(self._fix(path), **kwargs) async def patch(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.patch(self._fix(path), **kwargs) async def delete(self, path, **kwargs): + await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.delete(self._fix(path), **kwargs) async def request(self, method, path, **kwargs): + await self.ds.invoke_startup() avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) async with httpx.AsyncClient(app=self.app) as client: return await client.request( diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py index 640c94e6..b28fc575 100644 --- a/datasette/utils/testing.py +++ b/datasette/utils/testing.py @@ -147,6 +147,7 @@ class TestClient: content_type=None, if_none_match=None, ): + await self.ds.invoke_startup() headers = headers or {} if content_type: headers["content-type"] = content_type diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 62ec5c90..f208e727 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -88,6 +88,8 @@ You can now use this filter in your custom templates like so:: Table name: {{ table|uppercase }} +This function can return an awaitable function if it needs to run any async code. + Examples: `datasette-edit-templates `_ .. _plugin_hook_extra_template_vars: diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 992b4b0e..41f50e56 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -52,6 +52,36 @@ Then run the tests using pytest like so:: pytest +.. _testing_plugins_datasette_test_instance: + +Setting up a Datasette test instance +------------------------------------ + +The above example shows the easiest way to start writing tests against a Datasette instance: + +.. code-block:: python + + from datasette.app import Datasette + import pytest + + + @pytest.mark.asyncio + async def test_plugin_is_installed(): + datasette = Datasette(memory=True) + response = await datasette.client.get("/-/plugins.json") + assert response.status_code == 200 + +Creating a ``Datasette()`` instance like this as useful shortcut in tests, but there is one detail you need to be aware of. It's important to ensure that the async method ``.invoke_startup()`` is called on that instance. You can do that like this: + +.. code-block:: python + + datasette = Datasette(memory=True) + await datasette.invoke_startup() + +This method registers any :ref:`plugin_hook_startup` or :ref:`plugin_hook_prepare_jinja2_environment` plugins that might themselves need to make async calls. + +If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - those method calls ensure that ``.invoke_startup()`` has been called for you. + .. _testing_plugins_pdb: Using pdb for errors thrown inside Datasette diff --git a/tests/fixtures.py b/tests/fixtures.py index 82d8452e..5a875cd2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -71,6 +71,7 @@ EXPECTED_PLUGINS = [ "handle_exception", "menu_links", "permission_allowed", + "prepare_jinja2_environment", "register_routes", "render_cell", "startup", diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index d49a7a34..1a41de38 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -143,8 +143,14 @@ def extra_template_vars( @hookimpl def prepare_jinja2_environment(env, datasette): - env.filters["format_numeric"] = lambda s: f"{float(s):,.0f}" - env.filters["to_hello"] = lambda s: datasette._HELLO + async def select_times_three(s): + db = datasette.get_database() + return (await db.execute("select 3 * ?", [int(s)])).first()[0] + + async def inner(): + env.filters["select_times_three"] = select_times_three + + return inner @hookimpl diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index 4df02343..cee80703 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -126,6 +126,12 @@ def permission_allowed(datasette, actor, action): return inner +@hookimpl +def prepare_jinja2_environment(env, datasette): + env.filters["format_numeric"] = lambda s: f"{float(s):,.0f}" + env.filters["to_hello"] = lambda s: datasette._HELLO + + @hookimpl def startup(datasette): async def inner(): diff --git a/tests/test_internals_datasette_client.py b/tests/test_internals_datasette_client.py index 8c5b5bd3..497bf475 100644 --- a/tests/test_internals_datasette_client.py +++ b/tests/test_internals_datasette_client.py @@ -1,10 +1,12 @@ from .fixtures import app_client import httpx import pytest +import pytest_asyncio -@pytest.fixture -def datasette(app_client): +@pytest_asyncio.fixture +async def datasette(app_client): + await app_client.ds.invoke_startup() return app_client.ds diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 590d88f6..0ae3abf3 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -546,11 +546,13 @@ def test_hook_register_output_renderer_can_render(app_client): @pytest.mark.asyncio async def test_hook_prepare_jinja2_environment(app_client): app_client.ds._HELLO = "HI" + await app_client.ds.invoke_startup() template = app_client.ds.jinja_env.from_string( - "Hello there, {{ a|format_numeric }}, {{ a|to_hello }}", {"a": 3412341} + "Hello there, {{ a|format_numeric }}, {{ a|to_hello }}, {{ b|select_times_three }}", + {"a": 3412341, "b": 5}, ) rendered = await app_client.ds.render_template(template) - assert "Hello there, 3,412,341, HI" == rendered + assert "Hello there, 3,412,341, HI, 15" == rendered def test_hook_publish_subcommand(): diff --git a/tests/test_routes.py b/tests/test_routes.py index 5ae55d21..d467abe1 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -59,6 +59,7 @@ def test_routes(routes, path, expected_class, expected_matches): @pytest_asyncio.fixture async def ds_with_route(): ds = Datasette() + await ds.invoke_startup() ds.remove_database("_memory") db = Database(ds, is_memory=True, memory_name="route-name-db") ds.add_database(db, name="original-name", route="custom-route-name") From df851c117db031dec50dd4ef1ca34745920ac77a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 19 Sep 2022 16:46:39 -0700 Subject: [PATCH 0181/1218] Validate settings.json keys on startup, closes #1816 Refs #1814 --- datasette/app.py | 4 ++++ tests/test_config_dir.py | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index ea3e7b43..8873ce28 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -292,6 +292,10 @@ class Datasette: raise StartupError("config.json should be renamed to settings.json") if config_dir and (config_dir / "settings.json").exists() and not settings: settings = json.loads((config_dir / "settings.json").read_text()) + # Validate those settings + for key in settings: + if key not in DEFAULT_SETTINGS: + raise StartupError("Invalid setting '{key}' in settings.json") self._settings = dict(DEFAULT_SETTINGS, **(settings or {})) self.renderers = {} # File extension -> (renderer, can_render) functions self.version_note = version_note diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index fe927c42..e365515b 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -5,6 +5,7 @@ import pytest from datasette.app import Datasette from datasette.cli import cli from datasette.utils.sqlite import sqlite3 +from datasette.utils import StartupError from .fixtures import TestClient as _TestClient from click.testing import CliRunner @@ -27,9 +28,8 @@ body { margin-top: 3em} @pytest.fixture(scope="session") -def config_dir_client(tmp_path_factory): +def config_dir(tmp_path_factory): config_dir = tmp_path_factory.mktemp("config-dir") - plugins_dir = config_dir / "plugins" plugins_dir.mkdir() (plugins_dir / "hooray.py").write_text(PLUGIN, "utf-8") @@ -77,7 +77,23 @@ def config_dir_client(tmp_path_factory): ), "utf-8", ) + return config_dir + +def test_invalid_settings(config_dir): + previous = (config_dir / "settings.json").read_text("utf-8") + (config_dir / "settings.json").write_text( + json.dumps({"invalid": "invalid-setting"}), "utf-8" + ) + try: + with pytest.raises(StartupError): + ds = Datasette([], config_dir=config_dir) + finally: + (config_dir / "settings.json").write_text(previous, "utf-8") + + +@pytest.fixture(scope="session") +def config_dir_client(config_dir): ds = Datasette([], config_dir=config_dir) yield _TestClient(ds) From cb1e093fd361b758120aefc1a444df02462389a3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 19 Sep 2022 18:15:40 -0700 Subject: [PATCH 0182/1218] Fixed error message, closes #1816 --- datasette/app.py | 4 +++- tests/test_config_dir.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8873ce28..03d1dacc 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -295,7 +295,9 @@ class Datasette: # Validate those settings for key in settings: if key not in DEFAULT_SETTINGS: - raise StartupError("Invalid setting '{key}' in settings.json") + raise StartupError( + "Invalid setting '{}' in settings.json".format(key) + ) self._settings = dict(DEFAULT_SETTINGS, **(settings or {})) self.renderers = {} # File extension -> (renderer, can_render) functions self.version_note = version_note diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index e365515b..f5ecf0d6 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -86,8 +86,9 @@ def test_invalid_settings(config_dir): json.dumps({"invalid": "invalid-setting"}), "utf-8" ) try: - with pytest.raises(StartupError): + with pytest.raises(StartupError) as ex: ds = Datasette([], config_dir=config_dir) + assert ex.value.args[0] == "Invalid setting 'invalid' in settings.json" finally: (config_dir / "settings.json").write_text(previous, "utf-8") From 212137a90b4291db9605e039f198564dae59c5d0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 26 Sep 2022 14:14:25 -0700 Subject: [PATCH 0183/1218] Release 0.63a0 Refs #1786, #1787, #1789, #1794, #1800, #1804, #1805, #1808, #1809, #1816 --- datasette/version.py | 2 +- docs/changelog.rst | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 0453346c..e5ad585f 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.62" +__version__ = "0.63a0" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index f9dcc980..bd93f4cb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,23 @@ Changelog ========= +.. _v0_63a0: + +0.63a0 (2022-09-26) +------------------- + +- The :ref:`plugin_hook_prepare_jinja2_environment` plugin hook now accepts an optional ``datasette`` argument. Hook implementations can also now return an ``async`` function which will be awaited automatically. (:issue:`1809`) +- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) +- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. +- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) +- ``truncate_cells_html`` setting now also affects long URLs in columns. (:issue:`1805`) +- ``Database(is_mutable=)`` now defaults to ``True``. (:issue:`1808`) +- Non-JavaScript textarea now increases height to fit the SQL query. (:issue:`1786`) +- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- Datasette no longer enforces upper bounds on its depenedencies. (:issue:`1800`) +- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) +- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) + .. _v0_62: 0.62 (2022-08-14) From 5f9f567acbc58c9fcd88af440e68034510fb5d2b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 26 Sep 2022 16:06:01 -0700 Subject: [PATCH 0184/1218] Show SQL query when reporting time limit error, closes #1819 --- datasette/database.py | 5 ++++- datasette/views/base.py | 21 +++++++++++++-------- tests/test_api.py | 12 +++++++++++- tests/test_html.py | 10 +++++++--- 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 44467370..46094bd7 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -476,7 +476,10 @@ class WriteTask: class QueryInterrupted(Exception): - pass + def __init__(self, e, sql, params): + self.e = e + self.sql = sql + self.params = params class MultipleValues(Exception): diff --git a/datasette/views/base.py b/datasette/views/base.py index 221e1882..67aa3a42 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -1,10 +1,12 @@ import asyncio import csv import hashlib -import re import sys +import textwrap import time import urllib +from markupsafe import escape + import pint @@ -24,11 +26,9 @@ from datasette.utils import ( path_with_removed_args, path_with_format, sqlite3, - HASH_LENGTH, ) from datasette.utils.asgi import ( AsgiStream, - Forbidden, NotFound, Response, BadRequest, @@ -371,13 +371,18 @@ class DataView(BaseView): ) = response_or_template_contexts else: data, extra_template_data, templates = response_or_template_contexts - except QueryInterrupted: + except QueryInterrupted as ex: raise DatasetteError( - """ - SQL query took too long. The time limit is controlled by the + textwrap.dedent( + """ +

SQL query took too long. The time limit is controlled by the sql_time_limit_ms - configuration option. - """, + configuration option.

+
{}
+ """.format( + escape(ex.sql) + ) + ).strip(), title="SQL Interrupted", status=400, message_is_html=True, diff --git a/tests/test_api.py b/tests/test_api.py index 7a2bf91f..ad74d16e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -656,7 +656,17 @@ def test_custom_sql(app_client): def test_sql_time_limit(app_client_shorter_time_limit): response = app_client_shorter_time_limit.get("/fixtures.json?sql=select+sleep(0.5)") assert 400 == response.status - assert "SQL Interrupted" == response.json["title"] + assert response.json == { + "ok": False, + "error": ( + "

SQL query took too long. The time limit is controlled by the\n" + 'sql_time_limit_ms\n' + "configuration option.

\n" + "
select sleep(0.5)
" + ), + "status": 400, + "title": "SQL Interrupted", + } def test_custom_sql_time_limit(app_client): diff --git a/tests/test_html.py b/tests/test_html.py index bf915247..a99b0b6c 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -168,10 +168,14 @@ def test_disallowed_custom_sql_pragma(app_client): def test_sql_time_limit(app_client_shorter_time_limit): response = app_client_shorter_time_limit.get("/fixtures?sql=select+sleep(0.5)") assert 400 == response.status - expected_html_fragment = """ + expected_html_fragments = [ + """ sql_time_limit_ms - """.strip() - assert expected_html_fragment in response.text + """.strip(), + "
select sleep(0.5)
", + ] + for expected_html_fragment in expected_html_fragments: + assert expected_html_fragment in response.text def test_row_page_does_not_truncate(): From 7fb4ea4e39a15e1f7d3202949794d98af1cfa272 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 27 Sep 2022 21:06:40 -0700 Subject: [PATCH 0185/1218] Update note about render_cell signature, refs #1826 --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index f208e727..c9cab8ab 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -9,7 +9,7 @@ Each plugin can implement one or more hooks using the ``@hookimpl`` decorator ag When you implement a plugin hook you can accept any or all of the parameters that are documented as being passed to that hook. -For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(value, column, table, database, datasette)``: +For example, you can implement the ``render_cell`` plugin hook like this even though the full documented hook signature is ``render_cell(row, value, column, table, database, datasette)``: .. code-block:: python From 984b1df12cf19a6731889fc0665bb5f622e07b7c Mon Sep 17 00:00:00 2001 From: Adam Simpson Date: Wed, 28 Sep 2022 00:21:36 -0400 Subject: [PATCH 0186/1218] Add documentation for serving via OpenRC (#1825) * Add documentation for serving via OpenRC --- docs/deploying.rst | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/docs/deploying.rst b/docs/deploying.rst index d4ad8836..c8552758 100644 --- a/docs/deploying.rst +++ b/docs/deploying.rst @@ -74,18 +74,30 @@ Once the service has started you can confirm that Datasette is running on port 8 curl 127.0.0.1:8000/-/versions.json # Should output JSON showing the installed version -Datasette will not be accessible from outside the server because it is listening on ``127.0.0.1``. You can expose it by instead listening on ``0.0.0.0``, but a better way is to set up a proxy such as ``nginx``. +Datasette will not be accessible from outside the server because it is listening on ``127.0.0.1``. You can expose it by instead listening on ``0.0.0.0``, but a better way is to set up a proxy such as ``nginx`` - see :ref:`deploying_proxy`. -Ubuntu offer `a tutorial on installing nginx `__. Once it is installed you can add configuration to proxy traffic through to Datasette that looks like this:: +.. _deploying_openrc: - server { - server_name mysubdomain.myhost.net; +Running Datasette using OpenRC +=============================== +OpenRC is the service manager on non-systemd Linux distributions like `Alpine Linux `__ and `Gentoo `__. - location / { - proxy_pass http://127.0.0.1:8000/; - proxy_set_header Host $host; - } - } +Create an init script at ``/etc/init.d/datasette`` with the following contents: + +.. code-block:: sh + + #!/sbin/openrc-run + + name="datasette" + command="datasette" + command_args="serve -h 0.0.0.0 /path/to/db.db" + command_background=true + pidfile="/run/${RC_SVCNAME}.pid" + +You then need to configure the service to run at boot and start it:: + + rc-update add datasette + rc-service datasette start .. _deploying_buildpacks: From 34defdc10aa293294ca01cfab70780755447e1d7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Sep 2022 17:39:36 -0700 Subject: [PATCH 0187/1218] Browse the plugins directory --- docs/writing_plugins.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/writing_plugins.rst b/docs/writing_plugins.rst index 01ee8c90..a3fc88ec 100644 --- a/docs/writing_plugins.rst +++ b/docs/writing_plugins.rst @@ -234,7 +234,7 @@ To avoid accidentally conflicting with a database file that may be loaded into D - ``/-/upload-excel`` -Try to avoid registering URLs that clash with other plugins that your users might have installed. There is no central repository of reserved URL paths (yet) but you can review existing plugins by browsing the `datasette-plugin topic `__ on GitHub. +Try to avoid registering URLs that clash with other plugins that your users might have installed. There is no central repository of reserved URL paths (yet) but you can review existing plugins by browsing the `plugins directory `. If your plugin includes functionality that relates to a specific database you could also register a URL route like this: From c92c4318e9892101f75fa158410c0a12c1d80b6e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Sep 2022 10:55:40 -0700 Subject: [PATCH 0188/1218] Bump furo from 2022.9.15 to 2022.9.29 (#1827) Bumps [furo](https://github.com/pradyunsg/furo) from 2022.9.15 to 2022.9.29. - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2022.09.15...2022.09.29) --- updated-dependencies: - dependency-name: furo dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index afcba1f0..fe258adb 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ setup( setup_requires=["pytest-runner"], extras_require={ "docs": [ - "furo==2022.9.15", + "furo==2022.9.29", "sphinx-autobuild", "codespell", "blacken-docs", From 883e326dd6ef95f854f7750ef2d4b0e17082fa96 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 2 Oct 2022 14:26:16 -0700 Subject: [PATCH 0189/1218] Drop word-wrap: anywhere, refs #1828, #1805 --- datasette/static/app.css | 1 - 1 file changed, 1 deletion(-) diff --git a/datasette/static/app.css b/datasette/static/app.css index 08b724f6..712b9925 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -446,7 +446,6 @@ th { } table a:link { text-decoration: none; - word-wrap: anywhere; } .rows-and-columns td:before { display: block; From 4218c9cd742b79b1e3cb80878e42b7e39d16ded2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Oct 2022 11:45:36 -0700 Subject: [PATCH 0190/1218] reST markup fix --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index c9cab8ab..832a76b0 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -268,7 +268,7 @@ you have one: def extra_js_urls(): return ["/-/static-plugins/your-plugin/app.js"] -Note that `your-plugin` here should be the hyphenated plugin name - the name that is displayed in the list on the `/-/plugins` debug page. +Note that ``your-plugin`` here should be the hyphenated plugin name - the name that is displayed in the list on the ``/-/plugins`` debug page. If your code uses `JavaScript modules `__ you should include the ``"module": True`` key. See :ref:`customization_css_and_javascript` for more details. From b6ba117b7978b58b40e3c3c2b723b92c3010ed53 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Oct 2022 18:25:52 -0700 Subject: [PATCH 0191/1218] Clarify request or None for two hooks --- docs/plugin_hooks.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 832a76b0..b61f953a 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1281,7 +1281,7 @@ menu_links(datasette, actor, request) ``actor`` - dictionary or None The currently authenticated :ref:`actor `. -``request`` - :ref:`internals_request` +``request`` - :ref:`internals_request` or None The current HTTP request. This can be ``None`` if the request object is not available. This hook allows additional items to be included in the menu displayed by Datasette's top right menu icon. @@ -1330,7 +1330,7 @@ table_actions(datasette, actor, database, table, request) ``table`` - string The name of the table. -``request`` - :ref:`internals_request` +``request`` - :ref:`internals_request` or None The current HTTP request. This can be ``None`` if the request object is not available. This hook allows table actions to be displayed in a menu accessed via an action icon at the top of the table page. It should return a list of ``{"href": "...", "label": "..."}`` menu items. From bbf33a763537a1d913180b22bd3b5fe4a5e5b252 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 4 Oct 2022 21:32:11 -0700 Subject: [PATCH 0192/1218] Test for bool(results), closes #1832 --- tests/test_internals_database.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 9e81c1d6..4e33beed 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -30,6 +30,14 @@ async def test_results_first(db): assert isinstance(row, sqlite3.Row) +@pytest.mark.asyncio +@pytest.mark.parametrize("expected", (True, False)) +async def test_results_bool(db, expected): + where = "" if expected else "where pk = 0" + results = await db.execute("select * from facetable {}".format(where)) + assert bool(results) is expected + + @pytest.mark.parametrize( "query,expected", [ From eff112498ecc499323c26612d707908831446d25 Mon Sep 17 00:00:00 2001 From: Forest Gregg Date: Thu, 6 Oct 2022 16:06:06 -0400 Subject: [PATCH 0193/1218] Useuse inspect data for hash and file size on startup Thanks, @fgregg Closes #1834 --- datasette/database.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 46094bd7..d75bd70c 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -48,9 +48,13 @@ class Database: self._read_connection = None self._write_connection = None if not self.is_mutable and not self.is_memory: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size + if self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.hash = self.ds.inspect_data[self.name]["hash"] + self.cached_size = self.ds.inspect_data[self.name]["size"] + else: + p = Path(path) + self.hash = inspect_hash(p) + self.cached_size = p.stat().st_size @property def cached_table_counts(self): From b7fec7f9020b79c1fe60cc5a2def86b50eeb5af9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 7 Oct 2022 16:03:09 -0700 Subject: [PATCH 0194/1218] .sqlite/.sqlite3 extensions for config directory mode Closes #1646 --- datasette/app.py | 5 ++++- docs/settings.rst | 2 +- tests/test_config_dir.py | 11 +++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 03d1dacc..32a911c2 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -217,7 +217,10 @@ class Datasette: self._secret = secret or secrets.token_hex(32) self.files = tuple(files or []) + tuple(immutables or []) if config_dir: - self.files += tuple([str(p) for p in config_dir.glob("*.db")]) + db_files = [] + for ext in ("db", "sqlite", "sqlite3"): + db_files.extend(config_dir.glob("*.{}".format(ext))) + self.files += tuple(str(f) for f in db_files) if ( config_dir and (config_dir / "inspect-data.json").exists() diff --git a/docs/settings.rst b/docs/settings.rst index 8437fb04..a6d50543 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -46,7 +46,7 @@ Datasette will detect the files in that directory and automatically configure it The files that can be included in this directory are as follows. All are optional. -* ``*.db`` - SQLite database files that will be served by Datasette +* ``*.db`` (or ``*.sqlite3`` or ``*.sqlite``) - SQLite database files that will be served by Datasette * ``metadata.json`` - :ref:`metadata` for those databases - ``metadata.yaml`` or ``metadata.yml`` can be used as well * ``inspect-data.json`` - the result of running ``datasette inspect *.db --inspect-file=inspect-data.json`` from the configuration directory - any database files listed here will be treated as immutable, so they should not be changed while Datasette is running * ``settings.json`` - settings that would normally be passed using ``--setting`` - here they should be stored as a JSON object of key/value pairs diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py index f5ecf0d6..c2af3836 100644 --- a/tests/test_config_dir.py +++ b/tests/test_config_dir.py @@ -49,7 +49,7 @@ def config_dir(tmp_path_factory): (config_dir / "metadata.json").write_text(json.dumps(METADATA), "utf-8") (config_dir / "settings.json").write_text(json.dumps(SETTINGS), "utf-8") - for dbname in ("demo.db", "immutable.db"): + for dbname in ("demo.db", "immutable.db", "j.sqlite3", "k.sqlite"): db = sqlite3.connect(str(config_dir / dbname)) db.executescript( """ @@ -151,12 +151,11 @@ def test_databases(config_dir_client): response = config_dir_client.get("/-/databases.json") assert 200 == response.status databases = response.json - assert 2 == len(databases) + assert 4 == len(databases) databases.sort(key=lambda d: d["name"]) - assert "demo" == databases[0]["name"] - assert databases[0]["is_mutable"] - assert "immutable" == databases[1]["name"] - assert not databases[1]["is_mutable"] + for db, expected_name in zip(databases, ("demo", "immutable", "j", "k")): + assert expected_name == db["name"] + assert db["is_mutable"] == (expected_name != "immutable") @pytest.mark.parametrize("filename", ("metadata.yml", "metadata.yaml")) From 1a5e5f2aa951e5bd731067a49819efba68fbe8ef Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 13 Oct 2022 14:42:52 -0700 Subject: [PATCH 0195/1218] Refactor breadcrumbs to respect permissions, refs #1831 --- datasette/app.py | 40 ++++++++++++++++++++++ datasette/templates/_crumbs.html | 15 ++++++++ datasette/templates/base.html | 4 +-- datasette/templates/database.html | 9 ----- datasette/templates/error.html | 7 ---- datasette/templates/logout.html | 7 ---- datasette/templates/permissions_debug.html | 7 ---- datasette/templates/query.html | 8 ++--- datasette/templates/row.html | 9 ++--- datasette/templates/show_json.html | 7 ---- datasette/templates/table.html | 8 ++--- tests/test_permissions.py | 1 + tests/test_plugins.py | 2 +- 13 files changed, 65 insertions(+), 59 deletions(-) create mode 100644 datasette/templates/_crumbs.html diff --git a/datasette/app.py b/datasette/app.py index 32a911c2..5fa4955c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -631,6 +631,44 @@ class Datasette: else: return [] + async def _crumb_items(self, request, table=None, database=None): + crumbs = [] + # Top-level link + if await self.permission_allowed( + actor=request.actor, action="view-instance", default=True + ): + crumbs.append({"href": self.urls.instance(), "label": "home"}) + # Database link + if database: + if await self.permission_allowed( + actor=request.actor, + action="view-database", + resource=database, + default=True, + ): + crumbs.append( + { + "href": self.urls.database(database), + "label": database, + } + ) + # Table link + if table: + assert database, "table= requires database=" + if await self.permission_allowed( + actor=request.actor, + action="view-table", + resource=(database, table), + default=True, + ): + crumbs.append( + { + "href": self.urls.table(database, table), + "label": table, + } + ) + return crumbs + async def permission_allowed(self, actor, action, resource=None, default=False): """Check permissions using the permissions_allowed plugin hook""" result = None @@ -1009,6 +1047,8 @@ class Datasette: template_context = { **context, **{ + "request": request, + "crumb_items": self._crumb_items, "urls": self.urls, "actor": request.actor if request else None, "menu_links": menu_links, diff --git a/datasette/templates/_crumbs.html b/datasette/templates/_crumbs.html new file mode 100644 index 00000000..bd1ff0da --- /dev/null +++ b/datasette/templates/_crumbs.html @@ -0,0 +1,15 @@ +{% macro nav(request, database=None, table=None) -%} +{% if crumb_items is defined %} + {% set items=crumb_items(request=request, database=database, table=table) %} + {% if items %} +

+ {% for item in items %} + {{ item.label }} + {% if not loop.last %} + / + {% endif %} + {% endfor %} +

+ {% endif %} +{% endif %} +{%- endmacro %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index c3a71acb..87c939ac 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -1,4 +1,4 @@ - +{% import "_crumbs.html" as crumbs with context %} {% block title %}{% endblock %} @@ -17,7 +17,7 @@
httpโ€ฆhttpโ€ฆ\xa0httpโ€ฆthis โ€ฆhttpโ€ฆ
+ Content-Type: application/json + Authorization: Bearer dstok_ + { + "row": { + "column1": "value1", + "column2": "value2" + } + } + +If successful, this will return a ``201`` status code and the newly inserted row, for example: + +.. code-block:: json + + { + "row": { + "id": 1, + "column1": "value1", + "column2": "value2" + } + } From f6ca86987ba9d7d48eccf2cfe0bfc94942003844 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 06:56:11 -0700 Subject: [PATCH 0227/1218] Delete mirror-master-and-main.yml Closes #1865 --- .github/workflows/mirror-master-and-main.yml | 21 -------------------- 1 file changed, 21 deletions(-) delete mode 100644 .github/workflows/mirror-master-and-main.yml diff --git a/.github/workflows/mirror-master-and-main.yml b/.github/workflows/mirror-master-and-main.yml deleted file mode 100644 index 8418df40..00000000 --- a/.github/workflows/mirror-master-and-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Mirror "master" and "main" branches -on: - push: - branches: - - master - - main - -jobs: - mirror: - runs-on: ubuntu-latest - steps: - - name: Mirror to "master" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: master - force: false - - name: Mirror to "main" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: main - force: false From 5f6be3c48b661f74198b8fc85361d3ad6657880e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:47:41 -0700 Subject: [PATCH 0228/1218] Better comment handling in SQL regex, refs #1860 --- datasette/utils/__init__.py | 9 +++++---- tests/test_utils.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 977a66d6..5acfb8b4 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -208,16 +208,16 @@ class InvalidSql(Exception): # Allow SQL to start with a /* */ or -- comment comment_re = ( # Start of string, then any amount of whitespace - r"^(\s*" + r"^\s*(" + # Comment that starts with -- and ends at a newline r"(?:\-\-.*?\n\s*)" + - # Comment that starts with /* and ends with */ - r"|(?:/\*[\s\S]*?\*/)" + # Comment that starts with /* and ends with */ - but does not have */ in it + r"|(?:\/\*((?!\*\/)[\s\S])*\*\/)" + # Whitespace - r")*\s*" + r"\s*)*\s*" ) allowed_sql_res = [ @@ -228,6 +228,7 @@ allowed_sql_res = [ re.compile(comment_re + r"explain\s+with\b"), re.compile(comment_re + r"explain\s+query\s+plan\s+with\b"), ] + allowed_pragmas = ( "database_list", "foreign_key_list", diff --git a/tests/test_utils.py b/tests/test_utils.py index e89f1e6b..c1589107 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -142,6 +142,7 @@ def test_custom_json_encoder(obj, expected): "PRAGMA case_sensitive_like = true", "SELECT * FROM pragma_not_on_allow_list('idx52')", "/* This comment is not valid. select 1", + "/**/\nupdate foo set bar = 1\n/* test */ select 1", ], ) def test_validate_sql_select_bad(bad_sql): From d2ca13b699d441a201c55cb72ff96919d3cd22bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:50:54 -0700 Subject: [PATCH 0229/1218] Add test for /* multi line */ comment, refs #1860 --- tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index c1589107..8b64f865 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,6 +174,7 @@ def test_validate_sql_select_bad(bad_sql): " /* comment */\nselect 1", " /* comment */select 1", "/* comment */\n -- another\n /* one more */ select 1", + "/* This comment \n has multiple lines */\nselect 1", ], ) def test_validate_sql_select_good(good_sql): From 918f3561208ee58c44773d30e21bace7d7c7cf3b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 06:56:11 -0700 Subject: [PATCH 0230/1218] Delete mirror-master-and-main.yml Closes #1865 --- .github/workflows/mirror-master-and-main.yml | 21 -------------------- 1 file changed, 21 deletions(-) delete mode 100644 .github/workflows/mirror-master-and-main.yml diff --git a/.github/workflows/mirror-master-and-main.yml b/.github/workflows/mirror-master-and-main.yml deleted file mode 100644 index 8418df40..00000000 --- a/.github/workflows/mirror-master-and-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Mirror "master" and "main" branches -on: - push: - branches: - - master - - main - -jobs: - mirror: - runs-on: ubuntu-latest - steps: - - name: Mirror to "master" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: master - force: false - - name: Mirror to "main" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: main - force: false From b597bb6b3e7c4b449654bbfa5b01ceff3eb3cb33 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:47:41 -0700 Subject: [PATCH 0231/1218] Better comment handling in SQL regex, refs #1860 --- datasette/utils/__init__.py | 9 +++++---- tests/test_utils.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 977a66d6..5acfb8b4 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -208,16 +208,16 @@ class InvalidSql(Exception): # Allow SQL to start with a /* */ or -- comment comment_re = ( # Start of string, then any amount of whitespace - r"^(\s*" + r"^\s*(" + # Comment that starts with -- and ends at a newline r"(?:\-\-.*?\n\s*)" + - # Comment that starts with /* and ends with */ - r"|(?:/\*[\s\S]*?\*/)" + # Comment that starts with /* and ends with */ - but does not have */ in it + r"|(?:\/\*((?!\*\/)[\s\S])*\*\/)" + # Whitespace - r")*\s*" + r"\s*)*\s*" ) allowed_sql_res = [ @@ -228,6 +228,7 @@ allowed_sql_res = [ re.compile(comment_re + r"explain\s+with\b"), re.compile(comment_re + r"explain\s+query\s+plan\s+with\b"), ] + allowed_pragmas = ( "database_list", "foreign_key_list", diff --git a/tests/test_utils.py b/tests/test_utils.py index e89f1e6b..c1589107 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -142,6 +142,7 @@ def test_custom_json_encoder(obj, expected): "PRAGMA case_sensitive_like = true", "SELECT * FROM pragma_not_on_allow_list('idx52')", "/* This comment is not valid. select 1", + "/**/\nupdate foo set bar = 1\n/* test */ select 1", ], ) def test_validate_sql_select_bad(bad_sql): From 6958e21b5c2012adf5655d2512cb4106490d10f2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:50:54 -0700 Subject: [PATCH 0232/1218] Add test for /* multi line */ comment, refs #1860 --- tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index c1589107..8b64f865 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,6 +174,7 @@ def test_validate_sql_select_bad(bad_sql): " /* comment */\nselect 1", " /* comment */select 1", "/* comment */\n -- another\n /* one more */ select 1", + "/* This comment \n has multiple lines */\nselect 1", ], ) def test_validate_sql_select_good(good_sql): From a51608090b5ee37593078f71d18b33767ef3af79 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 12:06:18 -0700 Subject: [PATCH 0233/1218] Slight tweak to insert row API design, refs #1851 https://github.com/simonw/datasette/issues/1851#issuecomment-1292997608 --- datasette/views/table.py | 10 +++++----- docs/json_api.rst | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 74d1c532..056b7b04 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -131,11 +131,11 @@ class TableView(DataView): # TODO: handle form-encoded data raise BadRequest("Must send JSON data") data = json.loads(await request.post_body()) - if "row" not in data: - raise BadRequest('Must send "row" data') - row = data["row"] + if "insert" not in data: + raise BadRequest('Must send a "insert" key containing a dictionary') + row = data["insert"] if not isinstance(row, dict): - raise BadRequest("row must be a dictionary") + raise BadRequest("insert must be a dictionary") # Verify all columns exist columns = await db.table_columns(table_name) pks = await db.primary_keys(table_name) @@ -165,7 +165,7 @@ class TableView(DataView): ).first() return Response.json( { - "row": dict(new_row), + "inserted_row": dict(new_row), }, status=201, ) diff --git a/docs/json_api.rst b/docs/json_api.rst index b339a738..2ed8a354 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -476,7 +476,7 @@ This requires the :ref:`permissions_insert_row` permission. Content-Type: application/json Authorization: Bearer dstok_ { - "row": { + "insert": { "column1": "value1", "column2": "value2" } @@ -487,7 +487,7 @@ If successful, this will return a ``201`` status code and the newly inserted row .. code-block:: json { - "row": { + "inserted_row": { "id": 1, "column1": "value1", "column2": "value2" From a2a5dff709c6f1676ac30b5e734c2763002562cf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 12:08:26 -0700 Subject: [PATCH 0234/1218] Missing tests for insert row API, refs #1851 --- tests/test_api_write.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/test_api_write.py diff --git a/tests/test_api_write.py b/tests/test_api_write.py new file mode 100644 index 00000000..86c221d0 --- /dev/null +++ b/tests/test_api_write.py @@ -0,0 +1,38 @@ +from datasette.app import Datasette +from datasette.utils import sqlite3 +import pytest +import time + + +@pytest.fixture +def ds_write(tmp_path_factory): + db_directory = tmp_path_factory.mktemp("dbs") + db_path = str(db_directory / "data.db") + db = sqlite3.connect(str(db_path)) + db.execute("vacuum") + db.execute("create table docs (id integer primary key, title text, score float)") + ds = Datasette([db_path]) + yield ds + db.close() + + +@pytest.mark.asyncio +async def test_write_row(ds_write): + token = "dstok_{}".format( + ds_write.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" + ) + ) + response = await ds_write.client.post( + "/data/docs", + json={"insert": {"title": "Test", "score": 1.0}}, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + expected_row = {"id": 1, "title": "Test", "score": 1.0} + assert response.status_code == 201 + assert response.json()["inserted_row"] == expected_row + rows = (await ds_write.get_database("data").execute("select * from docs")).rows + assert dict(rows[0]) == expected_row From 6e788b49edf4f842c0817f006eb9d865778eea5e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 13:17:18 -0700 Subject: [PATCH 0235/1218] New URL design /db/table/-/insert, refs #1851 --- datasette/app.py | 6 +++- datasette/views/table.py | 69 +++++++++++++++++++++++++++++++++++++++- docs/json_api.rst | 18 ++++++----- tests/test_api_write.py | 6 ++-- 4 files changed, 86 insertions(+), 13 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 894d7f0f..8bc5fe36 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -39,7 +39,7 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import TableView +from .views.table import TableView, TableInsertView from .views.row import RowView from .renderer import json_renderer from .url_builder import Urls @@ -1262,6 +1262,10 @@ class Datasette: RowView.as_view(self), r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P[^/]+?)(\.(?P\w+))?$", ) + add_route( + TableInsertView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/insert$", + ) return [ # Compile any strings to regular expressions ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) diff --git a/datasette/views/table.py b/datasette/views/table.py index 056b7b04..be3d4f93 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -30,7 +30,7 @@ from datasette.utils import ( ) from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters -from .base import DataView, DatasetteError, ureg +from .base import BaseView, DataView, DatasetteError, ureg from .database import QueryView LINK_WITH_LABEL = ( @@ -1077,3 +1077,70 @@ async def display_columns_and_rows( } columns = [first_column] + columns return columns, cell_rows + + +class TableInsertView(BaseView): + name = "table-insert" + + def __init__(self, datasette): + self.ds = datasette + + async def post(self, request): + database_route = tilde_decode(request.url_vars["database"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database_name = db.name + table_name = tilde_decode(request.url_vars["table"]) + # Table must exist (may handle table creation in the future) + db = self.ds.get_database(database_name) + if not await db.table_exists(table_name): + raise NotFound("Table not found: {}".format(table_name)) + # Must have insert-row permission + if not await self.ds.permission_allowed( + request.actor, "insert-row", resource=(database_name, table_name) + ): + raise Forbidden("Permission denied") + if request.headers.get("content-type") != "application/json": + # TODO: handle form-encoded data + raise BadRequest("Must send JSON data") + data = json.loads(await request.post_body()) + if "row" not in data: + raise BadRequest('Must send a "row" key containing a dictionary') + row = data["row"] + if not isinstance(row, dict): + raise BadRequest("row must be a dictionary") + # Verify all columns exist + columns = await db.table_columns(table_name) + pks = await db.primary_keys(table_name) + for key in row: + if key not in columns: + raise BadRequest("Column not found: {}".format(key)) + if key in pks: + raise BadRequest( + "Cannot insert into primary key column: {}".format(key) + ) + # Perform the insert + sql = "INSERT INTO [{table}] ({columns}) VALUES ({values})".format( + table=escape_sqlite(table_name), + columns=", ".join(escape_sqlite(c) for c in row), + values=", ".join("?" for c in row), + ) + cursor = await db.execute_write(sql, list(row.values())) + # Return the new row + rowid = cursor.lastrowid + new_row = ( + await db.execute( + "SELECT * FROM [{table}] WHERE rowid = ?".format( + table=escape_sqlite(table_name) + ), + [rowid], + ) + ).first() + return Response.json( + { + "inserted": [dict(new_row)], + }, + status=201, + ) diff --git a/docs/json_api.rst b/docs/json_api.rst index 2ed8a354..4a7961f2 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -463,7 +463,7 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. -.. _json_api_write_insert_row: +.. _TableInsertView: Inserting a single row ~~~~~~~~~~~~~~~~~~~~~~ @@ -472,11 +472,11 @@ This requires the :ref:`permissions_insert_row` permission. :: - POST //
+ POST //
/-/insert Content-Type: application/json Authorization: Bearer dstok_ { - "insert": { + "row": { "column1": "value1", "column2": "value2" } @@ -487,9 +487,11 @@ If successful, this will return a ``201`` status code and the newly inserted row .. code-block:: json { - "inserted_row": { - "id": 1, - "column1": "value1", - "column2": "value2" - } + "inserted": [ + { + "id": 1, + "column1": "value1", + "column2": "value2" + } + ] } diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 86c221d0..e8222e43 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -24,8 +24,8 @@ async def test_write_row(ds_write): ) ) response = await ds_write.client.post( - "/data/docs", - json={"insert": {"title": "Test", "score": 1.0}}, + "/data/docs/-/insert", + json={"row": {"title": "Test", "score": 1.0}}, headers={ "Authorization": "Bearer {}".format(token), "Content-Type": "application/json", @@ -33,6 +33,6 @@ async def test_write_row(ds_write): ) expected_row = {"id": 1, "title": "Test", "score": 1.0} assert response.status_code == 201 - assert response.json()["inserted_row"] == expected_row + assert response.json()["inserted"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row From b912d92b651c4f0b5137da924d135654511f0fe0 Mon Sep 17 00:00:00 2001 From: Forest Gregg Date: Thu, 27 Oct 2022 16:51:20 -0400 Subject: [PATCH 0236/1218] Make hash and size a lazy property (#1837) * use inspect data for hash and file size * make hash and cached_size lazy properties * move hash property near size --- datasette/database.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index d75bd70c..af1df0a8 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -39,7 +39,7 @@ class Database: self.memory_name = memory_name if memory_name is not None: self.is_memory = True - self.hash = None + self.cached_hash = None self.cached_size = None self._cached_table_counts = None self._write_thread = None @@ -47,14 +47,6 @@ class Database: # These are used when in non-threaded mode: self._read_connection = None self._write_connection = None - if not self.is_mutable and not self.is_memory: - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.hash = self.ds.inspect_data[self.name]["hash"] - self.cached_size = self.ds.inspect_data[self.name]["size"] - else: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size @property def cached_table_counts(self): @@ -266,14 +258,34 @@ class Database: results = await self.execute_fn(sql_operation_in_thread) return results + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + self.cached_hash = inspect_hash(p) + return self.cached_hash + @property def size(self): - if self.is_memory: - return 0 if self.cached_size is not None: return self.cached_size - else: + elif self.is_memory: + return 0 + elif self.is_mutable: return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = Path(self.path).stat().st_size + return self.cached_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: From 2c36e45447494cd7505440943367e29ec57c8e72 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:51:45 -0700 Subject: [PATCH 0237/1218] Bump black from 22.8.0 to 22.10.0 (#1839) Bumps [black](https://github.com/psf/black) from 22.8.0 to 22.10.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.8.0...22.10.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fe258adb..625557ae 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setup( "pytest-xdist>=2.2.1", "pytest-asyncio>=0.17", "beautifulsoup4>=4.8.1", - "black==22.8.0", + "black==22.10.0", "blacken-docs==1.12.1", "pytest-timeout>=1.4.2", "trustme>=0.7", From e5e0459a0b60608cb5e9ff83f6b41f59e6cafdfd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 13:58:00 -0700 Subject: [PATCH 0238/1218] Release notes for 0.63, refs #1869 --- docs/changelog.rst | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2255dcce..01957e4f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,36 +4,42 @@ Changelog ========= -.. _v0_63a1: +.. _v0_63: -0.63a1 (2022-10-23) -------------------- +0.63 (2022-10-27) +----------------- +Features +~~~~~~~~ + +- Now tested against Python 3.11. Docker containers used by ``datasette publish`` and ``datasette package`` both now use that version of Python. (:issue:`1853`) +- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) +- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) +- The :ref:`setting_truncate_cells_html` setting now also affects long URLs in columns. (:issue:`1805`) +- The non-JavaScript SQL editor textarea now increases height to fit the SQL query. (:issue:`1786`) +- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) +- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) +- SQL queries can now include leading SQL comments, using ``/* ... */`` or ``-- ...`` syntax. Thanks, Charles Nepote. (:issue:`1860`) - SQL query is now re-displayed when terminated with a time limit error. (:issue:`1819`) -- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) - The :ref:`inspect data ` mechanism is now used to speed up server startup - thanks, Forest Gregg. (:issue:`1834`) - In :ref:`config_dir` databases with filenames ending in ``.sqlite`` or ``.sqlite3`` are now automatically added to the Datasette instance. (:issue:`1646`) - Breadcrumb navigation display now respects the current user's permissions. (:issue:`1831`) -- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. (:issue:`1844`) -- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - -.. _v0_63a0: - -0.63a0 (2022-09-26) -------------------- +Plugin hooks and internals +~~~~~~~~~~~~~~~~~~~~~~~~~~ - The :ref:`plugin_hook_prepare_jinja2_environment` plugin hook now accepts an optional ``datasette`` argument. Hook implementations can also now return an ``async`` function which will be awaited automatically. (:issue:`1809`) -- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) -- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. -- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) -- ``truncate_cells_html`` setting now also affects long URLs in columns. (:issue:`1805`) - ``Database(is_mutable=)`` now defaults to ``True``. (:issue:`1808`) -- Non-JavaScript textarea now increases height to fit the SQL query. (:issue:`1786`) -- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - Datasette no longer enforces upper bounds on its dependencies. (:issue:`1800`) -- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) -- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) + +Documentation +~~~~~~~~~~~~~ + +- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. +- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. (:issue:`1844`) +- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) .. _v0_62: From bf00b0b59b6692bdec597ac9db4e0b497c5a47b4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 15:11:26 -0700 Subject: [PATCH 0239/1218] Release 0.63 Refs #1646, #1786, #1787, #1789, #1794, #1800, #1804, #1805, #1808, #1809, #1816, #1819, #1825, #1829, #1831, #1834, #1844, #1853, #1860 Closes #1869 --- datasette/version.py | 2 +- docs/changelog.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index eb36da45..ac012640 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63a1" +__version__ = "0.63" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 01957e4f..f573afb3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Changelog 0.63 (2022-10-27) ----------------- +See `Datasette 0.63: The annotated release notes `__ for more background on the changes in this release. + Features ~~~~~~~~ From 2ea60e12d90b7cec03ebab728854d3ec4d553f54 Mon Sep 17 00:00:00 2001 From: Forest Gregg Date: Thu, 27 Oct 2022 16:51:20 -0400 Subject: [PATCH 0240/1218] Make hash and size a lazy property (#1837) * use inspect data for hash and file size * make hash and cached_size lazy properties * move hash property near size --- datasette/database.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index d75bd70c..af1df0a8 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -39,7 +39,7 @@ class Database: self.memory_name = memory_name if memory_name is not None: self.is_memory = True - self.hash = None + self.cached_hash = None self.cached_size = None self._cached_table_counts = None self._write_thread = None @@ -47,14 +47,6 @@ class Database: # These are used when in non-threaded mode: self._read_connection = None self._write_connection = None - if not self.is_mutable and not self.is_memory: - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.hash = self.ds.inspect_data[self.name]["hash"] - self.cached_size = self.ds.inspect_data[self.name]["size"] - else: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size @property def cached_table_counts(self): @@ -266,14 +258,34 @@ class Database: results = await self.execute_fn(sql_operation_in_thread) return results + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + self.cached_hash = inspect_hash(p) + return self.cached_hash + @property def size(self): - if self.is_memory: - return 0 if self.cached_size is not None: return self.cached_size - else: + elif self.is_memory: + return 0 + elif self.is_mutable: return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = Path(self.path).stat().st_size + return self.cached_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: From 641bc4453b5ef1dff0b2fc7dfad0b692be7aa61c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:51:45 -0700 Subject: [PATCH 0241/1218] Bump black from 22.8.0 to 22.10.0 (#1839) Bumps [black](https://github.com/psf/black) from 22.8.0 to 22.10.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.8.0...22.10.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fe258adb..625557ae 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setup( "pytest-xdist>=2.2.1", "pytest-asyncio>=0.17", "beautifulsoup4>=4.8.1", - "black==22.8.0", + "black==22.10.0", "blacken-docs==1.12.1", "pytest-timeout>=1.4.2", "trustme>=0.7", From 26af9b9c4a6c62ee15870caa1c7bc455165d3b11 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 13:58:00 -0700 Subject: [PATCH 0242/1218] Release notes for 0.63, refs #1869 --- docs/changelog.rst | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2255dcce..01957e4f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,36 +4,42 @@ Changelog ========= -.. _v0_63a1: +.. _v0_63: -0.63a1 (2022-10-23) -------------------- +0.63 (2022-10-27) +----------------- +Features +~~~~~~~~ + +- Now tested against Python 3.11. Docker containers used by ``datasette publish`` and ``datasette package`` both now use that version of Python. (:issue:`1853`) +- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) +- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) +- The :ref:`setting_truncate_cells_html` setting now also affects long URLs in columns. (:issue:`1805`) +- The non-JavaScript SQL editor textarea now increases height to fit the SQL query. (:issue:`1786`) +- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) +- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) +- SQL queries can now include leading SQL comments, using ``/* ... */`` or ``-- ...`` syntax. Thanks, Charles Nepote. (:issue:`1860`) - SQL query is now re-displayed when terminated with a time limit error. (:issue:`1819`) -- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) - The :ref:`inspect data ` mechanism is now used to speed up server startup - thanks, Forest Gregg. (:issue:`1834`) - In :ref:`config_dir` databases with filenames ending in ``.sqlite`` or ``.sqlite3`` are now automatically added to the Datasette instance. (:issue:`1646`) - Breadcrumb navigation display now respects the current user's permissions. (:issue:`1831`) -- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. (:issue:`1844`) -- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - -.. _v0_63a0: - -0.63a0 (2022-09-26) -------------------- +Plugin hooks and internals +~~~~~~~~~~~~~~~~~~~~~~~~~~ - The :ref:`plugin_hook_prepare_jinja2_environment` plugin hook now accepts an optional ``datasette`` argument. Hook implementations can also now return an ``async`` function which will be awaited automatically. (:issue:`1809`) -- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) -- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. -- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) -- ``truncate_cells_html`` setting now also affects long URLs in columns. (:issue:`1805`) - ``Database(is_mutable=)`` now defaults to ``True``. (:issue:`1808`) -- Non-JavaScript textarea now increases height to fit the SQL query. (:issue:`1786`) -- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - Datasette no longer enforces upper bounds on its dependencies. (:issue:`1800`) -- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) -- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) + +Documentation +~~~~~~~~~~~~~ + +- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. +- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. (:issue:`1844`) +- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) .. _v0_62: From 61171f01549549e5fb25c72b13280d941d96dbf1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 15:11:26 -0700 Subject: [PATCH 0243/1218] Release 0.63 Refs #1646, #1786, #1787, #1789, #1794, #1800, #1804, #1805, #1808, #1809, #1816, #1819, #1825, #1829, #1831, #1834, #1844, #1853, #1860 Closes #1869 --- datasette/version.py | 2 +- docs/changelog.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index eb36da45..ac012640 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63a1" +__version__ = "0.63" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 01957e4f..f573afb3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Changelog 0.63 (2022-10-27) ----------------- +See `Datasette 0.63: The annotated release notes `__ for more background on the changes in this release. + Features ~~~~~~~~ From c9b5f5d598e7f85cd3e1ce020351a27da334408b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 17:58:36 -0700 Subject: [PATCH 0244/1218] Depend on sqlite-utils>=3.30 Decided to use the most recent version in case I decide later to use the flatten() utility function. Refs #1850 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 625557ae..99e2a4ad 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ setup( "PyYAML>=5.3", "mergedeep>=1.1.1", "itsdangerous>=1.1", + "sqlite-utils>=3.30", ], entry_points=""" [console_scripts] From c35859ae3df163406f1a1895ccf9803e933b2d8e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 29 Oct 2022 23:03:45 -0700 Subject: [PATCH 0245/1218] API for bulk inserts, closes #1866 --- datasette/app.py | 5 ++ datasette/views/table.py | 136 +++++++++++++++++++++---------- docs/cli-reference.rst | 2 + docs/json_api.rst | 48 ++++++++++- docs/settings.rst | 11 +++ tests/test_api.py | 1 + tests/test_api_write.py | 168 +++++++++++++++++++++++++++++++++++++-- 7 files changed, 320 insertions(+), 51 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8bc5fe36..f80d3792 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -99,6 +99,11 @@ SETTINGS = ( 1000, "Maximum rows that can be returned from a table or custom query", ), + Setting( + "max_insert_rows", + 100, + "Maximum rows that can be inserted at a time using the bulk insert API", + ), Setting( "num_sql_threads", 3, diff --git a/datasette/views/table.py b/datasette/views/table.py index be3d4f93..fd203036 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -30,6 +30,7 @@ from datasette.utils import ( ) from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters +import sqlite_utils from .base import BaseView, DataView, DatasetteError, ureg from .database import QueryView @@ -1085,62 +1086,109 @@ class TableInsertView(BaseView): def __init__(self, datasette): self.ds = datasette + async def _validate_data(self, request, db, table_name): + errors = [] + + def _errors(errors): + return None, errors, {} + + if request.headers.get("content-type") != "application/json": + # TODO: handle form-encoded data + return _errors(["Invalid content-type, must be application/json"]) + body = await request.post_body() + try: + data = json.loads(body) + except json.JSONDecodeError as e: + return _errors(["Invalid JSON: {}".format(e)]) + if not isinstance(data, dict): + return _errors(["JSON must be a dictionary"]) + keys = data.keys() + # keys must contain "row" or "rows" + if "row" not in keys and "rows" not in keys: + return _errors(['JSON must have one or other of "row" or "rows"']) + rows = [] + if "row" in keys: + if "rows" in keys: + return _errors(['Cannot use "row" and "rows" at the same time']) + row = data["row"] + if not isinstance(row, dict): + return _errors(['"row" must be a dictionary']) + rows = [row] + data["return_rows"] = True + else: + rows = data["rows"] + if not isinstance(rows, list): + return _errors(['"rows" must be a list']) + for row in rows: + if not isinstance(row, dict): + return _errors(['"rows" must be a list of dictionaries']) + # Does this exceed max_insert_rows? + max_insert_rows = self.ds.setting("max_insert_rows") + if len(rows) > max_insert_rows: + return _errors( + ["Too many rows, maximum allowed is {}".format(max_insert_rows)] + ) + # Validate columns of each row + columns = await db.table_columns(table_name) + # TODO: There are cases where pks are OK, if not using auto-incrementing pk + pks = await db.primary_keys(table_name) + allowed_columns = set(columns) - set(pks) + for i, row in enumerate(rows): + invalid_columns = set(row.keys()) - allowed_columns + if invalid_columns: + errors.append( + "Row {} has invalid columns: {}".format( + i, ", ".join(sorted(invalid_columns)) + ) + ) + if errors: + return _errors(errors) + extra = {key: data[key] for key in data if key not in ("rows", "row")} + return rows, errors, extra + async def post(self, request): + def _error(messages, status=400): + return Response.json({"ok": False, "errors": messages}, status=status) + database_route = tilde_decode(request.url_vars["database"]) try: db = self.ds.get_database(route=database_route) except KeyError: - raise NotFound("Database not found: {}".format(database_route)) + return _error(["Database not found: {}".format(database_route)], 404) database_name = db.name table_name = tilde_decode(request.url_vars["table"]) + # Table must exist (may handle table creation in the future) db = self.ds.get_database(database_name) if not await db.table_exists(table_name): - raise NotFound("Table not found: {}".format(table_name)) + return _error(["Table not found: {}".format(table_name)], 404) # Must have insert-row permission if not await self.ds.permission_allowed( request.actor, "insert-row", resource=(database_name, table_name) ): - raise Forbidden("Permission denied") - if request.headers.get("content-type") != "application/json": - # TODO: handle form-encoded data - raise BadRequest("Must send JSON data") - data = json.loads(await request.post_body()) - if "row" not in data: - raise BadRequest('Must send a "row" key containing a dictionary') - row = data["row"] - if not isinstance(row, dict): - raise BadRequest("row must be a dictionary") - # Verify all columns exist - columns = await db.table_columns(table_name) - pks = await db.primary_keys(table_name) - for key in row: - if key not in columns: - raise BadRequest("Column not found: {}".format(key)) - if key in pks: - raise BadRequest( - "Cannot insert into primary key column: {}".format(key) + return _error(["Permission denied"], 403) + rows, errors, extra = await self._validate_data(request, db, table_name) + if errors: + return _error(errors, 400) + + should_return = bool(extra.get("return_rows", False)) + # Insert rows + def insert_rows(conn): + table = sqlite_utils.Database(conn)[table_name] + if should_return: + rowids = [] + for row in rows: + rowids.append(table.insert(row).last_rowid) + return list( + table.rows_where( + "rowid in ({})".format(",".join("?" for _ in rowids)), rowids + ) ) - # Perform the insert - sql = "INSERT INTO [{table}] ({columns}) VALUES ({values})".format( - table=escape_sqlite(table_name), - columns=", ".join(escape_sqlite(c) for c in row), - values=", ".join("?" for c in row), - ) - cursor = await db.execute_write(sql, list(row.values())) - # Return the new row - rowid = cursor.lastrowid - new_row = ( - await db.execute( - "SELECT * FROM [{table}] WHERE rowid = ?".format( - table=escape_sqlite(table_name) - ), - [rowid], - ) - ).first() - return Response.json( - { - "inserted": [dict(new_row)], - }, - status=201, - ) + else: + table.insert_all(rows) + + rows = await db.execute_write_fn(insert_rows) + result = {"ok": True} + if should_return: + result["inserted"] = rows + return Response.json(result, status=201) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 56156568..649a3dcd 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -213,6 +213,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam (default=100) max_returned_rows Maximum rows that can be returned from a table or custom query (default=1000) + max_insert_rows Maximum rows that can be inserted at a time using + the bulk insert API (default=1000) num_sql_threads Number of threads in the thread pool for executing SQLite queries (default=3) sql_time_limit_ms Time limit for a SQL query in milliseconds diff --git a/docs/json_api.rst b/docs/json_api.rst index 4a7961f2..01558c23 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -465,11 +465,13 @@ Datasette provides a write API for JSON data. This is a POST-only API that requi .. _TableInsertView: -Inserting a single row -~~~~~~~~~~~~~~~~~~~~~~ +Inserting rows +~~~~~~~~~~~~~~ This requires the :ref:`permissions_insert_row` permission. +A single row can be inserted using the ``"row"`` key: + :: POST //
/-/insert @@ -495,3 +497,45 @@ If successful, this will return a ``201`` status code and the newly inserted row } ] } + +To insert multiple rows at a time, use the same API method but send a list of dictionaries as the ``"rows"`` key: + +:: + + POST //
/-/insert + Content-Type: application/json + Authorization: Bearer dstok_ + { + "rows": [ + { + "column1": "value1", + "column2": "value2" + }, + { + "column1": "value3", + "column2": "value4" + } + ] + } + +If successful, this will return a ``201`` status code and an empty ``{}`` response body. + +To return the newly inserted rows, add the ``"return_rows": true`` key to the request body: + +.. code-block:: json + + { + "rows": [ + { + "column1": "value1", + "column2": "value2" + }, + { + "column1": "value3", + "column2": "value4" + } + ], + "return_rows": true + } + +This will return the same ``"inserted"`` key as the single row example above. There is a small performance penalty for using this option. diff --git a/docs/settings.rst b/docs/settings.rst index a990c78c..b86b18bd 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -96,6 +96,17 @@ You can increase or decrease this limit like so:: datasette mydatabase.db --setting max_returned_rows 2000 +.. _setting_max_insert_rows: + +max_insert_rows +~~~~~~~~~~~~~~~ + +Maximum rows that can be inserted at a time using the bulk insert API, see :ref:`TableInsertView`. Defaults to 100. + +You can increase or decrease this limit like so:: + + datasette mydatabase.db --setting max_insert_rows 1000 + .. _setting_num_sql_threads: num_sql_threads diff --git a/tests/test_api.py b/tests/test_api.py index fc171421..ebd675b9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -804,6 +804,7 @@ def test_settings_json(app_client): "facet_suggest_time_limit_ms": 50, "facet_time_limit_ms": 200, "max_returned_rows": 100, + "max_insert_rows": 100, "sql_time_limit_ms": 200, "allow_download": True, "allow_signed_tokens": True, diff --git a/tests/test_api_write.py b/tests/test_api_write.py index e8222e43..4a5a58aa 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -18,11 +18,7 @@ def ds_write(tmp_path_factory): @pytest.mark.asyncio async def test_write_row(ds_write): - token = "dstok_{}".format( - ds_write.sign( - {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" - ) - ) + token = write_token(ds_write) response = await ds_write.client.post( "/data/docs/-/insert", json={"row": {"title": "Test", "score": 1.0}}, @@ -36,3 +32,165 @@ async def test_write_row(ds_write): assert response.json()["inserted"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row + + +@pytest.mark.asyncio +@pytest.mark.parametrize("return_rows", (True, False)) +async def test_write_rows(ds_write, return_rows): + token = write_token(ds_write) + data = {"rows": [{"title": "Test {}".format(i), "score": 1.0} for i in range(20)]} + if return_rows: + data["return_rows"] = True + response = await ds_write.client.post( + "/data/docs/-/insert", + json=data, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201 + actual_rows = [ + dict(r) + for r in ( + await ds_write.get_database("data").execute("select * from docs") + ).rows + ] + assert len(actual_rows) == 20 + assert actual_rows == [ + {"id": i + 1, "title": "Test {}".format(i), "score": 1.0} for i in range(20) + ] + assert response.json()["ok"] is True + if return_rows: + assert response.json()["inserted"] == actual_rows + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "path,input,special_case,expected_status,expected_errors", + ( + ( + "/data2/docs/-/insert", + {}, + None, + 404, + ["Database not found: data2"], + ), + ( + "/data/docs2/-/insert", + {}, + None, + 404, + ["Table not found: docs2"], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"} for i in range(10)]}, + "bad_token", + 403, + ["Permission denied"], + ), + ( + "/data/docs/-/insert", + {}, + "invalid_json", + 400, + [ + "Invalid JSON: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)" + ], + ), + ( + "/data/docs/-/insert", + {}, + "invalid_content_type", + 400, + ["Invalid content-type, must be application/json"], + ), + ( + "/data/docs/-/insert", + [], + None, + 400, + ["JSON must be a dictionary"], + ), + ( + "/data/docs/-/insert", + {"row": "blah"}, + None, + 400, + ['"row" must be a dictionary'], + ), + ( + "/data/docs/-/insert", + {"blah": "blah"}, + None, + 400, + ['JSON must have one or other of "row" or "rows"'], + ), + ( + "/data/docs/-/insert", + {"rows": "blah"}, + None, + 400, + ['"rows" must be a list'], + ), + ( + "/data/docs/-/insert", + {"rows": ["blah"]}, + None, + 400, + ['"rows" must be a list of dictionaries'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"} for i in range(101)]}, + None, + 400, + ["Too many rows, maximum allowed is 100"], + ), + # Validate columns of each row + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test", "bad": 1, "worse": 2} for i in range(2)]}, + None, + 400, + [ + "Row 0 has invalid columns: bad, worse", + "Row 1 has invalid columns: bad, worse", + ], + ), + ), +) +async def test_write_row_errors( + ds_write, path, input, special_case, expected_status, expected_errors +): + token = write_token(ds_write) + if special_case == "bad_token": + token += "bad" + kwargs = dict( + json=input, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "text/plain" + if special_case == "invalid_content_type" + else "application/json", + }, + ) + if special_case == "invalid_json": + del kwargs["json"] + kwargs["content"] = "{bad json" + response = await ds_write.client.post( + path, + **kwargs, + ) + assert response.status_code == expected_status + assert response.json()["ok"] is False + assert response.json()["errors"] == expected_errors + + +def write_token(ds): + return "dstok_{}".format( + ds.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" + ) + ) From f6bf2d8045cc239fe34357342bff1440561c8909 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 29 Oct 2022 23:20:11 -0700 Subject: [PATCH 0246/1218] Initial prototype of API explorer at /-/api, refs #1871 --- datasette/app.py | 5 ++ datasette/templates/api_explorer.html | 73 +++++++++++++++++++++++++++ datasette/views/special.py | 8 +++ tests/test_docs.py | 2 +- 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 datasette/templates/api_explorer.html diff --git a/datasette/app.py b/datasette/app.py index f80d3792..c3d802a4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -33,6 +33,7 @@ from .views.special import ( JsonDataView, PatternPortfolioView, AuthTokenView, + ApiExplorerView, CreateTokenView, LogoutView, AllowDebugView, @@ -1235,6 +1236,10 @@ class Datasette: CreateTokenView.as_view(self), r"/-/create-token$", ) + add_route( + ApiExplorerView.as_view(self), + r"/-/api$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html new file mode 100644 index 00000000..034bee60 --- /dev/null +++ b/datasette/templates/api_explorer.html @@ -0,0 +1,73 @@ +{% extends "base.html" %} + +{% block title %}API Explorer{% endblock %} + +{% block content %} + +

API Explorer

+ +

Use this tool to try out the Datasette write API.

+ +{% if errors %} + {% for error in errors %} +

{{ error }}

+ {% endfor %} +{% endif %} + + +
+ + +
+
+ + +
+
+ +
+

+ + + + +{% endblock %} diff --git a/datasette/views/special.py b/datasette/views/special.py index b754a2f0..9922a621 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -235,3 +235,11 @@ class CreateTokenView(BaseView): "token_bits": token_bits, }, ) + + +class ApiExplorerView(BaseView): + name = "api_explorer" + has_json_alternate = False + + async def get(self, request): + return await self.render(["api_explorer.html"], request) diff --git a/tests/test_docs.py b/tests/test_docs.py index cd5a6c13..e9b813fe 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -62,7 +62,7 @@ def documented_views(): if first_word.endswith("View"): view_labels.add(first_word) # We deliberately don't document these: - view_labels.update(("PatternPortfolioView", "AuthTokenView")) + view_labels.update(("PatternPortfolioView", "AuthTokenView", "ApiExplorerView")) return view_labels From 9eb9ffae3ddd4e8ff0b713bf6fd6a0afed3368d7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 30 Oct 2022 13:09:55 -0700 Subject: [PATCH 0247/1218] Drop API token requirement from API explorer, refs #1871 --- datasette/default_permissions.py | 9 +++++++++ datasette/templates/api_explorer.html | 13 ++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 87684e2a..151ba2b5 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -131,3 +131,12 @@ def register_commands(cli): if debug: click.echo("\nDecoded:\n") click.echo(json.dumps(ds.unsign(token, namespace="token"), indent=2)) + + +@hookimpl +def skip_csrf(scope): + # Skip CSRF check for requests with content-type: application/json + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 034bee60..01b182d8 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -15,16 +15,13 @@ {% endif %}
-
- - -
- +
-
- +
+ +

@@ -46,7 +43,6 @@ form.addEventListener("submit", (ev) => { var formData = new FormData(form); var json = formData.get('json'); var path = formData.get('path'); - var token = formData.get('token'); // Validate JSON try { var data = JSON.parse(json); @@ -60,7 +56,6 @@ form.addEventListener("submit", (ev) => { body: json, headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${token}` } }).then(r => r.json()).then(r => { alert(JSON.stringify(r, null, 2)); From fedbfcc36873366143195d8fe124e1859bf88346 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 30 Oct 2022 14:49:07 -0700 Subject: [PATCH 0248/1218] Neater display of output and errors in API explorer, refs #1871 --- datasette/templates/api_explorer.html | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 01b182d8..38fdb7bc 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -26,6 +26,12 @@

+ + """.format( escape(ex.sql) ) diff --git a/tests/test_api.py b/tests/test_api.py index ad74d16e..4027a7a5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -662,7 +662,11 @@ def test_sql_time_limit(app_client_shorter_time_limit): "

SQL query took too long. The time limit is controlled by the\n" 'sql_time_limit_ms\n' "configuration option.

\n" - "
select sleep(0.5)
" + '\n' + "" ), "status": 400, "title": "SQL Interrupted", diff --git a/tests/test_html.py b/tests/test_html.py index 4b394199..7cfe9d90 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -172,7 +172,7 @@ def test_sql_time_limit(app_client_shorter_time_limit): """ sql_time_limit_ms """.strip(), - "
select sleep(0.5)
", + '', ] for expected_html_fragment in expected_html_fragments: assert expected_html_fragment in response.text From 93a02281dad2f23da84210f6ae9c63777ad8af5e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 10:22:26 -0700 Subject: [PATCH 0253/1218] Show interrupted query in resizing textarea, closes #1876 --- datasette/views/base.py | 6 +++++- tests/test_api.py | 6 +++++- tests/test_html.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 67aa3a42..6b01fdd2 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -378,7 +378,11 @@ class DataView(BaseView):

SQL query took too long. The time limit is controlled by the sql_time_limit_ms configuration option.

-
{}
+ + """.format( escape(ex.sql) ) diff --git a/tests/test_api.py b/tests/test_api.py index ebd675b9..de0223e2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -662,7 +662,11 @@ def test_sql_time_limit(app_client_shorter_time_limit): "

SQL query took too long. The time limit is controlled by the\n" 'sql_time_limit_ms\n' "configuration option.

\n" - "
select sleep(0.5)
" + '\n' + "" ), "status": 400, "title": "SQL Interrupted", diff --git a/tests/test_html.py b/tests/test_html.py index 4b394199..7cfe9d90 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -172,7 +172,7 @@ def test_sql_time_limit(app_client_shorter_time_limit): """ sql_time_limit_ms """.strip(), - "
select sleep(0.5)
", + '', ] for expected_html_fragment in expected_html_fragments: assert expected_html_fragment in response.text From 9bec7c38eb93cde5afb16df9bdd96aea2a5b0459 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 11:07:59 -0700 Subject: [PATCH 0254/1218] ignore and replace options for bulk inserts, refs #1873 Also removed the rule that you cannot include primary keys in the rows you insert. And added validation that catches invalid parameters in the incoming JSON. And renamed "inserted" to "rows" in the returned JSON for return_rows: true --- datasette/views/table.py | 41 ++++++++++++++------ docs/json_api.rst | 4 +- tests/test_api_write.py | 83 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 17 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 1e3d566e..7692a4e3 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1107,6 +1107,7 @@ class TableInsertView(BaseView): if not isinstance(data, dict): return _errors(["JSON must be a dictionary"]) keys = data.keys() + # keys must contain "row" or "rows" if "row" not in keys and "rows" not in keys: return _errors(['JSON must have one or other of "row" or "rows"']) @@ -1126,19 +1127,31 @@ class TableInsertView(BaseView): for row in rows: if not isinstance(row, dict): return _errors(['"rows" must be a list of dictionaries']) + # Does this exceed max_insert_rows? max_insert_rows = self.ds.setting("max_insert_rows") if len(rows) > max_insert_rows: return _errors( ["Too many rows, maximum allowed is {}".format(max_insert_rows)] ) + + # Validate other parameters + extras = { + key: value for key, value in data.items() if key not in ("row", "rows") + } + valid_extras = {"return_rows", "ignore", "replace"} + invalid_extras = extras.keys() - valid_extras + if invalid_extras: + return _errors( + ['Invalid parameter: "{}"'.format('", "'.join(sorted(invalid_extras)))] + ) + if extras.get("ignore") and extras.get("replace"): + return _errors(['Cannot use "ignore" and "replace" at the same time']) + # Validate columns of each row - columns = await db.table_columns(table_name) - # TODO: There are cases where pks are OK, if not using auto-incrementing pk - pks = await db.primary_keys(table_name) - allowed_columns = set(columns) - set(pks) + columns = set(await db.table_columns(table_name)) for i, row in enumerate(rows): - invalid_columns = set(row.keys()) - allowed_columns + invalid_columns = set(row.keys()) - columns if invalid_columns: errors.append( "Row {} has invalid columns: {}".format( @@ -1147,8 +1160,7 @@ class TableInsertView(BaseView): ) if errors: return _errors(errors) - extra = {key: data[key] for key in data if key not in ("rows", "row")} - return rows, errors, extra + return rows, errors, extras async def post(self, request): database_route = tilde_decode(request.url_vars["database"]) @@ -1168,18 +1180,23 @@ class TableInsertView(BaseView): request.actor, "insert-row", resource=(database_name, table_name) ): return _error(["Permission denied"], 403) - rows, errors, extra = await self._validate_data(request, db, table_name) + rows, errors, extras = await self._validate_data(request, db, table_name) if errors: return _error(errors, 400) - should_return = bool(extra.get("return_rows", False)) + ignore = extras.get("ignore") + replace = extras.get("replace") + + should_return = bool(extras.get("return_rows", False)) # Insert rows def insert_rows(conn): table = sqlite_utils.Database(conn)[table_name] if should_return: rowids = [] for row in rows: - rowids.append(table.insert(row).last_rowid) + rowids.append( + table.insert(row, ignore=ignore, replace=replace).last_rowid + ) return list( table.rows_where( "rowid in ({})".format(",".join("?" for _ in rowids)), @@ -1187,12 +1204,12 @@ class TableInsertView(BaseView): ) ) else: - table.insert_all(rows) + table.insert_all(rows, ignore=ignore, replace=replace) rows = await db.execute_write_fn(insert_rows) result = {"ok": True} if should_return: - result["inserted"] = rows + result["rows"] = rows return Response.json(result, status=201) diff --git a/docs/json_api.rst b/docs/json_api.rst index da4500ab..34c13211 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -489,7 +489,7 @@ If successful, this will return a ``201`` status code and the newly inserted row .. code-block:: json { - "inserted": [ + "rows": [ { "id": 1, "column1": "value1", @@ -538,7 +538,7 @@ To return the newly inserted rows, add the ``"return_rows": true`` key to the re "return_rows": true } -This will return the same ``"inserted"`` key as the single row example above. There is a small performance penalty for using this option. +This will return the same ``"rows"`` key as the single row example above. There is a small performance penalty for using this option. .. _RowDeleteView: diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 1cfba104..d0b0f324 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -37,7 +37,7 @@ async def test_write_row(ds_write): ) expected_row = {"id": 1, "title": "Test", "score": 1.0} assert response.status_code == 201 - assert response.json()["inserted"] == [expected_row] + assert response.json()["rows"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row @@ -70,7 +70,7 @@ async def test_write_rows(ds_write, return_rows): ] assert response.json()["ok"] is True if return_rows: - assert response.json()["inserted"] == actual_rows + assert response.json()["rows"] == actual_rows @pytest.mark.asyncio @@ -156,6 +156,27 @@ async def test_write_rows(ds_write, return_rows): 400, ["Too many rows, maximum allowed is 100"], ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "ignore": True, "replace": True}, + None, + 400, + ['Cannot use "ignore" and "replace" at the same time'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "invalid_param": True}, + None, + 400, + ['Invalid parameter: "invalid_param"'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "one": True, "two": True}, + None, + 400, + ['Invalid parameter: "one", "two"'], + ), # Validate columns of each row ( "/data/docs/-/insert", @@ -196,6 +217,62 @@ async def test_write_row_errors( assert response.json()["errors"] == expected_errors +@pytest.mark.asyncio +@pytest.mark.parametrize( + "ignore,replace,expected_rows", + ( + ( + True, + False, + [ + {"id": 1, "title": "Exists", "score": None}, + ], + ), + ( + False, + True, + [ + {"id": 1, "title": "One", "score": None}, + ], + ), + ), +) +@pytest.mark.parametrize("should_return", (True, False)) +async def test_insert_ignore_replace( + ds_write, ignore, replace, expected_rows, should_return +): + await ds_write.get_database("data").execute_write( + "insert into docs (id, title) values (1, 'Exists')" + ) + token = write_token(ds_write) + data = {"rows": [{"id": 1, "title": "One"}]} + if ignore: + data["ignore"] = True + if replace: + data["replace"] = True + if should_return: + data["return_rows"] = True + response = await ds_write.client.post( + "/data/docs/-/insert", + json=data, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201 + actual_rows = [ + dict(r) + for r in ( + await ds_write.get_database("data").execute("select * from docs") + ).rows + ] + assert actual_rows == expected_rows + assert response.json()["ok"] is True + if should_return: + assert response.json()["rows"] == expected_rows + + @pytest.mark.asyncio @pytest.mark.parametrize("scenario", ("no_token", "no_perm", "bad_table", "has_perm")) async def test_delete_row(ds_write, scenario): @@ -217,7 +294,7 @@ async def test_delete_row(ds_write, scenario): }, ) assert insert_response.status_code == 201 - pk = insert_response.json()["inserted"][0]["id"] + pk = insert_response.json()["rows"][0]["id"] path = "/data/{}/{}/-/delete".format( "docs" if scenario != "bad_table" else "bad_table", pk From 497290beaf32e6b779f9683ef15f1c5bc142a41a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 12:59:17 -0700 Subject: [PATCH 0255/1218] Handle database errors in /-/insert, refs #1866, #1873 Also improved API explorer to show HTTP status of response, refs #1871 --- datasette/templates/api_explorer.html | 14 +++++++++----- datasette/views/table.py | 5 ++++- tests/test_api_write.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 38fdb7bc..93bacde3 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -27,7 +27,8 @@ @@ -64,12 +65,15 @@ form.addEventListener("submit", (ev) => { headers: { 'Content-Type': 'application/json', } - }).then(r => r.json()).then(r => { + }).then(r => { + document.getElementById('response-status').textContent = r.status; + return r.json(); + }).then(data => { var errorList = output.querySelector('.errors'); - if (r.errors) { + if (data.errors) { errorList.style.display = 'block'; errorList.innerHTML = ''; - r.errors.forEach(error => { + data.errors.forEach(error => { var li = document.createElement('li'); li.textContent = error; errorList.appendChild(li); @@ -77,7 +81,7 @@ form.addEventListener("submit", (ev) => { } else { errorList.style.display = 'none'; } - output.querySelector('pre').innerText = JSON.stringify(r, null, 2); + output.querySelector('pre').innerText = JSON.stringify(data, null, 2); output.style.display = 'block'; }).catch(err => { alert("Error: " + err); diff --git a/datasette/views/table.py b/datasette/views/table.py index 7692a4e3..61227206 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1206,7 +1206,10 @@ class TableInsertView(BaseView): else: table.insert_all(rows, ignore=ignore, replace=replace) - rows = await db.execute_write_fn(insert_rows) + try: + rows = await db.execute_write_fn(insert_rows) + except Exception as e: + return _error([str(e)]) result = {"ok": True} if should_return: result["rows"] = rows diff --git a/tests/test_api_write.py b/tests/test_api_write.py index d0b0f324..0b567f48 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -156,6 +156,13 @@ async def test_write_rows(ds_write, return_rows): 400, ["Too many rows, maximum allowed is 100"], ), + ( + "/data/docs/-/insert", + {"rows": [{"id": 1, "title": "Test"}]}, + "duplicate_id", + 400, + ["UNIQUE constraint failed: docs.id"], + ), ( "/data/docs/-/insert", {"rows": [{"title": "Test"}], "ignore": True, "replace": True}, @@ -194,6 +201,10 @@ async def test_write_row_errors( ds_write, path, input, special_case, expected_status, expected_errors ): token = write_token(ds_write) + if special_case == "duplicate_id": + await ds_write.get_database("data").execute_write( + "insert into docs (id) values (1)" + ) if special_case == "bad_token": token += "bad" kwargs = dict( From 0b166befc0096fca30d71e19608a928d59c331a4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 17:31:22 -0700 Subject: [PATCH 0256/1218] API explorer can now do GET, has JSON syntax highlighting Refs #1871 --- .../static/json-format-highlight-1.0.1.js | 43 +++++++++++ datasette/templates/api_explorer.html | 77 +++++++++++++++---- 2 files changed, 103 insertions(+), 17 deletions(-) create mode 100644 datasette/static/json-format-highlight-1.0.1.js diff --git a/datasette/static/json-format-highlight-1.0.1.js b/datasette/static/json-format-highlight-1.0.1.js new file mode 100644 index 00000000..e87c76e1 --- /dev/null +++ b/datasette/static/json-format-highlight-1.0.1.js @@ -0,0 +1,43 @@ +/* +https://github.com/luyilin/json-format-highlight +From https://unpkg.com/json-format-highlight@1.0.1/dist/json-format-highlight.js +MIT Licensed +*/ +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : + typeof define === 'function' && define.amd ? define(factory) : + (global.jsonFormatHighlight = factory()); +}(this, (function () { 'use strict'; + +var defaultColors = { + keyColor: 'dimgray', + numberColor: 'lightskyblue', + stringColor: 'lightcoral', + trueColor: 'lightseagreen', + falseColor: '#f66578', + nullColor: 'cornflowerblue' +}; + +function index (json, colorOptions) { + if ( colorOptions === void 0 ) colorOptions = {}; + + if (!json) { return; } + if (typeof json !== 'string') { + json = JSON.stringify(json, null, 2); + } + var colors = Object.assign({}, defaultColors, colorOptions); + json = json.replace(/&/g, '&').replace(//g, '>'); + return json.replace(/("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+]?\d+)?)/g, function (match) { + var color = colors.numberColor; + if (/^"/.test(match)) { + color = /:$/.test(match) ? colors.keyColor : colors.stringColor; + } else { + color = /true/.test(match) ? colors.trueColor : /false/.test(match) ? colors.falseColor : /null/.test(match) ? colors.nullColor : color; + } + return ("" + match + ""); + }); +} + +return index; + +}))); diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 93bacde3..de5337e3 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -2,6 +2,10 @@ {% block title %}API Explorer{% endblock %} +{% block extra_head %} + +{% endblock %} + {% block content %}

API Explorer

@@ -14,17 +18,30 @@ {% endfor %} {% endif %} -
-
- - -
-
- - -
-

- +
+ GET +
+
+ + + +
+ +
+
+ POST +
+
+ + +
+
+ + +
+

+ +
{% else %} - {% if not canned_write and not error %} + {% if not canned_query_write and not error %}

0 results

{% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 0770a380..658c35e6 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,4 +1,3 @@ -from asyncinject import Registry from dataclasses import dataclass, field from typing import Callable from urllib.parse import parse_qsl, urlencode @@ -33,7 +32,7 @@ from datasette.utils import ( from datasette.utils.asgi import AsgiFileDownload, NotFound, Response, Forbidden from datasette.plugins import pm -from .base import BaseView, DatasetteError, DataView, View, _error, stream_csv +from .base import BaseView, DatasetteError, View, _error, stream_csv class DatabaseView(View): @@ -57,7 +56,7 @@ class DatabaseView(View): sql = (request.args.get("sql") or "").strip() if sql: - return await query_view(request, datasette) + return await QueryView()(request, datasette) if format_ not in ("html", "json"): raise NotFound("Invalid format: {}".format(format_)) @@ -65,10 +64,6 @@ class DatabaseView(View): metadata = (datasette.metadata("databases") or {}).get(database, {}) datasette.update_with_inherited_metadata(metadata) - table_counts = await db.table_counts(5) - hidden_table_names = set(await db.hidden_table_names()) - all_foreign_keys = await db.get_all_foreign_keys() - sql_views = [] for view_name in await db.view_names(): view_visible, view_private = await datasette.check_visibility( @@ -196,8 +191,13 @@ class QueryContext: # urls: dict = field( # metadata={"help": "Object containing URL helpers like `database()`"} # ) - canned_write: bool = field( - metadata={"help": "Boolean indicating if this canned query allows writes"} + canned_query_write: bool = field( + metadata={ + "help": "Boolean indicating if this is a canned query that allows writes" + } + ) + metadata: dict = field( + metadata={"help": "Metadata about the database or the canned query"} ) db_is_immutable: bool = field( metadata={"help": "Boolean indicating if this database is immutable"} @@ -232,7 +232,6 @@ class QueryContext: show_hide_hidden: str = field( metadata={"help": "Hidden input field for the _show_sql parameter"} ) - metadata: dict = field(metadata={"help": "Metadata about the query/database"}) database_color: Callable = field( metadata={"help": "Function that returns a color for a given database name"} ) @@ -242,6 +241,12 @@ class QueryContext: alternate_url_json: str = field( metadata={"help": "URL for alternate JSON version of this page"} ) + # TODO: refactor this to somewhere else, probably ds.render_template() + select_templates: list = field( + metadata={ + "help": "List of templates that were considered for rendering this page" + } + ) async def get_tables(datasette, request, db): @@ -320,287 +325,105 @@ async def database_download(request, datasette): ) -async def query_view( - request, - datasette, - # canned_query=None, - # _size=None, - # named_parameters=None, - # write=False, -): - db = await datasette.resolve_database(request) - database = db.name - # Flattened because of ?sql=&name1=value1&name2=value2 feature - params = {key: request.args.get(key) for key in request.args} - sql = None - if "sql" in params: - sql = params.pop("sql") - if "_shape" in params: - params.pop("_shape") +class QueryView(View): + async def post(self, request, datasette): + from datasette.app import TableNotFound - # extras come from original request.args to avoid being flattened - extras = request.args.getlist("_extra") + db = await datasette.resolve_database(request) - # TODO: Behave differently for canned query here: - await datasette.ensure_permissions(request.actor, [("execute-sql", database)]) - - _, private = await datasette.check_visibility( - request.actor, - permissions=[ - ("view-database", database), - "view-instance", - ], - ) - - extra_args = {} - if params.get("_timelimit"): - extra_args["custom_time_limit"] = int(params["_timelimit"]) - - format_ = request.url_vars.get("format") or "html" - query_error = None - try: - validate_sql_select(sql) - results = await datasette.execute( - database, sql, params, truncate=True, **extra_args - ) - columns = results.columns - rows = results.rows - except QueryInterrupted as ex: - raise DatasetteError( - textwrap.dedent( - """ -

SQL query took too long. The time limit is controlled by the - sql_time_limit_ms - configuration option.

- - - """.format( - markupsafe.escape(ex.sql) - ) - ).strip(), - title="SQL Interrupted", - status=400, - message_is_html=True, - ) - except sqlite3.DatabaseError as ex: - query_error = str(ex) - results = None - rows = [] - columns = [] - except (sqlite3.OperationalError, InvalidSql) as ex: - raise DatasetteError(str(ex), title="Invalid SQL", status=400) - except sqlite3.OperationalError as ex: - raise DatasetteError(str(ex)) - except DatasetteError: - raise - - # Handle formats from plugins - if format_ == "csv": - - async def fetch_data_for_csv(request, _next=None): - results = await db.execute(sql, params, truncate=True) - data = {"rows": results.rows, "columns": results.columns} - return data, None, None - - return await stream_csv(datasette, fetch_data_for_csv, request, db.name) - elif format_ in datasette.renderers.keys(): - # Dispatch request to the correct output format renderer - # (CSV is not handled here due to streaming) - result = call_with_supported_arguments( - datasette.renderers[format_][0], - datasette=datasette, - columns=columns, - rows=rows, - sql=sql, - query_name=None, - database=database, - table=None, - request=request, - view_name="table", - truncated=results.truncated if results else False, - error=query_error, - # These will be deprecated in Datasette 1.0: - args=request.args, - data={"rows": rows, "columns": columns}, - ) - if asyncio.iscoroutine(result): - result = await result - if result is None: - raise NotFound("No data") - if isinstance(result, dict): - r = Response( - body=result.get("body"), - status=result.get("status_code") or 200, - content_type=result.get("content_type", "text/plain"), - headers=result.get("headers"), + # We must be a canned query + table_found = False + try: + await datasette.resolve_table(request) + table_found = True + except TableNotFound as table_not_found: + canned_query = await datasette.get_canned_query( + table_not_found.database_name, table_not_found.table, request.actor ) - elif isinstance(result, Response): - r = result - # if status_code is not None: - # # Over-ride the status code - # r.status = status_code - else: - assert False, f"{result} should be dict or Response" - elif format_ == "html": - headers = {} - templates = [f"query-{to_css_class(database)}.html", "query.html"] - template = datasette.jinja_env.select_template(templates) - alternate_url_json = datasette.absolute_url( - request, - datasette.urls.path(path_with_format(request=request, format="json")), - ) - data = {} - headers.update( - { - "Link": '{}; rel="alternate"; type="application/json+datasette"'.format( - alternate_url_json - ) - } - ) - metadata = (datasette.metadata("databases") or {}).get(database, {}) - datasette.update_with_inherited_metadata(metadata) + if canned_query is None: + raise + if table_found: + # That should not have happened + raise DatasetteError("Unexpected table found on POST", status=404) - renderers = {} - for key, (_, can_render) in datasette.renderers.items(): - it_can_render = call_with_supported_arguments( - can_render, - datasette=datasette, - columns=data.get("columns") or [], - rows=data.get("rows") or [], - sql=data.get("query", {}).get("sql", None), - query_name=data.get("query_name"), - database=database, - table=data.get("table"), - request=request, - view_name="database", + # If database is immutable, return an error + if not db.is_mutable: + raise Forbidden("Database is immutable") + + # Process the POST + body = await request.post_body() + body = body.decode("utf-8").strip() + if body.startswith("{") and body.endswith("}"): + params = json.loads(body) + # But we want key=value strings + for key, value in params.items(): + params[key] = str(value) + else: + params = dict(parse_qsl(body, keep_blank_values=True)) + # Should we return JSON? + should_return_json = ( + request.headers.get("accept") == "application/json" + or request.args.get("_json") + or params.get("_json") + ) + params_for_query = MagicParameters(params, request, datasette) + ok = None + redirect_url = None + try: + cursor = await db.execute_write(canned_query["sql"], params_for_query) + message = canned_query.get( + "on_success_message" + ) or "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) + message_type = datasette.INFO + redirect_url = canned_query.get("on_success_redirect") + ok = True + except Exception as ex: + message = canned_query.get("on_error_message") or str(ex) + message_type = datasette.ERROR + redirect_url = canned_query.get("on_error_redirect") + ok = False + if should_return_json: + return Response.json( + { + "ok": ok, + "message": message, + "redirect": redirect_url, + } ) - it_can_render = await await_me_maybe(it_can_render) - if it_can_render: - renderers[key] = datasette.urls.path( - path_with_format(request=request, format=key) - ) - - allow_execute_sql = await datasette.permission_allowed( - request.actor, "execute-sql", database - ) - - show_hide_hidden = "" - if metadata.get("hide_sql"): - if bool(params.get("_show_sql")): - show_hide_link = path_with_removed_args(request, {"_show_sql"}) - show_hide_text = "hide" - show_hide_hidden = '' - else: - show_hide_link = path_with_added_args(request, {"_show_sql": 1}) - show_hide_text = "show" else: - if bool(params.get("_hide_sql")): - show_hide_link = path_with_removed_args(request, {"_hide_sql"}) - show_hide_text = "show" - show_hide_hidden = '' - else: - show_hide_link = path_with_added_args(request, {"_hide_sql": 1}) - show_hide_text = "hide" - hide_sql = show_hide_text == "show" + datasette.add_message(request, message, message_type) + return Response.redirect(redirect_url or request.path) - # Extract any :named parameters - named_parameters = await derive_named_parameters( - datasette.get_database(database), sql - ) - named_parameter_values = { - named_parameter: params.get(named_parameter) or "" - for named_parameter in named_parameters - if not named_parameter.startswith("_") - } + async def get(self, request, datasette): + from datasette.app import TableNotFound - # Set to blank string if missing from params - for named_parameter in named_parameters: - if named_parameter not in params and not named_parameter.startswith("_"): - params[named_parameter] = "" - - r = Response.html( - await datasette.render_template( - template, - QueryContext( - database=database, - query={ - "sql": sql, - "params": params, - }, - canned_query=None, - private=private, - canned_write=False, - db_is_immutable=not db.is_mutable, - error=query_error, - hide_sql=hide_sql, - show_hide_link=datasette.urls.path(show_hide_link), - show_hide_text=show_hide_text, - editable=True, # TODO - allow_execute_sql=allow_execute_sql, - tables=await get_tables(datasette, request, db), - named_parameter_values=named_parameter_values, - edit_sql_url="todo", - display_rows=await display_rows( - datasette, database, request, rows, columns - ), - table_columns=await _table_columns(datasette, database) - if allow_execute_sql - else {}, - columns=columns, - renderers=renderers, - url_csv=datasette.urls.path( - path_with_format( - request=request, format="csv", extra_qs={"_size": "max"} - ) - ), - show_hide_hidden=markupsafe.Markup(show_hide_hidden), - metadata=metadata, - database_color=lambda _: "#ff0000", - alternate_url_json=alternate_url_json, - ), - request=request, - view_name="database", - ), - headers=headers, - ) - else: - assert False, "Invalid format: {}".format(format_) - if datasette.cors: - add_cors_headers(r.headers) - return r - - -class QueryView(DataView): - async def data( - self, - request, - sql, - editable=True, - canned_query=None, - metadata=None, - _size=None, - named_parameters=None, - write=False, - default_labels=None, - ): - db = await self.ds.resolve_database(request) + db = await datasette.resolve_database(request) database = db.name - params = {key: request.args.get(key) for key in request.args} - if "sql" in params: - params.pop("sql") - if "_shape" in params: - params.pop("_shape") + + # Are we a canned query? + canned_query = None + canned_query_write = False + if "table" in request.url_vars: + try: + await datasette.resolve_table(request) + except TableNotFound as table_not_found: + # Was this actually a canned query? + canned_query = await datasette.get_canned_query( + table_not_found.database_name, table_not_found.table, request.actor + ) + if canned_query is None: + raise + canned_query_write = bool(canned_query.get("write")) private = False if canned_query: # Respect canned query permissions - visible, private = await self.ds.check_visibility( + visible, private = await datasette.check_visibility( request.actor, permissions=[ - ("view-query", (database, canned_query)), + ("view-query", (database, canned_query["name"])), ("view-database", database), "view-instance", ], @@ -609,18 +432,32 @@ class QueryView(DataView): raise Forbidden("You do not have permission to view this query") else: - await self.ds.ensure_permissions(request.actor, [("execute-sql", database)]) + await datasette.ensure_permissions( + request.actor, [("execute-sql", database)] + ) + + # Flattened because of ?sql=&name1=value1&name2=value2 feature + params = {key: request.args.get(key) for key in request.args} + sql = None + + if canned_query: + sql = canned_query["sql"] + elif "sql" in params: + sql = params.pop("sql") # Extract any :named parameters - named_parameters = named_parameters or await derive_named_parameters( - self.ds.get_database(database), sql - ) + named_parameters = [] + if canned_query and canned_query.get("params"): + named_parameters = canned_query["params"] + if not named_parameters: + named_parameters = await derive_named_parameters( + datasette.get_database(database), sql + ) named_parameter_values = { named_parameter: params.get(named_parameter) or "" for named_parameter in named_parameters if not named_parameter.startswith("_") } - # Set to blank string if missing from params for named_parameter in named_parameters: if named_parameter not in params and not named_parameter.startswith("_"): @@ -629,212 +466,159 @@ class QueryView(DataView): extra_args = {} if params.get("_timelimit"): extra_args["custom_time_limit"] = int(params["_timelimit"]) - if _size: - extra_args["page_size"] = _size - templates = [f"query-{to_css_class(database)}.html", "query.html"] - if canned_query: - templates.insert( - 0, - f"query-{to_css_class(database)}-{to_css_class(canned_query)}.html", - ) + format_ = request.url_vars.get("format") or "html" query_error = None + results = None + rows = [] + columns = [] - # Execute query - as write or as read - if write: - if request.method == "POST": - # If database is immutable, return an error - if not db.is_mutable: - raise Forbidden("Database is immutable") - body = await request.post_body() - body = body.decode("utf-8").strip() - if body.startswith("{") and body.endswith("}"): - params = json.loads(body) - # But we want key=value strings - for key, value in params.items(): - params[key] = str(value) - else: - params = dict(parse_qsl(body, keep_blank_values=True)) - # Should we return JSON? - should_return_json = ( - request.headers.get("accept") == "application/json" - or request.args.get("_json") - or params.get("_json") - ) - if canned_query: - params_for_query = MagicParameters(params, request, self.ds) - else: - params_for_query = params - ok = None - try: - cursor = await self.ds.databases[database].execute_write( - sql, params_for_query - ) - message = metadata.get( - "on_success_message" - ) or "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" - ) - message_type = self.ds.INFO - redirect_url = metadata.get("on_success_redirect") - ok = True - except Exception as e: - message = metadata.get("on_error_message") or str(e) - message_type = self.ds.ERROR - redirect_url = metadata.get("on_error_redirect") - ok = False - if should_return_json: - return Response.json( - { - "ok": ok, - "message": message, - "redirect": redirect_url, - } - ) - else: - self.ds.add_message(request, message, message_type) - return self.redirect(request, redirect_url or request.path) - else: + params_for_query = params - async def extra_template(): - return { - "request": request, - "db_is_immutable": not db.is_mutable, - "path_with_added_args": path_with_added_args, - "path_with_removed_args": path_with_removed_args, - "named_parameter_values": named_parameter_values, - "canned_query": canned_query, - "success_message": request.args.get("_success") or "", - "canned_write": True, - } - - return ( - { - "database": database, - "rows": [], - "truncated": False, - "columns": [], - "query": {"sql": sql, "params": params}, - "private": private, - }, - extra_template, - templates, - ) - else: # Not a write - if canned_query: - params_for_query = MagicParameters(params, request, self.ds) - else: - params_for_query = params + if not canned_query_write: try: - results = await self.ds.execute( + if not canned_query: + # For regular queries we only allow SELECT, plus other rules + validate_sql_select(sql) + else: + # Canned queries can run magic parameters + params_for_query = MagicParameters(params, request, datasette) + results = await datasette.execute( database, sql, params_for_query, truncate=True, **extra_args ) - columns = [r[0] for r in results.description] - except sqlite3.DatabaseError as e: - query_error = e + columns = results.columns + rows = results.rows + except QueryInterrupted as ex: + raise DatasetteError( + textwrap.dedent( + """ +

SQL query took too long. The time limit is controlled by the + sql_time_limit_ms + configuration option.

+ + + """.format( + markupsafe.escape(ex.sql) + ) + ).strip(), + title="SQL Interrupted", + status=400, + message_is_html=True, + ) + except sqlite3.DatabaseError as ex: + query_error = str(ex) results = None + rows = [] columns = [] + except (sqlite3.OperationalError, InvalidSql) as ex: + raise DatasetteError(str(ex), title="Invalid SQL", status=400) + except sqlite3.OperationalError as ex: + raise DatasetteError(str(ex)) + except DatasetteError: + raise - allow_execute_sql = await self.ds.permission_allowed( - request.actor, "execute-sql", database - ) + # Handle formats from plugins + if format_ == "csv": - async def extra_template(): - display_rows = [] - truncate_cells = self.ds.setting("truncate_cells_html") - for row in results.rows if results else []: - display_row = [] - for column, value in zip(results.columns, row): - display_value = value - # Let the plugins have a go - # pylint: disable=no-member - plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=None, - database=database, - datasette=self.ds, - request=request, - ): - candidate = await await_me_maybe(candidate) - if candidate is not None: - plugin_display_value = candidate - break - if plugin_display_value is not None: - display_value = plugin_display_value - else: - if value in ("", None): - display_value = markupsafe.Markup(" ") - elif is_url(str(display_value).strip()): - display_value = markupsafe.Markup( - '{truncated_url}'.format( - url=markupsafe.escape(value.strip()), - truncated_url=markupsafe.escape( - truncate_url(value.strip(), truncate_cells) - ), - ) - ) - elif isinstance(display_value, bytes): - blob_url = path_with_format( - request=request, - format="blob", - extra_qs={ - "_blob_column": column, - "_blob_hash": hashlib.sha256( - display_value - ).hexdigest(), - }, - ) - formatted = format_bytes(len(value)) - display_value = markupsafe.Markup( - '<Binary: {:,} byte{}>'.format( - blob_url, - ' title="{}"'.format(formatted) - if "bytes" not in formatted - else "", - len(value), - "" if len(value) == 1 else "s", - ) - ) - else: - display_value = str(value) - if truncate_cells and len(display_value) > truncate_cells: - display_value = ( - display_value[:truncate_cells] + "\u2026" - ) - display_row.append(display_value) - display_rows.append(display_row) + async def fetch_data_for_csv(request, _next=None): + results = await db.execute(sql, params, truncate=True) + data = {"rows": results.rows, "columns": results.columns} + return data, None, None - # Show 'Edit SQL' button only if: - # - User is allowed to execute SQL - # - SQL is an approved SELECT statement - # - No magic parameters, so no :_ in the SQL string - edit_sql_url = None - is_validated_sql = False - try: - validate_sql_select(sql) - is_validated_sql = True - except InvalidSql: - pass - if allow_execute_sql and is_validated_sql and ":_" not in sql: - edit_sql_url = ( - self.ds.urls.database(database) - + "?" - + urlencode( - { - **{ - "sql": sql, - }, - **named_parameter_values, - } - ) + return await stream_csv(datasette, fetch_data_for_csv, request, db.name) + elif format_ in datasette.renderers.keys(): + # Dispatch request to the correct output format renderer + # (CSV is not handled here due to streaming) + result = call_with_supported_arguments( + datasette.renderers[format_][0], + datasette=datasette, + columns=columns, + rows=rows, + sql=sql, + query_name=canned_query["name"] if canned_query else None, + database=database, + table=None, + request=request, + view_name="table", + truncated=results.truncated if results else False, + error=query_error, + # These will be deprecated in Datasette 1.0: + args=request.args, + data={"rows": rows, "columns": columns}, + ) + if asyncio.iscoroutine(result): + result = await result + if result is None: + raise NotFound("No data") + if isinstance(result, dict): + r = Response( + body=result.get("body"), + status=result.get("status_code") or 200, + content_type=result.get("content_type", "text/plain"), + headers=result.get("headers"), + ) + elif isinstance(result, Response): + r = result + # if status_code is not None: + # # Over-ride the status code + # r.status = status_code + else: + assert False, f"{result} should be dict or Response" + elif format_ == "html": + headers = {} + templates = [f"query-{to_css_class(database)}.html", "query.html"] + if canned_query: + templates.insert( + 0, + f"query-{to_css_class(database)}-{to_css_class(canned_query['name'])}.html", ) + template = datasette.jinja_env.select_template(templates) + alternate_url_json = datasette.absolute_url( + request, + datasette.urls.path(path_with_format(request=request, format="json")), + ) + data = {} + headers.update( + { + "Link": '{}; rel="alternate"; type="application/json+datasette"'.format( + alternate_url_json + ) + } + ) + metadata = (datasette.metadata("databases") or {}).get(database, {}) + datasette.update_with_inherited_metadata(metadata) + + renderers = {} + for key, (_, can_render) in datasette.renderers.items(): + it_can_render = call_with_supported_arguments( + can_render, + datasette=datasette, + columns=data.get("columns") or [], + rows=data.get("rows") or [], + sql=data.get("query", {}).get("sql", None), + query_name=data.get("query_name"), + database=database, + table=data.get("table"), + request=request, + view_name="database", + ) + it_can_render = await await_me_maybe(it_can_render) + if it_can_render: + renderers[key] = datasette.urls.path( + path_with_format(request=request, format=key) + ) + + allow_execute_sql = await datasette.permission_allowed( + request.actor, "execute-sql", database + ) + show_hide_hidden = "" - if metadata.get("hide_sql"): + if canned_query and canned_query.get("hide_sql"): if bool(params.get("_show_sql")): show_hide_link = path_with_removed_args(request, {"_show_sql"}) show_hide_text = "hide" @@ -855,42 +639,86 @@ class QueryView(DataView): show_hide_link = path_with_added_args(request, {"_hide_sql": 1}) show_hide_text = "hide" hide_sql = show_hide_text == "show" - return { - "display_rows": display_rows, - "custom_sql": True, - "named_parameter_values": named_parameter_values, - "editable": editable, - "canned_query": canned_query, - "edit_sql_url": edit_sql_url, - "metadata": metadata, - "settings": self.ds.settings_dict(), - "request": request, - "show_hide_link": self.ds.urls.path(show_hide_link), - "show_hide_text": show_hide_text, - "show_hide_hidden": markupsafe.Markup(show_hide_hidden), - "hide_sql": hide_sql, - "table_columns": await _table_columns(self.ds, database) - if allow_execute_sql - else {}, - } - return ( - { - "ok": not query_error, - "database": database, - "query_name": canned_query, - "rows": results.rows if results else [], - "truncated": results.truncated if results else False, - "columns": columns, - "query": {"sql": sql, "params": params}, - "error": str(query_error) if query_error else None, - "private": private, - "allow_execute_sql": allow_execute_sql, - }, - extra_template, - templates, - 400 if query_error else 200, - ) + # Show 'Edit SQL' button only if: + # - User is allowed to execute SQL + # - SQL is an approved SELECT statement + # - No magic parameters, so no :_ in the SQL string + edit_sql_url = None + is_validated_sql = False + try: + validate_sql_select(sql) + is_validated_sql = True + except InvalidSql: + pass + if allow_execute_sql and is_validated_sql and ":_" not in sql: + edit_sql_url = ( + datasette.urls.database(database) + + "?" + + urlencode( + { + **{ + "sql": sql, + }, + **named_parameter_values, + } + ) + ) + + r = Response.html( + await datasette.render_template( + template, + QueryContext( + database=database, + query={ + "sql": sql, + "params": params, + }, + canned_query=canned_query["name"] if canned_query else None, + private=private, + canned_query_write=canned_query_write, + db_is_immutable=not db.is_mutable, + error=query_error, + hide_sql=hide_sql, + show_hide_link=datasette.urls.path(show_hide_link), + show_hide_text=show_hide_text, + editable=not canned_query, + allow_execute_sql=allow_execute_sql, + tables=await get_tables(datasette, request, db), + named_parameter_values=named_parameter_values, + edit_sql_url=edit_sql_url, + display_rows=await display_rows( + datasette, database, request, rows, columns + ), + table_columns=await _table_columns(datasette, database) + if allow_execute_sql + else {}, + columns=columns, + renderers=renderers, + url_csv=datasette.urls.path( + path_with_format( + request=request, format="csv", extra_qs={"_size": "max"} + ) + ), + show_hide_hidden=markupsafe.Markup(show_hide_hidden), + metadata=canned_query or metadata, + database_color=lambda _: "#ff0000", + alternate_url_json=alternate_url_json, + select_templates=[ + f"{'*' if template_name == template.name else ''}{template_name}" + for template_name in templates + ], + ), + request=request, + view_name="database", + ), + headers=headers, + ) + else: + assert False, "Invalid format: {}".format(format_) + if datasette.cors: + add_cors_headers(r.headers) + return r class MagicParameters(dict): diff --git a/datasette/views/table.py b/datasette/views/table.py index 77acfd95..28264e92 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -9,7 +9,6 @@ import markupsafe from datasette.plugins import pm from datasette.database import QueryInterrupted from datasette import tracer -from datasette.renderer import json_renderer from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -21,7 +20,6 @@ from datasette.utils import ( tilde_encode, escape_sqlite, filters_should_redirect, - format_bytes, is_url, path_from_row_pks, path_with_added_args, @@ -38,7 +36,7 @@ from datasette.utils import ( from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters import sqlite_utils -from .base import BaseView, DataView, DatasetteError, ureg, _error, stream_csv +from .base import BaseView, DatasetteError, ureg, _error, stream_csv from .database import QueryView LINK_WITH_LABEL = ( @@ -698,57 +696,6 @@ async def table_view(datasette, request): return response -class CannedQueryView(DataView): - def __init__(self, datasette): - self.ds = datasette - - async def post(self, request): - from datasette.app import TableNotFound - - try: - await self.ds.resolve_table(request) - except TableNotFound as e: - # Was this actually a canned query? - canned_query = await self.ds.get_canned_query( - e.database_name, e.table, request.actor - ) - if canned_query: - # Handle POST to a canned query - return await QueryView(self.ds).data( - request, - canned_query["sql"], - metadata=canned_query, - editable=False, - canned_query=e.table, - named_parameters=canned_query.get("params"), - write=bool(canned_query.get("write")), - ) - - return Response.text("Method not allowed", status=405) - - async def data(self, request, **kwargs): - from datasette.app import TableNotFound - - try: - await self.ds.resolve_table(request) - except TableNotFound as not_found: - canned_query = await self.ds.get_canned_query( - not_found.database_name, not_found.table, request.actor - ) - if canned_query: - return await QueryView(self.ds).data( - request, - canned_query["sql"], - metadata=canned_query, - editable=False, - canned_query=not_found.table, - named_parameters=canned_query.get("params"), - write=bool(canned_query.get("write")), - ) - else: - raise - - async def table_view_traced(datasette, request): from datasette.app import TableNotFound @@ -761,10 +708,7 @@ async def table_view_traced(datasette, request): ) # If this is a canned query, not a table, then dispatch to QueryView instead if canned_query: - if request.method == "POST": - return await CannedQueryView(datasette).post(request) - else: - return await CannedQueryView(datasette).get(request) + return await QueryView()(request, datasette) else: raise diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index d6a88733..e9ad3239 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -95,12 +95,12 @@ def test_insert(canned_write_client): csrftoken_from=True, cookies={"foo": "bar"}, ) - assert 302 == response.status - assert "/data/add_name?success" == response.headers["Location"] messages = canned_write_client.ds.unsign( response.cookies["ds_messages"], "messages" ) - assert [["Query executed, 1 row affected", 1]] == messages + assert messages == [["Query executed, 1 row affected", 1]] + assert response.status == 302 + assert response.headers["Location"] == "/data/add_name?success" @pytest.mark.parametrize( @@ -382,11 +382,11 @@ def test_magic_parameters_cannot_be_used_in_arbitrary_queries(magic_parameters_c def test_canned_write_custom_template(canned_write_client): response = canned_write_client.get("/data/update_name") assert response.status == 200 + assert "!!!CUSTOM_UPDATE_NAME_TEMPLATE!!!" in response.text assert ( "" in response.text ) - assert "!!!CUSTOM_UPDATE_NAME_TEMPLATE!!!" in response.text # And test for link rel=alternate while we're here: assert ( '' From 8920d425f4d417cfd998b61016c5ff3530cd34e1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 10:20:58 -0700 Subject: [PATCH 0498/1218] 1.0a3 release notes, smaller changes section - refs #2135 --- docs/changelog.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index ee48d075..b4416f94 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,25 @@ Changelog ========= +.. _v1_0_a3: + +1.0a3 (2023-08-09) +------------------ + +This alpha release previews the updated design for Datasette's default JSON API. + +Smaller changes +~~~~~~~~~~~~~~~ + +- Datasette documentation now shows YAML examples for :ref:`metadata` by default, with a tab interface for switching to JSON. (:issue:`1153`) +- :ref:`plugin_register_output_renderer` plugins now have access to ``error`` and ``truncated`` arguments, allowing them to display error messages and take into account truncated results. (:issue:`2130`) +- ``render_cell()`` plugin hook now also supports an optional ``request`` argument. (:issue:`2007`) +- New ``Justfile`` to support development workflows for Datasette using `Just `__. +- ``datasette.render_template()`` can now accepts a ``datasette.views.Context`` subclass as an alternative to a dictionary. (:issue:`2127`) +- ``datasette install -e path`` option for editable installations, useful while developing plugins. (:issue:`2106`) +- When started with the ``--cors`` option Datasette now serves an ``Access-Control-Max-Age: 3600`` header, ensuring CORS OPTIONS requests are repeated no more than once an hour. (:issue:`2079`) +- Fixed a bug where the ``_internal`` database could display ``None`` instead of ``null`` for in-memory databases. (:issue:`1970`) + .. _v0_64_2: 0.64.2 (2023-03-08) From e34d09c6ec16ff5e7717e112afdad67f7c05a62a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 12:01:59 -0700 Subject: [PATCH 0499/1218] Don't include columns in query JSON, refs #2136 --- datasette/renderer.py | 8 +++++++- datasette/views/database.py | 2 +- tests/test_api.py | 1 - tests/test_cli_serve_get.py | 11 ++++++----- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/datasette/renderer.py b/datasette/renderer.py index 0bd74e81..224031a7 100644 --- a/datasette/renderer.py +++ b/datasette/renderer.py @@ -27,7 +27,7 @@ def convert_specific_columns_to_json(rows, columns, json_cols): return new_rows -def json_renderer(args, data, error, truncated=None): +def json_renderer(request, args, data, error, truncated=None): """Render a response as JSON""" status_code = 200 @@ -106,6 +106,12 @@ def json_renderer(args, data, error, truncated=None): "status": 400, "title": None, } + + # Don't include "columns" in output + # https://github.com/simonw/datasette/issues/2136 + if isinstance(data, dict) and "columns" not in request.args.getlist("_extra"): + data.pop("columns", None) + # Handle _nl option for _shape=array nl = args.get("_nl", "") if nl and shape == "array": diff --git a/datasette/views/database.py b/datasette/views/database.py index 658c35e6..cf76f3c2 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -548,7 +548,7 @@ class QueryView(View): error=query_error, # These will be deprecated in Datasette 1.0: args=request.args, - data={"rows": rows, "columns": columns}, + data={"ok": True, "rows": rows, "columns": columns}, ) if asyncio.iscoroutine(result): result = await result diff --git a/tests/test_api.py b/tests/test_api.py index 28415a0b..f96f571e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -649,7 +649,6 @@ async def test_custom_sql(ds_client): {"content": "RENDER_CELL_DEMO"}, {"content": "RENDER_CELL_ASYNC"}, ], - "columns": ["content"], "ok": True, "truncated": False, } diff --git a/tests/test_cli_serve_get.py b/tests/test_cli_serve_get.py index 2e0390bb..dc7fc1e2 100644 --- a/tests/test_cli_serve_get.py +++ b/tests/test_cli_serve_get.py @@ -34,11 +34,12 @@ def test_serve_with_get(tmp_path_factory): "/_memory.json?sql=select+sqlite_version()", ], ) - assert 0 == result.exit_code, result.output - assert { - "truncated": False, - "columns": ["sqlite_version()"], - }.items() <= json.loads(result.output).items() + assert result.exit_code == 0, result.output + data = json.loads(result.output) + # Should have a single row with a single column + assert len(data["rows"]) == 1 + assert list(data["rows"][0].keys()) == ["sqlite_version()"] + assert set(data.keys()) == {"rows", "ok", "truncated"} # The plugin should have created hello.txt assert (plugins_dir / "hello.txt").read_text() == "hello" From 856ca68d94708c6e94673cb6bc28bf3e3ca17845 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 12:04:40 -0700 Subject: [PATCH 0500/1218] Update default JSON representation docs, refs #2135 --- docs/json_api.rst | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/json_api.rst b/docs/json_api.rst index c273c2a8..16b997eb 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -9,10 +9,10 @@ through the Datasette user interface can also be accessed as JSON via the API. To access the API for a page, either click on the ``.json`` link on that page or edit the URL and add a ``.json`` extension to it. -.. _json_api_shapes: +.. _json_api_default: -Different shapes ----------------- +Default representation +---------------------- The default JSON representation of data from a SQLite table or custom query looks like this: @@ -21,7 +21,6 @@ looks like this: { "ok": true, - "next": null, "rows": [ { "id": 3, @@ -39,13 +38,22 @@ looks like this: "id": 1, "name": "San Francisco" } - ] + ], + "truncated": false } -The ``rows`` key is a list of objects, each one representing a row. ``next`` indicates if -there is another page, and ``ok`` is always ``true`` if an error did not occur. +``"ok"`` is always ``true`` if an error did not occur. -If ``next`` is present then the next page in the pagination set can be retrieved using ``?_next=VALUE``. +The ``"rows"`` key is a list of objects, each one representing a row. + +The ``"truncated"`` key lets you know if the query was truncated. This can happen if a SQL query returns more than 1,000 results (or the :ref:`setting_max_returned_rows` setting). + +For table pages, an additional key ``"next"`` may be present. This indicates that the next page in the pagination set can be retrieved using ``?_next=VALUE``. + +.. _json_api_shapes: + +Different shapes +---------------- The ``_shape`` parameter can be used to access alternative formats for the ``rows`` key which may be more convenient for your application. There are three From 90cb9ca58d910f49e8f117bbdd94df6f0855cf99 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 12:11:16 -0700 Subject: [PATCH 0501/1218] JSON changes in release notes, refs #2135 --- docs/changelog.rst | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index b4416f94..4c70855b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,7 +9,40 @@ Changelog 1.0a3 (2023-08-09) ------------------ -This alpha release previews the updated design for Datasette's default JSON API. +This alpha release previews the updated design for Datasette's default JSON API. (:issue:`782`) + +The new :ref:`default JSON representation ` for both table pages (``/dbname/table.json``) and arbitrary SQL queries (``/dbname.json?sql=...``) is now shaped like this: + +.. code-block:: json + + { + "ok": true, + "rows": [ + { + "id": 3, + "name": "Detroit" + }, + { + "id": 2, + "name": "Los Angeles" + }, + { + "id": 4, + "name": "Memnonia" + }, + { + "id": 1, + "name": "San Francisco" + } + ], + "truncated": false + } + +Tables will include an additional ``"next"`` key for pagination, which can be passed to ``?_next=`` to fetch the next page of results. + +The various ``?_shape=`` options continue to work as before - see :ref:`json_api_shapes` for details. + +A new ``?_extra=`` mechanism is available for tables, but has not yet been stabilized or documented. Details on that are available in :issue:`262`. Smaller changes ~~~~~~~~~~~~~~~ From 19ab4552e212c9845a59461cc73e82d5ae8c278a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 12:13:11 -0700 Subject: [PATCH 0502/1218] Release 1.0a3 Closes #2135 Refs #262, #782, #1153, #1970, #2007, #2079, #2106, #2127, #2130 --- datasette/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 3b81ab21..61dee464 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a2" +__version__ = "1.0a3" __version_info__ = tuple(__version__.split(".")) From 4a42476bb7ce4c5ed941f944115dedd9bce34656 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 15:04:16 -0700 Subject: [PATCH 0503/1218] datasette plugins --requirements, closes #2133 --- datasette/cli.py | 12 ++++++++++-- docs/cli-reference.rst | 1 + docs/plugins.rst | 32 ++++++++++++++++++++++++++++---- tests/test_cli.py | 3 +++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 32266888..21fd25d6 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -223,15 +223,23 @@ pm.hook.publish_subcommand(publish=publish) @cli.command() @click.option("--all", help="Include built-in default plugins", is_flag=True) +@click.option( + "--requirements", help="Output requirements.txt of installed plugins", is_flag=True +) @click.option( "--plugins-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), help="Path to directory containing custom plugins", ) -def plugins(all, plugins_dir): +def plugins(all, requirements, plugins_dir): """List currently installed plugins""" app = Datasette([], plugins_dir=plugins_dir) - click.echo(json.dumps(app._plugins(all=all), indent=4)) + if requirements: + for plugin in app._plugins(): + if plugin["version"]: + click.echo("{}=={}".format(plugin["name"], plugin["version"])) + else: + click.echo(json.dumps(app._plugins(all=all), indent=4)) @cli.command() diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 2177fc9e..7a96d311 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -282,6 +282,7 @@ Output JSON showing all currently installed plugins, their versions, whether the Options: --all Include built-in default plugins + --requirements Output requirements.txt of installed plugins --plugins-dir DIRECTORY Path to directory containing custom plugins --help Show this message and exit. diff --git a/docs/plugins.rst b/docs/plugins.rst index 979f94dd..19bfdd0c 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -90,7 +90,12 @@ You can see a list of installed plugins by navigating to the ``/-/plugins`` page You can also use the ``datasette plugins`` command:: - $ datasette plugins + datasette plugins + +Which outputs: + +.. code-block:: json + [ { "name": "datasette_json_html", @@ -107,7 +112,8 @@ You can also use the ``datasette plugins`` command:: cog.out("\n") result = CliRunner().invoke(cli.cli, ["plugins", "--all"]) # cog.out() with text containing newlines was unindenting for some reason - cog.outl("If you run ``datasette plugins --all`` it will include default plugins that ship as part of Datasette::\n") + cog.outl("If you run ``datasette plugins --all`` it will include default plugins that ship as part of Datasette:\n") + cog.outl(".. code-block:: json\n") plugins = [p for p in json.loads(result.output) if p["name"].startswith("datasette.")] indented = textwrap.indent(json.dumps(plugins, indent=4), " ") for line in indented.split("\n"): @@ -115,7 +121,9 @@ You can also use the ``datasette plugins`` command:: cog.out("\n\n") .. ]]] -If you run ``datasette plugins --all`` it will include default plugins that ship as part of Datasette:: +If you run ``datasette plugins --all`` it will include default plugins that ship as part of Datasette: + +.. code-block:: json [ { @@ -236,6 +244,22 @@ If you run ``datasette plugins --all`` it will include default plugins that ship You can add the ``--plugins-dir=`` option to include any plugins found in that directory. +Add ``--requirements`` to output a list of installed plugins that can then be installed in another Datasette instance using ``datasette install -r requirements.txt``:: + + datasette plugins --requirements + +The output will look something like this:: + + datasette-codespaces==0.1.1 + datasette-graphql==2.2 + datasette-json-html==1.0.1 + datasette-pretty-json==0.2.2 + datasette-x-forwarded-host==0.1 + +To write that to a ``requirements.txt`` file, run this:: + + datasette plugins --requirements > requirements.txt + .. _plugins_configuration: Plugin configuration @@ -390,7 +414,7 @@ Any values embedded in ``metadata.yaml`` will be visible to anyone who views the If you are publishing your data using the :ref:`datasette publish ` family of commands, you can use the ``--plugin-secret`` option to set these secrets at publish time. For example, using Heroku you might run the following command:: - $ datasette publish heroku my_database.db \ + datasette publish heroku my_database.db \ --name my-heroku-app-demo \ --install=datasette-auth-github \ --plugin-secret datasette-auth-github client_id your_client_id \ diff --git a/tests/test_cli.py b/tests/test_cli.py index 75724f61..056e2821 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -108,6 +108,9 @@ def test_plugins_cli(app_client): assert set(names).issuperset({p["name"] for p in EXPECTED_PLUGINS}) # And the following too: assert set(names).issuperset(DEFAULT_PLUGINS) + # --requirements should be empty because there are no installed non-plugins-dir plugins + result3 = runner.invoke(cli, ["plugins", "--requirements"]) + assert result3.output == "" def test_metadata_yaml(): From a3593c901580ea50854c3e0774b0ba0126e8a76f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 17:32:07 -0700 Subject: [PATCH 0504/1218] on_success_message_sql, closes #2138 --- datasette/views/database.py | 29 ++++++++++++++++---- docs/sql_queries.rst | 21 ++++++++++---- tests/test_canned_queries.py | 53 +++++++++++++++++++++++++++++++----- 3 files changed, 85 insertions(+), 18 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index cf76f3c2..79b3f88d 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -360,6 +360,10 @@ class QueryView(View): params[key] = str(value) else: params = dict(parse_qsl(body, keep_blank_values=True)) + + # Don't ever send csrftoken as a SQL parameter + params.pop("csrftoken", None) + # Should we return JSON? should_return_json = ( request.headers.get("accept") == "application/json" @@ -371,12 +375,27 @@ class QueryView(View): redirect_url = None try: cursor = await db.execute_write(canned_query["sql"], params_for_query) - message = canned_query.get( - "on_success_message" - ) or "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" - ) + # success message can come from on_success_message or on_success_message_sql + message = None message_type = datasette.INFO + on_success_message_sql = canned_query.get("on_success_message_sql") + if on_success_message_sql: + try: + message_result = ( + await db.execute(on_success_message_sql, params_for_query) + ).first() + if message_result: + message = message_result[0] + except Exception as ex: + message = "Error running on_success_message_sql: {}".format(ex) + message_type = datasette.ERROR + if not message: + message = canned_query.get( + "on_success_message" + ) or "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) + redirect_url = canned_query.get("on_success_redirect") ok = True except Exception as ex: diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index 3c2cb228..1ae07e1f 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -392,6 +392,7 @@ This configuration will create a page at ``/mydatabase/add_name`` displaying a f You can customize how Datasette represents success and errors using the following optional properties: - ``on_success_message`` - the message shown when a query is successful +- ``on_success_message_sql`` - alternative to ``on_success_message``: a SQL query that should be executed to generate the message - ``on_success_redirect`` - the path or URL the user is redirected to on success - ``on_error_message`` - the message shown when a query throws an error - ``on_error_redirect`` - the path or URL the user is redirected to on error @@ -405,11 +406,12 @@ For example: "queries": { "add_name": { "sql": "INSERT INTO names (name) VALUES (:name)", + "params": ["name"], "write": True, - "on_success_message": "Name inserted", + "on_success_message_sql": "select 'Name inserted: ' || :name", "on_success_redirect": "/mydatabase/names", "on_error_message": "Name insert failed", - "on_error_redirect": "/mydatabase" + "on_error_redirect": "/mydatabase", } } } @@ -426,8 +428,10 @@ For example: queries: add_name: sql: INSERT INTO names (name) VALUES (:name) + params: + - name write: true - on_success_message: Name inserted + on_success_message_sql: 'select ''Name inserted: '' || :name' on_success_redirect: /mydatabase/names on_error_message: Name insert failed on_error_redirect: /mydatabase @@ -443,8 +447,11 @@ For example: "queries": { "add_name": { "sql": "INSERT INTO names (name) VALUES (:name)", + "params": [ + "name" + ], "write": true, - "on_success_message": "Name inserted", + "on_success_message_sql": "select 'Name inserted: ' || :name", "on_success_redirect": "/mydatabase/names", "on_error_message": "Name insert failed", "on_error_redirect": "/mydatabase" @@ -455,10 +462,12 @@ For example: } .. [[[end]]] -You can use ``"params"`` to explicitly list the named parameters that should be displayed as form fields - otherwise they will be automatically detected. +You can use ``"params"`` to explicitly list the named parameters that should be displayed as form fields - otherwise they will be automatically detected. ``"params"`` is not necessary in the above example, since without it ``"name"`` would be automatically detected from the query. You can pre-populate form fields when the page first loads using a query string, e.g. ``/mydatabase/add_name?name=Prepopulated``. The user will have to submit the form to execute the query. +If you specify a query in ``"on_success_message_sql"``, that query will be executed after the main query. The first column of the first row return by that query will be displayed as a success message. Named parameters from the main query will be made available to the success message query as well. + .. _canned_queries_magic_parameters: Magic parameters @@ -589,7 +598,7 @@ The JSON response will look like this: "redirect": "/data/add_name" } -The ``"message"`` and ``"redirect"`` values here will take into account ``on_success_message``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``, if they have been set. +The ``"message"`` and ``"redirect"`` values here will take into account ``on_success_message``, ``on_success_message_sql``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``, if they have been set. .. _pagination: diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index e9ad3239..5256c24c 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -31,9 +31,15 @@ def canned_write_client(tmpdir): }, "add_name_specify_id": { "sql": "insert into names (rowid, name) values (:rowid, :name)", + "on_success_message_sql": "select 'Name added: ' || :name || ' with rowid ' || :rowid", "write": True, "on_error_redirect": "/data/add_name_specify_id?error", }, + "add_name_specify_id_with_error_in_on_success_message_sql": { + "sql": "insert into names (rowid, name) values (:rowid, :name)", + "on_success_message_sql": "select this is bad SQL", + "write": True, + }, "delete_name": { "sql": "delete from names where rowid = :rowid", "write": True, @@ -179,6 +185,34 @@ def test_insert_error(canned_write_client): ) +def test_on_success_message_sql(canned_write_client): + response = canned_write_client.post( + "/data/add_name_specify_id", + {"rowid": 5, "name": "Should be OK"}, + csrftoken_from=True, + ) + assert response.status == 302 + assert response.headers["Location"] == "/data/add_name_specify_id" + messages = canned_write_client.ds.unsign( + response.cookies["ds_messages"], "messages" + ) + assert messages == [["Name added: Should be OK with rowid 5", 1]] + + +def test_error_in_on_success_message_sql(canned_write_client): + response = canned_write_client.post( + "/data/add_name_specify_id_with_error_in_on_success_message_sql", + {"rowid": 1, "name": "Should fail"}, + csrftoken_from=True, + ) + messages = canned_write_client.ds.unsign( + response.cookies["ds_messages"], "messages" + ) + assert messages == [ + ["Error running on_success_message_sql: no such column: bad", 3] + ] + + def test_custom_params(canned_write_client): response = canned_write_client.get("/data/update_name?extra=foo") assert '' in response.text @@ -232,21 +266,22 @@ def test_canned_query_permissions_on_database_page(canned_write_client): query_names = { q["name"] for q in canned_write_client.get("/data.json").json["queries"] } - assert { + assert query_names == { + "add_name_specify_id_with_error_in_on_success_message_sql", + "from_hook", + "update_name", + "add_name_specify_id", + "from_async_hook", "canned_read", "add_name", - "add_name_specify_id", - "update_name", - "from_async_hook", - "from_hook", - } == query_names + } # With auth shows four response = canned_write_client.get( "/data.json", cookies={"ds_actor": canned_write_client.actor_cookie({"id": "root"})}, ) - assert 200 == response.status + assert response.status == 200 query_names_and_private = sorted( [ {"name": q["name"], "private": q["private"]} @@ -257,6 +292,10 @@ def test_canned_query_permissions_on_database_page(canned_write_client): assert query_names_and_private == [ {"name": "add_name", "private": False}, {"name": "add_name_specify_id", "private": False}, + { + "name": "add_name_specify_id_with_error_in_on_success_message_sql", + "private": False, + }, {"name": "canned_read", "private": False}, {"name": "delete_name", "private": True}, {"name": "from_async_hook", "private": False}, From 33251d04e78d575cca62bb59069bb43a7d924746 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 9 Aug 2023 17:56:27 -0700 Subject: [PATCH 0505/1218] Canned query write counters demo, refs #2134 --- .github/workflows/deploy-latest.yml | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index ed60376c..4746aa07 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -57,6 +57,36 @@ jobs: db.route = "alternative-route" ' > plugins/alternative_route.py cp fixtures.db fixtures2.db + - name: And the counters writable canned query demo + run: | + cat > plugins/counters.py < Date: Thu, 10 Aug 2023 22:16:19 -0700 Subject: [PATCH 0506/1218] Fixed display of database color Closes #2139, closes #2119 --- datasette/database.py | 7 +++++++ datasette/templates/database.html | 2 +- datasette/templates/query.html | 2 +- datasette/templates/row.html | 2 +- datasette/templates/table.html | 2 +- datasette/views/base.py | 4 ---- datasette/views/database.py | 8 +++----- datasette/views/index.py | 4 +--- datasette/views/row.py | 4 +++- datasette/views/table.py | 2 +- tests/test_html.py | 20 ++++++++++++++++++++ 11 files changed, 39 insertions(+), 18 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index d8043c24..af39ac9e 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -1,6 +1,7 @@ import asyncio from collections import namedtuple from pathlib import Path +import hashlib import janus import queue import sys @@ -62,6 +63,12 @@ class Database: } return self._cached_table_counts + @property + def color(self): + if self.hash: + return self.hash[:6] + return hashlib.md5(self.name.encode("utf8")).hexdigest()[:6] + def suggest_name(self): if self.path: return Path(self.path).stem diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 7acf0369..3d4dae07 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -10,7 +10,7 @@ {% block body_class %}db db-{{ database|to_css_class }}{% endblock %} {% block content %} - + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index de02cd0f..3c660bc7 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -487,9 +487,9 @@ def _as_optional_bool(value, name): raise QueryValidationError("{} must be 0 or 1".format(name)) -def _query_list_limit(value): +def _query_list_limit(value, default=50): if value in (None, ""): - return 50 + return default try: return min(max(1, int(value)), 1000) except ValueError as ex: @@ -1136,7 +1136,10 @@ class QueryListView(BaseView): database = await self.database_name(request) format_ = request.url_vars.get("format") or "html" try: - limit = _query_list_limit(request.args.get("_size")) + limit = _query_list_limit( + request.args.get("_size"), + default=20 if format_ == "html" else 50, + ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") is_published = _as_optional_bool( request.args.get("is_published"), "is_published" @@ -1175,6 +1178,9 @@ class QueryListView(BaseView): data = { "ok": True, "database": database, + "database_color": ( + self.ds.get_database(database).color if database is not None else None + ), "queries": page["queries"], "next": page["next"], "next_url": next_url, diff --git a/tests/test_queries.py b/tests/test_queries.py index c31d7205..b7416ac7 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -451,12 +451,34 @@ async def test_query_list_search_filter_and_html(): assert html_response.status_code == 200 assert "Demo query 02" in html_response.text assert "Demo query 01" not in html_response.text + assert 'class="query-list-results"' in html_response.text + assert "Mode" in html_response.text + assert 'type="radio" name="is_published" value="1"' in html_response.text assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" ] +@pytest.mark.asyncio +async def test_query_list_html_defaults_to_twenty_and_shows_pagination(): + ds = Datasette(memory=True) + ds.root_enabled = True + ds.add_memory_database("query_list_html_pagination", name="data") + await ds.invoke_startup() + await add_numbered_queries(ds, "data", 25) + + response = await ds.client.get("/data/-/queries", actor={"id": "root"}) + json_response = await ds.client.get("/data/-/queries.json", actor={"id": "root"}) + + assert response.status_code == 200 + assert response.text.count('aria-label="Query pagination"') == 1 + assert "Demo query 20" in response.text + assert "Demo query 21" not in response.text + assert 'href="/data/-/queries?_next=' in response.text + assert len(json_response.json()["queries"]) == 25 + + @pytest.mark.asyncio async def test_global_query_list_api_and_html(): ds = Datasette(memory=True) @@ -519,7 +541,8 @@ async def test_global_query_list_api_and_html(): ("beta", "beta_first"), ] assert html_response.status_code == 200 - assert 'href="/beta">beta:' in html_response.text + assert 'Database' in html_response.text + assert 'class="query-list-database" href="/beta">beta' in html_response.text assert "Beta first" in html_response.text assert "Alpha first" not in html_response.text From f1dd86ebfb01644fead19f9f007b9b76f863d72e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 25 May 2026 14:05:26 -0700 Subject: [PATCH 1155/1218] Tweak URL designs of new endpoints --- datasette/app.py | 6 +++--- datasette/templates/database.html | 2 +- datasette/templates/execute_write.html | 2 +- datasette/templates/query.html | 2 +- datasette/templates/query_create.html | 2 +- docs/json_api.rst | 6 +++--- queries-plan.md | 4 ++-- tests/test_html.py | 4 ++-- tests/test_queries.py | 22 +++++++++++----------- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 90e41521..232aa0cf 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -2745,11 +2745,11 @@ class Datasette: ) add_route( QueryInsertView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/-/insert$", + r"/(?P[^\/\.]+)/-/queries/insert$", ) add_route( ExecuteWriteAnalyzeView.as_view(self), - r"/(?P[^\/\.]+)/-/execute-write/-/analyze$", + r"/(?P[^\/\.]+)/-/execute-write/analyze$", ) add_route( ExecuteWriteView.as_view(self), @@ -2761,7 +2761,7 @@ class Datasette: ) add_route( QueryParametersView.as_view(self), - r"/(?P[^\/\.]+)/-/query/-/parameters$", + r"/(?P[^\/\.]+)/-/query/parameters$", ) add_route( wrap_view(QueryView, self), diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 0c9ec94c..62f9c620 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -26,7 +26,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} {% if allow_execute_sql %} -
+

Custom SQL query

{% set parameter_names = [] %} diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 9b522f66..46f58c3b 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -95,7 +95,7 @@

{{ execution_message }}{% for link in execution_links %} {{ link.label }}{% endfor %}

{% endif %} - + {% if write_template_tables %}
diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 3bcc7178..f74d21f1 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -37,7 +37,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index fb2599d2..3c027def 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -17,7 +17,7 @@

Create query

- +


diff --git a/docs/json_api.rst b/docs/json_api.rst index 91ed5306..dd54c459 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -525,7 +525,7 @@ Creating saved queries in the UI Creating saved queries ~~~~~~~~~~~~~~~~~~~~~~ -``POST //-/queries/-/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +``POST //-/queries/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. .. _QueryParametersView: .. _ExecuteWriteView: @@ -534,13 +534,13 @@ Creating saved queries Executing write SQL ~~~~~~~~~~~~~~~~~~~ -``GET //-/query/-/parameters?sql=...`` returns the named parameters used by a SQL query. This requires ``execute-sql`` for the database. +``GET //-/query/parameters?sql=...`` returns the named parameters used by a SQL query. This requires ``execute-sql`` for the database. ``GET //-/execute-write`` displays a form for executing writable SQL. A ``?sql=`` query string pre-populates the form without executing it. ``POST //-/execute-write`` executes writable SQL. This requires ``execute-write-sql`` for the database plus the relevant table-level write permissions. -``GET //-/execute-write/-/analyze?sql=...`` returns the derived parameters plus the write operations that SQL would need in order to execute. +``GET //-/execute-write/analyze?sql=...`` returns the derived parameters plus the write operations that SQL would need in order to execute. .. _QueryDefinitionView: diff --git a/queries-plan.md b/queries-plan.md index a708e887..72427df2 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -211,7 +211,7 @@ JSON endpoints should follow Datasette's existing write API style: use `POST` pl Endpoints: - `GET /-/queries` and `GET /{database}/-/queries` show searchable HTML query browsers. `GET /-/queries.json` lists query definitions across every database the actor can view; `GET /{database}/-/queries.json` scopes that list to one database. Both JSON endpoints use cursor pagination with `_next` and `_size`. -- `POST /{database}/-/queries/-/insert` creates a query. +- `POST /{database}/-/queries/insert` creates a query. - `GET /{database}/{query}/-/definition` returns one query definition without executing it. - `POST /{database}/{query}/-/update` updates one query. - `POST /{database}/{query}/-/delete` deletes one query. @@ -388,7 +388,7 @@ The read methods should reconstruct the existing dictionary shape used by query On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. -The save form should call `POST /{database}/-/queries/-/insert` and default to `is_published=false`. +The save form should call `POST /{database}/-/queries/insert` and default to `is_published=false`. If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. diff --git a/tests/test_html.py b/tests/test_html.py index b49391a6..8cda6dba 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -329,7 +329,7 @@ async def test_query_parameter_form_fields(ds_client): ' ' in response.text ) - assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'data-parameters-url="/fixtures/-/query/parameters"' in response.text assert 'id="sql-parameters-section"' in response.text assert "setupSqlParameterRefresh" in response.text response2 = await ds_client.get("/fixtures/-/query?sql=select+:name&name=hello") @@ -344,7 +344,7 @@ async def test_query_parameter_form_fields(ds_client): async def test_database_page_sql_parameter_refresh_markup(ds_client): response = await ds_client.get("/fixtures") assert response.status_code == 200 - assert 'data-parameters-url="/fixtures/-/query/-/parameters"' in response.text + assert 'data-parameters-url="/fixtures/-/query/parameters"' in response.text assert 'id="sql-parameters-section"' in response.text assert "setupSqlParameterRefresh" in response.text diff --git a/tests/test_queries.py b/tests/test_queries.py index b7416ac7..57920584 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -356,7 +356,7 @@ async def test_query_insert_api_creates_read_only_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ -568,7 +568,7 @@ async def test_query_insert_api_publish_requires_publish_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "writer"}, json={"query": {"name": "public", "sql": "select 1", "is_published": True}}, ) @@ -586,7 +586,7 @@ async def test_query_insert_api_creates_writable_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ -603,7 +603,7 @@ async def test_query_insert_api_creates_writable_query(): assert query["parameters"] == ["name"] bad_response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={ "query": { @@ -671,7 +671,7 @@ async def test_query_insert_api_rejects_magic_parameters(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/-/insert", + "/data/-/queries/insert", actor={"id": "root"}, json={"query": {"name": "magic", "sql": "select :_actor_id"}}, ) @@ -742,7 +742,7 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'data-sql-template="insert"' in response.text assert 'data-sql-template="update"' in response.text assert 'data-sql-template="delete"' in response.text - assert 'data-analyze-url="/data/-/execute-write/-/analyze"' in response.text + assert 'data-analyze-url="/data/-/execute-write/analyze"' in response.text assert 'addEventListener("paste"' in response.text assert "setupSqlParameterRefresh" in response.text assert '' in response.text @@ -771,12 +771,12 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): await ds.invoke_startup() response = await ds.client.get( - "/data/-/execute-write/-/analyze", + "/data/-/execute-write/analyze", actor={"id": "root"}, params={"sql": "insert into dogs (name) values (:name)"}, ) read_only_response = await ds.client.get( - "/data/-/execute-write/-/analyze", + "/data/-/execute-write/analyze", actor={"id": "root"}, params={"sql": "select * from dogs where name = :name"}, ) @@ -818,19 +818,19 @@ async def test_query_parameters_endpoint_uses_get_sql_only(): await ds.invoke_startup() response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "root"}, params={ "sql": "select * from dogs where name = :name and id = :id", }, ) permission_denied_response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "not-root"}, params={"sql": "select * from dogs where name = :name"}, ) magic_parameter_response = await ds.client.get( - "/data/-/query/-/parameters", + "/data/-/query/parameters", actor={"id": "root"}, params={"sql": "select :_actor_id"}, ) From 4a1a4d7807fb99203b9053b6d270b265df61f0af Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 11:59:49 -0700 Subject: [PATCH 1156/1218] Query is_trusted and is_private properties Refs https://github.com/simonw/datasette/issues/2735#issuecomment-4547270516 Diff explanation: https://gist.github.com/simonw/1e4de6c4b041a51968eb273ee96dec1f --- datasette/app.py | 39 ++-- datasette/default_actions.py | 7 - datasette/default_permissions/defaults.py | 100 +++++---- datasette/templates/query_create.html | 4 +- datasette/templates/query_list.html | 65 +++++- datasette/utils/internal_db.py | 3 +- datasette/views/database.py | 79 ++++--- docs/authentication.rst | 10 - docs/internals.rst | 3 +- queries-plan.md | 84 ++++---- tests/test_queries.py | 245 ++++++++++++++++++---- 11 files changed, 421 insertions(+), 218 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 232aa0cf..3329ee7e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -618,7 +618,8 @@ class Datasette: fragment=query_config.get("fragment"), parameters=query_config.get("params"), is_write=bool(query_config.get("write")), - is_published=bool(query_config.get("is_published")), + is_private=bool(query_config.get("is_private")), + is_trusted=bool(query_config.get("is_trusted", True)), source="config", on_success_message=query_config.get("on_success_message"), on_success_message_sql=query_config.get("on_success_message_sql"), @@ -1084,7 +1085,8 @@ class Datasette: "parameters": parameters, "is_write": is_write, "write": is_write, - "is_published": bool(row["is_published"]), + "is_private": bool(row["is_private"]), + "is_trusted": bool(row["is_trusted"]), "source": row["source"], "owner_id": row["owner_id"], "on_success_message": options.get("on_success_message"), @@ -1119,7 +1121,8 @@ class Datasette: fragment=None, parameters=None, is_write=False, - is_published=False, + is_private=False, + is_trusted=False, source="plugin", owner_id=None, on_success_message=None, @@ -1144,8 +1147,8 @@ class Datasette: sql_statement = """ INSERT INTO queries ( database_name, name, sql, title, description, description_html, - options, parameters, is_write, is_published, source, owner_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + options, parameters, is_write, is_private, is_trusted, source, owner_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ if replace: sql_statement += """ @@ -1157,7 +1160,8 @@ class Datasette: options = excluded.options, parameters = excluded.parameters, is_write = excluded.is_write, - is_published = excluded.is_published, + is_private = excluded.is_private, + is_trusted = excluded.is_trusted, source = excluded.source, owner_id = excluded.owner_id, updated_at = CURRENT_TIMESTAMP @@ -1174,7 +1178,8 @@ class Datasette: options_json, parameters_json, int(bool(is_write)), - int(bool(is_published)), + int(bool(is_private)), + int(bool(is_trusted)), source, owner_id, ], @@ -1193,7 +1198,8 @@ class Datasette: fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - is_published=UNCHANGED, + is_private=UNCHANGED, + is_trusted=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -1209,7 +1215,8 @@ class Datasette: "description_html": description_html, "parameters": parameters, "is_write": is_write, - "is_published": is_published, + "is_private": is_private, + "is_trusted": is_trusted, "source": source, "owner_id": owner_id, } @@ -1227,7 +1234,7 @@ class Datasette: for field, value in fields.items(): if value is UNCHANGED: continue - if field in {"is_write", "is_published"}: + if field in {"is_write", "is_private", "is_trusted"}: value = int(bool(value)) elif field == "parameters": value = json.dumps(list(value or [])) @@ -1300,7 +1307,8 @@ class Datasette: cursor=None, q=None, is_write=None, - is_published=None, + is_private=None, + is_trusted=None, source=None, owner_id=None, include_private=False, @@ -1372,9 +1380,12 @@ class Datasette: if is_write is not None: where_clauses.append("q.is_write = :query_is_write") params["query_is_write"] = int(bool(is_write)) - if is_published is not None: - where_clauses.append("q.is_published = :query_is_published") - params["query_is_published"] = int(bool(is_published)) + if is_private is not None: + where_clauses.append("q.is_private = :query_is_private") + params["query_is_private"] = int(bool(is_private)) + if is_trusted is not None: + where_clauses.append("q.is_trusted = :query_is_trusted") + params["query_is_trusted"] = int(bool(is_trusted)) if source is not None: where_clauses.append("q.source = :query_source") params["query_source"] = source diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 6787b80e..6a1f77b8 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -68,13 +68,6 @@ def register_actions(): resource_class=DatabaseResource, also_requires="execute-sql", ), - Action( - name="publish-query", - abbr="pq", - description="Publish saved queries for actors without execute-sql", - resource_class=DatabaseResource, - also_requires="insert-query", - ), # Table-level actions (child-level) Action( name="view-table", diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 58deea01..dfd8d3e9 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -26,6 +26,32 @@ DEFAULT_ALLOW_ACTIONS = frozenset( ) +def _configured_query_restriction_selects(datasette: "Datasette") -> tuple[list[str], dict]: + selects = [] + params = {} + for index, (database_name, db_config) in enumerate( + ((datasette.config or {}).get("databases") or {}).items() + ): + for query_name, query_config in (db_config.get("queries") or {}).items(): + if isinstance(query_config, dict) and query_config.get("is_private"): + continue + parent_param = f"query_config_parent_{index}_{len(selects)}" + child_param = f"query_config_child_{index}_{len(selects)}" + selects.append( + f""" + SELECT :{parent_param} AS parent, :{child_param} AS child + WHERE NOT EXISTS ( + SELECT 1 FROM queries + WHERE database_name = :{parent_param} + AND name = :{child_param} + ) + """ + ) + params[parent_param] = database_name + params[child_param] = query_name + return selects, params + + @hookimpl(specname="permission_resources_sql") async def default_allow_sql_check( datasette: "Datasette", @@ -93,61 +119,45 @@ async def default_query_permissions_sql( if action != "view-query": return None - execute_sql = await datasette.allowed_resources_sql( - action="execute-sql", actor=actor - ) - sql = execute_sql.sql - params = {} - for key, value in execute_sql.params.items(): - new_key = f"query_execute_sql_{key}" - sql = sql.replace(f":{key}", f":{new_key}") - params[new_key] = value - - trusted_writable_sql = "" + params = {"query_owner_id": actor_id} + rule_sqls = [] if not datasette.default_deny: - trusted_writable_sql = """ - UNION ALL + rule_sqls.append( + """ SELECT database_name AS parent, name AS child, 1 AS allow, - 'trusted writable query' AS reason + 'non-private query' AS reason FROM queries - WHERE is_write = 1 - AND source IN ('config', 'plugin') - """ + WHERE is_private = 0 + """ + ) - user_writable_sql = "" if actor_id is not None: - params["query_owner_id"] = actor_id - user_writable_sql = """ - UNION ALL + rule_sqls.append( + """ SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries - WHERE is_write = 1 - AND source = 'user' - AND owner_id = :query_owner_id + WHERE owner_id = :query_owner_id + """ + ) + + config_restriction_selects, config_restriction_params = ( + _configured_query_restriction_selects(datasette) + ) + + restriction_sqls = [ """ + SELECT database_name AS parent, name AS child + FROM queries + WHERE is_private = 0 + OR owner_id = :query_owner_id + """ + ] + restriction_sqls.extend(config_restriction_selects) + params.update(config_restriction_params) return PermissionSQL( - sql=f""" - WITH execute_sql_allowed AS ( - {sql} - ) - SELECT database_name AS parent, name AS child, 1 AS allow, - 'published query' AS reason - FROM queries - WHERE is_write = 0 - AND is_published = 1 - UNION ALL - SELECT q.database_name AS parent, q.name AS child, 1 AS allow, - 'execute-sql allows query' AS reason - FROM queries q - JOIN execute_sql_allowed es - ON es.parent = q.database_name - AND es.child IS NULL - WHERE q.is_write = 0 - AND q.is_published = 0 - {trusted_writable_sql} - {user_writable_sql} - """, + sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, + restriction_sql="\nUNION ALL\n".join(restriction_sqls), params=params, ) diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 3c027def..686d971e 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -27,9 +27,7 @@

- {% if can_publish %} -

- {% endif %} +

{% if sql and analysis_is_write %}

Execute write SQL

{% endif %} diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index dbd607ab..25259b3d 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -73,7 +73,7 @@ border-collapse: collapse; font-size: 0.9rem; margin: 0.25rem 0 1rem; - min-width: 36rem; + min-width: 42rem; width: 100%; } .query-list-results th, @@ -100,6 +100,16 @@ font-size: 0.78rem; margin: 0.15rem 0 0; } +.query-list-owner { + color: #39445a; + font-family: var(--font-monospace, monospace); + white-space: nowrap; +} +.query-list-flags { + display: flex; + flex-wrap: wrap; + gap: 0.3rem; +} .query-list-pill { background-color: #eef1f5; border: 1px solid #d7dde5; @@ -116,15 +126,36 @@ background-color: #fff4db; border-color: #e2b64e; } -.query-list-pill-published { +.query-list-pill-public { background-color: #e7f5ec; border-color: #9ecfab; color: #267a3e; } -.query-list-pill-unpublished { +.query-list-pill-private { background-color: #f7edf0; border-color: #dbb8c1; } +.query-list-pill-trusted { + background-color: #e7f5ec; + border-color: #9ecfab; + color: #267a3e; +} +.query-list-empty { + color: #6b7280; +} +.query-list-footnotes { + border-top: 1px solid #d7dde5; + color: #4f5b6d; + font-size: 0.82rem; + margin: 0.35rem 0 1rem; + padding-top: 0.55rem; +} +.query-list-footnotes p { + margin: 0.25rem 0; +} +.query-list-footnotes .query-list-pill { + margin-right: 0.35rem; +} .query-list-pagination a { border: 1px solid #007bff; border-radius: 0.25rem; @@ -177,10 +208,10 @@
- Publication - - - + Visibility + + +
@@ -191,8 +222,8 @@
{% if show_database %}{% endif %} - - + + @@ -205,12 +236,24 @@ {{ query.title or query.name }}{% if query.private %} ๐Ÿ”’{% endif %} {% if query.description %}

{{ query.description }}

{% endif %} - - + + {% endfor %}
DatabaseQueryModePublicationOwnerFlags
{% if query.is_write %}Writable{% else %}Read-only{% endif %}{% if query.is_published %}Published{% else %}Unpublished{% endif %}{% if query.owner_id is not none %}{{ query.owner_id }}{% else %}-{% endif %} + + {% if query.is_write %}Writable{% else %}Read-only{% endif %} + {% if query.is_private %}Private{% endif %} + {% if query.is_trusted %}Trusted{% endif %} + +
+ {% if show_private_note or show_trusted_note %} +
+ {% if show_private_note %}

PrivateOnly the owning actor can view this query.

{% endif %} + {% if show_trusted_note %}

TrustedExecution skips the usual SQL and write permission checks after view-query allows access.

{% endif %} +
+ {% endif %} {% else %}

No queries found.

{% endif %} diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 9c693b0a..bf172667 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -123,7 +123,8 @@ async def initialize_metadata_tables(db): options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, diff --git a/datasette/views/database.py b/datasette/views/database.py index 3c660bc7..91e9c350 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -428,7 +428,7 @@ _query_fields = { "fragment", "parameters", "params", - "is_published", + "is_private", "on_success_message", "on_success_message_sql", "on_success_redirect", @@ -571,7 +571,7 @@ async def _check_query_name(db, name, *, existing=False): raise QueryValidationError("Query name conflicts with a table or view") -async def _analyze_user_query(datasette, db, sql, *, actor, is_published): +async def _analyze_user_query(datasette, db, sql, *, actor): if not sql or not isinstance(sql, str): raise QueryValidationError("SQL is required") derived = _derived_query_parameters(sql) @@ -583,8 +583,6 @@ async def _analyze_user_query(datasette, db, sql, *, actor, is_published): is_write = _analysis_is_write(analysis) if is_write: - if is_published: - raise QueryValidationError("Writable queries cannot be published") try: await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis @@ -680,6 +678,26 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis +async def _ensure_stored_query_execution_permissions(datasette, db, query, actor): + if query.get("is_trusted"): + return + if query.get("write"): + await datasette.ensure_permission( + action="execute-write-sql", + resource=DatabaseResource(db.name), + actor=actor, + ) + await datasette.ensure_query_write_permissions( + db.name, query["sql"], actor=actor + ) + else: + await datasette.ensure_permission( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=actor, + ) + + async def _execute_write_analysis_data(datasette, db, sql, actor): parameter_names = [] analysis_rows = [] @@ -752,7 +770,7 @@ async def _inserted_row_url(datasette, db, analysis, cursor): def _apply_query_data_types(data): typed = dict(data) - for key in ("hide_sql", "is_published"): + for key in ("hide_sql", "is_private"): if key in typed: typed[key] = _as_bool(typed[key]) return typed @@ -769,20 +787,12 @@ async def _prepare_query_create(datasette, request, db, data): if await datasette.get_query(db.name, name) is not None: raise QueryValidationError("Query already exists") - is_published = _as_bool(data.get("is_published")) is_write, derived, analysis = await _analyze_user_query( datasette, db, data.get("sql"), actor=request.actor, - is_published=is_published, ) - if is_published and not await datasette.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ): - raise QueryValidationError("Permission denied: need publish-query", status=403) if not is_write and any(data.get(field) for field in _query_write_fields): raise QueryValidationError("Writable query fields require writable SQL") @@ -800,7 +810,8 @@ async def _prepare_query_create(datasette, request, db, data): "fragment": data.get("fragment"), "parameters": parameters, "is_write": is_write, - "is_published": is_published, + "is_private": _as_bool(data.get("is_private", True)), + "is_trusted": False, "source": "user", "owner_id": _actor_id(request.actor), "on_success_message": data.get("on_success_message"), @@ -819,7 +830,6 @@ async def _prepare_query_update(datasette, request, db, existing, update): update = _apply_query_data_types(update) sql = update.get("sql", existing["sql"]) - is_published = update.get("is_published", existing["is_published"]) query_is_write = existing["is_write"] derived = _derived_query_parameters(sql) parameters = None @@ -830,19 +840,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): db, sql, actor=request.actor, - is_published=is_published, ) - elif is_published and query_is_write: - raise QueryValidationError("Writable queries cannot be published") - if is_published and not existing["is_published"]: - if not await datasette.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ): - raise QueryValidationError( - "Permission denied: need publish-query", status=403 - ) if "parameters" in update or "params" in update: parameters = _coerce_query_parameters( @@ -864,7 +862,7 @@ async def _prepare_query_update(datasette, request, db, existing, update): "fragment": update.get("fragment"), "parameters": parameters, "is_write": query_is_write, - "is_published": is_published, + "is_private": update.get("is_private"), "on_success_message": update.get("on_success_message"), "on_success_message_sql": update.get("on_success_message_sql"), "on_success_redirect": update.get("on_success_redirect"), @@ -1141,8 +1139,8 @@ class QueryListView(BaseView): default=20 if format_ == "html" else 50, ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") - is_published = _as_optional_bool( - request.args.get("is_published"), "is_published" + is_private = _as_optional_bool( + request.args.get("is_private"), "is_private" ) except QueryValidationError as ex: return _error([ex.message], ex.status) @@ -1154,7 +1152,7 @@ class QueryListView(BaseView): cursor=request.args.get("_next"), q=request.args.get("q") or None, is_write=is_write, - is_published=is_published, + is_private=is_private, source=request.args.get("source") or None, owner_id=request.args.get("owner_id") or None, include_private=True, @@ -1186,12 +1184,14 @@ class QueryListView(BaseView): "next_url": next_url, "has_more": page["has_more"], "limit": page["limit"], + "show_private_note": any(query["is_private"] for query in page["queries"]), + "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), "query_list_path": query_list_path, "show_database": database is None, "filters": { "q": request.args.get("q") or "", "is_write": request.args.get("is_write") or "", - "is_published": request.args.get("is_published") or "", + "is_private": request.args.get("is_private") or "", "source": request.args.get("source") or "", "owner_id": request.args.get("owner_id") or "", }, @@ -1255,11 +1255,6 @@ class QueryCreateView(BaseView): "database_color": db.color, "sql": sql, "parameter_names": parameter_names, - "can_publish": await self.ds.allowed( - action="publish-query", - resource=DatabaseResource(db.name), - actor=request.actor, - ), "analysis_error": analysis_error, "analysis_rows": analysis_rows, "analysis_is_write": bool( @@ -1435,9 +1430,9 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - if canned_query.get("write") and canned_query.get("source") == "user": - await datasette.ensure_query_write_permissions( - db.name, canned_query["sql"], actor=request.actor + if canned_query.get("write"): + await _ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor ) # If database is immutable, return an error @@ -1558,6 +1553,10 @@ class QueryView(View): ) if not visible: raise Forbidden("You do not have permission to view this query") + if not canned_query_write: + await _ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor + ) else: await datasette.ensure_permission( diff --git a/docs/authentication.rst b/docs/authentication.rst index b6a4cb7e..6e835c8d 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1299,16 +1299,6 @@ insert-query Actor is allowed to create saved queries in a database. -``resource`` - ``datasette.resources.DatabaseResource(database)`` - ``database`` is the name of the database (string) - -.. _actions_publish_query: - -publish-query -------------- - -Actor is allowed to publish a saved read-only query so actors without ``execute-sql`` can run it. - ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/internals.rst b/docs/internals.rst index b5da7cbf..c76de487 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -2158,7 +2158,8 @@ The internal database schema is as follows: options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, diff --git a/queries-plan.md b/queries-plan.md index 72427df2..f4b8049c 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -13,9 +13,9 @@ Terminology change: these are now "queries", not "canned queries". Legacy code a - Internal table name: `queries`. - Query definitions should use real columns, not a JSON blob for all options. - Query parameter names live in a `parameters` text column as a JSON array. No default values for parameters in this pass. -- No `queries_database_is_published_idx` index. -- User-created queries require `execute-sql` and `insert-query` on the database. Writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. -- `publish-query` is the permission for creating or updating a query so users without `execute-sql` can execute it. +- No separate index is needed for the privacy/trust flags yet. +- User-created queries require `execute-sql` and `insert-query` on the database. They default to private, and writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. +- Configured queries default to trusted, which means actors who can view them can execute them without also holding `execute-sql` or the relevant write permissions. Config can opt out with `is_trusted: false`. - Add `update-query` and `delete-query`, so administrators can manage queries created by other users. - Remove the old `canned_queries()` hook from core. If we want compatibility later, build a separate `datasette-old-canned-queries` plugin. - Writable user-created queries can be supported using `Database.analyze_sql()`, provided we fail closed when analysis cannot prove the required permissions. @@ -45,7 +45,8 @@ CREATE TABLE IF NOT EXISTS queries ( options TEXT NOT NULL DEFAULT '{}', parameters TEXT NOT NULL DEFAULT '[]', is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_published INTEGER NOT NULL DEFAULT 0 CHECK (is_published IN (0, 1)), + is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), + is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), source TEXT NOT NULL DEFAULT 'user', owner_id TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, @@ -64,11 +65,12 @@ Column notes: - Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. - `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. - Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. -- `is_published` only applies to read-only queries. A writable query can still be public through explicit `view-query` permissions, but the "publish for users without execute-sql" shortcut should be read-only. +- `is_private` means the query is only visible to its owning actor. This is enforced as a permission restriction, so broader `view-query` grants do not expose private rows. +- `is_trusted` means execution skips the usual `execute-sql` or write-permission checks after `view-query` has allowed access. - `source` distinguishes `user`, `config`, and `plugin` rows. - `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. -No separate index is needed on `(database_name, name)` because the primary key already creates one. Do not add a `queries_database_is_published_idx` index for now. +No separate index is needed on `(database_name, name)` because the primary key already creates one. `QueryResource.resources_sql()` can become: @@ -104,7 +106,6 @@ Remove the old `canned_queries()` hookspec and all core calls to it. If compatib Add core actions: - `insert-query`, database-level, for creating queries in a database. -- `publish-query`, database-level, for marking read-only queries as executable by actors who lack `execute-sql`. - `update-query`, query-level, for modifying existing query definitions. - `delete-query`, query-level, for deleting existing query definitions. @@ -114,17 +115,11 @@ User-created query creation requires: - `insert-query` on `DatabaseResource(database)` - If analysis shows the query is writable, the table-level write permissions described in the writable query section. -Setting `is_published=1` requires: - -- `publish-query` on `DatabaseResource(database)` -- The query must be read-only according to `Database.analyze_sql()`. - Updating an existing query requires: - `update-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. - If the SQL changes, also require `execute-sql` on the database. - If the changed SQL is writable, also require the table-level write permissions described in the writable query section. -- If `is_published` changes from `0` to `1`, also require `publish-query` on the database. Deleting an existing query requires: @@ -133,18 +128,18 @@ Deleting an existing query requires: Default owner permissions: - For `source='user' AND owner_id = actor.id`, grant `update-query` and `delete-query`. -- Do not automatically grant execution if the user no longer has the execution permission described below. +- For `source='user' AND owner_id = actor.id`, grant `view-query`. If the query is private, restriction SQL ensures no other actor sees it through a broader grant. ## Executing queries Default execution rule for read-only queries: -- If `is_published=0`, the actor needs `execute-sql` on the database. -- If `is_published=1`, the actor can execute the query without `execute-sql`. +- If `is_trusted=0`, the actor needs `execute-sql` on the database. +- If `is_trusted=1`, the actor can execute the query without `execute-sql`, provided `view-query` allows access. Default execution rule for user-created writable queries: -- `is_published` must be `0`. +- `is_trusted` must be `0`. - The actor must have `view-query`. - The actor must currently have every write permission required by fresh `Database.analyze_sql()` results for the query SQL. @@ -152,14 +147,14 @@ Implementation: - Remove `view-query` from the broad `DEFAULT_ALLOW_ACTIONS` set. - Replace it with query-aware default `view-query` permission SQL. -- For `is_published=1 AND is_write=0`, emit a child-level `view-query` allow. -- For `is_published=0 AND is_write=0`, emit child-level `view-query` allows for queries whose parent database is in the actor's `execute-sql` allowed resources. -- For `is_write=1 AND source='user'`, emit `view-query` only for the owner or actors with explicit `view-query` permission, then have `QueryView` perform the fresh analysis/table-permission check before execution. -- For trusted writable queries, preserve current behavior by emitting child-level `view-query` allows for `is_write=1 AND source IN ('config', 'plugin')` when Datasette is not running with `--default-deny`. +- Emit default `view-query` allows for non-private rows when Datasette is not running with `--default-deny`. +- Emit default `view-query` allows for the owning actor. +- Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. +- Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. -For read-only queries this keeps `QueryView` simple: it checks `view-query` for the query resource, and the default permission hook encodes the relationship with `execute-sql`. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. +For read-only queries this keeps `QueryView` explicit: it checks `view-query` for the query resource, then checks `execute-sql` unless the row is trusted. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. -Explicit deny rules should still be able to block a published query. +Explicit deny rules should still be able to block a query, and `--default-deny` still blocks trusted queries unless something grants `view-query`. ## Writable queries @@ -180,7 +175,7 @@ Validation flow for user-created queries: 1. Derive named parameters from the SQL and pass harmless placeholder values into `db.analyze_sql()` so SQLite can prepare statements with bindings. 2. If analysis raises a SQLite error, reject the query. 3. If every table access is `read`, treat the query as read-only and require `execute-sql` plus `insert-query`/`update-query` as described above. -4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_published=0`. +4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_trusted=0`. 5. Reject writable user-created queries that access a database other than the database they are being saved against, until `analyze_sql()` can reliably map attached SQLite schemas back to Datasette database names. 6. For every write access returned by analysis, require the corresponding permission on `TableResource(access.database, access.table)`: - `insert` -> `insert-row` @@ -200,7 +195,7 @@ Fail closed cases for user-created writable queries: - Analysis reports any write operation that cannot be mapped to a Datasette table resource. - Analysis reports writes outside the target database. - The actor lacks any required table write permission. -- `is_published=1` is requested. +- `is_trusted=1` is requested through the user-facing API. This gives us writable user-created queries without letting `execute-sql` alone become a path to create arbitrary write endpoints. @@ -225,7 +220,7 @@ Create request: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "is_published": false, + "is_private": true, "parameters": ["region"] } } @@ -242,7 +237,8 @@ Successful create returns `201` and the created query definition: "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers", "description": "Highest revenue customers", - "is_published": false, + "is_private": true, + "is_trusted": false, "parameters": ["region"] } } @@ -254,7 +250,7 @@ Update request, imitating `RowUpdateView`: { "update": { "title": "Top customers by revenue", - "is_published": true + "is_private": false }, "return": true } @@ -270,7 +266,8 @@ Successful update returns `{"ok": true}` by default. With `"return": true`, retu "name": "top_customers", "sql": "select * from customers order by revenue desc limit 20", "title": "Top customers by revenue", - "is_published": true + "is_private": false, + "is_trusted": false } } ``` @@ -317,7 +314,8 @@ await datasette.add_query( fragment=None, parameters=None, is_write=False, - is_published=False, + is_private=False, + is_trusted=False, source="plugin", owner_id=None, on_success_message=None, @@ -340,7 +338,8 @@ await datasette.update_query( fragment=UNCHANGED, parameters=UNCHANGED, is_write=UNCHANGED, - is_published=UNCHANGED, + is_private=UNCHANGED, + is_trusted=UNCHANGED, source=UNCHANGED, owner_id=UNCHANGED, on_success_message=UNCHANGED, @@ -360,7 +359,8 @@ await datasette.list_queries( cursor=None, q=None, is_write=None, - is_published=None, + is_private=None, + is_trusted=None, source=None, owner_id=None, ) @@ -382,15 +382,13 @@ For column-backed fields, `None` should write SQL `NULL`. For option fields, `No Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_published`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. +The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_private`, `is_trusted`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. ## Query page save UI On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. -The save form should call `POST /{database}/-/queries/insert` and default to `is_published=false`. - -If the actor also has `publish-query`, include a publish control. The UI copy should make it clear that publishing allows people without arbitrary SQL permission to run this query. +The save form should call `POST /{database}/-/queries/insert` and default to `is_private=true`. On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. The global `/-/queries` page reuses the same interface and shows the database for each query. @@ -403,7 +401,7 @@ This page should require `execute-sql` and `insert-query` to access. It should p - Read-only - Writable -Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and optional published status if the actor has `publish-query`. +Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and privacy status. Writable mode should always run `Database.analyze_sql()` and show an analysis panel before saving: @@ -413,7 +411,7 @@ Writable mode should always run `Database.analyze_sql()` and show an analysis pa - whether the actor has that permission - source, when the operation comes from a trigger or view -The Save button should be disabled until analysis succeeds and every required table write permission is allowed. Writable mode should not show a publish control, because user-created writable queries cannot be published. +The Save button should be disabled until analysis succeeds and every required table write permission is allowed. The existing edit-SQL flow from query pages can continue to point back to arbitrary SQL. A later enhancement can add "update this query" when the actor owns it or has `update-query`. @@ -427,14 +425,16 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - `QueryResource.resources_sql()` returns rows from `queries`. - Database page and `/-/jump` list queries from the internal DB. - `view-query` is no longer globally default-allowed; default query permissions come from the query-aware hook. -- Unpublished read-only query requires `execute-sql` to execute. -- Published read-only query can be executed without `execute-sql`. -- Setting `is_published=true` requires `publish-query`. +- Private query is only visible to its owner, even when a broader `view-query` rule applies. +- Non-trusted read-only query requires `execute-sql` to execute. +- Trusted read-only query can be executed without `execute-sql` after `view-query` passes. +- Config queries default to trusted and can opt out with `is_trusted: false`. +- User API rejects client-supplied `is_trusted`. - User-created query requires both `execute-sql` and `insert-query`. - User-created writable query creation uses `Database.analyze_sql()` and requires matching `insert-row`, `update-row`, and/or `delete-row` permissions for every reported write access. - `/{database}/-/queries/-/create` provides the writable-query authoring UI with an analysis panel and disabled save until all required write permissions pass. - User-created writable query execution re-runs `Database.analyze_sql()` and re-checks table write permissions. -- User-created writable query cannot be published. +- User-created writable query cannot be trusted through the user API. - Query update uses `POST /{database}/{query}/-/update` with an `{"update": {...}}` body. - Query delete uses `POST /{database}/{query}/-/delete`. - There are no `PATCH` or HTTP `DELETE` routes for query management. diff --git a/tests/test_queries.py b/tests/test_queries.py index 57920584..c97b5733 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -15,7 +15,6 @@ async def add_numbered_queries(ds, database, count): "select {} as query_number".format(i), title="Demo query {:02d}".format(i), description="Seeded demo query number {:02d}".format(i), - is_published=True, source="user", owner_id="root", ) @@ -44,7 +43,8 @@ async def test_queries_internal_table_schema(): "options", "parameters", "is_write", - "is_published", + "is_private", + "is_trusted", "source", "owner_id", "created_at", @@ -67,7 +67,7 @@ async def test_add_get_and_remove_query(): hide_sql=True, fragment="chart", parameters=["region"], - is_published=True, + is_trusted=True, source="user", owner_id="alice", ) @@ -100,7 +100,8 @@ async def test_add_get_and_remove_query(): "parameters": ["region"], "is_write": False, "write": False, - "is_published": True, + "is_private": False, + "is_trusted": True, "source": "user", "owner_id": "alice", "on_success_message": None, @@ -161,7 +162,8 @@ async def test_update_query_only_updates_provided_fields(): assert query["params"] == [] assert query["on_success_redirect"] is None assert query["sql"] == "select 1" - assert query["is_published"] is False + assert query["is_private"] is False + assert query["is_trusted"] is False options_row = ( await ds.get_internal_database().execute( """ @@ -208,7 +210,8 @@ async def test_config_queries_imported_to_internal_table(): "parameters": ["name"], "is_write": False, "write": False, - "is_published": False, + "is_private": False, + "is_trusted": True, "source": "config", "owner_id": None, "on_success_message": None, @@ -232,30 +235,171 @@ async def test_query_resources_come_from_internal_table(): @pytest.mark.asyncio -async def test_unpublished_query_requires_execute_sql_but_published_does_not(): - ds = Datasette(memory=True, settings={"default_allow_sql": False}) +async def test_default_deny_blocks_view_query_even_for_trusted_query(): + ds = Datasette(memory=True, default_deny=True) ds.add_memory_database("query_permissions", name="data") await ds.invoke_startup() - await ds.add_query("data", "unpublished", "select 1", is_published=False) - await ds.add_query("data", "published", "select 1", is_published=True) + await ds.add_query("data", "trusted", "select 1", is_trusted=True) assert not await ds.allowed( - action="execute-sql", - resource=DatabaseResource("data"), + action="view-query", + resource=QueryResource("data", "trusted"), actor=None, ) + + +@pytest.mark.asyncio +async def test_private_query_restriction_blocks_broad_view_query_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-query": {"id": "*"}, + } + } + } + }, + ) + ds.add_memory_database("private_query_permissions", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "shared_report", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "alice"}, + ) assert not await ds.allowed( action="view-query", - resource=QueryResource("data", "unpublished"), - actor=None, + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, ) assert await ds.allowed( action="view-query", - resource=QueryResource("data", "published"), - actor=None, + resource=QueryResource("data", "shared_report"), + actor={"id": "bob"}, ) +@pytest.mark.asyncio +async def test_config_query_restriction_does_not_override_private_internal_query(): + ds = Datasette(memory=True, default_deny=True) + ds.add_memory_database("private_query_with_config_name", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + ds.config = { + "databases": { + "data": { + "permissions": {"view-query": {"id": "*"}}, + "queries": {"private_report": {"sql": "select 2"}}, + } + } + } + + assert not await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, + ) + + +@pytest.mark.asyncio +async def test_untrusted_shared_query_execution_requires_execute_sql(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "viewer"}, + "view-query": {"id": "viewer"}, + } + } + } + }, + ) + ds.add_memory_database("untrusted_query_execution", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "shared_report", + "select 1 as one", + is_private=False, + is_trusted=False, + source="user", + owner_id="alice", + ) + + denied = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) + assert denied.status_code == 403 + + ds.config["databases"]["data"]["permissions"]["execute-sql"] = {"id": "viewer"} + allowed = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) + assert allowed.status_code == 200 + assert allowed.json()["rows"] == [{"one": 1}] + + +@pytest.mark.asyncio +async def test_config_queries_are_trusted_by_default_but_can_opt_out(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-query": {"id": "viewer"}, + }, + "queries": { + "trusted_report": {"sql": "select 1 as one"}, + "untrusted_report": { + "sql": "select 2 as two", + "is_trusted": False, + }, + }, + } + } + }, + ) + ds.add_memory_database("trusted_query_config", name="data") + await ds.invoke_startup() + + trusted = await ds.client.get("/data/trusted_report.json", actor={"id": "viewer"}) + untrusted = await ds.client.get( + "/data/untrusted_report.json", actor={"id": "viewer"} + ) + + assert trusted.status_code == 200 + assert trusted.json()["rows"] == [{"one": 1}] + assert untrusted.status_code == 403 + + @pytest.mark.asyncio async def test_database_page_query_preview_is_limited(): ds = Datasette(memory=True) @@ -281,7 +425,6 @@ async def test_query_actions_are_registered(): assert ds.get_action("execute-write-sql").resource_class is DatabaseResource assert ds.get_action("insert-query").resource_class is DatabaseResource - assert ds.get_action("publish-query").resource_class is DatabaseResource assert ds.get_action("update-query").resource_class is QueryResource assert ds.get_action("delete-query").resource_class is QueryResource @@ -430,21 +573,33 @@ async def test_query_list_search_filter_and_html(): "private_query", "select 'private'", title="Private query", - is_published=False, + is_private=True, source="user", owner_id="root", ) + await ds.add_query( + "data", + "trusted_query", + "select 'trusted'", + title="Trusted query", + is_trusted=True, + source="config", + ) html_response = await ds.client.get( "/data/-/queries?q=02", actor={"id": "root"}, ) + flags_response = await ds.client.get( + "/data/-/queries", + actor={"id": "root"}, + ) json_response = await ds.client.get( "/data/-/queries.json?q=02", actor={"id": "root"}, ) filtered_response = await ds.client.get( - "/data/-/queries.json?is_published=0", + "/data/-/queries.json?is_private=1", actor={"id": "root"}, ) @@ -453,7 +608,22 @@ async def test_query_list_search_filter_and_html(): assert "Demo query 01" not in html_response.text assert 'class="query-list-results"' in html_response.text assert "Mode" in html_response.text - assert 'type="radio" name="is_published" value="1"' in html_response.text + assert 'type="radio" name="is_private" value="1"' in html_response.text + assert "Only the owning actor can view this query." not in html_response.text + assert ( + "Execution skips the usual SQL and write permission checks" + not in html_response.text + ) + assert flags_response.status_code == 200 + assert 'Owner' in flags_response.text + assert 'Flags' in flags_response.text + assert 'Mode' not in flags_response.text + assert 'class="query-list-owner">root' in flags_response.text + assert 'class="query-list-pill">Read-only' in flags_response.text + assert 'class="query-list-pill query-list-pill-private">Private' in flags_response.text + assert 'class="query-list-pill query-list-pill-trusted">Trusted' in flags_response.text + assert "Only the owning actor can view this query." in flags_response.text + assert "Execution skips the usual SQL and write permission checks" in flags_response.text assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" @@ -491,7 +661,6 @@ async def test_global_query_list_api_and_html(): "alpha_first", "select 1", title="Alpha first", - is_published=True, source="user", owner_id="root", ) @@ -500,7 +669,6 @@ async def test_global_query_list_api_and_html(): "alpha_second", "select 2", title="Alpha second", - is_published=True, source="user", owner_id="root", ) @@ -509,7 +677,6 @@ async def test_global_query_list_api_and_html(): "beta_first", "select 3", title="Beta first", - is_published=True, source="user", owner_id="root", ) @@ -548,7 +715,7 @@ async def test_global_query_list_api_and_html(): @pytest.mark.asyncio -async def test_query_insert_api_publish_requires_publish_query(): +async def test_query_insert_api_rejects_is_trusted(): ds = Datasette( memory=True, default_deny=True, @@ -564,17 +731,17 @@ async def test_query_insert_api_publish_requires_publish_query(): } }, ) - ds.add_memory_database("query_publish_api", name="data") + ds.add_memory_database("query_trusted_api", name="data") await ds.invoke_startup() response = await ds.client.post( "/data/-/queries/insert", actor={"id": "writer"}, - json={"query": {"name": "public", "sql": "select 1", "is_published": True}}, + json={"query": {"name": "trusted", "sql": "select 1", "is_trusted": True}}, ) - assert response.status_code == 403 - assert response.json()["errors"] == ["Permission denied: need publish-query"] + assert response.status_code == 400 + assert response.json()["errors"] == ["Invalid keys: is_trusted"] @pytest.mark.asyncio @@ -599,24 +766,10 @@ async def test_query_insert_api_creates_writable_query(): assert response.status_code == 201 query = response.json()["query"] assert query["is_write"] is True - assert query["is_published"] is False + assert query["is_private"] is True + assert query["is_trusted"] is False assert query["parameters"] == ["name"] - bad_response = await ds.client.post( - "/data/-/queries/insert", - actor={"id": "root"}, - json={ - "query": { - "name": "published_insert", - "sql": "insert into dogs (name) values (:name)", - "is_published": True, - } - }, - ) - - assert bad_response.status_code == 400 - assert bad_response.json()["errors"] == ["Writable queries cannot be published"] - @pytest.mark.asyncio async def test_query_update_and_delete_api(): @@ -1103,6 +1256,10 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): config={ "databases": { "data": { + "permissions": { + "view-database": {"id": ["alice", "bob"]}, + "execute-write-sql": {"id": ["alice", "bob"]}, + }, "tables": { "dogs": { "permissions": { From 1cd162e9da48b924c289ec9343e9d801b51a89f9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:07:30 -0700 Subject: [PATCH 1157/1218] Removed some no-longer-necessary code, simplified view-query is back in the default allow actions now. We have other mechanisms that work for controlling visibility, and the fact that queries default to running with the permissions of the actor makes this safe. --- datasette/default_permissions/defaults.py | 55 +++-------------------- tests/test_permissions.py | 9 +++- tests/test_queries.py | 39 ++++++++++++++++ 3 files changed, 51 insertions(+), 52 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index dfd8d3e9..ed0a6d66 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -21,37 +21,12 @@ DEFAULT_ALLOW_ACTIONS = frozenset( "view-database", "view-database-download", "view-table", + "view-query", "execute-sql", } ) -def _configured_query_restriction_selects(datasette: "Datasette") -> tuple[list[str], dict]: - selects = [] - params = {} - for index, (database_name, db_config) in enumerate( - ((datasette.config or {}).get("databases") or {}).items() - ): - for query_name, query_config in (db_config.get("queries") or {}).items(): - if isinstance(query_config, dict) and query_config.get("is_private"): - continue - parent_param = f"query_config_parent_{index}_{len(selects)}" - child_param = f"query_config_child_{index}_{len(selects)}" - selects.append( - f""" - SELECT :{parent_param} AS parent, :{child_param} AS child - WHERE NOT EXISTS ( - SELECT 1 FROM queries - WHERE database_name = :{parent_param} - AND name = :{child_param} - ) - """ - ) - params[parent_param] = database_name - params[child_param] = query_name - return selects, params - - @hookimpl(specname="permission_resources_sql") async def default_allow_sql_check( datasette: "Datasette", @@ -121,16 +96,6 @@ async def default_query_permissions_sql( params = {"query_owner_id": actor_id} rule_sqls = [] - if not datasette.default_deny: - rule_sqls.append( - """ - SELECT database_name AS parent, name AS child, 1 AS allow, - 'non-private query' AS reason - FROM queries - WHERE is_private = 0 - """ - ) - if actor_id is not None: rule_sqls.append( """ @@ -141,23 +106,13 @@ async def default_query_permissions_sql( """ ) - config_restriction_selects, config_restriction_params = ( - _configured_query_restriction_selects(datasette) - ) - - restriction_sqls = [ - """ + return PermissionSQL( + sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, + restriction_sql=""" SELECT database_name AS parent, name AS child FROM queries WHERE is_private = 0 OR owner_id = :query_owner_id - """ - ] - restriction_sqls.extend(config_restriction_selects) - params.update(config_restriction_params) - - return PermissionSQL( - sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, - restriction_sql="\nUNION ALL\n".join(restriction_sqls), + """, params=params, ) diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 22f294bb..4f342d8f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -937,16 +937,20 @@ async def test_permissions_in_config( updated_config = copy.deepcopy(previous_config) updated_config.update(config) perms_ds.config = updated_config + await perms_ds.apply_queries_config() try: # Convert old-style resource to Resource object - from datasette.resources import DatabaseResource, TableResource + from datasette.resources import DatabaseResource, QueryResource, TableResource resource_obj = None if resource: if isinstance(resource, str): resource_obj = DatabaseResource(database=resource) elif isinstance(resource, tuple) and len(resource) == 2: - resource_obj = TableResource(database=resource[0], table=resource[1]) + if action == "view-query": + resource_obj = QueryResource(database=resource[0], query=resource[1]) + else: + resource_obj = TableResource(database=resource[0], table=resource[1]) result = await perms_ds.allowed( action=action, resource=resource_obj, actor=actor @@ -956,6 +960,7 @@ async def test_permissions_in_config( assert result == expected_result finally: perms_ds.config = previous_config + await perms_ds.apply_queries_config() @pytest.mark.asyncio diff --git a/tests/test_queries.py b/tests/test_queries.py index c97b5733..dde57dea 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -248,6 +248,45 @@ async def test_default_deny_blocks_view_query_even_for_trusted_query(): ) +@pytest.mark.asyncio +async def test_view_query_default_allow_still_respects_private_restriction(): + ds = Datasette(memory=True) + ds.add_memory_database("default_view_query_permissions", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "private_report", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "shared_report", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "shared_report"), + actor=None, + ) + assert await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action="view-query", + resource=QueryResource("data", "private_report"), + actor={"id": "bob"}, + ) + + @pytest.mark.asyncio async def test_private_query_restriction_blocks_broad_view_query_permission(): ds = Datasette( From 1ac4265ffd295ea62008b13b3e37af96f5450be4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:12:59 -0700 Subject: [PATCH 1158/1218] Require permissions for untrusted stored query execution, refs #2735 --- datasette/views/database.py | 7 +++---- docs/authentication.rst | 2 +- queries-plan.md | 8 +++----- tests/test_queries.py | 12 ++++++++++-- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index 91e9c350..bd939d87 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1430,10 +1430,9 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - if canned_query.get("write"): - await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor - ) + await _ensure_stored_query_execution_permissions( + datasette, db, canned_query, request.actor + ) # If database is immutable, return an error if not db.is_mutable: diff --git a/docs/authentication.rst b/docs/authentication.rst index 6e835c8d..453aaa19 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1285,7 +1285,7 @@ Actor is allowed to view a table (or view) page, e.g. https://latest.datasette.i view-query ---------- -Actor is allowed to view (and execute) a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size - this includes executing :ref:`canned_queries_writable`. +Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted saved query also requires ``execute-sql`` or the relevant write permissions; trusted saved queries can execute with ``view-query`` alone. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) diff --git a/queries-plan.md b/queries-plan.md index f4b8049c..da6b7c92 100644 --- a/queries-plan.md +++ b/queries-plan.md @@ -25,7 +25,7 @@ Terminology change: these are now "queries", not "canned queries". Legacy code a - Query definitions currently come from `datasette.yaml` or the `canned_queries()` plugin hook. - `Datasette.get_canned_queries(database_name, actor)` calls that hook every time it needs query definitions. - `QueryResource.resources_sql()` currently enumerates databases and calls the hook for each one, because permissions and `/-/jump` need query resources. -- Query pages execute if the actor has `view-query` for `QueryResource(database, query)`. +- Query pages are visible if the actor has `view-query` for `QueryResource(database, query)`. Executing an untrusted stored query also checks `execute-sql` or the relevant write permissions. - Arbitrary SQL executes if the actor has `execute-sql` for `DatabaseResource(database)`. The main performance and architecture win is making query resource enumeration a direct SQL query against the internal database. @@ -145,9 +145,7 @@ Default execution rule for user-created writable queries: Implementation: -- Remove `view-query` from the broad `DEFAULT_ALLOW_ACTIONS` set. -- Replace it with query-aware default `view-query` permission SQL. -- Emit default `view-query` allows for non-private rows when Datasette is not running with `--default-deny`. +- Keep `view-query` in the broad `DEFAULT_ALLOW_ACTIONS` set, so saved queries remain visible by default in all-public Datasette. - Emit default `view-query` allows for the owning actor. - Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. - Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. @@ -424,7 +422,7 @@ The existing edit-SQL flow from query pages can continue to point back to arbitr - The old `canned_queries()` hook is no longer called by core. - `QueryResource.resources_sql()` returns rows from `queries`. - Database page and `/-/jump` list queries from the internal DB. -- `view-query` is no longer globally default-allowed; default query permissions come from the query-aware hook. +- `view-query` remains globally default-allowed, with `restriction_sql` narrowing private queries to their owner. - Private query is only visible to its owner, even when a broader `view-query` rule applies. - Non-trusted read-only query requires `execute-sql` to execute. - Trusted read-only query can be executed without `execute-sql` after `view-query` passes. diff --git a/tests/test_queries.py b/tests/test_queries.py index dde57dea..997f8b39 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -395,8 +395,16 @@ async def test_untrusted_shared_query_execution_requires_execute_sql(): owner_id="alice", ) - denied = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) - assert denied.status_code == 403 + denied_get = await ds.client.get( + "/data/shared_report.json", actor={"id": "viewer"} + ) + denied_post = await ds.client.post( + "/data/shared_report", + actor={"id": "viewer"}, + data={}, + ) + assert denied_get.status_code == 403 + assert denied_post.status_code == 403 ds.config["databases"]["data"]["permissions"]["execute-sql"] = {"id": "viewer"} allowed = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) From 866852eff603c219b8bf7d13f2a69b5ff032fa67 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 12:46:18 -0700 Subject: [PATCH 1159/1218] Clarifying comments --- datasette/default_permissions/defaults.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index ed0a6d66..32ad4ef1 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -80,6 +80,7 @@ async def default_query_permissions_sql( if action in {"update-query", "delete-query"}: if actor_id is None: return None + # Query owner can update/delete query return PermissionSQL( sql=""" SELECT database_name AS parent, name AS child, 1 AS allow, @@ -97,15 +98,15 @@ async def default_query_permissions_sql( params = {"query_owner_id": actor_id} rule_sqls = [] if actor_id is not None: - rule_sqls.append( - """ + # Query owner can view-query + rule_sqls.append(""" SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries WHERE owner_id = :query_owner_id - """ - ) + """) + # restriction_sql enforces private queries ONLY visible to owner return PermissionSQL( sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, restriction_sql=""" From 71c76e38534378cbce8576771238a788feccf3ad Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:08:19 -0700 Subject: [PATCH 1160/1218] Better faceting on /-/queries Ref https://github.com/simonw/datasette/pull/2741#issuecomment-4548321815 --- datasette/app.py | 69 +++++++++++++++++ datasette/templates/query_list.html | 94 +++++++++++++---------- datasette/views/database.py | 99 +++++++++++++++++++++++- tests/test_permissions.py | 8 +- tests/test_queries.py | 115 +++++++++++++++++++++++++--- 5 files changed, 330 insertions(+), 55 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 3329ee7e..1acdfcd8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1298,6 +1298,75 @@ class Datasette: ) return self._query_row_to_dict(rows.first()) + async def count_queries( + self, + database=None, + *, + actor=None, + q=None, + is_write=None, + is_private=None, + is_trusted=None, + source=None, + owner_id=None, + ): + allowed_sql, allowed_params = await self.allowed_resources_sql( + action="view-query", + actor=actor, + parent=database, + ) + params = dict(allowed_params) + where_clauses = [] + if database is not None: + params["query_database"] = database + where_clauses.append("q.database_name = :query_database") + + if q: + where_clauses.append(""" + ( + q.name LIKE :query_search + OR q.title LIKE :query_search + OR q.description LIKE :query_search + OR q.sql LIKE :query_search + ) + """) + params["query_search"] = "%{}%".format(q) + if is_write is not None: + where_clauses.append("q.is_write = :query_is_write") + params["query_is_write"] = int(bool(is_write)) + if is_private is not None: + where_clauses.append("q.is_private = :query_is_private") + params["query_is_private"] = int(bool(is_private)) + if is_trusted is not None: + where_clauses.append("q.is_trusted = :query_is_trusted") + params["query_is_trusted"] = int(bool(is_trusted)) + if source is not None: + where_clauses.append("q.source = :query_source") + params["query_source"] = source + if owner_id is not None: + where_clauses.append("q.owner_id = :query_owner_id") + params["query_owner_id"] = owner_id + + row = ( + await self.get_internal_database().execute( + """ + SELECT count(*) AS count + FROM queries q + JOIN ( + {allowed_sql} + ) allowed + ON allowed.parent = q.database_name + AND allowed.child = q.name + WHERE {where} + """.format( + allowed_sql=allowed_sql, + where=" AND ".join(where_clauses) or "1 = 1", + ), + params, + ) + ).first() + return row["count"] + async def list_queries( self, database=None, diff --git a/datasette/templates/query_list.html b/datasette/templates/query_list.html index 25259b3d..fa4859b1 100644 --- a/datasette/templates/query_list.html +++ b/datasette/templates/query_list.html @@ -9,7 +9,7 @@ max-width: 64rem; } .query-list-filters { - margin: 0.5rem 0 1rem; + margin: 0.5rem 0 0.75rem; } .query-list-search { align-items: center; @@ -32,43 +32,63 @@ line-height: 1.1; padding: 0.35rem 0.65rem; } -.query-list-filter-groups { +.query-list-facets { align-items: flex-start; display: flex; flex-wrap: wrap; - gap: 0.8rem 1.4rem; + gap: 1rem 1.6rem; + margin: 0 0 1rem; } -.query-list-filter-group { - border: 0; +.query-list-facet { + margin: 0; +} +.query-list-facet h2 { + font-size: 0.9rem; + line-height: 1.2; + margin: 0 0 0.35rem; +} +.query-list-facet ul { display: flex; flex-wrap: wrap; gap: 0.35rem; margin: 0; - min-width: 0; padding: 0; + list-style: none; } -.query-list-filter-group legend { - font-weight: 700; - margin: 0 0.45rem 0 0; - padding: 0; -} -.query-list-filter-group label { +.query-list-facet-link, +.query-list-facet-link:link, +.query-list-facet-link:visited, +.query-list-facet-link:hover, +.query-list-facet-link:focus, +.query-list-facet-link:active { align-items: center; border: 1px solid #c8d1dc; border-radius: 0.25rem; - cursor: pointer; + color: #39445a; display: inline-flex; font-size: 0.82rem; - gap: 0.3rem; + gap: 0.4rem; line-height: 1.1; padding: 0.35rem 0.55rem; + text-decoration: none; } -.query-list-filter-group input { - margin: 0; +.query-list-facet-link:hover { + border-color: #7ca5c8; + color: #1f5d85; } -.query-list-filter-group input:checked + span { +.query-list-facet-link-active { + background-color: #edf6fb; + border-color: #6d9fc0; font-weight: 700; } +.query-list-facet-disabled { + color: #7b8794; + cursor: default; +} +.query-list-facet-count { + color: #4f5b6d; + font-variant-numeric: tabular-nums; +} .query-list-results { border-collapse: collapse; font-size: 0.9rem; @@ -169,15 +189,6 @@ .query-list-search input[type=search] { max-width: none; } - .query-list-filter-group { - display: block; - } - .query-list-filter-group legend { - margin-bottom: 0.3rem; - } - .query-list-filter-group label { - margin: 0 0.25rem 0.35rem 0; - } } {% endblock %} @@ -198,24 +209,27 @@ -
-
- Mode - - - -
-
- Visibility - - - -
-
+ + {% if queries %}
diff --git a/datasette/views/database.py b/datasette/views/database.py index bd939d87..2e77d36b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1121,6 +1121,21 @@ class QueryParametersView(BaseView): return _block_framing(Response.json({"ok": True, "parameters": parameters})) +def _query_list_url(path, query_string, *, set_args=None, remove_args=None): + set_args = set_args or {} + remove_args = set(remove_args or ()) + skip = set(set_args) | remove_args | {"_next"} + pairs = [ + (key, value) + for key, value in parse_qsl(query_string, keep_blank_values=True) + if key not in skip + ] + for key, value in set_args.items(): + if value not in (None, ""): + pairs.append((key, value)) + return path + (("?" + urlencode(pairs)) if pairs else "") + + class QueryListView(BaseView): name = "query-list" @@ -1139,9 +1154,7 @@ class QueryListView(BaseView): default=20 if format_ == "html" else 50, ) is_write = _as_optional_bool(request.args.get("is_write"), "is_write") - is_private = _as_optional_bool( - request.args.get("is_private"), "is_private" - ) + is_private = _as_optional_bool(request.args.get("is_private"), "is_private") except QueryValidationError as ex: return _error([ex.message], ex.status) @@ -1173,6 +1186,80 @@ class QueryListView(BaseView): urlencode(pairs), ) + current_filters = { + "actor": request.actor, + "q": request.args.get("q") or None, + "is_write": is_write, + "is_private": is_private, + "source": request.args.get("source") or None, + "owner_id": request.args.get("owner_id") or None, + } + + async def facet_count(field, value): + if current_filters[field] is not None and current_filters[field] != value: + return 0 + filters = dict(current_filters) + filters[field] = value + return await self.ds.count_queries(database, **filters) + + def facet_href(field, value): + if current_filters[field] == value: + return _query_list_url( + query_list_path, + request.query_string, + remove_args=[field], + ) + if current_filters[field] is not None: + return None + return _query_list_url( + query_list_path, + request.query_string, + set_args={field: str(int(value))}, + ) + + async def facet_item(label, field, value): + count = await facet_count(field, value) + active = current_filters[field] == value + if not active and not count: + return None + return { + "label": label, + "count": count, + "href": facet_href(field, value) if active or count else None, + "active": active, + } + + async def facet_items(items): + return [ + item + for item in [ + await facet_item(label, field, value) + for label, field, value in items + ] + if item is not None + ] + + facets = [ + { + "title": "Mode", + "items": await facet_items( + [ + ("Read-only", "is_write", False), + ("Writable", "is_write", True), + ] + ), + }, + { + "title": "Visibility", + "items": await facet_items( + [ + ("Not private", "is_private", False), + ("Private", "is_private", True), + ] + ), + }, + ] + data = { "ok": True, "database": database, @@ -1188,6 +1275,7 @@ class QueryListView(BaseView): "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), "query_list_path": query_list_path, "show_database": database is None, + "facets": facets, "filters": { "q": request.args.get("q") or "", "is_write": request.args.get("is_write") or "", @@ -1715,6 +1803,9 @@ class QueryView(View): } ) metadata = await datasette.get_database_metadata(database) + if canned_query: + metadata = dict(canned_query) + metadata.pop("source", None) renderers = {} for key, (_, can_render) in datasette.renderers.items(): @@ -1865,7 +1956,7 @@ class QueryView(View): ) ), show_hide_hidden=markupsafe.Markup(show_hide_hidden), - metadata=canned_query or metadata, + metadata=metadata, alternate_url_json=alternate_url_json, select_templates=[ f"{'*' if template_name == template.name else ''}{template_name}" diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 4f342d8f..eb6cee9f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -948,9 +948,13 @@ async def test_permissions_in_config( resource_obj = DatabaseResource(database=resource) elif isinstance(resource, tuple) and len(resource) == 2: if action == "view-query": - resource_obj = QueryResource(database=resource[0], query=resource[1]) + resource_obj = QueryResource( + database=resource[0], query=resource[1] + ) else: - resource_obj = TableResource(database=resource[0], table=resource[1]) + resource_obj = TableResource( + database=resource[0], table=resource[1] + ) result = await perms_ds.allowed( action=action, resource=resource_obj, actor=actor diff --git a/tests/test_queries.py b/tests/test_queries.py index 997f8b39..36f7107a 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -395,9 +395,7 @@ async def test_untrusted_shared_query_execution_requires_execute_sql(): owner_id="alice", ) - denied_get = await ds.client.get( - "/data/shared_report.json", actor={"id": "viewer"} - ) + denied_get = await ds.client.get("/data/shared_report.json", actor={"id": "viewer"}) denied_post = await ds.client.post( "/data/shared_report", actor={"id": "viewer"}, @@ -608,6 +606,27 @@ async def test_query_list_and_definition_api(): assert definition_response.json()["query"]["title"] == "Demo query 01" +@pytest.mark.asyncio +async def test_query_page_does_not_show_internal_source(): + ds = Datasette(memory=True) + ds.add_memory_database("query_page_source", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "stored_report", + "select 1 as one", + title="Stored report", + source="user", + owner_id="root", + ) + + response = await ds.client.get("/data/stored_report", actor={"id": "root"}) + + assert response.status_code == 200 + assert "Stored report" in response.text + assert "Data source:" not in response.text + + @pytest.mark.asyncio async def test_query_list_search_filter_and_html(): ds = Datasette(memory=True) @@ -632,6 +651,15 @@ async def test_query_list_search_filter_and_html(): is_trusted=True, source="config", ) + await ds.add_query( + "data", + "writable_query", + "insert into dogs (name) values (:name)", + title="Writable query", + is_write=True, + source="user", + owner_id="root", + ) html_response = await ds.client.get( "/data/-/queries?q=02", @@ -649,13 +677,21 @@ async def test_query_list_search_filter_and_html(): "/data/-/queries.json?is_private=1", actor={"id": "root"}, ) + filtered_write_response = await ds.client.get( + "/data/-/queries?is_write=1", + actor={"id": "root"}, + ) + filtered_private_response = await ds.client.get( + "/data/-/queries?is_private=1", + actor={"id": "root"}, + ) assert html_response.status_code == 200 assert "Demo query 02" in html_response.text assert "Demo query 01" not in html_response.text assert 'class="query-list-results"' in html_response.text - assert "Mode" in html_response.text - assert 'type="radio" name="is_private" value="1"' in html_response.text + assert 'class="query-list-facets"' in html_response.text + assert 'type="radio"' not in html_response.text assert "Only the owning actor can view this query." not in html_response.text assert ( "Execution skips the usual SQL and write permission checks" @@ -667,14 +703,75 @@ async def test_query_list_search_filter_and_html(): assert '' not in flags_response.text assert 'class="query-list-owner">root' in flags_response.text assert 'class="query-list-pill">Read-only' in flags_response.text - assert 'class="query-list-pill query-list-pill-private">Private' in flags_response.text - assert 'class="query-list-pill query-list-pill-trusted">Trusted' in flags_response.text + assert ( + 'class="query-list-pill query-list-pill-write">Writable' + in flags_response.text + ) + assert ( + 'class="query-list-pill query-list-pill-private">Private' + in flags_response.text + ) + assert ( + 'class="query-list-pill query-list-pill-trusted">Trusted' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_write=0">Read-only5' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_write=1">Writable1' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_private=0">Not private5' + in flags_response.text + ) + assert ( + 'href="/data/-/queries?is_private=1">Private1' + in flags_response.text + ) assert "Only the owning actor can view this query." in flags_response.text - assert "Execution skips the usual SQL and write permission checks" in flags_response.text + assert ( + "Execution skips the usual SQL and write permission checks" + in flags_response.text + ) assert json_response.json()["queries"][0]["name"] == "demo_query_02" assert [query["name"] for query in filtered_response.json()["queries"]] == [ "private_query" ] + assert "Writable query" in filtered_write_response.text + assert "Demo query 01" not in filtered_write_response.text + assert ( + 'query-list-facet-link query-list-facet-link-active" href="/data/-/queries"' + in filtered_write_response.text + ) + assert ( + 'Read-only0' + not in filtered_write_response.text + ) + assert ( + 'href="/data/-/queries?is_write=1&is_private=0">Not private1' + in filtered_write_response.text + ) + assert ( + 'Private0' + not in filtered_write_response.text + ) + assert "Private query" in filtered_private_response.text + assert "Demo query 01" not in filtered_private_response.text + assert ( + 'href="/data/-/queries?is_private=1&is_write=0">Read-only1' + in filtered_private_response.text + ) + assert ( + 'Writable0' + not in filtered_private_response.text + ) + assert ( + 'Not private0' + not in filtered_private_response.text + ) @pytest.mark.asyncio @@ -1313,7 +1410,7 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): "insert-row": {"id": "alice"}, } } - } + }, } } }, From 0fcaa5792ba73143661515af0088d7e5d968e96c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:12:07 -0700 Subject: [PATCH 1161/1218] Style query operations on create query Made it consistent with the SQL write page. --- .../_execute_write_analysis_styles.html | 37 +++++++++++++++++++ datasette/templates/execute_write.html | 36 +----------------- datasette/templates/query_create.html | 19 +++++----- tests/test_queries.py | 6 ++- 4 files changed, 52 insertions(+), 46 deletions(-) create mode 100644 datasette/templates/_execute_write_analysis_styles.html diff --git a/datasette/templates/_execute_write_analysis_styles.html b/datasette/templates/_execute_write_analysis_styles.html new file mode 100644 index 00000000..f20e67b2 --- /dev/null +++ b/datasette/templates/_execute_write_analysis_styles.html @@ -0,0 +1,37 @@ + diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 46f58c3b..414d4af7 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -40,42 +40,8 @@ border-radius: 0.25rem; min-width: 13rem; } -.execute-write-analysis { - border-collapse: collapse; - font-size: 0.9rem; - margin: 0.25rem 0 1rem; - min-width: 44rem; -} -.execute-write-analysis th, -.execute-write-analysis td { - border-bottom: 1px solid #d7dde5; - padding: 0.45rem 0.7rem; - text-align: left; - vertical-align: top; -} -.execute-write-analysis th { - background-color: #edf6fb; - border-top: 1px solid #d7dde5; - color: #39445a; - font-weight: 700; -} -.execute-write-analysis tbody tr:nth-child(even) { - background-color: rgba(39, 104, 144, 0.05); -} -.execute-write-analysis code { - background: transparent; - font-size: 0.9em; - white-space: nowrap; -} -.execute-write-analysis-allowed { - color: #267a3e; - font-weight: 700; -} -.execute-write-analysis-denied { - color: #b00020; - font-weight: 700; -} +{% include "_execute_write_analysis_styles.html" %} {% include "_sql_parameter_styles.html" %} {% endblock %} diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index 686d971e..2d8a9122 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -5,6 +5,7 @@ {% block extra_head %} {{- super() -}} {% include "_codemirror.html" %} +{% include "_execute_write_analysis_styles.html" %} {% endblock %} {% block body_class %}query-create db-{{ database|to_css_class }}{% endblock %} @@ -32,30 +33,28 @@

Execute write SQL

{% endif %} -

Analysis

+

Query operations

{% if analysis_error %}

{{ analysis_error }}

{% elif analysis_rows %} -
Mode
+
- + - {% for row in analysis_rows %} - - - - - - + + + + + {% endfor %} diff --git a/tests/test_queries.py b/tests/test_queries.py index 36f7107a..c27c23da 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -998,7 +998,11 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert "Create query" in create_response.text assert "Read-only" in create_response.text assert "Writable" in create_response.text - assert "required permission" in create_response.text + assert "

Query operations

" in create_response.text + assert '
Operation Database Tablerequired permissionRequired permission AllowedSource
{{ row.operation }}{{ row.database }}{{ row.table }}{{ row.required_permission }}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}{{ row.source or "" }}{{ row.operation }}{{ row.database }}{{ row.table }}{% if row.required_permission %}{{ row.required_permission }}{% endif %}{% if row.allowed is none %}{% elif row.allowed %}yes{% else %}no{% endif %}
' in create_response.text + assert '' in create_response.text + assert '' not in create_response.text + assert "" in create_response.text assert query_response.status_code == 200 assert "Save query" in query_response.text assert "/data/-/queries/-/create?sql=select+%2A+from+dogs" in query_response.text From 70b23ff4a55528083512fab96aa50725f415cbe4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:47:24 -0700 Subject: [PATCH 1162/1218] Tweaked save query link --- datasette/templates/query.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/templates/query.html b/datasette/templates/query.html index f74d21f1..1900bd31 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -66,7 +66,7 @@ {% if not hide_sql %}{% endif %} {{ show_hide_hidden }} - {% if save_query_url %}Save query{% endif %} + {% if save_query_url %}Save this query{% endif %} {% if canned_query and edit_sql_url %}Edit SQL{% endif %}

From eb7c25c57cf914629c08eaa477d0709b0f41efeb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:48:40 -0700 Subject: [PATCH 1163/1218] Major redesign of create saved query UI https://github.com/simonw/datasette/pull/2741#issuecomment-4548707129 --- datasette/app.py | 6 +- datasette/static/app.css | 4 + .../_execute_write_analysis_scripts.html | 111 +++++++ .../_execute_write_analysis_styles.html | 4 + .../templates/_sql_parameter_scripts.html | 17 +- datasette/templates/execute_write.html | 88 +----- datasette/templates/query_create.html | 296 +++++++++++++++--- datasette/views/database.py | 181 ++++++++--- tests/test_queries.py | 170 +++++++++- 9 files changed, 705 insertions(+), 172 deletions(-) create mode 100644 datasette/templates/_execute_write_analysis_scripts.html diff --git a/datasette/app.py b/datasette/app.py index 1acdfcd8..8936b099 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -50,7 +50,7 @@ from .views.database import ( ExecuteWriteView, TableCreateView, QueryView, - QueryCreateView, + QueryCreateAnalyzeView, QueryDeleteView, QueryDefinitionView, GlobalQueryListView, @@ -2820,8 +2820,8 @@ class Datasette: r"/(?P[^\/\.]+)/-/queries(\.(?Pjson))?$", ) add_route( - QueryCreateView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/-/create$", + QueryCreateAnalyzeView.as_view(self), + r"/(?P[^\/\.]+)/-/queries/analyze$", ) add_route( QueryInsertView.as_view(self), diff --git a/datasette/static/app.css b/datasette/static/app.css index c21d0dc4..4f4db133 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -1414,6 +1414,10 @@ svg.dropdown-menu-icon { position: relative; top: 1px; } +.save-query { + display: inline-block; + margin-left: 0.45em; +} .blob-download { display: block; diff --git a/datasette/templates/_execute_write_analysis_scripts.html b/datasette/templates/_execute_write_analysis_scripts.html new file mode 100644 index 00000000..a19bae13 --- /dev/null +++ b/datasette/templates/_execute_write_analysis_scripts.html @@ -0,0 +1,111 @@ + diff --git a/datasette/templates/_execute_write_analysis_styles.html b/datasette/templates/_execute_write_analysis_styles.html index f20e67b2..165cfe9f 100644 --- a/datasette/templates/_execute_write_analysis_styles.html +++ b/datasette/templates/_execute_write_analysis_styles.html @@ -34,4 +34,8 @@ color: #b00020; font-weight: 700; } +.execute-write-analysis-na { + color: #687386; + font-style: italic; +} diff --git a/datasette/templates/_sql_parameter_scripts.html b/datasette/templates/_sql_parameter_scripts.html index 68e46069..159a141c 100644 --- a/datasette/templates/_sql_parameter_scripts.html +++ b/datasette/templates/_sql_parameter_scripts.html @@ -215,9 +215,10 @@ window.datasetteSqlParameters = (() => { if (!form) { return null; } + const shouldRenderParameters = options.renderParameters !== false; const section = options.section || form.querySelector("[data-sql-parameters-section]"); - if (!section) { + if (shouldRenderParameters && !section) { return null; } const manager = { @@ -225,12 +226,16 @@ window.datasetteSqlParameters = (() => { section, allowExpand: options.allowExpand === undefined - ? section.dataset.allowExpand === "1" + ? section + ? section.dataset.allowExpand === "1" + : false : options.allowExpand, parameterState: new Map(), }; - bindParameterControls(manager); - syncParameterState(manager); + if (section) { + bindParameterControls(manager); + syncParameterState(manager); + } const url = options.url || form.dataset.parametersUrl; let refreshTimer = null; @@ -254,7 +259,9 @@ window.datasetteSqlParameters = (() => { if (!response.ok) { throw new Error((data.errors || [response.statusText]).join("; ")); } - renderParameters(manager, data.parameters || []); + if (shouldRenderParameters) { + renderParameters(manager, data.parameters || []); + } if (options.onData) { options.onData(data, manager); } diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 414d4af7..7a627a7a 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -131,6 +131,7 @@ if (executeWriteSqlInput && !executeWriteSqlInput.value) { {% include "_codemirror_foot.html" %} {% include "_sql_parameter_scripts.html" %} +{% include "_execute_write_analysis_scripts.html" %} + + {% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 2e77d36b..aafcf40b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -551,6 +551,17 @@ def _wants_json(request, is_json, data): ) +def _query_create_form_error_message(message): + return { + "Query name is required": "URL is required", + "Invalid query name": "Invalid URL", + "Query name conflicts with a table or view": ( + "URL conflicts with an existing table or view" + ), + "Query already exists": "A query already exists at that URL", + }.get(message, message) + + async def _json_or_form_payload(request): content_type = request.headers.get("content-type", "") if content_type.startswith("application/json"): @@ -731,6 +742,54 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): } +async def _query_create_analysis_data(datasette, db, sql, actor): + has_sql = bool(sql and sql.strip()) + parameter_names = [] + analysis_rows = [] + analysis_error = None + if has_sql: + try: + parameter_names = _derived_query_parameters(sql) + params = {parameter: "" for parameter in parameter_names} + analysis = await db.analyze_sql(sql, params) + analysis_rows = await _analysis_rows_with_permissions( + datasette, analysis, actor + ) + except (QueryValidationError, sqlite3.DatabaseError) as ex: + analysis_error = getattr(ex, "message", str(ex)) + return { + "ok": analysis_error is None, + "parameters": parameter_names, + "analysis_error": analysis_error, + "analysis_rows": analysis_rows, + "has_sql": has_sql, + "analysis_is_write": bool( + analysis_rows and any(row["required_permission"] for row in analysis_rows) + ), + "save_disabled": bool( + (not has_sql) + or analysis_error + or any(row["allowed"] is False for row in analysis_rows) + ), + } + + +async def _query_create_form_context( + datasette, request, db, *, sql="", name="", title="", description="", is_private=True +): + analysis_data = await _query_create_analysis_data(datasette, db, sql, request.actor) + return { + "database": db.name, + "database_color": db.color, + "sql": sql, + "name": name, + "title": title, + "description": description, + "is_private": is_private, + **analysis_data, + } + + async def _inserted_row_url(datasette, db, analysis, cursor): if cursor.rowcount != 1: return None @@ -1307,6 +1366,35 @@ class QueryCreateView(BaseView): name = "query-create" has_json_alternate = False + async def _render_form( + self, + request, + db, + *, + sql="", + name="", + title="", + description="", + is_private=True, + status=200, + ): + response = await self.render( + ["query_create.html"], + request, + await _query_create_form_context( + self.ds, + request, + db, + sql=sql, + name=name, + title=title, + description=description, + is_private=is_private, + ), + ) + response.status = status + return response + async def get(self, request): db = await self.ds.resolve_database(request) await self.ds.ensure_permission( @@ -1320,46 +1408,61 @@ class QueryCreateView(BaseView): actor=request.actor, ) - sql = request.args.get("sql") or "" - analysis_error = None - analysis_rows = [] - parameter_names = [] - if sql: - try: - parameter_names = _derived_query_parameters(sql) - params = {parameter: "" for parameter in parameter_names} - analysis = await db.analyze_sql(sql, params) - analysis_rows = await _analysis_rows_with_permissions( - self.ds, analysis, request.actor - ) - except (QueryValidationError, sqlite3.DatabaseError) as ex: - analysis_error = getattr(ex, "message", str(ex)) + return await self._render_form(request, db, sql=request.args.get("sql") or "") - return await self.render( - ["query_create.html"], - request, - { - "database": db.name, - "database_color": db.color, - "sql": sql, - "parameter_names": parameter_names, - "analysis_error": analysis_error, - "analysis_rows": analysis_rows, - "analysis_is_write": bool( - analysis_rows - and any(row["required_permission"] for row in analysis_rows) - ), - "save_disabled": bool( - analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), - }, + +class QueryCreateAnalyzeView(BaseView): + name = "query-create-analyze" + has_json_alternate = False + + async def get(self, request): + db = await self.ds.resolve_database(request) + if not await self.ds.allowed( + action="execute-sql", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing(_error(["Permission denied: need execute-sql"], 403)) + if not await self.ds.allowed( + action="insert-query", + resource=DatabaseResource(db.name), + actor=request.actor, + ): + return _block_framing(_error(["Permission denied: need insert-query"], 403)) + + invalid_keys = set(request.args) - {"sql"} + if invalid_keys: + return _block_framing( + _error( + ["Invalid keys: {}".format(", ".join(sorted(invalid_keys)))], + 400, + ) + ) + sql = request.args.get("sql") or "" + return _block_framing( + Response.json( + await _query_create_analysis_data(self.ds, db, sql, request.actor) + ) ) -class QueryInsertView(BaseView): +class QueryInsertView(QueryCreateView): name = "query-insert" + async def _error_response(self, request, db, query_data, message, status): + message = _query_create_form_error_message(message) + self.ds.add_message(request, message, self.ds.ERROR) + return await self._render_form( + request, + db, + sql=query_data.get("sql") or "", + name=query_data.get("name") or "", + title=query_data.get("title") or "", + description=query_data.get("description") or "", + is_private=_as_bool(query_data.get("is_private", True)), + status=status, + ) + async def post(self, request): db = await self.ds.resolve_database(request) if not await self.ds.allowed( @@ -1375,6 +1478,8 @@ class QueryInsertView(BaseView): ): return _error(["Permission denied: need insert-query"], 403) + is_json = False + query_data = {} try: data, is_json = await _json_or_form_payload(request) if not isinstance(data, dict): @@ -1384,6 +1489,10 @@ class QueryInsertView(BaseView): raise QueryValidationError("JSON must contain a query dictionary") prepared = await _prepare_query_create(self.ds, request, db, query_data) except QueryValidationError as ex: + if not is_json and isinstance(query_data, dict): + return await self._error_response( + request, db, query_data, ex.message, ex.status + ) return _error([ex.message], ex.status) prepared.pop("analysis") @@ -1391,6 +1500,8 @@ class QueryInsertView(BaseView): try: await self.ds.add_query(db.name, name, replace=False, **prepared) except sqlite3.IntegrityError as ex: + if not is_json and isinstance(query_data, dict): + return await self._error_response(request, db, query_data, str(ex), 400) return _error([str(ex)], 400) query = await self.ds.get_query(db.name, name) @@ -1896,7 +2007,7 @@ class QueryView(View): ): save_query_url = ( datasette.urls.database(database) - + "/-/queries/-/create?" + + "/-/queries/insert?" + urlencode({"sql": sql}) ) diff --git a/tests/test_queries.py b/tests/test_queries.py index c27c23da..32cdfae3 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -986,6 +986,14 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): await ds.invoke_startup() create_response = await ds.client.get( + "/data/-/queries/insert?sql=select+*+from+dogs", + actor={"id": "root"}, + ) + blank_create_response = await ds.client.get( + "/data/-/queries/insert", + actor={"id": "root"}, + ) + old_create_response = await ds.client.get( "/data/-/queries/-/create?sql=select+*+from+dogs", actor={"id": "root"}, ) @@ -996,16 +1004,171 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert create_response.status_code == 200 assert "Create query" in create_response.text - assert "Read-only" in create_response.text assert "Writable" in create_response.text + assert 'type="radio"' not in create_response.text + assert 'name="parameters"' not in create_response.text + assert 'id="query-parameters"' not in create_response.text + assert 'class="query-create-field"' in create_response.text + assert '' not in create_response.text + assert '' in create_response.text + assert '' in create_response.text + assert '/data/' in create_response.text + assert ( + '' + in create_response.text + ) + assert 'function slugify(value)' in create_response.text + assert 'data-analyze-url="/data/-/queries/analyze"' in create_response.text + assert "setupSqlParameterRefresh" in create_response.text + assert "renderParameters: false" in create_response.text + assert "datasetteSqlAnalysis.renderAnalysis" in create_response.text + assert "data-query-create-submit" in create_response.text + assert "data-query-create-writable" in create_response.text + assert ( + "Queries marked private can only be seen by you, their creator." + in create_response.text + ) assert "

Query operations

" in create_response.text assert '
Required permissionSourceread
' in create_response.text assert '' in create_response.text assert '' not in create_response.text assert "" in create_response.text + assert ( + create_response.text.count( + '' + ) + == 2 + ) + assert create_response.text.index('value="Save query"') < create_response.text.index( + "

Query operations

" + ) + assert blank_create_response.status_code == 200 + assert ( + '
Required permissionSourcereadn/a
' in response.text assert '' in response.text assert "" in response.text From 5dca2dc9beea96c52e6a9c806df66c9a1f2f7874 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 13:54:47 -0700 Subject: [PATCH 1164/1218] Show query count on database page --- datasette/templates/database.html | 2 +- datasette/views/database.py | 18 +++++++++++++++++- tests/test_queries.py | 11 ++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 62f9c620..371f6a22 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -59,7 +59,7 @@ {% endfor %} {% if queries_more %} -

View all queries

+

View {{ "{:,}".format(queries_count) }} quer{% if queries_count == 1 %}y{% else %}ies{% endif %}

{% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index feb38619..d40d69d1 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -102,6 +102,11 @@ class DatabaseView(View): ) canned_queries = queries_page["queries"] queries_more = queries_page["has_more"] + queries_count = ( + await datasette.count_queries(database, actor=request.actor) + if queries_more + else len(canned_queries) + ) async def database_actions(): links = [] @@ -134,6 +139,7 @@ class DatabaseView(View): "views": sql_views, "queries": canned_queries, "queries_more": queries_more, + "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, "table_columns": ( await _table_columns(datasette, database) if allow_execute_sql else {} @@ -168,6 +174,7 @@ class DatabaseView(View): views=sql_views, queries=canned_queries, queries_more=queries_more, + queries_count=queries_count, allow_execute_sql=allow_execute_sql, table_columns=( await _table_columns(datasette, database) @@ -219,6 +226,7 @@ class DatabaseContext(Context): queries_more: bool = field( metadata={"help": "Boolean indicating if more saved queries are available"} ) + queries_count: int = field(metadata={"help": "Count of visible saved queries"}) allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) @@ -775,7 +783,15 @@ async def _query_create_analysis_data(datasette, db, sql, actor): async def _query_create_form_context( - datasette, request, db, *, sql="", name="", title="", description="", is_private=True + datasette, + request, + db, + *, + sql="", + name="", + title="", + description="", + is_private=True, ): analysis_data = await _query_create_analysis_data(datasette, db, sql, request.actor) return { diff --git a/tests/test_queries.py b/tests/test_queries.py index 32cdfae3..09b41645 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -458,9 +458,10 @@ async def test_database_page_query_preview_is_limited(): assert html_response.status_code == 200 assert "Demo query 05" in html_response.text assert "Demo query 06" not in html_response.text - assert 'href="/data/-/queries"' in html_response.text + assert 'View 25 queries' in html_response.text assert len(json_response.json()["queries"]) == 5 assert json_response.json()["queries_more"] is True + assert json_response.json()["queries_count"] == 25 @pytest.mark.asyncio @@ -1017,7 +1018,7 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): '' in create_response.text ) - assert 'function slugify(value)' in create_response.text + assert "function slugify(value)" in create_response.text assert 'data-analyze-url="/data/-/queries/analyze"' in create_response.text assert "setupSqlParameterRefresh" in create_response.text assert "renderParameters: false" in create_response.text @@ -1039,9 +1040,9 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): ) == 2 ) - assert create_response.text.index('value="Save query"') < create_response.text.index( - "

Query operations

" - ) + assert create_response.text.index( + 'value="Save query"' + ) < create_response.text.index("

Query operations

") assert blank_create_response.status_code == 200 assert ( '
Required permissioninsert
' in create_response.text assert '' in create_response.text @@ -1053,6 +1067,12 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): "

Analysis will show each affected table and required permission.

" not in blank_create_response.text ) + assert "Enter SQL to analyze this query." in blank_create_response.text + assert write_create_response.status_code == 200 + assert ( + 'This query updates data in the database.' + in write_create_response.text + ) assert query_response.status_code == 200 assert "Save this query" in query_response.text assert "/data/-/queries/insert?sql=select+%2A+from+dogs" in query_response.text From 024b9117725bbed17396a5a4b3f48663c23337f5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:09:53 -0700 Subject: [PATCH 1166/1218] Clarifying comment https://github.com/simonw/datasette/pull/2741/changes#r3306856046 --- datasette/default_permissions/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index a9f2d8bd..6cd46f04 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -26,6 +26,7 @@ from .restrictions import ( from .root import root_user_permissions_sql as root_user_permissions_sql from .config import config_permissions_sql as config_permissions_sql from .defaults import ( + # Avoid "datasette.default_permissions" does not explicitly export attribute default_allow_sql_check as default_allow_sql_check, default_action_permissions_sql as default_action_permissions_sql, default_query_permissions_sql as default_query_permissions_sql, From ac6ee097dd06050188d44c6d4b17a98a12c7b481 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:10:48 -0700 Subject: [PATCH 1167/1218] Disallow update/delete of private queries If a user does not own a private query they cannot update or delete it either, even if they have global update-query. https://github.com/simonw/datasette/pull/2741/changes#r3306417463 --- datasette/default_permissions/defaults.py | 33 ++++----- tests/test_queries.py | 81 +++++++++++++++++++++++ 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/datasette/default_permissions/defaults.py b/datasette/default_permissions/defaults.py index 32ad4ef1..5bc74425 100644 --- a/datasette/default_permissions/defaults.py +++ b/datasette/default_permissions/defaults.py @@ -77,36 +77,31 @@ async def default_query_permissions_sql( ) -> Optional[PermissionSQL]: actor_id = actor.get("id") if isinstance(actor, dict) else None - if action in {"update-query", "delete-query"}: - if actor_id is None: - return None - # Query owner can update/delete query - return PermissionSQL( - sql=""" - SELECT database_name AS parent, name AS child, 1 AS allow, - 'query owner' AS reason - FROM queries - WHERE source = 'user' - AND owner_id = :query_owner_id - """, - params={"query_owner_id": actor_id}, - ) - - if action != "view-query": + if action not in {"view-query", "update-query", "delete-query"}: return None params = {"query_owner_id": actor_id} rule_sqls = [] if actor_id is not None: - # Query owner can view-query - rule_sqls.append(""" + if action in {"update-query", "delete-query"}: + # Query owner can update/delete query + rule_sqls.append(""" + SELECT database_name AS parent, name AS child, 1 AS allow, + 'query owner' AS reason + FROM queries + WHERE source = 'user' + AND owner_id = :query_owner_id + """) + else: + # Query owner can view-query + rule_sqls.append(""" SELECT database_name AS parent, name AS child, 1 AS allow, 'query owner' AS reason FROM queries WHERE owner_id = :query_owner_id """) - # restriction_sql enforces private queries ONLY visible to owner + # restriction_sql enforces private queries ONLY visible/mutable by owner return PermissionSQL( sql="\nUNION ALL\n".join(rule_sqls) if rule_sqls else None, restriction_sql=""" diff --git a/tests/test_queries.py b/tests/test_queries.py index f888dda0..26a0748c 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1581,6 +1581,87 @@ async def test_query_owner_gets_update_delete_and_writable_view_defaults(): ) +@pytest.mark.asyncio +async def test_private_query_restricts_broad_update_delete_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "update-query": {"id": "bob"}, + "delete-query": {"id": "bob"}, + }, + }, + }, + }, + ) + ds.add_memory_database("query_broad_update_delete", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "alice_private", + "select 1", + is_private=True, + source="user", + owner_id="alice", + ) + await ds.add_query( + "data", + "alice_public", + "select 2", + is_private=False, + source="user", + owner_id="alice", + ) + + for action in ("update-query", "delete-query"): + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "bob"}, + ) + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_public"), + actor={"id": "bob"}, + ) + + private_update_response = await ds.client.post( + "/data/alice_private/-/update", + actor={"id": "bob"}, + json={"update": {"title": "Nope"}}, + ) + private_delete_response = await ds.client.post( + "/data/alice_private/-/delete", + actor={"id": "bob"}, + json={}, + ) + public_update_response = await ds.client.post( + "/data/alice_public/-/update", + actor={"id": "bob"}, + json={"update": {"title": "Bob can edit public queries"}}, + ) + public_delete_response = await ds.client.post( + "/data/alice_public/-/delete", + actor={"id": "bob"}, + json={}, + ) + + assert private_update_response.status_code == 403 + assert private_delete_response.status_code == 403 + assert public_update_response.status_code == 200 + assert public_delete_response.status_code == 200 + assert await ds.get_query("data", "alice_private") is not None + assert await ds.get_query("data", "alice_public") is None + + @pytest.mark.asyncio async def test_user_writable_query_execution_rechecks_table_permissions(): ds = Datasette( From 180a6a86fd77ac43f6cf3bfb7d7f9150003da419 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:16:10 -0700 Subject: [PATCH 1168/1218] Remove queries-plan.md We do not need this any more. It can live forever in Git history. --- queries-plan.md | 446 ------------------------------------------------ 1 file changed, 446 deletions(-) delete mode 100644 queries-plan.md diff --git a/queries-plan.md b/queries-plan.md deleted file mode 100644 index da6b7c92..00000000 --- a/queries-plan.md +++ /dev/null @@ -1,446 +0,0 @@ -# Queries in the internal database - -Plan for . - -## Goal - -Move named query definitions into Datasette's internal database, so hundreds or thousands of queries can be listed, searched, permission-filtered, managed, and executed efficiently. - -Terminology change: these are now "queries", not "canned queries". Legacy code and documentation can mention the old name only when describing compatibility or migration. - -## Decisions so far - -- Internal table name: `queries`. -- Query definitions should use real columns, not a JSON blob for all options. -- Query parameter names live in a `parameters` text column as a JSON array. No default values for parameters in this pass. -- No separate index is needed for the privacy/trust flags yet. -- User-created queries require `execute-sql` and `insert-query` on the database. They default to private, and writable queries additionally require matching table write permissions discovered by `Database.analyze_sql()`. -- Configured queries default to trusted, which means actors who can view them can execute them without also holding `execute-sql` or the relevant write permissions. Config can opt out with `is_trusted: false`. -- Add `update-query` and `delete-query`, so administrators can manage queries created by other users. -- Remove the old `canned_queries()` hook from core. If we want compatibility later, build a separate `datasette-old-canned-queries` plugin. -- Writable user-created queries can be supported using `Database.analyze_sql()`, provided we fail closed when analysis cannot prove the required permissions. - -## Current shape - -- Query definitions currently come from `datasette.yaml` or the `canned_queries()` plugin hook. -- `Datasette.get_canned_queries(database_name, actor)` calls that hook every time it needs query definitions. -- `QueryResource.resources_sql()` currently enumerates databases and calls the hook for each one, because permissions and `/-/jump` need query resources. -- Query pages are visible if the actor has `view-query` for `QueryResource(database, query)`. Executing an untrusted stored query also checks `execute-sql` or the relevant write permissions. -- Arbitrary SQL executes if the actor has `execute-sql` for `DatabaseResource(database)`. - -The main performance and architecture win is making query resource enumeration a direct SQL query against the internal database. - -## Proposed internal schema - -Start with one `queries` table. - -```sql -CREATE TABLE IF NOT EXISTS queries ( - database_name TEXT NOT NULL, - name TEXT NOT NULL, - sql TEXT NOT NULL, - title TEXT, - description TEXT, - description_html TEXT, - options TEXT NOT NULL DEFAULT '{}', - parameters TEXT NOT NULL DEFAULT '[]', - is_write INTEGER NOT NULL DEFAULT 0 CHECK (is_write IN (0, 1)), - is_private INTEGER NOT NULL DEFAULT 0 CHECK (is_private IN (0, 1)), - is_trusted INTEGER NOT NULL DEFAULT 0 CHECK (is_trusted IN (0, 1)), - source TEXT NOT NULL DEFAULT 'user', - owner_id TEXT, - created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (database_name, name) -); - -CREATE INDEX IF NOT EXISTS queries_owner_idx - ON queries(owner_id); -``` - -Column notes: - -- `database_name`, `name`, and `sql` are the routing and execution core. -- Display fields become columns: `title`, `description`, and `description_html`. -- Less common presentation and writable-query behavior lives in `options`, stored as a JSON object. That covers `hide_sql`, `fragment`, `on_success_message`, `on_success_message_sql`, `on_success_redirect`, `on_error_message`, and `on_error_redirect`. -- `parameters` is a JSON array of parameter names, stored as text. This preserves explicit parameter order, but does not support labels or default values. -- Existing writable query behavior gets `is_write` as a column. Success/error messages, success/error redirects, and `on_success_message_sql` are stored in `options`. -- `is_private` means the query is only visible to its owning actor. This is enforced as a permission restriction, so broader `view-query` grants do not expose private rows. -- `is_trusted` means execution skips the usual `execute-sql` or write-permission checks after `view-query` has allowed access. -- `source` distinguishes `user`, `config`, and `plugin` rows. -- `owner_id` is the actor id for user-created rows. It is `NULL` for config/plugin rows. - -No separate index is needed on `(database_name, name)` because the primary key already creates one. - -`QueryResource.resources_sql()` can become: - -```sql -SELECT q.database_name AS parent, q.name AS child -FROM queries q -JOIN catalog_databases cd ON cd.database_name = q.database_name -``` - -The join keeps persisted queries for detached databases from appearing as live resources. - -## Config and plugin migration - -`datasette.yaml` can continue to support `databases: {db}: queries:` blocks, but core should import them directly into the internal `queries` tables at startup: - -1. Ensure the internal schema exists. -2. Delete previous `source='config'` rows. -3. Read configured query blocks for each live database. -4. Normalize string definitions to `{"sql": ...}`. -5. Insert rows into `queries`, storing explicit `params` as JSON in `parameters`. - -Plugins should move to: - -```python -await datasette.add_query(...) -await datasette.remove_query(...) -``` - -Remove the old `canned_queries()` hookspec and all core calls to it. If compatibility is needed, build `datasette-old-canned-queries` later as a plugin that restores the hook and imports old hook results using `datasette.add_query()`. - -## Permission model - -Add core actions: - -- `insert-query`, database-level, for creating queries in a database. -- `update-query`, query-level, for modifying existing query definitions. -- `delete-query`, query-level, for deleting existing query definitions. - -User-created query creation requires: - -- `execute-sql` on `DatabaseResource(database)` -- `insert-query` on `DatabaseResource(database)` -- If analysis shows the query is writable, the table-level write permissions described in the writable query section. - -Updating an existing query requires: - -- `update-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. -- If the SQL changes, also require `execute-sql` on the database. -- If the changed SQL is writable, also require the table-level write permissions described in the writable query section. - -Deleting an existing query requires: - -- `delete-query` on `QueryResource(database, query)` or default owner permission for a user-owned row. - -Default owner permissions: - -- For `source='user' AND owner_id = actor.id`, grant `update-query` and `delete-query`. -- For `source='user' AND owner_id = actor.id`, grant `view-query`. If the query is private, restriction SQL ensures no other actor sees it through a broader grant. - -## Executing queries - -Default execution rule for read-only queries: - -- If `is_trusted=0`, the actor needs `execute-sql` on the database. -- If `is_trusted=1`, the actor can execute the query without `execute-sql`, provided `view-query` allows access. - -Default execution rule for user-created writable queries: - -- `is_trusted` must be `0`. -- The actor must have `view-query`. -- The actor must currently have every write permission required by fresh `Database.analyze_sql()` results for the query SQL. - -Implementation: - -- Keep `view-query` in the broad `DEFAULT_ALLOW_ACTIONS` set, so saved queries remain visible by default in all-public Datasette. -- Emit default `view-query` allows for the owning actor. -- Use `restriction_sql` to limit private rows to their owner even when broader `view-query` permissions exist. -- Have `QueryView` perform the fresh `execute-sql` or table-permission check before execution unless the row has `is_trusted=1`. - -For read-only queries this keeps `QueryView` explicit: it checks `view-query` for the query resource, then checks `execute-sql` unless the row is trusted. User-created writable queries need one additional runtime permission check because their required table permissions are derived from fresh SQL analysis. - -Explicit deny rules should still be able to block a query, and `--default-deny` still blocks trusted queries unless something grants `view-query`. - -## Writable queries - -Writable user-created queries should be in scope, guarded by `Database.analyze_sql()`. - -The secure rule: a user can create, update, or execute a writable user-created query only if they currently have the corresponding write permissions for every table the SQL can affect. - -`Database.analyze_sql(sql, params=None)` runs the SQL through SQLite's authorizer on an isolated connection and returns a `SQLAnalysis` object containing `SQLTableAccess` rows: - -- `operation`: `read`, `insert`, `update`, or `delete` -- `database`: Datasette database name for `main`, or SQLite schema name where no Datasette mapping exists -- `table`: affected table or view -- `columns`: read/updated columns where SQLite reports them -- `source`: trigger/view/CTE source when SQLite reports one - -Validation flow for user-created queries: - -1. Derive named parameters from the SQL and pass harmless placeholder values into `db.analyze_sql()` so SQLite can prepare statements with bindings. -2. If analysis raises a SQLite error, reject the query. -3. If every table access is `read`, treat the query as read-only and require `execute-sql` plus `insert-query`/`update-query` as described above. -4. If any table access is `insert`, `update`, or `delete`, treat the query as writable and force `is_trusted=0`. -5. Reject writable user-created queries that access a database other than the database they are being saved against, until `analyze_sql()` can reliably map attached SQLite schemas back to Datasette database names. -6. For every write access returned by analysis, require the corresponding permission on `TableResource(access.database, access.table)`: - - `insert` -> `insert-row` - - `update` -> `update-row` - - `delete` -> `delete-row` -7. Include write accesses reported from triggers and views, since those are real side effects. -8. Re-run the same analysis and permission checks when SQL changes through `update_query()` or `POST .../-/update`. -9. Re-run analysis before executing user-created writable queries, so schema or trigger changes cannot leave a previously saved query with stale permission assumptions. - -The user-facing API should not trust a submitted `is_write` value. It should derive `is_write` from analysis. - -Trusted configuration and plugin code can still call `datasette.add_query(..., is_write=True, ...)`. Those are treated as deployment/admin-authored queries. They keep the existing execution model: they require `view-query`, and the default `view-query` hook should preserve current default-open behavior for trusted writable queries while still respecting `--default-deny`. - -Fail closed cases for user-created writable queries: - -- Analysis fails. -- Analysis reports any write operation that cannot be mapped to a Datasette table resource. -- Analysis reports writes outside the target database. -- The actor lacks any required table write permission. -- `is_trusted=1` is requested through the user-facing API. - -This gives us writable user-created queries without letting `execute-sql` alone become a path to create arbitrary write endpoints. - -## HTTP API sketch - -JSON endpoints should follow Datasette's existing write API style: use `POST` plus action paths such as `/-/insert`, `/-/update`, and `/-/delete`, not HTTP `PATCH` or `DELETE`. - -Endpoints: - -- `GET /-/queries` and `GET /{database}/-/queries` show searchable HTML query browsers. `GET /-/queries.json` lists query definitions across every database the actor can view; `GET /{database}/-/queries.json` scopes that list to one database. Both JSON endpoints use cursor pagination with `_next` and `_size`. -- `POST /{database}/-/queries/insert` creates a query. -- `GET /{database}/{query}/-/definition` returns one query definition without executing it. -- `POST /{database}/{query}/-/update` updates one query. -- `POST /{database}/{query}/-/delete` deletes one query. - -Create request: - -```json -{ - "query": { - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers", - "description": "Highest revenue customers", - "is_private": true, - "parameters": ["region"] - } -} -``` - -Successful create returns `201` and the created query definition: - -```json -{ - "ok": true, - "query": { - "database": "fixtures", - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers", - "description": "Highest revenue customers", - "is_private": true, - "is_trusted": false, - "parameters": ["region"] - } -} -``` - -Update request, imitating `RowUpdateView`: - -```json -{ - "update": { - "title": "Top customers by revenue", - "is_private": false - }, - "return": true -} -``` - -Successful update returns `{"ok": true}` by default. With `"return": true`, return the updated query definition: - -```json -{ - "ok": true, - "query": { - "database": "fixtures", - "name": "top_customers", - "sql": "select * from customers order by revenue desc limit 20", - "title": "Top customers by revenue", - "is_private": false, - "is_trusted": false - } -} -``` - -Delete request: - -```http -POST /{database}/{query}/-/delete -Content-Type: application/json -``` - -Successful delete returns: - -```json -{ - "ok": true -} -``` - -Validation: - -- Update bodies must be dictionaries containing an `update` dictionary, with optional `return`; invalid keys return `{"ok": false, "errors": [...]}`. -- Validate route-safe query names. -- Reject names that collide with a table or view in the same database, since table routes currently win over query routes. -- Analyze user-created SQL with `Database.analyze_sql()`. -- Use `validate_sql_select(sql)` as the read-only fast path when analysis shows only reads, but do not require it for writable queries that pass analysis and permission checks. -- Reject magic parameters such as `:_actor_id`, `:_cookie_*`, and `:_header_*` for user-created queries. -- Reject client-supplied `is_write`; derive it from analysis. -- Reject writable-only success/error fields for read-only queries. - -## Python API sketch - -Add methods on `Datasette`: - -```python -await datasette.add_query( - database, - name, - sql, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, -) - -await datasette.update_query( - database, - name, - *, - sql=UNCHANGED, - title=UNCHANGED, - description=UNCHANGED, - description_html=UNCHANGED, - hide_sql=UNCHANGED, - fragment=UNCHANGED, - parameters=UNCHANGED, - is_write=UNCHANGED, - is_private=UNCHANGED, - is_trusted=UNCHANGED, - source=UNCHANGED, - owner_id=UNCHANGED, - on_success_message=UNCHANGED, - on_success_message_sql=UNCHANGED, - on_success_redirect=UNCHANGED, - on_error_message=UNCHANGED, - on_error_redirect=UNCHANGED, -) - -await datasette.remove_query(database, name, source=None) - -await datasette.get_query(database, name) -await datasette.list_queries( - database, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, -) -``` - -`list_queries()` should return a bounded page shaped like `{"queries": [...], "next": "...", "has_more": true, "limit": 50}`. The `next` value is an opaque cursor token, not an offset. Passing `database=None` lists visible queries across all live databases, still filtered through `view-query` permission SQL. - -`update_query()` should use an internal sentinel default such as `UNCHANGED = object()` so callers can distinguish "leave this column alone" from "set this column to `NULL`": - -```python -await datasette.update_query( - "fixtures", - "top_customers", - on_success_redirect=None, -) -``` - -For column-backed fields, `None` should write SQL `NULL`. For option fields, `None` should remove that key from the JSON object so `get_query()` returns `None`; omitting the field should leave the existing option unchanged. - -Implementation detail: build the `UPDATE` statement dynamically from fields whose value is not `UNCHANGED`, validate non-nullable fields before writing, and update `updated_at` whenever at least one field changes. - -The read methods should reconstruct the existing dictionary shape used by query execution and templates, with `name`, `sql`, display fields, write fields, `params`, `is_private`, `is_trusted`, `owner_id`, and `source`. `parameters` should be returned as the decoded JSON array and exposed as `params` where existing query execution code expects that key. Option values should be unpacked from the `options` JSON object and returned as the same top-level keys accepted by `add_query()` and `update_query()`. - -## Query page save UI - -On `/{database}/-/query`, if the actor has both `execute-sql` and `insert-query`, show a save control for valid read-only SQL. That page already executes read-only arbitrary SQL, so the first UI can stay read-only even though the JSON API can accept writable SQL after `Database.analyze_sql()` validation. - -The save form should call `POST /{database}/-/queries/insert` and default to `is_private=true`. - -On `/{database}`, show a preview of the first 5 visible queries using `list_queries(..., limit=5)`. If the page has `has_more`, show a link to `/{database}/-/queries` rather than rendering hundreds or thousands of query links inline. The full `/{database}/-/queries` page provides search, filters, and cursor pagination. The global `/-/queries` page reuses the same interface and shows the database for each query. - -## Dedicated create query UI - -Add `/{database}/-/queries/-/create` for the fuller query authoring flow, including writable queries. - -This page should require `execute-sql` and `insert-query` to access. It should provide a SQL editor and a mode control: - -- Read-only -- Writable - -Read-only mode can share the same fields as the arbitrary SQL save flow: name, title, description, parameters, and privacy status. - -Writable mode should always run `Database.analyze_sql()` and show an analysis panel before saving: - -- detected operation -- database and table -- required permission -- whether the actor has that permission -- source, when the operation comes from a trigger or view - -The Save button should be disabled until analysis succeeds and every required table write permission is allowed. - -The existing edit-SQL flow from query pages can continue to point back to arbitrary SQL. A later enhancement can add "update this query" when the actor owns it or has `update-query`. - -## Test plan - -- Internal schema creates `queries`. -- Query parameters are stored in the `queries.parameters` text column as a JSON array of names. -- Config `queries:` blocks import into internal tables. -- Legacy string query definitions normalize to SQL rows. -- The old `canned_queries()` hook is no longer called by core. -- `QueryResource.resources_sql()` returns rows from `queries`. -- Database page and `/-/jump` list queries from the internal DB. -- `view-query` remains globally default-allowed, with `restriction_sql` narrowing private queries to their owner. -- Private query is only visible to its owner, even when a broader `view-query` rule applies. -- Non-trusted read-only query requires `execute-sql` to execute. -- Trusted read-only query can be executed without `execute-sql` after `view-query` passes. -- Config queries default to trusted and can opt out with `is_trusted: false`. -- User API rejects client-supplied `is_trusted`. -- User-created query requires both `execute-sql` and `insert-query`. -- User-created writable query creation uses `Database.analyze_sql()` and requires matching `insert-row`, `update-row`, and/or `delete-row` permissions for every reported write access. -- `/{database}/-/queries/-/create` provides the writable-query authoring UI with an analysis panel and disabled save until all required write permissions pass. -- User-created writable query execution re-runs `Database.analyze_sql()` and re-checks table write permissions. -- User-created writable query cannot be trusted through the user API. -- Query update uses `POST /{database}/{query}/-/update` with an `{"update": {...}}` body. -- Query delete uses `POST /{database}/{query}/-/delete`. -- There are no `PATCH` or HTTP `DELETE` routes for query management. -- `datasette.update_query(..., field=None)` writes `NULL` for column-backed fields and removes JSON keys for option fields, while omitted fields are left unchanged. -- Owner gets default `update-query` and `delete-query` for their own user-created rows. -- Admin can manage other users' queries with `update-query` and `delete-query`. -- User API rejects magic parameters. -- User API rejects writable queries if analysis fails, reports writes outside the target database, or reports writes the actor is not allowed to perform. -- Trusted config/plugin writable queries still execute through `view-query`. -- Trusted config/plugin writable queries are not default-allowed under `--default-deny`. -- Persisted internal DB does not expose queries for detached databases. From 24887004cffd52fe801ecd73da78e13b246ddede Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:51:57 -0700 Subject: [PATCH 1169/1218] Rename insert-query to store-query Also queries/insert to queries/store Refs https://github.com/simonw/datasette/pull/2741#issuecomment-4549103663 --- datasette/app.py | 6 ++--- datasette/default_actions.py | 6 ++--- datasette/templates/query_create.html | 2 +- datasette/views/database.py | 22 +++++++-------- docs/authentication.rst | 7 ++--- docs/json_api.rst | 5 ++-- tests/test_queries.py | 39 +++++++++++++++------------ 7 files changed, 47 insertions(+), 40 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8936b099..42a2d27d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -54,9 +54,9 @@ from .views.database import ( QueryDeleteView, QueryDefinitionView, GlobalQueryListView, - QueryInsertView, QueryListView, QueryParametersView, + QueryStoreView, QueryUpdateView, ) from .views.index import IndexView @@ -2824,8 +2824,8 @@ class Datasette: r"/(?P[^\/\.]+)/-/queries/analyze$", ) add_route( - QueryInsertView.as_view(self), - r"/(?P[^\/\.]+)/-/queries/insert$", + QueryStoreView.as_view(self), + r"/(?P[^\/\.]+)/-/queries/store$", ) add_route( ExecuteWriteAnalyzeView.as_view(self), diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 6a1f77b8..0f4c25fa 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -62,9 +62,9 @@ def register_actions(): resource_class=DatabaseResource, ), Action( - name="insert-query", - abbr="iq", - description="Create saved queries", + name="store-query", + abbr="sq", + description="Create stored queries", resource_class=DatabaseResource, also_requires="execute-sql", ), diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index cb14ada4..f5dadbff 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -156,7 +156,7 @@ form.sql .query-create-sql textarea#sql-editor {

Create query

-
+

{{ urls.database(database) }}/

diff --git a/datasette/views/database.py b/datasette/views/database.py index d40d69d1..900b94ba 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1419,7 +1419,7 @@ class QueryCreateView(BaseView): actor=request.actor, ) await self.ds.ensure_permission( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ) @@ -1440,11 +1440,11 @@ class QueryCreateAnalyzeView(BaseView): ): return _block_framing(_error(["Permission denied: need execute-sql"], 403)) if not await self.ds.allowed( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ): - return _block_framing(_error(["Permission denied: need insert-query"], 403)) + return _block_framing(_error(["Permission denied: need store-query"], 403)) invalid_keys = set(request.args) - {"sql"} if invalid_keys: @@ -1462,8 +1462,8 @@ class QueryCreateAnalyzeView(BaseView): ) -class QueryInsertView(QueryCreateView): - name = "query-insert" +class QueryStoreView(QueryCreateView): + name = "query-store" async def _error_response(self, request, db, query_data, message, status): message = _query_create_form_error_message(message) @@ -1488,11 +1488,11 @@ class QueryInsertView(QueryCreateView): ): return _error(["Permission denied: need execute-sql"], 403) if not await self.ds.allowed( - action="insert-query", + action="store-query", resource=DatabaseResource(db.name), actor=request.actor, ): - return _error(["Permission denied: need insert-query"], 403) + return _error(["Permission denied: need store-query"], 403) is_json = False query_data = {} @@ -1961,8 +1961,8 @@ class QueryView(View): resource=DatabaseResource(database=database), actor=request.actor, ) - allow_insert_query = await datasette.allowed( - action="insert-query", + allow_store_query = await datasette.allowed( + action="store-query", resource=DatabaseResource(database=database), actor=request.actor, ) @@ -2020,13 +2020,13 @@ class QueryView(View): if ( not canned_query and allow_execute_sql - and allow_insert_query + and allow_store_query and is_validated_sql and ":_" not in sql ): save_query_url = ( datasette.urls.database(database) - + "/-/queries/insert?" + + "/-/queries/store?" + urlencode({"sql": sql}) ) diff --git a/docs/authentication.rst b/docs/authentication.rst index 453aaa19..184fec5e 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1293,11 +1293,12 @@ Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fi ``query`` is the name of the query (string) .. _actions_insert_query: +.. _actions_store_query: -insert-query ------------- +store-query +----------- -Actor is allowed to create saved queries in a database. +Actor is allowed to create stored queries in a database. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/json_api.rst b/docs/json_api.rst index dd54c459..1a6c7021 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -518,14 +518,15 @@ Listing saved queries Creating saved queries in the UI ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``GET //-/queries/-/create`` provides a form for creating saved queries. +``GET //-/queries/store`` provides a form for creating stored queries. +.. _QueryStoreView: .. _QueryInsertView: Creating saved queries ~~~~~~~~~~~~~~~~~~~~~~ -``POST //-/queries/insert`` creates a saved query. This requires ``execute-sql`` and ``insert-query`` for the database. +``POST //-/queries/store`` creates a stored query. This requires ``execute-sql`` and ``store-query`` for the database. .. _QueryParametersView: .. _ExecuteWriteView: diff --git a/tests/test_queries.py b/tests/test_queries.py index 26a0748c..5d4da9bb 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -470,7 +470,7 @@ async def test_query_actions_are_registered(): await ds.invoke_startup() assert ds.get_action("execute-write-sql").resource_class is DatabaseResource - assert ds.get_action("insert-query").resource_class is DatabaseResource + assert ds.get_action("store-query").resource_class is DatabaseResource assert ds.get_action("update-query").resource_class is QueryResource assert ds.get_action("delete-query").resource_class is QueryResource @@ -537,15 +537,15 @@ async def test_analyze_write_query_rejects_writes_to_attached_databases(): @pytest.mark.asyncio -async def test_query_insert_api_creates_read_only_query(): +async def test_query_store_api_creates_read_only_query(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True - db = ds.add_memory_database("query_insert_api", name="data") + db = ds.add_memory_database("query_store_api", name="data") await db.execute_write("create table dogs (id integer primary key, name text)") await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={ "query": { @@ -860,7 +860,7 @@ async def test_global_query_list_api_and_html(): @pytest.mark.asyncio -async def test_query_insert_api_rejects_is_trusted(): +async def test_query_store_api_rejects_is_trusted(): ds = Datasette( memory=True, default_deny=True, @@ -870,7 +870,7 @@ async def test_query_insert_api_rejects_is_trusted(): "permissions": { "view-database": {"id": "writer"}, "execute-sql": {"id": "writer"}, - "insert-query": {"id": "writer"}, + "store-query": {"id": "writer"}, } } } @@ -880,7 +880,7 @@ async def test_query_insert_api_rejects_is_trusted(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "writer"}, json={"query": {"name": "trusted", "sql": "select 1", "is_trusted": True}}, ) @@ -890,7 +890,7 @@ async def test_query_insert_api_rejects_is_trusted(): @pytest.mark.asyncio -async def test_query_insert_api_creates_writable_query(): +async def test_query_store_api_creates_writable_query(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True db = ds.add_memory_database("query_write_api", name="data") @@ -898,7 +898,7 @@ async def test_query_insert_api_creates_writable_query(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={ "query": { @@ -962,14 +962,14 @@ async def test_query_update_and_delete_api(): @pytest.mark.asyncio -async def test_query_insert_api_rejects_magic_parameters(): +async def test_query_store_api_rejects_magic_parameters(): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True ds.add_memory_database("query_magic_api", name="data") await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, json={"query": {"name": "magic", "sql": "select :_actor_id"}}, ) @@ -987,15 +987,19 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): await ds.invoke_startup() create_response = await ds.client.get( - "/data/-/queries/insert?sql=select+*+from+dogs", + "/data/-/queries/store?sql=select+*+from+dogs", actor={"id": "root"}, ) write_create_response = await ds.client.get( - "/data/-/queries/insert?sql=insert+into+dogs+(name)+values+('Cleo')", + "/data/-/queries/store?sql=insert+into+dogs+(name)+values+('Cleo')", actor={"id": "root"}, ) blank_create_response = await ds.client.get( - "/data/-/queries/insert", + "/data/-/queries/store", + actor={"id": "root"}, + ) + old_insert_response = await ds.client.get( + "/data/-/queries/insert?sql=select+*+from+dogs", actor={"id": "root"}, ) old_create_response = await ds.client.get( @@ -1075,7 +1079,8 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): ) assert query_response.status_code == 200 assert "Save this query" in query_response.text - assert "/data/-/queries/insert?sql=select+%2A+from+dogs" in query_response.text + assert "/data/-/queries/store?sql=select+%2A+from+dogs" in query_response.text + assert old_insert_response.status_code == 404 assert old_create_response.status_code == 404 @@ -1153,7 +1158,7 @@ async def test_create_query_form_error_redisplays_form_with_values(): await ds.invoke_startup() response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, data={ "name": "dogs", @@ -1176,7 +1181,7 @@ async def test_create_query_form_error_redisplays_form_with_values(): assert 'name="is_private" value="1" checked' in response.text public_response = await ds.client.post( - "/data/-/queries/insert", + "/data/-/queries/store", actor={"id": "root"}, data={ "name": "dogs", From 0cadd071871ef0b33e4ce3a23e316a104b3137c3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:53:31 -0700 Subject: [PATCH 1170/1218] No need to document QueryCreateAnalyzeView --- tests/test_docs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 396ba1a2..0d0ef1e1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -66,7 +66,14 @@ def documented_views(): if first_word.endswith("View"): view_labels.add(first_word) # We deliberately don't document these: - view_labels.update(("PatternPortfolioView", "AuthTokenView", "ApiExplorerView")) + view_labels.update( + ( + "PatternPortfolioView", + "AuthTokenView", + "ApiExplorerView", + "QueryCreateAnalyzeView", + ) + ) return view_labels From 4bf1c4b065fef64676abf5eabd04ff35e07188c5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 14:54:35 -0700 Subject: [PATCH 1171/1218] Rename canned queries to queries/stored queries in docs --- datasette/default_actions.py | 4 +- datasette/hookspecs.py | 4 +- datasette/resources.py | 2 +- datasette/views/database.py | 24 ++++----- datasette/views/table.py | 4 +- docs/authentication.rst | 16 +++--- docs/configuration.rst | 10 ++-- docs/custom_templates.rst | 8 +-- docs/internals.rst | 12 ++--- docs/introspection.rst | 2 +- docs/json_api.rst | 32 ++++++------ docs/pages.rst | 4 +- docs/plugin_hooks.rst | 16 +++--- docs/spatialite.rst | 2 +- docs/sql_queries.rst | 95 ++++++++++++++++++++++++++---------- tests/test_html.py | 6 +-- tests/test_permissions.py | 4 +- 17 files changed, 144 insertions(+), 101 deletions(-) diff --git a/datasette/default_actions.py b/datasette/default_actions.py index 0f4c25fa..2f78570b 100644 --- a/datasette/default_actions.py +++ b/datasette/default_actions.py @@ -121,13 +121,13 @@ def register_actions(): Action( name="update-query", abbr="uq", - description="Update saved queries", + description="Update stored queries", resource_class=QueryResource, ), Action( name="delete-query", abbr="dq", - description="Delete saved queries", + description="Delete stored queries", resource_class=QueryResource, ), ) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index a4067eaa..22da02a4 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -174,7 +174,7 @@ def view_actions(datasette, actor, database, view, request): @hookspec def query_actions(datasette, actor, database, query_name, request, sql, params): - """Links for the query and canned query actions menu""" + """Links for the query and stored query actions menu""" @hookspec @@ -229,7 +229,7 @@ def top_query(datasette, request, database, sql): @hookspec def top_canned_query(datasette, request, database, query_name): - """HTML to include at the top of the canned query page""" + """HTML to include at the top of the stored query page""" @hookspec diff --git a/datasette/resources.py b/datasette/resources.py index 91a46d36..ee2e6d98 100644 --- a/datasette/resources.py +++ b/datasette/resources.py @@ -41,7 +41,7 @@ class TableResource(Resource): class QueryResource(Resource): - """A saved query in a database.""" + """A stored query in a database.""" name = "query" parent_class = DatabaseResource diff --git a/datasette/views/database.py b/datasette/views/database.py index 900b94ba..f30d3815 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -222,11 +222,11 @@ class DatabaseContext(Context): tables: list = field(metadata={"help": "List of table objects in the database"}) hidden_count: int = field(metadata={"help": "Count of hidden tables"}) views: list = field(metadata={"help": "List of view objects in the database"}) - queries: list = field(metadata={"help": "List of canned query objects"}) + queries: list = field(metadata={"help": "List of stored query objects"}) queries_more: bool = field( - metadata={"help": "Boolean indicating if more saved queries are available"} + metadata={"help": "Boolean indicating if more stored queries are available"} ) - queries_count: int = field(metadata={"help": "Count of visible saved queries"}) + queries_count: int = field(metadata={"help": "Count of visible stored queries"}) allow_execute_sql: bool = field( metadata={"help": "Boolean indicating if custom SQL can be executed"} ) @@ -272,7 +272,7 @@ class QueryContext(Context): metadata={"help": "The SQL query object containing the `sql` string"} ) canned_query: str = field( - metadata={"help": "The name of the canned query if this is a canned query"} + metadata={"help": "The name of the stored query if this is a stored query"} ) private: bool = field( metadata={"help": "Boolean indicating if this is a private database"} @@ -282,11 +282,11 @@ class QueryContext(Context): # ) canned_query_write: bool = field( metadata={ - "help": "Boolean indicating if this is a canned query that allows writes" + "help": "Boolean indicating if this is a stored query that allows writes" } ) metadata: dict = field( - metadata={"help": "Metadata about the database or the canned query"} + metadata={"help": "Metadata about the database or the stored query"} ) db_is_immutable: bool = field( metadata={"help": "Boolean indicating if this database is immutable"} @@ -315,7 +315,7 @@ class QueryContext(Context): metadata={"help": "Dictionary of parameter names/values"} ) edit_sql_url: str = field( - metadata={"help": "URL to edit the SQL for a canned query"} + metadata={"help": "URL to edit the SQL for a stored query"} ) display_rows: list = field(metadata={"help": "List of result rows to display"}) columns: list = field(metadata={"help": "List of column names"}) @@ -1623,7 +1623,7 @@ class QueryView(View): db = await datasette.resolve_database(request) - # We must be a canned query + # We must be a stored query table_found = False try: await datasette.resolve_table(request) @@ -1742,14 +1742,14 @@ class QueryView(View): # Create lookup dict for quick access allowed_dict = {r.child: r for r in allowed_tables_page.resources} - # Are we a canned query? + # Are we a stored query? canned_query = None canned_query_write = False if "table" in request.url_vars: try: await datasette.resolve_table(request) except TableNotFound as table_not_found: - # Was this actually a canned query? + # Was this actually a stored query? canned_query = await datasette.get_canned_query( table_not_found.database_name, table_not_found.table, request.actor ) @@ -1759,7 +1759,7 @@ class QueryView(View): private = False if canned_query: - # Respect canned query permissions + # Respect stored query permissions visible, private = await datasette.check_visibility( request.actor, action="view-query", @@ -1823,7 +1823,7 @@ class QueryView(View): # For regular queries we only allow SELECT, plus other rules validate_sql_select(sql) else: - # Canned queries can run magic parameters + # Stored queries can run magic parameters params_for_query = MagicParameters(sql, params, request, datasette) await params_for_query.execute_params() results = await datasette.execute( diff --git a/datasette/views/table.py b/datasette/views/table.py index 7027bb10..7b1a5a82 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -963,11 +963,11 @@ async def table_view_traced(datasette, request): try: resolved = await datasette.resolve_table(request) except TableNotFound as not_found: - # Was this actually a canned query? + # Was this actually a stored query? canned_query = await datasette.get_canned_query( not_found.database_name, not_found.table, request.actor ) - # If this is a canned query, not a table, then dispatch to QueryView instead + # If this is a stored query, not a table, then dispatch to QueryView instead if canned_query: return await QueryView()(request, datasette) else: diff --git a/docs/authentication.rst b/docs/authentication.rst index 184fec5e..22db41d8 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -468,7 +468,7 @@ You can control the following: * Access to the entire Datasette instance * Access to specific databases * Access to specific tables and views -* Access to specific :ref:`canned_queries` +* Access to specific :ref:`queries ` If a user has permission to view a table they will be able to view that table, independent of if they have permission to view the database or instance that the table exists within. @@ -641,12 +641,12 @@ This works for SQL views as well - you can list their names in the ``"tables"`` .. _authentication_permissions_query: -Access to specific canned queries ---------------------------------- +Access to specific queries +-------------------------- -:ref:`canned_queries` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. +:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. -To limit access to the ``add_name`` canned query in your ``dogs.db`` database to just the :ref:`root user`: +To limit access to the ``add_name`` query in your ``dogs.db`` database to just the :ref:`root user`: .. [[[cog config_example(cog, """ @@ -1285,7 +1285,7 @@ Actor is allowed to view a table (or view) page, e.g. https://latest.datasette.i view-query ---------- -Actor is allowed to view a saved query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted saved query also requires ``execute-sql`` or the relevant write permissions; trusted saved queries can execute with ``view-query`` alone. +Actor is allowed to view a stored query page, e.g. https://latest.datasette.io/fixtures/pragma_cache_size. Executing an untrusted stored query also requires ``execute-sql`` or the relevant write permissions; :ref:`trusted stored queries ` can execute with ``view-query`` alone. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) @@ -1308,7 +1308,7 @@ Actor is allowed to create stored queries in a database. update-query ------------ -Actor is allowed to update a saved query. +Actor is allowed to update a stored query. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) @@ -1320,7 +1320,7 @@ Actor is allowed to update a saved query. delete-query ------------ -Actor is allowed to delete a saved query. +Actor is allowed to delete a stored query. ``resource`` - ``datasette.resources.QueryResource(database, query)`` ``database`` is the name of the database (string) diff --git a/docs/configuration.rst b/docs/configuration.rst index 8c8c8a67..cf9590b8 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -87,6 +87,7 @@ This is equivalent to a ``datasette.yaml`` file containing the following: } .. [[[end]]] + .. _configuration_reference: ``datasette.yaml`` reference @@ -435,10 +436,10 @@ Here is a simple example: .. _configuration_reference_canned_queries: -Canned queries configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Queries configuration +~~~~~~~~~~~~~~~~~~~~~ -:ref:`Canned queries ` are named SQL queries that appear in the Datasette interface. They can be configured in ``datasette.yaml`` using the ``queries`` key at the database level: +:ref:`Queries ` are named SQL queries that appear in the Datasette interface. They can be configured in ``datasette.yaml`` using the ``queries`` key at the database level: .. [[[cog from metadata_doc import config_example, config_example @@ -483,7 +484,7 @@ Canned queries configuration } .. [[[end]]] -See the :ref:`canned queries documentation ` for more, including how to configure :ref:`writable canned queries `. +See the :ref:`queries documentation ` for more, including how to configure :ref:`writable queries `. .. _configuration_reference_css_js: @@ -1211,4 +1212,3 @@ For column types that accept additional configuration, use an object with ``type } } .. [[[end]]] - diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 8cc40f0f..c324fb79 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -29,7 +29,7 @@ The custom SQL template (``/dbname?sql=...``) gets this: -A canned query template (``/dbname/queryname``) gets this: +A stored query template (``/dbname/queryname``) gets this: .. code-block:: html @@ -193,8 +193,8 @@ The lookup rules Datasette uses are as follows:: query-mydatabase.html query.html - Canned query page (/mydatabase/canned-query): - query-mydatabase-canned-query.html + Stored query page (/mydatabase/query-name): + query-mydatabase-query-name.html query-mydatabase.html query.html @@ -230,7 +230,7 @@ will look something like this:: -This example is from the canned query page for a query called "tz" in the +This example is from the stored query page for a query called "tz" in the database called "mydb". The asterisk shows which template was selected - so in this case, Datasette found a template file called ``query-mydb-tz.html`` and used that - but if that template had not been found, it would have tried for diff --git a/docs/internals.rst b/docs/internals.rst index c76de487..084922f8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -725,7 +725,7 @@ The builder methods are: - ``allow_all(action)`` - allow an action across all databases and resources - ``allow_database(database, action)`` - allow an action on a specific database -- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`canned query `) within a database +- ``allow_resource(database, resource, action)`` - allow an action on a specific resource (table, SQL view or :ref:`stored query `) within a database Each method returns the ``TokenRestrictions`` instance so calls can be chained. @@ -837,10 +837,10 @@ await .get_resource_metadata(self, database_name, resource_name) ``database_name`` - string The name of the database to query. ``resource_name`` - string - The name of the resource (table, view, or canned query) inside ``database_name`` to query. + The name of the resource (table, view, or stored query) inside ``database_name`` to query. Returns metadata keys and values for the specified "resource" as a dictionary. -A "resource" in this context can be a table, view, or canned query. +A "resource" in this context can be a table, view, or stored query. Internally queries the ``metadata_resources`` table inside the :ref:`internal database `. .. _datasette_get_column_metadata: @@ -851,7 +851,7 @@ await .get_column_metadata(self, database_name, resource_name, column_name) ``database_name`` - string The name of the database to query. ``resource_name`` - string - The name of the resource (table, view, or canned query) inside ``database_name`` to query. + The name of the resource (table, view, or stored query) inside ``database_name`` to query. ``column_name`` - string The name of the column inside ``resource_name`` to query. @@ -897,7 +897,7 @@ await .set_resource_metadata(self, database_name, resource_name, key, value) ``database_name`` - string The database the metadata entry belongs to. ``resource_name`` - string - The resource (table, view, or canned query) the metadata entry belongs to. + The resource (table, view, or stored query) the metadata entry belongs to. ``key`` - string The metadata entry key to insert (ex ``title``, ``description``, etc.) ``value`` - string @@ -915,7 +915,7 @@ await .set_column_metadata(self, database_name, resource_name, column_name, key, ``database_name`` - string The database the metadata entry belongs to. ``resource_name`` - string - The resource (table, view, or canned query) the metadata entry belongs to. + The resource (table, view, or stored query) the metadata entry belongs to. ``column-name`` - string The column the metadata entry belongs to. ``key`` - string diff --git a/docs/introspection.rst b/docs/introspection.rst index d2eb8efd..7702a4b5 100644 --- a/docs/introspection.rst +++ b/docs/introspection.rst @@ -149,7 +149,7 @@ Shows currently attached databases. `Databases example /-/queries.json`` returns saved query definitions for a specific database. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. +``GET /-/queries.json`` returns stored query definitions across every database that the actor can view. ``GET //-/queries.json`` returns stored query definitions for a specific database. Use ``?_size=50`` to set the page size and ``?_next=...`` with the cursor returned by the previous page to fetch the next page. .. _QueryCreateView: -Creating saved queries in the UI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Creating stored queries in the UI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``GET //-/queries/store`` provides a form for creating stored queries. .. _QueryStoreView: .. _QueryInsertView: -Creating saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Creating stored queries +~~~~~~~~~~~~~~~~~~~~~~~ ``POST //-/queries/store`` creates a stored query. This requires ``execute-sql`` and ``store-query`` for the database. @@ -545,24 +545,24 @@ Executing write SQL .. _QueryDefinitionView: -Getting a saved query definition -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Getting a stored query definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``GET ///-/definition`` returns a saved query definition without executing it. +``GET ///-/definition`` returns a stored query definition without executing it. .. _QueryUpdateView: -Updating saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Updating stored queries +~~~~~~~~~~~~~~~~~~~~~~~ -``POST ///-/update`` updates a saved query using a JSON body with an ``"update"`` object. +``POST ///-/update`` updates a stored query using a JSON body with an ``"update"`` object. .. _QueryDeleteView: -Deleting saved queries -~~~~~~~~~~~~~~~~~~~~~~ +Deleting stored queries +~~~~~~~~~~~~~~~~~~~~~~~ -``POST ///-/delete`` deletes a saved query. +``POST ///-/delete`` deletes a stored query. .. _TableInsertView: diff --git a/docs/pages.rst b/docs/pages.rst index 34c851a5..e57c15e6 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -28,7 +28,7 @@ The index page can also be accessed at ``/-/``, useful for if the default index Database ======== -Each database has a page listing the tables, views and canned queries available for that database. If the :ref:`actions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. +Each database has a page listing the tables, views and stored queries available for that database. If the :ref:`actions_execute_sql` permission is enabled (it's on by default) there will also be an interface for executing arbitrary SQL select queries against the data. Examples: @@ -68,7 +68,7 @@ This means you can link directly to a query by constructing the following URL: ``/database-name/-/query?sql=SELECT+*+FROM+table_name`` -Each configured :ref:`canned query ` has its own page, at ``/database-name/query-name``. Viewing this page will execute the query and display the results. +Each configured :ref:`stored query ` has its own page, at ``/database-name/query-name``. Viewing this page will execute the query and display the results. In both cases adding a ``.json`` extension to the URL will return the results as JSON. diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index b2676b3e..264b473e 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -609,7 +609,7 @@ When a request is received, the ``"render"`` callback function is called with ze The SQL query that was executed. ``query_name`` - string or None - If this was the execution of a :ref:`canned query `, the name of that query. + If this was the execution of a :ref:`stored query `, the name of that query. ``database`` - string The name of the database. @@ -1212,7 +1212,7 @@ Examples: `datasette-saved-queries `__ @@ -1635,7 +1635,7 @@ register_magic_parameters(datasette) ``datasette`` - :ref:`internals_datasette` You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``. -:ref:`canned_queries_magic_parameters` can be used to add automatic parameters to :ref:`canned queries `. This plugin hook allows additional magic parameters to be defined by plugins. +:ref:`canned_queries_magic_parameters` can be used to add automatic parameters to :ref:`configured queries `. This plugin hook allows additional magic parameters to be defined by plugins. Magic parameters all take this format: ``_prefix_rest_of_parameter``. The prefix indicates which magic parameter function should be called - the rest of the parameter is passed as an argument to that function. @@ -1828,7 +1828,7 @@ jump_items_sql(datasette, actor, request) This hook allows plugins to add extra results to Datasette's ``/`` jump menu, which is powered by the ``/-/jump`` JSON endpoint. -Return a ``datasette.jump.JumpSQL`` object, or a list of ``JumpSQL`` objects. Each ``JumpSQL`` object wraps a SQL query to be searched alongside Datasette's own databases, tables, views and canned query results. The hook can also be an ``async def`` function, or return an awaitable that resolves to one of these values. +Return a ``datasette.jump.JumpSQL`` object, or a list of ``JumpSQL`` objects. Each ``JumpSQL`` object wraps a SQL query to be searched alongside Datasette's own databases, tables, views and stored query results. The hook can also be an ``async def`` function, or return an awaitable that resolves to one of these values. ``JumpSQL`` queries run against Datasette's internal database by default. To run a query against another database, pass its name as the optional ``database=`` argument. For example, ``JumpSQL(database="content", sql="...")`` runs against the ``content`` database. @@ -2004,7 +2004,7 @@ query_actions(datasette, actor, database, query_name, request, sql, params) The name of the database. ``query_name`` - string or None - The name of the canned query, or ``None`` if this is an arbitrary SQL query. + The name of the stored query, or ``None`` if this is an arbitrary SQL query. ``request`` - :ref:`internals_request` The current HTTP request. @@ -2015,7 +2015,7 @@ query_actions(datasette, actor, database, query_name, request, sql, params) ``params`` - dictionary The parameters passed to the SQL query, if any. -Populates a "Query actions" menu on the canned query and arbitrary SQL query pages. +Populates a "Query actions" menu on the stored query and arbitrary SQL query pages. This example adds a new query action linking to a page for explaining a query: @@ -2294,9 +2294,9 @@ top_canned_query(datasette, request, database, query_name) The name of the database. ``query_name`` - string - The name of the canned query. + The name of the stored query. -Returns HTML to be displayed at the top of the canned query page. +Returns HTML to be displayed at the top of the stored query page. .. _plugin_event_tracking: diff --git a/docs/spatialite.rst b/docs/spatialite.rst index c93c1e00..1999ab78 100644 --- a/docs/spatialite.rst +++ b/docs/spatialite.rst @@ -30,7 +30,7 @@ Warning The following steps are recommended: - Disable arbitrary SQL queries by untrusted users. See :ref:`authentication_permissions_execute_sql` for ways to do this. The easiest is to start Datasette with the ``datasette --setting default_allow_sql off`` option. - - Define :ref:`canned_queries` with the SQL queries that use SpatiaLite functions that you want people to be able to execute. + - Define :ref:`queries ` with the SQL queries that use SpatiaLite functions that you want people to be able to execute. The `Datasette SpatiaLite tutorial `__ includes detailed instructions for running SpatiaLite safely using these techniques diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index 7c3cd4ac..d60656e3 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -68,10 +68,10 @@ You can also use the `sqlite-utils `__ tool .. _canned_queries: -Canned queries --------------- +Queries +------- -As an alternative to adding views to your database, you can define canned queries inside your ``datasette.yaml`` file. Here's an example: +As an alternative to adding views to your database, you can define named queries inside your ``datasette.yaml`` file. Here's an example: .. [[[cog from metadata_doc import config_example, config_example @@ -120,24 +120,67 @@ Then run Datasette like this:: datasette sf-trees.db -m metadata.json -Each canned query will be listed on the database index page, and will also get its own URL at:: +Each configured query will be listed on the database index page, and will also get its own URL at:: - /database-name/canned-query-name + /database-name/query-name For the above example, that URL would be:: /sf-trees/just_species -You can optionally include ``"title"`` and ``"description"`` keys to show a title and description on the canned query page. As with regular table metadata you can alternatively specify ``"description_html"`` to have your description rendered as HTML (rather than having HTML special characters escaped). +You can optionally include ``"title"`` and ``"description"`` keys to show a title and description on the query page. As with regular table metadata you can alternatively specify ``"description_html"`` to have your description rendered as HTML (rather than having HTML special characters escaped). + +.. _stored_queries: +.. _saved_queries: + +Stored queries +~~~~~~~~~~~~~~ + +Datasette stores both configured queries and user-created queries in the ``queries`` table in the :ref:`internal database `. Configured queries come from the ``queries`` section of ``datasette.yaml``. User-created stored queries can be created from the SQL query page by actors with the :ref:`actions_store_query` and :ref:`actions_execute_sql` permissions. Writable stored queries also require the permissions needed for the writes they perform. + +Stored queries created by users default to private. Private stored queries can only be viewed, updated or deleted by the actor that created them. Broad ``view-query``, ``update-query`` or ``delete-query`` permission grants still do not allow other actors to access another actor's private stored queries. + +Stored queries created by users are untrusted. This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. + +.. _trusted_stored_queries: +.. _trusted_saved_queries: + +Trusted stored queries +++++++++++++++++++++++ + +A trusted stored query can execute with ``view-query`` permission alone. It skips the additional ``execute-sql`` and write permission checks that are applied to untrusted stored queries. + +Trusted stored queries should only be used for SQL that has been reviewed by someone trusted to configure the Datasette instance. For that reason, trusted stored queries can only be added using configuration. Users cannot create trusted stored queries through the web interface or the stored query JSON API. + +Queries defined in ``datasette.yaml`` are trusted by default: + +.. code-block:: yaml + + databases: + mydatabase: + queries: + report: + sql: select * from report + +You can opt out of this behavior for a configured query using ``is_trusted: false``: + +.. code-block:: yaml + + databases: + mydatabase: + queries: + report: + sql: select * from report + is_trusted: false .. _canned_queries_named_parameters: -Canned query parameters -~~~~~~~~~~~~~~~~~~~~~~~ +Query parameters +~~~~~~~~~~~~~~~~ -Canned queries support named parameters, so if you include those in the SQL you will then be able to enter them using the form fields on the canned query page or by adding them to the URL. This means canned queries can be used to create custom JSON APIs based on a carefully designed SQL statement. +Configured queries support named parameters, so if you include those in the SQL you will then be able to enter them using the form fields on the query page or by adding them to the URL. This means configured queries can be used to create custom JSON APIs based on a carefully designed SQL statement. -Here's an example of a canned query with a named parameter: +Here's an example of a configured query with a named parameter: .. code-block:: sql @@ -147,7 +190,7 @@ Here's an example of a canned query with a named parameter: where neighborhood like '%' || :text || '%' order by neighborhood; -In the canned query configuration looks like this: +The query configuration looks like this: .. [[[cog @@ -204,7 +247,7 @@ In the canned query configuration looks like this: Note that we are using SQLite string concatenation here - the ``||`` operator - to add wildcard ``%`` characters to the string provided by the user. -You can try this canned query out here: +You can try this query out here: https://latest.datasette.io/fixtures/neighborhood_search?text=town In this example the ``:text`` named parameter is automatically extracted from the query using a regular expression. @@ -272,15 +315,15 @@ You can alternatively provide an explicit list of named parameters using the ``" .. _canned_queries_options: -Additional canned query options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Additional query options +~~~~~~~~~~~~~~~~~~~~~~~~ -Additional options can be specified for canned queries in the YAML or JSON configuration. +Additional options can be specified for configured queries in the YAML or JSON configuration. hide_sql ++++++++ -Canned queries default to displaying their SQL query at the top of the page. If the query is extremely long you may want to hide it by default, with a "show" link that can be used to make it visible. +Configured queries default to displaying their SQL query at the top of the page. If the query is extremely long you may want to hide it by default, with a "show" link that can be used to make it visible. Add the ``"hide_sql": true`` option to hide the SQL query by default. @@ -289,7 +332,7 @@ fragment Some plugins, such as `datasette-vega `__, can be configured by including additional data in the fragment hash of the URL - the bit that comes after a ``#`` symbol. -You can set a default fragment hash that will be included in the link to the canned query from the database index page using the ``"fragment"`` key. +You can set a default fragment hash that will be included in the link to the query from the database index page using the ``"fragment"`` key. This example demonstrates both ``fragment`` and ``hide_sql``: @@ -348,12 +391,12 @@ This example demonstrates both ``fragment`` and ``hide_sql``: .. _canned_queries_writable: -Writable canned queries -~~~~~~~~~~~~~~~~~~~~~~~ +Writable queries +~~~~~~~~~~~~~~~~ -Canned queries by default are read-only. You can use the ``"write": true`` key to indicate that a canned query can write to the database. +Configured queries are read-only by default. You can use the ``"write": true`` key to indicate that a query can write to the database. -See :ref:`authentication_permissions_query` for details on how to add permission checks to canned queries, using the ``"allow"`` key. +See :ref:`authentication_permissions_query` for details on how to add permission checks to queries, using the ``"allow"`` key. .. [[[cog config_example(cog, { @@ -488,7 +531,7 @@ Magic parameters Named parameters that start with an underscore are special: they can be used to automatically add values created by Datasette that are not contained in the incoming form fields or query string. -These magic parameters are only supported for canned queries: to avoid security issues (such as queries that extract the user's private cookies) they are not available to SQL that is executed by the user as a custom SQL query. +These magic parameters are only supported for configured queries: to avoid security issues (such as queries that extract the user's private cookies) they are not available to SQL that is executed by the user as a custom SQL query. Available magic parameters are: @@ -580,12 +623,12 @@ Additional custom magic parameters can be added by plugins using the :ref:`plugi .. _canned_queries_json_api: -JSON API for writable canned queries -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +JSON API for writable queries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Writable canned queries can also be accessed using a JSON API. You can POST data to them using JSON, and you can request that their response is returned to you as JSON. +Writable queries can also be accessed using a JSON API. You can POST data to them using JSON, and you can request that their response is returned to you as JSON. -To submit JSON to a writable canned query, encode key/value parameters as a JSON document:: +To submit JSON to a writable query, encode key/value parameters as a JSON document:: POST /mydatabase/add_message diff --git a/tests/test_html.py b/tests/test_html.py index 9e460da1..8edb9f6e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -154,7 +154,7 @@ async def test_database_page(ds_client): ("/fixtures/simple_view", "simple_view"), ] == sorted([(a["href"], a.text) for a in views_ul.find_all("a")]) - # And a list of canned queries + # And a list of stored queries queries_ul = soup.find("h2", string="Queries").find_next_sibling("ul") assert queries_ul is not None assert [ @@ -701,7 +701,7 @@ async def test_show_hide_sql_query(ds_client): @pytest.mark.asyncio async def test_canned_query_with_hide_has_no_hidden_sql(ds_client): - # For a canned query the show/hide should NOT have a hidden SQL field + # For a stored query the show/hide should NOT have a hidden SQL field # https://github.com/simonw/datasette/issues/1411 response = await ds_client.get("/fixtures/pragma_cache_size?_hide_sql=1") soup = Soup(response.content, "html.parser") @@ -1106,7 +1106,7 @@ async def test_trace_correctly_escaped(ds_client): "/fixtures/-/query?sql=select+*+from+facetable", "http://localhost/fixtures/-/query.json?sql=select+*+from+facetable", ), - # Canned query page + # Stored query page ( "/fixtures/neighborhood_search?text=town", "http://localhost/fixtures/neighborhood_search.json?text=town", diff --git a/tests/test_permissions.py b/tests/test_permissions.py index eb6cee9f..0e38c876 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -890,7 +890,7 @@ PermConfigTestCase = collections.namedtuple( resource=("perms_ds_one", "t1"), expected_result=True, ), - # view-query on canned query, wrong actor + # view-query on stored query, wrong actor PermConfigTestCase( config={ "databases": { @@ -909,7 +909,7 @@ PermConfigTestCase = collections.namedtuple( resource=("perms_ds_one", "q1"), expected_result=False, ), - # view-query on canned query, right actor + # view-query on stored query, right actor PermConfigTestCase( config={ "databases": { From b1029acc68626c2fddf7b678adc3339be0fce6e0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:05:41 -0700 Subject: [PATCH 1172/1218] top_canned_query is now top_stored_query, closes #2747 --- datasette/hookspecs.py | 2 +- datasette/templates/query.html | 2 +- datasette/views/database.py | 8 ++++---- docs/changelog.rst | 1 + docs/plugin_hooks.rst | 4 ++-- tests/test_plugins.py | 10 ++++++---- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 22da02a4..dcd502af 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -228,7 +228,7 @@ def top_query(datasette, request, database, sql): @hookspec -def top_canned_query(datasette, request, database, query_name): +def top_stored_query(datasette, request, database, query_name): """HTML to include at the top of the stored query page""" diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 785b05af..3f03424a 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -33,7 +33,7 @@ {% set action_links, action_title = query_actions(), "Query actions" %} {% include "_action_menu.html" %} -{% if canned_query %}{{ top_canned_query() }}{% else %}{{ top_query() }}{% endif %} +{% if canned_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} diff --git a/datasette/views/database.py b/datasette/views/database.py index f30d3815..def3c530 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -339,8 +339,8 @@ class QueryContext(Context): top_query: callable = field( metadata={"help": "Callable to render the top_query slot"} ) - top_canned_query: callable = field( - metadata={"help": "Callable to render the top_canned_query slot"} + top_stored_query: callable = field( + metadata={"help": "Callable to render the top_stored_query slot"} ) query_actions: callable = field( metadata={ @@ -2095,8 +2095,8 @@ class QueryView(View): top_query=make_slot_function( "top_query", datasette, request, database=database, sql=sql ), - top_canned_query=make_slot_function( - "top_canned_query", + top_stored_query=make_slot_function( + "top_stored_query", datasette, request, database=database, diff --git a/docs/changelog.rst b/docs/changelog.rst index dfb2a736..300ac02f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,7 @@ Unreleased ---------- - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) +- The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) .. _v1_0_a30: diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 264b473e..4737ca03 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -2279,9 +2279,9 @@ top_query(datasette, request, database, sql) Returns HTML to be displayed at the top of the query results page. -.. _plugin_hook_top_canned_query: +.. _plugin_hook_top_stored_query: -top_canned_query(datasette, request, database, query_name) +top_stored_query(datasette, request, database, query_name) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``datasette`` - :ref:`internals_datasette` diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f7adbd66..32276437 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1486,8 +1486,10 @@ class SlotPlugin: return "Xtop_query:{}:{}:{}".format(database, sql, request.args["z"]) @hookimpl - def top_canned_query(self, request, database, query_name): - return "Xtop_query:{}:{}:{}".format(database, query_name, request.args["z"]) + def top_stored_query(self, request, database, query_name): + return "Xtop_stored_query:{}:{}:{}".format( + database, query_name, request.args["z"] + ) @pytest.mark.asyncio @@ -1548,12 +1550,12 @@ async def test_hook_top_query(ds_client): @pytest.mark.asyncio -async def test_hook_top_canned_query(ds_client): +async def test_hook_top_stored_query(ds_client): try: pm.register(SlotPlugin(), name="SlotPlugin") response = await ds_client.get("/fixtures/magic_parameters?z=xyz") assert response.status_code == 200 - assert "Xtop_query:fixtures:magic_parameters:xyz" in response.text + assert "Xtop_stored_query:fixtures:magic_parameters:xyz" in response.text finally: pm.unregister(name="SlotPlugin") From 2f73869c09962e320e5f40f4691df70618cd052e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:09:48 -0700 Subject: [PATCH 1173/1218] Document that canned_queries() has been removed --- docs/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 300ac02f..674ff5b3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,6 +11,7 @@ Unreleased - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) - The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) +- The ``canned_queries()`` plugin hook has been removed. Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to managed stored queries instead. .. _v1_0_a30: From 56b14f37d547e03ba902516ac9ae13ef52765f77 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:16:18 -0700 Subject: [PATCH 1174/1218] The stored queries do not live in that DB --- docs/authentication.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/authentication.rst b/docs/authentication.rst index 22db41d8..86df7f04 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1298,7 +1298,7 @@ Actor is allowed to view a stored query page, e.g. https://latest.datasette.io/f store-query ----------- -Actor is allowed to create stored queries in a database. +Actor is allowed to create stored queries against a database. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) From 02a1468f1b3c8c14fb80037686b43de856e49c1f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 15:17:51 -0700 Subject: [PATCH 1175/1218] Renamed canned queries to queries / stored queries in docs And a few renames in code and YAML as well. --- .github/workflows/deploy-latest.yml | 33 +- datasette/app.py | 7 - datasette/facets.py | 2 +- datasette/static/app.css | 2 +- datasette/templates/query.html | 18 +- datasette/views/database.py | 92 +++--- datasette/views/table.py | 6 +- docs/authentication.rst | 10 +- docs/changelog.rst | 23 +- docs/configuration.rst | 6 +- docs/plugin_hooks.rst | 12 +- docs/spatialite.rst | 2 +- docs/sql_queries.rst | 12 +- docs/upgrade-1.0a20.md | 6 +- tests/test_canned_queries.py | 473 ---------------------------- tests/test_html.py | 12 +- tests/test_jump.py | 4 +- 17 files changed, 115 insertions(+), 605 deletions(-) delete mode 100644 tests/test_canned_queries.py diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 7d8dd37d..166d33d0 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -57,7 +57,7 @@ jobs: db.route = "alternative-route" ' > plugins/alternative_route.py cp fixtures.db fixtures2.db - - name: And the counters writable canned query demo + - name: And the counters writable stored query demo run: | cat > plugins/counters.py <This query cannot be executed because the database is immutable.

{% endif %} -

{{ metadata.title or database }}{% if canned_query and not metadata.title %}: {{ canned_query }}{% endif %}{% if private %} ๐Ÿ”’{% endif %}

+

{{ metadata.title or database }}{% if stored_query and not metadata.title %}: {{ stored_query }}{% endif %}{% if private %} ๐Ÿ”’{% endif %}

{% set action_links, action_title = query_actions(), "Query actions" %} {% include "_action_menu.html" %} -{% if canned_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} +{% if stored_query %}{{ top_stored_query() }}{% else %}{{ top_query() }}{% endif %} {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

@@ -52,7 +52,7 @@
{% if query %}{{ query.sql }}{% endif %}
{% endif %} {% else %} - {% if not canned_query %} + {% if not stored_query %} @@ -64,10 +64,10 @@ {% include "_sql_parameters.html" %}

{% if not hide_sql %}{% endif %} - + {{ show_hide_hidden }} {% if save_query_url %}Save this query{% endif %} - {% if canned_query and edit_sql_url %}Edit SQL{% endif %} + {% if stored_query and edit_sql_url %}Edit SQL{% endif %}

@@ -90,7 +90,7 @@
Required permission
{% else %} - {% if not canned_query_write and not error %} + {% if not stored_query_write and not error %}

0 results

{% endif %} {% endif %} diff --git a/datasette/views/database.py b/datasette/views/database.py index def3c530..c36476f6 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -100,12 +100,12 @@ class DatabaseView(View): limit=5, include_private=True, ) - canned_queries = queries_page["queries"] + stored_queries = queries_page["queries"] queries_more = queries_page["has_more"] queries_count = ( await datasette.count_queries(database, actor=request.actor) if queries_more - else len(canned_queries) + else len(stored_queries) ) async def database_actions(): @@ -137,7 +137,7 @@ class DatabaseView(View): "tables": tables, "hidden_count": len([t for t in tables if t["hidden"]]), "views": sql_views, - "queries": canned_queries, + "queries": stored_queries, "queries_more": queries_more, "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, @@ -172,7 +172,7 @@ class DatabaseView(View): tables=tables, hidden_count=len([t for t in tables if t["hidden"]]), views=sql_views, - queries=canned_queries, + queries=stored_queries, queries_more=queries_more, queries_count=queries_count, allow_execute_sql=allow_execute_sql, @@ -271,7 +271,7 @@ class QueryContext(Context): query: dict = field( metadata={"help": "The SQL query object containing the `sql` string"} ) - canned_query: str = field( + stored_query: str = field( metadata={"help": "The name of the stored query if this is a stored query"} ) private: bool = field( @@ -280,7 +280,7 @@ class QueryContext(Context): # urls: dict = field( # metadata={"help": "Object containing URL helpers like `database()`"} # ) - canned_query_write: bool = field( + stored_query_write: bool = field( metadata={ "help": "Boolean indicating if this is a stored query that allows writes" } @@ -1629,10 +1629,10 @@ class QueryView(View): await datasette.resolve_table(request) table_found = True except TableNotFound as table_not_found: - canned_query = await datasette.get_canned_query( - table_not_found.database_name, table_not_found.table, request.actor + stored_query = await datasette.get_query( + table_not_found.database_name, table_not_found.table ) - if canned_query is None: + if stored_query is None: raise if table_found: # That should not have happened @@ -1640,13 +1640,13 @@ class QueryView(View): if not await datasette.allowed( action="view-query", - resource=QueryResource(database=db.name, query=canned_query["name"]), + resource=QueryResource(database=db.name, query=stored_query["name"]), actor=request.actor, ): raise Forbidden("You do not have permission to view this query") await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor + datasette, db, stored_query, request.actor ) # If database is immutable, return an error @@ -1674,19 +1674,19 @@ class QueryView(View): or params.get("_json") ) params_for_query = MagicParameters( - canned_query["sql"], params, request, datasette + stored_query["sql"], params, request, datasette ) await params_for_query.execute_params() ok = None redirect_url = None try: cursor = await db.execute_write( - canned_query["sql"], params_for_query, request=request + stored_query["sql"], params_for_query, request=request ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO - on_success_message_sql = canned_query.get("on_success_message_sql") + on_success_message_sql = stored_query.get("on_success_message_sql") if on_success_message_sql: try: message_result = ( @@ -1698,18 +1698,18 @@ class QueryView(View): message = "Error running on_success_message_sql: {}".format(ex) message_type = datasette.ERROR if not message: - message = canned_query.get( + message = stored_query.get( "on_success_message" ) or "Query executed, {} row{} affected".format( cursor.rowcount, "" if cursor.rowcount == 1 else "s" ) - redirect_url = canned_query.get("on_success_redirect") + redirect_url = stored_query.get("on_success_redirect") ok = True except Exception as ex: - message = canned_query.get("on_error_message") or str(ex) + message = stored_query.get("on_error_message") or str(ex) message_type = datasette.ERROR - redirect_url = canned_query.get("on_error_redirect") + redirect_url = stored_query.get("on_error_redirect") ok = False if should_return_json: return Response.json( @@ -1743,33 +1743,33 @@ class QueryView(View): allowed_dict = {r.child: r for r in allowed_tables_page.resources} # Are we a stored query? - canned_query = None - canned_query_write = False + stored_query = None + stored_query_write = False if "table" in request.url_vars: try: await datasette.resolve_table(request) except TableNotFound as table_not_found: # Was this actually a stored query? - canned_query = await datasette.get_canned_query( - table_not_found.database_name, table_not_found.table, request.actor + stored_query = await datasette.get_query( + table_not_found.database_name, table_not_found.table ) - if canned_query is None: + if stored_query is None: raise - canned_query_write = bool(canned_query.get("write")) + stored_query_write = bool(stored_query.get("write")) private = False - if canned_query: + if stored_query: # Respect stored query permissions visible, private = await datasette.check_visibility( request.actor, action="view-query", - resource=QueryResource(database=database, query=canned_query["name"]), + resource=QueryResource(database=database, query=stored_query["name"]), ) if not visible: raise Forbidden("You do not have permission to view this query") - if not canned_query_write: + if not stored_query_write: await _ensure_stored_query_execution_permissions( - datasette, db, canned_query, request.actor + datasette, db, stored_query, request.actor ) else: @@ -1783,15 +1783,15 @@ class QueryView(View): params = {key: request.args.get(key) for key in request.args} sql = None - if canned_query: - sql = canned_query["sql"] + if stored_query: + sql = stored_query["sql"] elif "sql" in params: sql = params.pop("sql") # Extract any :named parameters named_parameters = [] - if canned_query and canned_query.get("params"): - named_parameters = canned_query["params"] + if stored_query and stored_query.get("params"): + named_parameters = stored_query["params"] if not named_parameters and sql: named_parameters = derive_named_parameters(sql) named_parameter_values = { @@ -1817,9 +1817,9 @@ class QueryView(View): params_for_query = params - if sql and not canned_query_write: + if sql and not stored_query_write: try: - if not canned_query: + if not stored_query: # For regular queries we only allow SELECT, plus other rules validate_sql_select(sql) else: @@ -1879,7 +1879,7 @@ class QueryView(View): columns=columns, rows=rows, sql=sql, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, database=database, table=None, request=request, @@ -1911,10 +1911,10 @@ class QueryView(View): elif format_ == "html": headers = {} templates = [f"query-{to_css_class(database)}.html", "query.html"] - if canned_query: + if stored_query: templates.insert( 0, - f"query-{to_css_class(database)}-{to_css_class(canned_query['name'])}.html", + f"query-{to_css_class(database)}-{to_css_class(stored_query['name'])}.html", ) environment = datasette.get_jinja_environment(request) @@ -1932,8 +1932,8 @@ class QueryView(View): } ) metadata = await datasette.get_database_metadata(database) - if canned_query: - metadata = dict(canned_query) + if stored_query: + metadata = dict(stored_query) metadata.pop("source", None) renderers = {} @@ -1968,7 +1968,7 @@ class QueryView(View): ) show_hide_hidden = "" - if canned_query and canned_query.get("hide_sql"): + if stored_query and stored_query.get("hide_sql"): if bool(params.get("_show_sql")): show_hide_link = path_with_removed_args(request, {"_show_sql"}) show_hide_text = "hide" @@ -2018,7 +2018,7 @@ class QueryView(View): ) save_query_url = None if ( - not canned_query + not stored_query and allow_execute_sql and allow_store_query and is_validated_sql @@ -2036,7 +2036,7 @@ class QueryView(View): datasette=datasette, actor=request.actor, database=database, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, request=request, sql=sql, params=params, @@ -2056,15 +2056,15 @@ class QueryView(View): "sql": sql, "params": params, }, - canned_query=canned_query["name"] if canned_query else None, + stored_query=stored_query["name"] if stored_query else None, private=private, - canned_query_write=canned_query_write, + stored_query_write=stored_query_write, db_is_immutable=not db.is_mutable, error=query_error, hide_sql=hide_sql, show_hide_link=datasette.urls.path(show_hide_link), show_hide_text=show_hide_text, - editable=not canned_query, + editable=not stored_query, allow_execute_sql=allow_execute_sql, save_query_url=save_query_url, tables=await get_tables(datasette, request, db, allowed_dict), @@ -2100,7 +2100,7 @@ class QueryView(View): datasette, request, database=database, - query_name=canned_query["name"] if canned_query else None, + query_name=stored_query["name"] if stored_query else None, ), query_actions=query_actions, ), diff --git a/datasette/views/table.py b/datasette/views/table.py index 7b1a5a82..da69c6b5 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -964,11 +964,11 @@ async def table_view_traced(datasette, request): resolved = await datasette.resolve_table(request) except TableNotFound as not_found: # Was this actually a stored query? - canned_query = await datasette.get_canned_query( - not_found.database_name, not_found.table, request.actor + stored_query = await datasette.get_query( + not_found.database_name, not_found.table ) # If this is a stored query, not a table, then dispatch to QueryView instead - if canned_query: + if stored_query: return await QueryView()(request, datasette) else: raise diff --git a/docs/authentication.rst b/docs/authentication.rst index 86df7f04..cec47f97 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -121,7 +121,7 @@ This configuration will deny access to everyone except the user with ``id`` of ` How permissions are resolved ---------------------------- -Datasette performs permission checks using the internal :ref:`datasette_allowed`, method which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. +Datasette performs permission checks using the internal :ref:`datasette_allowed`, method which accepts keyword arguments for ``action``, ``resource`` and an optional ``actor``. ``resource`` should be an instance of the appropriate ``Resource`` subclass from :mod:`datasette.resources`โ€”for example ``InstanceResource()``, ``DatabaseResource(database="...``)`` or ``TableResource(database="...", table="...")``. This defaults to ``InstanceResource()`` if not specified. @@ -468,7 +468,7 @@ You can control the following: * Access to the entire Datasette instance * Access to specific databases * Access to specific tables and views -* Access to specific :ref:`queries ` +* Access to specific :ref:`queries ` If a user has permission to view a table they will be able to view that table, independent of if they have permission to view the database or instance that the table exists within. @@ -496,7 +496,7 @@ Here's how to restrict access to your entire Datasette instance to just the ``"i title: My private Datasette instance allow: id: root - + .. tab:: datasette.json @@ -644,7 +644,7 @@ This works for SQL views as well - you can list their names in the ``"tables"`` Access to specific queries -------------------------- -:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. +:ref:`Queries ` allow you to configure named SQL queries in your ``datasette.yaml`` that can be executed by users. These queries can be set up to both read and write to the database, so controlling who can execute them can be important. To limit access to the ``add_name`` query in your ``dogs.db`` database to just the :ref:`root user`: @@ -1020,7 +1020,7 @@ You can also restrict permissions such that they can only be used within specifi The resulting token will only be able to insert rows, and only to tables in the ``mydatabase`` database. -Finally, you can restrict permissions to individual resources - tables, SQL views and :ref:`named queries ` - within a specific database:: +Finally, you can restrict permissions to individual resources - tables, SQL views and :ref:`named queries ` - within a specific database:: datasette create-token root --resource mydatabase mytable insert-row diff --git a/docs/changelog.rst b/docs/changelog.rst index 674ff5b3..d15dec50 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,7 +11,8 @@ Unreleased - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) - The ``top_canned_query()`` plugin hook has been renamed to :ref:`top_stored_query() `. (:issue:`2747`) -- The ``canned_queries()`` plugin hook has been removed. Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to managed stored queries instead. +- The ``canned_queries()`` plugin hook has been removed. Plugins can use the new ``datasette.add_query()``, ``datasette.update_query()`` and ``datasette.remove_query()`` methods to manage stored queries instead. +- The ``datasette.get_canned_query()`` and ``datasette.get_canned_queries()`` methods have been removed. Plugins can use ``datasette.get_query()`` and ``datasette.list_queries()`` instead. .. _v1_0_a30: @@ -658,7 +659,7 @@ For more information and workarounds, read `the security advisory `` in a `` -

+

+ + {% if save_query_base_url %}Save this query{% endif %} +

", + "on_success_message_sql": "select 'secret'", + } + }, + ) + form_response = await ds.client.post( + "/data/-/queries/store", + actor={"id": "root"}, + data={ + "name": "unsafe_form", + "sql": "select 1", + "description_html": "", + }, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == [ + "Invalid keys: description_html, on_success_message_sql" + ] + assert form_response.status_code == 400 + assert "Invalid keys: description_html" in form_response.text + assert await ds.get_query("data", "unsafe") is None + assert await ds.get_query("data", "unsafe_form") is None + + @pytest.mark.asyncio async def test_query_store_api_creates_writable_query(): ds = Datasette(memory=True, default_deny=True) @@ -959,6 +1000,42 @@ async def test_query_update_and_delete_api(): assert await ds.get_query("data", "editable") is None +@pytest.mark.asyncio +async def test_query_update_api_rejects_config_only_fields(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("query_update_config_only_fields", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + await ds.add_query( + "data", + "editable", + "insert into dogs (name) values (:name)", + is_write=True, + source="user", + owner_id="root", + ) + + response = await ds.client.post( + "/data/editable/-/update", + actor={"id": "root"}, + json={ + "update": { + "description_html": "", + "on_success_message_sql": "select 'secret'", + } + }, + ) + + assert response.status_code == 400 + assert response.json()["errors"] == [ + "Invalid keys: description_html, on_success_message_sql" + ] + query = await ds.get_query("data", "editable") + assert query["description_html"] is None + assert query["on_success_message_sql"] is None + + @pytest.mark.asyncio async def test_query_update_api_rejects_trusted_queries_but_internal_update_allowed(): ds = Datasette( From b1289a73f9869e83a433a088c2a6c48285e67f2d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 16:51:00 -0700 Subject: [PATCH 1192/1218] stored_queries.StoredQuery dataclass --- datasette/app.py | 102 ++++++------ datasette/stored_queries.py | 258 ++++++++++++++++++++---------- datasette/views/database.py | 56 +++---- datasette/views/query_helpers.py | 19 +-- datasette/views/stored_queries.py | 37 +++-- docs/internals.rst | 14 +- tests/test_queries.py | 128 +++++++-------- 7 files changed, 357 insertions(+), 257 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 96683895..56b89789 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1029,8 +1029,8 @@ class Datasette: ) @staticmethod - def _query_row_to_dict(row): - return stored_queries.query_row_to_dict(row) + def _query_row_to_stored_query(row) -> stored_queries.StoredQuery | None: + return stored_queries.query_row_to_stored_query(row) @staticmethod def _query_options_json(options): @@ -1038,28 +1038,28 @@ class Datasette: async def add_query( self, - database, - name, - sql, + database: str, + name: str, + sql: str, *, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, - ): + title: str | None = None, + description: str | None = None, + description_html: str | None = None, + hide_sql: bool = False, + fragment: str | None = None, + parameters: Iterable[str] | None = None, + is_write: bool = False, + is_private: bool = False, + is_trusted: bool = False, + source: str = "plugin", + owner_id: str | None = None, + on_success_message: str | None = None, + on_success_message_sql: str | None = None, + on_success_redirect: str | None = None, + on_error_message: str | None = None, + on_error_redirect: str | None = None, + replace: bool = True, + ) -> None: return await stored_queries.add_query( self, database, @@ -1086,8 +1086,8 @@ class Datasette: async def update_query( self, - database, - name, + database: str, + name: str, *, sql=stored_queries.UNCHANGED, title=stored_queries.UNCHANGED, @@ -1106,7 +1106,7 @@ class Datasette: on_success_redirect=stored_queries.UNCHANGED, on_error_message=stored_queries.UNCHANGED, on_error_redirect=stored_queries.UNCHANGED, - ): + ) -> None: return await stored_queries.update_query( self, database, @@ -1130,24 +1130,28 @@ class Datasette: on_error_redirect=on_error_redirect, ) - async def remove_query(self, database, name, source=None): + async def remove_query( + self, database: str, name: str, source: str | None = None + ) -> None: return await stored_queries.remove_query(self, database, name, source=source) - async def get_query(self, database, name): + async def get_query( + self, database: str, name: str + ) -> stored_queries.StoredQuery | None: return await stored_queries.get_query(self, database, name) async def count_queries( self, - database=None, + database: str | None = None, *, - actor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - ): + actor: dict[str, Any] | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + ) -> int: return await stored_queries.count_queries( self, database, @@ -1162,19 +1166,19 @@ class Datasette: async def list_queries( self, - database=None, + database: str | None = None, *, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - include_private=False, - ): + actor: dict[str, Any] | None = None, + limit: int = 50, + cursor: str | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + include_private: bool = False, + ) -> stored_queries.StoredQueryPage: return await stored_queries.list_queries( self, database, diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index a28b71bf..bcfdfdb4 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -1,6 +1,8 @@ from __future__ import annotations +from dataclasses import dataclass import json +from typing import Any, Iterable from .resources import TableResource from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components @@ -19,7 +21,76 @@ QUERY_OPTION_FIELDS = ( ) -async def save_queries_from_config(datasette): +@dataclass +class StoredQuery: + database: str + name: str + sql: str + title: str | None + description: str | None + description_html: str | None + hide_sql: bool + fragment: str | None + parameters: list[str] + is_write: bool + is_private: bool + is_trusted: bool + source: str + owner_id: str | None + on_success_message: str | None + on_success_message_sql: str | None + on_success_redirect: str | None + on_error_message: str | None + on_error_redirect: str | None + private: bool | None = None + + +@dataclass +class StoredQueryPage: + queries: list[StoredQuery] + next: str | None + has_more: bool + limit: int + + +def stored_query_to_dict(query: StoredQuery) -> dict[str, Any]: + data = { + "database": query.database, + "name": query.name, + "sql": query.sql, + "title": query.title, + "description": query.description, + "description_html": query.description_html, + "hide_sql": query.hide_sql, + "fragment": query.fragment, + "params": list(query.parameters), + "parameters": list(query.parameters), + "is_write": query.is_write, + "is_private": query.is_private, + "is_trusted": query.is_trusted, + "source": query.source, + "owner_id": query.owner_id, + "on_success_message": query.on_success_message, + "on_success_message_sql": query.on_success_message_sql, + "on_success_redirect": query.on_success_redirect, + "on_error_message": query.on_error_message, + "on_error_redirect": query.on_error_redirect, + } + if query.private is not None: + data["private"] = query.private + return data + + +def stored_query_page_to_dict(page: StoredQueryPage) -> dict[str, Any]: + return { + "queries": [stored_query_to_dict(query) for query in page.queries], + "next": page.next, + "has_more": page.has_more, + "limit": page.limit, + } + + +async def save_queries_from_config(datasette: Any) -> None: # Apply configured query entries from datasette.yaml to the internal table. await datasette.get_internal_database().execute_write( "DELETE FROM queries WHERE source = 'config'" @@ -50,36 +121,38 @@ async def save_queries_from_config(datasette): ) -def query_row_to_dict(row): +def query_row_to_stored_query( + row: Any, private: bool | None = None +) -> StoredQuery | None: if row is None: return None parameters = json.loads(row["parameters"] or "[]") options = json.loads(row["options"] or "{}") - return { - "database": row["database_name"], - "name": row["name"], - "sql": row["sql"], - "title": row["title"], - "description": row["description"], - "description_html": row["description_html"], - "hide_sql": bool(options.get("hide_sql")), - "fragment": options.get("fragment"), - "params": parameters, - "parameters": parameters, - "is_write": bool(row["is_write"]), - "is_private": bool(row["is_private"]), - "is_trusted": bool(row["is_trusted"]), - "source": row["source"], - "owner_id": row["owner_id"], - "on_success_message": options.get("on_success_message"), - "on_success_message_sql": options.get("on_success_message_sql"), - "on_success_redirect": options.get("on_success_redirect"), - "on_error_message": options.get("on_error_message"), - "on_error_redirect": options.get("on_error_redirect"), - } + return StoredQuery( + database=row["database_name"], + name=row["name"], + sql=row["sql"], + title=row["title"], + description=row["description"], + description_html=row["description_html"], + hide_sql=bool(options.get("hide_sql")), + fragment=options.get("fragment"), + parameters=parameters, + is_write=bool(row["is_write"]), + is_private=bool(row["is_private"]), + is_trusted=bool(row["is_trusted"]), + source=row["source"], + owner_id=row["owner_id"], + on_success_message=options.get("on_success_message"), + on_success_message_sql=options.get("on_success_message_sql"), + on_success_redirect=options.get("on_success_redirect"), + on_error_message=options.get("on_error_message"), + on_error_redirect=options.get("on_error_redirect"), + private=private, + ) -def query_options_json(options): +def query_options_json(options: dict[str, Any]) -> str: options_dict = {} for field in QUERY_OPTION_FIELDS: value = options.get(field) @@ -92,29 +165,29 @@ def query_options_json(options): async def add_query( - datasette, - database, - name, - sql, + datasette: Any, + database: str, + name: str, + sql: str, *, - title=None, - description=None, - description_html=None, - hide_sql=False, - fragment=None, - parameters=None, - is_write=False, - is_private=False, - is_trusted=False, - source="plugin", - owner_id=None, - on_success_message=None, - on_success_message_sql=None, - on_success_redirect=None, - on_error_message=None, - on_error_redirect=None, - replace=True, -): + title: str | None = None, + description: str | None = None, + description_html: str | None = None, + hide_sql: bool = False, + fragment: str | None = None, + parameters: Iterable[str] | None = None, + is_write: bool = False, + is_private: bool = False, + is_trusted: bool = False, + source: str = "plugin", + owner_id: str | None = None, + on_success_message: str | None = None, + on_success_message_sql: str | None = None, + on_success_redirect: str | None = None, + on_error_message: str | None = None, + on_error_redirect: str | None = None, + replace: bool = True, +) -> None: parameters_json = json.dumps(list(parameters or [])) options_json = query_options_json( { @@ -170,9 +243,9 @@ async def add_query( async def update_query( - datasette, - database, - name, + datasette: Any, + database: str, + name: str, *, sql=UNCHANGED, title=UNCHANGED, @@ -191,7 +264,7 @@ async def update_query( on_success_redirect=UNCHANGED, on_error_message=UNCHANGED, on_error_redirect=UNCHANGED, -): +) -> None: fields = { "sql": sql, "title": title, @@ -263,7 +336,9 @@ async def update_query( ) -async def remove_query(datasette, database, name, source=None): +async def remove_query( + datasette: Any, database: str, name: str, source: str | None = None +) -> None: sql = "DELETE FROM queries WHERE database_name = ? AND name = ?" params = [database, name] if source is not None: @@ -272,7 +347,7 @@ async def remove_query(datasette, database, name, source=None): await datasette.get_internal_database().execute_write(sql, params) -async def get_query(datasette, database, name): +async def get_query(datasette: Any, database: str, name: str) -> StoredQuery | None: rows = await datasette.get_internal_database().execute( """ SELECT * FROM queries @@ -280,21 +355,21 @@ async def get_query(datasette, database, name): """, [database, name], ) - return query_row_to_dict(rows.first()) + return query_row_to_stored_query(rows.first()) async def count_queries( - datasette, - database=None, + datasette: Any, + database: str | None = None, *, - actor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, -): + actor: dict[str, Any] | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, +) -> int: allowed_sql, allowed_params = await datasette.allowed_resources_sql( action="view-query", actor=actor, @@ -354,20 +429,20 @@ async def count_queries( async def list_queries( - datasette, - database=None, + datasette: Any, + database: str | None = None, *, - actor=None, - limit=50, - cursor=None, - q=None, - is_write=None, - is_private=None, - is_trusted=None, - source=None, - owner_id=None, - include_private=False, -): + actor: dict[str, Any] | None = None, + limit: int = 50, + cursor: str | None = None, + q: str | None = None, + is_write: bool | None = None, + is_private: bool | None = None, + is_trusted: bool | None = None, + source: str | None = None, + owner_id: str | None = None, + include_private: bool = False, +) -> StoredQueryPage: limit = min(max(1, int(limit)), 1000) allowed_sql, allowed_params = await datasette.allowed_resources_sql( action="view-query", @@ -480,9 +555,10 @@ async def list_queries( queries = [] for row in rows: - query = query_row_to_dict(row) - if include_private: - query["private"] = bool(row["private"]) + query = query_row_to_stored_query( + row, private=bool(row["private"]) if include_private else None + ) + assert query is not None queries.append(query) next_token = None @@ -499,17 +575,23 @@ async def list_queries( tilde_encode(last_row["sort_key"]), tilde_encode(last_row["name"]), ) - return { - "queries": queries, - "next": next_token, - "has_more": has_more, - "limit": limit, - } + return StoredQueryPage( + queries=queries, + next=next_token, + has_more=has_more, + limit=limit, + ) async def ensure_query_write_permissions( - datasette, database, sql, *, actor=None, params=None, analysis=None -): + datasette: Any, + database: str, + sql: str, + *, + actor: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + analysis: Any = None, +) -> Any: write_actions = { "insert": "insert-row", "update": "update-row", diff --git a/datasette/views/database.py b/datasette/views/database.py index 98ca989c..b558b002 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,6 +13,7 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import stored_query_to_dict from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -99,8 +100,8 @@ class DatabaseView(View): limit=5, include_private=True, ) - stored_queries = queries_page["queries"] - queries_more = queries_page["has_more"] + stored_queries = queries_page.queries + queries_more = queries_page.has_more queries_count = ( await datasette.count_queries(database, actor=request.actor) if queries_more @@ -136,7 +137,7 @@ class DatabaseView(View): "tables": tables, "hidden_count": len([t for t in tables if t["hidden"]]), "views": sql_views, - "queries": stored_queries, + "queries": [stored_query_to_dict(query) for query in stored_queries], "queries_more": queries_more, "queries_count": queries_count, "allow_execute_sql": allow_execute_sql, @@ -447,7 +448,7 @@ class QueryView(View): if not await datasette.allowed( action="view-query", - resource=QueryResource(database=db.name, query=stored_query["name"]), + resource=QueryResource(database=db.name, query=stored_query.name), actor=request.actor, ): raise Forbidden("You do not have permission to view this query") @@ -480,20 +481,18 @@ class QueryView(View): or request.args.get("_json") or params.get("_json") ) - params_for_query = MagicParameters( - stored_query["sql"], params, request, datasette - ) + params_for_query = MagicParameters(stored_query.sql, params, request, datasette) await params_for_query.execute_params() ok = None redirect_url = None try: cursor = await db.execute_write( - stored_query["sql"], params_for_query, request=request + stored_query.sql, params_for_query, request=request ) # success message can come from on_success_message or on_success_message_sql message = None message_type = datasette.INFO - on_success_message_sql = stored_query.get("on_success_message_sql") + on_success_message_sql = stored_query.on_success_message_sql if on_success_message_sql: try: message_result = ( @@ -505,18 +504,19 @@ class QueryView(View): message = "Error running on_success_message_sql: {}".format(ex) message_type = datasette.ERROR if not message: - message = stored_query.get( - "on_success_message" - ) or "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" + message = ( + stored_query.on_success_message + or "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) ) - redirect_url = stored_query.get("on_success_redirect") + redirect_url = stored_query.on_success_redirect ok = True except Exception as ex: - message = stored_query.get("on_error_message") or str(ex) + message = stored_query.on_error_message or str(ex) message_type = datasette.ERROR - redirect_url = stored_query.get("on_error_redirect") + redirect_url = stored_query.on_error_redirect ok = False if should_return_json: return Response.json( @@ -562,7 +562,7 @@ class QueryView(View): ) if stored_query is None: raise - stored_query_write = bool(stored_query.get("is_write")) + stored_query_write = stored_query.is_write private = False if stored_query: @@ -570,7 +570,7 @@ class QueryView(View): visible, private = await datasette.check_visibility( request.actor, action="view-query", - resource=QueryResource(database=database, query=stored_query["name"]), + resource=QueryResource(database=database, query=stored_query.name), ) if not visible: raise Forbidden("You do not have permission to view this query") @@ -591,14 +591,14 @@ class QueryView(View): sql = None if stored_query: - sql = stored_query["sql"] + sql = stored_query.sql elif "sql" in params: sql = params.pop("sql") # Extract any :named parameters named_parameters = [] - if stored_query and stored_query.get("params"): - named_parameters = stored_query["params"] + if stored_query and stored_query.parameters: + named_parameters = stored_query.parameters if not named_parameters and sql: named_parameters = derive_named_parameters(sql) named_parameter_values = { @@ -686,7 +686,7 @@ class QueryView(View): columns=columns, rows=rows, sql=sql, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, database=database, table=None, request=request, @@ -721,7 +721,7 @@ class QueryView(View): if stored_query: templates.insert( 0, - f"query-{to_css_class(database)}-{to_css_class(stored_query['name'])}.html", + f"query-{to_css_class(database)}-{to_css_class(stored_query.name)}.html", ) environment = datasette.get_jinja_environment(request) @@ -740,7 +740,7 @@ class QueryView(View): ) metadata = await datasette.get_database_metadata(database) if stored_query: - metadata = dict(stored_query) + metadata = stored_query_to_dict(stored_query) metadata.pop("source", None) renderers = {} @@ -775,7 +775,7 @@ class QueryView(View): ) show_hide_hidden = "" - if stored_query and stored_query.get("hide_sql"): + if stored_query and stored_query.hide_sql: if bool(params.get("_show_sql")): show_hide_link = path_with_removed_args(request, {"_show_sql"}) show_hide_text = "hide" @@ -843,7 +843,7 @@ class QueryView(View): datasette=datasette, actor=request.actor, database=database, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, request=request, sql=sql, params=params, @@ -863,7 +863,7 @@ class QueryView(View): "sql": sql, "params": params, }, - stored_query=stored_query["name"] if stored_query else None, + stored_query=stored_query.name if stored_query else None, private=private, stored_query_write=stored_query_write, db_is_immutable=not db.is_mutable, @@ -907,7 +907,7 @@ class QueryView(View): datasette, request, database=database, - query_name=stored_query["name"] if stored_query else None, + query_name=stored_query.name if stored_query else None, ), query_actions=query_actions, ), diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index de732431..46d71b8e 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -2,6 +2,7 @@ import json import re from datasette.resources import DatabaseResource, TableResource +from datasette.stored_queries import StoredQuery from datasette.utils import ( named_parameters as derive_named_parameters, escape_sqlite, @@ -281,18 +282,18 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): return parameter_names, params, analysis -async def _ensure_stored_query_execution_permissions(datasette, db, query, actor): - if query.get("is_trusted"): +async def _ensure_stored_query_execution_permissions( + datasette, db, query: StoredQuery, actor +): + if query.is_trusted: return - if query.get("is_write"): + if query.is_write: await datasette.ensure_permission( action="execute-write-sql", resource=DatabaseResource(db.name), actor=actor, ) - await datasette.ensure_query_write_permissions( - db.name, query["sql"], actor=actor - ) + await datasette.ensure_query_write_permissions(db.name, query.sql, actor=actor) else: await datasette.ensure_permission( action="execute-sql", @@ -482,7 +483,7 @@ async def _prepare_query_create(datasette, request, db, data): } -async def _prepare_query_update(datasette, request, db, existing, update): +async def _prepare_query_update(datasette, request, db, existing: StoredQuery, update): invalid_keys = set(update) - _query_update_fields if invalid_keys: raise QueryValidationError( @@ -490,8 +491,8 @@ async def _prepare_query_update(datasette, request, db, existing, update): ) update = _apply_query_data_types(update) - sql = update.get("sql", existing["sql"]) - query_is_write = existing["is_write"] + sql = update.get("sql", existing.sql) + query_is_write = existing.is_write derived = _derived_query_parameters(sql) parameters = None diff --git a/datasette/views/stored_queries.py b/datasette/views/stored_queries.py index 1a2c5d00..8c4e849e 100644 --- a/datasette/views/stored_queries.py +++ b/datasette/views/stored_queries.py @@ -1,6 +1,7 @@ from urllib.parse import parse_qsl, urlencode from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import stored_query_to_dict from datasette.utils import sqlite3, tilde_decode from datasette.utils.asgi import Response @@ -100,7 +101,7 @@ class QueryListView(BaseView): ) query_list_path = self.query_list_path(database) next_url = None - if page["next"]: + if page.next: pairs = [ (key, value) for key, value in parse_qsl( @@ -108,7 +109,7 @@ class QueryListView(BaseView): ) if key != "_next" ] - pairs.append(("_next", page["next"])) + pairs.append(("_next", page.next)) next_url = "{}?{}".format( query_list_path, urlencode(pairs), @@ -194,13 +195,13 @@ class QueryListView(BaseView): "database_color": ( self.ds.get_database(database).color if database is not None else None ), - "queries": page["queries"], - "next": page["next"], + "queries": page.queries, + "next": page.next, "next_url": next_url, - "has_more": page["has_more"], - "limit": page["limit"], - "show_private_note": any(query["is_private"] for query in page["queries"]), - "show_trusted_note": any(query["is_trusted"] for query in page["queries"]), + "has_more": page.has_more, + "limit": page.limit, + "show_private_note": any(query.is_private for query in page.queries), + "show_trusted_note": any(query.is_trusted for query in page.queries), "query_list_path": query_list_path, "show_database": database is None, "facets": facets, @@ -213,7 +214,12 @@ class QueryListView(BaseView): }, } if format_ == "json": - return Response.json(data) + return Response.json( + { + **data, + "queries": [stored_query_to_dict(query) for query in page.queries], + } + ) return await self.render( ["query_list.html"], request, @@ -374,8 +380,11 @@ class QueryStoreView(QueryCreateView): return _error([str(ex)], 400) query = await self.ds.get_query(db.name, name) + assert query is not None if is_json: - return Response.json({"ok": True, "query": query}, status=201) + return Response.json( + {"ok": True, "query": stored_query_to_dict(query)}, status=201 + ) self.ds.add_message(request, "Query saved", self.ds.INFO) return Response.redirect(self.ds.urls.path(self.ds.urls.table(db.name, name))) @@ -395,7 +404,7 @@ class QueryDefinitionView(BaseView): actor=request.actor, ): return _error(["Permission denied"], 403) - return Response.json({"ok": True, "query": query}) + return Response.json({"ok": True, "query": stored_query_to_dict(query)}) class QueryUpdateView(BaseView): @@ -413,7 +422,7 @@ class QueryUpdateView(BaseView): actor=request.actor, ): return _error(["Permission denied: need update-query"], 403) - if existing.get("is_trusted"): + if existing.is_trusted: return _error(["Trusted queries cannot be updated using the API"], 403) try: @@ -444,10 +453,12 @@ class QueryUpdateView(BaseView): await self.ds.update_query(db.name, query_name, **update_kwargs) if data.get("return"): + query = await self.ds.get_query(db.name, query_name) + assert query is not None return Response.json( { "ok": True, - "query": await self.ds.get_query(db.name, query_name), + "query": stored_query_to_dict(query), } ) return Response.json({"ok": True}) diff --git a/docs/internals.rst b/docs/internals.rst index 66724aa9..4980ee8b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1039,11 +1039,11 @@ Example: await .get_query(database, name) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Returns a stored query dictionary, or ``None`` if the query does not exist. +Returns a ``StoredQuery`` dataclass instance, or ``None`` if the query does not exist. -The dictionary contains ``database``, ``name``, ``sql``, ``title``, ``description``, ``description_html``, ``hide_sql``, ``fragment``, ``parameters``, ``params``, ``is_write``, ``is_private``, ``is_trusted``, ``source``, ``owner_id``, ``on_success_message``, ``on_success_message_sql``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``. +``StoredQuery`` has the following attributes: ``database``, ``name``, ``sql``, ``title``, ``description``, ``description_html``, ``hide_sql``, ``fragment``, ``parameters``, ``is_write``, ``is_private``, ``is_trusted``, ``source``, ``owner_id``, ``on_success_message``, ``on_success_message_sql``, ``on_success_redirect``, ``on_error_message`` and ``on_error_redirect``. -``parameters`` and ``params`` contain the same list of explicit parameter names. +``parameters`` is a list of explicit parameter names. .. _datasette_list_queries: @@ -1087,12 +1087,12 @@ Lists stored queries visible to the specified actor. ``owner_id`` - string, optional Filter by owner actor ID. ``include_private`` - boolean, optional - Set to ``True`` to include a ``private`` boolean in each returned query dictionary indicating if anonymous users would be unable to view that query. + Set to ``True`` to populate a ``private`` boolean on each returned ``StoredQuery`` indicating if anonymous users would be unable to view that query. -The return value is a dictionary with these keys: +The return value is a ``StoredQueryPage`` dataclass instance with these attributes: -``queries`` - list of dictionaries - Stored query dictionaries, in the same format returned by :ref:`datasette_get_query`. +``queries`` - list of StoredQuery instances + Stored queries in the same format returned by :ref:`datasette_get_query`. ``next`` - string or None Pagination cursor for the next page, if one exists. ``has_more`` - boolean diff --git a/tests/test_queries.py b/tests/test_queries.py index 70fb7a03..59fab8c0 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -4,6 +4,7 @@ import pytest from datasette.app import Datasette from datasette.resources import DatabaseResource, QueryResource +from datasette.stored_queries import StoredQuery, StoredQueryPage from datasette.utils.asgi import Forbidden @@ -87,38 +88,41 @@ async def test_add_get_and_remove_query(): } query = await ds.get_query("data", "top_customers") - assert query == { - "database": "data", - "name": "top_customers", - "sql": "select * from customers where region = :region", - "title": "Top customers", - "description": "Customers by region", - "description_html": None, - "hide_sql": True, - "fragment": "chart", - "params": ["region"], - "parameters": ["region"], - "is_write": False, - "is_private": False, - "is_trusted": True, - "source": "user", - "owner_id": "alice", - "on_success_message": None, - "on_success_message_sql": None, - "on_success_redirect": None, - "on_error_message": None, - "on_error_redirect": None, - } + assert query == StoredQuery( + database="data", + name="top_customers", + sql="select * from customers where region = :region", + title="Top customers", + description="Customers by region", + description_html=None, + hide_sql=True, + fragment="chart", + parameters=["region"], + is_write=False, + is_private=False, + is_trusted=True, + source="user", + owner_id="alice", + on_success_message=None, + on_success_message_sql=None, + on_success_redirect=None, + on_error_message=None, + on_error_redirect=None, + ) queries_page = await ds.list_queries("data", actor=None) - assert queries_page["queries"] == [query] - assert queries_page["next"] is None + assert queries_page == StoredQueryPage( + queries=[query], + next=None, + has_more=False, + limit=50, + ) await ds.remove_query("data", "top_customers") assert await ds.get_query("data", "top_customers") is None queries_page = await ds.list_queries("data", actor=None) - assert queries_page["queries"] == [] - assert queries_page["next"] is None + assert queries_page.queries == [] + assert queries_page.next is None @pytest.mark.asyncio @@ -156,13 +160,12 @@ async def test_update_query_only_updates_provided_fields(): ) query = await ds.get_query("data", "redirect") - assert query["title"] == "Updated" - assert query["parameters"] == [] - assert query["params"] == [] - assert query["on_success_redirect"] is None - assert query["sql"] == "select 1" - assert query["is_private"] is False - assert query["is_trusted"] is False + assert query.title == "Updated" + assert query.parameters == [] + assert query.on_success_redirect is None + assert query.sql == "select 1" + assert query.is_private is False + assert query.is_trusted is False options_row = ( await ds.get_internal_database().execute( """ @@ -198,28 +201,27 @@ async def test_config_queries_imported_to_internal_table(): ds.add_memory_database("query_config", name="data") await ds.invoke_startup() - assert await ds.get_query("data", "configured") == { - "database": "data", - "name": "configured", - "sql": "select :name as name", - "title": "Configured query", - "description": None, - "description_html": "

Configured HTML

", - "hide_sql": False, - "fragment": None, - "params": ["name"], - "parameters": ["name"], - "is_write": False, - "is_private": False, - "is_trusted": True, - "source": "config", - "owner_id": None, - "on_success_message": None, - "on_success_message_sql": "select 'Hello ' || :name", - "on_success_redirect": None, - "on_error_message": None, - "on_error_redirect": None, - } + assert await ds.get_query("data", "configured") == StoredQuery( + database="data", + name="configured", + sql="select :name as name", + title="Configured query", + description=None, + description_html="

Configured HTML

", + hide_sql=False, + fragment=None, + parameters=["name"], + is_write=False, + is_private=False, + is_trusted=True, + source="config", + owner_id=None, + on_success_message=None, + on_success_message_sql="select 'Hello ' || :name", + on_success_redirect=None, + on_error_message=None, + on_error_redirect=None, + ) @pytest.mark.asyncio @@ -1032,8 +1034,8 @@ async def test_query_update_api_rejects_config_only_fields(): "Invalid keys: description_html, on_success_message_sql" ] query = await ds.get_query("data", "editable") - assert query["description_html"] is None - assert query["on_success_message_sql"] is None + assert query.description_html is None + assert query.on_success_message_sql is None @pytest.mark.asyncio @@ -1072,9 +1074,9 @@ async def test_query_update_api_rejects_trusted_queries_but_internal_update_allo "Trusted queries cannot be updated using the API" ] query = await ds.get_query("data", "trusted_report") - assert query["is_trusted"] is True - assert query["sql"] == "select 1 as one" - assert query["title"] == "Original" + assert query.is_trusted is True + assert query.sql == "select 1 as one" + assert query.title == "Original" await ds.update_query( "data", @@ -1083,9 +1085,9 @@ async def test_query_update_api_rejects_trusted_queries_but_internal_update_allo title="Internal", ) query = await ds.get_query("data", "trusted_report") - assert query["is_trusted"] is True - assert query["sql"] == "select 3 as three" - assert query["title"] == "Internal" + assert query.is_trusted is True + assert query.sql == "select 3 as three" + assert query.title == "Internal" @pytest.mark.asyncio From 9f66cf72c1c9170f10e863d750ac4eee47113a7f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 21:42:50 -0700 Subject: [PATCH 1193/1218] Removed execute write SQL from query create page --- datasette/templates/query_create.html | 7 ------- 1 file changed, 7 deletions(-) diff --git a/datasette/templates/query_create.html b/datasette/templates/query_create.html index f5dadbff..ec910456 100644 --- a/datasette/templates/query_create.html +++ b/datasette/templates/query_create.html @@ -106,9 +106,6 @@ form.sql .query-create-sql textarea#sql-editor { .query-create-analysis-note { margin: 0; } -.query-create-action { - margin: 0.35rem 0 1rem; -} .query-create-analysis { margin-top: 0.8rem; } @@ -171,10 +168,6 @@ form.sql .query-create-sql textarea#sql-editor { Queries marked private can only be seen by you, their creator.

- {% if sql and analysis_is_write %} -

Execute write SQL

- {% endif %} -

From 737ff03efbb2bdc99b10d2654b7818526ec51e13 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 26 May 2026 22:11:06 -0700 Subject: [PATCH 1194/1218] Expanded analysis of SQL operations, refs #2748 --- datasette/permissions.py | 10 ++ datasette/stored_queries.py | 137 +++++++++++++-- datasette/utils/sql_analysis.py | 289 +++++++++++++++++++++++++++---- datasette/views/execute_write.py | 9 +- datasette/views/query_helpers.py | 104 +++++++---- tests/test_actions_sql.py | 14 +- tests/test_internals_database.py | 34 ++-- tests/test_queries.py | 166 ++++++++++++++++++ tests/test_utils_sql_analysis.py | 97 +++++++++-- 9 files changed, 740 insertions(+), 120 deletions(-) diff --git a/datasette/permissions.py b/datasette/permissions.py index 917c58ab..a9a3cc7c 100644 --- a/datasette/permissions.py +++ b/datasette/permissions.py @@ -58,6 +58,16 @@ class Resource(ABC): self.child = child self._private = None # Sentinel to track if private was set + def __str__(self) -> str: + return "/".join( + str(part) for part in (self.parent, self.child) if part is not None + ) + + def __repr__(self) -> str: + return "{}(parent={!r}, child={!r})".format( + self.__class__.__name__, self.parent, self.child + ) + @property def private(self) -> bool: """ diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index bcfdfdb4..c4b083e5 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -2,11 +2,16 @@ from __future__ import annotations from dataclasses import dataclass import json -from typing import Any, Iterable +from typing import Any, Iterable, TYPE_CHECKING -from .resources import TableResource +from .resources import DatabaseResource, TableResource +from .permissions import Resource from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components from .utils.asgi import Forbidden +from .utils.sql_analysis import Operation, SQLAnalysis + +if TYPE_CHECKING: + from .app import Datasette UNCHANGED = object() @@ -583,20 +588,94 @@ async def list_queries( ) -async def ensure_query_write_permissions( - datasette: Any, - database: str, - sql: str, - *, - actor: dict[str, Any] | None = None, - params: dict[str, Any] | None = None, - analysis: Any = None, -) -> Any: +PermissionRequirement = tuple[str, Resource] + + +def permission_for_operation(operation: Operation) -> PermissionRequirement | None: write_actions = { "insert": "insert-row", "update": "update-row", "delete": "delete-row", } + action = write_actions.get(operation.operation) + if ( + action + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + action, + TableResource(database=operation.database, table=operation.table), + ) + if operation.operation == "create" and operation.target_type == "table": + if operation.database is None: + return None + return ( + "create-table", + DatabaseResource(database=operation.database), + ) + if ( + operation.operation == "alter" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "alter-table", + TableResource(database=operation.database, table=operation.table), + ) + if ( + operation.operation == "drop" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "drop-table", + TableResource(database=operation.database, table=operation.table), + ) + if ( + operation.operation in {"create", "drop"} + and operation.target_type == "index" + and operation.database is not None + and operation.table is not None + ): + return ( + "alter-table", + TableResource(database=operation.database, table=operation.table), + ) + return None + + +def operation_is_write(operation: Operation) -> bool: + return operation.operation in { + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "attach", + "detach", + "pragma", + "analyze", + "reindex", + } + + +async def ensure_query_write_permissions( + datasette: Datasette, + database: str, + sql: str, + *, + actor: dict[str, object] | None = None, + params: dict[str, object] | None = None, + analysis: SQLAnalysis | None = None, +) -> SQLAnalysis: db = datasette.get_database(database) if analysis is None: if params is None: @@ -606,18 +685,38 @@ async def ensure_query_write_permissions( except sqlite3.DatabaseError as ex: raise Forbidden(f"Could not analyze query: {ex}") from ex - for access in analysis.table_accesses: - action = write_actions.get(access.operation) - if action is None: + has_semantic_schema_operation = any( + operation.operation in {"create", "alter", "drop"} + and operation.target_type in {"table", "index", "view", "trigger"} + for operation in analysis.operations + ) + for operation in analysis.operations: + if operation.internal and has_semantic_schema_operation: continue - if access.database != database: + if has_semantic_schema_operation and operation.operation in { + "read", + "insert", + "update", + "delete", + "reindex", + }: + continue + permission = permission_for_operation(operation) + if permission is None: + if operation_is_write(operation): + raise Forbidden( + "Unsupported SQL operation: {} {}".format( + operation.operation, operation.target_type + ) + ) + continue + action, resource = permission + if operation.database != database: raise Forbidden("Writable queries may not write to attached databases") if not await datasette.allowed( action=action, - resource=TableResource(database=access.database, table=access.table), + resource=resource, actor=actor, ): - raise Forbidden( - f"Permission denied: need {action} on {access.database}/{access.table}" - ) + raise Forbidden(f"Permission denied: need {action} on {resource}") return analysis diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index b5317b62..54f310fe 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -3,22 +3,66 @@ from typing import Literal from datasette.utils.sqlite import sqlite3 +SQLOperation = Literal[ + "read", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "attach", + "detach", + "pragma", + "analyze", + "reindex", +] +SQLTargetType = Literal[ + "table", + "index", + "view", + "trigger", + "schema", + "transaction", + "database", + "pragma", + "unknown", +] SQLTableOperation = Literal["read", "insert", "update", "delete"] @dataclass(frozen=True) -class SQLTableAccess: - operation: SQLTableOperation +class Operation: + operation: SQLOperation + target_type: SQLTargetType database: str | None - table: str + table: str | None sqlite_schema: str | None + target: str | None = None columns: tuple[str, ...] = () source: str | None = None + internal: bool = False @dataclass(frozen=True) class SQLAnalysis: - table_accesses: tuple[SQLTableAccess, ...] + operations: tuple[Operation, ...] + + +# Hashable dict key for grouping repeated authorizer callbacks while collecting columns. +@dataclass(frozen=True) +class OperationKey: + operation: SQLOperation + target_type: SQLTargetType + database: str | None + table: str | None + sqlite_schema: str | None + target: str | None + source: str | None + internal: bool _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { @@ -28,6 +72,36 @@ _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { sqlite3.SQLITE_DELETE: "delete", } +# Values are (operation, target_type) pairs used to construct Operation objects. +_CREATE_ACTIONS = { + sqlite3.SQLITE_CREATE_INDEX: ("create", "index"), + sqlite3.SQLITE_CREATE_TABLE: ("create", "table"), + sqlite3.SQLITE_CREATE_TRIGGER: ("create", "trigger"), + sqlite3.SQLITE_CREATE_VIEW: ("create", "view"), +} +_DROP_ACTIONS = { + sqlite3.SQLITE_DROP_INDEX: ("drop", "index"), + sqlite3.SQLITE_DROP_TABLE: ("drop", "table"), + sqlite3.SQLITE_DROP_TRIGGER: ("drop", "trigger"), + sqlite3.SQLITE_DROP_VIEW: ("drop", "view"), +} +for action_name, operation, target_type in ( + ("SQLITE_CREATE_TEMP_INDEX", "create", "index"), + ("SQLITE_CREATE_TEMP_TABLE", "create", "table"), + ("SQLITE_CREATE_TEMP_TRIGGER", "create", "trigger"), + ("SQLITE_CREATE_TEMP_VIEW", "create", "view"), + ("SQLITE_DROP_TEMP_INDEX", "drop", "index"), + ("SQLITE_DROP_TEMP_TABLE", "drop", "table"), + ("SQLITE_DROP_TEMP_TRIGGER", "drop", "trigger"), + ("SQLITE_DROP_TEMP_VIEW", "drop", "view"), +): + action_value = getattr(sqlite3, action_name, None) + if action_value is not None: + actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS + actions[action_value] = (operation, target_type) + +_SQLITE_SCHEMA_TABLES = {"sqlite_master", "sqlite_schema"} + def analyze_sql_tables( conn, @@ -38,15 +112,13 @@ def analyze_sql_tables( schema_to_database: dict[str, str] | None = None, ) -> SQLAnalysis: """ - Return tables accessed by a SQL statement according to SQLite's authorizer. + Return operations performed by a SQL statement according to SQLite's authorizer. This function is synchronous and connection-based. It temporarily installs a - SQLite authorizer, prepares ``EXPLAIN ``, and returns the table access + SQLite authorizer, prepares ``EXPLAIN ``, and returns the operation callbacks observed while SQLite compiles the statement. """ - accesses: dict[ - tuple[SQLTableOperation, str | None, str, str | None, str | None], set[str] - ] = {} + operations: dict[OperationKey, set[str]] = {} def database_for_schema(sqlite_schema): if schema_to_database and sqlite_schema in schema_to_database: @@ -55,21 +127,166 @@ def analyze_sql_tables( return database_name return sqlite_schema + def record( + operation: SQLOperation, + target_type: SQLTargetType, + *, + database: str | None, + table: str | None, + sqlite_schema: str | None, + target: str | None, + source: str | None, + column: str | None = None, + internal: bool = False, + ): + key = OperationKey( + operation=operation, + target_type=target_type, + database=database, + table=table, + sqlite_schema=sqlite_schema, + target=target, + source=source, + internal=internal, + ) + columns = operations.setdefault(key, set()) + if column is not None: + columns.add(column) + def authorizer(action, arg1, arg2, sqlite_schema, source): operation = _ACTION_TO_OPERATION.get(action) - if operation is None or arg1 is None: + if operation is not None and arg1 is not None: + target_type = "schema" if arg1 in _SQLITE_SCHEMA_TABLES else "table" + column = ( + arg2 if operation in ("read", "update") and arg2 is not None else None + ) + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=arg1 if target_type == "table" else None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + column=column, + internal=target_type == "schema", + ) + return sqlite3.SQLITE_OK + + create_operation = _CREATE_ACTIONS.get(action) + if create_operation is not None and arg1 is not None: + operation, target_type = create_operation + related_table = arg2 if target_type in {"index", "trigger"} else arg1 + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=related_table, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + drop_operation = _DROP_ACTIONS.get(action) + if drop_operation is not None and arg1 is not None: + operation, target_type = drop_operation + related_table = arg2 if target_type in {"index", "trigger"} else arg1 + record( + operation, + target_type, + database=database_for_schema(sqlite_schema), + table=related_table, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ALTER_TABLE and arg2 is not None: + record( + "alter", + "table", + database=database_for_schema(arg1), + table=arg2, + sqlite_schema=arg1, + target=arg2, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_TRANSACTION and arg1 is not None: + record( + arg1.lower(), + "transaction", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ATTACH and arg1 is not None: + record( + "attach", + "database", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_DETACH and arg1 is not None: + record( + "detach", + "database", + database=None, + table=None, + sqlite_schema=None, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_PRAGMA and arg1 is not None: + record( + "pragma", + "pragma", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_ANALYZE: + record( + "analyze", + "database" if arg1 is None else "table", + database=database_for_schema(sqlite_schema), + table=arg1, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_REINDEX and arg1 is not None: + record( + "reindex", + "index", + database=database_for_schema(sqlite_schema), + table=None, + sqlite_schema=sqlite_schema, + target=arg1, + source=source, + ) return sqlite3.SQLITE_OK - key = ( - operation, - database_for_schema(sqlite_schema), - arg1, - sqlite_schema, - source, - ) - columns = accesses.setdefault(key, set()) - if operation in ("read", "update") and arg2 is not None: - columns.add(arg2) return sqlite3.SQLITE_OK conn.set_authorizer(authorizer) @@ -78,22 +295,26 @@ def analyze_sql_tables( finally: conn.set_authorizer(None) + has_schema_operation = any( + key.target_type in {"table", "index", "view", "trigger"} + and key.operation in {"create", "alter", "drop"} + for key in operations + ) + return SQLAnalysis( - table_accesses=tuple( - SQLTableAccess( - operation=operation, - database=database, - table=table, - sqlite_schema=sqlite_schema, + operations=tuple( + Operation( + operation=key.operation, + target_type=key.target_type, + database=key.database, + table=key.table, + sqlite_schema=key.sqlite_schema, + target=key.target, columns=tuple(sorted(columns)), - source=source, + source=key.source, + internal=key.internal + or (has_schema_operation and key.target_type == "schema"), ) - for ( - operation, - database, - table, - sqlite_schema, - source, - ), columns in accesses.items() + for key, columns in operations.items() ) ) diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 0054300c..cead8926 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -193,9 +193,12 @@ class ExecuteWriteView(BaseView): status=400, ) - message = "Query executed, {} row{} affected".format( - cursor.rowcount, "" if cursor.rowcount == 1 else "s" - ) + if cursor.rowcount == -1: + message = "Query executed" + else: + message = "Query executed, {} row{} affected".format( + cursor.rowcount, "" if cursor.rowcount == 1 else "s" + ) if _wants_json(request, is_json, data): return _block_framing( Response.json( diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 46d71b8e..922f4e52 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -1,8 +1,12 @@ import json import re -from datasette.resources import DatabaseResource, TableResource -from datasette.stored_queries import StoredQuery +from datasette.resources import DatabaseResource +from datasette.stored_queries import ( + StoredQuery, + operation_is_write, + permission_for_operation, +) from datasette.utils import ( named_parameters as derive_named_parameters, escape_sqlite, @@ -12,6 +16,7 @@ from datasette.utils import ( InvalidSql, ) from datasette.utils.asgi import Forbidden +from datasette.utils.sql_analysis import Operation, SQLAnalysis _query_name_re = re.compile(r"^[^/\.\n]+$") @@ -123,11 +128,8 @@ def _coerce_query_parameters(value, derived): return parameters -def _analysis_is_write(analysis): - return any( - access.operation in {"insert", "update", "delete"} - for access in analysis.table_accesses - ) +def _analysis_is_write(analysis: SQLAnalysis) -> bool: + return any(operation_is_write(operation) for operation in analysis.operations) def _block_framing(response): @@ -201,34 +203,66 @@ async def _analyze_user_query(datasette, db, sql, *, actor): return is_write, derived, analysis -def _analysis_rows(analysis): - write_actions = { - "insert": "insert-row", - "update": "update-row", - "delete": "delete-row", - } - return [ - { - "operation": access.operation, - "database": access.database, - "table": access.table, - "required_permission": write_actions.get(access.operation, ""), - "source": access.source, - } - for access in analysis.table_accesses - ] +def _semantic_schema_operation_is_present(operations: tuple[Operation, ...]) -> bool: + return any( + operation.operation in {"create", "alter", "drop"} + and operation.target_type in {"table", "index", "view", "trigger"} + for operation in operations + ) -async def _analysis_rows_with_permissions(datasette, analysis, actor): +def _display_operations(analysis: SQLAnalysis) -> list[Operation]: + has_semantic_schema_operation = _semantic_schema_operation_is_present( + analysis.operations + ) + operations = [] + for operation in analysis.operations: + if operation.internal and has_semantic_schema_operation: + continue + if has_semantic_schema_operation and operation.operation in { + "read", + "insert", + "update", + "delete", + "reindex", + }: + continue + operations.append(operation) + return operations + + +def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: + rows = [] + for operation in _display_operations(analysis): + permission = permission_for_operation(operation) + required_permission = permission[0] if permission else "" + rows.append( + { + "operation": operation.operation, + "database": operation.database, + "table": operation.table or operation.target, + "required_permission": required_permission, + "source": operation.source, + } + ) + return rows + + +async def _analysis_rows_with_permissions( + datasette, analysis: SQLAnalysis, actor +) -> list[dict[str, object]]: rows = _analysis_rows(analysis) - for row in rows: - permission = row["required_permission"] + for row, operation in zip(rows, _display_operations(analysis)): + permission = permission_for_operation(operation) if permission: + action, resource = permission row["allowed"] = await datasette.allowed( - action=permission, - resource=TableResource(row["database"], row["table"]), + action=action, + resource=resource, actor=actor, ) + elif operation_is_write(operation): + row["allowed"] = False else: row["allowed"] = None return rows @@ -398,15 +432,19 @@ async def _inserted_row_url(datasette, db, analysis, cursor): if lastrowid is None: return None direct_inserts = [ - access - for access in analysis.table_accesses - if access.operation == "insert" - and access.source is None - and access.database == db.name + operation + for operation in analysis.operations + if operation.operation == "insert" + and operation.target_type == "table" + and not operation.internal + and operation.source is None + and operation.database == db.name ] if len(direct_inserts) != 1: return None table = direct_inserts[0].table + if table is None: + return None pks = await db.primary_keys(table) use_rowid = not pks select = ( diff --git a/tests/test_actions_sql.py b/tests/test_actions_sql.py index 863d2529..a1fca971 100644 --- a/tests/test_actions_sql.py +++ b/tests/test_actions_sql.py @@ -12,10 +12,22 @@ import pytest import pytest_asyncio from datasette.app import Datasette from datasette.permissions import PermissionSQL -from datasette.resources import TableResource +from datasette.resources import DatabaseResource, QueryResource, TableResource from datasette import hookimpl +def test_resource_string_representations(): + assert str(DatabaseResource("content")) == "content" + assert repr(DatabaseResource("content")) == ( + "DatabaseResource(parent='content', child=None)" + ) + assert str(TableResource("content", "dogs")) == "content/dogs" + assert repr(TableResource("content", "dogs")) == ( + "TableResource(parent='content', child='dogs')" + ) + assert str(QueryResource("content", "insert-a-dog")) == "content/insert-a-dog" + + # Test plugin that provides permission rules class PermissionRulesPlugin: def __init__(self, rules_callback): diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 5481a398..d6e130b4 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -698,14 +698,17 @@ async def test_analyze_sql(): assert [ ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal ] == [ ("read", "data", "main", "dogs", ("id", "name"), None), ] @@ -722,14 +725,17 @@ async def test_analyze_sql_insert_select(): assert { ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal } == { ("insert", "data", "main", "dogs", (), None), ("read", "data", "main", "cats", ("name",), None), diff --git a/tests/test_queries.py b/tests/test_queries.py index 59fab8c0..4b8a6486 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1643,6 +1643,172 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_create_table_uses_create_table_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "permissions": { + "insert-row": {"id": "row-writer"}, + "update-row": {"id": "row-writer"}, + }, + "databases": { + "data": { + "permissions": { + "view-database": {"id": ["creator", "row-writer"]}, + "execute-write-sql": {"id": ["creator", "row-writer"]}, + "create-table": {"id": "creator"}, + } + } + }, + }, + ) + db = ds.add_memory_database("execute_write_create_table", name="data") + await ds.invoke_startup() + + analysis_response = await ds.client.get( + "/data/-/execute-write/analyze", + actor={"id": "creator"}, + params={"sql": "create table foobar (id integer primary key, name text)"}, + ) + allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "creator"}, + json={"sql": "create table foobar (id integer primary key, name text)"}, + ) + row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "create table should_not_exist (id integer primary key)"}, + ) + + assert analysis_response.status_code == 200 + analysis_data = analysis_response.json() + assert analysis_data["ok"] is True + assert analysis_data["execute_disabled"] is False + assert analysis_data["analysis_rows"] == [ + { + "operation": "create", + "database": "data", + "table": "foobar", + "required_permission": "create-table", + "source": None, + "allowed": True, + } + ] + + assert allowed_response.status_code == 200 + assert allowed_response.json()["ok"] is True + assert allowed_response.json()["message"] == "Query executed" + assert await db.table_exists("foobar") + + assert row_permission_response.status_code == 403 + assert row_permission_response.json()["errors"] == [ + "Permission denied: need create-table on data" + ] + assert not await db.table_exists("should_not_exist") + + +@pytest.mark.asyncio +async def test_execute_write_alter_and_drop_table_use_schema_permissions(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "permissions": { + "delete-row": {"id": "row-writer"}, + "update-row": {"id": "row-writer"}, + }, + "databases": { + "data": { + "permissions": { + "view-database": {"id": ["alterer", "dropper", "row-writer"]}, + "execute-write-sql": { + "id": ["alterer", "dropper", "row-writer"] + }, + }, + "tables": { + "dogs": { + "permissions": { + "alter-table": {"id": "alterer"}, + "drop-table": {"id": "dropper"}, + } + } + }, + } + }, + }, + ) + db = ds.add_memory_database("execute_write_alter_drop_table", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await db.execute_write("create table cats (id integer primary key, name text)") + await ds.invoke_startup() + + alter_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "alter table dogs add column age integer"}, + ) + alter_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "alter table cats add column age integer"}, + ) + + assert alter_allowed_response.status_code == 200 + assert "age" in [column.name for column in await db.table_column_details("dogs")] + assert alter_row_permission_response.status_code == 403 + assert alter_row_permission_response.json()["errors"] == [ + "Permission denied: need alter-table on data/cats" + ] + assert "age" not in [ + column.name for column in await db.table_column_details("cats") + ] + + create_index_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "create index idx_dogs_name on dogs(name)"}, + ) + create_index_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "create index idx_cats_name on cats(name)"}, + ) + drop_index_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "alterer"}, + json={"sql": "drop index idx_dogs_name"}, + ) + + assert create_index_allowed_response.status_code == 200 + assert create_index_row_permission_response.status_code == 403 + assert create_index_row_permission_response.json()["errors"] == [ + "Permission denied: need alter-table on data/cats" + ] + assert drop_index_allowed_response.status_code == 200 + + drop_allowed_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "dropper"}, + json={"sql": "drop table dogs"}, + ) + drop_row_permission_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "row-writer"}, + json={"sql": "drop table cats"}, + ) + + assert drop_allowed_response.status_code == 200 + assert not await db.table_exists("dogs") + assert drop_row_permission_response.status_code == 403 + assert drop_row_permission_response.json()["errors"] == [ + "Permission denied: need drop-table on data/cats" + ] + assert await db.table_exists("cats") + + @pytest.mark.asyncio async def test_execute_write_insert_links_to_inserted_row(): ds = Datasette(memory=True, default_deny=True) diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 5730cd0d..5306a515 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -26,17 +26,20 @@ def conn(): conn.close() -def as_tuples(analysis): +def table_operation_tuples(analysis): return [ ( - access.operation, - access.database, - access.sqlite_schema, - access.table, - access.columns, - access.source, + operation.operation, + operation.database, + operation.sqlite_schema, + operation.table, + operation.columns, + operation.source, ) - for access in analysis.table_accesses + for operation in analysis.operations + if operation.target_type == "table" + and operation.operation in {"read", "insert", "update", "delete"} + and not operation.internal ] @@ -48,7 +51,7 @@ def test_analyze_select_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("read", "data", "main", "cats", ("id", "name"), None), ("read", "data", "main", "dogs", ("age", "id", "name"), None), } @@ -57,11 +60,73 @@ def test_analyze_select_tables(conn): def test_analyze_uses_sqlite_schema_as_default_database(conn): analysis = analyze_sql_tables(conn, "select name from dogs") - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("read", "main", "main", "dogs", ("name",), None), } +def operation_dict(operation): + return { + "operation": operation.operation, + "target_type": operation.target_type, + "database": operation.database, + "sqlite_schema": operation.sqlite_schema, + "table": operation.table, + "target": operation.target, + "columns": operation.columns, + "source": operation.source, + "internal": operation.internal, + } + + +def test_analyze_create_table_operation(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables( + conn, + "create table foobar (id integer primary key, name text)", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "create", + "target_type": "table", + "database": "data", + "sqlite_schema": "main", + "table": "foobar", + "target": "foobar", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + assert not [ + operation + for operation in analysis.operations + if operation.table in {"sqlite_master", "sqlite_schema"} + and not operation.internal + ] + + +def test_analyze_transaction_operation(conn): + analysis = analyze_sql_tables(conn, "commit", database_name="data") + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "commit", + "target_type": "transaction", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "COMMIT", + "columns": (), + "source": None, + "internal": False, + } + ] + + def test_analyze_insert_tables(conn): analysis = analyze_sql_tables( conn, @@ -70,7 +135,7 @@ def test_analyze_insert_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "data", "main", "dogs", (), None), ("read", "data", "main", "dogs", ("id", "name"), "dogs_after_insert"), ("update", "data", "main", "cats", ("name",), "dogs_after_insert"), @@ -87,7 +152,7 @@ def test_analyze_update_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("update", "data", "main", "dogs", ("age",), None), ("read", "data", "main", "dogs", ("age", "name"), None), } @@ -101,7 +166,7 @@ def test_analyze_delete_tables(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("delete", "data", "main", "dogs", (), None), ("read", "data", "main", "dogs", ("name",), None), } @@ -121,7 +186,7 @@ def test_analyze_insert_select_with_cte(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "data", "main", "cats", (), None), ("read", "data", "main", "dogs", ("age", "name"), "old_dogs"), } @@ -135,7 +200,7 @@ def test_analyze_view_with_instead_of_trigger(conn): database_name="data", ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("update", "data", "main", "dog_names", ("name",), None), ("read", "data", "main", "dogs", ("id", "name"), "dog_names"), ("read", "data", "main", "dog_names", ("id", "name"), "dog_names"), @@ -163,7 +228,7 @@ def test_analyze_attached_database_tables(conn): schema_to_database={"extra": "extra_db"}, ) - assert set(as_tuples(analysis)) == { + assert set(table_operation_tuples(analysis)) == { ("insert", "extra_db", "extra", "people", (), None), ("read", "data", "main", "dogs", ("name",), None), } From 86d0e7335f98a88874df31ec0adb64967446dfac Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 14:52:52 -0700 Subject: [PATCH 1195/1218] Deny unsupported write SQL operations by default Require view-table permission for reads discovered inside write SQL analysis, including INSERT ... SELECT and CREATE TABLE ... AS SELECT. Record additional SQLite authorizer callbacks as Operation values so unsupported functions, savepoints, virtual table DDL, and unknown callbacks are denied unless explicitly handled. --- datasette/stored_queries.py | 43 +++---- datasette/utils/sql_analysis.py | 192 +++++++++++++++++++++++++++++-- datasette/views/execute_write.py | 4 +- datasette/views/query_helpers.py | 32 ++---- tests/test_queries.py | 136 ++++++++++++++++++++-- tests/test_utils_sql_analysis.py | 94 +++++++++++++++ 6 files changed, 433 insertions(+), 68 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index c4b083e5..4b0fe6a6 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -592,6 +592,16 @@ PermissionRequirement = tuple[str, Resource] def permission_for_operation(operation: Operation) -> PermissionRequirement | None: + if ( + operation.operation == "read" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return ( + "view-table", + TableResource(database=operation.database, table=operation.table), + ) write_actions = { "insert": "insert-row", "update": "update-row", @@ -648,6 +658,10 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No return None +def operation_should_be_ignored(operation: Operation) -> bool: + return operation.internal or operation.operation == "select" + + def operation_is_write(operation: Operation) -> bool: return operation.operation in { "insert", @@ -659,11 +673,13 @@ def operation_is_write(operation: Operation) -> bool: "begin", "commit", "rollback", + "savepoint", "attach", "detach", "pragma", "analyze", "reindex", + "unknown", } @@ -685,34 +701,19 @@ async def ensure_query_write_permissions( except sqlite3.DatabaseError as ex: raise Forbidden(f"Could not analyze query: {ex}") from ex - has_semantic_schema_operation = any( - operation.operation in {"create", "alter", "drop"} - and operation.target_type in {"table", "index", "view", "trigger"} - for operation in analysis.operations - ) for operation in analysis.operations: - if operation.internal and has_semantic_schema_operation: - continue - if has_semantic_schema_operation and operation.operation in { - "read", - "insert", - "update", - "delete", - "reindex", - }: + if operation_should_be_ignored(operation): continue permission = permission_for_operation(operation) if permission is None: - if operation_is_write(operation): - raise Forbidden( - "Unsupported SQL operation: {} {}".format( - operation.operation, operation.target_type - ) + raise Forbidden( + "Unsupported SQL operation: {} {}".format( + operation.operation, operation.target_type ) - continue + ) action, resource = permission if operation.database != database: - raise Forbidden("Writable queries may not write to attached databases") + raise Forbidden("Writable queries may not access attached databases") if not await datasette.allowed( action=action, resource=resource, diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 54f310fe..8963da77 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -8,30 +8,39 @@ SQLOperation = Literal[ "insert", "update", "delete", + "select", + "function", "create", "alter", "drop", "begin", "commit", "rollback", + "savepoint", "attach", "detach", "pragma", "analyze", "reindex", + "unknown", ] SQLTargetType = Literal[ "table", "index", "view", "trigger", + "virtual-table", "schema", + "statement", "transaction", "database", "pragma", + "function", "unknown", ] SQLTableOperation = Literal["read", "insert", "update", "delete"] +SQLSchemaOperation = Literal["create", "drop"] +SQLSchemaTargetType = Literal["index", "table", "trigger", "view", "virtual-table"] @dataclass(frozen=True) @@ -73,19 +82,34 @@ _ACTION_TO_OPERATION: dict[int, SQLTableOperation] = { } # Values are (operation, target_type) pairs used to construct Operation objects. -_CREATE_ACTIONS = { +_CREATE_ACTIONS: dict[int, tuple[SQLSchemaOperation, SQLSchemaTargetType]] = { sqlite3.SQLITE_CREATE_INDEX: ("create", "index"), sqlite3.SQLITE_CREATE_TABLE: ("create", "table"), sqlite3.SQLITE_CREATE_TRIGGER: ("create", "trigger"), sqlite3.SQLITE_CREATE_VIEW: ("create", "view"), } -_DROP_ACTIONS = { +_DROP_ACTIONS: dict[int, tuple[SQLSchemaOperation, SQLSchemaTargetType]] = { sqlite3.SQLITE_DROP_INDEX: ("drop", "index"), sqlite3.SQLITE_DROP_TABLE: ("drop", "table"), sqlite3.SQLITE_DROP_TRIGGER: ("drop", "trigger"), sqlite3.SQLITE_DROP_VIEW: ("drop", "view"), } -for action_name, operation, target_type in ( + + +def _add_schema_action( + action_name: str, + operation: SQLSchemaOperation, + target_type: SQLSchemaTargetType, +) -> None: + action_value = getattr(sqlite3, action_name, None) + if action_value is not None: + actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS + actions[action_value] = (operation, target_type) + + +_TEMP_SCHEMA_ACTIONS: tuple[ + tuple[str, SQLSchemaOperation, SQLSchemaTargetType], ... +] = ( ("SQLITE_CREATE_TEMP_INDEX", "create", "index"), ("SQLITE_CREATE_TEMP_TABLE", "create", "table"), ("SQLITE_CREATE_TEMP_TRIGGER", "create", "trigger"), @@ -94,13 +118,76 @@ for action_name, operation, target_type in ( ("SQLITE_DROP_TEMP_TABLE", "drop", "table"), ("SQLITE_DROP_TEMP_TRIGGER", "drop", "trigger"), ("SQLITE_DROP_TEMP_VIEW", "drop", "view"), -): - action_value = getattr(sqlite3, action_name, None) - if action_value is not None: - actions = _CREATE_ACTIONS if operation == "create" else _DROP_ACTIONS - actions[action_value] = (operation, target_type) +) +for schema_action in _TEMP_SCHEMA_ACTIONS: + _add_schema_action(*schema_action) -_SQLITE_SCHEMA_TABLES = {"sqlite_master", "sqlite_schema"} +_VTABLE_SCHEMA_ACTIONS: tuple[ + tuple[str, SQLSchemaOperation, SQLSchemaTargetType], ... +] = ( + ("SQLITE_CREATE_VTABLE", "create", "virtual-table"), + ("SQLITE_DROP_VTABLE", "drop", "virtual-table"), +) +for schema_action in _VTABLE_SCHEMA_ACTIONS: + _add_schema_action(*schema_action) + +_SQLITE_SCHEMA_TABLES = { + "sqlite_master", + "sqlite_schema", + "sqlite_temp_master", + "sqlite_temp_schema", +} +_SQLITE_INTERNAL_SCHEMA_FUNCTIONS = { + "length", + "like", + "printf", + "sqlite_drop_column", + "sqlite_rename_column", + "sqlite_rename_quotefix", + "sqlite_rename_table", + "sqlite_rename_test", + "substr", +} + +_AUTHORIZER_ACTION_NAMES = { + getattr(sqlite3, name): name + for name in ( + "SQLITE_CREATE_INDEX", + "SQLITE_CREATE_TABLE", + "SQLITE_CREATE_TEMP_INDEX", + "SQLITE_CREATE_TEMP_TABLE", + "SQLITE_CREATE_TEMP_TRIGGER", + "SQLITE_CREATE_TEMP_VIEW", + "SQLITE_CREATE_TRIGGER", + "SQLITE_CREATE_VIEW", + "SQLITE_DELETE", + "SQLITE_DROP_INDEX", + "SQLITE_DROP_TABLE", + "SQLITE_DROP_TEMP_INDEX", + "SQLITE_DROP_TEMP_TABLE", + "SQLITE_DROP_TEMP_TRIGGER", + "SQLITE_DROP_TEMP_VIEW", + "SQLITE_DROP_TRIGGER", + "SQLITE_DROP_VIEW", + "SQLITE_INSERT", + "SQLITE_PRAGMA", + "SQLITE_READ", + "SQLITE_SELECT", + "SQLITE_TRANSACTION", + "SQLITE_UPDATE", + "SQLITE_ATTACH", + "SQLITE_DETACH", + "SQLITE_ALTER_TABLE", + "SQLITE_REINDEX", + "SQLITE_ANALYZE", + "SQLITE_CREATE_VTABLE", + "SQLITE_DROP_VTABLE", + "SQLITE_FUNCTION", + "SQLITE_SAVEPOINT", + "SQLITE_RECURSIVE", + ) + if hasattr(sqlite3, name) +} def analyze_sql_tables( @@ -287,6 +374,52 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK + if action == sqlite3.SQLITE_SELECT: + record( + "select", + "statement", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=None, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_FUNCTION and arg2 is not None: + record( + "function", + "function", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target=arg2, + source=source, + ) + return sqlite3.SQLITE_OK + + if action == sqlite3.SQLITE_SAVEPOINT and arg1 is not None: + record( + "savepoint", + "transaction", + database=None, + table=None, + sqlite_schema=sqlite_schema, + target="{} {}".format(arg1, arg2) if arg2 is not None else arg1, + source=source, + ) + return sqlite3.SQLITE_OK + + action_name = _AUTHORIZER_ACTION_NAMES.get(action, "SQLITE_{}".format(action)) + record( + "unknown", + "unknown", + database=database_for_schema(sqlite_schema), + table=None, + sqlite_schema=sqlite_schema, + target=action_name, + source=source, + ) return sqlite3.SQLITE_OK conn.set_authorizer(authorizer) @@ -296,10 +429,46 @@ def analyze_sql_tables( conn.set_authorizer(None) has_schema_operation = any( - key.target_type in {"table", "index", "view", "trigger"} + key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} for key in operations ) + dropped_tables = { + (key.database, key.table) + for key in operations + if key.operation == "drop" and key.target_type == "table" + } + + def key_is_drop_table_delete(key: OperationKey) -> bool: + return ( + key.operation == "delete" + and key.target_type == "table" + and (key.database, key.table) in dropped_tables + ) + + has_user_table_access_in_schema_operation = any( + key.operation in {"read", "insert", "update", "delete"} + and key.target_type == "table" + and not key.internal + and not key_is_drop_table_delete(key) + for key in operations + ) + + def operation_is_internal(key: OperationKey) -> bool: + if key.internal or (has_schema_operation and key.target_type == "schema"): + return True + if has_schema_operation and key.operation == "reindex": + return True + if ( + has_schema_operation + and not has_user_table_access_in_schema_operation + and key.operation == "function" + and key.target in _SQLITE_INTERNAL_SCHEMA_FUNCTIONS + ): + return True + if key_is_drop_table_delete(key): + return True + return False return SQLAnalysis( operations=tuple( @@ -312,8 +481,7 @@ def analyze_sql_tables( target=key.target, columns=tuple(sorted(columns)), source=key.source, - internal=key.internal - or (has_schema_operation and key.target_type == "schema"), + internal=operation_is_internal(key), ) for key, columns in operations.items() ) diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index cead8926..19006ac5 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -99,9 +99,7 @@ class ExecuteWriteView(BaseView): "parameter_names": parameter_names, "parameter_values": parameter_values, "analysis_error": analysis_error, - "analysis_rows": [ - row for row in analysis_rows if row["operation"] != "read" - ], + "analysis_rows": analysis_rows, "execution_message": execution_message, "execution_links": execution_links, "execution_ok": execution_ok, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 922f4e52..05a0d73e 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -5,6 +5,7 @@ from datasette.resources import DatabaseResource from datasette.stored_queries import ( StoredQuery, operation_is_write, + operation_should_be_ignored, permission_for_operation, ) from datasette.utils import ( @@ -203,29 +204,10 @@ async def _analyze_user_query(datasette, db, sql, *, actor): return is_write, derived, analysis -def _semantic_schema_operation_is_present(operations: tuple[Operation, ...]) -> bool: - return any( - operation.operation in {"create", "alter", "drop"} - and operation.target_type in {"table", "index", "view", "trigger"} - for operation in operations - ) - - def _display_operations(analysis: SQLAnalysis) -> list[Operation]: - has_semantic_schema_operation = _semantic_schema_operation_is_present( - analysis.operations - ) operations = [] for operation in analysis.operations: - if operation.internal and has_semantic_schema_operation: - continue - if has_semantic_schema_operation and operation.operation in { - "read", - "insert", - "update", - "delete", - "reindex", - }: + if operation_should_be_ignored(operation): continue operations.append(operation) return operations @@ -252,6 +234,7 @@ async def _analysis_rows_with_permissions( datasette, analysis: SQLAnalysis, actor ) -> list[dict[str, object]]: rows = _analysis_rows(analysis) + is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): permission = permission_for_operation(operation) if permission: @@ -261,7 +244,7 @@ async def _analysis_rows_with_permissions( resource=resource, actor=actor, ) - elif operation_is_write(operation): + elif is_write: row["allowed"] = False else: row["allowed"] = None @@ -360,7 +343,7 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): "ok": analysis_error is None, "parameters": parameter_names, "analysis_error": analysis_error, - "analysis_rows": [row for row in analysis_rows if row["operation"] != "read"], + "analysis_rows": analysis_rows, "execute_disabled": bool( (not sql) or analysis_error @@ -374,6 +357,7 @@ async def _query_create_analysis_data(datasette, db, sql, actor): parameter_names = [] analysis_rows = [] analysis_error = None + analysis: SQLAnalysis | None = None if has_sql: try: parameter_names = _derived_query_parameters(sql) @@ -390,9 +374,7 @@ async def _query_create_analysis_data(datasette, db, sql, actor): "analysis_error": analysis_error, "analysis_rows": analysis_rows, "has_sql": has_sql, - "analysis_is_write": bool( - analysis_rows and any(row["required_permission"] for row in analysis_rows) - ), + "analysis_is_write": _analysis_is_write(analysis) if analysis else False, "save_disabled": bool( (not has_sql) or analysis_error diff --git a/tests/test_queries.py b/tests/test_queries.py index 4b8a6486..97ec973f 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1181,11 +1181,10 @@ async def test_create_query_ui_and_arbitrary_sql_save_link(): assert 'Required permission' in create_response.text assert 'Source' not in create_response.text assert "read" in create_response.text + assert "view-table" in create_response.text assert ( - create_response.text.count( - 'n/a' - ) - == 2 + 'n/a' + not in create_response.text ) assert create_response.text.index( 'value="Save query"' @@ -1255,9 +1254,9 @@ async def test_create_query_analyze_endpoint_uses_sql_only(): "operation": "read", "database": "data", "table": "dogs", - "required_permission": "", + "required_permission": "view-table", "source": None, - "allowed": None, + "allowed": True, } ] @@ -1375,7 +1374,8 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'Required permission' in response.text assert "insert" in response.text assert "update" in response.text - assert "read" not in response.text + assert "read" in response.text + assert "view-table" in response.text assert 'action="/data/-/execute-write"' in response.text assert "insert into dogs (name) values ('Cleo')" in response.text assert (await db.execute("select count(*) from dogs")).first()[0] == 0 @@ -1643,6 +1643,127 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_insert_select_requires_view_table_on_source(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + }, + "public_log": {"permissions": {"insert-row": {"id": "writer"}}}, + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_insert_select_source", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("create table public_log (value text)") + await db.execute_write("insert into secret values ('sensitive')") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "insert into public_log(value) select value from secret"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need view-table on data/secret" + ] + assert (await db.execute("select value from public_log")).dicts() == [] + + +@pytest.mark.asyncio +async def test_execute_write_create_table_as_select_requires_view_table_on_source(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "creator"}, + "execute-write-sql": {"id": "creator"}, + "create-table": {"id": "creator"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_create_as_select_source", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("insert into secret values ('sensitive')") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "creator"}, + json={"sql": "create table copied_secret as select value from secret"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need view-table on data/secret" + ] + assert not await db.table_exists("copied_secret") + + +@pytest.mark.asyncio +async def test_execute_write_rejects_function_operations(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_function_operation", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "insert into dogs (name) values (upper('cleo'))"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: function function" + ] + assert (await db.execute("select name from dogs")).dicts() == [] + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( @@ -1733,6 +1854,7 @@ async def test_execute_write_alter_and_drop_table_use_schema_permissions(): "permissions": { "alter-table": {"id": "alterer"}, "drop-table": {"id": "dropper"}, + "view-table": {"id": "alterer"}, } } }, diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 5306a515..2ae11502 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -127,6 +127,100 @@ def test_analyze_transaction_operation(conn): ] +def test_analyze_savepoint_operation(conn): + analysis = analyze_sql_tables(conn, "savepoint s", database_name="data") + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "savepoint", + "target_type": "transaction", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "BEGIN s", + "columns": (), + "source": None, + "internal": False, + } + ] + + +def test_analyze_function_operation(conn): + analysis = analyze_sql_tables( + conn, + "insert into dogs (name) values (upper(:name))", + {"name": "Cleo"}, + database_name="data", + ) + + assert { + ( + operation.operation, + operation.target_type, + operation.target, + operation.database, + operation.table, + ) + for operation in analysis.operations + } == { + ("insert", "table", "dogs", "data", "dogs"), + ("function", "function", "upper", None, None), + ("read", "table", "dogs", "data", "dogs"), + ("update", "table", "cats", "data", "cats"), + ("read", "table", "cats", "data", "cats"), + ("insert", "table", "log", "data", "log"), + } + + +def test_analyze_create_virtual_table_operation(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables( + conn, + "create virtual table docs using fts5(body)", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "create", + "target_type": "virtual-table", + "database": "data", + "sqlite_schema": "main", + "table": "docs", + "target": "docs", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + +def test_analyze_create_table_as_select_function_is_not_internal(): + conn = sqlite3.connect(":memory:") + try: + conn.execute("create table secret(value text)") + analysis = analyze_sql_tables( + conn, + "create table copied as select substr(value, 1, 1) from secret", + database_name="data", + ) + finally: + conn.close() + + assert { + "operation": "function", + "target_type": "function", + "database": None, + "sqlite_schema": None, + "table": None, + "target": "substr", + "columns": (), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + def test_analyze_insert_tables(conn): analysis = analyze_sql_tables( conn, From 03b2c66f6312b8317d87eb4c1326977f6f63b26d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 15:17:10 -0700 Subject: [PATCH 1196/1218] Require full row mutation permissions for raw SQL Raw SQL insert and update statements can have broader effects than their SQLite authorizer callbacks reveal. INSERT OR REPLACE and UPDATE OR REPLACE can delete conflicting rows while only surfacing insert or update operations. Expand table insert and update operations to require insert-row, update-row, and delete-row together. Keep delete operations mapped to delete-row, and update the analysis UI/API to report and evaluate multiple required permissions for a single operation. Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559083539 --- datasette/stored_queries.py | 108 ++++++++++++----- datasette/views/query_helpers.py | 27 +++-- tests/test_queries.py | 200 ++++++++++++++++++++++++++++++- 3 files changed, 290 insertions(+), 45 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index 4b0fe6a6..cf44a9ff 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -588,10 +588,25 @@ async def list_queries( ) -PermissionRequirement = tuple[str, Resource] +@dataclass(frozen=True) +class PermissionRequirement: + action: str + resource: Resource -def permission_for_operation(operation: Operation) -> PermissionRequirement | None: +def row_mutation_requirements( + database: str, table: str +) -> tuple[PermissionRequirement, ...]: + resource = TableResource(database=database, table=table) + return tuple( + PermissionRequirement(action=action, resource=resource) + for action in ("insert-row", "update-row", "delete-row") + ) + + +def permission_requirements_for_operation( + operation: Operation, +) -> tuple[PermissionRequirement, ...]: if ( operation.operation == "read" and operation.target_type == "table" @@ -599,31 +614,45 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "view-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="view-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) - write_actions = { - "insert": "insert-row", - "update": "update-row", - "delete": "delete-row", - } - action = write_actions.get(operation.operation) if ( - action + operation.operation in {"insert", "update"} + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return row_mutation_requirements( + database=operation.database, + table=operation.table, + ) + if ( + operation.operation == "delete" and operation.target_type == "table" and operation.database is not None and operation.table is not None ): return ( - action, - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="delete-row", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if operation.operation == "create" and operation.target_type == "table": if operation.database is None: - return None + return () return ( - "create-table", - DatabaseResource(database=operation.database), + PermissionRequirement( + action="create-table", + resource=DatabaseResource(database=operation.database), + ), ) if ( operation.operation == "alter" @@ -632,8 +661,12 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "alter-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if ( operation.operation == "drop" @@ -642,8 +675,12 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "drop-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="drop-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) if ( operation.operation in {"create", "drop"} @@ -652,10 +689,14 @@ def permission_for_operation(operation: Operation) -> PermissionRequirement | No and operation.table is not None ): return ( - "alter-table", - TableResource(database=operation.database, table=operation.table), + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), ) - return None + return () def operation_should_be_ignored(operation: Operation) -> bool: @@ -704,20 +745,23 @@ async def ensure_query_write_permissions( for operation in analysis.operations: if operation_should_be_ignored(operation): continue - permission = permission_for_operation(operation) - if permission is None: + permissions = permission_requirements_for_operation(operation) + if not permissions: raise Forbidden( "Unsupported SQL operation: {} {}".format( operation.operation, operation.target_type ) ) - action, resource = permission if operation.database != database: raise Forbidden("Writable queries may not access attached databases") - if not await datasette.allowed( - action=action, - resource=resource, - actor=actor, - ): - raise Forbidden(f"Permission denied: need {action} on {resource}") + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + raise Forbidden( + f"Permission denied: need {permission.action} " + f"on {permission.resource}" + ) return analysis diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 05a0d73e..7f3ef1bc 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -6,7 +6,7 @@ from datasette.stored_queries import ( StoredQuery, operation_is_write, operation_should_be_ignored, - permission_for_operation, + permission_requirements_for_operation, ) from datasette.utils import ( named_parameters as derive_named_parameters, @@ -216,8 +216,10 @@ def _display_operations(analysis: SQLAnalysis) -> list[Operation]: def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): - permission = permission_for_operation(operation) - required_permission = permission[0] if permission else "" + permissions = permission_requirements_for_operation(operation) + required_permission = ", ".join( + permission.action for permission in permissions + ) rows.append( { "operation": operation.operation, @@ -236,14 +238,17 @@ async def _analysis_rows_with_permissions( rows = _analysis_rows(analysis) is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): - permission = permission_for_operation(operation) - if permission: - action, resource = permission - row["allowed"] = await datasette.allowed( - action=action, - resource=resource, - actor=actor, - ) + permissions = permission_requirements_for_operation(operation) + if permissions: + row["allowed"] = True + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + row["allowed"] = False + break elif is_write: row["allowed"] = False else: diff --git a/tests/test_queries.py b/tests/test_queries.py index 97ec973f..fcd19d1c 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -508,6 +508,8 @@ async def test_analyze_write_query_requires_table_permissions(): "dogs": { "permissions": { "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, } } } @@ -1429,7 +1431,7 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): "operation": "insert", "database": "data", "table": "dogs", - "required_permission": "insert-row", + "required_permission": "insert-row, update-row, delete-row", "source": None, "allowed": True, } @@ -1627,6 +1629,40 @@ async def test_execute_write_post_requires_database_and_table_permissions(): } } } + missing_update_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert missing_update_permission.status_code == 403 + assert missing_update_permission.json()["errors"] == [ + "Permission denied: need update-row on data/dogs" + ] + + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ + "update-row" + ] = {"id": "writer"} + missing_delete_permission = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": "insert into dogs (name) values (:name)", + "params": {"name": "Cleo"}, + }, + ) + + assert missing_delete_permission.status_code == 403 + assert missing_delete_permission.json()["errors"] == [ + "Permission denied: need delete-row on data/dogs" + ] + + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ + "delete-row" + ] = {"id": "writer"} allowed = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1643,6 +1679,156 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.asyncio +async def test_execute_write_insert_or_replace_requires_delete_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_insert_or_replace", name="data") + await db.execute_write( + "create table users (id integer primary key, email text unique)" + ) + await db.execute_write( + "insert into users (id, email) values " + "(1, 'a@example.com'), (2, 'b@example.com')" + ) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": ( + "insert or replace into users(id, email) " + "values (3, 'b@example.com')" + ) + }, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need delete-row on data/users" + ] + assert (await db.execute("select id, email from users order by id")).dicts() == [ + {"id": 1, "email": "a@example.com"}, + {"id": 2, "email": "b@example.com"}, + ] + + +@pytest.mark.asyncio +async def test_execute_write_update_or_replace_requires_delete_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_update_or_replace", name="data") + await db.execute_write( + "create table users (id integer primary key, email text unique)" + ) + await db.execute_write( + "insert into users (id, email) values " + "(1, 'a@example.com'), (2, 'b@example.com')" + ) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "update or replace users set email = 'b@example.com' where id = 1"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need delete-row on data/users" + ] + assert (await db.execute("select id, email from users order by id")).dicts() == [ + {"id": 1, "email": "a@example.com"}, + {"id": 2, "email": "b@example.com"}, + ] + + +@pytest.mark.asyncio +async def test_execute_write_update_requires_insert_row_permission(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "users": { + "permissions": { + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + "view-table": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_update_requires_insert", name="data") + await db.execute_write("create table users (id integer primary key, name text)") + await db.execute_write("insert into users (id, name) values (1, 'Alice')") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "update users set name = 'Alicia' where id = 1"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Permission denied: need insert-row on data/users" + ] + assert (await db.execute("select name from users where id = 1")).first()[0] == "Alice" + + @pytest.mark.asyncio async def test_execute_write_insert_select_requires_view_table_on_source(): ds = Datasette( @@ -1659,7 +1845,13 @@ async def test_execute_write_insert_select_requires_view_table_on_source(): "secret": { "permissions": {"view-table": {"id": "someone-else"}} }, - "public_log": {"permissions": {"insert-row": {"id": "writer"}}}, + "public_log": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + }, }, } } @@ -1740,6 +1932,8 @@ async def test_execute_write_rejects_function_operations(): "dogs": { "permissions": { "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, } } }, @@ -2117,6 +2311,8 @@ async def test_user_writable_query_execution_rechecks_table_permissions(): "dogs": { "permissions": { "insert-row": {"id": "alice"}, + "update-row": {"id": "alice"}, + "delete-row": {"id": "alice"}, } } }, From 1932f8429fd3259d48fb848fdf893f9a004276e9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:14:50 -0700 Subject: [PATCH 1197/1218] Deny user-authored schema table reads in write SQL Stop marking sqlite_master and sqlite_schema reads as internal as soon as the SQLite authorizer reports them. The later DDL-aware pass still treats schema catalog access as internal when it accompanies semantic CREATE, ALTER, or DROP operations. This makes explicit catalog reads in write SQL fall through to the deny-by-default path as unsupported read schema operations, preventing queries from copying private table definitions into writable tables. Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 --- datasette/utils/sql_analysis.py | 1 - datasette/views/query_helpers.py | 4 +- tests/test_queries.py | 73 +++++++++++++++++++++++++++----- tests/test_utils_sql_analysis.py | 20 +++++++++ 4 files changed, 84 insertions(+), 14 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 8963da77..91216501 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -256,7 +256,6 @@ def analyze_sql_tables( target=arg1, source=source, column=column, - internal=target_type == "schema", ) return sqlite3.SQLITE_OK diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 7f3ef1bc..0e3d4e01 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -217,9 +217,7 @@ def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): permissions = permission_requirements_for_operation(operation) - required_permission = ", ".join( - permission.action for permission in permissions - ) + required_permission = ", ".join(permission.action for permission in permissions) rows.append( { "operation": operation.operation, diff --git a/tests/test_queries.py b/tests/test_queries.py index fcd19d1c..40bc5052 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1643,9 +1643,9 @@ async def test_execute_write_post_requires_database_and_table_permissions(): "Permission denied: need update-row on data/dogs" ] - ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ - "update-row" - ] = {"id": "writer"} + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"]["update-row"] = { + "id": "writer" + } missing_delete_permission = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1660,9 +1660,9 @@ async def test_execute_write_post_requires_database_and_table_permissions(): "Permission denied: need delete-row on data/dogs" ] - ds.config["databases"]["data"]["tables"]["dogs"]["permissions"][ - "delete-row" - ] = {"id": "writer"} + ds.config["databases"]["data"]["tables"]["dogs"]["permissions"]["delete-row"] = { + "id": "writer" + } allowed = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, @@ -1719,8 +1719,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): actor={"id": "writer"}, json={ "sql": ( - "insert or replace into users(id, email) " - "values (3, 'b@example.com')" + "insert or replace into users(id, email) " "values (3, 'b@example.com')" ) }, ) @@ -1773,7 +1772,9 @@ async def test_execute_write_update_or_replace_requires_delete_row_permission(): denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, - json={"sql": "update or replace users set email = 'b@example.com' where id = 1"}, + json={ + "sql": "update or replace users set email = 'b@example.com' where id = 1" + }, ) assert denied_response.status_code == 403 @@ -1826,7 +1827,9 @@ async def test_execute_write_update_requires_insert_row_permission(): assert denied_response.json()["errors"] == [ "Permission denied: need insert-row on data/users" ] - assert (await db.execute("select name from users where id = 1")).first()[0] == "Alice" + assert (await db.execute("select name from users where id = 1")).first()[ + 0 + ] == "Alice" @pytest.mark.asyncio @@ -1876,6 +1879,56 @@ async def test_execute_write_insert_select_requires_view_table_on_source(): assert (await db.execute("select value from public_log")).dicts() == [] +@pytest.mark.asyncio +async def test_execute_write_rejects_sqlite_master_reads(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "secret": { + "permissions": {"view-table": {"id": "someone-else"}} + }, + "log": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + }, + }, + } + } + }, + ) + db = ds.add_memory_database("execute_write_sqlite_master_read", name="data") + await db.execute_write("create table secret (value text)") + await db.execute_write("create table log (value text)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={ + "sql": ( + "insert into log " "select sql from sqlite_master where name = 'secret'" + ) + }, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: read schema" + ] + assert (await db.execute("select value from log")).dicts() == [] + + @pytest.mark.asyncio async def test_execute_write_create_table_as_select_requires_view_table_on_source(): ds = Datasette( diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index 2ae11502..f931be51 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -65,6 +65,26 @@ def test_analyze_uses_sqlite_schema_as_default_database(conn): } +def test_analyze_user_schema_table_read_is_not_internal(conn): + analysis = analyze_sql_tables( + conn, + "insert into log select sql from sqlite_master where name = 'dogs'", + database_name="data", + ) + + assert { + "operation": "read", + "target_type": "schema", + "database": "data", + "sqlite_schema": "main", + "table": None, + "target": "sqlite_master", + "columns": ("name", "sql"), + "source": None, + "internal": False, + } in [operation_dict(operation) for operation in analysis.operations] + + def operation_dict(operation): return { "operation": operation.operation, From 951f5a9f306ebe0bb8b3668ee698dc6cb6051d78 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:30:05 -0700 Subject: [PATCH 1198/1218] Detect VACUUM in SQL analysis Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 --- datasette/stored_queries.py | 1 + datasette/utils/sql_analysis.py | 33 +++++++++++++++++++++++- tests/test_queries.py | 31 ++++++++++++++++++++++ tests/test_utils_sql_analysis.py | 44 ++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index cf44a9ff..6746124a 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -720,6 +720,7 @@ def operation_is_write(operation: Operation) -> bool: "pragma", "analyze", "reindex", + "vacuum", "unknown", } diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index 91216501..f2eb903f 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -22,6 +22,7 @@ SQLOperation = Literal[ "pragma", "analyze", "reindex", + "vacuum", "unknown", ] SQLTargetType = Literal[ @@ -423,10 +424,40 @@ def analyze_sql_tables( conn.set_authorizer(authorizer) try: - conn.execute("EXPLAIN " + sql, params if params is not None else {}).fetchall() + explain_rows = conn.execute( + "EXPLAIN " + sql, params if params is not None else {} + ).fetchall() finally: conn.set_authorizer(None) + if not operations: + vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) + if vacuum_row is not None: + schema_by_index = { + row[0]: row[1] for row in conn.execute("PRAGMA database_list") + } + sqlite_schema = schema_by_index.get(vacuum_row[2]) + database = database_for_schema(sqlite_schema) + record( + "vacuum", + "database", + database=database, + table=None, + sqlite_schema=sqlite_schema, + target=database, + source=None, + ) + else: + record( + "unknown", + "statement", + database=database_name, + table=None, + sqlite_schema=None, + target=None, + source=None, + ) + has_schema_operation = any( key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} diff --git a/tests/test_queries.py b/tests/test_queries.py index 40bc5052..bf371a80 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2011,6 +2011,37 @@ async def test_execute_write_rejects_function_operations(): assert (await db.execute("select name from dogs")).dicts() == [] +@pytest.mark.asyncio +async def test_execute_write_rejects_vacuum_operation(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("execute_write_vacuum_operation", name="data") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + json={"sql": "vacuum"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Unsupported SQL operation: vacuum database" + ] + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index f931be51..df4b3625 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -129,6 +129,50 @@ def test_analyze_create_table_operation(): ] +def test_analyze_vacuum_operation(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables(conn, "vacuum", database_name="data") + finally: + conn.close() + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "vacuum", + "target_type": "database", + "database": "data", + "sqlite_schema": "main", + "table": None, + "target": "data", + "columns": (), + "source": None, + "internal": False, + } + ] + + +def test_analyze_statement_with_no_authorizer_callbacks_is_unknown(): + conn = sqlite3.connect(":memory:") + try: + analysis = analyze_sql_tables(conn, "reindex", database_name="data") + finally: + conn.close() + + assert [operation_dict(operation) for operation in analysis.operations] == [ + { + "operation": "unknown", + "target_type": "statement", + "database": "data", + "sqlite_schema": None, + "table": None, + "target": None, + "columns": (), + "source": None, + "internal": False, + } + ] + + def test_analyze_transaction_operation(conn): analysis = analyze_sql_tables(conn, "commit", database_name="data") From 11bddc891918849e7c4a006c64d0217072aa499c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 16:51:12 -0700 Subject: [PATCH 1199/1218] Deny VACUUM in user-authored SQL Reject VACUUM explicitly during write-query permission analysis so arbitrary write SQL and untrusted stored write queries cannot run it, even when the actor has execute-write-sql. Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4559073803 (P3) --- datasette/stored_queries.py | 16 ++++ datasette/views/database.py | 23 ++++- datasette/views/execute_write.py | 6 +- datasette/views/query_helpers.py | 9 +- tests/test_queries.py | 153 ++++++++++++++++++++++++++++++- 5 files changed, 199 insertions(+), 8 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index 6746124a..fd1cabf3 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -15,6 +15,13 @@ if TYPE_CHECKING: UNCHANGED = object() + +class QueryWriteRejected(Exception): + def __init__(self, message: str): + self.message = message + super().__init__(message) + + QUERY_OPTION_FIELDS = ( "hide_sql", "fragment", @@ -703,6 +710,12 @@ def operation_should_be_ignored(operation: Operation) -> bool: return operation.internal or operation.operation == "select" +def operation_forbidden_message(operation: Operation) -> str | None: + if operation.operation == "vacuum": + return "VACUUM is not allowed in user-supplied SQL" + return None + + def operation_is_write(operation: Operation) -> bool: return operation.operation in { "insert", @@ -746,6 +759,9 @@ async def ensure_query_write_permissions( for operation in analysis.operations: if operation_should_be_ignored(operation): continue + forbidden_message = operation_forbidden_message(operation) + if forbidden_message is not None: + raise QueryWriteRejected(forbidden_message) permissions = permission_requirements_for_operation(operation) if not permissions: raise Forbidden( diff --git a/datasette/views/database.py b/datasette/views/database.py index b558b002..ae1cf375 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,7 +13,7 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted from datasette.resources import DatabaseResource, QueryResource -from datasette.stored_queries import stored_query_to_dict +from datasette.stored_queries import QueryWriteRejected, stored_query_to_dict from datasette.utils import ( add_cors_headers, await_me_maybe, @@ -453,9 +453,24 @@ class QueryView(View): ): raise Forbidden("You do not have permission to view this query") - await _ensure_stored_query_execution_permissions( - datasette, db, stored_query, request.actor - ) + try: + await _ensure_stored_query_execution_permissions( + datasette, db, stored_query, request.actor + ) + except QueryWriteRejected as ex: + if request.headers.get("accept") == "application/json" or request.args.get( + "_json" + ): + return Response.json( + { + "ok": False, + "message": ex.message, + "redirect": None, + }, + status=403, + ) + datasette.add_message(request, ex.message, datasette.ERROR) + return Response.redirect(stored_query.on_error_redirect or request.path) # If database is immutable, return an error if not db.is_mutable: diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 19006ac5..57c4d78e 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -163,13 +163,15 @@ class ExecuteWriteView(BaseView): except QueryValidationError as ex: if _wants_json(request, is_json, data): return _block_framing(_error([ex.message], ex.status)) + if ex.flash: + self.ds.add_message(request, ex.message, self.ds.ERROR) return await self._render_form( request, db, sql=sql or "", parameter_values=provided_params, - analysis_error=ex.message, - execution_message=ex.message, + analysis_error=None if ex.flash else ex.message, + execution_message=None if ex.flash else ex.message, execution_ok=False, status=ex.status, ) diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 0e3d4e01..92328ff3 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -3,6 +3,7 @@ import re from datasette.resources import DatabaseResource from datasette.stored_queries import ( + QueryWriteRejected, StoredQuery, operation_is_write, operation_should_be_ignored, @@ -47,9 +48,11 @@ _query_write_fields = { class QueryValidationError(Exception): - def __init__(self, message, status=400): + def __init__(self, message, status=400, *, flash=False): self.message = message self.status = status + self.flash = flash + super().__init__(message) def _actor_id(actor): @@ -194,6 +197,8 @@ async def _analyze_user_query(datasette, db, sql, *, actor): await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis ) + except QueryWriteRejected as ex: + raise QueryValidationError(ex.message, status=403, flash=True) from ex except Forbidden as ex: raise QueryValidationError(str(ex), status=403) from ex else: @@ -297,6 +302,8 @@ async def _prepare_execute_write(datasette, db, sql, params, actor): await datasette.ensure_query_write_permissions( db.name, sql, actor=actor, analysis=analysis ) + except QueryWriteRejected as ex: + raise QueryValidationError(ex.message, status=403, flash=True) from ex except Forbidden as ex: raise QueryValidationError(str(ex), status=403) from ex return parameter_names, params, analysis diff --git a/tests/test_queries.py b/tests/test_queries.py index bf371a80..b6e1637d 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2038,10 +2038,161 @@ async def test_execute_write_rejects_vacuum_operation(): assert denied_response.status_code == 403 assert denied_response.json()["errors"] == [ - "Unsupported SQL operation: vacuum database" + "VACUUM is not allowed in user-supplied SQL" ] +@pytest.mark.asyncio +async def test_execute_write_form_rejects_vacuum_operation_with_flash_error(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("execute_write_vacuum_operation_form", name="data") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "writer"}, + data={"sql": "vacuum"}, + ) + + assert denied_response.status_code == 403 + assert ( + '

VACUUM is not allowed in user-supplied SQL

' + in denied_response.text + ) + assert denied_response.text.count("VACUUM is not allowed in user-supplied SQL") == 1 + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_vacuum_operation(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("stored_query_vacuum_operation", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "vacuum_db", + "vacuum", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + denied_response = await ds.client.post( + "/data/vacuum_db?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert denied_response.status_code == 403 + assert "VACUUM is not allowed in user-supplied SQL" in denied_response.text + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_vacuum_operation_with_flash_error(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("stored_query_vacuum_operation_form", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "vacuum_db", + "vacuum", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + denied_response = await ds.client.post( + "/data/vacuum_db", + actor={"id": "writer"}, + data={}, + ) + + assert denied_response.status_code == 302 + assert denied_response.headers["location"] == "/data/vacuum_db" + assert ds.unsign(denied_response.cookies["ds_messages"], "messages") == [ + ["VACUUM is not allowed in user-supplied SQL", ds.ERROR] + ] + + +@pytest.mark.asyncio +async def test_trusted_stored_write_query_skips_vacuum_filtering(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + } + } + } + }, + ) + ds.add_memory_database("trusted_stored_query_vacuum", name="data") + await ds.invoke_startup() + await ds.add_query( + "data", + "trusted_vacuum", + "vacuum", + is_write=True, + is_trusted=True, + source="config", + ) + + response = await ds.client.post( + "/data/trusted_vacuum?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( From 0c5053cdf64a0dc2d1e9808fa712b88233760512 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 27 May 2026 17:26:50 -0700 Subject: [PATCH 1200/1218] Docs for //-/execute-write JSON API Closes #2750, refs #2742 --- docs/json_api.rst | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/docs/json_api.rst b/docs/json_api.rst index 48c70af6..fffc16d7 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -505,6 +505,68 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. The token will need to have the specified :ref:`authentication_permissions`. +.. _ExecuteWriteView: + +Executing write SQL +~~~~~~~~~~~~~~~~~~~ + +Actors with the :ref:`actions_execute_write_sql` permission can execute arbitrary writable SQL against a mutable database using ``/-/execute-write``. + +:: + + POST //-/execute-write + Content-Type: application/json + Authorization: Bearer dstok_ + +The request body must include a ``"sql"`` string. Named SQL parameters can be provided using the optional ``"params"`` object: + +.. code-block:: json + + { + "sql": "insert into dogs (name) values (:name)", + "params": { + "name": "Cleo" + } + } + +The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. + +Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. + +A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: + +The shape of the ``"analysis"`` block is not yet considered a stable API and may change in future Datasette releases. + +.. code-block:: json + + { + "ok": true, + "message": "Query executed, 1 row affected", + "rowcount": 1, + "analysis": [ + { + "operation": "insert", + "database": "data", + "table": "dogs", + "required_permission": "insert-row, update-row, delete-row", + "source": null + } + ] + } + +If SQLite reports ``-1`` for the row count, the message will be ``"Query executed"``. + +Errors use the standard Datasette error format: + +.. code-block:: json + + { + "ok": false, + "errors": [ + "Permission denied: need execute-write-sql" + ] + } + .. _TableInsertView: Inserting rows From bcd989f4f8802a73a60c75f9bda77649c1347986 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 08:36:59 -0700 Subject: [PATCH 1201/1218] Detect and disallow insert to virtual/shadow table Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4565727978 --- datasette/stored_queries.py | 5 + datasette/utils/sql_analysis.py | 21 ++++- datasette/utils/sqlite.py | 112 ++++++++++++++++++++++ tests/test_queries.py | 153 +++++++++++++++++++++++++++++++ tests/test_utils.py | 45 ++++++++- tests/test_utils_sql_analysis.py | 47 ++++++++++ 6 files changed, 381 insertions(+), 2 deletions(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index fd1cabf3..b5aea221 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -713,6 +713,11 @@ def operation_should_be_ignored(operation: Operation) -> bool: def operation_forbidden_message(operation: Operation) -> str | None: if operation.operation == "vacuum": return "VACUUM is not allowed in user-supplied SQL" + if operation.operation in {"insert", "update", "delete"}: + if operation.table_kind == "virtual": + return "Writes to virtual tables are not allowed in user-supplied SQL" + if operation.table_kind == "shadow": + return "Writes to shadow tables are not allowed in user-supplied SQL" return None diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index f2eb903f..a71fa315 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Literal -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import SQLiteTableType, sqlite3, sqlite_table_type SQLOperation = Literal[ "read", @@ -42,6 +42,7 @@ SQLTargetType = Literal[ SQLTableOperation = Literal["read", "insert", "update", "delete"] SQLSchemaOperation = Literal["create", "drop"] SQLSchemaTargetType = Literal["index", "table", "trigger", "view", "virtual-table"] +SQLTableKind = SQLiteTableType @dataclass(frozen=True) @@ -51,6 +52,7 @@ class Operation: database: str | None table: str | None sqlite_schema: str | None + table_kind: SQLTableKind | None = None target: str | None = None columns: tuple[str, ...] = () source: str | None = None @@ -500,6 +502,22 @@ def analyze_sql_tables( return True return False + table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + + def table_kind_for(key: OperationKey) -> SQLTableKind | None: + if ( + key.target_type != "table" + or key.operation not in {"read", "insert", "update", "delete"} + or key.table is None + ): + return None + cache_key = (key.sqlite_schema, key.table) + if cache_key not in table_kind_cache: + table_kind_cache[cache_key] = sqlite_table_type( + conn, key.table, schema=key.sqlite_schema + ) + return table_kind_cache[cache_key] + return SQLAnalysis( operations=tuple( Operation( @@ -508,6 +526,7 @@ def analyze_sql_tables( database=key.database, table=key.table, sqlite_schema=key.sqlite_schema, + table_kind=table_kind_for(key), target=key.target, columns=tuple(sorted(columns)), source=key.source, diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index d0a2d783..130c5f62 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -1,3 +1,6 @@ +import re +from typing import Literal + using_pysqlite3 = False try: import pysqlite3 as sqlite3 @@ -10,6 +13,18 @@ if hasattr(sqlite3, "enable_callback_tracebacks"): sqlite3.enable_callback_tracebacks(True) _cached_sqlite_version = None +SQLiteTableType = Literal["table", "view", "virtual", "shadow"] +_VIRTUAL_TABLE_MODULE_RE = re.compile( + r"\bCREATE\s+VIRTUAL\s+TABLE\b.*?\bUSING\s+([^\s(]+)", + re.IGNORECASE, +) +_VIRTUAL_TABLE_SHADOW_SUFFIXES = { + "fts3": ("_content", "_segdir", "_segments", "_stat", "_docsize"), + "fts4": ("_content", "_segdir", "_segments", "_stat", "_docsize"), + "fts5": ("_data", "_idx", "_docsize", "_content", "_config"), + "rtree": ("_node", "_parent", "_rowid"), + "rtree_i32": ("_node", "_parent", "_rowid"), +} def sqlite_version(): @@ -36,5 +51,102 @@ def supports_table_xinfo(): return sqlite_version() >= (3, 26, 0) +def supports_table_list(): + return sqlite_version() >= (3, 37, 0) + + def supports_generated_columns(): return sqlite_version() >= (3, 31, 0) + + +def sqlite_table_type( + conn, + table: str, + *, + schema: str | None = "main", +) -> SQLiteTableType | None: + if supports_table_list(): + try: + query = "select type from pragma_table_list where name = ?" + params: tuple[str, ...] = (table,) + if schema is not None: + query += " and schema = ?" + params = (table, schema) + row = conn.execute(query, params).fetchone() + if row is not None and row[0] in {"table", "view", "virtual", "shadow"}: + return row[0] + except sqlite3.DatabaseError: + pass + return _sqlite_table_type_from_schema(conn, table, schema=schema) + + +def _sqlite_table_type_from_schema( + conn, + table: str, + *, + schema: str | None = "main", +) -> SQLiteTableType | None: + schema_table = _sqlite_schema_table(schema) + try: + row = conn.execute( + "select type, sql from {} where name = ?".format(schema_table), + (table,), + ).fetchone() + except sqlite3.DatabaseError: + return None + if row is None: + return None + object_type, sql = row + if object_type == "view": + return "view" + if object_type != "table": + return None + if _virtual_table_module(sql) is not None: + return "virtual" + if _is_known_shadow_table(conn, table, schema=schema): + return "shadow" + return "table" + + +def _is_known_shadow_table( + conn, + table: str, + *, + schema: str | None = "main", +) -> bool: + schema_table = _sqlite_schema_table(schema) + try: + rows = conn.execute( + "select name, sql from {} where type = 'table'".format(schema_table) + ).fetchall() + except sqlite3.DatabaseError: + return False + for virtual_table, sql in rows: + module = _virtual_table_module(sql) + if module is None: + continue + for suffix in _VIRTUAL_TABLE_SHADOW_SUFFIXES.get(module, ()): + if table == virtual_table + suffix: + return True + return False + + +def _sqlite_schema_table(schema: str | None) -> str: + if schema is None or schema == "main": + return "sqlite_master" + if schema == "temp": + return "sqlite_temp_master" + return "{}.sqlite_master".format(_quote_identifier(schema)) + + +def _quote_identifier(value: str) -> str: + return '"{}"'.format(value.replace('"', '""')) + + +def _virtual_table_module(sql: str | None) -> str | None: + if not sql: + return None + match = _VIRTUAL_TABLE_MODULE_RE.search(sql) + if match is None: + return None + return match.group(1).strip("\"'[]`").lower() diff --git a/tests/test_queries.py b/tests/test_queries.py index b6e1637d..73f8f3cf 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -2193,6 +2193,159 @@ async def test_trusted_stored_write_query_skips_vacuum_filtering(): assert response.json()["ok"] is True +@pytest.mark.asyncio +async def test_execute_write_rejects_virtual_table_control_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_virtual_table_control", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs(docs) values('delete-all')"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to virtual tables are not allowed in user-supplied SQL" + ] + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_execute_write_rejects_regular_virtual_table_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_virtual_table_insert", name="data") + await db.execute_write("create virtual table docs using fts5(title, body)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs(rowid, title, body) values (1, 'a', 'b')"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to virtual tables are not allowed in user-supplied SQL" + ] + assert (await db.execute("select count(*) from docs")).first()[0] == 0 + + +@pytest.mark.asyncio +async def test_execute_write_rejects_shadow_table_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("execute_write_shadow_table_insert", name="data") + await db.execute_write("create virtual table docs using fts5(title, body)") + await ds.invoke_startup() + + denied_response = await ds.client.post( + "/data/-/execute-write", + actor={"id": "root"}, + json={"sql": "insert into docs_config(k, v) values ('x', 1)"}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [ + "Writes to shadow tables are not allowed in user-supplied SQL" + ] + assert (await db.execute("select count(*) from docs_config")).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_rejects_virtual_table_control_insert(): + ds = Datasette(memory=True, default_deny=True) + ds.root_enabled = True + db = ds.add_memory_database("stored_query_virtual_table_control", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + await ds.add_query( + "data", + "delete_all_docs", + "insert into docs(docs) values('delete-all')", + is_write=True, + is_trusted=False, + source="user", + owner_id="root", + ) + + denied_response = await ds.client.post( + "/data/delete_all_docs?_json=1", + actor={"id": "root"}, + data={}, + ) + + assert denied_response.status_code == 403 + assert denied_response.json()["message"] == ( + "Writes to virtual tables are not allowed in user-supplied SQL" + ) + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 1 + + +@pytest.mark.asyncio +async def test_trusted_stored_write_query_can_write_virtual_table(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + } + } + } + }, + ) + db = ds.add_memory_database("trusted_stored_query_virtual_table", name="data") + await db.execute_write(""" + create virtual table docs using fts5(title, body, content='') + """) + await db.execute_write(""" + insert into docs(rowid, title, body) values (1, 'hello', 'world') + """) + await ds.invoke_startup() + await ds.add_query( + "data", + "trusted_delete_all", + "insert into docs(docs) values('delete-all')", + is_write=True, + is_trusted=True, + source="config", + ) + + response = await ds.client.post( + "/data/trusted_delete_all?_json=1", + actor={"id": "writer"}, + data={}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + assert ( + await db.execute("select count(*) from docs where docs match 'hello'") + ).first()[0] == 0 + + @pytest.mark.asyncio async def test_execute_write_create_table_uses_create_table_permission(): ds = Datasette( diff --git a/tests/test_utils.py b/tests/test_utils.py index 3fcb623e..e142bb5b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ Tests for various datasette helper functions. from datasette.app import Datasette from datasette import utils from datasette.utils.asgi import Request -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import sqlite3, sqlite_table_type import json import os import pathlib @@ -226,6 +226,49 @@ def test_detect_fts_different_table_names(table): conn.close() +@pytest.mark.parametrize("use_fallback", (False, True)) +def test_sqlite_table_type_detects_virtual_and_shadow_tables(monkeypatch, use_fallback): + if use_fallback: + monkeypatch.setattr("datasette.utils.sqlite.sqlite_version", lambda: (3, 25, 0)) + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table dogs(id integer primary key, name text); + create view dog_names as select name from dogs; + create virtual table search_index using fts5(title, body); + create virtual table boxes using rtree(id, minx, maxx, miny, maxy); + """) + + assert sqlite_table_type(conn, "dogs") == "table" + assert sqlite_table_type(conn, "dog_names") == "view" + assert sqlite_table_type(conn, "search_index") == "virtual" + assert sqlite_table_type(conn, "search_index_config") == "shadow" + assert sqlite_table_type(conn, "boxes") == "virtual" + assert sqlite_table_type(conn, "boxes_node") == "shadow" + assert sqlite_table_type(conn, "missing") is None + finally: + conn.close() + + +@pytest.mark.parametrize("use_fallback", (False, True)) +def test_sqlite_table_type_detects_attached_database_tables(monkeypatch, use_fallback): + if use_fallback: + monkeypatch.setattr("datasette.utils.sqlite.sqlite_version", lambda: (3, 25, 0)) + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + attach database ':memory:' as extra; + create table extra.cats(id integer primary key, name text); + create virtual table extra.cat_search using fts5(name); + """) + + assert sqlite_table_type(conn, "cats", schema="extra") == "table" + assert sqlite_table_type(conn, "cat_search", schema="extra") == "virtual" + assert sqlite_table_type(conn, "cat_search_data", schema="extra") == "shadow" + finally: + conn.close() + + @pytest.mark.parametrize( "url,expected", [ diff --git a/tests/test_utils_sql_analysis.py b/tests/test_utils_sql_analysis.py index df4b3625..979ff9e1 100644 --- a/tests/test_utils_sql_analysis.py +++ b/tests/test_utils_sql_analysis.py @@ -260,6 +260,53 @@ def test_analyze_create_virtual_table_operation(): } in [operation_dict(operation) for operation in analysis.operations] +def test_analyze_table_kind_for_regular_virtual_and_shadow_tables(): + conn = sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table dogs (id integer primary key, name text); + create virtual table docs using fts5(title, body, content=''); + """) + + regular_analysis = analyze_sql_tables( + conn, + "insert into dogs (name) values ('Cleo')", + database_name="data", + ) + virtual_analysis = analyze_sql_tables( + conn, + "insert into docs(docs) values('delete-all')", + database_name="data", + ) + shadow_analysis = analyze_sql_tables( + conn, + "insert into docs_config(k, v) values ('x', 1)", + database_name="data", + ) + finally: + conn.close() + + regular_insert = next( + operation + for operation in regular_analysis.operations + if operation.operation == "insert" and operation.table == "dogs" + ) + virtual_insert = next( + operation + for operation in virtual_analysis.operations + if operation.operation == "insert" and operation.table == "docs" + ) + shadow_insert = next( + operation + for operation in shadow_analysis.operations + if operation.operation == "insert" and operation.table == "docs_config" + ) + + assert regular_insert.table_kind == "table" + assert virtual_insert.table_kind == "virtual" + assert shadow_insert.table_kind == "shadow" + + def test_analyze_create_table_as_select_function_is_not_internal(): conn = sqlite3.connect(":memory:") try: From aaf00e9ec22b77e53f291ccedcbf2f499cce9e2b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 08:42:06 -0700 Subject: [PATCH 1202/1218] Refactor hidden_table_names() to use new implemenatation Refs https://github.com/simonw/datasette/pull/2749#issuecomment-4565727978 --- datasette/database.py | 80 +------------------------------- datasette/utils/sqlite.py | 29 ++++++++++++ tests/test_internals_database.py | 9 +--- tests/test_utils.py | 12 ++++- 4 files changed, 43 insertions(+), 87 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index e7e9527e..10417670 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -26,7 +26,7 @@ from .utils import ( table_column_details, ) from .utils.sql_analysis import SQLAnalysis, analyze_sql_tables -from .utils.sqlite import sqlite_version +from .utils.sqlite import sqlite_hidden_table_names from .inspect import inspect_hash connections = threading.local() @@ -702,83 +702,7 @@ class Database: t for t in db_config["tables"] if db_config["tables"][t].get("hidden") ] - if sqlite_version()[1] >= 37: - hidden_tables += [x[0] for x in await self.execute(""" - with shadow_tables as ( - select name - from pragma_table_list - where [type] = 'shadow' - order by name - ), - core_tables as ( - select name - from sqlite_master - WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - combined as ( - select name from shadow_tables - union all - select name from core_tables - ) - select name from combined order by 1 - """)] - else: - hidden_tables += [x[0] for x in await self.execute(""" - WITH base AS ( - SELECT name - FROM sqlite_master - WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - OR substr(name, 1, 1) == '_' - ), - fts_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), ('_config')) - ), - fts5_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' - ), - fts5_shadow_tables AS ( - SELECT - printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name - FROM fts5_names - JOIN fts_suffixes - ), - fts3_suffixes AS ( - SELECT column1 AS suffix - FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) - ), - fts3_names AS ( - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' - OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' - ), - fts3_shadow_tables AS ( - SELECT - printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name - FROM fts3_names - JOIN fts3_suffixes - ), - final AS ( - SELECT name FROM base - UNION ALL - SELECT name FROM fts5_shadow_tables - UNION ALL - SELECT name FROM fts3_shadow_tables - ) - SELECT name FROM final ORDER BY 1 - """)] - # Also hide any FTS tables that have a content= argument - hidden_tables += [x[0] for x in await self.execute(""" - SELECT name - FROM sqlite_master - WHERE sql LIKE '%VIRTUAL TABLE%' - AND sql LIKE '%USING FTS%' - AND sql LIKE '%content=%' - """)] + hidden_tables += await self.execute_fn(sqlite_hidden_table_names) has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index 130c5f62..d3f52751 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -80,6 +80,28 @@ def sqlite_table_type( return _sqlite_table_type_from_schema(conn, table, schema=schema) +def sqlite_hidden_table_names(conn, *, schema: str | None = "main") -> list[str]: + schema_table = _sqlite_schema_table(schema) + try: + rows = conn.execute( + "select name, sql from {} where type = 'table'".format(schema_table) + ).fetchall() + except sqlite3.DatabaseError: + return [] + hidden_tables = [] + content_fts_tables = [] + for name, sql in rows: + if ( + name in {"sqlite_stat1", "sqlite_stat2", "sqlite_stat3", "sqlite_stat4"} + or name.startswith("_") + or sqlite_table_type(conn, name, schema=schema) == "shadow" + ): + hidden_tables.append(name) + elif _is_fts_content_virtual_table(sql): + content_fts_tables.append(name) + return sorted(hidden_tables) + content_fts_tables + + def _sqlite_table_type_from_schema( conn, table: str, @@ -150,3 +172,10 @@ def _virtual_table_module(sql: str | None) -> str | None: if match is None: return None return match.group(1).strip("\"'[]`").lower() + + +def _is_fts_content_virtual_table(sql: str | None) -> bool: + return ( + _virtual_table_module(sql) in {"fts3", "fts4", "fts5"} + and "content=" in sql.lower() + ) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index d6e130b4..88f9d571 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -8,7 +8,7 @@ from datasette.app import Datasette from datasette.database import Database, Results, MultipleValues from datasette.database import DatasetteClosedError from datasette.database import _deliver_write_result -from datasette.utils.sqlite import sqlite3, sqlite_version +from datasette.utils.sqlite import sqlite3 from datasette.utils import Column import pytest import time @@ -798,14 +798,7 @@ async def test_in_memory_databases_forbid_writes(app_client): assert await db.table_names() == ["foo"] -def pragma_table_list_supported(): - return sqlite_version()[1] >= 37 - - @pytest.mark.asyncio -@pytest.mark.skipif( - not pragma_table_list_supported(), reason="Requires PRAGMA table_list support" -) async def test_hidden_tables(app_client): ds = app_client.ds db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) diff --git a/tests/test_utils.py b/tests/test_utils.py index e142bb5b..90013537 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ Tests for various datasette helper functions. from datasette.app import Datasette from datasette import utils from datasette.utils.asgi import Request -from datasette.utils.sqlite import sqlite3, sqlite_table_type +from datasette.utils.sqlite import sqlite3, sqlite_hidden_table_names, sqlite_table_type import json import os import pathlib @@ -246,6 +246,16 @@ def test_sqlite_table_type_detects_virtual_and_shadow_tables(monkeypatch, use_fa assert sqlite_table_type(conn, "boxes") == "virtual" assert sqlite_table_type(conn, "boxes_node") == "shadow" assert sqlite_table_type(conn, "missing") is None + assert sqlite_hidden_table_names(conn) == [ + "boxes_node", + "boxes_parent", + "boxes_rowid", + "search_index_config", + "search_index_content", + "search_index_data", + "search_index_docsize", + "search_index_idx", + ] finally: conn.close() From 2785fd29deef505f132902dcee86284e39e3fdcb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 09:03:10 -0700 Subject: [PATCH 1203/1218] Fix tests I just broke --- datasette/utils/sql_analysis.py | 86 +++++++++++++++++++-------------- datasette/utils/sqlite.py | 2 +- tests/test_utils.py | 14 ++++++ 3 files changed, 65 insertions(+), 37 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index a71fa315..b5d7ada8 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -193,6 +193,10 @@ _AUTHORIZER_ACTION_NAMES = { } +def _allow_authorizer_action(*args): + return sqlite3.SQLITE_OK + + def analyze_sql_tables( conn, sql: str, @@ -424,42 +428,59 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK + table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + conn.set_authorizer(authorizer) try: explain_rows = conn.execute( "EXPLAIN " + sql, params if params is not None else {} ).fetchall() + # Passing None before these lookups leaves a failing callback installed + # on Python 3.10, so use a permissive callback until they are complete. + conn.set_authorizer(_allow_authorizer_action) + + if not operations: + vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) + if vacuum_row is not None: + schema_by_index = { + row[0]: row[1] for row in conn.execute("PRAGMA database_list") + } + sqlite_schema = schema_by_index.get(vacuum_row[2]) + database = database_for_schema(sqlite_schema) + record( + "vacuum", + "database", + database=database, + table=None, + sqlite_schema=sqlite_schema, + target=database, + source=None, + ) + else: + record( + "unknown", + "statement", + database=database_name, + table=None, + sqlite_schema=None, + target=None, + source=None, + ) + + for key in operations: + if ( + key.target_type == "table" + and key.operation in {"read", "insert", "update", "delete"} + and key.table is not None + ): + cache_key = (key.sqlite_schema, key.table) + if cache_key not in table_kind_cache: + table_kind_cache[cache_key] = sqlite_table_type( + conn, key.table, schema=key.sqlite_schema + ) finally: conn.set_authorizer(None) - if not operations: - vacuum_row = next((row for row in explain_rows if row[1] == "Vacuum"), None) - if vacuum_row is not None: - schema_by_index = { - row[0]: row[1] for row in conn.execute("PRAGMA database_list") - } - sqlite_schema = schema_by_index.get(vacuum_row[2]) - database = database_for_schema(sqlite_schema) - record( - "vacuum", - "database", - database=database, - table=None, - sqlite_schema=sqlite_schema, - target=database, - source=None, - ) - else: - record( - "unknown", - "statement", - database=database_name, - table=None, - sqlite_schema=None, - target=None, - source=None, - ) - has_schema_operation = any( key.target_type in {"table", "index", "view", "trigger", "virtual-table"} and key.operation in {"create", "alter", "drop"} @@ -502,8 +523,6 @@ def analyze_sql_tables( return True return False - table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} - def table_kind_for(key: OperationKey) -> SQLTableKind | None: if ( key.target_type != "table" @@ -511,12 +530,7 @@ def analyze_sql_tables( or key.table is None ): return None - cache_key = (key.sqlite_schema, key.table) - if cache_key not in table_kind_cache: - table_kind_cache[cache_key] = sqlite_table_type( - conn, key.table, schema=key.sqlite_schema - ) - return table_kind_cache[cache_key] + return table_kind_cache[(key.sqlite_schema, key.table)] return SQLAnalysis( operations=tuple( diff --git a/datasette/utils/sqlite.py b/datasette/utils/sqlite.py index d3f52751..5a7c6c38 100644 --- a/datasette/utils/sqlite.py +++ b/datasette/utils/sqlite.py @@ -16,7 +16,7 @@ _cached_sqlite_version = None SQLiteTableType = Literal["table", "view", "virtual", "shadow"] _VIRTUAL_TABLE_MODULE_RE = re.compile( r"\bCREATE\s+VIRTUAL\s+TABLE\b.*?\bUSING\s+([^\s(]+)", - re.IGNORECASE, + re.IGNORECASE | re.DOTALL, ) _VIRTUAL_TABLE_SHADOW_SUFFIXES = { "fts3": ("_content", "_segdir", "_segments", "_stat", "_docsize"), diff --git a/tests/test_utils.py b/tests/test_utils.py index 90013537..e83eed7a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -279,6 +279,20 @@ def test_sqlite_table_type_detects_attached_database_tables(monkeypatch, use_fal conn.close() +def test_sqlite_hidden_table_names_hides_multiline_content_fts_table(): + conn = utils.sqlite3.connect(":memory:") + try: + conn.executescript(""" + create table searchable(id integer primary key, body text); + create virtual table searchable_fts + using fts5(body, content='searchable', content_rowid='id'); + """) + + assert "searchable_fts" in sqlite_hidden_table_names(conn) + finally: + conn.close() + + @pytest.mark.parametrize( "url,expected", [ From 8bd7e165f465fe057beace2b17d52c0a347819f8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 09:50:56 -0700 Subject: [PATCH 1204/1218] Refactored for code readability --- datasette/app.py | 5 +- datasette/stored_queries.py | 211 +------------------------ datasette/views/database.py | 3 +- datasette/views/query_helpers.py | 27 ++-- datasette/write_sql.py | 255 +++++++++++++++++++++++++++++++ tests/test_write_sql.py | 59 +++++++ 6 files changed, 339 insertions(+), 221 deletions(-) create mode 100644 datasette/write_sql.py create mode 100644 tests/test_write_sql.py diff --git a/datasette/app.py b/datasette/app.py index 56b89789..e7f34e69 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -42,7 +42,7 @@ from jinja2.exceptions import TemplateNotFound from .events import Event from .column_types import SQLiteType -from . import stored_queries +from . import stored_queries, write_sql from .views import Context from .views.database import ( database_download, @@ -1197,7 +1197,8 @@ class Datasette: async def ensure_query_write_permissions( self, database, sql, *, actor=None, params=None, analysis=None ): - return await stored_queries.ensure_query_write_permissions( + # Raise Forbidden or QueryWriteRejected if SQL should not run + return await write_sql.ensure_query_write_permissions( self, database, sql, actor=actor, params=params, analysis=analysis ) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index b5aea221..b6ac49b8 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -2,26 +2,13 @@ from __future__ import annotations from dataclasses import dataclass import json -from typing import Any, Iterable, TYPE_CHECKING +from typing import Any, Iterable -from .resources import DatabaseResource, TableResource -from .permissions import Resource -from .utils import named_parameters, sqlite3, tilde_encode, urlsafe_components -from .utils.asgi import Forbidden -from .utils.sql_analysis import Operation, SQLAnalysis - -if TYPE_CHECKING: - from .app import Datasette +from .utils import tilde_encode, urlsafe_components UNCHANGED = object() -class QueryWriteRejected(Exception): - def __init__(self, message: str): - self.message = message - super().__init__(message) - - QUERY_OPTION_FIELDS = ( "hide_sql", "fragment", @@ -593,197 +580,3 @@ async def list_queries( has_more=has_more, limit=limit, ) - - -@dataclass(frozen=True) -class PermissionRequirement: - action: str - resource: Resource - - -def row_mutation_requirements( - database: str, table: str -) -> tuple[PermissionRequirement, ...]: - resource = TableResource(database=database, table=table) - return tuple( - PermissionRequirement(action=action, resource=resource) - for action in ("insert-row", "update-row", "delete-row") - ) - - -def permission_requirements_for_operation( - operation: Operation, -) -> tuple[PermissionRequirement, ...]: - if ( - operation.operation == "read" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="view-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - operation.operation in {"insert", "update"} - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return row_mutation_requirements( - database=operation.database, - table=operation.table, - ) - if ( - operation.operation == "delete" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="delete-row", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if operation.operation == "create" and operation.target_type == "table": - if operation.database is None: - return () - return ( - PermissionRequirement( - action="create-table", - resource=DatabaseResource(database=operation.database), - ), - ) - if ( - operation.operation == "alter" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="alter-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - operation.operation == "drop" - and operation.target_type == "table" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="drop-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - if ( - operation.operation in {"create", "drop"} - and operation.target_type == "index" - and operation.database is not None - and operation.table is not None - ): - return ( - PermissionRequirement( - action="alter-table", - resource=TableResource( - database=operation.database, table=operation.table - ), - ), - ) - return () - - -def operation_should_be_ignored(operation: Operation) -> bool: - return operation.internal or operation.operation == "select" - - -def operation_forbidden_message(operation: Operation) -> str | None: - if operation.operation == "vacuum": - return "VACUUM is not allowed in user-supplied SQL" - if operation.operation in {"insert", "update", "delete"}: - if operation.table_kind == "virtual": - return "Writes to virtual tables are not allowed in user-supplied SQL" - if operation.table_kind == "shadow": - return "Writes to shadow tables are not allowed in user-supplied SQL" - return None - - -def operation_is_write(operation: Operation) -> bool: - return operation.operation in { - "insert", - "update", - "delete", - "create", - "alter", - "drop", - "begin", - "commit", - "rollback", - "savepoint", - "attach", - "detach", - "pragma", - "analyze", - "reindex", - "vacuum", - "unknown", - } - - -async def ensure_query_write_permissions( - datasette: Datasette, - database: str, - sql: str, - *, - actor: dict[str, object] | None = None, - params: dict[str, object] | None = None, - analysis: SQLAnalysis | None = None, -) -> SQLAnalysis: - db = datasette.get_database(database) - if analysis is None: - if params is None: - params = {name: "" for name in named_parameters(sql)} - try: - analysis = await db.analyze_sql(sql, params) - except sqlite3.DatabaseError as ex: - raise Forbidden(f"Could not analyze query: {ex}") from ex - - for operation in analysis.operations: - if operation_should_be_ignored(operation): - continue - forbidden_message = operation_forbidden_message(operation) - if forbidden_message is not None: - raise QueryWriteRejected(forbidden_message) - permissions = permission_requirements_for_operation(operation) - if not permissions: - raise Forbidden( - "Unsupported SQL operation: {} {}".format( - operation.operation, operation.target_type - ) - ) - if operation.database != database: - raise Forbidden("Writable queries may not access attached databases") - for permission in permissions: - if not await datasette.allowed( - action=permission.action, - resource=permission.resource, - actor=actor, - ): - raise Forbidden( - f"Permission denied: need {permission.action} " - f"on {permission.resource}" - ) - return analysis diff --git a/datasette/views/database.py b/datasette/views/database.py index ae1cf375..b4a964f1 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -13,7 +13,8 @@ import textwrap from datasette.events import AlterTableEvent, CreateTableEvent, InsertRowsEvent from datasette.database import QueryInterrupted from datasette.resources import DatabaseResource, QueryResource -from datasette.stored_queries import QueryWriteRejected, stored_query_to_dict +from datasette.stored_queries import stored_query_to_dict +from datasette.write_sql import QueryWriteRejected from datasette.utils import ( add_cors_headers, await_me_maybe, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 92328ff3..712832e8 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -3,11 +3,14 @@ import re from datasette.resources import DatabaseResource from datasette.stored_queries import ( - QueryWriteRejected, StoredQuery, +) +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + QueryWriteRejected, + RequireWriteSqlPermissions, + decision_for_write_sql_operation, operation_is_write, - operation_should_be_ignored, - permission_requirements_for_operation, ) from datasette.utils import ( named_parameters as derive_named_parameters, @@ -212,7 +215,9 @@ async def _analyze_user_query(datasette, db, sql, *, actor): def _display_operations(analysis: SQLAnalysis) -> list[Operation]: operations = [] for operation in analysis.operations: - if operation_should_be_ignored(operation): + if isinstance( + decision_for_write_sql_operation(operation), IgnoreWriteSqlOperation + ): continue operations.append(operation) return operations @@ -221,8 +226,12 @@ def _display_operations(analysis: SQLAnalysis) -> list[Operation]: def _analysis_rows(analysis: SQLAnalysis) -> list[dict[str, object]]: rows = [] for operation in _display_operations(analysis): - permissions = permission_requirements_for_operation(operation) - required_permission = ", ".join(permission.action for permission in permissions) + decision = decision_for_write_sql_operation(operation) + required_permission = ( + ", ".join(permission.action for permission in decision.permissions) + if isinstance(decision, RequireWriteSqlPermissions) + else "" + ) rows.append( { "operation": operation.operation, @@ -241,10 +250,10 @@ async def _analysis_rows_with_permissions( rows = _analysis_rows(analysis) is_write = _analysis_is_write(analysis) for row, operation in zip(rows, _display_operations(analysis)): - permissions = permission_requirements_for_operation(operation) - if permissions: + decision = decision_for_write_sql_operation(operation) + if isinstance(decision, RequireWriteSqlPermissions): row["allowed"] = True - for permission in permissions: + for permission in decision.permissions: if not await datasette.allowed( action=permission.action, resource=permission.resource, diff --git a/datasette/write_sql.py b/datasette/write_sql.py new file mode 100644 index 00000000..2e1b69af --- /dev/null +++ b/datasette/write_sql.py @@ -0,0 +1,255 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from .permissions import Resource +from .resources import DatabaseResource, TableResource +from .utils import named_parameters, sqlite3 +from .utils.asgi import Forbidden +from .utils.sql_analysis import Operation, SQLAnalysis + +if TYPE_CHECKING: + from .app import Datasette + + +class QueryWriteRejected(Exception): + def __init__(self, message: str): + self.message = message + super().__init__(message) + + +@dataclass(frozen=True) +class PermissionRequirement: + action: str + resource: Resource + + +PermissionRequirements = tuple[PermissionRequirement, ...] + + +class WriteSqlOperationDecision: + """What Datasette should do with one operation in user-supplied write SQL.""" + + +@dataclass(frozen=True) +class IgnoreWriteSqlOperation(WriteSqlOperationDecision): + reason: str + + +@dataclass(frozen=True) +class RequireWriteSqlPermissions(WriteSqlOperationDecision): + permissions: PermissionRequirements + + +@dataclass(frozen=True) +class RejectWriteSqlOperation(WriteSqlOperationDecision): + message: str + + +@dataclass(frozen=True) +class UnsupportedWriteSqlOperation(WriteSqlOperationDecision): + message: str + + +def row_mutation_requirements(database: str, table: str) -> PermissionRequirements: + resource = TableResource(database=database, table=table) + return tuple( + PermissionRequirement(action=action, resource=resource) + for action in ("insert-row", "update-row", "delete-row") + ) + + +def decision_for_write_sql_operation( + operation: Operation, +) -> WriteSqlOperationDecision: + unsupported_message = ( + f"Unsupported SQL operation: {operation.operation} {operation.target_type}" + ) + if operation.internal: + return IgnoreWriteSqlOperation("internal SQLite operation") + if operation.operation == "select": + return IgnoreWriteSqlOperation("select statement") + if operation.operation == "vacuum": + return RejectWriteSqlOperation("VACUUM is not allowed in user-supplied SQL") + if operation.operation in {"insert", "update", "delete"}: + if operation.table_kind == "virtual": + return RejectWriteSqlOperation( + "Writes to virtual tables are not allowed in user-supplied SQL" + ) + if operation.table_kind == "shadow": + return RejectWriteSqlOperation( + "Writes to shadow tables are not allowed in user-supplied SQL" + ) + if operation.operation == "function": + # SQL functions currently have no Datasette permission mapping. They are + # rejected by the user-supplied write SQL allow-list as unsupported. + return UnsupportedWriteSqlOperation(unsupported_message) + if ( + operation.operation == "read" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="view-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation in {"insert", "update"} + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + row_mutation_requirements( + database=operation.database, + table=operation.table, + ) + ) + if ( + operation.operation == "delete" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="delete-row", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if operation.operation == "create" and operation.target_type == "table": + if operation.database is None: + return UnsupportedWriteSqlOperation(unsupported_message) + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="create-table", + resource=DatabaseResource(database=operation.database), + ), + ) + ) + if ( + operation.operation == "alter" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation == "drop" + and operation.target_type == "table" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="drop-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + if ( + operation.operation in {"create", "drop"} + and operation.target_type == "index" + and operation.database is not None + and operation.table is not None + ): + return RequireWriteSqlPermissions( + ( + PermissionRequirement( + action="alter-table", + resource=TableResource( + database=operation.database, table=operation.table + ), + ), + ) + ) + return UnsupportedWriteSqlOperation(unsupported_message) + + +def operation_is_write(operation: Operation) -> bool: + return operation.operation in { + "insert", + "update", + "delete", + "create", + "alter", + "drop", + "begin", + "commit", + "rollback", + "savepoint", + "attach", + "detach", + "pragma", + "analyze", + "reindex", + "vacuum", + "unknown", + } + + +async def ensure_query_write_permissions( + datasette: Datasette, + database: str, + sql: str, + *, + actor: dict[str, object] | None = None, + params: dict[str, object] | None = None, + analysis: SQLAnalysis | None = None, +) -> SQLAnalysis: + db = datasette.get_database(database) + if analysis is None: + if params is None: + params = {name: "" for name in named_parameters(sql)} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError as ex: + raise Forbidden(f"Could not analyze query: {ex}") from ex + + for operation in analysis.operations: + decision = decision_for_write_sql_operation(operation) + if isinstance(decision, IgnoreWriteSqlOperation): + continue + if isinstance(decision, RejectWriteSqlOperation): + raise QueryWriteRejected(decision.message) + if isinstance(decision, UnsupportedWriteSqlOperation): + raise Forbidden(decision.message) + permissions = decision.permissions + if operation.database != database: + raise Forbidden("Writable queries may not access attached databases") + for permission in permissions: + if not await datasette.allowed( + action=permission.action, + resource=permission.resource, + actor=actor, + ): + raise Forbidden( + f"Permission denied: need {permission.action} " + f"on {permission.resource}" + ) + return analysis diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py new file mode 100644 index 00000000..cfaf0f53 --- /dev/null +++ b/tests/test_write_sql.py @@ -0,0 +1,59 @@ +from datasette.utils.sql_analysis import Operation +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + RejectWriteSqlOperation, + RequireWriteSqlPermissions, + UnsupportedWriteSqlOperation, + WriteSqlOperationDecision, + decision_for_write_sql_operation, +) + + +def test_decision_for_write_sql_operation_ignores_internal_and_select_operations(): + internal_decision = decision_for_write_sql_operation( + Operation("read", "schema", None, None, "main", internal=True) + ) + select_decision = decision_for_write_sql_operation( + Operation("select", "statement", None, None, None) + ) + + assert isinstance(internal_decision, IgnoreWriteSqlOperation) + assert isinstance(internal_decision, WriteSqlOperationDecision) + assert isinstance(select_decision, IgnoreWriteSqlOperation) + assert isinstance(select_decision, WriteSqlOperationDecision) + + +def test_decision_for_write_sql_operation_requires_table_write_permissions(): + decision = decision_for_write_sql_operation( + Operation("insert", "table", "data", "dogs", None) + ) + + assert isinstance(decision, RequireWriteSqlPermissions) + assert [permission.action for permission in decision.permissions] == [ + "insert-row", + "update-row", + "delete-row", + ] + assert [str(permission.resource) for permission in decision.permissions] == [ + "data/dogs", + "data/dogs", + "data/dogs", + ] + + +def test_decision_for_write_sql_operation_rejects_vacuum(): + decision = decision_for_write_sql_operation( + Operation("vacuum", "statement", None, None, None) + ) + + assert isinstance(decision, RejectWriteSqlOperation) + assert decision.message == "VACUUM is not allowed in user-supplied SQL" + + +def test_decision_for_write_sql_operation_reports_unsupported_functions(): + decision = decision_for_write_sql_operation( + Operation("function", "function", None, None, None, target="upper") + ) + + assert isinstance(decision, UnsupportedWriteSqlOperation) + assert decision.message == "Unsupported SQL operation: function function" From 51dab16149f8b345d46cf517fa03b95fc1028234 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 10:22:16 -0700 Subject: [PATCH 1205/1218] Allow SQL functions in SQL write queries Closes #2751 --- datasette/write_sql.py | 4 +- docs/authentication.rst | 2 +- docs/json_api.rst | 2 +- docs/sql_queries.rst | 2 +- tests/test_queries.py | 83 +++++++++++++++++++++++++++++++++++++---- tests/test_write_sql.py | 13 ++++++- 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/datasette/write_sql.py b/datasette/write_sql.py index 2e1b69af..cdc0c6d3 100644 --- a/datasette/write_sql.py +++ b/datasette/write_sql.py @@ -82,9 +82,7 @@ def decision_for_write_sql_operation( "Writes to shadow tables are not allowed in user-supplied SQL" ) if operation.operation == "function": - # SQL functions currently have no Datasette permission mapping. They are - # rejected by the user-supplied write SQL allow-list as unsupported. - return UnsupportedWriteSqlOperation(unsupported_message) + return IgnoreWriteSqlOperation("SQL function") if ( operation.operation == "read" and operation.target_type == "table" diff --git a/docs/authentication.rst b/docs/authentication.rst index f720c12f..a0891900 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1425,7 +1425,7 @@ See also :ref:`the default_allow_sql setting `. execute-write-sql ----------------- -Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. +Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/json_api.rst b/docs/json_api.rst index fffc16d7..d502299e 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -531,7 +531,7 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. -Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. +Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. SQL functions are allowed and are not separately restricted by Datasette permissions. A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index f593a534..d427ea2b 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -140,7 +140,7 @@ Datasette stores both configured queries and user-created queries in the ``queri Stored queries created by users default to private. Private stored queries can only be viewed, updated or deleted by the actor that created them. Broad ``view-query``, ``update-query`` or ``delete-query`` permission grants still do not allow other actors to access another actor's private stored queries. -Stored queries created by users are untrusted. This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. +Stored queries created by users are untrusted. This means they execute using the permissions of the actor who runs them, as if that actor had pasted the SQL into the regular custom SQL interface or write SQL interface. Read-only stored queries require ``execute-sql``. Writable stored queries require ``execute-write-sql`` plus the relevant table-level write permissions. SQL functions are allowed and are not separately restricted by Datasette permissions. .. _trusted_stored_queries: .. _trusted_saved_queries: diff --git a/tests/test_queries.py b/tests/test_queries.py index 73f8f3cf..9c3ebcc8 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1414,6 +1414,11 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): actor={"id": "root"}, params={"sql": "insert into dogs (name) values (:name)"}, ) + function_response = await ds.client.get( + "/data/-/execute-write/analyze", + actor={"id": "root"}, + params={"sql": "insert into dogs (name) values (upper(:name))"}, + ) read_only_response = await ds.client.get( "/data/-/execute-write/analyze", actor={"id": "root"}, @@ -1438,6 +1443,22 @@ async def test_execute_write_analyze_endpoint_uses_sql_only(): ] assert "params" not in data + assert function_response.status_code == 200 + function_data = function_response.json() + assert function_data["ok"] is True + assert function_data["parameters"] == ["name"] + assert function_data["execute_disabled"] is False + assert function_data["analysis_rows"] == [ + { + "operation": "insert", + "database": "data", + "table": "dogs", + "required_permission": "insert-row, update-row, delete-row", + "source": None, + "allowed": True, + } + ] + assert read_only_response.status_code == 200 read_only_data = read_only_response.json() assert read_only_data["ok"] is False @@ -1970,7 +1991,7 @@ async def test_execute_write_create_table_as_select_requires_view_table_on_sourc @pytest.mark.asyncio -async def test_execute_write_rejects_function_operations(): +async def test_execute_write_allows_function_operations(): ds = Datasette( memory=True, default_deny=True, @@ -1998,17 +2019,65 @@ async def test_execute_write_rejects_function_operations(): await db.execute_write("create table dogs (id integer primary key, name text)") await ds.invoke_startup() - denied_response = await ds.client.post( + response = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, json={"sql": "insert into dogs (name) values (upper('cleo'))"}, ) - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Unsupported SQL operation: function function" - ] - assert (await db.execute("select name from dogs")).dicts() == [] + assert response.status_code == 200 + assert response.json()["ok"] is True + assert (await db.execute("select name from dogs")).dicts() == [{"name": "CLEO"}] + + +@pytest.mark.asyncio +async def test_untrusted_stored_write_query_allows_function_operations(): + ds = Datasette( + memory=True, + default_deny=True, + config={ + "databases": { + "data": { + "permissions": { + "view-database": {"id": "writer"}, + "view-query": {"id": "writer"}, + "execute-write-sql": {"id": "writer"}, + }, + "tables": { + "dogs": { + "permissions": { + "insert-row": {"id": "writer"}, + "update-row": {"id": "writer"}, + "delete-row": {"id": "writer"}, + } + } + }, + } + } + }, + ) + db = ds.add_memory_database("stored_query_function_operation", name="data") + await db.execute_write("create table dogs (id integer primary key, name text)") + await ds.invoke_startup() + await ds.add_query( + "data", + "insert_dog", + "insert into dogs (name) values (upper(:name))", + is_write=True, + is_trusted=False, + source="user", + owner_id="writer", + ) + + response = await ds.client.post( + "/data/insert_dog?_json=1", + actor={"id": "writer"}, + data={"name": "cleo"}, + ) + + assert response.status_code == 200 + assert response.json()["ok"] is True + assert (await db.execute("select name from dogs")).dicts() == [{"name": "CLEO"}] @pytest.mark.asyncio diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py index cfaf0f53..6d95c3c4 100644 --- a/tests/test_write_sql.py +++ b/tests/test_write_sql.py @@ -50,10 +50,19 @@ def test_decision_for_write_sql_operation_rejects_vacuum(): assert decision.message == "VACUUM is not allowed in user-supplied SQL" -def test_decision_for_write_sql_operation_reports_unsupported_functions(): +def test_decision_for_write_sql_operation_ignores_functions(): decision = decision_for_write_sql_operation( Operation("function", "function", None, None, None, target="upper") ) + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "SQL function" + + +def test_decision_for_write_sql_operation_reports_unsupported_operations(): + decision = decision_for_write_sql_operation( + Operation("unknown", "unknown", None, None, None) + ) + assert isinstance(decision, UnsupportedWriteSqlOperation) - assert decision.message == "Unsupported SQL operation: function function" + assert decision.message == "Unsupported SQL operation: unknown unknown" From b2b20b36c52ea446fb05fe688b636b83d187e6a6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 10:24:40 -0700 Subject: [PATCH 1206/1218] Document write SQL analyzer restrictions Expand the unreleased changelog with the deny-by-default operation analysis model, SQL function handling, and the VACUUM and virtual/shadow table restrictions for user-supplied write SQL. Clarify the /-/execute-write JSON API documentation with the same restrictions and DDL permission requirements. --- docs/changelog.rst | 2 ++ docs/json_api.rst | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2ba713ee..a4be98b1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -24,6 +24,8 @@ Write SQL UI - New "Write to this database" interface at ``//-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted and links to a newly inserted row when a single-row insert succeeds. (:issue:`2742`) - Added the new :ref:`execute-write-sql ` permission for running arbitrary writable SQL. Execution is also gated by table-level permissions such as :ref:`insert-row `, :ref:`update-row ` and :ref:`delete-row `, and writes to attached databases are rejected. (:issue:`2742`) +- The write SQL analyzer now uses a deny-by-default model for unsupported operations. Reads from source tables require :ref:`view-table ` permission, schema changes require :ref:`create-table `, :ref:`alter-table ` or :ref:`drop-table ` as appropriate, and row mutation statements require the full ``insert-row``, ``update-row`` and ``delete-row`` permission set. SQL functions are allowed and are not separately permission-gated. (:issue:`2748`) +- User-supplied write SQL now rejects ``VACUUM`` and writes to SQLite virtual tables or shadow tables. These restrictions also apply to untrusted stored write queries; trusted configured stored queries continue to skip these filters. (:issue:`2748`) Plugin API changes ~~~~~~~~~~~~~~~~~~ diff --git a/docs/json_api.rst b/docs/json_api.rst index d502299e..db19afc2 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -531,7 +531,9 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. -Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. SQL functions are allowed and are not separately restricted by Datasette permissions. +Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. Schema changes require ``create-table``, ``alter-table`` or ``drop-table`` permissions as appropriate. + +Unsupported SQL operations are rejected by default. ``VACUUM`` is not allowed in arbitrary write SQL, and writes to SQLite virtual tables or shadow tables are rejected. SQL functions are allowed and are not separately restricted by Datasette permissions. A successful response includes a message, the SQLite ``rowcount`` and a summary of the operations that were executed: From cbe9594a3dcac1f91a6baa7ac99a138c22a71a8a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 11:00:04 -0700 Subject: [PATCH 1207/1218] Use SQLiteTableType directly in SQL analysis Remove the redundant SQLTableKind alias from the write SQL analysis model. Operation.table_kind and the analyzer cache now use the SQLite metadata classification type directly, making the source of table-kind values clearer. --- datasette/utils/sql_analysis.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datasette/utils/sql_analysis.py b/datasette/utils/sql_analysis.py index b5d7ada8..0a3a947c 100644 --- a/datasette/utils/sql_analysis.py +++ b/datasette/utils/sql_analysis.py @@ -42,7 +42,6 @@ SQLTargetType = Literal[ SQLTableOperation = Literal["read", "insert", "update", "delete"] SQLSchemaOperation = Literal["create", "drop"] SQLSchemaTargetType = Literal["index", "table", "trigger", "view", "virtual-table"] -SQLTableKind = SQLiteTableType @dataclass(frozen=True) @@ -52,7 +51,7 @@ class Operation: database: str | None table: str | None sqlite_schema: str | None - table_kind: SQLTableKind | None = None + table_kind: SQLiteTableType | None = None target: str | None = None columns: tuple[str, ...] = () source: str | None = None @@ -428,7 +427,7 @@ def analyze_sql_tables( ) return sqlite3.SQLITE_OK - table_kind_cache: dict[tuple[str | None, str], SQLTableKind | None] = {} + table_kind_cache: dict[tuple[str | None, str], SQLiteTableType | None] = {} conn.set_authorizer(authorizer) try: @@ -523,7 +522,7 @@ def analyze_sql_tables( return True return False - def table_kind_for(key: OperationKey) -> SQLTableKind | None: + def table_kind_for(key: OperationKey) -> SQLiteTableType | None: if ( key.target_type != "table" or key.operation not in {"read", "insert", "update", "delete"} From 17f45b884b4b4844e9f0cce0fef402e888c690f0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 12:06:57 -0700 Subject: [PATCH 1208/1218] Clarify ignored write SQL operation tests Split the combined ignored-operation decision test into separate internal-operation and select-statement cases. Assert the decision reason for each case instead of checking the shared base class, so the tests document why those operations are ignored. --- tests/test_write_sql.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py index 6d95c3c4..75d6b6e1 100644 --- a/tests/test_write_sql.py +++ b/tests/test_write_sql.py @@ -4,23 +4,26 @@ from datasette.write_sql import ( RejectWriteSqlOperation, RequireWriteSqlPermissions, UnsupportedWriteSqlOperation, - WriteSqlOperationDecision, decision_for_write_sql_operation, ) -def test_decision_for_write_sql_operation_ignores_internal_and_select_operations(): - internal_decision = decision_for_write_sql_operation( +def test_decision_for_write_sql_operation_ignores_internal_operations(): + decision = decision_for_write_sql_operation( Operation("read", "schema", None, None, "main", internal=True) ) - select_decision = decision_for_write_sql_operation( + + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "internal SQLite operation" + + +def test_decision_for_write_sql_operation_ignores_select_statement_operations(): + decision = decision_for_write_sql_operation( Operation("select", "statement", None, None, None) ) - assert isinstance(internal_decision, IgnoreWriteSqlOperation) - assert isinstance(internal_decision, WriteSqlOperationDecision) - assert isinstance(select_decision, IgnoreWriteSqlOperation) - assert isinstance(select_decision, WriteSqlOperationDecision) + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == "select statement" def test_decision_for_write_sql_operation_requires_table_write_permissions(): From 0b7c26c6c8bf4827c02aba9707b1db0eb63aeaa5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 12:09:02 -0700 Subject: [PATCH 1209/1218] Refactored write decision tests --- tests/test_write_sql.py | 71 ---------------- tests/test_write_sql_operation_decisions.py | 94 +++++++++++++++++++++ 2 files changed, 94 insertions(+), 71 deletions(-) delete mode 100644 tests/test_write_sql.py create mode 100644 tests/test_write_sql_operation_decisions.py diff --git a/tests/test_write_sql.py b/tests/test_write_sql.py deleted file mode 100644 index 75d6b6e1..00000000 --- a/tests/test_write_sql.py +++ /dev/null @@ -1,71 +0,0 @@ -from datasette.utils.sql_analysis import Operation -from datasette.write_sql import ( - IgnoreWriteSqlOperation, - RejectWriteSqlOperation, - RequireWriteSqlPermissions, - UnsupportedWriteSqlOperation, - decision_for_write_sql_operation, -) - - -def test_decision_for_write_sql_operation_ignores_internal_operations(): - decision = decision_for_write_sql_operation( - Operation("read", "schema", None, None, "main", internal=True) - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "internal SQLite operation" - - -def test_decision_for_write_sql_operation_ignores_select_statement_operations(): - decision = decision_for_write_sql_operation( - Operation("select", "statement", None, None, None) - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "select statement" - - -def test_decision_for_write_sql_operation_requires_table_write_permissions(): - decision = decision_for_write_sql_operation( - Operation("insert", "table", "data", "dogs", None) - ) - - assert isinstance(decision, RequireWriteSqlPermissions) - assert [permission.action for permission in decision.permissions] == [ - "insert-row", - "update-row", - "delete-row", - ] - assert [str(permission.resource) for permission in decision.permissions] == [ - "data/dogs", - "data/dogs", - "data/dogs", - ] - - -def test_decision_for_write_sql_operation_rejects_vacuum(): - decision = decision_for_write_sql_operation( - Operation("vacuum", "statement", None, None, None) - ) - - assert isinstance(decision, RejectWriteSqlOperation) - assert decision.message == "VACUUM is not allowed in user-supplied SQL" - - -def test_decision_for_write_sql_operation_ignores_functions(): - decision = decision_for_write_sql_operation( - Operation("function", "function", None, None, None, target="upper") - ) - - assert isinstance(decision, IgnoreWriteSqlOperation) - assert decision.reason == "SQL function" - - -def test_decision_for_write_sql_operation_reports_unsupported_operations(): - decision = decision_for_write_sql_operation( - Operation("unknown", "unknown", None, None, None) - ) - - assert isinstance(decision, UnsupportedWriteSqlOperation) - assert decision.message == "Unsupported SQL operation: unknown unknown" diff --git a/tests/test_write_sql_operation_decisions.py b/tests/test_write_sql_operation_decisions.py new file mode 100644 index 00000000..cc19f701 --- /dev/null +++ b/tests/test_write_sql_operation_decisions.py @@ -0,0 +1,94 @@ +import pytest + +from datasette.utils.sql_analysis import Operation +from datasette.write_sql import ( + IgnoreWriteSqlOperation, + RejectWriteSqlOperation, + RequireWriteSqlPermissions, + UnsupportedWriteSqlOperation, + decision_for_write_sql_operation, +) + + +@pytest.mark.parametrize( + ("operation", "reason"), + ( + pytest.param( + Operation("read", "schema", None, None, "main", internal=True), + "internal SQLite operation", + id="internal", + ), + pytest.param( + Operation("select", "statement", None, None, None), + "select statement", + id="select-statement", + ), + pytest.param( + Operation("function", "function", None, None, None, target="upper"), + "SQL function", + id="function", + ), + ), +) +def test_decision_for_write_sql_operation_ignores_operations(operation, reason): + decision = decision_for_write_sql_operation(operation) + + assert isinstance(decision, IgnoreWriteSqlOperation) + assert decision.reason == reason + + +@pytest.mark.parametrize("operation", ("insert", "update")) +def test_decision_for_write_sql_operation_requires_table_write_permissions(operation): + decision = decision_for_write_sql_operation( + Operation(operation, "table", "data", "dogs", None) + ) + + assert isinstance(decision, RequireWriteSqlPermissions) + assert [permission.action for permission in decision.permissions] == [ + "insert-row", + "update-row", + "delete-row", + ] + assert [str(permission.resource) for permission in decision.permissions] == [ + "data/dogs", + "data/dogs", + "data/dogs", + ] + + +@pytest.mark.parametrize( + ("operation", "message"), + ( + pytest.param( + Operation("vacuum", "statement", None, None, None), + "VACUUM is not allowed in user-supplied SQL", + id="vacuum", + ), + pytest.param( + Operation("insert", "table", "data", "docs", None, table_kind="virtual"), + "Writes to virtual tables are not allowed in user-supplied SQL", + id="virtual-table", + ), + pytest.param( + Operation( + "insert", "table", "data", "docs_data", None, table_kind="shadow" + ), + "Writes to shadow tables are not allowed in user-supplied SQL", + id="shadow-table", + ), + ), +) +def test_decision_for_write_sql_operation_rejects_operations(operation, message): + decision = decision_for_write_sql_operation(operation) + + assert isinstance(decision, RejectWriteSqlOperation) + assert decision.message == message + + +def test_decision_for_write_sql_operation_reports_unsupported_operations(): + decision = decision_for_write_sql_operation( + Operation("unknown", "unknown", None, None, None) + ) + + assert isinstance(decision, UnsupportedWriteSqlOperation) + assert decision.message == "Unsupported SQL operation: unknown unknown" From cd838daef4d066e584b047164d8e2a5e96909511 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:22:21 -0700 Subject: [PATCH 1210/1218] Refactor tests a bit --- tests/test_queries.py | 449 +++++++++++++++++++++--------------------- 1 file changed, 225 insertions(+), 224 deletions(-) diff --git a/tests/test_queries.py b/tests/test_queries.py index 9c3ebcc8..216cb211 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1700,8 +1700,22 @@ async def test_execute_write_post_requires_database_and_table_permissions(): assert (await db.execute("select name from dogs")).first()[0] == "Cleo" +@pytest.mark.parametrize( + "database_name, sql", + ( + ( + "execute_write_insert_or_replace", + "insert or replace into users(id, email) values (3, 'b@example.com')", + ), + ( + "execute_write_update_or_replace", + "update or replace users set email = 'b@example.com' where id = 1", + ), + ), + ids=("insert-or-replace", "update-or-replace"), +) @pytest.mark.asyncio -async def test_execute_write_insert_or_replace_requires_delete_row_permission(): +async def test_execute_write_replace_requires_delete_row_permission(database_name, sql): ds = Datasette( memory=True, default_deny=True, @@ -1725,7 +1739,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): } }, ) - db = ds.add_memory_database("execute_write_insert_or_replace", name="data") + db = ds.add_memory_database(database_name, name="data") await db.execute_write( "create table users (id integer primary key, email text unique)" ) @@ -1738,64 +1752,7 @@ async def test_execute_write_insert_or_replace_requires_delete_row_permission(): denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "writer"}, - json={ - "sql": ( - "insert or replace into users(id, email) " "values (3, 'b@example.com')" - ) - }, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Permission denied: need delete-row on data/users" - ] - assert (await db.execute("select id, email from users order by id")).dicts() == [ - {"id": 1, "email": "a@example.com"}, - {"id": 2, "email": "b@example.com"}, - ] - - -@pytest.mark.asyncio -async def test_execute_write_update_or_replace_requires_delete_row_permission(): - ds = Datasette( - memory=True, - default_deny=True, - config={ - "databases": { - "data": { - "permissions": { - "view-database": {"id": "writer"}, - "execute-write-sql": {"id": "writer"}, - }, - "tables": { - "users": { - "permissions": { - "insert-row": {"id": "writer"}, - "update-row": {"id": "writer"}, - "view-table": {"id": "writer"}, - } - } - }, - } - } - }, - ) - db = ds.add_memory_database("execute_write_update_or_replace", name="data") - await db.execute_write( - "create table users (id integer primary key, email text unique)" - ) - await db.execute_write( - "insert into users (id, email) values " - "(1, 'a@example.com'), (2, 'b@example.com')" - ) - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "writer"}, - json={ - "sql": "update or replace users set email = 'b@example.com' where id = 1" - }, + json={"sql": sql}, ) assert denied_response.status_code == 403 @@ -2262,74 +2219,71 @@ async def test_trusted_stored_write_query_skips_vacuum_filtering(): assert response.json()["ok"] is True +@pytest.mark.parametrize( + ( + "database_name", + "setup_sqls", + "write_sql", + "expected_error", + "verification_sql", + "expected_count", + ), + ( + ( + "execute_write_virtual_table_control", + ( + "create virtual table docs using fts5(title, body, content='')", + "insert into docs(rowid, title, body) values (1, 'hello', 'world')", + ), + "insert into docs(docs) values('delete-all')", + "Writes to virtual tables are not allowed in user-supplied SQL", + "select count(*) from docs where docs match 'hello'", + 1, + ), + ( + "execute_write_virtual_table_insert", + ("create virtual table docs using fts5(title, body)",), + "insert into docs(rowid, title, body) values (1, 'a', 'b')", + "Writes to virtual tables are not allowed in user-supplied SQL", + "select count(*) from docs", + 0, + ), + ( + "execute_write_shadow_table_insert", + ("create virtual table docs using fts5(title, body)",), + "insert into docs_config(k, v) values ('x', 1)", + "Writes to shadow tables are not allowed in user-supplied SQL", + "select count(*) from docs_config", + 1, + ), + ), + ids=("control-insert", "virtual-table", "shadow-table"), +) @pytest.mark.asyncio -async def test_execute_write_rejects_virtual_table_control_insert(): +async def test_execute_write_rejects_virtual_and_shadow_table_writes( + database_name, + setup_sqls, + write_sql, + expected_error, + verification_sql, + expected_count, +): ds = Datasette(memory=True, default_deny=True) ds.root_enabled = True - db = ds.add_memory_database("execute_write_virtual_table_control", name="data") - await db.execute_write(""" - create virtual table docs using fts5(title, body, content='') - """) - await db.execute_write(""" - insert into docs(rowid, title, body) values (1, 'hello', 'world') - """) + db = ds.add_memory_database(database_name, name="data") + for setup_sql in setup_sqls: + await db.execute_write(setup_sql) await ds.invoke_startup() denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "root"}, - json={"sql": "insert into docs(docs) values('delete-all')"}, + json={"sql": write_sql}, ) assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to virtual tables are not allowed in user-supplied SQL" - ] - assert ( - await db.execute("select count(*) from docs where docs match 'hello'") - ).first()[0] == 1 - - -@pytest.mark.asyncio -async def test_execute_write_rejects_regular_virtual_table_insert(): - ds = Datasette(memory=True, default_deny=True) - ds.root_enabled = True - db = ds.add_memory_database("execute_write_virtual_table_insert", name="data") - await db.execute_write("create virtual table docs using fts5(title, body)") - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "root"}, - json={"sql": "insert into docs(rowid, title, body) values (1, 'a', 'b')"}, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to virtual tables are not allowed in user-supplied SQL" - ] - assert (await db.execute("select count(*) from docs")).first()[0] == 0 - - -@pytest.mark.asyncio -async def test_execute_write_rejects_shadow_table_insert(): - ds = Datasette(memory=True, default_deny=True) - ds.root_enabled = True - db = ds.add_memory_database("execute_write_shadow_table_insert", name="data") - await db.execute_write("create virtual table docs using fts5(title, body)") - await ds.invoke_startup() - - denied_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "root"}, - json={"sql": "insert into docs_config(k, v) values ('x', 1)"}, - ) - - assert denied_response.status_code == 403 - assert denied_response.json()["errors"] == [ - "Writes to shadow tables are not allowed in user-supplied SQL" - ] - assert (await db.execute("select count(*) from docs_config")).first()[0] == 1 + assert denied_response.json()["errors"] == [expected_error] + assert (await db.execute(verification_sql)).first()[0] == expected_count @pytest.mark.asyncio @@ -2482,8 +2436,69 @@ async def test_execute_write_create_table_uses_create_table_permission(): assert not await db.table_exists("should_not_exist") +@pytest.mark.parametrize( + ( + "database_name", + "allowed_actor", + "allowed_sql", + "denied_sql", + "expected_error", + "setup_sqls", + "expected_state", + ), + ( + ( + "execute_write_alter_table", + "alterer", + "alter table dogs add column age integer", + "alter table cats add column age integer", + "Permission denied: need alter-table on data/cats", + (), + "alter-table", + ), + ( + "execute_write_create_index", + "alterer", + "create index idx_dogs_name on dogs(name)", + "create index idx_cats_name on cats(name)", + "Permission denied: need alter-table on data/cats", + (), + "create-index", + ), + ( + "execute_write_drop_index", + "alterer", + "drop index idx_dogs_name", + "drop index idx_cats_name", + "Permission denied: need alter-table on data/cats", + ( + "create index idx_dogs_name on dogs(name)", + "create index idx_cats_name on cats(name)", + ), + "drop-index", + ), + ( + "execute_write_drop_table", + "dropper", + "drop table dogs", + "drop table cats", + "Permission denied: need drop-table on data/cats", + (), + "drop-table", + ), + ), + ids=("alter-table", "create-index", "drop-index", "drop-table"), +) @pytest.mark.asyncio -async def test_execute_write_alter_and_drop_table_use_schema_permissions(): +async def test_execute_write_schema_operations_use_schema_permissions( + database_name, + allowed_actor, + allowed_sql, + denied_sql, + expected_error, + setup_sqls, + expected_state, +): ds = Datasette( memory=True, default_deny=True, @@ -2513,73 +2528,53 @@ async def test_execute_write_alter_and_drop_table_use_schema_permissions(): }, }, ) - db = ds.add_memory_database("execute_write_alter_drop_table", name="data") + db = ds.add_memory_database(database_name, name="data") await db.execute_write("create table dogs (id integer primary key, name text)") await db.execute_write("create table cats (id integer primary key, name text)") + for setup_sql in setup_sqls: + await db.execute_write(setup_sql) await ds.invoke_startup() - alter_allowed_response = await ds.client.post( + async def index_exists(index_name): + row = ( + await db.execute( + "select 1 from sqlite_master where type = 'index' and name = ?", + [index_name], + ) + ).first() + return row is not None + + allowed_response = await ds.client.post( "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "alter table dogs add column age integer"}, + actor={"id": allowed_actor}, + json={"sql": allowed_sql}, ) - alter_row_permission_response = await ds.client.post( + denied_response = await ds.client.post( "/data/-/execute-write", actor={"id": "row-writer"}, - json={"sql": "alter table cats add column age integer"}, + json={"sql": denied_sql}, ) - assert alter_allowed_response.status_code == 200 - assert "age" in [column.name for column in await db.table_column_details("dogs")] - assert alter_row_permission_response.status_code == 403 - assert alter_row_permission_response.json()["errors"] == [ - "Permission denied: need alter-table on data/cats" - ] - assert "age" not in [ - column.name for column in await db.table_column_details("cats") - ] + assert allowed_response.status_code == 200 + assert denied_response.status_code == 403 + assert denied_response.json()["errors"] == [expected_error] - create_index_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "create index idx_dogs_name on dogs(name)"}, - ) - create_index_row_permission_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "row-writer"}, - json={"sql": "create index idx_cats_name on cats(name)"}, - ) - drop_index_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "alterer"}, - json={"sql": "drop index idx_dogs_name"}, - ) - - assert create_index_allowed_response.status_code == 200 - assert create_index_row_permission_response.status_code == 403 - assert create_index_row_permission_response.json()["errors"] == [ - "Permission denied: need alter-table on data/cats" - ] - assert drop_index_allowed_response.status_code == 200 - - drop_allowed_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "dropper"}, - json={"sql": "drop table dogs"}, - ) - drop_row_permission_response = await ds.client.post( - "/data/-/execute-write", - actor={"id": "row-writer"}, - json={"sql": "drop table cats"}, - ) - - assert drop_allowed_response.status_code == 200 - assert not await db.table_exists("dogs") - assert drop_row_permission_response.status_code == 403 - assert drop_row_permission_response.json()["errors"] == [ - "Permission denied: need drop-table on data/cats" - ] - assert await db.table_exists("cats") + if expected_state == "alter-table": + assert "age" in [ + column.name for column in await db.table_column_details("dogs") + ] + assert "age" not in [ + column.name for column in await db.table_column_details("cats") + ] + elif expected_state == "create-index": + assert await index_exists("idx_dogs_name") + assert not await index_exists("idx_cats_name") + elif expected_state == "drop-index": + assert not await index_exists("idx_dogs_name") + assert await index_exists("idx_cats_name") + elif expected_state == "drop-table": + assert not await db.table_exists("dogs") + assert await db.table_exists("cats") @pytest.mark.asyncio @@ -2644,8 +2639,9 @@ async def test_execute_write_post_rejects_read_only_sql(): ] +@pytest.mark.parametrize("action", ("view-query", "update-query", "delete-query")) @pytest.mark.asyncio -async def test_query_owner_gets_update_delete_and_writable_view_defaults(): +async def test_query_owner_gets_update_delete_and_writable_view_defaults(action): ds = Datasette(memory=True, default_deny=True) ds.add_memory_database("query_owner_defaults", name="data") await ds.invoke_startup() @@ -2658,21 +2654,35 @@ async def test_query_owner_gets_update_delete_and_writable_view_defaults(): owner_id="alice", ) - for action in ("view-query", "update-query", "delete-query"): - assert await ds.allowed( - action=action, - resource=QueryResource("data", "insert_dog"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action=action, - resource=QueryResource("data", "insert_dog"), - actor={"id": "bob"}, - ) + assert await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "alice"}, + ) + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "insert_dog"), + actor={"id": "bob"}, + ) +@pytest.mark.parametrize( + "action, path_suffix, request_json, expected_public_title", + ( + ( + "update-query", + "-/update", + {"update": {"title": "Bob can edit public queries"}}, + "Bob can edit public queries", + ), + ("delete-query", "-/delete", {}, None), + ), + ids=("update-query", "delete-query"), +) @pytest.mark.asyncio -async def test_private_query_restricts_broad_update_delete_permissions(): +async def test_private_query_restricts_broad_update_delete_permissions( + action, path_suffix, request_json, expected_public_title +): ds = Datasette( memory=True, default_deny=True, @@ -2706,50 +2716,41 @@ async def test_private_query_restricts_broad_update_delete_permissions(): owner_id="alice", ) - for action in ("update-query", "delete-query"): - assert await ds.allowed( - action=action, - resource=QueryResource("data", "alice_private"), - actor={"id": "alice"}, - ) - assert not await ds.allowed( - action=action, - resource=QueryResource("data", "alice_private"), - actor={"id": "bob"}, - ) - assert await ds.allowed( - action=action, - resource=QueryResource("data", "alice_public"), - actor={"id": "bob"}, - ) - - private_update_response = await ds.client.post( - "/data/alice_private/-/update", - actor={"id": "bob"}, - json={"update": {"title": "Nope"}}, + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), + actor={"id": "alice"}, ) - private_delete_response = await ds.client.post( - "/data/alice_private/-/delete", + assert not await ds.allowed( + action=action, + resource=QueryResource("data", "alice_private"), actor={"id": "bob"}, - json={}, ) - public_update_response = await ds.client.post( - "/data/alice_public/-/update", + assert await ds.allowed( + action=action, + resource=QueryResource("data", "alice_public"), actor={"id": "bob"}, - json={"update": {"title": "Bob can edit public queries"}}, - ) - public_delete_response = await ds.client.post( - "/data/alice_public/-/delete", - actor={"id": "bob"}, - json={}, ) - assert private_update_response.status_code == 403 - assert private_delete_response.status_code == 403 - assert public_update_response.status_code == 200 - assert public_delete_response.status_code == 200 + private_response = await ds.client.post( + "/data/alice_private/{}".format(path_suffix), + actor={"id": "bob"}, + json=request_json, + ) + public_response = await ds.client.post( + "/data/alice_public/{}".format(path_suffix), + actor={"id": "bob"}, + json=request_json, + ) + + assert private_response.status_code == 403 + assert public_response.status_code == 200 assert await ds.get_query("data", "alice_private") is not None - assert await ds.get_query("data", "alice_public") is None + public_query = await ds.get_query("data", "alice_public") + if expected_public_title is None: + assert public_query is None + else: + assert public_query.title == expected_public_title @pytest.mark.asyncio From b6e9b189905f6a03136e5998fdf39e1944a1e2a8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:37:48 -0700 Subject: [PATCH 1211/1218] datasette.yml can no longer set a query to private Private means it has an owner, and the config does not let you say who the owner is - plus configured queries should not be possible to edit or delete in the UI so having an owner makes even less sense. You can still make configured queries visible to specific people using regular view-query permissions. --- datasette/stored_queries.py | 1 - tests/test_queries.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datasette/stored_queries.py b/datasette/stored_queries.py index b6ac49b8..a6123daa 100644 --- a/datasette/stored_queries.py +++ b/datasette/stored_queries.py @@ -109,7 +109,6 @@ async def save_queries_from_config(datasette: Any) -> None: fragment=query_config.get("fragment"), parameters=query_config.get("params"), is_write=bool(query_config.get("write")), - is_private=bool(query_config.get("is_private")), is_trusted=bool(query_config.get("is_trusted", True)), source="config", on_success_message=query_config.get("on_success_message"), diff --git a/tests/test_queries.py b/tests/test_queries.py index 216cb211..2aa5142b 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -191,6 +191,8 @@ async def test_config_queries_imported_to_internal_table(): "title": "Configured query", "description_html": "

Configured HTML

", "params": ["name"], + # Configured queries are always public; this is ignored. + "is_private": True, "on_success_message_sql": "select 'Hello ' || :name", } } From 74324cb8492be8aa8597e58fb6f690158128e6fc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:46:27 -0700 Subject: [PATCH 1212/1218] Improved docs for user-facing SQL query pages - /database-name/-/execute-write - /-/queries --- docs/authentication.rst | 4 ++-- docs/pages.rst | 27 +++++++++++++++++++++++++++ docs/sql_queries.rst | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/authentication.rst b/docs/authentication.rst index a0891900..5d831da0 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1413,7 +1413,7 @@ Actor is allowed to drop a database table. execute-sql ----------- -Actor is allowed to run arbitrary read-only SQL queries against a specific database, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 +Actor is allowed to run arbitrary read-only SQL queries against a specific database using the :ref:`custom SQL query page `, e.g. https://latest.datasette.io/fixtures/-/query?sql=select+100 ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) @@ -1425,7 +1425,7 @@ See also :ref:`the default_allow_sql setting `. execute-write-sql ----------------- -Actor is allowed to run arbitrary writable SQL queries against a specific database, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions. +Actor is allowed to run arbitrary writable SQL queries against a specific database using the :ref:`write SQL queries page `, subject to table-level write permissions such as ``insert-row``, ``update-row`` and ``delete-row``. SQL functions are allowed and are not separately restricted by Datasette permissions. ``resource`` - ``datasette.resources.DatabaseResource(database)`` ``database`` is the name of the database (string) diff --git a/docs/pages.rst b/docs/pages.rst index e57c15e6..a8ff7c37 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -62,6 +62,11 @@ The following tables are hidden by default: Queries ======= +.. _pages_custom_sql_queries: + +Custom SQL queries +------------------ + The ``/database-name/-/query`` page can be used to execute an arbitrary SQL query against that database, if the :ref:`actions_execute_sql` permission is enabled. This query is passed as the ``?sql=`` query string parameter. This means you can link directly to a query by constructing the following URL: @@ -72,6 +77,28 @@ Each configured :ref:`stored query ` has its own page, at ``/dat In both cases adding a ``.json`` extension to the URL will return the results as JSON. +.. _pages_execute_write: + +Write SQL queries +----------------- + +The ``/database-name/-/execute-write`` page can be used to execute SQL statements that write to a mutable database, if the :ref:`actions_execute_write_sql` permission is enabled. + +This page extracts named parameters from the SQL, shows the tables that will be affected and lists the permissions required before the query can be executed. It also includes templates for common ``INSERT``, ``UPDATE`` and ``DELETE`` statements. + +Datasette checks additional permissions based on the operations in the SQL. Row changes require the relevant table-level permissions such as :ref:`actions_insert_row`, :ref:`actions_update_row` and :ref:`actions_delete_row`; reads from source tables require :ref:`actions_view_table`; and schema changes require permissions such as :ref:`actions_create_table`, :ref:`actions_alter_table` or :ref:`actions_drop_table`. + +Use the :ref:`ExecuteWriteView` JSON API to execute writable SQL programmatically. + +.. _pages_stored_query_browser: + +Stored query browsers +--------------------- + +The ``/-/queries`` page lists stored queries across every database visible to the current actor. The ``/database-name/-/queries`` page lists stored queries for a single database. + +These pages support search, pagination and filters for read-only or writable queries and private or public queries. Adding a ``.json`` extension to either URL returns the same list as JSON. + .. _TableView: Table diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst index d427ea2b..c0ba67f0 100644 --- a/docs/sql_queries.rst +++ b/docs/sql_queries.rst @@ -7,6 +7,8 @@ Datasette treats SQLite database files as read-only and immutable. This means it The easiest way to execute custom SQL against Datasette is through the web UI. The database index page includes a SQL editor that lets you run any SELECT query you like. You can also construct queries using the filter interface on the tables page, then click "View and edit SQL" to open that query in the custom SQL editor. +For mutable databases, actors with the appropriate permissions can use the :ref:`write SQL page ` to execute SQL statements that insert, update or delete rows. + Note that this interface is only available if the :ref:`actions_execute_sql` permission is allowed. See :ref:`authentication_permissions_execute_sql`. Any Datasette SQL query is reflected in the URL of the page, allowing you to bookmark them, share them with others and navigate through previous queries using your browser back button. From 6a998610eef6e69d439a654dd31087023d285452 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 15:52:51 -0700 Subject: [PATCH 1213/1218] datasette inspect now counts 10,000+ tables correctly (#2752) Closes #2712 Refs https://github.com/simonw/datasette/pull/2721#issuecomment-4568966383 --- datasette/cli.py | 7 ++++--- docs/changelog.rst | 1 + tests/test_cli.py | 18 +++++++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 93aa22ef..90a33e80 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -21,6 +21,7 @@ from .app import ( SQLITE_LIMIT_ATTACHED, pm, ) +from .inspect import inspect_tables from .utils import ( LoadExtension, StartupError, @@ -154,14 +155,14 @@ async def inspect_(files, sqlite_extensions): app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) data = {} for name, database in app.databases.items(): - counts = await database.table_counts(limit=3600 * 1000) + tables = await database.execute_fn(lambda conn: inspect_tables(conn, {})) data[name] = { "hash": database.hash, "size": database.size, "file": database.path, "tables": { - table_name: {"count": table_count} - for table_name, table_count in counts.items() + table_name: {"count": table["count"]} + for table_name, table in tables.items() }, } return data diff --git a/docs/changelog.rst b/docs/changelog.rst index a4be98b1..3882cc12 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -37,6 +37,7 @@ Bug fixes ~~~~~~~~~ - Fixed a bug where visiting ``//-/query`` without a ``?sql=`` parameter returned a 500 error. (:issue:`2743`) +- The ``datasette inspect`` command now correctly records row counts for tables with more than 10,000 rows. (:issue:`2712`) .. _v1_0_a30: diff --git a/tests/test_cli.py b/tests/test_cli.py index 1d3a2b28..f86d6909 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -35,12 +35,28 @@ def test_inspect_cli(app_client): assert expected_count == database["tables"][table_name]["count"] +def test_inspect_cli_counts_all_rows(tmp_path): + db_path = tmp_path / "big.db" + conn = sqlite3.connect(db_path) + with conn: + conn.execute("create table t (id integer primary key)") + conn.executemany("insert into t (id) values (?)", ((i,) for i in range(10002))) + conn.close() + + runner = CliRunner() + result = runner.invoke(cli, ["inspect", str(db_path)]) + assert result.exit_code == 0, result.output + data = json.loads(result.output) + + assert data["big"]["tables"]["t"]["count"] == 10002 + + def test_inspect_cli_writes_to_file(app_client): runner = CliRunner() result = runner.invoke( cli, ["inspect", "fixtures.db", "--inspect-file", "foo.json"] ) - assert 0 == result.exit_code, result.output + assert result.exit_code == 0, result.output with open("foo.json") as fp: data = json.load(fp) assert ["fixtures"] == list(data.keys()) From e5b6166fa35558920342e74f5ec13078957e87bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 16:19:39 -0700 Subject: [PATCH 1214/1218] Nicer UI around Execute Write SQL denied Refs https://github.com/simonw/datasette/issues/2753#issuecomment-4569117665 --- datasette/templates/execute_write.html | 82 ++++++++++++++++++++------ datasette/views/execute_write.py | 17 +++--- datasette/views/query_helpers.py | 20 +++++-- tests/test_queries.py | 75 ++++++++++++++++++++++- 4 files changed, 160 insertions(+), 34 deletions(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index 6b626f8d..ee251111 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -40,6 +40,26 @@ border-radius: 0.25rem; min-width: 13rem; } +.execute-write-submit-row { + align-items: center; + display: flex; + flex-wrap: wrap; + gap: 0.45rem 0.75rem; +} +.execute-write-submit-row [hidden] { + display: none; +} +form.sql.core input[data-execute-write-submit]:disabled { + background: #d0d7de; + border-color: #b6c0cc; + color: #5f6975; + cursor: not-allowed; + opacity: 1; +} +.execute-write-disabled-reason { + color: #4f5b6d; + font-size: 0.85rem; +} {% include "_execute_write_analysis_styles.html" %} {% include "_sql_parameter_styles.html" %} @@ -119,9 +139,10 @@ {% endif %}
-

- - {% if save_query_base_url %}Save this query{% endif %} +

+ + {{ execute_disabled_reason or "" }} + {% if save_query_url %}Save this query{% endif %}

@@ -143,25 +164,55 @@ window.addEventListener("DOMContentLoaded", () => { const submitButton = form ? form.querySelector("[data-execute-write-submit]") : null; - const saveQueryLink = form + const submitDisabledReason = form + ? form.querySelector("[data-execute-write-disabled-reason]") + : null; + const submitRow = form + ? form.querySelector(".execute-write-submit-row") + : null; + let saveQueryLink = form ? form.querySelector("[data-save-query-link]") : null; + function updateSubmitState(data) { + if (submitButton) { + submitButton.disabled = data.execute_disabled; + } + if (!submitDisabledReason) { + return; + } + const reason = data.execute_disabled_reason || ""; + submitDisabledReason.textContent = reason; + submitDisabledReason.hidden = !reason; + } + function updateSaveQueryLink(data) { - if (!saveQueryLink) { + if (!submitRow || !submitRow.dataset.saveQueryBaseUrl) { return; } const sql = window.editor ? window.editor.state.doc.toString() : executeWriteSqlInput.value; if (!sql.trim() || !data.ok || data.execute_disabled) { - saveQueryLink.hidden = true; + if (saveQueryLink) { + saveQueryLink.remove(); + saveQueryLink = null; + } return; } - const url = new URL(saveQueryLink.dataset.saveQueryBaseUrl, window.location.href); + if (!saveQueryLink) { + saveQueryLink = document.createElement("a"); + saveQueryLink.className = "save-query"; + saveQueryLink.setAttribute("data-save-query-link", ""); + saveQueryLink.textContent = "Save this query"; + submitRow.appendChild(saveQueryLink); + } + const url = new URL( + submitRow.dataset.saveQueryBaseUrl, + window.location.href + ); url.searchParams.set("sql", sql); saveQueryLink.href = url.pathname + url.search + url.hash; - saveQueryLink.hidden = false; } window.datasetteSqlParameters.setupSqlParameterRefresh({ @@ -170,9 +221,7 @@ window.addEventListener("DOMContentLoaded", () => { allowExpand: true, onData(data) { window.datasetteSqlAnalysis.renderAnalysis(analysisSection, data); - if (submitButton) { - submitButton.disabled = data.execute_disabled; - } + updateSubmitState(data); updateSaveQueryLink(data); }, onError(error) { @@ -180,12 +229,11 @@ window.addEventListener("DOMContentLoaded", () => { analysis_error: error.message, analysis_rows: [], }); - if (submitButton) { - submitButton.disabled = true; - } - if (saveQueryLink) { - saveQueryLink.hidden = true; - } + updateSubmitState({ + execute_disabled: true, + execute_disabled_reason: error.message, + }); + updateSaveQueryLink({ ok: false, execute_disabled: true }); }, }); }); diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 57c4d78e..7b693978 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -14,6 +14,7 @@ from .query_helpers import ( _coerce_execute_write_payload, _derived_query_parameters, _execute_write_analysis_data, + _execute_write_disabled_reason, _inserted_row_url, _json_or_form_payload, _prepare_execute_write, @@ -80,13 +81,12 @@ class ExecuteWriteView(BaseView): ) save_query_base_url = None save_query_url = None + execute_disabled_reason = _execute_write_disabled_reason( + sql, analysis_error, analysis_rows + ) if allow_save_query: save_query_base_url = self.ds.urls.database(db.name) + "/-/queries/store" - if ( - sql - and analysis_error is None - and not any(row["allowed"] is False for row in analysis_rows) - ): + if not execute_disabled_reason: save_query_url = save_query_base_url + "?" + urlencode({"sql": sql}) response = await self.render( @@ -103,11 +103,8 @@ class ExecuteWriteView(BaseView): "execution_message": execution_message, "execution_links": execution_links, "execution_ok": execution_ok, - "execute_disabled": bool( - (not sql) - or analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), + "execute_disabled": bool(execute_disabled_reason), + "execute_disabled_reason": execute_disabled_reason, "table_columns": table_columns, "write_template_tables": write_template_tables, "save_query_url": save_query_url, diff --git a/datasette/views/query_helpers.py b/datasette/views/query_helpers.py index 712832e8..f30a30bc 100644 --- a/datasette/views/query_helpers.py +++ b/datasette/views/query_helpers.py @@ -268,6 +268,16 @@ async def _analysis_rows_with_permissions( return rows +def _execute_write_disabled_reason(sql, analysis_error, analysis_rows): + if not (sql and sql.strip()): + return "Enter writable SQL before executing." + if analysis_error: + return analysis_error + if any(row.get("allowed") is False for row in analysis_rows): + return "You do not have permission for every operation listed above." + return None + + def _coerce_execute_write_payload(data, is_json): if not isinstance(data, dict): raise QueryValidationError("JSON must be a dictionary") @@ -358,16 +368,16 @@ async def _execute_write_analysis_data(datasette, db, sql, actor): ) except (QueryValidationError, sqlite3.DatabaseError) as ex: analysis_error = getattr(ex, "message", str(ex)) + execute_disabled_reason = _execute_write_disabled_reason( + sql, analysis_error, analysis_rows + ) return { "ok": analysis_error is None, "parameters": parameter_names, "analysis_error": analysis_error, "analysis_rows": analysis_rows, - "execute_disabled": bool( - (not sql) - or analysis_error - or any(row["allowed"] is False for row in analysis_rows) - ), + "execute_disabled": bool(execute_disabled_reason), + "execute_disabled_reason": execute_disabled_reason, } diff --git a/tests/test_queries.py b/tests/test_queries.py index 2aa5142b..87ecacde 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1374,6 +1374,10 @@ async def test_execute_write_get_prepopulates_without_executing(): assert 'addEventListener("paste"' in response.text assert "setupSqlParameterRefresh" in response.text assert "datasetteSqlAnalysis.renderAnalysis" in response.text + assert "input[data-execute-write-submit]:disabled" in response.text + assert ( + 'data-execute-write-disabled-reason aria-live="polite" hidden' in response.text + ) assert '' in response.text assert '' in response.text assert "" in response.text @@ -1390,7 +1394,9 @@ async def test_execute_write_get_prepopulates_without_executing(): ) assert '' in empty_response.text assert 'executeWriteSqlInput.value = "\\n\\n\\n";' in empty_response.text - assert "hidden>Save this query" in empty_response.text + assert "Enter writable SQL before executing." in empty_response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in empty_response.text + assert 'Save this query" in read_only_response.text + assert ( + '' + ) in read_only_response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in read_only_response.text + assert '' + ) in response.text + assert ( + '' + "You do not have permission for every operation listed above." + ) in response.text + assert 'no' in response.text + assert 'data-save-query-base-url="/data/-/queries/store"' in response.text + assert ' Date: Thu, 28 May 2026 16:20:28 -0700 Subject: [PATCH 1215/1218] //-/query.json and changelog docs --- docs/changelog.rst | 3 ++- docs/json_api.rst | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 3882cc12..3501aa60 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -17,12 +17,13 @@ Stored queries - Users with :ref:`store-query ` and :ref:`execute-sql ` permission can create stored queries from the SQL query page or the new ``GET //-/queries/store`` form. (:issue:`2735`) - The database page now shows a count and preview of stored queries, capped at five, and links to new paginated query browsers at ``/-/queries`` and ``//-/queries``. Those browsers support search. (:issue:`2735`) - Stored queries created by users default to private and untrusted. Private stored queries can only be viewed, updated or deleted by their owner, even if another actor has broad ``view-query``, ``update-query`` or ``delete-query`` permission. Untrusted stored queries execute using the permissions of the actor running them. See :ref:`stored_queries` and :ref:`trusted_stored_queries` for details. (:issue:`2735`) +- Configured queries from ``datasette.yaml`` are trusted by default, so they can execute with ``view-query`` permission alone. They can opt out of that behavior using ``is_trusted: false`` but cannot be made private; private queries are only available for user-created stored queries. (:issue:`2735`) - New ``store-query``, ``update-query`` and ``delete-query`` permissions, plus updated semantics for :ref:`view-query `. Trusted stored queries can still execute with ``view-query`` alone; untrusted read queries also require :ref:`execute-sql ` and untrusted writable queries require :ref:`execute-write-sql ` plus the relevant table-level write permissions. (:issue:`2735`) Write SQL UI ~~~~~~~~~~~~ -- New "Write to this database" interface at ``//-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted and links to a newly inserted row when a single-row insert succeeds. (:issue:`2742`) +- New "Write to this database" interface at ``//-/execute-write`` for running arbitrary writable SQL against mutable databases. The form extracts named parameters, analyzes the SQL, shows the table operations that will be attempted, includes starter templates for ``INSERT``, ``UPDATE`` and ``DELETE`` statements and links to a newly inserted row when a single-row insert succeeds. This is also available as a :ref:`JSON API `. (:issue:`2742`) - Added the new :ref:`execute-write-sql ` permission for running arbitrary writable SQL. Execution is also gated by table-level permissions such as :ref:`insert-row `, :ref:`update-row ` and :ref:`delete-row `, and writes to attached databases are rejected. (:issue:`2742`) - The write SQL analyzer now uses a deny-by-default model for unsupported operations. Reads from source tables require :ref:`view-table ` permission, schema changes require :ref:`create-table `, :ref:`alter-table ` or :ref:`drop-table ` as appropriate, and row mutation statements require the full ``insert-row``, ``update-row`` and ``delete-row`` permission set. SQL functions are allowed and are not separately permission-gated. (:issue:`2748`) - User-supplied write SQL now rejects ``VACUUM`` and writes to SQLite virtual tables or shadow tables. These restrictions also apply to untrusted stored write queries; trusted configured stored queries continue to skip these filters. (:issue:`2748`) diff --git a/docs/json_api.rst b/docs/json_api.rst index db19afc2..4bd76717 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -50,6 +50,25 @@ The ``"truncated"`` key lets you know if the query was truncated. This can happe For table pages, an additional key ``"next"`` may be present. This indicates that the next page in the pagination set can be retrieved using ``?_next=VALUE``. +.. _json_api_custom_sql: + +Executing custom SQL +-------------------- + +Actors with the :ref:`actions_execute_sql` permission can execute read-only SQL against a database using ``/-/query.json``: + +:: + + GET //-/query.json?sql=select+*+from+dogs + +Values for named SQL parameters can be provided as additional query string parameters: + +:: + + GET //-/query.json?sql=select+*+from+dogs+where+name=:name&name=Cleo + +The response uses the same default representation described above. + .. _json_api_shapes: Different shapes @@ -529,7 +548,7 @@ The request body must include a ``"sql"`` string. Named SQL parameters can be pr } } -The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query API ` instead. +The SQL must be writable. Read-only ``select`` queries should use the regular :ref:`custom SQL query JSON API ` instead. Datasette analyzes the SQL before executing it. The actor must have ``execute-write-sql`` permission for the database, and must also have any permissions required by the operations in the SQL. For example, inserts and updates against a table require ``insert-row``, ``update-row`` and ``delete-row`` permissions for that table. Reads performed as part of the write, such as ``insert into dogs select ... from other_table``, require ``view-table`` permission on the source table. Schema changes require ``create-table``, ``alter-table`` or ``drop-table`` permissions as appropriate. From 9e377e8b90b27ae21d3263d0bfe8d3808e2c6133 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 28 May 2026 20:01:48 -0700 Subject: [PATCH 1216/1218] Only show valid SQL write templates Closes #2753 Demo: https://github.com/simonw/datasette/issues/2753#issuecomment-4570071413 --- datasette/templates/execute_write.html | 130 ++++------------- datasette/views/execute_write.py | 192 ++++++++++++++++++++++++- tests/test_queries.py | 117 ++++++++++++++- 3 files changed, 331 insertions(+), 108 deletions(-) diff --git a/datasette/templates/execute_write.html b/datasette/templates/execute_write.html index ee251111..394261de 100644 --- a/datasette/templates/execute_write.html +++ b/datasette/templates/execute_write.html @@ -89,16 +89,18 @@ form.sql.core input[data-execute-write-submit]:disabled {

- - - + {% for operation in write_template_operations %} + + {% endfor %}

+ {% else %} +

You don't currently have permission to insert, edit or delete from any tables.

{% endif %}

@@ -242,119 +244,43 @@ window.addEventListener("DOMContentLoaded", () => { {% if write_template_tables %} {% endif %} diff --git a/datasette/views/execute_write.py b/datasette/views/execute_write.py index 7b693978..cff20847 100644 --- a/datasette/views/execute_write.py +++ b/datasette/views/execute_write.py @@ -1,3 +1,4 @@ +import re from urllib.parse import urlencode from datasette.resources import DatabaseResource @@ -22,6 +23,187 @@ from .query_helpers import ( _wants_json, ) +WRITE_TEMPLATE_LABELS = { + "insert": "Insert row", + "update": "Update rows", + "delete": "Delete rows", +} +WRITE_TEMPLATE_OPERATIONS = tuple(WRITE_TEMPLATE_LABELS) + + +def _parameter_names(columns): + seen = set() + names = {} + for column in columns: + base = re.sub(r"[^a-z0-9_]+", "_", column.lower()) + base = base.strip("_") or "value" + if base[0].isdigit(): + base = "p_{}".format(base) + name = base + index = 2 + while name in seen: + name = "{}_{}".format(base, index) + index += 1 + seen.add(name) + names[column] = name + return names + + +def _quote_identifier(identifier): + return '"{}"'.format(identifier.replace('"', '""')) + + +def _preferred_where_column(table, columns): + lower_table_id = "{}_id".format(table.lower()) + return ( + next((column for column in columns if column.lower() == "id"), None) + or next( + (column for column in columns if column.lower() == lower_table_id), None + ) + or columns[0] + ) + + +def _auto_incrementing_primary_key(columns): + primary_keys = [column for column in columns if column.is_pk] + if len(primary_keys) != 1: + return None + primary_key = primary_keys[0] + if primary_key.type and primary_key.type.lower() == "integer": + return primary_key.name + return None + + +def _insert_template_sql(table, columns): + column_names = [column.name for column in columns] + auto_pk = _auto_incrementing_primary_key(columns) + insert_columns = [column for column in column_names if column != auto_pk] + if not insert_columns: + return "insert into {}\ndefault values".format(_quote_identifier(table)) + names = _parameter_names(insert_columns) + return "\n".join( + ( + "insert into {} (".format(_quote_identifier(table)), + ",\n".join( + " {}".format(_quote_identifier(column)) for column in insert_columns + ), + ")", + "values (", + ",\n".join(" :{}".format(names[column]) for column in insert_columns), + ")", + ) + ) + + +def _update_template_sql(table, columns): + column_names = [column.name for column in columns] + names = _parameter_names(column_names) + where_column = _preferred_where_column(table, column_names) + set_columns = [column for column in column_names if column != where_column] + if not set_columns: + return "\n".join( + ( + "update {}".format(_quote_identifier(table)), + "set {} = :new_{}".format( + _quote_identifier(where_column), names[where_column] + ), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + return "\n".join( + ( + "update {}".format(_quote_identifier(table)), + "set " + + ",\n".join( + "{}{} = :{}".format( + " " if index else "", + _quote_identifier(column), + names[column], + ) + for index, column in enumerate(set_columns) + ), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + + +def _delete_template_sql(table, columns): + column_names = [column.name for column in columns] + names = _parameter_names(column_names) + where_column = _preferred_where_column(table, column_names) + return "\n".join( + ( + "delete from {}".format(_quote_identifier(table)), + "where {} = :{}".format( + _quote_identifier(where_column), names[where_column] + ), + ) + ) + + +def _template_sqls_for_table(table, columns): + return { + "insert": _insert_template_sql(table, columns), + "update": _update_template_sql(table, columns), + "delete": _delete_template_sql(table, columns), + } + + +async def _template_sql_allowed(datasette, db, sql, actor): + params = {parameter: "" for parameter in _derived_query_parameters(sql)} + try: + analysis = await db.analyze_sql(sql, params) + except sqlite3.DatabaseError: + return False + if not _analysis_is_write(analysis): + return False + analysis_rows = await _analysis_rows_with_permissions(datasette, analysis, actor) + return _execute_write_disabled_reason(sql, None, analysis_rows) is None + + +async def _write_template_tables( + datasette, db, table_columns, hidden_table_names, actor +): + write_template_tables = {} + for table in table_columns: + if table in hidden_table_names or not table_columns[table]: + continue + column_details = [ + column + for column in await db.table_column_details(table) + if not column.hidden + ] + if not column_details: + continue + templates = {} + for operation, sql in _template_sqls_for_table(table, column_details).items(): + if await _template_sql_allowed(datasette, db, sql, actor): + templates[operation] = sql + if templates: + write_template_tables[table] = { + "templates": templates, + } + return write_template_tables + + +def _write_template_operations(write_template_tables): + operations = [] + for operation in WRITE_TEMPLATE_OPERATIONS: + if any( + operation in table["templates"] for table in write_template_tables.values() + ): + operations.append( + { + "name": operation, + "label": WRITE_TEMPLATE_LABELS[operation], + } + ) + return operations + class ExecuteWriteView(BaseView): name = "execute-write" @@ -47,11 +229,10 @@ class ExecuteWriteView(BaseView): analysis_rows = [] table_columns = await _table_columns(self.ds, db.name) hidden_table_names = set(await db.hidden_table_names()) - write_template_tables = { - table: columns - for table, columns in table_columns.items() - if columns and table not in hidden_table_names - } + write_template_tables = await _write_template_tables( + self.ds, db, table_columns, hidden_table_names, request.actor + ) + write_template_operations = _write_template_operations(write_template_tables) if sql and analysis_error is None: try: parameter_names = _derived_query_parameters(sql) @@ -107,6 +288,7 @@ class ExecuteWriteView(BaseView): "execute_disabled_reason": execute_disabled_reason, "table_columns": table_columns, "write_template_tables": write_template_tables, + "write_template_operations": write_template_operations, "save_query_url": save_query_url, "save_query_base_url": save_query_base_url, }, diff --git a/tests/test_queries.py b/tests/test_queries.py index 87ecacde..89167a1d 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1,4 +1,6 @@ import json +import re +from html import unescape import pytest @@ -8,6 +10,19 @@ from datasette.stored_queries import StoredQuery, StoredQueryPage from datasette.utils.asgi import Forbidden +def _template_option_attributes(html, table): + match = re.search(r'' in response.text + assert '
Required permissioninsert