From 9cca3810338164c10367b73dbe32d6a090b35d32 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Nov 2022 16:34:33 -0800 Subject: [PATCH 01/60] --generate-dir option to publish heroku, refs #1905 --- datasette/publish/heroku.py | 18 ++++++++++++++ docs/cli-reference.rst | 2 ++ docs/publish.rst | 4 ++++ tests/test_publish_heroku.py | 46 ++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py index 171252ce..2b8977f1 100644 --- a/datasette/publish/heroku.py +++ b/datasette/publish/heroku.py @@ -3,7 +3,9 @@ from datasette import hookimpl import click import json import os +import pathlib import shlex +import shutil from subprocess import call, check_output import tempfile @@ -28,6 +30,11 @@ def publish_subcommand(publish): "--tar", help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar", ) + @click.option( + "--generate-dir", + type=click.Path(dir_okay=True, file_okay=False), + help="Output generated application files and stop without deploying", + ) def heroku( files, metadata, @@ -49,6 +56,7 @@ def publish_subcommand(publish): about_url, name, tar, + generate_dir, ): "Publish databases to Datasette running on Heroku" fail_if_publish_binary_not_installed( @@ -105,6 +113,16 @@ def publish_subcommand(publish): secret, extra_metadata, ): + if generate_dir: + # Recursively copy files from current working directory to it + if pathlib.Path(generate_dir).exists(): + raise click.ClickException("Directory already exists") + shutil.copytree(".", generate_dir) + click.echo( + f"Generated files written to {generate_dir}, stopping without deploying", + err=True, + ) + return app_name = None if name: # Check to see if this app already exists diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 4a8465cb..a6885fc8 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -501,6 +501,8 @@ See :ref:`publish_heroku`. -n, --name TEXT Application name to use when deploying --tar TEXT --tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar + --generate-dir DIRECTORY Output generated application files and stop + without deploying --help Show this message and exit. diff --git a/docs/publish.rst b/docs/publish.rst index 4ba94792..7ae0399e 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -73,6 +73,10 @@ This will output some details about the new deployment, including a URL like thi You can specify a custom app name by passing ``-n my-app-name`` to the publish command. This will also allow you to overwrite an existing app. +Rather than deploying directly you can use the ``--generate-dir`` option to output the files that would be deployed to a directory:: + + datasette publish heroku mydatabase.db --generate-dir=/tmp/deploy-this-to-heroku + See :ref:`cli_help_publish_heroku___help` for the full list of options for this command. .. _publish_vercel: diff --git a/tests/test_publish_heroku.py b/tests/test_publish_heroku.py index b5a8af73..faab340e 100644 --- a/tests/test_publish_heroku.py +++ b/tests/test_publish_heroku.py @@ -2,6 +2,7 @@ from click.testing import CliRunner from datasette import cli from unittest import mock import os +import pathlib import pytest @@ -128,3 +129,48 @@ def test_publish_heroku_plugin_secrets( mock.call(["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"]), ] ) + + +@pytest.mark.serial +@mock.patch("shutil.which") +def test_publish_heroku_generate_dir(mock_which, tmp_path_factory): + mock_which.return_value = True + runner = CliRunner() + os.chdir(tmp_path_factory.mktemp("runner")) + with open("test.db", "w") as fp: + fp.write("data") + output = str(tmp_path_factory.mktemp("generate_dir") / "output") + result = runner.invoke( + cli.cli, + [ + "publish", + "heroku", + "test.db", + "--generate-dir", + output, + ], + ) + assert result.exit_code == 0 + path = pathlib.Path(output) + assert path.exists() + file_names = {str(r.relative_to(path)) for r in path.glob("*")} + assert file_names == { + "requirements.txt", + "bin", + "runtime.txt", + "Procfile", + "test.db", + } + for name, expected in ( + ("requirements.txt", "datasette"), + ("runtime.txt", "python-3.8.10"), + ( + "Procfile", + ( + "web: datasette serve --host 0.0.0.0 -i test.db " + "--cors --port $PORT --inspect-file inspect-data.json" + ), + ), + ): + with open(path / name) as fp: + assert fp.read().strip() == expected From a93ccc63c709bb0984116cd1224ee2e846cb088b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Nov 2022 16:44:46 -0800 Subject: [PATCH 02/60] Upgrade to Python 3.11 on Heroku, refs #1905 --- datasette/publish/heroku.py | 2 +- tests/test_publish_heroku.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py index 2b8977f1..f576a346 100644 --- a/datasette/publish/heroku.py +++ b/datasette/publish/heroku.py @@ -194,7 +194,7 @@ def temporary_heroku_directory( fp.write(json.dumps(metadata_content, indent=2)) with open("runtime.txt", "w") as fp: - fp.write("python-3.8.10") + fp.write("python-3.11.0") if branch: install = [ diff --git a/tests/test_publish_heroku.py b/tests/test_publish_heroku.py index faab340e..cab83654 100644 --- a/tests/test_publish_heroku.py +++ b/tests/test_publish_heroku.py @@ -133,8 +133,15 @@ def test_publish_heroku_plugin_secrets( @pytest.mark.serial @mock.patch("shutil.which") -def test_publish_heroku_generate_dir(mock_which, tmp_path_factory): +@mock.patch("datasette.publish.heroku.check_output") +@mock.patch("datasette.publish.heroku.call") +def test_publish_heroku_generate_dir( + mock_call, mock_check_output, mock_which, tmp_path_factory +): mock_which.return_value = True + mock_check_output.side_effect = lambda s: { + "['heroku', 'plugins']": b"heroku-builds", + }[repr(s)] runner = CliRunner() os.chdir(tmp_path_factory.mktemp("runner")) with open("test.db", "w") as fp: @@ -163,7 +170,7 @@ def test_publish_heroku_generate_dir(mock_which, tmp_path_factory): } for name, expected in ( ("requirements.txt", "datasette"), - ("runtime.txt", "python-3.8.10"), + ("runtime.txt", "python-3.11.0"), ( "Procfile", ( From c4d002fef5d3c5dfaada6259d4f6067eef3fcc74 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Nov 2022 14:50:19 -0800 Subject: [PATCH 03/60] Pin httpx in Pyodide test, refs #1904 Should help get tests to pass for #1896 too --- test-in-pyodide-with-shot-scraper.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test-in-pyodide-with-shot-scraper.sh b/test-in-pyodide-with-shot-scraper.sh index e5df7398..0c140818 100755 --- a/test-in-pyodide-with-shot-scraper.sh +++ b/test-in-pyodide-with-shot-scraper.sh @@ -25,6 +25,7 @@ async () => { let output = await pyodide.runPythonAsync(\` import micropip await micropip.install('h11==0.12.0') + await micropip.install('httpx==0.23') await micropip.install('http://localhost:8529/$wheel') import ssl import setuptools From d67f812b7327c7075732688f3df728807503dc58 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Nov 2022 16:53:05 -0800 Subject: [PATCH 04/60] Release 0.63.2 Refs #1904, #1905 --- datasette/version.py | 2 +- docs/changelog.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 3a4f06dc..6016687a 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63.1" +__version__ = "0.63.2" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 0e0393ef..865bb58e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. _v0_63_2: + +0.63.2 (2022-11-18) +------------------- + +- Fixed a bug in ``datasette publish heroku`` where deployments failed due to an older version of Python being requested. (:issue:`1905`) +- New ``datasette publish heroku --generate-dir `` option for generating a Heroku deployment directory without deploying it. + .. _v0_63_1: 0.63.1 (2022-11-10) From 4ba8d57bb1a8537ef2f40fa03414f8b14a16c29a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Dec 2022 16:55:17 -0800 Subject: [PATCH 05/60] Try click.echo() instead This ensures the URL is output correctly when running under Docker. Closes #1958 --- datasette/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index 6eb42712..2b61292b 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -607,7 +607,7 @@ def serve( url = "http://{}:{}{}?token={}".format( host, port, ds.urls.path("-/auth-token"), ds._root_token ) - print(url) + click.echo(url) if open_browser: if url is None: # Figure out most convenient URL - to table, database or homepage From 96b3a86d7f3c74c7bb508e0ea6cb538ede5d046d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Dec 2022 09:34:07 -0800 Subject: [PATCH 06/60] Replace AsgiLifespan with AsgiRunOnFirstRequest, refs #1955 --- datasette/app.py | 20 +++--------------- datasette/utils/asgi.py | 44 ++++++++++++++-------------------------- docs/plugin_hooks.rst | 5 +++-- docs/testing_plugins.rst | 2 +- 4 files changed, 22 insertions(+), 49 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 246269f3..20f08b7b 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -63,17 +63,14 @@ from .utils import ( to_css_class, ) from .utils.asgi import ( - AsgiLifespan, - Base400, Forbidden, NotFound, Request, Response, + AsgiRunOnFirstRequest, asgi_static, asgi_send, asgi_send_file, - asgi_send_html, - asgi_send_json, asgi_send_redirect, ) from .utils.internal_db import init_internal_db, populate_schema_tables @@ -1260,7 +1257,7 @@ class Datasette: async def setup_db(): # First time server starts up, calculate table counts for immutable databases - for dbname, database in self.databases.items(): + for database in self.databases.values(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) @@ -1274,10 +1271,7 @@ class Datasette: ) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) - asgi = AsgiLifespan( - asgi, - on_startup=setup_db, - ) + asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) return asgi @@ -1566,42 +1560,34 @@ class DatasetteClient: return path async def get(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.get(self._fix(path), **kwargs) async def options(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.options(self._fix(path), **kwargs) async def head(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.head(self._fix(path), **kwargs) async def post(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.post(self._fix(path), **kwargs) async def put(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.put(self._fix(path), **kwargs) async def patch(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.patch(self._fix(path), **kwargs) async def delete(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.delete(self._fix(path), **kwargs) async def request(self, method, path, **kwargs): - await self.ds.invoke_startup() avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) async with httpx.AsyncClient(app=self.app) as client: return await client.request( diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 8a2fa060..03c1c9cd 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -135,35 +135,6 @@ class Request: return cls(scope, None) -class AsgiLifespan: - def __init__(self, app, on_startup=None, on_shutdown=None): - self.app = app - on_startup = on_startup or [] - on_shutdown = on_shutdown or [] - if not isinstance(on_startup or [], list): - on_startup = [on_startup] - if not isinstance(on_shutdown or [], list): - on_shutdown = [on_shutdown] - self.on_startup = on_startup - self.on_shutdown = on_shutdown - - async def __call__(self, scope, receive, send): - if scope["type"] == "lifespan": - while True: - message = await receive() - if message["type"] == "lifespan.startup": - for fn in self.on_startup: - await fn() - await send({"type": "lifespan.startup.complete"}) - elif message["type"] == "lifespan.shutdown": - for fn in self.on_shutdown: - await fn() - await send({"type": "lifespan.shutdown.complete"}) - return - else: - await self.app(scope, receive, send) - - class AsgiStream: def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): self.stream_fn = stream_fn @@ -428,3 +399,18 @@ class AsgiFileDownload: content_type=self.content_type, headers=self.headers, ) + + +class AsgiRunOnFirstRequest: + def __init__(self, asgi, on_startup): + assert isinstance(on_startup, list) + self.asgi = asgi + self.on_startup = on_startup + self._started = False + + async def __call__(self, scope, receive, send): + if not self._started: + self._started = True + for hook in self.on_startup: + await hook() + return await self.asgi(scope, receive, send) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index b61f953a..101911cc 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -855,13 +855,14 @@ Potential use-cases: .. note:: - If you are writing :ref:`unit tests ` for a plugin that uses this hook you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example: + If you are writing :ref:`unit tests ` for a plugin that uses this hook and doesn't exercise Datasette by sending + any simulated requests through it you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example: .. code-block:: python @pytest.mark.asyncio async def test_my_plugin(): - ds = Datasette([], metadata={}) + ds = Datasette() await ds.invoke_startup() # Rest of test goes here diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index 41f50e56..6d2097ad 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -80,7 +80,7 @@ Creating a ``Datasette()`` instance like this as useful shortcut in tests, but t This method registers any :ref:`plugin_hook_startup` or :ref:`plugin_hook_prepare_jinja2_environment` plugins that might themselves need to make async calls. -If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - those method calls ensure that ``.invoke_startup()`` has been called for you. +If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - Datasette automatically calls ``invoke_startup()`` the first time it handles a request. .. _testing_plugins_pdb: From 5649e547eff4278130d5d66c5684fc5d4fd77b69 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 17:22:00 -0800 Subject: [PATCH 07/60] Put AsgiLifestyle back so server starts up again, refs #1955 --- datasette/app.py | 2 ++ datasette/utils/asgi.py | 29 +++++++++++++++++++++++++++++ tests/conftest.py | 33 ++++++++++++++------------------- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 20f08b7b..ea9bb6d2 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -63,6 +63,7 @@ from .utils import ( to_css_class, ) from .utils.asgi import ( + AsgiLifespan, Forbidden, NotFound, Request, @@ -1271,6 +1272,7 @@ class Datasette: ) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) + asgi = AsgiLifespan(asgi) asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 03c1c9cd..16f90077 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -135,6 +135,35 @@ class Request: return cls(scope, None) +class AsgiLifespan: + def __init__(self, app, on_startup=None, on_shutdown=None): + self.app = app + on_startup = on_startup or [] + on_shutdown = on_shutdown or [] + if not isinstance(on_startup or [], list): + on_startup = [on_startup] + if not isinstance(on_shutdown or [], list): + on_shutdown = [on_shutdown] + self.on_startup = on_startup + self.on_shutdown = on_shutdown + + async def __call__(self, scope, receive, send): + if scope["type"] == "lifespan": + while True: + message = await receive() + if message["type"] == "lifespan.startup": + for fn in self.on_startup: + await fn() + await send({"type": "lifespan.startup.complete"}) + elif message["type"] == "lifespan.shutdown": + for fn in self.on_shutdown: + await fn() + await send({"type": "lifespan.shutdown.complete"}) + return + else: + await self.app(scope, receive, send) + + class AsgiStream: def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): self.stream_fn = stream_fn diff --git a/tests/conftest.py b/tests/conftest.py index f4638a14..dd06fa07 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,17 @@ UNDOCUMENTED_PERMISSIONS = { } +def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs): + start = time.time() + while time.time() - start < timeout: + try: + client.get(url, **kwargs) + return + except httpx.ConnectError: + time.sleep(0.1) + raise AssertionError("Timed out waiting for {} to respond".format(url)) + + def pytest_report_header(config): return "SQLite: {}".format( sqlite3.connect(":memory:").execute("select sqlite_version()").fetchone()[0] @@ -111,13 +122,7 @@ def ds_localhost_http_server(): # Avoid FileNotFoundError: [Errno 2] No such file or directory: cwd=tempfile.gettempdir(), ) - # Loop until port 8041 serves traffic - while True: - try: - httpx.get("http://localhost:8041/") - break - except httpx.ConnectError: - time.sleep(0.1) + wait_until_responds("http://localhost:8041/") # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc @@ -152,12 +157,7 @@ def ds_localhost_https_server(tmp_path_factory): stderr=subprocess.STDOUT, cwd=tempfile.gettempdir(), ) - while True: - try: - httpx.get("https://localhost:8042/", verify=client_cert) - break - except httpx.ConnectError: - time.sleep(0.1) + wait_until_responds("http://localhost:8042/", verify=client_cert) # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc, client_cert @@ -181,12 +181,7 @@ def ds_unix_domain_socket_server(tmp_path_factory): # Poll until available transport = httpx.HTTPTransport(uds=uds) client = httpx.Client(transport=transport) - while True: - try: - client.get("http://localhost/_memory.json") - break - except httpx.ConnectError: - time.sleep(0.1) + wait_until_responds("http://localhost/_memory.json", client=client) # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc, uds From 0bd3eaa2ddb5f8dbf57190cfb639e3f4d7a82e99 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 18:33:07 -0800 Subject: [PATCH 08/60] Move HTTPS test to a bash script See https://github.com/simonw/datasette/issues/1955#issuecomment-1356627931 --- tests/conftest.py | 35 ---------------------------- tests/test_cli_serve_server.py | 11 --------- tests/test_datasette_https_server.sh | 33 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 46 deletions(-) create mode 100755 tests/test_datasette_https_server.sh diff --git a/tests/conftest.py b/tests/conftest.py index dd06fa07..ee13df11 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -130,41 +130,6 @@ def ds_localhost_http_server(): ds_proc.terminate() -@pytest.fixture(scope="session") -def ds_localhost_https_server(tmp_path_factory): - cert_directory = tmp_path_factory.mktemp("certs") - ca = trustme.CA() - server_cert = ca.issue_cert("localhost") - keyfile = str(cert_directory / "server.key") - certfile = str(cert_directory / "server.pem") - client_cert = str(cert_directory / "client.pem") - server_cert.private_key_pem.write_to_path(path=keyfile) - for blob in server_cert.cert_chain_pems: - blob.write_to_path(path=certfile, append=True) - ca.cert_pem.write_to_path(path=client_cert) - ds_proc = subprocess.Popen( - [ - "datasette", - "--memory", - "-p", - "8042", - "--ssl-keyfile", - keyfile, - "--ssl-certfile", - certfile, - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - cwd=tempfile.gettempdir(), - ) - wait_until_responds("http://localhost:8042/", verify=client_cert) - # Check it started successfully - assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") - yield ds_proc, client_cert - # Shut it down at the end of the pytest session - ds_proc.terminate() - - @pytest.fixture(scope="session") def ds_unix_domain_socket_server(tmp_path_factory): # This used to use tmp_path_factory.mktemp("uds") but that turned out to diff --git a/tests/test_cli_serve_server.py b/tests/test_cli_serve_server.py index 1c31e2a3..47f23c08 100644 --- a/tests/test_cli_serve_server.py +++ b/tests/test_cli_serve_server.py @@ -13,17 +13,6 @@ def test_serve_localhost_http(ds_localhost_http_server): }.items() <= response.json().items() -@pytest.mark.serial -def test_serve_localhost_https(ds_localhost_https_server): - _, client_cert = ds_localhost_https_server - response = httpx.get("https://localhost:8042/_memory.json", verify=client_cert) - assert { - "database": "_memory", - "path": "/_memory", - "tables": [], - }.items() <= response.json().items() - - @pytest.mark.serial @pytest.mark.skipif( not hasattr(socket, "AF_UNIX"), reason="Requires socket.AF_UNIX support" diff --git a/tests/test_datasette_https_server.sh b/tests/test_datasette_https_server.sh new file mode 100755 index 00000000..a701ad4c --- /dev/null +++ b/tests/test_datasette_https_server.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Generate certificates +python -m trustme +# This creates server.pem, server.key, client.pem + +# Start the server in the background +datasette --memory \ + --ssl-keyfile=server.key \ + --ssl-certfile=server.pem \ + -p 8152 & + +# Store the background process ID in a variable +server_pid=$! + +# Wait for the server to start +sleep 2 + +# Make a test request using curl +curl -f --cacert client.pem 'https://localhost:8152/_memory.json' + +# Save curl's exit code (-f option causes it to return one on HTTP errors) +curl_exit_code=$? + +# Shut down the server +kill $server_pid +sleep 1 + +# Clean up the certificates +rm server.pem server.key client.pem + +echo $curl_exit_code +exit $curl_exit_code From e6d94f9ffa09a7c7bbb6b4a6273a8b9a7c0cb204 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 18:38:26 -0800 Subject: [PATCH 09/60] Run new HTTPS test in CI, refs #1955 --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 886f649a..39aa8b13 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,8 @@ jobs: run: | pytest -n auto -m "not serial" pytest -m "serial" + # And the test that exceeds a localhost HTTPS server + tests/test_datasette_https_server.sh - name: Check if cog needs to be run run: | cog --check docs/*.rst From d93f975b3dbdf37ce91c176e24b920b041cf7571 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 18:52:52 -0800 Subject: [PATCH 10/60] On publish run tests same way as for test --- .github/workflows/publish.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fa608055..b7d99c1c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -31,7 +31,10 @@ jobs: pip install -e '.[test]' - name: Run tests run: | - pytest + pytest -n auto -m "not serial" + pytest -m "serial" + # And the test that exceeds a localhost HTTPS server + tests/test_datasette_https_server.sh deploy: runs-on: ubuntu-latest From a9ffcbd42ec6c6d5a55f7a541e282c0f1002c559 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 19:06:03 -0800 Subject: [PATCH 11/60] Release 0.63.3 Refs #1955, #1958, #1963 --- datasette/version.py | 2 +- docs/changelog.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 6016687a..8aaf793d 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63.2" +__version__ = "0.63.3" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 865bb58e..33945ccb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. _v0_63_3: + +0.63.3 (2022-12-17) +------------------- + +- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in root when the server shut down, not when it started up. (:issue:`1958`) +- You no longer need to ensure ``await datasette.invoke_startup()`` has been called in order for Datasette to start correctly serving requests - this is now handled automatically the first time the server receives a request. This fixes a bug experienced when Datasette is served directly by an ASGI application server such as Uvicorn or Gunicorn. It also fixes a bug with the `datasette-gunicorn `__ plugin. (:issue:`1955`) + .. _v0_63_2: 0.63.2 (2022-11-18) From e05998bc85e6347095cf71434c6de4e9d8414933 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 19:08:01 -0800 Subject: [PATCH 12/60] Added missing word, refs #1963 --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 33945ccb..270765c8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,7 +9,7 @@ Changelog 0.63.3 (2022-12-17) ------------------- -- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in root when the server shut down, not when it started up. (:issue:`1958`) +- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in as root when the server shut down, not when it started up. (:issue:`1958`) - You no longer need to ensure ``await datasette.invoke_startup()`` has been called in order for Datasette to start correctly serving requests - this is now handled automatically the first time the server receives a request. This fixes a bug experienced when Datasette is served directly by an ASGI application server such as Uvicorn or Gunicorn. It also fixes a bug with the `datasette-gunicorn `__ plugin. (:issue:`1955`) .. _v0_63_2: From 9ec58da6ec9ee8a19a1227b256e72e32def254a8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 19:24:34 -0800 Subject: [PATCH 13/60] Deploy docs on publish using Python 3.9 A workaround for gcloud setup, see: https://til.simonwillison.net/googlecloud/gcloud-error-workaround Refs #1963 --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b7d99c1c..50961b82 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -72,7 +72,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.10' + python-version: '3.9' - uses: actions/cache@v2 name: Configure pip caching with: From 74022940184da13ac8b0881b7cb7f1cc43650424 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 17 Dec 2022 22:28:07 -0800 Subject: [PATCH 14/60] .select-wrapper:focus-within for accessibility, closes #1771 --- datasette/static/app.css | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datasette/static/app.css b/datasette/static/app.css index 712b9925..71437bd4 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -573,6 +573,9 @@ form button[type=button] { display: inline-block; margin-right: 0.3em; } +.select-wrapper:focus-within { + border: 1px solid black; +} .select-wrapper.filter-op { width: 80px; } From 1bff3f1a70964977bcccc6e9ca80db122c28afcf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 4 Jan 2023 10:25:04 -0800 Subject: [PATCH 15/60] Fixed table_action example in docs --- docs/plugin_hooks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 101911cc..399226a0 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1346,7 +1346,7 @@ This example adds a new table action if the signed in user is ``"root"``: @hookimpl - def table_actions(datasette, actor): + def table_actions(datasette, actor, database, table): if actor and actor.get("id") == "root": return [ { From b8cf864fa68d25961024586b2d263f4dc7ebee75 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 4 Jan 2023 16:47:47 -0800 Subject: [PATCH 16/60] Fixed broken example links in _where= docs --- docs/json_api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/json_api.rst b/docs/json_api.rst index d3fdb1e4..fd2d3ec6 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -357,8 +357,8 @@ Special table arguments Some examples: - * `facetable?_where=neighborhood like "%c%"&_where=city_id=3 `__ - * `facetable?_where=city_id in (select id from facet_cities where name != "Detroit") `__ + * `facetable?_where=_neighborhood like "%c%"&_where=_city_id=3 `__ + * `facetable?_where=_city_id in (select id from facet_cities where name != "Detroit") `__ ``?_through={json}`` This can be used to filter rows via a join against another table. From 1ec9c9995c611c9ba34539f1b93c0e684fd35b67 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 5 Jan 2023 09:21:07 -0800 Subject: [PATCH 17/60] Backported default_allow_sql for 0.63.x, closes #1409 --- datasette/app.py | 5 +++++ datasette/default_permissions.py | 6 +++++- docs/authentication.rst | 10 ++++++++-- docs/cli-reference.rst | 2 ++ docs/settings.rst | 15 +++++++++++++++ tests/test_api.py | 1 + tests/test_cli.py | 22 ++++++++++++++++++++++ 7 files changed, 58 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index ea9bb6d2..c052be58 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -116,6 +116,11 @@ SETTINGS = ( True, "Allow users to specify columns to facet using ?_facet= parameter", ), + Setting( + "default_allow_sql", + True, + "Allow anyone to run arbitrary SQL queries", + ), Setting( "allow_download", True, diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index b58d8d1b..a0681e83 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -36,12 +36,16 @@ def permission_allowed(datasette, actor, action, resource): return None return actor_matches_allow(actor, allow) elif action == "execute-sql": + # Only use default_allow_sql setting if it is set to False: + default_allow_sql = ( + None if datasette.setting("default_allow_sql") else False + ) # Use allow_sql block from database block, or from top-level database_allow_sql = datasette.metadata("allow_sql", database=resource) if database_allow_sql is None: database_allow_sql = datasette.metadata("allow_sql") if database_allow_sql is None: - return None + return default_allow_sql return actor_matches_allow(actor, database_allow_sql) return inner diff --git a/docs/authentication.rst b/docs/authentication.rst index 685dab15..87852555 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -307,7 +307,13 @@ To limit access to the ``add_name`` canned query in your ``dogs.db`` database to Controlling the ability to execute arbitrary SQL ------------------------------------------------ -The ``"allow_sql"`` block can be used to control who is allowed to execute arbitrary SQL queries, both using the form on the database page e.g. https://latest.datasette.io/fixtures or by appending a ``?_where=`` parameter to the table page as seen on https://latest.datasette.io/fixtures/facetable?_where=city_id=1. +Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page `__ or by appending a ``?_where=`` parameter to the table page `like this `__. + +Access to this ability is controlled by the :ref:`permissions_execute_sql` permission. + +The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting ` when you first start Datasette running. + +You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries. To enable just the :ref:`root user` to execute SQL for all databases in your instance, use the following: @@ -515,7 +521,7 @@ Actor is allowed to run arbitrary SQL queries against a specific database, e.g. ``resource`` - string The name of the database -Default *allow*. +Default *allow*. See also :ref:`the default_allow_sql setting `. .. _permissions_permissions_debug: diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index a6885fc8..ed20ea8a 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -224,6 +224,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam (default=50) allow_facet Allow users to specify columns to facet using ?_facet= parameter (default=True) + default_allow_sql Allow anyone to run arbitrary SQL queries + (default=True) allow_download Allow users to download the original SQLite database files (default=True) suggest_facets Calculate and display suggested facets diff --git a/docs/settings.rst b/docs/settings.rst index a6d50543..8a83cc2f 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -59,6 +59,21 @@ Settings The following options can be set using ``--setting name value``, or by storing them in the ``settings.json`` file for use with :ref:`config_dir`. +.. _setting_default_allow_sql: + +default_allow_sql +~~~~~~~~~~~~~~~~~ + +Should users be able to execute arbitrary SQL queries by default? + +Setting this to ``off`` causes permission checks for :ref:`permissions_execute_sql` to fail by default. + +:: + + datasette mydatabase.db --setting default_allow_sql off + +There are two ways to achieve this: the other is to add ``"allow_sql": false`` to your ``metadata.json`` file, as described in :ref:`authentication_permissions_execute_sql`. This setting offers a more convenient way to do this. + .. _setting_default_page_size: default_page_size diff --git a/tests/test_api.py b/tests/test_api.py index 4027a7a5..db624823 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -805,6 +805,7 @@ def test_settings_json(app_client): assert { "default_page_size": 50, "default_facet_size": 30, + "default_allow_sql": True, "facet_suggest_time_limit_ms": 50, "facet_time_limit_ms": 200, "max_returned_rows": 100, diff --git a/tests/test_cli.py b/tests/test_cli.py index f0d28037..9ca50cbe 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -215,6 +215,28 @@ def test_setting_type_validation(): assert '"default_page_size" should be an integer' in result.stderr +@pytest.mark.parametrize("default_allow_sql", (True, False)) +def test_setting_default_allow_sql(default_allow_sql): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--setting", + "default_allow_sql", + "on" if default_allow_sql else "off", + "--get", + "/_memory.json?sql=select+21&_shape=objects", + ], + ) + if default_allow_sql: + assert result.exit_code == 0, result.output + assert json.loads(result.output)["rows"][0] == {"21": 21} + else: + assert result.exit_code == 1, result.output + # This isn't JSON at the moment, maybe it should be though + assert "Forbidden" in result.output + + def test_config_deprecated(): # The --config option should show a deprecation message runner = CliRunner(mix_stderr=False) From 3b88ac671e2fd64663c0588ea92f374c57836cc4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 7 Jan 2023 15:48:08 -0800 Subject: [PATCH 18/60] What to do if extensions will not load, refs #1979 --- docs/installation.rst | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index a4757736..121f2ef5 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -230,3 +230,60 @@ Some plugins such as `datasette-ripgrep Date: Sat, 7 Jan 2023 15:56:03 -0800 Subject: [PATCH 19/60] Better error for --load-extensions, refs #1979 --- datasette/cli.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/datasette/cli.py b/datasette/cli.py index 2b61292b..89ee12b6 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -4,6 +4,7 @@ import click from click import formatting from click.types import CompositeParamType from click_default_group import DefaultGroup +import functools import json import os import pathlib @@ -11,6 +12,7 @@ import shutil from subprocess import call import sys from runpy import run_module +import textwrap import webbrowser from .app import ( OBSOLETE_SETTINGS, @@ -126,7 +128,7 @@ class Setting(CompositeParamType): def sqlite_extensions(fn): - return click.option( + fn = click.option( "sqlite_extensions", "--load-extension", type=LoadExtension(), @@ -134,6 +136,25 @@ def sqlite_extensions(fn): multiple=True, help="Path to a SQLite extension to load, and optional entrypoint", )(fn) + # Wrap it in a custom error handler + @functools.wraps(fn) + def wrapped(*args, **kwargs): + try: + return fn(*args, **kwargs) + except AttributeError as e: + if "enable_load_extension" in str(e): + raise click.ClickException( + textwrap.dedent( + """ + Your Python installation does not have the ability to load SQLite extensions. + + More information: https://datasette.io/help/extensions + """ + ).strip() + ) + raise + + return wrapped @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) From 02987e342d75fc874d0c57ecfa8c4eabf0f5b313 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Jan 2023 08:25:07 -0800 Subject: [PATCH 20/60] Explicitly explain allow_sql: false --- docs/authentication.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/authentication.rst b/docs/authentication.rst index 87852555..37703307 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -315,6 +315,14 @@ The easiest way to disable arbitrary SQL queries is using the :ref:`default_allo You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries. +To prevent any user from executing arbitrary SQL queries, use this: + +.. code-block:: json + + { + "allow_sql": false + } + To enable just the :ref:`root user` to execute SQL for all databases in your instance, use the following: .. code-block:: json From 0084daa50a8d97e025989d73d23b9a60c2dc69d9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 9 Jan 2023 08:37:07 -0800 Subject: [PATCH 21/60] Release 0.64, with a warning against arbitrary SQL with SpatiaLite Refs #1409, #1771, #1979 Refs https://github.com/simonw/datasette.io/issues/132 --- datasette/version.py | 2 +- docs/changelog.rst | 11 +++++++++++ docs/spatialite.rst | 26 +++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/datasette/version.py b/datasette/version.py index 8aaf793d..eedecc61 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63.3" +__version__ = "0.64" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 270765c8..39324989 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,17 @@ Changelog ========= +.. _v0_64: + +0.64 (2023-01-09) +----------------- + +- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details. +- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`) +- `Building a location to time zone API with SpatiaLite `__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API. +- New documentation about :ref:`how to debug problems loading SQLite extensions `. The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`) +- Fixed an accessibility issue: the ``