From 9cca3810338164c10367b73dbe32d6a090b35d32 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 18 Nov 2022 16:34:33 -0800
Subject: [PATCH 01/79] --generate-dir option to publish heroku, refs #1905

---
 datasette/publish/heroku.py  | 18 ++++++++++++++
 docs/cli-reference.rst       |  2 ++
 docs/publish.rst             |  4 ++++
 tests/test_publish_heroku.py | 46 ++++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)

diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py
index 171252ce..2b8977f1 100644
--- a/datasette/publish/heroku.py
+++ b/datasette/publish/heroku.py
@@ -3,7 +3,9 @@ from datasette import hookimpl
 import click
 import json
 import os
+import pathlib
 import shlex
+import shutil
 from subprocess import call, check_output
 import tempfile
@@ -28,6 +30,11 @@ def publish_subcommand(publish):
         "--tar",
         help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar",
     )
+    @click.option(
+        "--generate-dir",
+        type=click.Path(dir_okay=True, file_okay=False),
+        help="Output generated application files and stop without deploying",
+    )
     def heroku(
         files,
         metadata,
@@ -49,6 +56,7 @@ def publish_subcommand(publish):
         about_url,
         name,
         tar,
+        generate_dir,
     ):
         "Publish databases to Datasette running on Heroku"
         fail_if_publish_binary_not_installed(
@@ -105,6 +113,16 @@ def publish_subcommand(publish):
             secret,
             extra_metadata,
         ):
+            if generate_dir:
+                # Recursively copy files from current working directory to it
+                if pathlib.Path(generate_dir).exists():
+                    raise click.ClickException("Directory already exists")
+                shutil.copytree(".", generate_dir)
+                click.echo(
+                    f"Generated files written to {generate_dir}, stopping without deploying",
+                    err=True,
+                )
+                return
             app_name = None
             if name:
                 # Check to see if this app already exists
diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst
index 4a8465cb..a6885fc8 100644
--- a/docs/cli-reference.rst
+++ b/docs/cli-reference.rst
@@ -501,6 +501,8 @@ See :ref:`publish_heroku`.
   -n, --name TEXT           Application name to use when deploying
   --tar TEXT                --tar option to pass to Heroku, e.g.
                             --tar=/usr/local/bin/gtar
+  --generate-dir DIRECTORY  Output generated application files and stop
+                            without deploying
   --help                    Show this message and exit.

diff --git a/docs/publish.rst b/docs/publish.rst
index 4ba94792..7ae0399e 100644
--- a/docs/publish.rst
+++ b/docs/publish.rst
@@ -73,6 +73,10 @@ This will output some details about the new deployment, including a URL like this:
 You can specify a custom app name by passing ``-n my-app-name`` to the publish command. This will also allow you to overwrite an existing app.

+Rather than deploying directly you can use the ``--generate-dir`` option to output the files that would be deployed to a directory::
+
+    datasette publish heroku mydatabase.db --generate-dir=/tmp/deploy-this-to-heroku
+
 See :ref:`cli_help_publish_heroku___help` for the full list of options for this command.

 .. _publish_vercel:
diff --git a/tests/test_publish_heroku.py b/tests/test_publish_heroku.py
index b5a8af73..faab340e 100644
--- a/tests/test_publish_heroku.py
+++ b/tests/test_publish_heroku.py
@@ -2,6 +2,7 @@ from click.testing import CliRunner
 from datasette import cli
 from unittest import mock
 import os
+import pathlib
 import pytest
@@ -128,3 +129,48 @@ def test_publish_heroku_plugin_secrets(
             mock.call(["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"]),
         ]
     )
+
+
+@pytest.mark.serial
+@mock.patch("shutil.which")
+def test_publish_heroku_generate_dir(mock_which, tmp_path_factory):
+    mock_which.return_value = True
+    runner = CliRunner()
+    os.chdir(tmp_path_factory.mktemp("runner"))
+    with open("test.db", "w") as fp:
+        fp.write("data")
+    output = str(tmp_path_factory.mktemp("generate_dir") / "output")
+    result = runner.invoke(
+        cli.cli,
+        [
+            "publish",
+            "heroku",
+            "test.db",
+            "--generate-dir",
+            output,
+        ],
+    )
+    assert result.exit_code == 0
+    path = pathlib.Path(output)
+    assert path.exists()
+    file_names = {str(r.relative_to(path)) for r in path.glob("*")}
+    assert file_names == {
+        "requirements.txt",
+        "bin",
+        "runtime.txt",
+        "Procfile",
+        "test.db",
+    }
+    for name, expected in (
+        ("requirements.txt", "datasette"),
+        ("runtime.txt", "python-3.8.10"),
+        (
+            "Procfile",
+            (
+                "web: datasette serve --host 0.0.0.0 -i test.db "
+                "--cors --port $PORT --inspect-file inspect-data.json"
+            ),
+        ),
+    ):
+        with open(path / name) as fp:
+            assert fp.read().strip() == expected
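
The new option boils down to a copy-the-working-directory-and-stop pattern, built from ``pathlib`` and ``shutil.copytree()``. A minimal standalone sketch of the same idea (the function name here is hypothetical):

.. code-block:: python

    import pathlib
    import shutil

    def copy_build_dir(src: str, dest: str) -> None:
        # Refuse to overwrite an existing directory, matching the
        # --generate-dir behaviour in the patch above
        if pathlib.Path(dest).exists():
            raise ValueError("Directory already exists")
        shutil.copytree(src, dest)
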
From a93ccc63c709bb0984116cd1224ee2e846cb088b Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 18 Nov 2022 16:44:46 -0800
Subject: [PATCH 02/79] Upgrade to Python 3.11 on Heroku, refs #1905

---
 datasette/publish/heroku.py  |  2 +-
 tests/test_publish_heroku.py | 11 +++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py
index 2b8977f1..f576a346 100644
--- a/datasette/publish/heroku.py
+++ b/datasette/publish/heroku.py
@@ -194,7 +194,7 @@ def temporary_heroku_directory(
             fp.write(json.dumps(metadata_content, indent=2))

         with open("runtime.txt", "w") as fp:
-            fp.write("python-3.8.10")
+            fp.write("python-3.11.0")

         if branch:
             install = [
diff --git a/tests/test_publish_heroku.py b/tests/test_publish_heroku.py
index faab340e..cab83654 100644
--- a/tests/test_publish_heroku.py
+++ b/tests/test_publish_heroku.py
@@ -133,8 +133,15 @@ def test_publish_heroku_plugin_secrets(

 @pytest.mark.serial
 @mock.patch("shutil.which")
-def test_publish_heroku_generate_dir(mock_which, tmp_path_factory):
+@mock.patch("datasette.publish.heroku.check_output")
+@mock.patch("datasette.publish.heroku.call")
+def test_publish_heroku_generate_dir(
+    mock_call, mock_check_output, mock_which, tmp_path_factory
+):
     mock_which.return_value = True
+    mock_check_output.side_effect = lambda s: {
+        "['heroku', 'plugins']": b"heroku-builds",
+    }[repr(s)]
     runner = CliRunner()
     os.chdir(tmp_path_factory.mktemp("runner"))
     with open("test.db", "w") as fp:
@@ -163,7 +170,7 @@ def test_publish_heroku_generate_dir(
     for name, expected in (
         ("requirements.txt", "datasette"),
-        ("runtime.txt", "python-3.8.10"),
+        ("runtime.txt", "python-3.11.0"),
         (
             "Procfile",
             (
From c4d002fef5d3c5dfaada6259d4f6067eef3fcc74 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 18 Nov 2022 14:50:19 -0800
Subject: [PATCH 03/79] Pin httpx in Pyodide test, refs #1904

Should help get tests to pass for #1896 too
---
 test-in-pyodide-with-shot-scraper.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test-in-pyodide-with-shot-scraper.sh b/test-in-pyodide-with-shot-scraper.sh
index e5df7398..0c140818 100755
--- a/test-in-pyodide-with-shot-scraper.sh
+++ b/test-in-pyodide-with-shot-scraper.sh
@@ -25,6 +25,7 @@ async () => {
     let output = await pyodide.runPythonAsync(\`
     import micropip
     await micropip.install('h11==0.12.0')
+    await micropip.install('httpx==0.23')
     await micropip.install('http://localhost:8529/$wheel')
     import ssl
     import setuptools

From d67f812b7327c7075732688f3df728807503dc58 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 18 Nov 2022 16:53:05 -0800
Subject: [PATCH 04/79] Release 0.63.2

Refs #1904, #1905
---
 datasette/version.py | 2 +-
 docs/changelog.rst   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/datasette/version.py b/datasette/version.py
index 3a4f06dc..6016687a 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.63.1"
+__version__ = "0.63.2"
 __version_info__ = tuple(__version__.split("."))
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 0e0393ef..865bb58e 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,14 @@
 Changelog
 =========

+.. _v0_63_2:
+
+0.63.2 (2022-11-18)
+-------------------
+
+- Fixed a bug in ``datasette publish heroku`` where deployments failed due to an older version of Python being requested. (:issue:`1905`)
+- New ``datasette publish heroku --generate-dir `` option for generating a Heroku deployment directory without deploying it.
+
 .. _v0_63_1:

 0.63.1 (2022-11-10)

From 4ba8d57bb1a8537ef2f40fa03414f8b14a16c29a Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Thu, 15 Dec 2022 16:55:17 -0800
Subject: [PATCH 05/79] Try click.echo() instead

This ensures the URL is output correctly when running under Docker.
Closes #1958
---
 datasette/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datasette/cli.py b/datasette/cli.py
index 6eb42712..2b61292b 100644
--- a/datasette/cli.py
+++ b/datasette/cli.py
@@ -607,7 +607,7 @@ def serve(
         url = "http://{}:{}{}?token={}".format(
             host, port, ds.urls.path("-/auth-token"), ds._root_token
         )
-        print(url)
+        click.echo(url)
     if open_browser:
         if url is None:
             # Figure out most convenient URL - to table, database or homepage

From 96b3a86d7f3c74c7bb508e0ea6cb538ede5d046d Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Thu, 15 Dec 2022 09:34:07 -0800
Subject: [PATCH 06/79] Replace AsgiLifespan with AsgiRunOnFirstRequest, refs #1955

---
 datasette/app.py         | 20 +++-----------------
 datasette/utils/asgi.py  | 44 ++++++++++++++--------------------------
 docs/plugin_hooks.rst    |  5 +++--
 docs/testing_plugins.rst |  2 +-
 4 files changed, 22 insertions(+), 49 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index 246269f3..20f08b7b 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -63,17 +63,14 @@ from .utils import (
     to_css_class,
 )
 from .utils.asgi import (
-    AsgiLifespan,
-    Base400,
     Forbidden,
     NotFound,
     Request,
     Response,
+    AsgiRunOnFirstRequest,
     asgi_static,
     asgi_send,
     asgi_send_file,
-    asgi_send_html,
-    asgi_send_json,
     asgi_send_redirect,
 )
 from .utils.internal_db import init_internal_db, populate_schema_tables
@@ -1260,7 +1257,7 @@ class Datasette:

         async def setup_db():
             # First time server starts up, calculate table counts for immutable databases
-            for dbname, database in self.databases.items():
+            for database in self.databases.values():
                 if not database.is_mutable:
                     await database.table_counts(limit=60 * 60 * 1000)

@@ -1274,10 +1271,7 @@ class Datasette:
         )
         if self.setting("trace_debug"):
             asgi = AsgiTracer(asgi)
-        asgi = AsgiLifespan(
-            asgi,
-            on_startup=setup_db,
-        )
+        asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup])
         for wrapper in pm.hook.asgi_wrapper(datasette=self):
             asgi = wrapper(asgi)
         return asgi
@@ -1566,42 +1560,34 @@ class DatasetteClient:
         return path

     async def get(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.get(self._fix(path), **kwargs)

     async def options(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.options(self._fix(path), **kwargs)

     async def head(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.head(self._fix(path), **kwargs)

     async def post(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.post(self._fix(path), **kwargs)

     async def put(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.put(self._fix(path), **kwargs)

     async def patch(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.patch(self._fix(path), **kwargs)

     async def delete(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.delete(self._fix(path), **kwargs)

     async def request(self, method, path, **kwargs):
-        await self.ds.invoke_startup()
         avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None)
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.request(
diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py
index 8a2fa060..03c1c9cd 100644
--- a/datasette/utils/asgi.py
+++ b/datasette/utils/asgi.py
@@ -135,35 +135,6 @@ class Request:
         return cls(scope, None)


-class AsgiLifespan:
-    def __init__(self, app, on_startup=None, on_shutdown=None):
-        self.app = app
-        on_startup = on_startup or []
-        on_shutdown = on_shutdown or []
-        if not isinstance(on_startup or [], list):
-            on_startup = [on_startup]
-        if not isinstance(on_shutdown or [], list):
-            on_shutdown = [on_shutdown]
-        self.on_startup = on_startup
-        self.on_shutdown = on_shutdown
-
-    async def __call__(self, scope, receive, send):
-        if scope["type"] == "lifespan":
-            while True:
-                message = await receive()
-                if message["type"] == "lifespan.startup":
-                    for fn in self.on_startup:
-                        await fn()
-                    await send({"type": "lifespan.startup.complete"})
-                elif message["type"] == "lifespan.shutdown":
-                    for fn in self.on_shutdown:
-                        await fn()
-                    await send({"type": "lifespan.shutdown.complete"})
-                    return
-        else:
-            await self.app(scope, receive, send)
-
-
 class AsgiStream:
     def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"):
         self.stream_fn = stream_fn
@@ -428,3 +399,18 @@ class AsgiFileDownload:
             content_type=self.content_type,
             headers=self.headers,
         )
+
+
+class AsgiRunOnFirstRequest:
+    def __init__(self, asgi, on_startup):
+        assert isinstance(on_startup, list)
+        self.asgi = asgi
+        self.on_startup = on_startup
+        self._started = False
+
+    async def __call__(self, scope, receive, send):
+        if not self._started:
+            self._started = True
+            for hook in self.on_startup:
+                await hook()
+        return await self.asgi(scope, receive, send)
diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst
index b61f953a..101911cc 100644
--- a/docs/plugin_hooks.rst
+++ b/docs/plugin_hooks.rst
@@ -855,13 +855,14 @@ Potential use-cases:

 .. note::

-    If you are writing :ref:`unit tests <testing_plugins>` for a plugin that uses this hook you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example:
+    If you are writing :ref:`unit tests <testing_plugins>` for a plugin that uses this hook and doesn't exercise Datasette by sending
+    any simulated requests through it you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example:

     .. code-block:: python

         @pytest.mark.asyncio
         async def test_my_plugin():
-            ds = Datasette([], metadata={})
+            ds = Datasette()
             await ds.invoke_startup()
             # Rest of test goes here
diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst
index 41f50e56..6d2097ad 100644
--- a/docs/testing_plugins.rst
+++ b/docs/testing_plugins.rst
@@ -80,7 +80,7 @@ Creating a ``Datasette()`` instance like this as useful shortcut in tests, but t

 This method registers any :ref:`plugin_hook_startup` or :ref:`plugin_hook_prepare_jinja2_environment` plugins that might themselves need to make async calls.

-If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - those method calls ensure that ``.invoke_startup()`` has been called for you.
+If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - Datasette automatically calls ``invoke_startup()`` the first time it handles a request.

 .. _testing_plugins_pdb:
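
The replacement class defers startup work until the first request arrives. A rough sketch of using it to wrap a trivial ASGI app, assuming a Datasette version that ships this class so the import resolves:

.. code-block:: python

    from datasette.utils.asgi import AsgiRunOnFirstRequest

    async def on_startup():
        print("startup hook ran")

    async def inner(scope, receive, send):
        # A minimal ASGI HTTP responder used only for illustration
        await send({"type": "http.response.start", "status": 200, "headers": []})
        await send({"type": "http.response.body", "body": b"ok"})

    # The on_startup hooks run exactly once, just before the first request
    app = AsgiRunOnFirstRequest(inner, on_startup=[on_startup])
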
From 5649e547eff4278130d5d66c5684fc5d4fd77b69 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 17:22:00 -0800
Subject: [PATCH 07/79] Put AsgiLifespan back so server starts up again, refs #1955

---
 datasette/app.py        |  2 ++
 datasette/utils/asgi.py | 29 +++++++++++++++++++++++++
 tests/conftest.py       | 33 ++++++++++++++------------------
 3 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index 20f08b7b..ea9bb6d2 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -63,6 +63,7 @@ from .utils import (
     to_css_class,
 )
 from .utils.asgi import (
+    AsgiLifespan,
     Forbidden,
     NotFound,
     Request,
@@ -1271,6 +1272,7 @@ class Datasette:
         )
         if self.setting("trace_debug"):
             asgi = AsgiTracer(asgi)
+        asgi = AsgiLifespan(asgi)
         asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup])
         for wrapper in pm.hook.asgi_wrapper(datasette=self):
             asgi = wrapper(asgi)
diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py
index 03c1c9cd..16f90077 100644
--- a/datasette/utils/asgi.py
+++ b/datasette/utils/asgi.py
@@ -135,6 +135,35 @@ class Request:
         return cls(scope, None)


+class AsgiLifespan:
+    def __init__(self, app, on_startup=None, on_shutdown=None):
+        self.app = app
+        on_startup = on_startup or []
+        on_shutdown = on_shutdown or []
+        if not isinstance(on_startup or [], list):
+            on_startup = [on_startup]
+        if not isinstance(on_shutdown or [], list):
+            on_shutdown = [on_shutdown]
+        self.on_startup = on_startup
+        self.on_shutdown = on_shutdown
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] == "lifespan":
+            while True:
+                message = await receive()
+                if message["type"] == "lifespan.startup":
+                    for fn in self.on_startup:
+                        await fn()
+                    await send({"type": "lifespan.startup.complete"})
+                elif message["type"] == "lifespan.shutdown":
+                    for fn in self.on_shutdown:
+                        await fn()
+                    await send({"type": "lifespan.shutdown.complete"})
+                    return
+        else:
+            await self.app(scope, receive, send)
+
+
 class AsgiStream:
     def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"):
         self.stream_fn = stream_fn
diff --git a/tests/conftest.py b/tests/conftest.py
index f4638a14..dd06fa07 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,6 +23,17 @@ UNDOCUMENTED_PERMISSIONS = {
 }


+def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs):
+    start = time.time()
+    while time.time() - start < timeout:
+        try:
+            client.get(url, **kwargs)
+            return
+        except httpx.ConnectError:
+            time.sleep(0.1)
+    raise AssertionError("Timed out waiting for {} to respond".format(url))
+
+
 def pytest_report_header(config):
     return "SQLite: {}".format(
         sqlite3.connect(":memory:").execute("select sqlite_version()").fetchone()[0]
@@ -111,13 +122,7 @@ def ds_localhost_http_server():
         # Avoid FileNotFoundError: [Errno 2] No such file or directory:
         cwd=tempfile.gettempdir(),
     )
-    # Loop until port 8041 serves traffic
-    while True:
-        try:
-            httpx.get("http://localhost:8041/")
-            break
-        except httpx.ConnectError:
-            time.sleep(0.1)
+    wait_until_responds("http://localhost:8041/")
     # Check it started successfully
     assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
     yield ds_proc
@@ -152,12 +157,7 @@ def ds_localhost_https_server(tmp_path_factory):
         stderr=subprocess.STDOUT,
         cwd=tempfile.gettempdir(),
     )
-    while True:
-        try:
-            httpx.get("https://localhost:8042/", verify=client_cert)
-            break
-        except httpx.ConnectError:
-            time.sleep(0.1)
+    wait_until_responds("https://localhost:8042/", verify=client_cert)
     # Check it started successfully
     assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
     yield ds_proc, client_cert
@@ -181,12 +181,7 @@ def ds_unix_domain_socket_server(tmp_path_factory):
     # Poll until available
     transport = httpx.HTTPTransport(uds=uds)
     client = httpx.Client(transport=transport)
-    while True:
-        try:
-            client.get("http://localhost/_memory.json")
-            break
-        except httpx.ConnectError:
-            time.sleep(0.1)
+    wait_until_responds("http://localhost/_memory.json", client=client)
     # Check it started successfully
     assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
     yield ds_proc, uds
From 0bd3eaa2ddb5f8dbf57190cfb639e3f4d7a82e99 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 18:33:07 -0800
Subject: [PATCH 08/79] Move HTTPS test to a bash script

See https://github.com/simonw/datasette/issues/1955#issuecomment-1356627931
---
 tests/conftest.py                    | 35 ----------------------------
 tests/test_cli_serve_server.py       | 11 ---------
 tests/test_datasette_https_server.sh | 33 ++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 46 deletions(-)
 create mode 100755 tests/test_datasette_https_server.sh

diff --git a/tests/conftest.py b/tests/conftest.py
index dd06fa07..ee13df11 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -130,41 +130,6 @@ def ds_localhost_http_server():
         ds_proc.terminate()


-@pytest.fixture(scope="session")
-def ds_localhost_https_server(tmp_path_factory):
-    cert_directory = tmp_path_factory.mktemp("certs")
-    ca = trustme.CA()
-    server_cert = ca.issue_cert("localhost")
-    keyfile = str(cert_directory / "server.key")
-    certfile = str(cert_directory / "server.pem")
-    client_cert = str(cert_directory / "client.pem")
-    server_cert.private_key_pem.write_to_path(path=keyfile)
-    for blob in server_cert.cert_chain_pems:
-        blob.write_to_path(path=certfile, append=True)
-    ca.cert_pem.write_to_path(path=client_cert)
-    ds_proc = subprocess.Popen(
-        [
-            "datasette",
-            "--memory",
-            "-p",
-            "8042",
-            "--ssl-keyfile",
-            keyfile,
-            "--ssl-certfile",
-            certfile,
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        cwd=tempfile.gettempdir(),
-    )
-    wait_until_responds("https://localhost:8042/", verify=client_cert)
-    # Check it started successfully
-    assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
-    yield ds_proc, client_cert
-    # Shut it down at the end of the pytest session
-    ds_proc.terminate()
-
-
 @pytest.fixture(scope="session")
 def ds_unix_domain_socket_server(tmp_path_factory):
     # This used to use tmp_path_factory.mktemp("uds") but that turned out to
diff --git a/tests/test_cli_serve_server.py b/tests/test_cli_serve_server.py
index 1c31e2a3..47f23c08 100644
--- a/tests/test_cli_serve_server.py
+++ b/tests/test_cli_serve_server.py
@@ -13,17 +13,6 @@ def test_serve_localhost_http(ds_localhost_http_server):
     }.items() <= response.json().items()


-@pytest.mark.serial
-def test_serve_localhost_https(ds_localhost_https_server):
-    _, client_cert = ds_localhost_https_server
-    response = httpx.get("https://localhost:8042/_memory.json", verify=client_cert)
-    assert {
-        "database": "_memory",
-        "path": "/_memory",
-        "tables": [],
-    }.items() <= response.json().items()
-
-
 @pytest.mark.serial
 @pytest.mark.skipif(
     not hasattr(socket, "AF_UNIX"), reason="Requires socket.AF_UNIX support"
diff --git a/tests/test_datasette_https_server.sh b/tests/test_datasette_https_server.sh
new file mode 100755
index 00000000..a701ad4c
--- /dev/null
+++ b/tests/test_datasette_https_server.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Generate certificates
+python -m trustme
+# This creates server.pem, server.key, client.pem
+
+# Start the server in the background
+datasette --memory \
+  --ssl-keyfile=server.key \
+  --ssl-certfile=server.pem \
+  -p 8152 &
+
+# Store the background process ID in a variable
+server_pid=$!
+
+# Wait for the server to start
+sleep 2
+
+# Make a test request using curl
+curl -f --cacert client.pem 'https://localhost:8152/_memory.json'
+
+# Save curl's exit code (-f option causes it to return one on HTTP errors)
+curl_exit_code=$?
+
+# Shut down the server
+kill $server_pid
+sleep 1
+
+# Clean up the certificates
+rm server.pem server.key client.pem
+
+echo $curl_exit_code
+exit $curl_exit_code
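
The script shells out to ``python -m trustme`` for its certificates; the same three files can also be produced with the library API, much as the removed pytest fixture did:

.. code-block:: python

    import trustme

    ca = trustme.CA()
    server_cert = ca.issue_cert("localhost")
    # Private key and certificate chain for the server
    server_cert.private_key_pem.write_to_path(path="server.key")
    for blob in server_cert.cert_chain_pems:
        blob.write_to_path(path="server.pem", append=True)
    # client.pem is the CA certificate clients use to verify the server
    ca.cert_pem.write_to_path(path="client.pem")
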
From e6d94f9ffa09a7c7bbb6b4a6273a8b9a7c0cb204 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 18:38:26 -0800
Subject: [PATCH 09/79] Run new HTTPS test in CI, refs #1955

---
 .github/workflows/test.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 886f649a..39aa8b13 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,6 +35,8 @@ jobs:
       run: |
         pytest -n auto -m "not serial"
         pytest -m "serial"
+        # And the test that exercises a localhost HTTPS server
+        tests/test_datasette_https_server.sh
     - name: Check if cog needs to be run
       run: |
         cog --check docs/*.rst

From d93f975b3dbdf37ce91c176e24b920b041cf7571 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 18:52:52 -0800
Subject: [PATCH 10/79] On publish run tests same way as for test

---
 .github/workflows/publish.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index fa608055..b7d99c1c 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -31,7 +31,10 @@ jobs:
         pip install -e '.[test]'
     - name: Run tests
       run: |
-        pytest
+        pytest -n auto -m "not serial"
+        pytest -m "serial"
+        # And the test that exercises a localhost HTTPS server
+        tests/test_datasette_https_server.sh
   deploy:
     runs-on: ubuntu-latest

From a9ffcbd42ec6c6d5a55f7a541e282c0f1002c559 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 19:06:03 -0800
Subject: [PATCH 11/79] Release 0.63.3

Refs #1955, #1958, #1963
---
 datasette/version.py | 2 +-
 docs/changelog.rst   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/datasette/version.py b/datasette/version.py
index 6016687a..8aaf793d 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.63.2"
+__version__ = "0.63.3"
 __version_info__ = tuple(__version__.split("."))
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 865bb58e..33945ccb 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,14 @@
 Changelog
 =========

+.. _v0_63_3:
+
+0.63.3 (2022-12-17)
+-------------------
+
+- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in root when the server shut down, not when it started up. (:issue:`1958`)
+- You no longer need to ensure ``await datasette.invoke_startup()`` has been called in order for Datasette to start correctly serving requests - this is now handled automatically the first time the server receives a request. This fixes a bug experienced when Datasette is served directly by an ASGI application server such as Uvicorn or Gunicorn. It also fixes a bug with the `datasette-gunicorn `__ plugin. (:issue:`1955`)
+
 .. _v0_63_2:

 0.63.2 (2022-11-18)
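
The second changelog bullet means the ASGI app can now be handed straight to a server with no manual startup call; a minimal sketch with Uvicorn (the server choice and port here are assumptions, not part of the patch):

.. code-block:: python

    import uvicorn
    from datasette.app import Datasette

    # Startup hooks now run automatically on the first incoming request
    ds = Datasette(memory=True)
    app = ds.app()

    if __name__ == "__main__":
        uvicorn.run(app, host="127.0.0.1", port=8000)
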
From e05998bc85e6347095cf71434c6de4e9d8414933 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 19:08:01 -0800
Subject: [PATCH 12/79] Added missing word, refs #1963

---
 docs/changelog.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 33945ccb..270765c8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -9,7 +9,7 @@ Changelog
 0.63.3 (2022-12-17)
 -------------------

-- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in root when the server shut down, not when it started up. (:issue:`1958`)
+- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in as root when the server shut down, not when it started up. (:issue:`1958`)
 - You no longer need to ensure ``await datasette.invoke_startup()`` has been called in order for Datasette to start correctly serving requests - this is now handled automatically the first time the server receives a request. This fixes a bug experienced when Datasette is served directly by an ASGI application server such as Uvicorn or Gunicorn. It also fixes a bug with the `datasette-gunicorn `__ plugin. (:issue:`1955`)

 .. _v0_63_2:

From 9ec58da6ec9ee8a19a1227b256e72e32def254a8 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 19:24:34 -0800
Subject: [PATCH 13/79] Deploy docs on publish using Python 3.9

A workaround for gcloud setup, see:
https://til.simonwillison.net/googlecloud/gcloud-error-workaround

Refs #1963
---
 .github/workflows/publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index b7d99c1c..50961b82 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -72,7 +72,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: '3.10'
+        python-version: '3.9'
     - uses: actions/cache@v2
       name: Configure pip caching
       with:

From 74022940184da13ac8b0881b7cb7f1cc43650424 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 17 Dec 2022 22:28:07 -0800
Subject: [PATCH 14/79] .select-wrapper:focus-within for accessibility, closes #1771

---
 datasette/static/app.css | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/datasette/static/app.css b/datasette/static/app.css
index 712b9925..71437bd4 100644
--- a/datasette/static/app.css
+++ b/datasette/static/app.css
@@ -573,6 +573,9 @@ form button[type=button] {
     display: inline-block;
     margin-right: 0.3em;
 }
+.select-wrapper:focus-within {
+    border: 1px solid black;
+}
 .select-wrapper.filter-op {
     width: 80px;
 }

From 1bff3f1a70964977bcccc6e9ca80db122c28afcf Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 4 Jan 2023 10:25:04 -0800
Subject: [PATCH 15/79] Fixed table_action example in docs

---
 docs/plugin_hooks.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst
index 101911cc..399226a0 100644
--- a/docs/plugin_hooks.rst
+++ b/docs/plugin_hooks.rst
@@ -1346,7 +1346,7 @@ This example adds a new table action if the signed in user is ``"root"``:

     @hookimpl
-    def table_actions(datasette, actor):
+    def table_actions(datasette, actor, database, table):
         if actor and actor.get("id") == "root":
             return [
                 {
From b8cf864fa68d25961024586b2d263f4dc7ebee75 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 4 Jan 2023 16:47:47 -0800
Subject: [PATCH 16/79] Fixed broken example links in _where= docs

---
 docs/json_api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/json_api.rst b/docs/json_api.rst
index d3fdb1e4..fd2d3ec6 100644
--- a/docs/json_api.rst
+++ b/docs/json_api.rst
@@ -357,8 +357,8 @@ Special table arguments

     Some examples:

-    * `facetable?_where=neighborhood like "%c%"&_where=city_id=3 `__
-    * `facetable?_where=city_id in (select id from facet_cities where name != "Detroit") `__
+    * `facetable?_where=_neighborhood like "%c%"&_where=_city_id=3 `__
+    * `facetable?_where=_city_id in (select id from facet_cities where name != "Detroit") `__

 ``?_through={json}``
     This can be used to filter rows via a join against another table.
From 1ec9c9995c611c9ba34539f1b93c0e684fd35b67 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Thu, 5 Jan 2023 09:21:07 -0800
Subject: [PATCH 17/79] Backported default_allow_sql for 0.63.x, closes #1409

---
 datasette/app.py                 |  5 +++++
 datasette/default_permissions.py |  6 +++++-
 docs/authentication.rst          | 10 ++++++++--
 docs/cli-reference.rst           |  2 ++
 docs/settings.rst                | 15 +++++++++++++++
 tests/test_api.py                |  1 +
 tests/test_cli.py                | 22 ++++++++++++++++++++++
 7 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index ea9bb6d2..c052be58 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -116,6 +116,11 @@ SETTINGS = (
         True,
         "Allow users to specify columns to facet using ?_facet= parameter",
     ),
+    Setting(
+        "default_allow_sql",
+        True,
+        "Allow anyone to run arbitrary SQL queries",
+    ),
     Setting(
         "allow_download",
         True,
diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py
index b58d8d1b..a0681e83 100644
--- a/datasette/default_permissions.py
+++ b/datasette/default_permissions.py
@@ -36,12 +36,16 @@ def permission_allowed(datasette, actor, action, resource):
                 return None
             return actor_matches_allow(actor, allow)
         elif action == "execute-sql":
+            # Only use default_allow_sql setting if it is set to False:
+            default_allow_sql = (
+                None if datasette.setting("default_allow_sql") else False
+            )
             # Use allow_sql block from database block, or from top-level
             database_allow_sql = datasette.metadata("allow_sql", database=resource)
             if database_allow_sql is None:
                 database_allow_sql = datasette.metadata("allow_sql")
             if database_allow_sql is None:
-                return None
+                return default_allow_sql
             return actor_matches_allow(actor, database_allow_sql)

     return inner
diff --git a/docs/authentication.rst b/docs/authentication.rst
index 685dab15..87852555 100644
--- a/docs/authentication.rst
+++ b/docs/authentication.rst
@@ -307,7 +307,13 @@ To limit access to the ``add_name`` canned query in your ``dogs.db`` database to
 Controlling the ability to execute arbitrary SQL
 ------------------------------------------------

-The ``"allow_sql"`` block can be used to control who is allowed to execute arbitrary SQL queries, both using the form on the database page e.g. https://latest.datasette.io/fixtures or by appending a ``?_where=`` parameter to the table page as seen on https://latest.datasette.io/fixtures/facetable?_where=city_id=1.
+Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page `__ or by appending a ``?_where=`` parameter to the table page `like this `__.
+
+Access to this ability is controlled by the :ref:`permissions_execute_sql` permission.
+
+The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting <setting_default_allow_sql>` when you first start Datasette running.
+
+You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries.

 To enable just the :ref:`root user <authentication_root>` to execute SQL for all databases in your instance, use the following:

@@ -515,7 +521,7 @@ Actor is allowed to run arbitrary SQL queries against a specific database, e.g.
 ``resource`` - string
     The name of the database

-Default *allow*.
+Default *allow*. See also :ref:`the default_allow_sql setting <setting_default_allow_sql>`.

 .. _permissions_permissions_debug:
diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst
index a6885fc8..ed20ea8a 100644
--- a/docs/cli-reference.rst
+++ b/docs/cli-reference.rst
@@ -224,6 +224,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting name value``
                              (default=50)
   allow_facet                Allow users to specify columns to facet using
                              ?_facet= parameter (default=True)
+  default_allow_sql          Allow anyone to run arbitrary SQL queries
+                             (default=True)
   allow_download             Allow users to download the original SQLite
                              database files (default=True)
   suggest_facets             Calculate and display suggested facets
diff --git a/docs/settings.rst b/docs/settings.rst
index a6d50543..8a83cc2f 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -59,6 +59,21 @@ Settings

 The following options can be set using ``--setting name value``, or by storing them in the ``settings.json`` file for use with :ref:`config_dir`.

+.. _setting_default_allow_sql:
+
+default_allow_sql
+~~~~~~~~~~~~~~~~~
+
+Should users be able to execute arbitrary SQL queries by default?
+
+Setting this to ``off`` causes permission checks for :ref:`permissions_execute_sql` to fail by default.
+
+::
+
+    datasette mydatabase.db --setting default_allow_sql off
+
+There are two ways to achieve this: the other is to add ``"allow_sql": false`` to your ``metadata.json`` file, as described in :ref:`authentication_permissions_execute_sql`. This setting offers a more convenient way to do this.
+
 .. _setting_default_page_size:

 default_page_size
diff --git a/tests/test_api.py b/tests/test_api.py
index 4027a7a5..db624823 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -805,6 +805,7 @@ def test_settings_json(app_client):
     assert {
         "default_page_size": 50,
         "default_facet_size": 30,
+        "default_allow_sql": True,
         "facet_suggest_time_limit_ms": 50,
         "facet_time_limit_ms": 200,
         "max_returned_rows": 100,
diff --git a/tests/test_cli.py b/tests/test_cli.py
index f0d28037..9ca50cbe 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -215,6 +215,28 @@ def test_setting_type_validation():
     assert '"default_page_size" should be an integer' in result.stderr


+@pytest.mark.parametrize("default_allow_sql", (True, False))
+def test_setting_default_allow_sql(default_allow_sql):
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "--setting",
+            "default_allow_sql",
+            "on" if default_allow_sql else "off",
+            "--get",
+            "/_memory.json?sql=select+21&_shape=objects",
+        ],
+    )
+    if default_allow_sql:
+        assert result.exit_code == 0, result.output
+        assert json.loads(result.output)["rows"][0] == {"21": 21}
+    else:
+        assert result.exit_code == 1, result.output
+        # This isn't JSON at the moment, maybe it should be though
+        assert "Forbidden" in result.output
+
+
 def test_config_deprecated():
     # The --config option should show a deprecation message
     runner = CliRunner(mix_stderr=False)
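
The new setting can also be exercised from Python through the internal client rather than the CLI; a quick sketch, assuming the ``settings=`` constructor argument and that a denied ``execute-sql`` check surfaces as a Forbidden response:

.. code-block:: python

    import asyncio
    from datasette.app import Datasette

    async def check():
        ds = Datasette(memory=True, settings={"default_allow_sql": False})
        response = await ds.client.get("/_memory.json?sql=select+21")
        # Arbitrary SQL should now be rejected rather than executed
        print(response.status_code)  # expected: 403 Forbidden

    asyncio.run(check())
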
From 3b88ac671e2fd64663c0588ea92f374c57836cc4 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 7 Jan 2023 15:48:08 -0800
Subject: [PATCH 18/79] What to do if extensions will not load, refs #1979

---
 docs/installation.rst | 57 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/docs/installation.rst b/docs/installation.rst
index a4757736..121f2ef5 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -230,3 +230,60 @@ Some plugins such as `datasette-ripgrep

Date: Sat, 7 Jan 2023 15:56:03 -0800
Subject: [PATCH 19/79] Better error for --load-extensions, refs #1979

---
 datasette/cli.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/datasette/cli.py b/datasette/cli.py
index 2b61292b..89ee12b6 100644
--- a/datasette/cli.py
+++ b/datasette/cli.py
@@ -4,6 +4,7 @@ import click
 from click import formatting
 from click.types import CompositeParamType
 from click_default_group import DefaultGroup
+import functools
 import json
 import os
 import pathlib
@@ -11,6 +12,7 @@ import shutil
 from subprocess import call
 import sys
 from runpy import run_module
+import textwrap
 import webbrowser
 from .app import (
     OBSOLETE_SETTINGS,
@@ -126,7 +128,7 @@ class Setting(CompositeParamType):

 def sqlite_extensions(fn):
-    return click.option(
+    fn = click.option(
         "sqlite_extensions",
         "--load-extension",
         type=LoadExtension(),
@@ -134,6 +136,25 @@ def sqlite_extensions(fn):
         multiple=True,
         help="Path to a SQLite extension to load, and optional entrypoint",
     )(fn)
+    # Wrap it in a custom error handler
+    @functools.wraps(fn)
+    def wrapped(*args, **kwargs):
+        try:
+            return fn(*args, **kwargs)
+        except AttributeError as e:
+            if "enable_load_extension" in str(e):
+                raise click.ClickException(
+                    textwrap.dedent(
+                        """
+                    Your Python installation does not have the ability to load SQLite extensions.
+
+                    More information: https://datasette.io/help/extensions
+                    """
+                    ).strip()
+                )
+            raise
+
+    return wrapped

 @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True)
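
The new error handler keys off the ``enable_load_extension`` attribute on SQLite connections. A quick way to check whether a given Python build exposes it, using only the standard library:

.. code-block:: python

    import sqlite3

    # True if this Python can load SQLite extensions; False on builds
    # compiled without extension-loading support, which is the case
    # the improved ClickException message reports on
    conn = sqlite3.connect(":memory:")
    print(hasattr(conn, "enable_load_extension"))
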
From 02987e342d75fc874d0c57ecfa8c4eabf0f5b313 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 9 Jan 2023 08:25:07 -0800
Subject: [PATCH 20/79] Explicitly explain allow_sql: false

---
 docs/authentication.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/authentication.rst b/docs/authentication.rst
index 87852555..37703307 100644
--- a/docs/authentication.rst
+++ b/docs/authentication.rst
@@ -315,6 +315,14 @@ The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting <setting_default_allow_sql>`

 You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries.

+To prevent any user from executing arbitrary SQL queries, use this:
+
+.. code-block:: json
+
+    {
+        "allow_sql": false
+    }
+
 To enable just the :ref:`root user <authentication_root>` to execute SQL for all databases in your instance, use the following:

 .. code-block:: json

From 0084daa50a8d97e025989d73d23b9a60c2dc69d9 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Mon, 9 Jan 2023 08:37:07 -0800
Subject: [PATCH 21/79] Release 0.64, with a warning against arbitrary SQL with SpatiaLite

Refs #1409, #1771, #1979
Refs https://github.com/simonw/datasette.io/issues/132
---
 datasette/version.py |  2 +-
 docs/changelog.rst   | 11 +++++++++++
 docs/spatialite.rst  | 26 +++++++++++++++++++++++---
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/datasette/version.py b/datasette/version.py
index 8aaf793d..eedecc61 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.63.3"
+__version__ = "0.64"
 __version_info__ = tuple(__version__.split("."))
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 270765c8..39324989 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,17 @@
 Changelog
 =========

+.. _v0_64:
+
+0.64 (2023-01-09)
+-----------------
+
+- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details.
+- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`)
+- `Building a location to time zone API with SpatiaLite `__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API.
+- New documentation about :ref:`how to debug problems loading SQLite extensions `. The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`)
+- Fixed an accessibility issue: the ``
@@ -505,7 +505,7 @@ def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
     links = (
         Soup(response.body, "html.parser")
         .find("p", {"class": "export-links"})
-        .findAll("a")
+        .find_all("a")
     )
     actual = [l["href"] for l in links]
     expected = [
@@ -554,7 +554,7 @@ def test_rowid_sortable_no_primary_key(app_client):
     assert response.status == 200
     table = Soup(response.body, "html.parser").find("table")
     assert table["class"] == ["rows-and-columns"]
-    ths = table.findAll("th")
+    ths = table.find_all("th")
     assert "rowid\xa0▼" == ths[1].find("a").string.strip()
@@ -562,7 +562,7 @@ def test_table_html_compound_primary_key(app_client):
     response = app_client.get("/fixtures/compound_primary_key")
     assert response.status == 200
     table = Soup(response.body, "html.parser").find("table")
-    ths = table.findAll("th")
+    ths = table.find_all("th")
     assert "Link" == ths[0].string.strip()
     for expected_col, th in zip(("pk1", "pk2", "content"), ths[1:]):
         a = th.find("a")
@@ -783,7 +783,7 @@ def test_advanced_export_box(app_client, path, has_object, has_stream, has_expand
     if has_object:
         expected_json_shapes.append("object")
     div = soup.find("div", {"class": "advanced-export"})
-    assert expected_json_shapes == [a.text for a in div.find("p").findAll("a")]
+    assert expected_json_shapes == [a.text for a in div.find("p").find_all("a")]
     # "stream all rows" option
     if has_stream:
         assert "stream all rows" in str(div)
@@ -799,13 +799,13 @@ def test_extra_where_clauses(app_client):
     soup = Soup(response.body, "html.parser")
     div = soup.select(".extra-wheres")[0]
     assert "2 extra where clauses" == div.find("h3").text
-    hrefs = [a["href"] for a in div.findAll("a")]
+    hrefs = [a["href"] for a in div.find_all("a")]
     assert [
         "/fixtures/facetable?_where=_city_id%3D1",
         "/fixtures/facetable?_where=_neighborhood%3D%27Dogpatch%27",
     ] == hrefs
     # These should also be persisted as hidden fields
-    inputs = soup.find("form").findAll("input")
+    inputs = soup.find("form").find_all("input")
     hiddens = [i for i in inputs if i["type"] == "hidden"]
     assert [("_where", "_neighborhood='Dogpatch'"), ("_where", "_city_id=1")] == [
         (hidden["name"], hidden["value"]) for hidden in hiddens
@@ -829,7 +829,7 @@ def test_other_hidden_form_fields(app_client, path, expected_hidden):
     response = app_client.get(path)
    soup = Soup(response.body, "html.parser")
-    inputs = soup.find("form").findAll("input")
+    inputs = soup.find("form").find_all("input")
     hiddens = [i for i in inputs if i["type"] == "hidden"]
     assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == expected_hidden
@@ -847,7 +847,7 @@ def test_search_and_sort_fields_not_duplicated(app_client, path, expected_hidden):
     # https://github.com/simonw/datasette/issues/1214
     response = app_client.get(path)
     soup = Soup(response.body, "html.parser")
-    inputs = soup.find("form").findAll("input")
+    inputs = soup.find("form").find_all("input")
     hiddens = [i for i in inputs if i["type"] == "hidden"]
     assert [(hidden["name"], hidden["value"]) for hidden in hiddens] == expected_hidden
@@ -896,7 +896,7 @@ def test_metadata_sort(app_client):
     assert response.status == 200
     table = Soup(response.body, "html.parser").find("table")
     assert table["class"] == ["rows-and-columns"]
-    ths = table.findAll("th")
+    ths = table.find_all("th")
     assert ["id", "name\xa0▼"] == [th.find("a").string.strip() for th in ths]
     rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")]
     expected = [
@@ -931,7 +931,7 @@ def test_metadata_sort_desc(app_client):
     assert response.status == 200
     table = Soup(response.body, "html.parser").find("table")
     assert table["class"] == ["rows-and-columns"]
-    ths = table.findAll("th")
+    ths = table.find_all("th")
     assert ["pk\xa0▲", "name"] == [th.find("a").string.strip() for th in ths]
     rows = [[str(td) for td in tr.select("td")] for tr in table.select("tbody tr")]
     expected = [
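
For context on the mechanical change in these hunks: ``findAll()`` is the legacy BeautifulSoup 3 spelling, kept in version 4 only as a deprecated alias, and ``find_all()`` returns the same list of elements:

.. code-block:: python

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<ul><li>a</li><li>b</li></ul>", "html.parser")
    # find_all() is the modern spelling of the deprecated findAll()
    items = soup.find_all("li")
    print([li.text for li in items])  # ['a', 'b']
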
@@ -1032,7 +1032,7 @@ def test_column_metadata(app_client):
     response = app_client.get("/fixtures/roadside_attractions")
     soup = Soup(response.body, "html.parser")
     dl = soup.find("dl")
-    assert [(dt.text, dt.nextSibling.text) for dt in dl.findAll("dt")] == [
+    assert [(dt.text, dt.nextSibling.text) for dt in dl.find_all("dt")] == [
         ("name", "The name of the attraction"),
         ("address", "The street address for the attraction"),
     ]
diff --git a/tests/utils.py b/tests/utils.py
index 191ead9b..dcc0ba44 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -2,7 +2,7 @@ from datasette.utils.sqlite import sqlite3


 def assert_footer_links(soup):
-    footer_links = soup.find("footer").findAll("a")
+    footer_links = soup.find("footer").find_all("a")
     assert 4 == len(footer_links)
     datasette_link, license_link, source_link, about_link = footer_links
     assert "Datasette" == datasette_link.text.strip()

From f626983fdb4685c47a931c08a6a1b29188ad3835 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 5 Nov 2025 09:49:10 -0800
Subject: [PATCH 74/79] Fix BeautifulSoup deprecation warnings in tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes findNext → find_next and nextSibling → next_sibling

Refs 9becb04e
---
 tests/test_html.py       | 2 +-
 tests/test_table_html.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_html.py b/tests/test_html.py
index 1489e56d..cea91d2e 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -333,7 +333,7 @@ def test_row_links_from_other_tables(app_client, path, expected_text, expected_l
     soup = Soup(response.body, "html.parser")
     h2 = soup.find("h2")
     assert h2.text == "Links from other tables"
-    li = h2.findNext("ul").find("li")
+    li = h2.find_next("ul").find("li")
     text = re.sub(r"\s+", " ", li.text.strip())
     assert text == expected_text
     link = li.find("a")["href"]
diff --git a/tests/test_table_html.py b/tests/test_table_html.py
index fee0b702..3cbbe27d 100644
--- a/tests/test_table_html.py
+++ b/tests/test_table_html.py
@@ -1032,7 +1032,7 @@ def test_column_metadata(app_client):
     response = app_client.get("/fixtures/roadside_attractions")
     soup = Soup(response.body, "html.parser")
     dl = soup.find("dl")
-    assert [(dt.text, dt.nextSibling.text) for dt in dl.find_all("dt")] == [
+    assert [(dt.text, dt.next_sibling.text) for dt in dl.find_all("dt")] == [
         ("name", "The name of the attraction"),
         ("address", "The street address for the attraction"),
     ]

From 7d28ca14453923a91f2483b00a31cc30f385eff0 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 5 Nov 2025 09:50:21 -0800
Subject: [PATCH 75/79] Fix datetime.utcnow deprecation warning

Refs 261fc8d8
---
 datasette/app.py                      | 2 +-
 datasette/default_magic_parameters.py | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index 999d6aa6..ae74c5d3 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -695,7 +695,7 @@ class Datasette:
             used_default = True
         self._permission_checks.append(
             {
-                "when": datetime.datetime.utcnow().isoformat(),
+                "when": datetime.datetime.now(datetime.timezone.utc).isoformat(),
                 "actor": actor,
                "action": action,
                 "resource": resource,
diff --git a/datasette/default_magic_parameters.py b/datasette/default_magic_parameters.py
index 19382207..91c1c5aa 100644
--- a/datasette/default_magic_parameters.py
+++ b/datasette/default_magic_parameters.py
@@ -24,9 +24,12 @@ def now(key, request):
     if key == "epoch":
         return int(time.time())
     elif key == "date_utc":
-        return datetime.datetime.utcnow().date().isoformat()
+        return datetime.datetime.now(datetime.timezone.utc).date().isoformat()
     elif key == "datetime_utc":
-        return datetime.datetime.utcnow().strftime(r"%Y-%m-%dT%H:%M:%S") + "Z"
+        return (
+            datetime.datetime.now(datetime.timezone.utc).strftime(r"%Y-%m-%dT%H:%M:%S")
+            + "Z"
+        )
     else:
         raise KeyError
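
``datetime.utcnow()`` is deprecated from Python 3.12 and returns a naive datetime; the replacement returns a timezone-aware value, which is why the second hunk still appends ``Z`` explicitly after ``strftime()``:

.. code-block:: python

    import datetime

    aware = datetime.datetime.now(datetime.timezone.utc)
    # Unlike utcnow(), this value carries tzinfo, so isoformat()
    # includes the +00:00 offset automatically
    print(aware.isoformat())
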
From c434ce03f997b809ceb8a5041b33f59f44153281 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 5 Nov 2025 09:50:52 -0800
Subject: [PATCH 76/79] Switch from pkg_resources to importlib.metadata

Refs #2057, refs 852f5014
---
 datasette/app.py     |  6 +++---
 datasette/plugins.py | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index ae74c5d3..bda16e65 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -11,7 +11,7 @@ import inspect
 from itsdangerous import BadSignature
 import json
 import os
-import pkg_resources
+import importlib.metadata
 import re
 import secrets
 import sys
@@ -921,9 +921,9 @@ class Datasette:
         if using_pysqlite3:
             for package in ("pysqlite3", "pysqlite3-binary"):
                 try:
-                    info["pysqlite3"] = pkg_resources.get_distribution(package).version
+                    info["pysqlite3"] = importlib.metadata.version(package)
                     break
-                except pkg_resources.DistributionNotFound:
+                except importlib.metadata.PackageNotFoundError:
                     pass
         return info

diff --git a/datasette/plugins.py b/datasette/plugins.py
index fef0c8e9..bddaddf0 100644
--- a/datasette/plugins.py
+++ b/datasette/plugins.py
@@ -1,6 +1,7 @@
 import importlib
+import importlib.metadata
+import importlib.resources
 import pluggy
-import pkg_resources
 import sys
 from . import hookspecs
@@ -40,16 +41,16 @@ def get_plugins():
         templates_path = None
         if plugin.__name__ not in DEFAULT_PLUGINS:
             try:
-                if pkg_resources.resource_isdir(plugin.__name__, "static"):
-                    static_path = pkg_resources.resource_filename(
-                        plugin.__name__, "static"
+                if (importlib.resources.files(plugin.__name__) / "static").is_dir():
+                    static_path = str(
+                        importlib.resources.files(plugin.__name__) / "static"
                     )
-                if pkg_resources.resource_isdir(plugin.__name__, "templates"):
-                    templates_path = pkg_resources.resource_filename(
-                        plugin.__name__, "templates"
+                if (importlib.resources.files(plugin.__name__) / "templates").is_dir():
+                    templates_path = str(
+                        importlib.resources.files(plugin.__name__) / "templates"
                     )
-            except (KeyError, ImportError):
-                # Caused by --plugins_dir= plugins - KeyError/ImportError thrown in Py3.5
+            except (TypeError, ModuleNotFoundError):
+                # Caused by --plugins_dir= plugins
                 pass
         plugin_info = {
             "name": plugin.__name__,
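
The ``pkg_resources`` calls removed here map directly onto standard library equivalents in ``importlib.metadata``; roughly:

.. code-block:: python

    import importlib.metadata

    # Replaces pkg_resources.get_distribution("pysqlite3").version
    try:
        version = importlib.metadata.version("pysqlite3")
    except importlib.metadata.PackageNotFoundError:
        # Replaces pkg_resources.DistributionNotFound
        version = None
    print(version)
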
From 6a141467f9f8b05c84bc00c10e157962e3e43960 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 5 Nov 2025 10:19:03 -0800
Subject: [PATCH 77/79] Release 0.65.2

Refs #2429, #2511

Closes #2579
---
 datasette/version.py |  2 +-
 docs/changelog.rst   | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/datasette/version.py b/datasette/version.py
index b89619a3..18d3cd80 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.65.1"
+__version__ = "0.65.2"
 __version_info__ = tuple(__version__.split("."))
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 91c37506..f0485b4f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,17 @@
 Changelog
 =========

+
+.. _v0_65_2:
+
+0.65.2 (2025-11-05)
+-------------------
+
+* Fixes an **open redirect** security issue: Datasette instances would redirect to ``example.com/foo/bar`` if you accessed the path ``//example.com/foo/bar``. Thanks to `James Jefferies `__ for the fix. (:issue:`2429`)
+* Upgraded for compatibility with Python 3.14.
+* Fixed ``datasette publish cloudrun`` to work with changes to the underlying Cloud Run architecture. (:issue:`2511`)
+* Minor upgrades to fix warnings, including ``pkg_resources`` deprecation.
+
 .. _v0_65_1:

 0.65.1 (2024-12-28)

From 0b687051de0b3b88aa8d4dbaa4159ffe769c6f08 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 28 May 2025 19:17:22 -0700
Subject: [PATCH 78/79] Replace Glitch with Codespaces, closes #2488

---
 docs/getting_started.rst | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 6515ef8d..8226741f 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -33,29 +33,18 @@ You can pass a URL to a CSV, SQLite or raw SQL file directly to Datasette Lite t

 This `example link `__ opens Datasette Lite and loads the SQL Murder Mystery example database from `Northwestern University Knight Lab `__.

-.. _getting_started_glitch:
+.. _getting_started_codespaces:

-Try Datasette without installing anything using Glitch
-------------------------------------------------------
+Try Datasette without installing anything with Codespaces
+---------------------------------------------------------

-`Glitch `__ is a free online tool for building web apps directly from your web browser. You can use Glitch to try out Datasette without needing to install any software on your own computer.
+`GitHub Codespaces `__ offers a free browser-based development environment that lets you run a development server without installing any local software.

-Here's a demo project on Glitch which you can use as the basis for your own experiments:
+Here's a demo project on GitHub which you can use as the basis for your own experiments:

-`glitch.com/~datasette-csvs `__
+`github.com/datasette/datasette-studio `__

-Glitch allows you to "remix" any project to create your own copy and start editing it in your browser. You can remix the ``datasette-csvs`` project by clicking this button:
-
-.. image:: https://cdn.glitch.com/2703baf2-b643-4da7-ab91-7ee2a2d00b5b%2Fremix-button.svg
-   :target: https://glitch.com/edit/#!/remix/datasette-csvs
-
-Find a CSV file and drag it onto the Glitch file explorer panel - ``datasette-csvs`` will automatically convert it to a SQLite database (using `sqlite-utils `__) and allow you to start exploring it using Datasette.
-
-If your CSV file has a ``latitude`` and ``longitude`` column you can visualize it on a map by uncommenting the ``datasette-cluster-map`` line in the ``requirements.txt`` file using the Glitch file editor.
-
-Need some data? Try this `Public Art Data `__ for the city of Seattle - hit "Export" and select "CSV" to download it as a CSV file.
-
-For more on how this works, see `Running Datasette on Glitch `__.
+The README file in that repository has instructions on how to get started.

 .. _getting_started_your_computer:

From 1d68d86e04b46b5b7e93c13d34a8d4b19e3802ec Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sun, 28 Sep 2025 21:15:58 -0700
Subject: [PATCH 79/79] Removed broken refs to Glitch, closes #2503

---
 docs/index.rst        | 2 +-
 docs/installation.rst | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 5a9cc7ed..b5548904 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -25,7 +25,7 @@ Datasette is a tool for exploring and publishing data. It helps people take data

 Datasette is aimed at data journalists, museum curators, archivists, local governments and anyone else who has data that they wish to share with the world. It is part of a :ref:`wider ecosystem of tools and plugins ` dedicated to making working with structured data as productive as possible.

-`Explore a demo `__, watch `a presentation about the project `__ or :ref:`getting_started_glitch`.
+`Explore a demo `__, watch `a presentation about the project `__.

 Interested in learning Datasette? Start with `the official tutorials `__.

diff --git a/docs/installation.rst b/docs/installation.rst
index e1de0ae2..4d0d322e 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -4,9 +4,6 @@
 Installation
 ==============

-.. note::
-    If you just want to try Datasette out you don't need to install anything: see :ref:`getting_started_glitch`
-
 There are two main options for installing Datasette. You can install it directly on to your machine, or you can install it using Docker. If you want to start making contributions to the Datasette project by installing a copy that lets you directly modify the code, take a look at our guide to :ref:`devenvironment`.