diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fa608055..1002969a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -31,7 +31,10 @@ jobs: pip install -e '.[test]' - name: Run tests run: | - pytest + pytest -n auto -m "not serial" + pytest -m "serial" + # And the test that exercises a localhost HTTPS server + tests/test_datasette_https_server.sh deploy: runs-on: ubuntu-latest @@ -69,7 +72,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.10' + python-version: '3.9' - uses: actions/cache@v2 name: Configure pip caching with: @@ -90,7 +93,7 @@ jobs: - name: Set up Cloud Run uses: google-github-actions/setup-gcloud@v0 with: - version: '275.0.0' + version: '318.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} service_account_key: ${{ secrets.GCP_SA_KEY }} - name: Deploy stable-docs.datasette.io to Cloud Run diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 886f649a..39aa8b13 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,8 @@ jobs: run: | pytest -n auto -m "not serial" pytest -m "serial" + # And the test that exercises a localhost HTTPS server + tests/test_datasette_https_server.sh - name: Check if cog needs to be run run: | cog --check docs/*.rst diff --git a/datasette/app.py b/datasette/app.py index 246269f3..6b889f08 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -64,16 +64,14 @@ from .utils import ( ) from .utils.asgi import ( AsgiLifespan, - Base400, Forbidden, NotFound, Request, Response, + AsgiRunOnFirstRequest, asgi_static, asgi_send, asgi_send_file, - asgi_send_html, - asgi_send_json, asgi_send_redirect, ) from .utils.internal_db import init_internal_db, populate_schema_tables @@ -118,6 +116,11 @@ SETTINGS = ( True, "Allow users to specify columns to facet using ?_facet= parameter", ), + Setting( + "default_allow_sql", + True, + "Allow anyone to run arbitrary SQL queries", + ), 
Setting( "allow_download", True, @@ -215,6 +218,8 @@ class Datasette: self.config_dir = config_dir self.pdb = pdb self._secret = secret or secrets.token_hex(32) + if files is not None and isinstance(files, str): + raise ValueError("files= must be a list of paths, not a string") self.files = tuple(files or []) + tuple(immutables or []) if config_dir: db_files = [] @@ -1260,7 +1265,7 @@ class Datasette: async def setup_db(): # First time server starts up, calculate table counts for immutable databases - for dbname, database in self.databases.items(): + for database in self.databases.values(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) @@ -1274,10 +1279,8 @@ class Datasette: ) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) - asgi = AsgiLifespan( - asgi, - on_startup=setup_db, - ) + asgi = AsgiLifespan(asgi) + asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup]) for wrapper in pm.hook.asgi_wrapper(datasette=self): asgi = wrapper(asgi) return asgi @@ -1566,42 +1569,34 @@ class DatasetteClient: return path async def get(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.get(self._fix(path), **kwargs) async def options(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.options(self._fix(path), **kwargs) async def head(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.head(self._fix(path), **kwargs) async def post(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.post(self._fix(path), **kwargs) async def put(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.put(self._fix(path), **kwargs) async def patch(self, 
path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.patch(self._fix(path), **kwargs) async def delete(self, path, **kwargs): - await self.ds.invoke_startup() async with httpx.AsyncClient(app=self.app) as client: return await client.delete(self._fix(path), **kwargs) async def request(self, method, path, **kwargs): - await self.ds.invoke_startup() avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None) async with httpx.AsyncClient(app=self.app) as client: return await client.request( diff --git a/datasette/cli.py b/datasette/cli.py index 6eb42712..fd65ea94 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -4,6 +4,7 @@ import click from click import formatting from click.types import CompositeParamType from click_default_group import DefaultGroup +import functools import json import os import pathlib @@ -11,6 +12,7 @@ import shutil from subprocess import call import sys from runpy import run_module +import textwrap import webbrowser from .app import ( OBSOLETE_SETTINGS, @@ -126,7 +128,7 @@ class Setting(CompositeParamType): def sqlite_extensions(fn): - return click.option( + fn = click.option( "sqlite_extensions", "--load-extension", type=LoadExtension(), @@ -135,6 +137,26 @@ def sqlite_extensions(fn): help="Path to a SQLite extension to load, and optional entrypoint", )(fn) + # Wrap it in a custom error handler + @functools.wraps(fn) + def wrapped(*args, **kwargs): + try: + return fn(*args, **kwargs) + except AttributeError as e: + if "enable_load_extension" in str(e): + raise click.ClickException( + textwrap.dedent( + """ + Your Python installation does not have the ability to load SQLite extensions. 
+ + More information: https://datasette.io/help/extensions + """ + ).strip() + ) + raise + + return wrapped + @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True) @click.version_option(version=__version__) @@ -607,7 +629,7 @@ def serve( url = "http://{}:{}{}?token={}".format( host, port, ds.urls.path("-/auth-token"), ds._root_token ) - print(url) + click.echo(url) if open_browser: if url is None: # Figure out most convenient URL - to table, database or homepage diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index b58d8d1b..a0681e83 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -36,12 +36,16 @@ def permission_allowed(datasette, actor, action, resource): return None return actor_matches_allow(actor, allow) elif action == "execute-sql": + # Only use default_allow_sql setting if it is set to False: + default_allow_sql = ( + None if datasette.setting("default_allow_sql") else False + ) # Use allow_sql block from database block, or from top-level database_allow_sql = datasette.metadata("allow_sql", database=resource) if database_allow_sql is None: database_allow_sql = datasette.metadata("allow_sql") if database_allow_sql is None: - return None + return default_allow_sql return actor_matches_allow(actor, database_allow_sql) return inner diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index 77274eb0..760ff0d1 100644 --- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -173,7 +173,7 @@ def publish_subcommand(publish): print(fp.read()) print("\n====================\n") - image_id = f"gcr.io/{project}/{name}" + image_id = f"gcr.io/{project}/datasette-{service}" check_call( "gcloud builds submit --tag {}{}".format( image_id, " --timeout {}".format(timeout) if timeout else "" diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py index 171252ce..f576a346 100644 --- a/datasette/publish/heroku.py +++ 
b/datasette/publish/heroku.py @@ -3,7 +3,9 @@ from datasette import hookimpl import click import json import os +import pathlib import shlex +import shutil from subprocess import call, check_output import tempfile @@ -28,6 +30,11 @@ def publish_subcommand(publish): "--tar", help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar", ) + @click.option( + "--generate-dir", + type=click.Path(dir_okay=True, file_okay=False), + help="Output generated application files and stop without deploying", + ) def heroku( files, metadata, @@ -49,6 +56,7 @@ def publish_subcommand(publish): about_url, name, tar, + generate_dir, ): "Publish databases to Datasette running on Heroku" fail_if_publish_binary_not_installed( @@ -105,6 +113,16 @@ def publish_subcommand(publish): secret, extra_metadata, ): + if generate_dir: + # Recursively copy files from current working directory to it + if pathlib.Path(generate_dir).exists(): + raise click.ClickException("Directory already exists") + shutil.copytree(".", generate_dir) + click.echo( + f"Generated files written to {generate_dir}, stopping without deploying", + err=True, + ) + return app_name = None if name: # Check to see if this app already exists @@ -176,7 +194,7 @@ def temporary_heroku_directory( fp.write(json.dumps(metadata_content, indent=2)) with open("runtime.txt", "w") as fp: - fp.write("python-3.8.10") + fp.write("python-3.11.0") if branch: install = [ diff --git a/datasette/static/app.css b/datasette/static/app.css index 712b9925..71437bd4 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -573,6 +573,9 @@ form button[type=button] { display: inline-block; margin-right: 0.3em; } +.select-wrapper:focus-within { + border: 1px solid black; +} .select-wrapper.filter-op { width: 80px; } diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 8a2fa060..16f90077 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -428,3 +428,18 @@ class AsgiFileDownload: 
content_type=self.content_type, headers=self.headers, ) + + +class AsgiRunOnFirstRequest: + def __init__(self, asgi, on_startup): + assert isinstance(on_startup, list) + self.asgi = asgi + self.on_startup = on_startup + self._started = False + + async def __call__(self, scope, receive, send): + if not self._started: + self._started = True + for hook in self.on_startup: + await hook() + return await self.asgi(scope, receive, send) diff --git a/datasette/version.py b/datasette/version.py index 3a4f06dc..7a87e2bf 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63.1" +__version__ = "0.64.4" __version_info__ = tuple(__version__.split(".")) diff --git a/datasette/views/table.py b/datasette/views/table.py index e80ed217..d479fede 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -172,20 +172,14 @@ class TableView(DataView): raise NotFound("Database not found: {}".format(database_route)) database_name = db.name - # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather - async def _gather_parallel(*args): - return await asyncio.gather(*args) - - async def _gather_sequential(*args): + # We always now run queries sequentially, rather than with asyncio.gather() - + # see https://github.com/simonw/datasette/issues/2189 + async def gather(*args): results = [] for fn in args: results.append(await fn) return results - gather = ( - _gather_sequential if request.args.get("_noparallel") else _gather_parallel - ) - # If this is a canned query, not a table, then dispatch to QueryView instead canned_query = await self.ds.get_canned_query( database_name, table_name, request.actor diff --git a/docs/authentication.rst b/docs/authentication.rst index 685dab15..37703307 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -307,7 +307,21 @@ To limit access to the ``add_name`` canned query in your ``dogs.db`` database to Controlling the ability to execute arbitrary SQL 
------------------------------------------------ -The ``"allow_sql"`` block can be used to control who is allowed to execute arbitrary SQL queries, both using the form on the database page e.g. https://latest.datasette.io/fixtures or by appending a ``?_where=`` parameter to the table page as seen on https://latest.datasette.io/fixtures/facetable?_where=city_id=1. +Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page <https://latest.datasette.io/fixtures>`__ or by appending a ``?_where=`` parameter to the table page `like this <https://latest.datasette.io/fixtures/facetable?_where=city_id=1>`__. + +Access to this ability is controlled by the :ref:`permissions_execute_sql` permission. + +The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting <setting_default_allow_sql>` when you first start Datasette running. + +You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries. + +To prevent any user from executing arbitrary SQL queries, use this: + +.. code-block:: json + + { + "allow_sql": false + } To enable just the :ref:`root user <authentication_root>` to execute SQL for all databases in your instance, use the following: @@ -515,7 +529,7 @@ Actor is allowed to run arbitrary SQL queries against a specific database, e.g. ``resource`` - string The name of the database -Default *allow*. +Default *allow*. See also :ref:`the default_allow_sql setting <setting_default_allow_sql>`. .. _permissions_permissions_debug: diff --git a/docs/changelog.rst b/docs/changelog.rst index 0e0393ef..409dbc42 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,62 @@ Changelog ========= +.. _v0_64_4: + +0.64.4 (2023-09-21) +------------------- + +- Fix for a crashing bug caused by viewing the table page for a named in-memory database. (:issue:`2189`) + +.. _v0_64_3: + +0.64.3 (2023-04-27) +------------------- + +- Added ``pip`` and ``setuptools`` as explicit dependencies. This fixes a bug where Datasette could not be installed using `Rye <https://github.com/mitsuhiko/rye>`__. (:issue:`2065`) + +.. 
_v0_64_2: + +0.64.2 (2023-03-08) +------------------- + +- Fixed a bug with ``datasette publish cloudrun`` where deploys all used the same Docker image tag. This was mostly inconsequential as the service is deployed as soon as the image has been pushed to the registry, but could result in the incorrect image being deployed if two different deploys for two separate services ran at exactly the same time. (:issue:`2036`) + +.. _v0_64_1: + +0.64.1 (2023-01-11) +------------------- + +- Documentation now links to a current source of information for installing Python 3. (:issue:`1987`) +- Incorrectly calling the Datasette constructor using ``Datasette("path/to/data.db")`` instead of ``Datasette(["path/to/data.db"])`` now returns a useful error message. (:issue:`1985`) + +.. _v0_64: + +0.64 (2023-01-09) +----------------- + +- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details. +- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`) +- `Building a location to time zone API with SpatiaLite <https://datasette.io/tutorials/spatialite>`__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API. +- New documentation about :ref:`how to debug problems loading SQLite extensions <installation_extensions>`. The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`) +- Fixed an accessibility issue: the ``