diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index fa608055..1002969a 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -31,7 +31,10 @@ jobs:
         pip install -e '.[test]'
     - name: Run tests
       run: |
-        pytest
+        pytest -n auto -m "not serial"
+        pytest -m "serial"
+        # And the test that exercises a localhost HTTPS server
+        tests/test_datasette_https_server.sh

   deploy:
     runs-on: ubuntu-latest
@@ -69,7 +72,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: '3.10'
+        python-version: '3.9'
     - uses: actions/cache@v2
       name: Configure pip caching
      with:
@@ -90,7 +93,7 @@ jobs:
     - name: Set up Cloud Run
       uses: google-github-actions/setup-gcloud@v0
       with:
-        version: '275.0.0'
+        version: '318.0.0'
         service_account_email: ${{ secrets.GCP_SA_EMAIL }}
         service_account_key: ${{ secrets.GCP_SA_KEY }}
     - name: Deploy stable-docs.datasette.io to Cloud Run
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 886f649a..39aa8b13 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,6 +35,8 @@ jobs:
       run: |
         pytest -n auto -m "not serial"
         pytest -m "serial"
+        # And the test that exercises a localhost HTTPS server
+        tests/test_datasette_https_server.sh
     - name: Check if cog needs to be run
       run: |
         cog --check docs/*.rst
diff --git a/datasette/app.py b/datasette/app.py
index 246269f3..41c73acd 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -64,16 +64,14 @@ from .utils import (
 )
 from .utils.asgi import (
     AsgiLifespan,
-    Base400,
     Forbidden,
     NotFound,
     Request,
     Response,
+    AsgiRunOnFirstRequest,
     asgi_static,
     asgi_send,
     asgi_send_file,
-    asgi_send_html,
-    asgi_send_json,
     asgi_send_redirect,
 )
 from .utils.internal_db import init_internal_db, populate_schema_tables
@@ -118,6 +116,11 @@ SETTINGS = (
         True,
         "Allow users to specify columns to facet using ?_facet= parameter",
     ),
+    Setting(
+        "default_allow_sql",
+        True,
+        "Allow anyone to run arbitrary SQL queries",
+    ),
     Setting(
         "allow_download",
         True,
@@ -215,6 +218,8 @@ class Datasette:
         self.config_dir = config_dir
         self.pdb = pdb
         self._secret = secret or secrets.token_hex(32)
+        if files is not None and isinstance(files, str):
+            raise ValueError("files= must be a list of paths, not a string")
         self.files = tuple(files or []) + tuple(immutables or [])
         if config_dir:
             db_files = []
@@ -371,23 +376,50 @@ class Datasette:
             await init_internal_db(internal_db)
             self.internal_db_created = True

-        current_schema_versions = {
-            row["database_name"]: row["schema_version"]
+        current_schema_versions_and_hashes = {
+            row["database_name"]: (row["schema_version"], row["schema_hash"])
             for row in await internal_db.execute(
-                "select database_name, schema_version from databases"
+                "select database_name, schema_version, schema_hash from databases"
             )
         }
         for database_name, db in self.databases.items():
-            schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
-            # Compare schema versions to see if we should skip it
-            if schema_version == current_schema_versions.get(database_name):
-                continue
+            schema_version = await db.schema_version()
+            current_version_and_hash = current_schema_versions_and_hashes.get(
+                database_name
+            )
+            if current_version_and_hash:
+                # We might get to skip this database
+                if schema_version is not None and current_version_and_hash:
+                    # Use this to decide if the schema has changed
+                    if schema_version == current_version_and_hash[0]:
+                        continue
+                else:
+                    # Use the schema hash instead
+                    schema_hash = await db.schema_hash()
+                    if schema_hash == current_version_and_hash[1]:
+                        continue
+
+            # Calculate new schema hash
+            schema_hash = await db.schema_hash()
+            placeholders = "(?, ?, ?, ?, ?)"
+            values = [
+                database_name,
+                str(db.path),
+                db.is_memory,
+                schema_version,
+                schema_hash,
+            ]
+            if db.path is None:
+                placeholders = "(?, null, ?, ?, ?)"
+                values = [database_name, db.is_memory, schema_version, schema_hash]
             await internal_db.execute_write(
                 """
-                INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version)
-                VALUES (?, ?, ?, ?)
-            """,
-                [database_name, str(db.path), db.is_memory, schema_version],
+                INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version, schema_hash)
+                VALUES {}
+            """.format(
+                placeholders
+            ),
+                values,
             )
             await populate_schema_tables(internal_db, db)

@@ -1260,7 +1292,7 @@ class Datasette:

         async def setup_db():
             # First time server starts up, calculate table counts for immutable databases
-            for dbname, database in self.databases.items():
+            for database in self.databases.values():
                 if not database.is_mutable:
                     await database.table_counts(limit=60 * 60 * 1000)

@@ -1274,10 +1306,8 @@ class Datasette:
         )
         if self.setting("trace_debug"):
             asgi = AsgiTracer(asgi)
-        asgi = AsgiLifespan(
-            asgi,
-            on_startup=setup_db,
-        )
+        asgi = AsgiLifespan(asgi)
+        asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup])
         for wrapper in pm.hook.asgi_wrapper(datasette=self):
             asgi = wrapper(asgi)
         return asgi
@@ -1566,42 +1596,34 @@ class DatasetteClient:
         return path

     async def get(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.get(self._fix(path), **kwargs)

     async def options(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.options(self._fix(path), **kwargs)

     async def head(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.head(self._fix(path), **kwargs)

     async def post(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.post(self._fix(path), **kwargs)

     async def put(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.put(self._fix(path), **kwargs)

     async def patch(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.patch(self._fix(path), **kwargs)

     async def delete(self, path, **kwargs):
-        await self.ds.invoke_startup()
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.delete(self._fix(path), **kwargs)

     async def request(self, method, path, **kwargs):
-        await self.ds.invoke_startup()
         avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None)
         async with httpx.AsyncClient(app=self.app) as client:
             return await client.request(
diff --git a/datasette/cli.py b/datasette/cli.py
index 6eb42712..fd65ea94 100644
--- a/datasette/cli.py
+++ b/datasette/cli.py
@@ -4,6 +4,7 @@ import click
 from click import formatting
 from click.types import CompositeParamType
 from click_default_group import DefaultGroup
+import functools
 import json
 import os
 import pathlib
@@ -11,6 +12,7 @@ import shutil
 from subprocess import call
 import sys
 from runpy import run_module
+import textwrap
 import webbrowser
 from .app import (
     OBSOLETE_SETTINGS,
@@ -126,7 +128,7 @@ class Setting(CompositeParamType):


 def sqlite_extensions(fn):
-    return click.option(
+    fn = click.option(
         "sqlite_extensions",
         "--load-extension",
         type=LoadExtension(),
@@ -135,6 +137,26 @@ def sqlite_extensions(fn):
         help="Path to a SQLite extension to load, and optional entrypoint",
     )(fn)

+    # Wrap it in a custom error handler
+    @functools.wraps(fn)
+    def wrapped(*args, **kwargs):
+        try:
+            return fn(*args, **kwargs)
+        except AttributeError as e:
+            if "enable_load_extension" in str(e):
+                raise click.ClickException(
+                    textwrap.dedent(
+                        """
+                        Your Python installation does not have the ability to load SQLite extensions.
+
+                        More information: https://datasette.io/help/extensions
+                        """
+                    ).strip()
+                )
+            raise
+
+    return wrapped
+

 @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True)
 @click.version_option(version=__version__)
@@ -607,7 +629,7 @@ def serve(
         url = "http://{}:{}{}?token={}".format(
             host, port, ds.urls.path("-/auth-token"), ds._root_token
         )
-        print(url)
+        click.echo(url)
     if open_browser:
         if url is None:
             # Figure out most convenient URL - to table, database or homepage
diff --git a/datasette/database.py b/datasette/database.py
index dfca179c..1de7b393 100644
--- a/datasette/database.py
+++ b/datasette/database.py
@@ -1,6 +1,7 @@
 import asyncio
 from collections import namedtuple
 from pathlib import Path
+import hashlib
 import janus
 import queue
 import sys
@@ -50,6 +51,24 @@ class Database:
         # This is used to track all file connections so they can be closed
         self._all_file_connections = []

+    async def schema_version(self):
+        # This can return 'None' if the schema_version cannot be read
+        # See https://github.com/simonw/datasette/issues/2058
+        try:
+            return (await self.execute("PRAGMA schema_version")).first()[0]
+        except sqlite3.OperationalError:
+            return None
+
+    async def schema_hash(self):
+        return hashlib.md5(
+            (
+                (
+                    await self.execute("SELECT group_concat(sql) FROM sqlite_master")
+                ).first()[0]
+                or ""
+            ).encode("utf8")
+        ).hexdigest()
+
     @property
     def cached_table_counts(self):
         if self._cached_table_counts is not None:
diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py
index b58d8d1b..a0681e83 100644
--- a/datasette/default_permissions.py
+++ b/datasette/default_permissions.py
@@ -36,12 +36,16 @@ def permission_allowed(datasette, actor, action, resource):
                 return None
             return actor_matches_allow(actor, allow)
         elif action == "execute-sql":
+            # Only use default_allow_sql setting if it is set to False:
+            default_allow_sql = (
+                None if datasette.setting("default_allow_sql") else False
+            )
             # Use allow_sql block from database block, or from top-level
             database_allow_sql = datasette.metadata("allow_sql", database=resource)
             if database_allow_sql is None:
                 database_allow_sql = datasette.metadata("allow_sql")
             if database_allow_sql is None:
-                return None
+                return default_allow_sql
             return actor_matches_allow(actor, database_allow_sql)

     return inner
diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py
index 77274eb0..760ff0d1 100644
--- a/datasette/publish/cloudrun.py
+++ b/datasette/publish/cloudrun.py
@@ -173,7 +173,7 @@ def publish_subcommand(publish):
             print(fp.read())
             print("\n====================\n")

-        image_id = f"gcr.io/{project}/{name}"
+        image_id = f"gcr.io/{project}/datasette-{service}"
         check_call(
             "gcloud builds submit --tag {}{}".format(
                 image_id, " --timeout {}".format(timeout) if timeout else ""
diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py
index 171252ce..f576a346 100644
--- a/datasette/publish/heroku.py
+++ b/datasette/publish/heroku.py
@@ -3,7 +3,9 @@ from datasette import hookimpl
 import click
 import json
 import os
+import pathlib
 import shlex
+import shutil
 from subprocess import call, check_output
 import tempfile

@@ -28,6 +30,11 @@ def publish_subcommand(publish):
         "--tar",
         help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar",
     )
+    @click.option(
+        "--generate-dir",
+        type=click.Path(dir_okay=True, file_okay=False),
+        help="Output generated application files and stop without deploying",
+    )
     def heroku(
         files,
         metadata,
@@ -49,6 +56,7 @@ def publish_subcommand(publish):
         about_url,
         name,
         tar,
+        generate_dir,
     ):
         "Publish databases to Datasette running on Heroku"
         fail_if_publish_binary_not_installed(
@@ -105,6 +113,16 @@ def publish_subcommand(publish):
             secret,
             extra_metadata,
         ):
+            if generate_dir:
+                # Recursively copy files from current working directory to it
+                if pathlib.Path(generate_dir).exists():
+                    raise click.ClickException("Directory already exists")
+                shutil.copytree(".", generate_dir)
+                click.echo(
+                    f"Generated files written to {generate_dir}, stopping without deploying",
+                    err=True,
+                )
+                return
             app_name = None
             if name:
                 # Check to see if this app already exists
@@ -176,7 +194,7 @@ def temporary_heroku_directory(
             fp.write(json.dumps(metadata_content, indent=2))

         with open("runtime.txt", "w") as fp:
-            fp.write("python-3.8.10")
+            fp.write("python-3.11.0")

         if branch:
             install = [
diff --git a/datasette/static/app.css b/datasette/static/app.css
index 712b9925..71437bd4 100644
--- a/datasette/static/app.css
+++ b/datasette/static/app.css
@@ -573,6 +573,9 @@ form button[type=button] {
     display: inline-block;
     margin-right: 0.3em;
 }
+.select-wrapper:focus-within {
+    border: 1px solid black;
+}
 .select-wrapper.filter-op {
     width: 80px;
 }
diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py
index 8a2fa060..16f90077 100644
--- a/datasette/utils/asgi.py
+++ b/datasette/utils/asgi.py
@@ -428,3 +428,18 @@ class AsgiFileDownload:
             content_type=self.content_type,
             headers=self.headers,
         )
+
+
+class AsgiRunOnFirstRequest:
+    def __init__(self, asgi, on_startup):
+        assert isinstance(on_startup, list)
+        self.asgi = asgi
+        self.on_startup = on_startup
+        self._started = False
+
+    async def __call__(self, scope, receive, send):
+        if not self._started:
+            self._started = True
+            for hook in self.on_startup:
+                await hook()
+        return await self.asgi(scope, receive, send)
diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py
index e4b49e80..08868f3f 100644
--- a/datasette/utils/internal_db.py
+++ b/datasette/utils/internal_db.py
@@ -9,7 +9,8 @@ async def init_internal_db(db):
         database_name TEXT PRIMARY KEY,
         path TEXT,
         is_memory INTEGER,
-        schema_version INTEGER
+        schema_version INTEGER,
+        schema_hash TEXT
     );
     CREATE TABLE IF NOT EXISTS tables (
         database_name TEXT,
diff --git a/datasette/version.py b/datasette/version.py
index 3a4f06dc..4e0dc4fc 100644
--- a/datasette/version.py
+++ b/datasette/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.63.1"
+__version__ = "0.64.2"
 __version_info__ = tuple(__version__.split("."))
diff --git a/docs/authentication.rst b/docs/authentication.rst
index 685dab15..37703307 100644
--- a/docs/authentication.rst
+++ b/docs/authentication.rst
@@ -307,7 +307,21 @@ To limit access to the ``add_name`` canned query in your ``dogs.db`` database to

 Controlling the ability to execute arbitrary SQL
 ------------------------------------------------

-The ``"allow_sql"`` block can be used to control who is allowed to execute arbitrary SQL queries, both using the form on the database page e.g. https://latest.datasette.io/fixtures or by appending a ``?_where=`` parameter to the table page as seen on https://latest.datasette.io/fixtures/facetable?_where=city_id=1.
+Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page <https://latest.datasette.io/fixtures>`__ or by appending a ``?_where=`` parameter to the table page `like this <https://latest.datasette.io/fixtures/facetable?_where=city_id=1>`__.
+
+Access to this ability is controlled by the :ref:`permissions_execute_sql` permission.
+
+The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting <setting_default_allow_sql>` when you first start Datasette running.
+
+You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries.
+
+To prevent any user from executing arbitrary SQL queries, use this:
+
+.. code-block:: json
+
+    {
+        "allow_sql": false
+    }

 To enable just the :ref:`root user` to execute SQL for all databases in your instance, use the following:
@@ -515,7 +529,7 @@ Actor is allowed to run arbitrary SQL queries against a specific database, e.g.
 ``resource`` - string
     The name of the database

-Default *allow*.
+Default *allow*. See also :ref:`the default_allow_sql setting <setting_default_allow_sql>`.

 .. _permissions_permissions_debug:
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 0e0393ef..0899b6fc 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,48 @@ Changelog
 =========

+.. _v0_64_2:
+
+0.64.2 (2023-03-08)
+-------------------
+
+- Fixed a bug with ``datasette publish cloudrun`` where deploys all used the same Docker image tag. This was mostly inconsequential as the service is deployed as soon as the image has been pushed to the registry, but could result in the incorrect image being deployed if two different deploys for two separate services ran at exactly the same time. (:issue:`2036`)
+
+.. _v0_64_1:
+
+0.64.1 (2023-01-11)
+-------------------
+
+- Documentation now links to a current source of information for installing Python 3. (:issue:`1987`)
+- Incorrectly calling the Datasette constructor using ``Datasette("path/to/data.db")`` instead of ``Datasette(["path/to/data.db"])`` now returns a useful error message. (:issue:`1985`)
+
+.. _v0_64:
+
+0.64 (2023-01-09)
+-----------------
+
+- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details.
+- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`)
+- `Building a location to time zone API with SpatiaLite <https://datasette.io/tutorials/spatialite>`__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API.
+- New documentation about :ref:`how to debug problems loading SQLite extensions `. The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`)
+- Fixed an accessibility issue: the ``