Compare commits

...

29 commits

Author SHA1 Message Date
Simon Willison
42bf9e2aab Backported experimental #2058 fix to 0.64.x 2023-04-12 17:56:10 -07:00
Simon Willison
2a0a94fe97 Release 0.64.2
Refs #2036
2023-03-08 12:41:13 -08:00
Simon Willison
cb9fa71878 Use service-specific image ID for Cloud Run deploys, refs #2036 2023-03-08 12:27:15 -08:00
Simon Willison
2e064641ac Release 0.64.1
Refs #1985, #1987
2023-01-11 10:21:37 -08:00
Simon Willison
a1c60bab86 Fix Sphinx warning turned error 2023-01-11 10:21:15 -08:00
Simon Willison
b7df546a45 Raise ValueError if Datasette(files=) is a string, refs #1985 2023-01-11 10:12:53 -08:00
Simon Willison
0a1de5d7b9 Link to non-spam Python 3 setup instructions
Refs #1987
2023-01-11 10:08:49 -08:00
Simon Willison
cd65558aee setup-gcloud 318.0.0
Refs https://til.simonwillison.net/googlecloud/gcloud-error-workaround
2023-01-09 16:02:28 -08:00
Simon Willison
0084daa50a Release 0.64, with a warning against arbitrary SQL with SpatiaLite
Refs #1409, #1771, #1979

Refs https://github.com/simonw/datasette.io/issues/132
2023-01-09 08:37:21 -08:00
Simon Willison
02987e342d Explicitly explain allow_sql: false 2023-01-09 08:25:19 -08:00
Simon Willison
fdb4d975a1 Better error for --load-extensions, refs #1979 2023-01-07 15:58:30 -08:00
Simon Willison
3b88ac671e What to do if extensions will not load, refs #1979 2023-01-07 15:48:08 -08:00
Simon Willison
1ec9c9995c Backported default_allow_sql for 0.63.x, closes #1409 2023-01-05 09:21:07 -08:00
Simon Willison
b8cf864fa6 Fixed broken example links in _where= docs 2023-01-04 16:53:39 -08:00
Simon Willison
1bff3f1a70 Fixed table_action example in docs 2023-01-04 16:53:31 -08:00
Simon Willison
7402294018 .select-wrapper:focus-within for accessibility, closes #1771 2023-01-04 16:53:16 -08:00
Simon Willison
9ec58da6ec Deploy docs on publish using Python 3.9
A workaround for gcloud setup, see:

https://til.simonwillison.net/googlecloud/gcloud-error-workaround

Refs #1963
2022-12-17 19:24:34 -08:00
Simon Willison
e05998bc85 Added missing word, refs #1963 2022-12-17 19:08:01 -08:00
Simon Willison
a9ffcbd42e Release 0.63.3
Refs #1955, #1958, #1963
2022-12-17 19:06:03 -08:00
Simon Willison
d93f975b3d On publish run tests same way as for test 2022-12-17 18:52:52 -08:00
Simon Willison
e6d94f9ffa Run new HTTPS test in CI, refs #1955 2022-12-17 18:51:17 -08:00
Simon Willison
0bd3eaa2dd Move HTTPS test to a bash script
See https://github.com/simonw/datasette/issues/1955#issuecomment-1356627931
2022-12-17 18:51:17 -08:00
Simon Willison
5649e547ef Put AsgiLifespan back so server starts up again, refs #1955 2022-12-17 18:51:17 -08:00
Simon Willison
96b3a86d7f Replace AsgiLifespan with AsgiRunOnFirstRequest, refs #1955 2022-12-17 18:51:17 -08:00
Simon Willison
4ba8d57bb1 Try click.echo() instead
This ensures the URL is output correctly when running under Docker.

Closes #1958
2022-12-15 16:57:45 -08:00
Simon Willison
d67f812b73 Release 0.63.2
Refs #1904, #1905
2022-11-18 16:53:05 -08:00
Simon Willison
c4d002fef5 Pin httpx in Pyodide test, refs #1904
Should help get tests to pass for #1896 too
2022-11-18 16:52:09 -08:00
Simon Willison
a93ccc63c7 Upgrade to Python 3.11 on Heroku, refs #1905 2022-11-18 16:49:08 -08:00
Simon Willison
9cca381033 --generate-dir option to publish heroku, refs #1905 2022-11-18 16:49:02 -08:00
35 changed files with 521 additions and 147 deletions

View file

@@ -31,7 +31,10 @@ jobs:
pip install -e '.[test]'
- name: Run tests
run: |
pytest
pytest -n auto -m "not serial"
pytest -m "serial"
# And the test that exercises a localhost HTTPS server
tests/test_datasette_https_server.sh
deploy:
runs-on: ubuntu-latest
@@ -69,7 +72,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.10'
python-version: '3.9'
- uses: actions/cache@v2
name: Configure pip caching
with:
@@ -90,7 +93,7 @@ jobs:
- name: Set up Cloud Run
uses: google-github-actions/setup-gcloud@v0
with:
version: '275.0.0'
version: '318.0.0'
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
- name: Deploy stable-docs.datasette.io to Cloud Run

View file

@@ -35,6 +35,8 @@ jobs:
run: |
pytest -n auto -m "not serial"
pytest -m "serial"
# And the test that exercises a localhost HTTPS server
tests/test_datasette_https_server.sh
- name: Check if cog needs to be run
run: |
cog --check docs/*.rst

View file

@@ -64,16 +64,14 @@ from .utils import (
)
from .utils.asgi import (
AsgiLifespan,
Base400,
Forbidden,
NotFound,
Request,
Response,
AsgiRunOnFirstRequest,
asgi_static,
asgi_send,
asgi_send_file,
asgi_send_html,
asgi_send_json,
asgi_send_redirect,
)
from .utils.internal_db import init_internal_db, populate_schema_tables
@@ -118,6 +116,11 @@ SETTINGS = (
True,
"Allow users to specify columns to facet using ?_facet= parameter",
),
Setting(
"default_allow_sql",
True,
"Allow anyone to run arbitrary SQL queries",
),
Setting(
"allow_download",
True,
@@ -215,6 +218,8 @@ class Datasette:
self.config_dir = config_dir
self.pdb = pdb
self._secret = secret or secrets.token_hex(32)
if files is not None and isinstance(files, str):
raise ValueError("files= must be a list of paths, not a string")
self.files = tuple(files or []) + tuple(immutables or [])
if config_dir:
db_files = []
@@ -371,23 +376,50 @@ class Datasette:
await init_internal_db(internal_db)
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
current_schema_versions_and_hashes = {
row["database_name"]: (row["schema_version"], row["schema_hash"])
for row in await internal_db.execute(
"select database_name, schema_version from databases"
"select database_name, schema_version, schema_hash from databases"
)
}
for database_name, db in self.databases.items():
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
# Compare schema versions to see if we should skip it
if schema_version == current_schema_versions.get(database_name):
continue
schema_version = await db.schema_version()
current_version_and_hash = current_schema_versions_and_hashes.get(
database_name
)
if current_version_and_hash:
# We might get to skip this database
if schema_version is not None and current_version_and_hash:
# Use this to decide if the schema has changed
if schema_version == current_version_and_hash[0]:
continue
else:
# Use the schema hash instead
schema_hash = await db.schema_hash()
if schema_hash == current_version_and_hash[1]:
continue
# Calculate new schema hash
schema_hash = await db.schema_hash()
placeholders = "(?, ?, ?, ?, ?)"
values = [
database_name,
str(db.path),
db.is_memory,
schema_version,
schema_hash,
]
if db.path is None:
placeholders = "(?, null, ?, ?, ?)"
values = [database_name, db.is_memory, schema_version, schema_hash]
await internal_db.execute_write(
"""
INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version)
VALUES (?, ?, ?, ?)
""",
[database_name, str(db.path), db.is_memory, schema_version],
INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version, schema_hash)
VALUES {}
""".format(
placeholders
),
values,
)
await populate_schema_tables(internal_db, db)
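For illustration only (not part of the diff), a minimal sketch of what the refresh_schemas() logic above records: once it has run, the internal "databases" table carries the new schema_hash column alongside schema_version.

import asyncio
from datasette.app import Datasette

async def show_internal():
    # Sketch: memory=True registers a _memory database for refresh_schemas() to record
    ds = Datasette(memory=True)
    await ds.refresh_schemas()
    internal = ds.get_database("_internal")
    rows = await internal.execute(
        "select database_name, schema_version, schema_hash from databases"
    )
    for row in rows:
        print(dict(row))

asyncio.run(show_internal())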
@@ -1260,7 +1292,7 @@ class Datasette:
async def setup_db():
# First time server starts up, calculate table counts for immutable databases
for dbname, database in self.databases.items():
for database in self.databases.values():
if not database.is_mutable:
await database.table_counts(limit=60 * 60 * 1000)
@@ -1274,10 +1306,8 @@ class Datasette:
)
if self.setting("trace_debug"):
asgi = AsgiTracer(asgi)
asgi = AsgiLifespan(
asgi,
on_startup=setup_db,
)
asgi = AsgiLifespan(asgi)
asgi = AsgiRunOnFirstRequest(asgi, on_startup=[setup_db, self.invoke_startup])
for wrapper in pm.hook.asgi_wrapper(datasette=self):
asgi = wrapper(asgi)
return asgi
@@ -1566,42 +1596,34 @@ class DatasetteClient:
return path
async def get(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.get(self._fix(path), **kwargs)
async def options(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.options(self._fix(path), **kwargs)
async def head(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.head(self._fix(path), **kwargs)
async def post(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.post(self._fix(path), **kwargs)
async def put(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.put(self._fix(path), **kwargs)
async def patch(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.patch(self._fix(path), **kwargs)
async def delete(self, path, **kwargs):
await self.ds.invoke_startup()
async with httpx.AsyncClient(app=self.app) as client:
return await client.delete(self._fix(path), **kwargs)
async def request(self, method, path, **kwargs):
await self.ds.invoke_startup()
avoid_path_rewrites = kwargs.pop("avoid_path_rewrites", None)
async with httpx.AsyncClient(app=self.app) as client:
return await client.request(

View file

@@ -4,6 +4,7 @@ import click
from click import formatting
from click.types import CompositeParamType
from click_default_group import DefaultGroup
import functools
import json
import os
import pathlib
@@ -11,6 +12,7 @@ import shutil
from subprocess import call
import sys
from runpy import run_module
import textwrap
import webbrowser
from .app import (
OBSOLETE_SETTINGS,
@@ -126,7 +128,7 @@ class Setting(CompositeParamType):
def sqlite_extensions(fn):
return click.option(
fn = click.option(
"sqlite_extensions",
"--load-extension",
type=LoadExtension(),
@@ -135,6 +137,26 @@ def sqlite_extensions(fn):
help="Path to a SQLite extension to load, and optional entrypoint",
)(fn)
# Wrap it in a custom error handler
@functools.wraps(fn)
def wrapped(*args, **kwargs):
try:
return fn(*args, **kwargs)
except AttributeError as e:
if "enable_load_extension" in str(e):
raise click.ClickException(
textwrap.dedent(
"""
Your Python installation does not have the ability to load SQLite extensions.
More information: https://datasette.io/help/extensions
"""
).strip()
)
raise
return wrapped
@click.group(cls=DefaultGroup, default="serve", default_if_no_args=True)
@click.version_option(version=__version__)
@@ -607,7 +629,7 @@ def serve(
url = "http://{}:{}{}?token={}".format(
host, port, ds.urls.path("-/auth-token"), ds._root_token
)
print(url)
click.echo(url)
if open_browser:
if url is None:
# Figure out most convenient URL - to table, database or homepage

View file

@@ -1,6 +1,7 @@
import asyncio
from collections import namedtuple
from pathlib import Path
import hashlib
import janus
import queue
import sys
@@ -50,6 +51,24 @@ class Database:
# This is used to track all file connections so they can be closed
self._all_file_connections = []
async def schema_version(self):
# This can return 'None' if the schema_version cannot be read
# See https://github.com/simonw/datasette/issues/2058
try:
return (await self.execute("PRAGMA schema_version")).first()[0]
except sqlite3.OperationalError:
return None
async def schema_hash(self):
return hashlib.md5(
(
(
await self.execute("SELECT group_concat(sql) FROM sqlite_master")
).first()[0]
or ""
).encode("utf8")
).hexdigest()
@property
def cached_table_counts(self):
if self._cached_table_counts is not None:
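The schema_hash() method above hashes the concatenated schema SQL; refresh_schemas() falls back to it when PRAGMA schema_version cannot be read. A standalone sketch of the same idea, using the sqlite3 module directly for illustration only:

import hashlib
import sqlite3

# Hash the concatenated CREATE statements, mirroring schema_hash() above
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
schema_sql = conn.execute("SELECT group_concat(sql) FROM sqlite_master").fetchone()[0] or ""
print(hashlib.md5(schema_sql.encode("utf8")).hexdigest())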

View file

@@ -36,12 +36,16 @@ def permission_allowed(datasette, actor, action, resource):
return None
return actor_matches_allow(actor, allow)
elif action == "execute-sql":
# Only use default_allow_sql setting if it is set to False:
default_allow_sql = (
None if datasette.setting("default_allow_sql") else False
)
# Use allow_sql block from database block, or from top-level
database_allow_sql = datasette.metadata("allow_sql", database=resource)
if database_allow_sql is None:
database_allow_sql = datasette.metadata("allow_sql")
if database_allow_sql is None:
return None
return default_allow_sql
return actor_matches_allow(actor, database_allow_sql)
return inner
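A sketch of the resulting behaviour (assuming the Datasette constructor's settings= keyword; not part of the diff): with default_allow_sql off and no allow_sql metadata configured, the execute-sql check now resolves to False instead of falling through to the default allow.

import asyncio
from datasette.app import Datasette

async def demo():
    ds = Datasette(memory=True, settings={"default_allow_sql": False})
    # Anonymous actor, no allow_sql block anywhere: expected result is False
    allowed = await ds.permission_allowed(None, "execute-sql", resource="_memory")
    print(allowed)

asyncio.run(demo())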

View file

@@ -173,7 +173,7 @@ def publish_subcommand(publish):
print(fp.read())
print("\n====================\n")
image_id = f"gcr.io/{project}/{name}"
image_id = f"gcr.io/{project}/datasette-{service}"
check_call(
"gcloud builds submit --tag {}{}".format(
image_id, " --timeout {}".format(timeout) if timeout else ""

View file

@@ -3,7 +3,9 @@ from datasette import hookimpl
import click
import json
import os
import pathlib
import shlex
import shutil
from subprocess import call, check_output
import tempfile
@@ -28,6 +30,11 @@ def publish_subcommand(publish):
"--tar",
help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar",
)
@click.option(
"--generate-dir",
type=click.Path(dir_okay=True, file_okay=False),
help="Output generated application files and stop without deploying",
)
def heroku(
files,
metadata,
@@ -49,6 +56,7 @@ def publish_subcommand(publish):
about_url,
name,
tar,
generate_dir,
):
"Publish databases to Datasette running on Heroku"
fail_if_publish_binary_not_installed(
@@ -105,6 +113,16 @@ def publish_subcommand(publish):
secret,
extra_metadata,
):
if generate_dir:
# Recursively copy files from current working directory to it
if pathlib.Path(generate_dir).exists():
raise click.ClickException("Directory already exists")
shutil.copytree(".", generate_dir)
click.echo(
f"Generated files written to {generate_dir}, stopping without deploying",
err=True,
)
return
app_name = None
if name:
# Check to see if this app already exists
@@ -176,7 +194,7 @@ def temporary_heroku_directory(
fp.write(json.dumps(metadata_content, indent=2))
with open("runtime.txt", "w") as fp:
fp.write("python-3.8.10")
fp.write("python-3.11.0")
if branch:
install = [

View file

@@ -573,6 +573,9 @@ form button[type=button] {
display: inline-block;
margin-right: 0.3em;
}
.select-wrapper:focus-within {
border: 1px solid black;
}
.select-wrapper.filter-op {
width: 80px;
}

View file

@@ -428,3 +428,18 @@ class AsgiFileDownload:
content_type=self.content_type,
headers=self.headers,
)
class AsgiRunOnFirstRequest:
def __init__(self, asgi, on_startup):
assert isinstance(on_startup, list)
self.asgi = asgi
self.on_startup = on_startup
self._started = False
async def __call__(self, scope, receive, send):
if not self._started:
self._started = True
for hook in self.on_startup:
await hook()
return await self.asgi(scope, receive, send)
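A minimal usage sketch (the app and hook names are illustrative, not part of the diff) showing how the wrapper defers startup hooks until the first request arrives:

from datasette.utils.asgi import AsgiRunOnFirstRequest

async def startup():
    print("runs exactly once, before the first request is handled")

async def app(scope, receive, send):
    # Bare-bones ASGI app used only for illustration
    await send({"type": "http.response.start", "status": 200, "headers": []})
    await send({"type": "http.response.body", "body": b"ok"})

wrapped = AsgiRunOnFirstRequest(app, on_startup=[startup])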

View file

@@ -9,7 +9,8 @@ async def init_internal_db(db):
database_name TEXT PRIMARY KEY,
path TEXT,
is_memory INTEGER,
schema_version INTEGER
schema_version INTEGER,
schema_hash TEXT
);
CREATE TABLE IF NOT EXISTS tables (
database_name TEXT,

View file

@@ -1,2 +1,2 @@
__version__ = "0.63.1"
__version__ = "0.64.2"
__version_info__ = tuple(__version__.split("."))

View file

@@ -307,7 +307,21 @@ To limit access to the ``add_name`` canned query in your ``dogs.db`` database to
Controlling the ability to execute arbitrary SQL
------------------------------------------------
The ``"allow_sql"`` block can be used to control who is allowed to execute arbitrary SQL queries, both using the form on the database page e.g. https://latest.datasette.io/fixtures or by appending a ``?_where=`` parameter to the table page as seen on https://latest.datasette.io/fixtures/facetable?_where=city_id=1.
Datasette defaults to allowing any site visitor to execute their own custom SQL queries, for example using the form on `the database page <https://latest.datasette.io/fixtures>`__ or by appending a ``?_where=`` parameter to the table page `like this <https://latest.datasette.io/fixtures/facetable?_where=_city_id=1>`__.
Access to this ability is controlled by the :ref:`permissions_execute_sql` permission.
The easiest way to disable arbitrary SQL queries is using the :ref:`default_allow_sql setting <setting_default_allow_sql>` when you first start Datasette running.
You can alternatively use an ``"allow_sql"`` block to control who is allowed to execute arbitrary SQL queries.
To prevent any user from executing arbitrary SQL queries, use this:
.. code-block:: json
{
"allow_sql": false
}
To enable just the :ref:`root user<authentication_root>` to execute SQL for all databases in your instance, use the following:
@@ -515,7 +529,7 @@ Actor is allowed to run arbitrary SQL queries against a specific database, e.g.
``resource`` - string
The name of the database
Default *allow*.
Default *allow*. See also :ref:`the default_allow_sql setting <setting_default_allow_sql>`.
.. _permissions_permissions_debug:

View file

@@ -4,6 +4,48 @@
Changelog
=========
.. _v0_64_2:
0.64.2 (2023-03-08)
-------------------
- Fixed a bug with ``datasette publish cloudrun`` where deploys all used the same Docker image tag. This was mostly inconsequential as the service is deployed as soon as the image has been pushed to the registry, but could result in the incorrect image being deployed if two different deploys for two separate services ran at exactly the same time. (:issue:`2036`)
.. _v0_64_1:
0.64.1 (2023-01-11)
-------------------
- Documentation now links to a current source of information for installing Python 3. (:issue:`1987`)
- Incorrectly calling the Datasette constructor using ``Datasette("path/to/data.db")`` instead of ``Datasette(["path/to/data.db"])`` now returns a useful error message. (:issue:`1985`)
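For illustration (a sketch, not part of the changelog), the new error in action:
.. code-block:: python
from datasette.app import Datasette

try:
    Datasette("path/to/data.db")  # incorrect: a string rather than a list
except ValueError as e:
    print(e)  # files= must be a list of paths, not a string

# The correct call wraps the path in a list: Datasette(["path/to/data.db"])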
.. _v0_64:
0.64 (2023-01-09)
-----------------
- Datasette now **strongly recommends against allowing arbitrary SQL queries if you are using SpatiaLite**. SpatiaLite includes SQL functions that could cause the Datasette server to crash. See :ref:`spatialite` for more details.
- New :ref:`setting_default_allow_sql` setting, providing an easier way to disable all arbitrary SQL execution by end users: ``datasette --setting default_allow_sql off``. See also :ref:`authentication_permissions_execute_sql`. (:issue:`1409`)
- `Building a location to time zone API with SpatiaLite <https://datasette.io/tutorials/spatialite>`__ is a new Datasette tutorial showing how to safely use SpatiaLite to create a location to time zone API.
- New documentation about :ref:`how to debug problems loading SQLite extensions <installation_extensions>`. The error message shown when an extension cannot be loaded has also been improved. (:issue:`1979`)
- Fixed an accessibility issue: the ``<select>`` elements in the table filter form now show an outline when they are currently focused. (:issue:`1771`)
.. _v0_63_3:
0.63.3 (2022-12-17)
-------------------
- Fixed a bug where ``datasette --root``, when running in Docker, would only output the URL to sign in as root when the server shut down, not when it started up. (:issue:`1958`)
- You no longer need to ensure ``await datasette.invoke_startup()`` has been called in order for Datasette to start correctly serving requests - this is now handled automatically the first time the server receives a request. This fixes a bug experienced when Datasette is served directly by an ASGI application server such as Uvicorn or Gunicorn. It also fixes a bug with the `datasette-gunicorn <https://datasette.io/plugins/datasette-gunicorn>`__ plugin. (:issue:`1955`)
.. _v0_63_2:
0.63.2 (2022-11-18)
-------------------
- Fixed a bug in ``datasette publish heroku`` where deployments failed due to an older version of Python being requested. (:issue:`1905`)
- New ``datasette publish heroku --generate-dir <dir>`` option for generating a Heroku deployment directory without deploying it.
.. _v0_63_1:
0.63.1 (2022-11-10)

View file

@@ -224,6 +224,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam
(default=50)
allow_facet Allow users to specify columns to facet using
?_facet= parameter (default=True)
default_allow_sql Allow anyone to run arbitrary SQL queries
(default=True)
allow_download Allow users to download the original SQLite
database files (default=True)
suggest_facets Calculate and display suggested facets
@@ -501,6 +503,8 @@ See :ref:`publish_heroku`.
-n, --name TEXT Application name to use when deploying
--tar TEXT --tar option to pass to Heroku, e.g.
--tar=/usr/local/bin/gtar
--generate-dir DIRECTORY Output generated application files and stop
without deploying
--help Show this message and exit.

View file

@@ -34,7 +34,7 @@
extensions = ["sphinx.ext.extlinks", "sphinx.ext.autodoc", "sphinx_copybutton"]
extlinks = {
"issue": ("https://github.com/simonw/datasette/issues/%s", "#"),
"issue": ("https://github.com/simonw/datasette/issues/%s", "#%s"),
}
# Add any paths that contain templates here, relative to this directory.

View file

@@ -57,7 +57,7 @@ If the latest packaged release of Datasette has not yet been made available thro
Using pip
---------
Datasette requires Python 3.7 or higher. Visit `InstallPython3.com <https://installpython3.com/>`__ for step-by-step installation guides for your operating system.
Datasette requires Python 3.7 or higher. The `Python.org Python For Beginners <https://www.python.org/about/gettingstarted/>`__ page has instructions for getting started.
You can install Datasette and its dependencies using ``pip``::
@@ -230,3 +230,60 @@ Some plugins such as `datasette-ripgrep <https://datasette.io/plugins/datasette-
pip install datasette-ripgrep'
docker commit $(docker ps -lq) datasette-with-ripgrep
.. _installation_extensions:
A note about extensions
=======================
SQLite supports extensions, such as :ref:`spatialite` for geospatial operations.
These can be loaded using the ``--load-extension`` argument, like so::
datasette --load-extension=/usr/local/lib/mod_spatialite.dylib
Some Python installations do not include support for SQLite extensions. If this is the case you will see the following error when you attempt to load an extension::
Your Python installation does not have the ability to load SQLite extensions.
In some cases you may see the following error message instead::
AttributeError: 'sqlite3.Connection' object has no attribute 'enable_load_extension'
On macOS the easiest fix for this is to install Datasette using Homebrew::
brew install datasette
Use ``which datasette`` to confirm that ``datasette`` will run that version. The output should look something like this::
/usr/local/opt/datasette/bin/datasette
If you get a different location here such as ``/Library/Frameworks/Python.framework/Versions/3.10/bin/datasette`` you can run the following command to cause ``datasette`` to execute the Homebrew version instead::
alias datasette=$(echo $(brew --prefix datasette)/bin/datasette)
You can undo this operation using::
unalias datasette
If you need to run SQLite with extension support for other Python code, you can do so by installing Python itself using Homebrew::
brew install python
Then executing Python using::
/usr/local/opt/python@3/libexec/bin/python
A more convenient way to work with this version of Python may be to use it to create a virtual environment::
/usr/local/opt/python@3/libexec/bin/python -m venv datasette-venv
Then activate it like this::
source datasette-venv/bin/activate
Now running ``python`` and ``pip`` will work against a version of Python 3 that includes support for SQLite extensions::
pip install datasette
which datasette
datasette --version
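As a quick check (a sketch, not part of the documentation), you can confirm whether your Python's ``sqlite3`` module is able to load extensions at all:
.. code-block:: python
import sqlite3

conn = sqlite3.connect(":memory:")
# If this attribute is missing, --load-extension will fail with the error shown above
print("Extension loading available:", hasattr(conn, "enable_load_extension"))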

View file

@@ -357,8 +357,8 @@ Special table arguments
Some examples:
* `facetable?_where=neighborhood like "%c%"&_where=city_id=3 <https://latest.datasette.io/fixtures/facetable?_where=neighborhood%20like%20%22%c%%22&_where=city_id=3>`__
* `facetable?_where=city_id in (select id from facet_cities where name != "Detroit") <https://latest.datasette.io/fixtures/facetable?_where=city_id%20in%20(select%20id%20from%20facet_cities%20where%20name%20!=%20%22Detroit%22)>`__
* `facetable?_where=_neighborhood like "%c%"&_where=_city_id=3 <https://latest.datasette.io/fixtures/facetable?_where=_neighborhood%20like%20%22%c%%22&_where=_city_id=3>`__
* `facetable?_where=_city_id in (select id from facet_cities where name != "Detroit") <https://latest.datasette.io/fixtures/facetable?_where=_city_id%20in%20(select%20id%20from%20facet_cities%20where%20name%20!=%20%22Detroit%22)>`__
``?_through={json}``
This can be used to filter rows via a join against another table.

View file

@@ -855,13 +855,14 @@ Potential use-cases:
.. note::
If you are writing :ref:`unit tests <testing_plugins>` for a plugin that uses this hook you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example:
If you are writing :ref:`unit tests <testing_plugins>` for a plugin that uses this hook and doesn't exercise Datasette by sending
any simulated requests through it you will need to explicitly call ``await ds.invoke_startup()`` in your tests. An example:
.. code-block:: python
@pytest.mark.asyncio
async def test_my_plugin():
ds = Datasette([], metadata={})
ds = Datasette()
await ds.invoke_startup()
# Rest of test goes here
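For contrast, a sketch (not taken from the documentation) of a test that sends a simulated request instead, in which case no explicit ``invoke_startup()`` call is needed:
.. code-block:: python
import pytest
from datasette.app import Datasette

@pytest.mark.asyncio
async def test_my_plugin_via_client():
    ds = Datasette()
    # The first simulated request triggers invoke_startup() automatically
    response = await ds.client.get("/-/versions.json")
    assert response.status_code == 200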
@@ -1345,7 +1346,7 @@ This example adds a new table action if the signed in user is ``"root"``:
@hookimpl
def table_actions(datasette, actor):
def table_actions(datasette, actor, database, table):
if actor and actor.get("id") == "root":
return [
{

View file

@@ -73,6 +73,10 @@ This will output some details about the new deployment, including a URL like thi
You can specify a custom app name by passing ``-n my-app-name`` to the publish command. This will also allow you to overwrite an existing app.
Rather than deploying directly you can use the ``--generate-dir`` option to output the files that would be deployed to a directory::
datasette publish heroku mydatabase.db --generate-dir=/tmp/deploy-this-to-heroku
See :ref:`cli_help_publish_heroku___help` for the full list of options for this command.
.. _publish_vercel:

View file

@@ -59,6 +59,21 @@ Settings
The following options can be set using ``--setting name value``, or by storing them in the ``settings.json`` file for use with :ref:`config_dir`.
.. _setting_default_allow_sql:
default_allow_sql
~~~~~~~~~~~~~~~~~
Should users be able to execute arbitrary SQL queries by default?
Setting this to ``off`` causes permission checks for :ref:`permissions_execute_sql` to fail by default.
::
datasette mydatabase.db --setting default_allow_sql off
This setting is one of two ways to achieve this: the other is to add ``"allow_sql": false`` to your ``metadata.json`` file, as described in :ref:`authentication_permissions_execute_sql`. The setting offers a more convenient way to do this.
.. _setting_default_page_size:
default_page_size

View file

@@ -4,17 +4,37 @@
SpatiaLite
============
The `SpatiaLite module <https://www.gaia-gis.it/fossil/libspatialite/index>`_ for SQLite adds features for handling geographic and spatial data. For an example of what you can do with it, see the tutorial `Building a location to time zone API with SpatiaLite, OpenStreetMap and Datasette <https://simonwillison.net/2017/Dec/12/location-time-zone-api/>`_.
The `SpatiaLite module <https://www.gaia-gis.it/fossil/libspatialite/index>`_ for SQLite adds features for handling geographic and spatial data. For an example of what you can do with it, see the tutorial `Building a location to time zone API with SpatiaLite <https://datasette.io/tutorials/spatialite>`__.
To use it with Datasette, you need to install the ``mod_spatialite`` dynamic library. This can then be loaded into Datasette using the ``--load-extension`` command-line option.
Datasette can look for SpatiaLite in common installation locations if you run it like this::
datasette --load-extension=spatialite
datasette --load-extension=spatialite --setting default_allow_sql off
If SpatiaLite is in another location, use the full path to the extension instead::
datasette --load-extension=/usr/local/lib/mod_spatialite.dylib
datasette --setting default_allow_sql off \
--load-extension=/usr/local/lib/mod_spatialite.dylib
.. _spatialite_warning:
Warning
=======
.. warning::
The SpatiaLite extension adds `a large number of additional SQL functions <https://www.gaia-gis.it/gaia-sins/spatialite-sql-5.0.1.html>`__, some of which are not safe for untrusted users to execute: they may cause the Datasette server to crash.
You should not expose a SpatiaLite-enabled Datasette instance to the public internet without taking extra measures to secure it against potentially harmful SQL queries.
The following steps are recommended:
- Disable arbitrary SQL queries by untrusted users. See :ref:`authentication_permissions_execute_sql` for ways to do this. The easiest is to start Datasette with the ``datasette --setting default_allow_sql off`` option.
- Define :ref:`canned_queries` with the SQL queries that use SpatiaLite functions that you want people to be able to execute.
The `Datasette SpatiaLite tutorial <https://datasette.io/tutorials/spatialite>`__ includes detailed instructions for running SpatiaLite safely using these techniques.
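As a sketch of that second recommendation (the database, table and column names here are hypothetical, not taken from the tutorial), a ``metadata.json`` canned query can expose a single SpatiaLite lookup while arbitrary SQL stays disabled:
.. code-block:: json
{
    "databases": {
        "timezones": {
            "queries": {
                "zone_for_point": {
                    "sql": "select tzid from timezones where within(GeomFromText(:point), geometry)"
                }
            }
        }
    }
}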
.. _spatialite_installation:
Installation
============

View file

@@ -80,7 +80,7 @@ Creating a ``Datasette()`` instance like this as useful shortcut in tests, but t
This method registers any :ref:`plugin_hook_startup` or :ref:`plugin_hook_prepare_jinja2_environment` plugins that might themselves need to make async calls.
If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - those method calls ensure that ``.invoke_startup()`` has been called for you.
If you are using ``await datasette.client.get()`` and similar methods then you don't need to worry about this - Datasette automatically calls ``invoke_startup()`` the first time it handles a request.
.. _testing_plugins_pdb:

View file

@@ -25,6 +25,7 @@ async () => {
let output = await pyodide.runPythonAsync(\`
import micropip
await micropip.install('h11==0.12.0')
await micropip.install('httpx==0.23')
await micropip.install('http://localhost:8529/$wheel')
import ssl
import setuptools

View file

@@ -23,6 +23,17 @@ UNDOCUMENTED_PERMISSIONS = {
}
def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs):
start = time.time()
while time.time() - start < timeout:
try:
client.get(url, **kwargs)
return
except httpx.ConnectError:
time.sleep(0.1)
raise AssertionError("Timed out waiting for {} to respond".format(url))
def pytest_report_header(config):
return "SQLite: {}".format(
sqlite3.connect(":memory:").execute("select sqlite_version()").fetchone()[0]
@@ -111,13 +122,7 @@ def ds_localhost_http_server():
# Avoid FileNotFoundError: [Errno 2] No such file or directory:
cwd=tempfile.gettempdir(),
)
# Loop until port 8041 serves traffic
while True:
try:
httpx.get("http://localhost:8041/")
break
except httpx.ConnectError:
time.sleep(0.1)
wait_until_responds("http://localhost:8041/")
# Check it started successfully
assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
yield ds_proc
@@ -125,46 +130,6 @@ def ds_localhost_http_server():
ds_proc.terminate()
@pytest.fixture(scope="session")
def ds_localhost_https_server(tmp_path_factory):
cert_directory = tmp_path_factory.mktemp("certs")
ca = trustme.CA()
server_cert = ca.issue_cert("localhost")
keyfile = str(cert_directory / "server.key")
certfile = str(cert_directory / "server.pem")
client_cert = str(cert_directory / "client.pem")
server_cert.private_key_pem.write_to_path(path=keyfile)
for blob in server_cert.cert_chain_pems:
blob.write_to_path(path=certfile, append=True)
ca.cert_pem.write_to_path(path=client_cert)
ds_proc = subprocess.Popen(
[
"datasette",
"--memory",
"-p",
"8042",
"--ssl-keyfile",
keyfile,
"--ssl-certfile",
certfile,
],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=tempfile.gettempdir(),
)
while True:
try:
httpx.get("https://localhost:8042/", verify=client_cert)
break
except httpx.ConnectError:
time.sleep(0.1)
# Check it started successfully
assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
yield ds_proc, client_cert
# Shut it down at the end of the pytest session
ds_proc.terminate()
@pytest.fixture(scope="session")
def ds_unix_domain_socket_server(tmp_path_factory):
# This used to use tmp_path_factory.mktemp("uds") but that turned out to
@@ -181,12 +146,7 @@ def ds_unix_domain_socket_server(tmp_path_factory):
# Poll until available
transport = httpx.HTTPTransport(uds=uds)
client = httpx.Client(transport=transport)
while True:
try:
client.get("http://localhost/_memory.json")
break
except httpx.ConnectError:
time.sleep(0.1)
wait_until_responds("http://localhost/_memory.json", client=client)
# Check it started successfully
assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
yield ds_proc, uds

View file

@@ -805,6 +805,7 @@ def test_settings_json(app_client):
assert {
"default_page_size": 50,
"default_facet_size": 30,
"default_allow_sql": True,
"facet_suggest_time_limit_ms": 50,
"facet_time_limit_ms": 200,
"max_returned_rows": 100,

View file

@@ -215,6 +215,28 @@ def test_setting_type_validation():
assert '"default_page_size" should be an integer' in result.stderr
@pytest.mark.parametrize("default_allow_sql", (True, False))
def test_setting_default_allow_sql(default_allow_sql):
runner = CliRunner()
result = runner.invoke(
cli,
[
"--setting",
"default_allow_sql",
"on" if default_allow_sql else "off",
"--get",
"/_memory.json?sql=select+21&_shape=objects",
],
)
if default_allow_sql:
assert result.exit_code == 0, result.output
assert json.loads(result.output)["rows"][0] == {"21": 21}
else:
assert result.exit_code == 1, result.output
# This isn't JSON at the moment, maybe it should be though
assert "Forbidden" in result.output
def test_config_deprecated():
# The --config option should show a deprecation message
runner = CliRunner(mix_stderr=False)

View file

@@ -13,17 +13,6 @@ def test_serve_localhost_http(ds_localhost_http_server):
}.items() <= response.json().items()
@pytest.mark.serial
def test_serve_localhost_https(ds_localhost_https_server):
_, client_cert = ds_localhost_https_server
response = httpx.get("https://localhost:8042/_memory.json", verify=client_cert)
assert {
"database": "_memory",
"path": "/_memory",
"tables": [],
}.items() <= response.json().items()
@pytest.mark.serial
@pytest.mark.skipif(
not hasattr(socket, "AF_UNIX"), reason="Requires socket.AF_UNIX support"

View file

@@ -0,0 +1,33 @@
#!/bin/bash
# Generate certificates
python -m trustme
# This creates server.pem, server.key, client.pem
# Start the server in the background
datasette --memory \
--ssl-keyfile=server.key \
--ssl-certfile=server.pem \
-p 8152 &
# Store the background process ID in a variable
server_pid=$!
# Wait for the server to start
sleep 2
# Make a test request using curl
curl -f --cacert client.pem 'https://localhost:8152/_memory.json'
# Save curl's exit code (-f option causes it to return one on HTTP errors)
curl_exit_code=$?
# Shut down the server
kill $server_pid
sleep 1
# Clean up the certificates
rm server.pem server.key client.pem
echo $curl_exit_code
exit $curl_exit_code

View file

@@ -83,13 +83,11 @@ async def test_through_filters_from_request(app_client):
request = Request.fake(
'/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}'
)
filter_args = await (
through_filters(
request=request,
datasette=app_client.ds,
table="roadside_attractions",
database="fixtures",
)
filter_args = await through_filters(
request=request,
datasette=app_client.ds,
table="roadside_attractions",
database="fixtures",
)()
assert filter_args.where_clauses == [
"pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)"
@@ -106,13 +104,11 @@ async def test_through_filters_from_request(app_client):
request = Request.fake(
'/?_through={"table":"roadside_attraction_characteristics","column":"characteristic_id","value":"1"}'
)
filter_args = await (
through_filters(
request=request,
datasette=app_client.ds,
table="roadside_attractions",
database="fixtures",
)
filter_args = await through_filters(
request=request,
datasette=app_client.ds,
table="roadside_attractions",
database="fixtures",
)()
assert filter_args.where_clauses == [
"pk in (select attraction_id from roadside_attraction_characteristics where characteristic_id = :p0)"
@@ -127,12 +123,10 @@
@pytest.mark.asyncio
async def test_where_filters_from_request(app_client):
request = Request.fake("/?_where=pk+>+3")
filter_args = await (
where_filters(
request=request,
datasette=app_client.ds,
database="fixtures",
)
filter_args = await where_filters(
request=request,
datasette=app_client.ds,
database="fixtures",
)()
assert filter_args.where_clauses == ["pk > 3"]
assert filter_args.params == {}
@@ -145,13 +139,11 @@
@pytest.mark.asyncio
async def test_search_filters_from_request(app_client):
request = Request.fake("/?_search=bobcat")
filter_args = await (
search_filters(
request=request,
datasette=app_client.ds,
database="fixtures",
table="searchable",
)
filter_args = await search_filters(
request=request,
datasette=app_client.ds,
database="fixtures",
table="searchable",
)()
assert filter_args.where_clauses == [
"rowid in (select rowid from searchable_fts where searchable_fts match escape_fts(:search))"

View file

@@ -1,5 +1,8 @@
from .fixtures import app_client
import pytest
from unittest.mock import patch
from datasette.app import Datasette
from datasette.database import Database
def test_internal_only_available_to_root(app_client):
@@ -65,3 +68,51 @@ def test_internal_foreign_keys(app_client):
"table_name",
"from",
}
@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version_returns_none", (True, False))
async def test_detects_schema_changes(schema_version_returns_none):
ds = Datasette()
db_name = "test_detects_schema_changes_{}".format(schema_version_returns_none)
db = ds.add_memory_database(db_name)
# Test if Datasette correctly detects schema changes, whether or not
# the schema_version method is working.
# https://github.com/simonw/datasette/issues/2058
_internal = ds.get_database("_internal")
async def get_tables():
return [
dict(r)
for r in await _internal.execute(
"select table_name from tables where database_name = ?", [db_name]
)
]
async def test_it():
await ds.refresh_schemas()
initial_hash = await db.schema_hash()
# _internal should list zero tables
tables = await get_tables()
assert tables == []
# Create a new table
await db.execute_write("CREATE TABLE test (id INTEGER PRIMARY KEY)")
await ds.refresh_schemas()
assert await db.schema_hash() != initial_hash
# _internal should list one table
tables = await get_tables()
assert tables == [
{"table_name": "test"},
]
async def schema_version_none(self):
return None
if schema_version_returns_none:
with patch(
"datasette.database.Database.schema_version", new=schema_version_none
):
await test_it()
else:
await test_it()

View file

@@ -130,7 +130,14 @@ async def test_datasette_ensure_permissions_check_visibility(
@pytest.mark.asyncio
async def test_datasette_render_template_no_request():
# https://github.com/simonw/datasette/issues/1849
ds = Datasette([], memory=True)
ds = Datasette(memory=True)
await ds.invoke_startup()
rendered = await ds.render_template("error.html")
assert "Error " in rendered
def test_datasette_error_if_string_not_list(tmpdir):
# https://github.com/simonw/datasette/issues/1985
db_path = str(tmpdir / "data.db")
with pytest.raises(ValueError):
ds = Datasette(db_path)

View file

@@ -8,6 +8,7 @@ from pathlib import Path
# this resolves to "./ext", which is enough for SQLite to calculate the rest
COMPILED_EXTENSION_PATH = str(Path(__file__).parent / "ext")
# See if ext.c has been compiled, based off the different possible suffixes.
def has_compiled_ext():
for ext in ["dylib", "so", "dll"]:
@@ -20,7 +21,6 @@ def has_compiled_ext():
@pytest.mark.asyncio
@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c")
async def test_load_extension_default_entrypoint():
# The default entrypoint only loads a() and NOT b() or c(), so those
# should fail.
ds = Datasette(sqlite_extensions=[COMPILED_EXTENSION_PATH])
@@ -41,7 +41,6 @@ async def test_load_extension_default_entrypoint():
@pytest.mark.asyncio
@pytest.mark.skipif(not has_compiled_ext(), reason="Requires compiled ext.c")
async def test_load_extension_multiple_entrypoints():
# Load in the default entrypoint and the other 2 custom entrypoints, now
# all a(), b(), and c() should run successfully.
ds = Datasette(

View file

@@ -57,7 +57,7 @@ def test_publish_cloudrun_prompts_for_service(
"Service name: input-service"
) == result.output.strip()
assert 0 == result.exit_code
tag = "gcr.io/myproject/datasette"
tag = "gcr.io/myproject/datasette-input-service"
mock_call.assert_has_calls(
[
mock.call(f"gcloud builds submit --tag {tag}", shell=True),
@@ -86,7 +86,7 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory):
cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"]
)
assert 0 == result.exit_code
tag = f"gcr.io/{mock_output.return_value}/datasette"
tag = f"gcr.io/{mock_output.return_value}/datasette-test"
mock_call.assert_has_calls(
[
mock.call(f"gcloud builds submit --tag {tag}", shell=True),
@@ -167,7 +167,7 @@ def test_publish_cloudrun_memory_cpu(
assert 2 == result.exit_code
return
assert 0 == result.exit_code
tag = f"gcr.io/{mock_output.return_value}/datasette"
tag = f"gcr.io/{mock_output.return_value}/datasette-test"
expected_call = (
"gcloud run deploy --allow-unauthenticated --platform=managed"
" --image {} test".format(tag)

View file

@@ -2,6 +2,7 @@ from click.testing import CliRunner
from datasette import cli
from unittest import mock
import os
import pathlib
import pytest
@@ -128,3 +129,55 @@ def test_publish_heroku_plugin_secrets(
mock.call(["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"]),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.heroku.check_output")
@mock.patch("datasette.publish.heroku.call")
def test_publish_heroku_generate_dir(
mock_call, mock_check_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_check_output.side_effect = lambda s: {
"['heroku', 'plugins']": b"heroku-builds",
}[repr(s)]
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
output = str(tmp_path_factory.mktemp("generate_dir") / "output")
result = runner.invoke(
cli.cli,
[
"publish",
"heroku",
"test.db",
"--generate-dir",
output,
],
)
assert result.exit_code == 0
path = pathlib.Path(output)
assert path.exists()
file_names = {str(r.relative_to(path)) for r in path.glob("*")}
assert file_names == {
"requirements.txt",
"bin",
"runtime.txt",
"Procfile",
"test.db",
}
for name, expected in (
("requirements.txt", "datasette"),
("runtime.txt", "python-3.11.0"),
(
"Procfile",
(
"web: datasette serve --host 0.0.0.0 -i test.db "
"--cors --port $PORT --inspect-file inspect-data.json"
),
),
):
with open(path / name) as fp:
assert fp.read().strip() == expected