From 83adf55b2da83fd9a227f7e4c8506d72def72294 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Oct 2022 20:28:15 -0700 Subject: [PATCH 01/82] Deploy one-dot-zero branch preview --- .github/workflows/deploy-latest.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 2b94a7f1..43a843ed 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -3,7 +3,8 @@ name: Deploy latest.datasette.io on: push: branches: - - main + - main + - 1.0-dev permissions: contents: read @@ -68,6 +69,8 @@ jobs: gcloud config set project datasette-222320 export SUFFIX="-${GITHUB_REF#refs/heads/}" export SUFFIX=${SUFFIX#-main} + # Replace 1.0 with one-dot-zero in SUFFIX + export SUFFIX=${SUFFIX//1.0/one-dot-zero} datasette publish cloudrun fixtures.db fixtures2.db extra_database.db \ -m fixtures.json \ --plugins-dir=plugins \ From 02ae1a002918eb91f794e912c32742559da34cf5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 11:59:03 -0700 Subject: [PATCH 02/82] Upgrade Docker images to Python 3.11, closes #1853 --- Dockerfile | 2 +- datasette/utils/__init__.py | 2 +- demos/apache-proxy/Dockerfile | 2 +- docs/publish.rst | 2 +- tests/test_package.py | 2 +- tests/test_publish_cloudrun.py | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index ee7ed957..9a8f06cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10.6-slim-bullseye as build +FROM python:3.11.0-slim-bullseye as build # Version of Datasette to install, e.g. 0.55 # docker build . -t datasette --build-arg VERSION=0.55 diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 2bdea673..803ba96d 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -390,7 +390,7 @@ def make_dockerfile( "SQLITE_EXTENSIONS" ] = "/usr/lib/x86_64-linux-gnu/mod_spatialite.so" return """ -FROM python:3.10.6-slim-bullseye +FROM python:3.11.0-slim-bullseye COPY . /app WORKDIR /app {apt_get_extras} diff --git a/demos/apache-proxy/Dockerfile b/demos/apache-proxy/Dockerfile index 70b33bec..9a8448da 100644 --- a/demos/apache-proxy/Dockerfile +++ b/demos/apache-proxy/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10.6-slim-bullseye +FROM python:3.11.0-slim-bullseye RUN apt-get update && \ apt-get install -y apache2 supervisor && \ diff --git a/docs/publish.rst b/docs/publish.rst index d817ed31..4ba94792 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -146,7 +146,7 @@ Here's example output for the package command:: $ datasette package parlgov.db --extra-options="--setting sql_time_limit_ms 2500" Sending build context to Docker daemon 4.459MB - Step 1/7 : FROM python:3.10.6-slim-bullseye + Step 1/7 : FROM python:3.11.0-slim-bullseye ---> 79e1dc9af1c1 Step 2/7 : COPY . /app ---> Using cache diff --git a/tests/test_package.py b/tests/test_package.py index ac15e61e..f05f3ece 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -12,7 +12,7 @@ class CaptureDockerfile: EXPECTED_DOCKERFILE = """ -FROM python:3.10.6-slim-bullseye +FROM python:3.11.0-slim-bullseye COPY . 
/app WORKDIR /app diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index e64534d2..158a090e 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -242,7 +242,7 @@ def test_publish_cloudrun_plugin_secrets( ) expected = textwrap.dedent( r""" - FROM python:3.10.6-slim-bullseye + FROM python:3.11.0-slim-bullseye COPY . /app WORKDIR /app @@ -309,7 +309,7 @@ def test_publish_cloudrun_apt_get_install( ) expected = textwrap.dedent( r""" - FROM python:3.10.6-slim-bullseye + FROM python:3.11.0-slim-bullseye COPY . /app WORKDIR /app From 6d085af28c63c28ecda388fc0552c91f756be0c6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 07:13:43 -0700 Subject: [PATCH 03/82] Python 3.11 in CI --- .github/workflows/publish.yml | 16 ++++++++-------- .github/workflows/test.yml | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 9ef09d2e..fa608055 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,14 +12,14 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 + - uses: actions/cache@v3 name: Configure pip caching with: path: ~/.cache/pip @@ -37,12 +37,12 @@ jobs: runs-on: ubuntu-latest needs: [test] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: '3.10' - - uses: actions/cache@v2 + python-version: '3.11' + - uses: actions/cache@v3 name: Configure pip caching with: path: ~/.cache/pip diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e38d5ee9..886f649a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,14 +10,14 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11-dev"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 + - uses: actions/cache@v3 name: Configure pip caching with: path: ~/.cache/pip From 05b479224fa57af3ab2d03769edd5081dad62a19 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 12:16:48 -0700 Subject: [PATCH 04/82] Don't need pysqlite3-binary any more, refs #1853 --- .github/workflows/deploy-latest.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml index 43a843ed..5598dc12 100644 --- a/.github/workflows/deploy-latest.yml +++ b/.github/workflows/deploy-latest.yml @@ -14,12 +14,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out datasette - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: "3.10" - - uses: actions/cache@v2 + python-version: "3.11" + - uses: actions/cache@v3 name: Configure pip caching with: path: 
~/.cache/pip @@ -77,7 +77,6 @@ jobs: --branch=$GITHUB_SHA \ --version-note=$GITHUB_SHA \ --extra-options="--setting template_debug 1 --setting trace_debug 1 --crossdb" \ - --install=pysqlite3-binary \ --service "datasette-latest$SUFFIX" - name: Deploy to docs as well (only for main) if: ${{ github.ref == 'refs/heads/main' }} From f9ae92b37796f7f559d57b1ee9718aa4d43547e8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 12:42:21 -0700 Subject: [PATCH 05/82] Poll until servers start, refs #1854 --- tests/conftest.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 215853b3..f4638a14 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import httpx import os import pathlib import pytest @@ -110,8 +111,13 @@ def ds_localhost_http_server(): # Avoid FileNotFoundError: [Errno 2] No such file or directory: cwd=tempfile.gettempdir(), ) - # Give the server time to start - time.sleep(1.5) + # Loop until port 8041 serves traffic + while True: + try: + httpx.get("http://localhost:8041/") + break + except httpx.ConnectError: + time.sleep(0.1) # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc @@ -146,8 +152,12 @@ def ds_localhost_https_server(tmp_path_factory): stderr=subprocess.STDOUT, cwd=tempfile.gettempdir(), ) - # Give the server time to start - time.sleep(1.5) + while True: + try: + httpx.get("https://localhost:8042/", verify=client_cert) + break + except httpx.ConnectError: + time.sleep(0.1) # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc, client_cert @@ -168,8 +178,15 @@ def ds_unix_domain_socket_server(tmp_path_factory): stderr=subprocess.STDOUT, cwd=tempfile.gettempdir(), ) - # Give the server time to start - time.sleep(1.5) + # Poll until available + transport = httpx.HTTPTransport(uds=uds) + client = httpx.Client(transport=transport) + while True: + try: + client.get("http://localhost/_memory.json") + break + except httpx.ConnectError: + time.sleep(0.1) # Check it started successfully assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8") yield ds_proc, uds From 42f8b402e6aa56af4bbe921e346af8df42acd50f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 17:07:58 -0700 Subject: [PATCH 06/82] Initial prototype of create API token page, refs #1852 --- datasette/app.py | 5 ++ datasette/templates/create_token.html | 83 +++++++++++++++++++++++++++ datasette/views/special.py | 54 +++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 datasette/templates/create_token.html diff --git a/datasette/app.py b/datasette/app.py index 9df16558..cab9d142 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -33,6 +33,7 @@ from .views.special import ( JsonDataView, PatternPortfolioView, AuthTokenView, + CreateTokenView, LogoutView, AllowDebugView, PermissionsDebugView, @@ -1212,6 +1213,10 @@ class Datasette: AuthTokenView.as_view(self), r"/-/auth-token$", ) + add_route( + CreateTokenView.as_view(self), + r"/-/create-token$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", diff --git a/datasette/templates/create_token.html b/datasette/templates/create_token.html new file mode 100644 index 00000000..a94881ed --- /dev/null +++ b/datasette/templates/create_token.html @@ -0,0 +1,83 @@ +{% extends "base.html" %} + +{% block title %}Create an API token{% endblock %} + +{% block content %} + +
<h1>Create an API token</h1>
+
+<p>This token will allow API access with the same abilities as your current user.</p>
+
+{% if errors %}
+  {% for error in errors %}
+    <p class="message-error">{{ error }}</p>
+  {% endfor %}
+{% endif %}
+
+<form action="{{ urls.path('-/create-token') }}" method="post">
+  <div>
+    <select name="expire_type">
+      <option value="">Token never expires</option>
+      <option value="minutes">Expires after X minutes</option>
+      <option value="hours">Expires after X hours</option>
+      <option value="days">Expires after X days</option>
+    </select>
+    <input type="text" name="expire_duration">
+    <input type="submit" value="Create token">
+    <input type="hidden" name="csrftoken" value="{{ csrftoken() }}">
+  </div>
+</form>
+
+{% if token %}
+  <div>
+    <h2>Your API token</h2>
+    <form>
+      <input type="text" class="copyable" value="{{ token }}">
+    </form>
+    <details>
+      <summary>Token details</summary>
+      <pre>{{ token_bits|tojson }}</pre>
+    </details>
+  </div>
+ {% endif %} + + + +{% endblock %} diff --git a/datasette/views/special.py b/datasette/views/special.py index dd834528..f2e69412 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -3,6 +3,7 @@ from datasette.utils.asgi import Response, Forbidden from datasette.utils import actor_matches_allow, add_cors_headers from .base import BaseView import secrets +import time class JsonDataView(BaseView): @@ -163,3 +164,56 @@ class MessagesDebugView(BaseView): else: datasette.add_message(request, message, getattr(datasette, message_type)) return Response.redirect(self.ds.urls.instance()) + + +class CreateTokenView(BaseView): + name = "create_token" + has_json_alternate = False + + async def get(self, request): + if not request.actor: + raise Forbidden("You must be logged in to create a token") + return await self.render( + ["create_token.html"], + request, + {"actor": request.actor}, + ) + + async def post(self, request): + if not request.actor: + raise Forbidden("You must be logged in to create a token") + post = await request.post_vars() + expires = None + errors = [] + if post.get("expire_type"): + duration = post.get("expire_duration") + if not duration or not duration.isdigit() or not int(duration) > 0: + errors.append("Invalid expire duration") + else: + unit = post["expire_type"] + if unit == "minutes": + expires = int(duration) * 60 + elif unit == "hours": + expires = int(duration) * 60 * 60 + elif unit == "days": + expires = int(duration) * 60 * 60 * 24 + else: + errors.append("Invalid expire duration unit") + token_bits = None + token = None + if not errors: + token_bits = { + "a": request.actor, + "e": (int(time.time()) + expires) if expires else None, + } + token = self.ds.sign(token_bits, "token") + return await self.render( + ["create_token.html"], + request, + { + "actor": request.actor, + "errors": errors, + "token": token, + "token_bits": token_bits, + }, + ) From 68ccb7578b5d3bf68b86fb2f5cf8753098dfe075 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 18:40:07 -0700 Subject: [PATCH 07/82] dstoke_ prefix for tokens Refs https://github.com/simonw/datasette/issues/1852#issuecomment-1291290451 --- datasette/views/special.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/views/special.py b/datasette/views/special.py index f2e69412..d3f202f4 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -206,7 +206,7 @@ class CreateTokenView(BaseView): "a": request.actor, "e": (int(time.time()) + expires) if expires else None, } - token = self.ds.sign(token_bits, "token") + token = "dstok_{}".format(self.ds.sign(token_bits, "token")) return await self.render( ["create_token.html"], request, From 7ab091e8ef8d3af1e23b5a81ffad2bd8c96cc47c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 19:04:05 -0700 Subject: [PATCH 08/82] Tests and docs for /-/create-token, refs #1852 --- datasette/views/special.py | 14 +++++--- docs/authentication.rst | 15 +++++++++ tests/test_auth.py | 68 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/datasette/views/special.py b/datasette/views/special.py index d3f202f4..7f70eb1f 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -170,9 +170,16 @@ class CreateTokenView(BaseView): name = "create_token" has_json_alternate = False - async def get(self, request): + def check_permission(self, request): if not request.actor: raise Forbidden("You must be logged in to create a token") + if not 
request.actor.get("id"): + raise Forbidden( + "You must be logged in as an actor with an ID to create a token" + ) + + async def get(self, request): + self.check_permission(request) return await self.render( ["create_token.html"], request, @@ -180,8 +187,7 @@ class CreateTokenView(BaseView): ) async def post(self, request): - if not request.actor: - raise Forbidden("You must be logged in to create a token") + self.check_permission(request) post = await request.post_vars() expires = None errors = [] @@ -203,7 +209,7 @@ class CreateTokenView(BaseView): token = None if not errors: token_bits = { - "a": request.actor, + "a": request.actor["id"], "e": (int(time.time()) + expires) if expires else None, } token = "dstok_{}".format(self.ds.sign(token_bits, "token")) diff --git a/docs/authentication.rst b/docs/authentication.rst index 685dab15..fc903fbb 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -333,6 +333,21 @@ To limit this ability for just one specific database, use this: } } +.. _CreateTokenView: + +API Tokens +========== + +Datasette includes a default mechanism for generating API tokens that can be used to authenticate requests. + +Authenticated users can create new API tokens using a form on the ``/-/create-token`` page. + +Created tokens can then be passed in the ``Authorization: Bearer token_here`` header of HTTP requests to Datasette. + +A token created by a user will include that user's ``"id"`` in the token payload, so any permissions granted to that user based on their ID will be made available to the token as well. + +Coming soon: a mechanism for creating tokens that can only perform a subset of the actions available to the user who created them. + .. _permissions_plugins: Checking permissions in plugins diff --git a/tests/test_auth.py b/tests/test_auth.py index 4ef35a76..3aaab50d 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -110,3 +110,71 @@ def test_no_logout_button_in_navigation_if_no_ds_actor_cookie(app_client, path): response = app_client.get(path + "?_bot=1") assert "bot" in response.text assert '
' not in response.text + + +@pytest.mark.parametrize( + "post_data,errors,expected_duration", + ( + ({"expire_type": ""}, [], None), + ({"expire_type": "x"}, ["Invalid expire duration"], None), + ({"expire_type": "minutes"}, ["Invalid expire duration"], None), + ( + {"expire_type": "minutes", "expire_duration": "x"}, + ["Invalid expire duration"], + None, + ), + ( + {"expire_type": "minutes", "expire_duration": "-1"}, + ["Invalid expire duration"], + None, + ), + ( + {"expire_type": "minutes", "expire_duration": "0"}, + ["Invalid expire duration"], + None, + ), + ( + {"expire_type": "minutes", "expire_duration": "10"}, + [], + 600, + ), + ( + {"expire_type": "hours", "expire_duration": "10"}, + [], + 10 * 60 * 60, + ), + ( + {"expire_type": "days", "expire_duration": "3"}, + [], + 60 * 60 * 24 * 3, + ), + ), +) +def test_auth_create_token(app_client, post_data, errors, expected_duration): + assert app_client.get("/-/create-token").status == 403 + ds_actor = app_client.actor_cookie({"id": "test"}) + response = app_client.get("/-/create-token", cookies={"ds_actor": ds_actor}) + assert response.status == 200 + assert ">Create an API token<" in response.text + # Now try actually creating one + response2 = app_client.post( + "/-/create-token", + post_data, + csrftoken_from=True, + cookies={"ds_actor": ds_actor}, + ) + assert response2.status == 200 + if errors: + for error in errors: + assert '
<p class="message-error">{}</p>
'.format(error) in response2.text + else: + # Extract token from page + token = response2.text.split('value="dstok_')[1].split('"')[0] + details = app_client.ds.unsign(token, "token") + assert details.keys() == {"a", "e"} + assert details["a"] == "test" + if expected_duration is None: + assert details["e"] is None + else: + about_right = int(time.time()) + expected_duration + assert about_right - 2 < details["e"] < about_right + 2 From b29e487bc3fde6418bf45bda7cfed2e081ff03fb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 19:18:41 -0700 Subject: [PATCH 09/82] actor_from_request for dstok_ tokens, refs #1852 --- datasette/default_permissions.py | 25 +++++++++++++++++++++++++ datasette/utils/testing.py | 2 ++ tests/test_auth.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index b58d8d1b..4d836ddc 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -1,5 +1,7 @@ from datasette import hookimpl from datasette.utils import actor_matches_allow +import itsdangerous +import time @hookimpl(tryfirst=True) @@ -45,3 +47,26 @@ def permission_allowed(datasette, actor, action, resource): return actor_matches_allow(actor, database_allow_sql) return inner + + +@hookimpl +def actor_from_request(datasette, request): + prefix = "dstok_" + authorization = request.headers.get("authorization") + if not authorization: + return None + if not authorization.startswith("Bearer "): + return None + token = authorization[len("Bearer ") :] + if not token.startswith(prefix): + return None + token = token[len(prefix) :] + try: + decoded = datasette.unsign(token, namespace="token") + except itsdangerous.BadSignature: + return None + expires_at = decoded.get("e") + if expires_at is not None: + if expires_at < time.time(): + return None + return {"id": decoded["a"], "dstok": True} diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py index b28fc575..4f76a799 100644 --- a/datasette/utils/testing.py +++ b/datasette/utils/testing.py @@ -62,6 +62,7 @@ class TestClient: method="GET", cookies=None, if_none_match=None, + headers=None, ): return await self._request( path=path, @@ -70,6 +71,7 @@ class TestClient: method=method, cookies=cookies, if_none_match=if_none_match, + headers=headers, ) @async_to_sync diff --git a/tests/test_auth.py b/tests/test_auth.py index 3aaab50d..be21d6a5 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -178,3 +178,35 @@ def test_auth_create_token(app_client, post_data, errors, expected_duration): else: about_right = int(time.time()) + expected_duration assert about_right - 2 < details["e"] < about_right + 2 + + +@pytest.mark.parametrize( + "scenario,should_work", + ( + ("no_token", False), + ("invalid_token", False), + ("expired_token", False), + ("valid_unlimited_token", True), + ("valid_expiring_token", True), + ), +) +def test_auth_with_dstok_token(app_client, scenario, should_work): + token = None + if scenario == "valid_unlimited_token": + token = app_client.ds.sign({"a": "test"}, "token") + elif scenario == "valid_expiring_token": + token = app_client.ds.sign({"a": "test", "e": int(time.time()) + 1000}, "token") + elif scenario == "expired_token": + token = app_client.ds.sign({"a": "test", "e": int(time.time()) - 1000}, "token") + elif scenario == "invalid_token": + token = "invalid" + if token: + token = "dstok_{}".format(token) + headers = {} + if token: + headers["Authorization"] = "Bearer {}".format(token) + 
response = app_client.get("/-/actor.json", headers=headers) + if should_work: + assert response.json == {"actor": {"id": "test", "dstok": True}} + else: + assert response.json == {"actor": None} From 0f013ff497df62e1dd2075777b9817555646010e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 19:43:55 -0700 Subject: [PATCH 10/82] Mechanism to prevent tokens creating tokens, closes #1857 --- datasette/default_permissions.py | 2 +- datasette/views/special.py | 4 ++++ docs/authentication.rst | 2 ++ tests/test_auth.py | 11 ++++++++++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 4d836ddc..d908af7a 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -69,4 +69,4 @@ def actor_from_request(datasette, request): if expires_at is not None: if expires_at < time.time(): return None - return {"id": decoded["a"], "dstok": True} + return {"id": decoded["a"], "token": "dstok"} diff --git a/datasette/views/special.py b/datasette/views/special.py index 7f70eb1f..91130353 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -177,6 +177,10 @@ class CreateTokenView(BaseView): raise Forbidden( "You must be logged in as an actor with an ID to create a token" ) + if request.actor.get("token"): + raise Forbidden( + "Token authentication cannot be used to create additional tokens" + ) async def get(self, request): self.check_permission(request) diff --git a/docs/authentication.rst b/docs/authentication.rst index fc903fbb..cbecd296 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -348,6 +348,8 @@ A token created by a user will include that user's ``"id"`` in the token payload Coming soon: a mechanism for creating tokens that can only perform a subset of the actions available to the user who created them. +This page cannot be accessed by actors with a ``"token": "some-value"`` property. This is to prevent API tokens from being used to automatically create more tokens. Datasette plugins that implement their own form of API token authentication should follow this convention. + .. 
_permissions_plugins: Checking permissions in plugins diff --git a/tests/test_auth.py b/tests/test_auth.py index be21d6a5..397d51d7 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -180,6 +180,15 @@ def test_auth_create_token(app_client, post_data, errors, expected_duration): assert about_right - 2 < details["e"] < about_right + 2 +def test_auth_create_token_not_allowed_for_tokens(app_client): + ds_tok = app_client.ds.sign({"a": "test", "token": "dstok"}, "token") + response = app_client.get( + "/-/create-token", + headers={"Authorization": "Bearer dstok_{}".format(ds_tok)}, + ) + assert response.status == 403 + + @pytest.mark.parametrize( "scenario,should_work", ( @@ -207,6 +216,6 @@ def test_auth_with_dstok_token(app_client, scenario, should_work): headers["Authorization"] = "Bearer {}".format(token) response = app_client.get("/-/actor.json", headers=headers) if should_work: - assert response.json == {"actor": {"id": "test", "dstok": True}} + assert response.json == {"actor": {"id": "test", "token": "dstok"}} else: assert response.json == {"actor": None} From c23fa850e7f21977e367e3467656055216978e8a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 19:55:47 -0700 Subject: [PATCH 11/82] allow_signed_tokens setting, closes #1856 --- datasette/app.py | 5 +++++ datasette/default_permissions.py | 2 ++ datasette/views/special.py | 2 ++ docs/authentication.rst | 2 ++ docs/cli-reference.rst | 2 ++ docs/plugins.rst | 1 + docs/settings.rst | 13 +++++++++++++ tests/test_auth.py | 26 +++++++++++++++++++++----- 8 files changed, 48 insertions(+), 5 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index cab9d142..c868f8d3 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -124,6 +124,11 @@ SETTINGS = ( True, "Allow users to download the original SQLite database files", ), + Setting( + "allow_signed_tokens", + True, + "Allow users to create and use signed API tokens", + ), Setting("suggest_facets", True, "Calculate and display suggested facets"), Setting( "default_cache_ttl", diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index d908af7a..49ca8851 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -52,6 +52,8 @@ def permission_allowed(datasette, actor, action, resource): @hookimpl def actor_from_request(datasette, request): prefix = "dstok_" + if not datasette.setting("allow_signed_tokens"): + return None authorization = request.headers.get("authorization") if not authorization: return None diff --git a/datasette/views/special.py b/datasette/views/special.py index 91130353..89015958 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -171,6 +171,8 @@ class CreateTokenView(BaseView): has_json_alternate = False def check_permission(self, request): + if not self.ds.setting("allow_signed_tokens"): + raise Forbidden("Signed tokens are not enabled for this Datasette instance") if not request.actor: raise Forbidden("You must be logged in to create a token") if not request.actor.get("id"): diff --git a/docs/authentication.rst b/docs/authentication.rst index cbecd296..50304ec5 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -350,6 +350,8 @@ Coming soon: a mechanism for creating tokens that can only perform a subset of t This page cannot be accessed by actors with a ``"token": "some-value"`` property. This is to prevent API tokens from being used to automatically create more tokens. 
Datasette plugins that implement their own form of API token authentication should follow this convention. +You can disable this feature using the :ref:`allow_signed_tokens ` setting. + .. _permissions_plugins: Checking permissions in plugins diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 4a8465cb..fd5e2404 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -226,6 +226,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam ?_facet= parameter (default=True) allow_download Allow users to download the original SQLite database files (default=True) + allow_signed_tokens Allow users to create and use signed API tokens + (default=True) suggest_facets Calculate and display suggested facets (default=True) default_cache_ttl Default HTTP cache TTL (used in Cache-Control: diff --git a/docs/plugins.rst b/docs/plugins.rst index 29078054..9efef32f 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -151,6 +151,7 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "templates": false, "version": null, "hooks": [ + "actor_from_request", "permission_allowed" ] }, diff --git a/docs/settings.rst b/docs/settings.rst index a6d50543..be640b21 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -169,6 +169,19 @@ Should users be able to download the original SQLite database using a link on th datasette mydatabase.db --setting allow_download off +.. _setting_allow_signed_tokens: + +allow_signed_tokens +~~~~~~~~~~~~~~~~~~~ + +Should users be able to create signed API tokens to access Datasette? + +This is turned on by default. Use the following to turn it off:: + + datasette mydatabase.db --setting allow_signed_tokens off + +Turning this setting off will disable the ``/-/create-token`` page, :ref:`described here `. It will also cause any incoming ``Authorization: Bearer dstok_...`` API tokens to be ignored. + .. 
_setting_default_cache_ttl: default_cache_ttl diff --git a/tests/test_auth.py b/tests/test_auth.py index 397d51d7..a79dafd8 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -189,9 +189,20 @@ def test_auth_create_token_not_allowed_for_tokens(app_client): assert response.status == 403 +def test_auth_create_token_not_allowed_if_allow_signed_tokens_off(app_client): + app_client.ds._settings["allow_signed_tokens"] = False + try: + ds_actor = app_client.actor_cookie({"id": "test"}) + response = app_client.get("/-/create-token", cookies={"ds_actor": ds_actor}) + assert response.status == 403 + finally: + app_client.ds._settings["allow_signed_tokens"] = True + + @pytest.mark.parametrize( "scenario,should_work", ( + ("allow_signed_tokens_off", False), ("no_token", False), ("invalid_token", False), ("expired_token", False), @@ -201,7 +212,7 @@ def test_auth_create_token_not_allowed_for_tokens(app_client): ) def test_auth_with_dstok_token(app_client, scenario, should_work): token = None - if scenario == "valid_unlimited_token": + if scenario in ("valid_unlimited_token", "allow_signed_tokens_off"): token = app_client.ds.sign({"a": "test"}, "token") elif scenario == "valid_expiring_token": token = app_client.ds.sign({"a": "test", "e": int(time.time()) + 1000}, "token") @@ -211,11 +222,16 @@ def test_auth_with_dstok_token(app_client, scenario, should_work): token = "invalid" if token: token = "dstok_{}".format(token) + if scenario == "allow_signed_tokens_off": + app_client.ds._settings["allow_signed_tokens"] = False headers = {} if token: headers["Authorization"] = "Bearer {}".format(token) response = app_client.get("/-/actor.json", headers=headers) - if should_work: - assert response.json == {"actor": {"id": "test", "token": "dstok"}} - else: - assert response.json == {"actor": None} + try: + if should_work: + assert response.json == {"actor": {"id": "test", "token": "dstok"}} + else: + assert response.json == {"actor": None} + finally: + app_client.ds._settings["allow_signed_tokens"] = True From c36a74ece1e475291af326d493d8db9ff3afdd30 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 21:04:39 -0700 Subject: [PATCH 12/82] Try shutting down executor in tests to free up thread local SQLite connections, refs #1843 --- tests/fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index 13a3dffa..d1afd2f3 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -166,6 +166,7 @@ def make_app_client( # Close the connection to avoid "too many open files" errors conn.close() os.remove(filepath) + ds.executor.shutdown() @pytest.fixture(scope="session") From c556fad65d8a45ce85027678796a12ac9107d9ed Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 21:25:47 -0700 Subject: [PATCH 13/82] Try to address too many files error again, refs #1843 --- tests/fixtures.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index d1afd2f3..92a10da6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -131,10 +131,14 @@ def make_app_client( for sql, params in TABLE_PARAMETERIZED_SQL: with conn: conn.execute(sql, params) + # Close the connection to avoid "too many open files" errors + conn.close() if extra_databases is not None: for extra_filename, extra_sql in extra_databases.items(): extra_filepath = os.path.join(tmpdir, extra_filename) - sqlite3.connect(extra_filepath).executescript(extra_sql) + c2 = sqlite3.connect(extra_filepath) + c2.executescript(extra_sql) + 
c2.close() # Insert at start to help test /-/databases ordering: files.insert(0, extra_filepath) os.chdir(os.path.dirname(filepath)) @@ -163,10 +167,7 @@ def make_app_client( crossdb=crossdb, ) yield TestClient(ds) - # Close the connection to avoid "too many open files" errors - conn.close() os.remove(filepath) - ds.executor.shutdown() @pytest.fixture(scope="session") From c7956eed7777c62653b4d508570c5d77cfead7d9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Oct 2022 21:26:12 -0700 Subject: [PATCH 14/82] datasette create-token command, refs #1859 --- datasette/default_permissions.py | 38 ++++++++++++++++++++++++++++ docs/authentication.rst | 23 +++++++++++++++++ docs/cli-reference.rst | 43 ++++++++++++++++++++++++++------ docs/plugins.rst | 3 ++- tests/test_api.py | 1 + tests/test_auth.py | 28 +++++++++++++++++++++ tests/test_plugins.py | 2 ++ 7 files changed, 130 insertions(+), 8 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 49ca8851..12499c16 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -1,6 +1,8 @@ from datasette import hookimpl from datasette.utils import actor_matches_allow +import click import itsdangerous +import json import time @@ -72,3 +74,39 @@ def actor_from_request(datasette, request): if expires_at < time.time(): return None return {"id": decoded["a"], "token": "dstok"} + + +@hookimpl +def register_commands(cli): + from datasette.app import Datasette + + @cli.command() + @click.argument("id") + @click.option( + "--secret", + help="Secret used for signing the API tokens", + envvar="DATASETTE_SECRET", + required=True, + ) + @click.option( + "-e", + "--expires-after", + help="Token should expire after this many seconds", + type=int, + ) + @click.option( + "--debug", + help="Show decoded token", + is_flag=True, + ) + def create_token(id, secret, expires_after, debug): + "Create a signed API token for the specified actor ID" + ds = Datasette(secret=secret) + bits = {"a": id, "token": "dstok"} + if expires_after: + bits["e"] = int(time.time()) + expires_after + token = ds.sign(bits, namespace="token") + click.echo("dstok_{}".format(token)) + if debug: + click.echo("\nDecoded:\n") + click.echo(json.dumps(ds.unsign(token, namespace="token"), indent=2)) diff --git a/docs/authentication.rst b/docs/authentication.rst index 50304ec5..0835e17c 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -352,6 +352,29 @@ This page cannot be accessed by actors with a ``"token": "some-value"`` property You can disable this feature using the :ref:`allow_signed_tokens ` setting. +.. _authentication_cli_create_token: + +datasette create-token +---------------------- + +You can also create tokens on the command line using the ``datasette create-token`` command. + +This command takes one required argument - the ID of the actor to be associated with the created token. + +You can specify an ``--expires-after`` option in seconds. If omitted, the token will never expire. + +The command will sign the token using the ``DATASETTE_SECRET`` environment variable, if available. You can also pass the secret using the ``--secret`` option. + +This means you can run the command locally to create tokens for use with a deployed Datasette instance, provided you know that instance's secret. 
+ +To create a token for the ``root`` actor that will expire in one hour:: + + datasette create-token root --expires-after 3600 + +To create a secret that never expires using a specific secret:: + + datasette create-token root --secret my-secret-goes-here + .. _permissions_plugins: Checking permissions in plugins diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index fd5e2404..b40c6b2c 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -47,13 +47,14 @@ Running ``datasette --help`` shows a list of all of the available commands. --help Show this message and exit. Commands: - serve* Serve up specified SQLite database files with a web UI - inspect Generate JSON summary of provided database files - install Install plugins and packages from PyPI into the same... - package Package SQLite files into a Datasette Docker container - plugins List currently installed plugins - publish Publish specified SQLite database files to the internet along... - uninstall Uninstall plugins and Python packages from the Datasette... + serve* Serve up specified SQLite database files with a web UI + create-token Create a signed API token for the specified actor ID + inspect Generate JSON summary of provided database files + install Install plugins and packages from PyPI into the same... + package Package SQLite files into a Datasette Docker container + plugins List currently installed plugins + publish Publish specified SQLite database files to the internet... + uninstall Uninstall plugins and Python packages from the Datasette... .. [[[end]]] @@ -591,3 +592,31 @@ This performance optimization is used automatically by some of the ``datasette p .. [[[end]]] + + +.. _cli_help_create_token___help: + +datasette create-token +====================== + +Create a signed API token, see :ref:`authentication_cli_create_token`. + +.. [[[cog + help(["create-token", "--help"]) +.. ]]] + +:: + + Usage: datasette create-token [OPTIONS] ID + + Create a signed API token for the specified actor ID + + Options: + --secret TEXT Secret used for signing the API tokens + [required] + -e, --expires-after INTEGER Token should expire after this many seconds + --debug Show decoded token + --help Show this message and exit. + + +.. 
[[[end]]] diff --git a/docs/plugins.rst b/docs/plugins.rst index 9efef32f..3ae42293 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -152,7 +152,8 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "version": null, "hooks": [ "actor_from_request", - "permission_allowed" + "permission_allowed", + "register_commands" ] }, { diff --git a/tests/test_api.py b/tests/test_api.py index ad74d16e..f7cbe950 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -806,6 +806,7 @@ def test_settings_json(app_client): "max_returned_rows": 100, "sql_time_limit_ms": 200, "allow_download": True, + "allow_signed_tokens": True, "allow_facet": True, "suggest_facets": True, "default_cache_ttl": 5, diff --git a/tests/test_auth.py b/tests/test_auth.py index a79dafd8..f2d82107 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,5 +1,7 @@ from .fixtures import app_client +from click.testing import CliRunner from datasette.utils import baseconv +from datasette.cli import cli import pytest import time @@ -235,3 +237,29 @@ def test_auth_with_dstok_token(app_client, scenario, should_work): assert response.json == {"actor": None} finally: app_client.ds._settings["allow_signed_tokens"] = True + + +@pytest.mark.parametrize("expires", (None, 1000, -1000)) +def test_cli_create_token(app_client, expires): + secret = app_client.ds._secret + runner = CliRunner(mix_stderr=False) + args = ["create-token", "--secret", secret, "test"] + if expires: + args += ["--expires-after", str(expires)] + result = runner.invoke(cli, args) + assert result.exit_code == 0 + token = result.output.strip() + assert token.startswith("dstok_") + details = app_client.ds.unsign(token[len("dstok_") :], "token") + expected_keys = {"a", "token"} + if expires: + expected_keys.add("e") + assert details.keys() == expected_keys + assert details["a"] == "test" + response = app_client.get( + "/-/actor.json", headers={"Authorization": "Bearer {}".format(token)} + ) + if expires is None or expires > 0: + assert response.json == {"actor": {"id": "test", "token": "dstok"}} + else: + assert response.json == {"actor": None} diff --git a/tests/test_plugins.py b/tests/test_plugins.py index e0a7bc76..de3fde8e 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -971,6 +971,7 @@ def test_hook_register_commands(): "plugins", "publish", "uninstall", + "create-token", } # Now install a plugin @@ -1001,6 +1002,7 @@ def test_hook_register_commands(): "uninstall", "verify", "unverify", + "create-token", } pm.unregister(name="verify") importlib.reload(cli) From 55f860c304aea813cb7ed740cc5625560a0722a0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Oct 2022 14:13:31 -0700 Subject: [PATCH 15/82] Fix bug with breadcrumbs and request=None, closes #1849 --- datasette/app.py | 9 ++++++--- tests/test_internals_datasette.py | 9 +++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index c868f8d3..596ff44d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -639,15 +639,18 @@ class Datasette: async def _crumb_items(self, request, table=None, database=None): crumbs = [] + actor = None + if request: + actor = request.actor # Top-level link if await self.permission_allowed( - actor=request.actor, action="view-instance", default=True + actor=actor, action="view-instance", default=True ): crumbs.append({"href": self.urls.instance(), "label": "home"}) # Database link if database: if await self.permission_allowed( - actor=request.actor, + actor=actor, 
action="view-database", resource=database, default=True, @@ -662,7 +665,7 @@ class Datasette: if table: assert database, "table= requires database=" if await self.permission_allowed( - actor=request.actor, + actor=actor, action="view-table", resource=(database, table), default=True, diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index c82cafb3..1b4732af 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -125,3 +125,12 @@ async def test_datasette_ensure_permissions_check_visibility( visible, private = await ds.check_visibility(actor, permissions=permissions) assert visible == should_allow assert private == expected_private + + +@pytest.mark.asyncio +async def test_datasette_render_template_no_request(): + # https://github.com/simonw/datasette/issues/1849 + ds = Datasette([], memory=True) + await ds.invoke_startup() + rendered = await ds.render_template("error.html") + assert "Error " in rendered From af5d5d0243631562ad83f2c318bff31a077feb5d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Oct 2022 14:34:33 -0700 Subject: [PATCH 16/82] Allow leading comments on SQL queries, refs #1860 --- datasette/utils/__init__.py | 27 +++++++++++++++++++++------ tests/test_utils.py | 7 +++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 803ba96d..977a66d6 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -205,13 +205,28 @@ class InvalidSql(Exception): pass +# Allow SQL to start with a /* */ or -- comment +comment_re = ( + # Start of string, then any amount of whitespace + r"^(\s*" + + + # Comment that starts with -- and ends at a newline + r"(?:\-\-.*?\n\s*)" + + + # Comment that starts with /* and ends with */ + r"|(?:/\*[\s\S]*?\*/)" + + + # Whitespace + r")*\s*" +) + allowed_sql_res = [ - re.compile(r"^select\b"), - re.compile(r"^explain\s+select\b"), - re.compile(r"^explain\s+query\s+plan\s+select\b"), - re.compile(r"^with\b"), - re.compile(r"^explain\s+with\b"), - re.compile(r"^explain\s+query\s+plan\s+with\b"), + re.compile(comment_re + r"select\b"), + re.compile(comment_re + r"explain\s+select\b"), + re.compile(comment_re + r"explain\s+query\s+plan\s+select\b"), + re.compile(comment_re + r"with\b"), + re.compile(comment_re + r"explain\s+with\b"), + re.compile(comment_re + r"explain\s+query\s+plan\s+with\b"), ] allowed_pragmas = ( "database_list", diff --git a/tests/test_utils.py b/tests/test_utils.py index d71a612d..e89f1e6b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -141,6 +141,7 @@ def test_custom_json_encoder(obj, expected): "update blah set some_column='# Hello there\n\n* This is a list\n* of items\n--\n[And a link](https://github.com/simonw/datasette-render-markdown).'\nas demo_markdown", "PRAGMA case_sensitive_like = true", "SELECT * FROM pragma_not_on_allow_list('idx52')", + "/* This comment is not valid. 
select 1", ], ) def test_validate_sql_select_bad(bad_sql): @@ -166,6 +167,12 @@ def test_validate_sql_select_bad(bad_sql): "explain query plan WITH RECURSIVE cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 10) SELECT x FROM cnt;", "SELECT * FROM pragma_index_info('idx52')", "select * from pragma_table_xinfo('table')", + # Various types of comment + "-- comment\nselect 1", + "-- one line\n -- two line\nselect 1", + " /* comment */\nselect 1", + " /* comment */select 1", + "/* comment */\n -- another\n /* one more */ select 1", ], ) def test_validate_sql_select_good(good_sql): From 382a87158337540f991c6dc887080f7b37c7c26e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Oct 2022 14:13:31 -0700 Subject: [PATCH 17/82] max_signed_tokens_ttl setting, closes #1858 Also redesigned token format to include creation time and optional duration. --- datasette/app.py | 5 ++++ datasette/default_permissions.py | 33 +++++++++++++++++---- datasette/views/special.py | 20 ++++++++----- docs/settings.rst | 15 ++++++++++ tests/test_api.py | 1 + tests/test_auth.py | 50 ++++++++++++++++++++++++-------- 6 files changed, 99 insertions(+), 25 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 596ff44d..894d7f0f 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -129,6 +129,11 @@ SETTINGS = ( True, "Allow users to create and use signed API tokens", ), + Setting( + "max_signed_tokens_ttl", + 0, + "Maximum allowed expiry time for signed API tokens", + ), Setting("suggest_facets", True, "Calculate and display suggested facets"), Setting( "default_cache_ttl", diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 12499c16..c502dd70 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -56,6 +56,7 @@ def actor_from_request(datasette, request): prefix = "dstok_" if not datasette.setting("allow_signed_tokens"): return None + max_signed_tokens_ttl = datasette.setting("max_signed_tokens_ttl") authorization = request.headers.get("authorization") if not authorization: return None @@ -69,11 +70,31 @@ def actor_from_request(datasette, request): decoded = datasette.unsign(token, namespace="token") except itsdangerous.BadSignature: return None - expires_at = decoded.get("e") - if expires_at is not None: - if expires_at < time.time(): + if "t" not in decoded: + # Missing timestamp + return None + created = decoded["t"] + if not isinstance(created, int): + # Invalid timestamp + return None + duration = decoded.get("d") + if duration is not None and not isinstance(duration, int): + # Invalid duration + return None + if (duration is None and max_signed_tokens_ttl) or ( + duration is not None + and max_signed_tokens_ttl + and duration > max_signed_tokens_ttl + ): + duration = max_signed_tokens_ttl + if duration: + if time.time() - created > duration: + # Expired return None - return {"id": decoded["a"], "token": "dstok"} + actor = {"id": decoded["a"], "token": "dstok"} + if duration: + actor["token_expires"] = created + duration + return actor @hookimpl @@ -102,9 +123,9 @@ def register_commands(cli): def create_token(id, secret, expires_after, debug): "Create a signed API token for the specified actor ID" ds = Datasette(secret=secret) - bits = {"a": id, "token": "dstok"} + bits = {"a": id, "token": "dstok", "t": int(time.time())} if expires_after: - bits["e"] = int(time.time()) + expires_after + bits["d"] = expires_after token = ds.sign(bits, namespace="token") click.echo("dstok_{}".format(token)) if debug: diff --git 
a/datasette/views/special.py b/datasette/views/special.py index 89015958..b754a2f0 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -195,20 +195,24 @@ class CreateTokenView(BaseView): async def post(self, request): self.check_permission(request) post = await request.post_vars() - expires = None errors = [] + duration = None if post.get("expire_type"): - duration = post.get("expire_duration") - if not duration or not duration.isdigit() or not int(duration) > 0: + duration_string = post.get("expire_duration") + if ( + not duration_string + or not duration_string.isdigit() + or not int(duration_string) > 0 + ): errors.append("Invalid expire duration") else: unit = post["expire_type"] if unit == "minutes": - expires = int(duration) * 60 + duration = int(duration_string) * 60 elif unit == "hours": - expires = int(duration) * 60 * 60 + duration = int(duration_string) * 60 * 60 elif unit == "days": - expires = int(duration) * 60 * 60 * 24 + duration = int(duration_string) * 60 * 60 * 24 else: errors.append("Invalid expire duration unit") token_bits = None @@ -216,8 +220,10 @@ class CreateTokenView(BaseView): if not errors: token_bits = { "a": request.actor["id"], - "e": (int(time.time()) + expires) if expires else None, + "t": int(time.time()), } + if duration: + token_bits["d"] = duration token = "dstok_{}".format(self.ds.sign(token_bits, "token")) return await self.render( ["create_token.html"], diff --git a/docs/settings.rst b/docs/settings.rst index be640b21..a990c78c 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -182,6 +182,21 @@ This is turned on by default. Use the following to turn it off:: Turning this setting off will disable the ``/-/create-token`` page, :ref:`described here `. It will also cause any incoming ``Authorization: Bearer dstok_...`` API tokens to be ignored. +.. _setting_max_signed_tokens_ttl: + +max_signed_tokens_ttl +~~~~~~~~~~~~~~~~~~~~~ + +Maximum allowed expiry time for signed API tokens created by users. + +Defaults to ``0`` which means no limit - tokens can be created that will never expire. + +Set this to a value in seconds to limit the maximum expiry time. For example, to set that limit to 24 hours you would use:: + + datasette mydatabase.db --setting max_signed_tokens_ttl 86400 + +This setting is enforced when incoming tokens are processed. + .. 
_setting_default_cache_ttl: default_cache_ttl diff --git a/tests/test_api.py b/tests/test_api.py index f7cbe950..fc171421 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -807,6 +807,7 @@ def test_settings_json(app_client): "sql_time_limit_ms": 200, "allow_download": True, "allow_signed_tokens": True, + "max_signed_tokens_ttl": 0, "allow_facet": True, "suggest_facets": True, "default_cache_ttl": 5, diff --git a/tests/test_auth.py b/tests/test_auth.py index f2d82107..fa1b2e46 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -173,13 +173,19 @@ def test_auth_create_token(app_client, post_data, errors, expected_duration): # Extract token from page token = response2.text.split('value="dstok_')[1].split('"')[0] details = app_client.ds.unsign(token, "token") - assert details.keys() == {"a", "e"} + assert details.keys() == {"a", "t", "d"} or details.keys() == {"a", "t"} assert details["a"] == "test" if expected_duration is None: - assert details["e"] is None + assert "d" not in details else: - about_right = int(time.time()) + expected_duration - assert about_right - 2 < details["e"] < about_right + 2 + assert details["d"] == expected_duration + # And test that token + response3 = app_client.get( + "/-/actor.json", + headers={"Authorization": "Bearer {}".format("dstok_{}".format(token))}, + ) + assert response3.status == 200 + assert response3.json["actor"]["id"] == "test" def test_auth_create_token_not_allowed_for_tokens(app_client): @@ -206,6 +212,7 @@ def test_auth_create_token_not_allowed_if_allow_signed_tokens_off(app_client): ( ("allow_signed_tokens_off", False), ("no_token", False), + ("no_timestamp", False), ("invalid_token", False), ("expired_token", False), ("valid_unlimited_token", True), @@ -214,12 +221,15 @@ def test_auth_create_token_not_allowed_if_allow_signed_tokens_off(app_client): ) def test_auth_with_dstok_token(app_client, scenario, should_work): token = None + _time = int(time.time()) if scenario in ("valid_unlimited_token", "allow_signed_tokens_off"): - token = app_client.ds.sign({"a": "test"}, "token") + token = app_client.ds.sign({"a": "test", "t": _time}, "token") elif scenario == "valid_expiring_token": - token = app_client.ds.sign({"a": "test", "e": int(time.time()) + 1000}, "token") + token = app_client.ds.sign({"a": "test", "t": _time - 50, "d": 1000}, "token") elif scenario == "expired_token": - token = app_client.ds.sign({"a": "test", "e": int(time.time()) - 1000}, "token") + token = app_client.ds.sign({"a": "test", "t": _time - 2000, "d": 1000}, "token") + elif scenario == "no_timestamp": + token = app_client.ds.sign({"a": "test"}, "token") elif scenario == "invalid_token": token = "invalid" if token: @@ -232,7 +242,16 @@ def test_auth_with_dstok_token(app_client, scenario, should_work): response = app_client.get("/-/actor.json", headers=headers) try: if should_work: - assert response.json == {"actor": {"id": "test", "token": "dstok"}} + assert response.json.keys() == {"actor"} + actor = response.json["actor"] + expected_keys = {"id", "token"} + if scenario != "valid_unlimited_token": + expected_keys.add("token_expires") + assert actor.keys() == expected_keys + assert actor["id"] == "test" + assert actor["token"] == "dstok" + if scenario != "valid_unlimited_token": + assert isinstance(actor["token_expires"], int) else: assert response.json == {"actor": None} finally: @@ -251,15 +270,22 @@ def test_cli_create_token(app_client, expires): token = result.output.strip() assert token.startswith("dstok_") details = 
app_client.ds.unsign(token[len("dstok_") :], "token") - expected_keys = {"a", "token"} + expected_keys = {"a", "token", "t"} if expires: - expected_keys.add("e") + expected_keys.add("d") assert details.keys() == expected_keys assert details["a"] == "test" response = app_client.get( "/-/actor.json", headers={"Authorization": "Bearer {}".format(token)} ) if expires is None or expires > 0: - assert response.json == {"actor": {"id": "test", "token": "dstok"}} + expected_actor = { + "id": "test", + "token": "dstok", + } + if expires and expires > 0: + expected_actor["token_expires"] = details["t"] + expires + assert response.json == {"actor": expected_actor} else: - assert response.json == {"actor": None} + expected_actor = None + assert response.json == {"actor": expected_actor} From 51c436fed29205721dcf17fa31d7e7090d34ebb8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Oct 2022 20:57:02 -0700 Subject: [PATCH 18/82] First draft of insert row write API, refs #1851 --- datasette/default_permissions.py | 2 +- datasette/views/table.py | 76 +++++++++++++++++++++++++++----- docs/authentication.rst | 12 +++++ docs/cli-reference.rst | 2 + docs/json_api.rst | 38 ++++++++++++++++ 5 files changed, 119 insertions(+), 11 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index c502dd70..87684e2a 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -9,7 +9,7 @@ import time @hookimpl(tryfirst=True) def permission_allowed(datasette, actor, action, resource): async def inner(): - if action in ("permissions-debug", "debug-menu"): + if action in ("permissions-debug", "debug-menu", "insert-row"): if actor and actor.get("id") == "root": return True elif action == "view-instance": diff --git a/datasette/views/table.py b/datasette/views/table.py index f73b0957..74d1c532 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -28,7 +28,7 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) -from datasette.utils.asgi import BadRequest, Forbidden, NotFound +from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters from .base import DataView, DatasetteError, ureg from .database import QueryView @@ -103,15 +103,71 @@ class TableView(DataView): canned_query = await self.ds.get_canned_query( database_name, table_name, request.actor ) - assert canned_query, "You may only POST to a canned query" - return await QueryView(self.ds).data( - request, - canned_query["sql"], - metadata=canned_query, - editable=False, - canned_query=table_name, - named_parameters=canned_query.get("params"), - write=bool(canned_query.get("write")), + if canned_query: + return await QueryView(self.ds).data( + request, + canned_query["sql"], + metadata=canned_query, + editable=False, + canned_query=table_name, + named_parameters=canned_query.get("params"), + write=bool(canned_query.get("write")), + ) + else: + # Handle POST to a table + return await self.table_post(request, database_name, table_name) + + async def table_post(self, request, database_name, table_name): + # Table must exist (may handle table creation in the future) + db = self.ds.get_database(database_name) + if not await db.table_exists(table_name): + raise NotFound("Table not found: {}".format(table_name)) + # Must have insert-row permission + if not await self.ds.permission_allowed( + request.actor, "insert-row", resource=(database_name, table_name) + ): + raise Forbidden("Permission denied") + if 
request.headers.get("content-type") != "application/json": + # TODO: handle form-encoded data + raise BadRequest("Must send JSON data") + data = json.loads(await request.post_body()) + if "row" not in data: + raise BadRequest('Must send "row" data') + row = data["row"] + if not isinstance(row, dict): + raise BadRequest("row must be a dictionary") + # Verify all columns exist + columns = await db.table_columns(table_name) + pks = await db.primary_keys(table_name) + for key in row: + if key not in columns: + raise BadRequest("Column not found: {}".format(key)) + if key in pks: + raise BadRequest( + "Cannot insert into primary key column: {}".format(key) + ) + # Perform the insert + sql = "INSERT INTO [{table}] ({columns}) VALUES ({values})".format( + table=escape_sqlite(table_name), + columns=", ".join(escape_sqlite(c) for c in row), + values=", ".join("?" for c in row), + ) + cursor = await db.execute_write(sql, list(row.values())) + # Return the new row + rowid = cursor.lastrowid + new_row = ( + await db.execute( + "SELECT * FROM [{table}] WHERE rowid = ?".format( + table=escape_sqlite(table_name) + ), + [rowid], + ) + ).first() + return Response.json( + { + "row": dict(new_row), + }, + status=201, ) async def columns_to_select(self, table_columns, pks, request): diff --git a/docs/authentication.rst b/docs/authentication.rst index 0835e17c..233a50d2 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -547,6 +547,18 @@ Actor is allowed to view (and execute) a :ref:`canned query ` pa Default *allow*. +.. _permissions_insert_row: + +insert-row +---------- + +Actor is allowed to insert rows into a table. + +``resource`` - tuple: (string, string) + The name of the database, then the name of the table + +Default *deny*. + .. _permissions_execute_sql: execute-sql diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index b40c6b2c..56156568 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -229,6 +229,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam database files (default=True) allow_signed_tokens Allow users to create and use signed API tokens (default=True) + max_signed_tokens_ttl Maximum allowed expiry time for signed API tokens + (default=0) suggest_facets Calculate and display suggested facets (default=True) default_cache_ttl Default HTTP cache TTL (used in Cache-Control: diff --git a/docs/json_api.rst b/docs/json_api.rst index d3fdb1e4..b339a738 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -455,3 +455,41 @@ You can find this near the top of the source code of those pages, looking like t The JSON URL is also made available in a ``Link`` HTTP header for the page:: Link: https://latest.datasette.io/fixtures/sortable.json; rel="alternate"; type="application/json+datasette" + +.. _json_api_write: + +The JSON write API +------------------ + +Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. + +.. _json_api_write_insert_row: + +Inserting a single row +~~~~~~~~~~~~~~~~~~~~~~ + +This requires the :ref:`permissions_insert_row` permission. + +:: + + POST // + Content-Type: application/json + Authorization: Bearer dstok_ + { + "row": { + "column1": "value1", + "column2": "value2" + } + } + +If successful, this will return a ``201`` status code and the newly inserted row, for example: + +.. 
code-block:: json + + { + "row": { + "id": 1, + "column1": "value1", + "column2": "value2" + } + } From 918f3561208ee58c44773d30e21bace7d7c7cf3b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 06:56:11 -0700 Subject: [PATCH 19/82] Delete mirror-master-and-main.yml Closes #1865 --- .github/workflows/mirror-master-and-main.yml | 21 -------------------- 1 file changed, 21 deletions(-) delete mode 100644 .github/workflows/mirror-master-and-main.yml diff --git a/.github/workflows/mirror-master-and-main.yml b/.github/workflows/mirror-master-and-main.yml deleted file mode 100644 index 8418df40..00000000 --- a/.github/workflows/mirror-master-and-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Mirror "master" and "main" branches -on: - push: - branches: - - master - - main - -jobs: - mirror: - runs-on: ubuntu-latest - steps: - - name: Mirror to "master" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: master - force: false - - name: Mirror to "main" - uses: zofrex/mirror-branch@ea152f124954fa4eb26eea3fe0dbe313a3a08d94 - with: - target-branch: main - force: false From b597bb6b3e7c4b449654bbfa5b01ceff3eb3cb33 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:47:41 -0700 Subject: [PATCH 20/82] Better comment handling in SQL regex, refs #1860 --- datasette/utils/__init__.py | 9 +++++---- tests/test_utils.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 977a66d6..5acfb8b4 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -208,16 +208,16 @@ class InvalidSql(Exception): # Allow SQL to start with a /* */ or -- comment comment_re = ( # Start of string, then any amount of whitespace - r"^(\s*" + r"^\s*(" + # Comment that starts with -- and ends at a newline r"(?:\-\-.*?\n\s*)" + - # Comment that starts with /* and ends with */ - r"|(?:/\*[\s\S]*?\*/)" + # Comment that starts with /* and ends with */ - but does not have */ in it + r"|(?:\/\*((?!\*\/)[\s\S])*\*\/)" + # Whitespace - r")*\s*" + r"\s*)*\s*" ) allowed_sql_res = [ @@ -228,6 +228,7 @@ allowed_sql_res = [ re.compile(comment_re + r"explain\s+with\b"), re.compile(comment_re + r"explain\s+query\s+plan\s+with\b"), ] + allowed_pragmas = ( "database_list", "foreign_key_list", diff --git a/tests/test_utils.py b/tests/test_utils.py index e89f1e6b..c1589107 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -142,6 +142,7 @@ def test_custom_json_encoder(obj, expected): "PRAGMA case_sensitive_like = true", "SELECT * FROM pragma_not_on_allow_list('idx52')", "/* This comment is not valid. 
select 1", + "/**/\nupdate foo set bar = 1\n/* test */ select 1", ], ) def test_validate_sql_select_bad(bad_sql): From 6958e21b5c2012adf5655d2512cb4106490d10f2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 11:50:54 -0700 Subject: [PATCH 21/82] Add test for /* multi line */ comment, refs #1860 --- tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index c1589107..8b64f865 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,6 +174,7 @@ def test_validate_sql_select_bad(bad_sql): " /* comment */\nselect 1", " /* comment */select 1", "/* comment */\n -- another\n /* one more */ select 1", + "/* This comment \n has multiple lines */\nselect 1", ], ) def test_validate_sql_select_good(good_sql): From a51608090b5ee37593078f71d18b33767ef3af79 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 12:06:18 -0700 Subject: [PATCH 22/82] Slight tweak to insert row API design, refs #1851 https://github.com/simonw/datasette/issues/1851#issuecomment-1292997608 --- datasette/views/table.py | 10 +++++----- docs/json_api.rst | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 74d1c532..056b7b04 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -131,11 +131,11 @@ class TableView(DataView): # TODO: handle form-encoded data raise BadRequest("Must send JSON data") data = json.loads(await request.post_body()) - if "row" not in data: - raise BadRequest('Must send "row" data') - row = data["row"] + if "insert" not in data: + raise BadRequest('Must send a "insert" key containing a dictionary') + row = data["insert"] if not isinstance(row, dict): - raise BadRequest("row must be a dictionary") + raise BadRequest("insert must be a dictionary") # Verify all columns exist columns = await db.table_columns(table_name) pks = await db.primary_keys(table_name) @@ -165,7 +165,7 @@ class TableView(DataView): ).first() return Response.json( { - "row": dict(new_row), + "inserted_row": dict(new_row), }, status=201, ) diff --git a/docs/json_api.rst b/docs/json_api.rst index b339a738..2ed8a354 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -476,7 +476,7 @@ This requires the :ref:`permissions_insert_row` permission. Content-Type: application/json Authorization: Bearer dstok_ { - "row": { + "insert": { "column1": "value1", "column2": "value2" } @@ -487,7 +487,7 @@ If successful, this will return a ``201`` status code and the newly inserted row .. 
code-block:: json { - "row": { + "inserted_row": { "id": 1, "column1": "value1", "column2": "value2" From a2a5dff709c6f1676ac30b5e734c2763002562cf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 12:08:26 -0700 Subject: [PATCH 23/82] Missing tests for insert row API, refs #1851 --- tests/test_api_write.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/test_api_write.py diff --git a/tests/test_api_write.py b/tests/test_api_write.py new file mode 100644 index 00000000..86c221d0 --- /dev/null +++ b/tests/test_api_write.py @@ -0,0 +1,38 @@ +from datasette.app import Datasette +from datasette.utils import sqlite3 +import pytest +import time + + +@pytest.fixture +def ds_write(tmp_path_factory): + db_directory = tmp_path_factory.mktemp("dbs") + db_path = str(db_directory / "data.db") + db = sqlite3.connect(str(db_path)) + db.execute("vacuum") + db.execute("create table docs (id integer primary key, title text, score float)") + ds = Datasette([db_path]) + yield ds + db.close() + + +@pytest.mark.asyncio +async def test_write_row(ds_write): + token = "dstok_{}".format( + ds_write.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" + ) + ) + response = await ds_write.client.post( + "/data/docs", + json={"insert": {"title": "Test", "score": 1.0}}, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + expected_row = {"id": 1, "title": "Test", "score": 1.0} + assert response.status_code == 201 + assert response.json()["inserted_row"] == expected_row + rows = (await ds_write.get_database("data").execute("select * from docs")).rows + assert dict(rows[0]) == expected_row From 6e788b49edf4f842c0817f006eb9d865778eea5e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 13:17:18 -0700 Subject: [PATCH 24/82] New URL design /db/table/-/insert, refs #1851 --- datasette/app.py | 6 +++- datasette/views/table.py | 69 +++++++++++++++++++++++++++++++++++++++- docs/json_api.rst | 18 ++++++----- tests/test_api_write.py | 6 ++-- 4 files changed, 86 insertions(+), 13 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 894d7f0f..8bc5fe36 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -39,7 +39,7 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import TableView +from .views.table import TableView, TableInsertView from .views.row import RowView from .renderer import json_renderer from .url_builder import Urls @@ -1262,6 +1262,10 @@ class Datasette: RowView.as_view(self), r"/(?P[^\/\.]+)/(?P
[^/]+?)/(?P<pks>[^/]+?)(\.(?P<format>\w+))?$",
        )
+        add_route(
+            TableInsertView.as_view(self),
+            r"/(?P<database>[^\/\.]+)/(?P<table>
[^\/\.]+)/-/insert$", + ) return [ # Compile any strings to regular expressions ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) diff --git a/datasette/views/table.py b/datasette/views/table.py index 056b7b04..be3d4f93 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -30,7 +30,7 @@ from datasette.utils import ( ) from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters -from .base import DataView, DatasetteError, ureg +from .base import BaseView, DataView, DatasetteError, ureg from .database import QueryView LINK_WITH_LABEL = ( @@ -1077,3 +1077,70 @@ async def display_columns_and_rows( } columns = [first_column] + columns return columns, cell_rows + + +class TableInsertView(BaseView): + name = "table-insert" + + def __init__(self, datasette): + self.ds = datasette + + async def post(self, request): + database_route = tilde_decode(request.url_vars["database"]) + try: + db = self.ds.get_database(route=database_route) + except KeyError: + raise NotFound("Database not found: {}".format(database_route)) + database_name = db.name + table_name = tilde_decode(request.url_vars["table"]) + # Table must exist (may handle table creation in the future) + db = self.ds.get_database(database_name) + if not await db.table_exists(table_name): + raise NotFound("Table not found: {}".format(table_name)) + # Must have insert-row permission + if not await self.ds.permission_allowed( + request.actor, "insert-row", resource=(database_name, table_name) + ): + raise Forbidden("Permission denied") + if request.headers.get("content-type") != "application/json": + # TODO: handle form-encoded data + raise BadRequest("Must send JSON data") + data = json.loads(await request.post_body()) + if "row" not in data: + raise BadRequest('Must send a "row" key containing a dictionary') + row = data["row"] + if not isinstance(row, dict): + raise BadRequest("row must be a dictionary") + # Verify all columns exist + columns = await db.table_columns(table_name) + pks = await db.primary_keys(table_name) + for key in row: + if key not in columns: + raise BadRequest("Column not found: {}".format(key)) + if key in pks: + raise BadRequest( + "Cannot insert into primary key column: {}".format(key) + ) + # Perform the insert + sql = "INSERT INTO [{table}] ({columns}) VALUES ({values})".format( + table=escape_sqlite(table_name), + columns=", ".join(escape_sqlite(c) for c in row), + values=", ".join("?" for c in row), + ) + cursor = await db.execute_write(sql, list(row.values())) + # Return the new row + rowid = cursor.lastrowid + new_row = ( + await db.execute( + "SELECT * FROM [{table}] WHERE rowid = ?".format( + table=escape_sqlite(table_name) + ), + [rowid], + ) + ).first() + return Response.json( + { + "inserted": [dict(new_row)], + }, + status=201, + ) diff --git a/docs/json_api.rst b/docs/json_api.rst index 2ed8a354..4a7961f2 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -463,7 +463,7 @@ The JSON write API Datasette provides a write API for JSON data. This is a POST-only API that requires an authenticated API token, see :ref:`CreateTokenView`. -.. _json_api_write_insert_row: +.. _TableInsertView: Inserting a single row ~~~~~~~~~~~~~~~~~~~~~~ @@ -472,11 +472,11 @@ This requires the :ref:`permissions_insert_row` permission. :: - POST //
+    POST /<database>/<table>
/-/insert Content-Type: application/json Authorization: Bearer dstok_ { - "insert": { + "row": { "column1": "value1", "column2": "value2" } @@ -487,9 +487,11 @@ If successful, this will return a ``201`` status code and the newly inserted row .. code-block:: json { - "inserted_row": { - "id": 1, - "column1": "value1", - "column2": "value2" - } + "inserted": [ + { + "id": 1, + "column1": "value1", + "column2": "value2" + } + ] } diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 86c221d0..e8222e43 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -24,8 +24,8 @@ async def test_write_row(ds_write): ) ) response = await ds_write.client.post( - "/data/docs", - json={"insert": {"title": "Test", "score": 1.0}}, + "/data/docs/-/insert", + json={"row": {"title": "Test", "score": 1.0}}, headers={ "Authorization": "Bearer {}".format(token), "Content-Type": "application/json", @@ -33,6 +33,6 @@ async def test_write_row(ds_write): ) expected_row = {"id": 1, "title": "Test", "score": 1.0} assert response.status_code == 201 - assert response.json()["inserted_row"] == expected_row + assert response.json()["inserted"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row From 2ea60e12d90b7cec03ebab728854d3ec4d553f54 Mon Sep 17 00:00:00 2001 From: Forest Gregg Date: Thu, 27 Oct 2022 16:51:20 -0400 Subject: [PATCH 25/82] Make hash and size a lazy property (#1837) * use inspect data for hash and file size * make hash and cached_size lazy properties * move hash property near size --- datasette/database.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index d75bd70c..af1df0a8 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -39,7 +39,7 @@ class Database: self.memory_name = memory_name if memory_name is not None: self.is_memory = True - self.hash = None + self.cached_hash = None self.cached_size = None self._cached_table_counts = None self._write_thread = None @@ -47,14 +47,6 @@ class Database: # These are used when in non-threaded mode: self._read_connection = None self._write_connection = None - if not self.is_mutable and not self.is_memory: - if self.ds.inspect_data and self.ds.inspect_data.get(self.name): - self.hash = self.ds.inspect_data[self.name]["hash"] - self.cached_size = self.ds.inspect_data[self.name]["size"] - else: - p = Path(path) - self.hash = inspect_hash(p) - self.cached_size = p.stat().st_size @property def cached_table_counts(self): @@ -266,14 +258,34 @@ class Database: results = await self.execute_fn(sql_operation_in_thread) return results + @property + def hash(self): + if self.cached_hash is not None: + return self.cached_hash + elif self.is_mutable or self.is_memory: + return None + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_hash = self.ds.inspect_data[self.name]["hash"] + return self.cached_hash + else: + p = Path(self.path) + self.cached_hash = inspect_hash(p) + return self.cached_hash + @property def size(self): - if self.is_memory: - return 0 if self.cached_size is not None: return self.cached_size - else: + elif self.is_memory: + return 0 + elif self.is_mutable: return Path(self.path).stat().st_size + elif self.ds.inspect_data and self.ds.inspect_data.get(self.name): + self.cached_size = self.ds.inspect_data[self.name]["size"] + return self.cached_size + else: + self.cached_size = 
Path(self.path).stat().st_size + return self.cached_size async def table_counts(self, limit=10): if not self.is_mutable and self.cached_table_counts is not None: From 641bc4453b5ef1dff0b2fc7dfad0b692be7aa61c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:51:45 -0700 Subject: [PATCH 26/82] Bump black from 22.8.0 to 22.10.0 (#1839) Bumps [black](https://github.com/psf/black) from 22.8.0 to 22.10.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.8.0...22.10.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fe258adb..625557ae 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setup( "pytest-xdist>=2.2.1", "pytest-asyncio>=0.17", "beautifulsoup4>=4.8.1", - "black==22.8.0", + "black==22.10.0", "blacken-docs==1.12.1", "pytest-timeout>=1.4.2", "trustme>=0.7", From 26af9b9c4a6c62ee15870caa1c7bc455165d3b11 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 13:58:00 -0700 Subject: [PATCH 27/82] Release notes for 0.63, refs #1869 --- docs/changelog.rst | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2255dcce..01957e4f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,36 +4,42 @@ Changelog ========= -.. _v0_63a1: +.. _v0_63: -0.63a1 (2022-10-23) -------------------- +0.63 (2022-10-27) +----------------- +Features +~~~~~~~~ + +- Now tested against Python 3.11. Docker containers used by ``datasette publish`` and ``datasette package`` both now use that version of Python. (:issue:`1853`) +- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) +- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) +- The :ref:`setting_truncate_cells_html` setting now also affects long URLs in columns. (:issue:`1805`) +- The non-JavaScript SQL editor textarea now increases height to fit the SQL query. (:issue:`1786`) +- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) +- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) +- SQL queries can now include leading SQL comments, using ``/* ... */`` or ``-- ...`` syntax. Thanks, Charles Nepote. (:issue:`1860`) - SQL query is now re-displayed when terminated with a time limit error. (:issue:`1819`) -- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) - The :ref:`inspect data ` mechanism is now used to speed up server startup - thanks, Forest Gregg. (:issue:`1834`) - In :ref:`config_dir` databases with filenames ending in ``.sqlite`` or ``.sqlite3`` are now automatically added to the Datasette instance. (:issue:`1646`) - Breadcrumb navigation display now respects the current user's permissions. (:issue:`1831`) -- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. 
(:issue:`1844`) -- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - -.. _v0_63a0: - -0.63a0 (2022-09-26) -------------------- +Plugin hooks and internals +~~~~~~~~~~~~~~~~~~~~~~~~~~ - The :ref:`plugin_hook_prepare_jinja2_environment` plugin hook now accepts an optional ``datasette`` argument. Hook implementations can also now return an ``async`` function which will be awaited automatically. (:issue:`1809`) -- ``--load-extension`` option now supports entrypoints. Thanks, Alex Garcia. (`#1789 `__) -- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. -- Facet size can now be set per-table with the new ``facet_size`` table metadata option. (:issue:`1804`) -- ``truncate_cells_html`` setting now also affects long URLs in columns. (:issue:`1805`) - ``Database(is_mutable=)`` now defaults to ``True``. (:issue:`1808`) -- Non-JavaScript textarea now increases height to fit the SQL query. (:issue:`1786`) -- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- The :ref:`datasette.check_visibility() ` method now accepts an optional ``permissions=`` list, allowing it to take multiple permissions into account at once when deciding if something should be shown as public or private. This has been used to correctly display padlock icons in more places in the Datasette interface. (:issue:`1829`) - Datasette no longer enforces upper bounds on its dependencies. (:issue:`1800`) -- Facets are now displayed with better line-breaks in long values. Thanks, Daniel Rech. (`#1794 `__) -- The ``settings.json`` file used in :ref:`config_dir` is now validated on startup. (:issue:`1816`) + +Documentation +~~~~~~~~~~~~~ + +- New tutorial: `Cleaning data with sqlite-utils and Datasette `__. +- Screenshots in the documentation are now maintained using `shot-scraper `__, as described in `Automating screenshots for the Datasette documentation using shot-scraper `__. (:issue:`1844`) +- More detailed command descriptions on the :ref:`CLI reference ` page. (:issue:`1787`) +- New documentation on :ref:`deploying_openrc` - thanks, Adam Simpson. (`#1825 `__) .. _v0_62: From 61171f01549549e5fb25c72b13280d941d96dbf1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 15:11:26 -0700 Subject: [PATCH 28/82] Release 0.63 Refs #1646, #1786, #1787, #1789, #1794, #1800, #1804, #1805, #1808, #1809, #1816, #1819, #1825, #1829, #1831, #1834, #1844, #1853, #1860 Closes #1869 --- datasette/version.py | 2 +- docs/changelog.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/version.py b/datasette/version.py index eb36da45..ac012640 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "0.63a1" +__version__ = "0.63" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 01957e4f..f573afb3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Changelog 0.63 (2022-10-27) ----------------- +See `Datasette 0.63: The annotated release notes `__ for more background on the changes in this release. 
+ Features ~~~~~~~~ From c9b5f5d598e7f85cd3e1ce020351a27da334408b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 27 Oct 2022 17:58:36 -0700 Subject: [PATCH 29/82] Depend on sqlite-utils>=3.30 Decided to use the most recent version in case I decide later to use the flatten() utility function. Refs #1850 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 625557ae..99e2a4ad 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ setup( "PyYAML>=5.3", "mergedeep>=1.1.1", "itsdangerous>=1.1", + "sqlite-utils>=3.30", ], entry_points=""" [console_scripts] From c35859ae3df163406f1a1895ccf9803e933b2d8e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 29 Oct 2022 23:03:45 -0700 Subject: [PATCH 30/82] API for bulk inserts, closes #1866 --- datasette/app.py | 5 ++ datasette/views/table.py | 136 +++++++++++++++++++++---------- docs/cli-reference.rst | 2 + docs/json_api.rst | 48 ++++++++++- docs/settings.rst | 11 +++ tests/test_api.py | 1 + tests/test_api_write.py | 168 +++++++++++++++++++++++++++++++++++++-- 7 files changed, 320 insertions(+), 51 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 8bc5fe36..f80d3792 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -99,6 +99,11 @@ SETTINGS = ( 1000, "Maximum rows that can be returned from a table or custom query", ), + Setting( + "max_insert_rows", + 100, + "Maximum rows that can be inserted at a time using the bulk insert API", + ), Setting( "num_sql_threads", 3, diff --git a/datasette/views/table.py b/datasette/views/table.py index be3d4f93..fd203036 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -30,6 +30,7 @@ from datasette.utils import ( ) from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters +import sqlite_utils from .base import BaseView, DataView, DatasetteError, ureg from .database import QueryView @@ -1085,62 +1086,109 @@ class TableInsertView(BaseView): def __init__(self, datasette): self.ds = datasette + async def _validate_data(self, request, db, table_name): + errors = [] + + def _errors(errors): + return None, errors, {} + + if request.headers.get("content-type") != "application/json": + # TODO: handle form-encoded data + return _errors(["Invalid content-type, must be application/json"]) + body = await request.post_body() + try: + data = json.loads(body) + except json.JSONDecodeError as e: + return _errors(["Invalid JSON: {}".format(e)]) + if not isinstance(data, dict): + return _errors(["JSON must be a dictionary"]) + keys = data.keys() + # keys must contain "row" or "rows" + if "row" not in keys and "rows" not in keys: + return _errors(['JSON must have one or other of "row" or "rows"']) + rows = [] + if "row" in keys: + if "rows" in keys: + return _errors(['Cannot use "row" and "rows" at the same time']) + row = data["row"] + if not isinstance(row, dict): + return _errors(['"row" must be a dictionary']) + rows = [row] + data["return_rows"] = True + else: + rows = data["rows"] + if not isinstance(rows, list): + return _errors(['"rows" must be a list']) + for row in rows: + if not isinstance(row, dict): + return _errors(['"rows" must be a list of dictionaries']) + # Does this exceed max_insert_rows? 
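+        # (Illustrative note, not part of the original patch: with the default
+        # max_insert_rows of 100, a {"rows": [...]} body containing 101 rows
+        # is rejected with a 400 response of the form
+        # {"ok": false, "errors": ["Too many rows, maximum allowed is 100"]}.)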
+ max_insert_rows = self.ds.setting("max_insert_rows") + if len(rows) > max_insert_rows: + return _errors( + ["Too many rows, maximum allowed is {}".format(max_insert_rows)] + ) + # Validate columns of each row + columns = await db.table_columns(table_name) + # TODO: There are cases where pks are OK, if not using auto-incrementing pk + pks = await db.primary_keys(table_name) + allowed_columns = set(columns) - set(pks) + for i, row in enumerate(rows): + invalid_columns = set(row.keys()) - allowed_columns + if invalid_columns: + errors.append( + "Row {} has invalid columns: {}".format( + i, ", ".join(sorted(invalid_columns)) + ) + ) + if errors: + return _errors(errors) + extra = {key: data[key] for key in data if key not in ("rows", "row")} + return rows, errors, extra + async def post(self, request): + def _error(messages, status=400): + return Response.json({"ok": False, "errors": messages}, status=status) + database_route = tilde_decode(request.url_vars["database"]) try: db = self.ds.get_database(route=database_route) except KeyError: - raise NotFound("Database not found: {}".format(database_route)) + return _error(["Database not found: {}".format(database_route)], 404) database_name = db.name table_name = tilde_decode(request.url_vars["table"]) + # Table must exist (may handle table creation in the future) db = self.ds.get_database(database_name) if not await db.table_exists(table_name): - raise NotFound("Table not found: {}".format(table_name)) + return _error(["Table not found: {}".format(table_name)], 404) # Must have insert-row permission if not await self.ds.permission_allowed( request.actor, "insert-row", resource=(database_name, table_name) ): - raise Forbidden("Permission denied") - if request.headers.get("content-type") != "application/json": - # TODO: handle form-encoded data - raise BadRequest("Must send JSON data") - data = json.loads(await request.post_body()) - if "row" not in data: - raise BadRequest('Must send a "row" key containing a dictionary') - row = data["row"] - if not isinstance(row, dict): - raise BadRequest("row must be a dictionary") - # Verify all columns exist - columns = await db.table_columns(table_name) - pks = await db.primary_keys(table_name) - for key in row: - if key not in columns: - raise BadRequest("Column not found: {}".format(key)) - if key in pks: - raise BadRequest( - "Cannot insert into primary key column: {}".format(key) + return _error(["Permission denied"], 403) + rows, errors, extra = await self._validate_data(request, db, table_name) + if errors: + return _error(errors, 400) + + should_return = bool(extra.get("return_rows", False)) + # Insert rows + def insert_rows(conn): + table = sqlite_utils.Database(conn)[table_name] + if should_return: + rowids = [] + for row in rows: + rowids.append(table.insert(row).last_rowid) + return list( + table.rows_where( + "rowid in ({})".format(",".join("?" for _ in rowids)), rowids + ) ) - # Perform the insert - sql = "INSERT INTO [{table}] ({columns}) VALUES ({values})".format( - table=escape_sqlite(table_name), - columns=", ".join(escape_sqlite(c) for c in row), - values=", ".join("?" 
for c in row), - ) - cursor = await db.execute_write(sql, list(row.values())) - # Return the new row - rowid = cursor.lastrowid - new_row = ( - await db.execute( - "SELECT * FROM [{table}] WHERE rowid = ?".format( - table=escape_sqlite(table_name) - ), - [rowid], - ) - ).first() - return Response.json( - { - "inserted": [dict(new_row)], - }, - status=201, - ) + else: + table.insert_all(rows) + + rows = await db.execute_write_fn(insert_rows) + result = {"ok": True} + if should_return: + result["inserted"] = rows + return Response.json(result, status=201) diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 56156568..649a3dcd 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -213,6 +213,8 @@ These can be passed to ``datasette serve`` using ``datasette serve --setting nam (default=100) max_returned_rows Maximum rows that can be returned from a table or custom query (default=1000) + max_insert_rows Maximum rows that can be inserted at a time using + the bulk insert API (default=1000) num_sql_threads Number of threads in the thread pool for executing SQLite queries (default=3) sql_time_limit_ms Time limit for a SQL query in milliseconds diff --git a/docs/json_api.rst b/docs/json_api.rst index 4a7961f2..01558c23 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -465,11 +465,13 @@ Datasette provides a write API for JSON data. This is a POST-only API that requi .. _TableInsertView: -Inserting a single row -~~~~~~~~~~~~~~~~~~~~~~ +Inserting rows +~~~~~~~~~~~~~~ This requires the :ref:`permissions_insert_row` permission. +A single row can be inserted using the ``"row"`` key: + :: POST //
/-/insert
     Content-Type: application/json
     Authorization: Bearer dstok_<rest-of-token>
     {
         "row": {
             "column1": "value1",
             "column2": "value2"
         }
     }
@@ -495,3 +497,45 @@ If successful, this will return a ``201`` status code and the newly inserted row
         }
     ]
 }
+
+To insert multiple rows at a time, use the same API method but send a list of dictionaries as the ``"rows"`` key:
+
+::
+
+    POST /<database>/<table>
/-/insert + Content-Type: application/json + Authorization: Bearer dstok_ + { + "rows": [ + { + "column1": "value1", + "column2": "value2" + }, + { + "column1": "value3", + "column2": "value4" + } + ] + } + +If successful, this will return a ``201`` status code and an empty ``{}`` response body. + +To return the newly inserted rows, add the ``"return_rows": true`` key to the request body: + +.. code-block:: json + + { + "rows": [ + { + "column1": "value1", + "column2": "value2" + }, + { + "column1": "value3", + "column2": "value4" + } + ], + "return_rows": true + } + +This will return the same ``"inserted"`` key as the single row example above. There is a small performance penalty for using this option. diff --git a/docs/settings.rst b/docs/settings.rst index a990c78c..b86b18bd 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -96,6 +96,17 @@ You can increase or decrease this limit like so:: datasette mydatabase.db --setting max_returned_rows 2000 +.. _setting_max_insert_rows: + +max_insert_rows +~~~~~~~~~~~~~~~ + +Maximum rows that can be inserted at a time using the bulk insert API, see :ref:`TableInsertView`. Defaults to 100. + +You can increase or decrease this limit like so:: + + datasette mydatabase.db --setting max_insert_rows 1000 + .. _setting_num_sql_threads: num_sql_threads diff --git a/tests/test_api.py b/tests/test_api.py index fc171421..ebd675b9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -804,6 +804,7 @@ def test_settings_json(app_client): "facet_suggest_time_limit_ms": 50, "facet_time_limit_ms": 200, "max_returned_rows": 100, + "max_insert_rows": 100, "sql_time_limit_ms": 200, "allow_download": True, "allow_signed_tokens": True, diff --git a/tests/test_api_write.py b/tests/test_api_write.py index e8222e43..4a5a58aa 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -18,11 +18,7 @@ def ds_write(tmp_path_factory): @pytest.mark.asyncio async def test_write_row(ds_write): - token = "dstok_{}".format( - ds_write.sign( - {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" - ) - ) + token = write_token(ds_write) response = await ds_write.client.post( "/data/docs/-/insert", json={"row": {"title": "Test", "score": 1.0}}, @@ -36,3 +32,165 @@ async def test_write_row(ds_write): assert response.json()["inserted"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row + + +@pytest.mark.asyncio +@pytest.mark.parametrize("return_rows", (True, False)) +async def test_write_rows(ds_write, return_rows): + token = write_token(ds_write) + data = {"rows": [{"title": "Test {}".format(i), "score": 1.0} for i in range(20)]} + if return_rows: + data["return_rows"] = True + response = await ds_write.client.post( + "/data/docs/-/insert", + json=data, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201 + actual_rows = [ + dict(r) + for r in ( + await ds_write.get_database("data").execute("select * from docs") + ).rows + ] + assert len(actual_rows) == 20 + assert actual_rows == [ + {"id": i + 1, "title": "Test {}".format(i), "score": 1.0} for i in range(20) + ] + assert response.json()["ok"] is True + if return_rows: + assert response.json()["inserted"] == actual_rows + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "path,input,special_case,expected_status,expected_errors", + ( + ( + "/data2/docs/-/insert", + {}, + None, + 404, + ["Database not found: data2"], + ), + 
( + "/data/docs2/-/insert", + {}, + None, + 404, + ["Table not found: docs2"], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"} for i in range(10)]}, + "bad_token", + 403, + ["Permission denied"], + ), + ( + "/data/docs/-/insert", + {}, + "invalid_json", + 400, + [ + "Invalid JSON: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)" + ], + ), + ( + "/data/docs/-/insert", + {}, + "invalid_content_type", + 400, + ["Invalid content-type, must be application/json"], + ), + ( + "/data/docs/-/insert", + [], + None, + 400, + ["JSON must be a dictionary"], + ), + ( + "/data/docs/-/insert", + {"row": "blah"}, + None, + 400, + ['"row" must be a dictionary'], + ), + ( + "/data/docs/-/insert", + {"blah": "blah"}, + None, + 400, + ['JSON must have one or other of "row" or "rows"'], + ), + ( + "/data/docs/-/insert", + {"rows": "blah"}, + None, + 400, + ['"rows" must be a list'], + ), + ( + "/data/docs/-/insert", + {"rows": ["blah"]}, + None, + 400, + ['"rows" must be a list of dictionaries'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"} for i in range(101)]}, + None, + 400, + ["Too many rows, maximum allowed is 100"], + ), + # Validate columns of each row + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test", "bad": 1, "worse": 2} for i in range(2)]}, + None, + 400, + [ + "Row 0 has invalid columns: bad, worse", + "Row 1 has invalid columns: bad, worse", + ], + ), + ), +) +async def test_write_row_errors( + ds_write, path, input, special_case, expected_status, expected_errors +): + token = write_token(ds_write) + if special_case == "bad_token": + token += "bad" + kwargs = dict( + json=input, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "text/plain" + if special_case == "invalid_content_type" + else "application/json", + }, + ) + if special_case == "invalid_json": + del kwargs["json"] + kwargs["content"] = "{bad json" + response = await ds_write.client.post( + path, + **kwargs, + ) + assert response.status_code == expected_status + assert response.json()["ok"] is False + assert response.json()["errors"] == expected_errors + + +def write_token(ds): + return "dstok_{}".format( + ds.sign( + {"a": "root", "token": "dstok", "t": int(time.time())}, namespace="token" + ) + ) From f6bf2d8045cc239fe34357342bff1440561c8909 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 29 Oct 2022 23:20:11 -0700 Subject: [PATCH 31/82] Initial prototype of API explorer at /-/api, refs #1871 --- datasette/app.py | 5 ++ datasette/templates/api_explorer.html | 73 +++++++++++++++++++++++++++ datasette/views/special.py | 8 +++ tests/test_docs.py | 2 +- 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 datasette/templates/api_explorer.html diff --git a/datasette/app.py b/datasette/app.py index f80d3792..c3d802a4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -33,6 +33,7 @@ from .views.special import ( JsonDataView, PatternPortfolioView, AuthTokenView, + ApiExplorerView, CreateTokenView, LogoutView, AllowDebugView, @@ -1235,6 +1236,10 @@ class Datasette: CreateTokenView.as_view(self), r"/-/create-token$", ) + add_route( + ApiExplorerView.as_view(self), + r"/-/api$", + ) add_route( LogoutView.as_view(self), r"/-/logout$", diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html new file mode 100644 index 00000000..034bee60 --- /dev/null +++ b/datasette/templates/api_explorer.html @@ -0,0 +1,73 @@ +{% extends "base.html" %} + +{% block title %}API Explorer{% endblock %} + +{% 
block content %}
+
+<h1>API Explorer</h1>
+
+<p>Use this tool to try out the Datasette write API.</p>
+
+{% if errors %}
+  {% for error in errors %}
+    <p class="message-error">{{ error }}</p>
+  {% endfor %}
+{% endif %}
+
+<form class="core" method="post">
+  <div>
+    <input name="path" type="text">
+  </div>
+  <div>
+    <input name="token" type="text">
+  </div>
+  <div>
+    <textarea name="json"></textarea>
+  </div>
+  <div>
+    <input type="submit" value="Submit">
+  </div>
+</form>
+
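+<script>
+/* Sketch only: reconstructed from the script context lines visible in the
+   two follow-up patches below; the original block may differ in detail. */
+var form = document.querySelector('form');
+form.addEventListener("submit", (ev) => {
+  ev.preventDefault();
+  var formData = new FormData(form);
+  var json = formData.get('json');
+  var path = formData.get('path');
+  var token = formData.get('token');
+  // Validate JSON before sending it
+  try {
+    JSON.parse(json);
+  } catch (err) {
+    alert("Invalid JSON: " + err);
+    return;
+  }
+  fetch(path, {
+    method: 'POST',
+    body: json,
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${token}`
+    }
+  }).then(r => r.json()).then(r => {
+    alert(JSON.stringify(r, null, 2));
+  }).catch(err => {
+    alert("Error: " + err);
+  });
+});
+</script>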
+ + + + +{% endblock %} diff --git a/datasette/views/special.py b/datasette/views/special.py index b754a2f0..9922a621 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -235,3 +235,11 @@ class CreateTokenView(BaseView): "token_bits": token_bits, }, ) + + +class ApiExplorerView(BaseView): + name = "api_explorer" + has_json_alternate = False + + async def get(self, request): + return await self.render(["api_explorer.html"], request) diff --git a/tests/test_docs.py b/tests/test_docs.py index cd5a6c13..e9b813fe 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -62,7 +62,7 @@ def documented_views(): if first_word.endswith("View"): view_labels.add(first_word) # We deliberately don't document these: - view_labels.update(("PatternPortfolioView", "AuthTokenView")) + view_labels.update(("PatternPortfolioView", "AuthTokenView", "ApiExplorerView")) return view_labels From 9eb9ffae3ddd4e8ff0b713bf6fd6a0afed3368d7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 30 Oct 2022 13:09:55 -0700 Subject: [PATCH 32/82] Drop API token requirement from API explorer, refs #1871 --- datasette/default_permissions.py | 9 +++++++++ datasette/templates/api_explorer.html | 13 ++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 87684e2a..151ba2b5 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -131,3 +131,12 @@ def register_commands(cli): if debug: click.echo("\nDecoded:\n") click.echo(json.dumps(ds.unsign(token, namespace="token"), indent=2)) + + +@hookimpl +def skip_csrf(scope): + # Skip CSRF check for requests with content-type: application/json + if scope["type"] == "http": + headers = scope.get("headers") or {} + if dict(headers).get(b"content-type") == b"application/json": + return True diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 034bee60..01b182d8 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -15,16 +15,13 @@ {% endif %}
-
- - -
- +
-
- +
+ +

@@ -46,7 +43,6 @@ form.addEventListener("submit", (ev) => { var formData = new FormData(form); var json = formData.get('json'); var path = formData.get('path'); - var token = formData.get('token'); // Validate JSON try { var data = JSON.parse(json); @@ -60,7 +56,6 @@ form.addEventListener("submit", (ev) => { body: json, headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${token}` } }).then(r => r.json()).then(r => { alert(JSON.stringify(r, null, 2)); From fedbfcc36873366143195d8fe124e1859bf88346 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 30 Oct 2022 14:49:07 -0700 Subject: [PATCH 33/82] Neater display of output and errors in API explorer, refs #1871 --- datasette/templates/api_explorer.html | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 01b182d8..38fdb7bc 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -26,6 +26,12 @@

+ + """.format( escape(ex.sql) ) diff --git a/tests/test_api.py b/tests/test_api.py index ebd675b9..de0223e2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -662,7 +662,11 @@ def test_sql_time_limit(app_client_shorter_time_limit): "

SQL query took too long. The time limit is controlled by the\n" 'sql_time_limit_ms\n' "configuration option.

\n" - "
select sleep(0.5)
" + '\n' + "" ), "status": 400, "title": "SQL Interrupted", diff --git a/tests/test_html.py b/tests/test_html.py index 4b394199..7cfe9d90 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -172,7 +172,7 @@ def test_sql_time_limit(app_client_shorter_time_limit): """ sql_time_limit_ms """.strip(), - "
select sleep(0.5)
", + '', ] for expected_html_fragment in expected_html_fragments: assert expected_html_fragment in response.text From 9bec7c38eb93cde5afb16df9bdd96aea2a5b0459 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 11:07:59 -0700 Subject: [PATCH 38/82] ignore and replace options for bulk inserts, refs #1873 Also removed the rule that you cannot include primary keys in the rows you insert. And added validation that catches invalid parameters in the incoming JSON. And renamed "inserted" to "rows" in the returned JSON for return_rows: true --- datasette/views/table.py | 41 ++++++++++++++------ docs/json_api.rst | 4 +- tests/test_api_write.py | 83 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 17 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 1e3d566e..7692a4e3 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1107,6 +1107,7 @@ class TableInsertView(BaseView): if not isinstance(data, dict): return _errors(["JSON must be a dictionary"]) keys = data.keys() + # keys must contain "row" or "rows" if "row" not in keys and "rows" not in keys: return _errors(['JSON must have one or other of "row" or "rows"']) @@ -1126,19 +1127,31 @@ class TableInsertView(BaseView): for row in rows: if not isinstance(row, dict): return _errors(['"rows" must be a list of dictionaries']) + # Does this exceed max_insert_rows? max_insert_rows = self.ds.setting("max_insert_rows") if len(rows) > max_insert_rows: return _errors( ["Too many rows, maximum allowed is {}".format(max_insert_rows)] ) + + # Validate other parameters + extras = { + key: value for key, value in data.items() if key not in ("row", "rows") + } + valid_extras = {"return_rows", "ignore", "replace"} + invalid_extras = extras.keys() - valid_extras + if invalid_extras: + return _errors( + ['Invalid parameter: "{}"'.format('", "'.join(sorted(invalid_extras)))] + ) + if extras.get("ignore") and extras.get("replace"): + return _errors(['Cannot use "ignore" and "replace" at the same time']) + # Validate columns of each row - columns = await db.table_columns(table_name) - # TODO: There are cases where pks are OK, if not using auto-incrementing pk - pks = await db.primary_keys(table_name) - allowed_columns = set(columns) - set(pks) + columns = set(await db.table_columns(table_name)) for i, row in enumerate(rows): - invalid_columns = set(row.keys()) - allowed_columns + invalid_columns = set(row.keys()) - columns if invalid_columns: errors.append( "Row {} has invalid columns: {}".format( @@ -1147,8 +1160,7 @@ class TableInsertView(BaseView): ) if errors: return _errors(errors) - extra = {key: data[key] for key in data if key not in ("rows", "row")} - return rows, errors, extra + return rows, errors, extras async def post(self, request): database_route = tilde_decode(request.url_vars["database"]) @@ -1168,18 +1180,23 @@ class TableInsertView(BaseView): request.actor, "insert-row", resource=(database_name, table_name) ): return _error(["Permission denied"], 403) - rows, errors, extra = await self._validate_data(request, db, table_name) + rows, errors, extras = await self._validate_data(request, db, table_name) if errors: return _error(errors, 400) - should_return = bool(extra.get("return_rows", False)) + ignore = extras.get("ignore") + replace = extras.get("replace") + + should_return = bool(extras.get("return_rows", False)) # Insert rows def insert_rows(conn): table = sqlite_utils.Database(conn)[table_name] if should_return: rowids = [] for row in rows: - 
rowids.append(table.insert(row).last_rowid) + rowids.append( + table.insert(row, ignore=ignore, replace=replace).last_rowid + ) return list( table.rows_where( "rowid in ({})".format(",".join("?" for _ in rowids)), @@ -1187,12 +1204,12 @@ class TableInsertView(BaseView): ) ) else: - table.insert_all(rows) + table.insert_all(rows, ignore=ignore, replace=replace) rows = await db.execute_write_fn(insert_rows) result = {"ok": True} if should_return: - result["inserted"] = rows + result["rows"] = rows return Response.json(result, status=201) diff --git a/docs/json_api.rst b/docs/json_api.rst index da4500ab..34c13211 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -489,7 +489,7 @@ If successful, this will return a ``201`` status code and the newly inserted row .. code-block:: json { - "inserted": [ + "rows": [ { "id": 1, "column1": "value1", @@ -538,7 +538,7 @@ To return the newly inserted rows, add the ``"return_rows": true`` key to the re "return_rows": true } -This will return the same ``"inserted"`` key as the single row example above. There is a small performance penalty for using this option. +This will return the same ``"rows"`` key as the single row example above. There is a small performance penalty for using this option. .. _RowDeleteView: diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 1cfba104..d0b0f324 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -37,7 +37,7 @@ async def test_write_row(ds_write): ) expected_row = {"id": 1, "title": "Test", "score": 1.0} assert response.status_code == 201 - assert response.json()["inserted"] == [expected_row] + assert response.json()["rows"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row @@ -70,7 +70,7 @@ async def test_write_rows(ds_write, return_rows): ] assert response.json()["ok"] is True if return_rows: - assert response.json()["inserted"] == actual_rows + assert response.json()["rows"] == actual_rows @pytest.mark.asyncio @@ -156,6 +156,27 @@ async def test_write_rows(ds_write, return_rows): 400, ["Too many rows, maximum allowed is 100"], ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "ignore": True, "replace": True}, + None, + 400, + ['Cannot use "ignore" and "replace" at the same time'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "invalid_param": True}, + None, + 400, + ['Invalid parameter: "invalid_param"'], + ), + ( + "/data/docs/-/insert", + {"rows": [{"title": "Test"}], "one": True, "two": True}, + None, + 400, + ['Invalid parameter: "one", "two"'], + ), # Validate columns of each row ( "/data/docs/-/insert", @@ -196,6 +217,62 @@ async def test_write_row_errors( assert response.json()["errors"] == expected_errors +@pytest.mark.asyncio +@pytest.mark.parametrize( + "ignore,replace,expected_rows", + ( + ( + True, + False, + [ + {"id": 1, "title": "Exists", "score": None}, + ], + ), + ( + False, + True, + [ + {"id": 1, "title": "One", "score": None}, + ], + ), + ), +) +@pytest.mark.parametrize("should_return", (True, False)) +async def test_insert_ignore_replace( + ds_write, ignore, replace, expected_rows, should_return +): + await ds_write.get_database("data").execute_write( + "insert into docs (id, title) values (1, 'Exists')" + ) + token = write_token(ds_write) + data = {"rows": [{"id": 1, "title": "One"}]} + if ignore: + data["ignore"] = True + if replace: + data["replace"] = True + if should_return: + data["return_rows"] = True + response = await 
ds_write.client.post( + "/data/docs/-/insert", + json=data, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 201 + actual_rows = [ + dict(r) + for r in ( + await ds_write.get_database("data").execute("select * from docs") + ).rows + ] + assert actual_rows == expected_rows + assert response.json()["ok"] is True + if should_return: + assert response.json()["rows"] == expected_rows + + @pytest.mark.asyncio @pytest.mark.parametrize("scenario", ("no_token", "no_perm", "bad_table", "has_perm")) async def test_delete_row(ds_write, scenario): @@ -217,7 +294,7 @@ async def test_delete_row(ds_write, scenario): }, ) assert insert_response.status_code == 201 - pk = insert_response.json()["inserted"][0]["id"] + pk = insert_response.json()["rows"][0]["id"] path = "/data/{}/{}/-/delete".format( "docs" if scenario != "bad_table" else "bad_table", pk From 497290beaf32e6b779f9683ef15f1c5bc142a41a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 12:59:17 -0700 Subject: [PATCH 39/82] Handle database errors in /-/insert, refs #1866, #1873 Also improved API explorer to show HTTP status of response, refs #1871 --- datasette/templates/api_explorer.html | 14 +++++++++----- datasette/views/table.py | 5 ++++- tests/test_api_write.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 38fdb7bc..93bacde3 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -27,7 +27,8 @@ @@ -64,12 +65,15 @@ form.addEventListener("submit", (ev) => { headers: { 'Content-Type': 'application/json', } - }).then(r => r.json()).then(r => { + }).then(r => { + document.getElementById('response-status').textContent = r.status; + return r.json(); + }).then(data => { var errorList = output.querySelector('.errors'); - if (r.errors) { + if (data.errors) { errorList.style.display = 'block'; errorList.innerHTML = ''; - r.errors.forEach(error => { + data.errors.forEach(error => { var li = document.createElement('li'); li.textContent = error; errorList.appendChild(li); @@ -77,7 +81,7 @@ form.addEventListener("submit", (ev) => { } else { errorList.style.display = 'none'; } - output.querySelector('pre').innerText = JSON.stringify(r, null, 2); + output.querySelector('pre').innerText = JSON.stringify(data, null, 2); output.style.display = 'block'; }).catch(err => { alert("Error: " + err); diff --git a/datasette/views/table.py b/datasette/views/table.py index 7692a4e3..61227206 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1206,7 +1206,10 @@ class TableInsertView(BaseView): else: table.insert_all(rows, ignore=ignore, replace=replace) - rows = await db.execute_write_fn(insert_rows) + try: + rows = await db.execute_write_fn(insert_rows) + except Exception as e: + return _error([str(e)]) result = {"ok": True} if should_return: result["rows"] = rows diff --git a/tests/test_api_write.py b/tests/test_api_write.py index d0b0f324..0b567f48 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -156,6 +156,13 @@ async def test_write_rows(ds_write, return_rows): 400, ["Too many rows, maximum allowed is 100"], ), + ( + "/data/docs/-/insert", + {"rows": [{"id": 1, "title": "Test"}]}, + "duplicate_id", + 400, + ["UNIQUE constraint failed: docs.id"], + ), ( "/data/docs/-/insert", {"rows": [{"title": "Test"}], "ignore": True, "replace": True}, @@ -194,6 +201,10 @@ async 
def test_write_row_errors( ds_write, path, input, special_case, expected_status, expected_errors ): token = write_token(ds_write) + if special_case == "duplicate_id": + await ds_write.get_database("data").execute_write( + "insert into docs (id) values (1)" + ) if special_case == "bad_token": token += "bad" kwargs = dict( From 0b166befc0096fca30d71e19608a928d59c331a4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 1 Nov 2022 17:31:22 -0700 Subject: [PATCH 40/82] API explorer can now do GET, has JSON syntax highlighting Refs #1871 --- .../static/json-format-highlight-1.0.1.js | 43 +++++++++++ datasette/templates/api_explorer.html | 77 +++++++++++++++---- 2 files changed, 103 insertions(+), 17 deletions(-) create mode 100644 datasette/static/json-format-highlight-1.0.1.js diff --git a/datasette/static/json-format-highlight-1.0.1.js b/datasette/static/json-format-highlight-1.0.1.js new file mode 100644 index 00000000..e87c76e1 --- /dev/null +++ b/datasette/static/json-format-highlight-1.0.1.js @@ -0,0 +1,43 @@ +/* +https://github.com/luyilin/json-format-highlight +From https://unpkg.com/json-format-highlight@1.0.1/dist/json-format-highlight.js +MIT Licensed +*/ +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : + typeof define === 'function' && define.amd ? define(factory) : + (global.jsonFormatHighlight = factory()); +}(this, (function () { 'use strict'; + +var defaultColors = { + keyColor: 'dimgray', + numberColor: 'lightskyblue', + stringColor: 'lightcoral', + trueColor: 'lightseagreen', + falseColor: '#f66578', + nullColor: 'cornflowerblue' +}; + +function index (json, colorOptions) { + if ( colorOptions === void 0 ) colorOptions = {}; + + if (!json) { return; } + if (typeof json !== 'string') { + json = JSON.stringify(json, null, 2); + } + var colors = Object.assign({}, defaultColors, colorOptions); + json = json.replace(/&/g, '&').replace(//g, '>'); + return json.replace(/("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+]?\d+)?)/g, function (match) { + var color = colors.numberColor; + if (/^"/.test(match)) { + color = /:$/.test(match) ? colors.keyColor : colors.stringColor; + } else { + color = /true/.test(match) ? colors.trueColor : /false/.test(match) ? colors.falseColor : /null/.test(match) ? colors.nullColor : color; + } + return ("" + match + ""); + }); +} + +return index; + +}))); diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 93bacde3..de5337e3 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -2,6 +2,10 @@ {% block title %}API Explorer{% endblock %} +{% block extra_head %} + +{% endblock %} + {% block content %}

 <h1>API Explorer</h1>
 
@@ -14,17 +18,30 @@
   {% endfor %}
 {% endif %}
 
-<form class="core" method="post">
-  <div>
-    <input name="path" type="text">
-  </div>
-  <div>
-    <textarea name="json"></textarea>
-  </div>
-  <div>
-    <input type="submit" value="Submit">
-  </div>
-</form>
+<details open>
+  <summary>GET</summary>
+  <form class="core" id="api-explorer-get">
+    <div>
+      <input name="path" type="text">
+      <input type="submit" value="GET">
+    </div>
+  </form>
+</details>
+<details open>
+  <summary>POST</summary>
+  <form class="core" id="api-explorer-post">
+    <div>
+      <input name="path" type="text">
+    </div>
+    <div>
+      <textarea name="json"></textarea>
+    </div>
+    <div>
+      <input type="submit" value="POST">
+    </div>
+  </form>
+</details>