diff --git a/datasette/app.py b/datasette/app.py index ed62c528..6ab32f9e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,6 +1,5 @@ from __future__ import annotations -from asgi_csrf import Errors import asyncio import contextvars from typing import TYPE_CHECKING, Any, Dict, Iterable, List @@ -8,7 +7,6 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List if TYPE_CHECKING: from datasette.permissions import Resource from datasette.tokens import TokenRestrictions -import asgi_csrf import collections import dataclasses import datetime @@ -120,6 +118,7 @@ from .utils.asgi import ( asgi_send_file, asgi_send_redirect, ) +from .csrf import CrossOriginProtectionMiddleware from .utils.internal_db import init_internal_db, populate_schema_tables from .utils.sqlite import ( sqlite3, @@ -2003,7 +2002,11 @@ class Datasette: "extra_js_urls", template, context, request, view_name ), "base_url": self.setting("base_url"), - "csrftoken": request.scope["csrftoken"] if request else lambda: "", + "csrftoken": ( + request.scope["csrftoken"] + if request and "csrftoken" in request.scope + else lambda: "" + ), "datasette_version": __version__, }, **extra_template_vars, @@ -2306,26 +2309,7 @@ class Datasette: if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - async def custom_csrf_error(scope, send, message_id): - await asgi_send( - send, - content=await self.render_template( - "csrf_error.html", - {"message_id": message_id, "message_name": Errors(message_id).name}, - ), - status=403, - content_type="text/html; charset=utf-8", - ) - - asgi = asgi_csrf.asgi_csrf( - DatasetteRouter(self, routes), - signing_secret=self._secret, - cookie_name="ds_csrftoken", - skip_if_scope=lambda scope: any( - pm.hook.skip_csrf(datasette=self, scope=scope) - ), - send_csrf_failed=custom_csrf_error, - ) + asgi = CrossOriginProtectionMiddleware(DatasetteRouter(self, routes), self) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) asgi = AsgiLifespan(asgi) diff --git a/datasette/csrf.py b/datasette/csrf.py new file mode 100644 index 00000000..dd968a4d --- /dev/null +++ b/datasette/csrf.py @@ -0,0 +1,126 @@ +""" +Header-based CSRF (Cross-Origin) protection. + +Datasette uses the Sec-Fetch-Site + Origin header approach described in +Filippo Valsorda's article (https://words.filippo.io/csrf/) and implemented +in Go 1.25's http.CrossOriginProtection. This replaces the previous +token-based asgi-csrf mechanism. +""" + +from __future__ import annotations + +import secrets +import urllib.parse + +from .utils.asgi import asgi_send + +SAFE_METHODS = frozenset({"GET", "HEAD", "OPTIONS"}) + + +def _install_legacy_csrftoken(scope): + """ + Populate ``scope["csrftoken"]`` with a callable returning a per-request + random token. Provided for plugin compatibility only - core no longer + uses this value for CSRF enforcement. + """ + + def csrftoken(): + if "_datasette_legacy_csrftoken" not in scope: + scope["_datasette_legacy_csrftoken"] = secrets.token_urlsafe(32) + return scope["_datasette_legacy_csrftoken"] + + scope["csrftoken"] = csrftoken + + +class CrossOriginProtectionMiddleware: + """ + Modern CSRF protection using the Sec-Fetch-Site and Origin headers. + + Based on Filippo Valsorda's algorithm, as implemented in Go 1.25's + http.CrossOriginProtection. See https://words.filippo.io/csrf/ + + Unsafe-method requests are allowed through only if they look same-origin. + Non-browser clients (curl, etc.) send neither Sec-Fetch-Site nor Origin + and are passed through unchanged - CSRF is a browser-only attack. + """ + + SAFE_METHODS = SAFE_METHODS + + def __init__(self, app, datasette): + self.app = app + self.datasette = datasette + + async def __call__(self, scope, receive, send): + if scope["type"] != "http": + await self.app(scope, receive, send) + return + + _install_legacy_csrftoken(scope) + + if scope.get("method", "GET") in self.SAFE_METHODS: + await self.app(scope, receive, send) + return + + headers = dict(scope.get("headers") or []) + + # Bearer-token requests are not ambient browser credentials, so they + # are not CSRF-vulnerable. Narrowly exempt them from the header check + # before evaluating Sec-Fetch-Site / Origin. Only "Bearer" is exempt; + # schemes like Basic or Digest can be browser-managed and ambient. + authorization = headers.get(b"authorization", b"").decode("latin-1") + if authorization: + scheme = authorization.split(None, 1)[0].lower() + if scheme == "bearer": + await self.app(scope, receive, send) + return + + origin_bytes = headers.get(b"origin") + sec_fetch_site_bytes = headers.get(b"sec-fetch-site") + host_bytes = headers.get(b"host", b"") + origin = origin_bytes.decode("latin-1") if origin_bytes else None + sec_fetch_site = ( + sec_fetch_site_bytes.decode("latin-1") if sec_fetch_site_bytes else None + ) + host = host_bytes.decode("latin-1") + + # Primary defense: Sec-Fetch-Site (set by browsers, unforgeable from JS) + if sec_fetch_site is not None: + if sec_fetch_site in ("same-origin", "none"): + await self.app(scope, receive, send) + return + await self._forbid( + send, + "Sec-Fetch-Site was {!r}, expected 'same-origin' or 'none'".format( + sec_fetch_site + ), + ) + return + + # No Sec-Fetch-Site and no Origin -> non-browser client (curl, API, etc.) + if origin is None: + await self.app(scope, receive, send) + return + + # Fallback for older browsers: Origin host must match Host header + parsed = urllib.parse.urlparse(origin) + origin_host = parsed.hostname or "" + if parsed.port: + origin_host = "{}:{}".format(origin_host, parsed.port) + if origin_host == host: + await self.app(scope, receive, send) + return + + await self._forbid( + send, + "Origin {!r} does not match Host {!r}".format(origin, host), + ) + + async def _forbid(self, send, reason): + await asgi_send( + send, + content=await self.datasette.render_template( + "csrf_error.html", {"reason": reason} + ), + status=403, + content_type="text/html; charset=utf-8", + ) diff --git a/datasette/default_permissions/__init__.py b/datasette/default_permissions/__init__.py index 4ebe6147..9e3bb648 100644 --- a/datasette/default_permissions/__init__.py +++ b/datasette/default_permissions/__init__.py @@ -17,7 +17,7 @@ UNION/INTERSECT operations. The order of evaluation is: from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING if TYPE_CHECKING: from datasette.app import Datasette @@ -39,16 +39,6 @@ from .defaults import ( ) -@hookimpl -def skip_csrf(scope) -> Optional[bool]: - """Skip CSRF check for JSON content-type requests.""" - if scope["type"] == "http": - headers = scope.get("headers") or {} - if dict(headers).get(b"content-type") == b"application/json": - return True - return None - - @hookimpl def canned_queries(datasette: "Datasette", database: str, actor) -> dict: """Return canned queries defined in datasette.yaml configuration.""" diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 7af9cbce..27e20bd4 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -187,11 +187,6 @@ def homepage_actions(datasette, actor, request): """Links for the homepage actions menu""" -@hookspec -def skip_csrf(datasette, scope): - """Mechanism for skipping CSRF checks for certain requests""" - - @hookspec def handle_exception(datasette, request, exception): """Handle an uncaught exception. Can return a Response or None.""" diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 0d89e11c..21f8c693 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -38,7 +38,6 @@ {% endif %} {% if show_logout %} {% endif %} diff --git a/datasette/templates/create_token.html b/datasette/templates/create_token.html index ad7c71b6..270d9c86 100644 --- a/datasette/templates/create_token.html +++ b/datasette/templates/create_token.html @@ -50,7 +50,6 @@ -
diff --git a/datasette/templates/csrf_error.html b/datasette/templates/csrf_error.html index 7cd4b42b..b84749d3 100644 --- a/datasette/templates/csrf_error.html +++ b/datasette/templates/csrf_error.html @@ -1,5 +1,5 @@ {% extends "base.html" %} -{% block title %}CSRF check failed){% endblock %} +{% block title %}CSRF check failed{% endblock %} {% block content %}

Form origin check failed

@@ -7,7 +7,7 @@
Technical details

Developers: consult Datasette's CSRF protection documentation.

-

Error code is {{ message_name }}.

+

Reason: {{ reason }}

{% endblock %} diff --git a/datasette/templates/debug_permissions_playground.html b/datasette/templates/debug_permissions_playground.html index 91ce1fcf..4410a677 100644 --- a/datasette/templates/debug_permissions_playground.html +++ b/datasette/templates/debug_permissions_playground.html @@ -52,7 +52,6 @@ textarea {
-
diff --git a/datasette/templates/logout.html b/datasette/templates/logout.html index c8fc642a..a99870e6 100644 --- a/datasette/templates/logout.html +++ b/datasette/templates/logout.html @@ -10,7 +10,6 @@
-
diff --git a/datasette/templates/messages_debug.html b/datasette/templates/messages_debug.html index 2940cd69..891cf915 100644 --- a/datasette/templates/messages_debug.html +++ b/datasette/templates/messages_debug.html @@ -19,7 +19,6 @@
-
diff --git a/datasette/templates/query.html b/datasette/templates/query.html index a6e9a3aa..8b405da5 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -65,7 +65,6 @@ {% endif %}

{% if not hide_sql %}{% endif %} - {% if canned_query_write %}{% endif %} {{ show_hide_hidden }} {% if canned_query and edit_sql_url %}Edit SQL{% endif %} diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py index 1606da05..de7e94af 100644 --- a/datasette/utils/testing.py +++ b/datasette/utils/testing.py @@ -95,15 +95,8 @@ class TestClient: cookies = cookies or {} post_data = post_data or {} assert not (post_data and body), "Provide one or other of body= or post_data=" - # Maybe fetch a csrftoken first - if csrftoken_from is not None: - assert body is None, "body= is not compatible with csrftoken_from=" - if csrftoken_from is True: - csrftoken_from = path - token_response = await self._request(csrftoken_from, cookies=cookies) - csrftoken = token_response.cookies["ds_csrftoken"] - cookies["ds_csrftoken"] = csrftoken - post_data["csrftoken"] = csrftoken + # csrftoken_from is accepted for backward compatibility but is now a no-op. + # Datasette no longer uses CSRF tokens - see CrossOriginProtectionMiddleware. if post_data: body = urlencode(post_data, doseq=True) return await self._request( diff --git a/docs/internals.rst b/docs/internals.rst index 06a6b348..1693a241 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1941,19 +1941,16 @@ The ``Database`` class also provides properties and methods for introspecting th CSRF protection =============== -Datasette uses `asgi-csrf `__ to guard against CSRF attacks on form POST submissions. Users receive a ``ds_csrftoken`` cookie which is compared against the ``csrftoken`` form field (or ``x-csrftoken`` HTTP header) for every incoming request. +Datasette protects against Cross-Site Request Forgery by inspecting the browser-set ``Sec-Fetch-Site`` and ``Origin`` headers on every unsafe (non-``GET``/``HEAD``/``OPTIONS``) request, following the approach described in `Filippo Valsorda's article `__ and implemented in Go 1.25's ``http.CrossOriginProtection``. -If your plugin implements a ``

`` anywhere you will need to include that token. You can do so with the following template snippet: +A request is rejected with a ``403`` response if: -.. code-block:: html +- It carries ``Sec-Fetch-Site`` with any value other than ``same-origin`` or ``none``, or +- It has no ``Sec-Fetch-Site`` header but does carry an ``Origin`` header whose host does not match the request ``Host``. - +Requests from non-browser clients (``curl``, server-to-server scripts, etc.) do not send ``Sec-Fetch-Site`` or ``Origin`` and pass through unchanged - CSRF is a browser-only attack. -If you are rendering templates using the :ref:`datasette_render_template` method the ``csrftoken()`` helper will only work if you provide the ``request=`` argument to that method. If you forget to do this you will see the following error:: - - form-urlencoded POST field did not match cookie - -You can selectively disable CSRF protection using the :ref:`plugin_hook_skip_csrf` hook. +No token, cookie, or hidden form field is needed. Any ```` inside Datasette or a plugin will be accepted from the same origin without modification. .. _internals_internal: diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index 79b3e669..54dde20c 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -1837,31 +1837,6 @@ This example logs an error to `Sentry `__ and then renders a Example: `datasette-sentry `_ -.. _plugin_hook_skip_csrf: - -skip_csrf(datasette, scope) ---------------------------- - -``datasette`` - :ref:`internals_datasette` - You can use this to access plugin configuration options via ``datasette.plugin_config(your_plugin_name)``, or to execute SQL queries. - -``scope`` - dictionary - The `ASGI scope `__ for the incoming HTTP request. - -This hook can be used to skip :ref:`internals_csrf` for a specific incoming request. For example, you might have a custom path at ``/submit-comment`` which is designed to accept comments from anywhere, whether or not the incoming request originated on the site and has an accompanying CSRF token. - -This example will disable CSRF protection for that specific URL path: - -.. code-block:: python - - from datasette import hookimpl - - - @hookimpl - def skip_csrf(scope): - return scope["path"] == "/submit-comment" - -If any of the currently active ``skip_csrf()`` plugin hooks return ``True``, CSRF protection will be skipped for the request. .. _plugin_hook_menu_links: diff --git a/docs/plugins.rst b/docs/plugins.rst index eb7b06e1..d9938dba 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -241,8 +241,7 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "version": null, "hooks": [ "canned_queries", - "permission_resources_sql", - "skip_csrf" + "permission_resources_sql" ] }, { diff --git a/docs/testing_plugins.rst b/docs/testing_plugins.rst index b0713e7c..1b10c132 100644 --- a/docs/testing_plugins.rst +++ b/docs/testing_plugins.rst @@ -235,9 +235,8 @@ As an example, here's a very simple plugin which executes an HTTP response and r if request.method == "GET": return Response.html(""" - - """.format(request.scope["csrftoken"]())) + """) vars = await request.post_vars() url = vars["url"] return Response.text(httpx.get(url).text) diff --git a/docs/upgrade_guide.md b/docs/upgrade_guide.md index b67eb054..33a8343b 100644 --- a/docs/upgrade_guide.md +++ b/docs/upgrade_guide.md @@ -155,3 +155,63 @@ token = await datasette.create_token( ``` The `datasette create-token` CLI command is unchanged. + +(upgrade_guide_csrf)= +### CSRF protection is now header-based + +Datasette's Cross-Site Request Forgery protection no longer uses tokens. The previous `asgi-csrf` mechanism - which set a `ds_csrftoken` cookie and required a matching `` in every form - has been replaced with an ASGI middleware that inspects the browser-set `Sec-Fetch-Site` and `Origin` headers, following the approach described in [Filippo Valsorda's research](https://words.filippo.io/csrf/) and implemented in Go 1.25's `http.CrossOriginProtection`. + +This works identically on HTTPS, HTTP, and localhost. Non-browser clients (curl, Python `requests`, server-to-server scripts) do not send `Sec-Fetch-Site` or `Origin` and are passed through unchanged - CSRF is a browser-only attack. + +Requests that carry an explicit `Authorization: Bearer ...` header are also exempt from the CSRF check, because bearer tokens are not ambient browser credentials: a malicious cross-origin page cannot cause the browser to attach a target site's bearer token unless the attacker's JavaScript already possesses it. This exemption is narrow - it covers the `Bearer` scheme only, not `Basic` or `Digest` - and it does not depend on the `--cors` setting. The exemption is about CSRF classification, not browser read access; CORS still controls the latter. + +#### What you can remove + +You can now delete any of the following from your plugins and custom templates: + +- Hidden CSRF form fields: + + ```html + + ``` + + The `csrftoken()` template helper (and `request.scope["csrftoken"]()` for plugins that call it from Python) still exists as a compatibility shim. It now returns a per-request random string rather than a cookie-bound signed value. Datasette no longer validates this token, and no `ds_csrftoken` cookie is set. + + **Important for plugin authors:** if your plugin previously used `request.scope["csrftoken"]()` or the `ds_csrftoken` cookie as a security primitive (for example, signing a URL and later comparing it to the cookie), the invariant that the token equals `request.cookies["ds_csrftoken"]` no longer holds. Replace those flows with signed, short-lived action URLs or explicit non-ambient credentials. + +- Manual CSRF token extraction in tests, e.g.: + + ```python + # No longer needed + csrftoken = response.cookies["ds_csrftoken"] + cookies["ds_csrftoken"] = csrftoken + post_data["csrftoken"] = csrftoken + ``` + + The `ds_csrftoken` cookie is no longer set at all. The `csrftoken_from=` argument of the Datasette test client's `.post()` method is now a no-op and can be removed from your test code. + +#### Breaking changes + +- **The `skip_csrf` plugin hook has been removed.** Existing plugins that still declare a `skip_csrf` hookimpl will continue to load - pluggy silently ignores unknown hook names - but the hook is no longer consulted by core, so the flows it previously unlocked will now be blocked (or allowed) purely on the basis of the new header check. + + The new middleware already covers the common cases that `skip_csrf` was written for: + + - Browser-initiated JSON POSTs automatically get `Sec-Fetch-Site: same-origin` and pass the check. + - Non-browser API clients (curl, `requests`, server-to-server scripts) do not send browser security headers and are passed through. + - Requests with an explicit `Authorization: Bearer ...` header are exempt from the CSRF check (see above). + + If your plugin previously used `skip_csrf` to accept cross-origin browser POSTs, replace that flow with an authentication mechanism that does **not** rely on ambient browser credentials. Safe patterns include: + + - Requiring an `Authorization: Bearer ...` API token on the endpoint. + - Requiring a non-ambient credential in the request body (a webhook secret, HMAC signature, signed capability URL, OAuth client credential, or similar). + - Issuing a short-lived signed URL that encodes the actor, the action, and an expiry, and verifying the signature on request. + + Do not rely on the `ds_csrftoken` cookie for your own plugin's security checks - Datasette no longer sets or validates it, and the `request.scope["csrftoken"]()` compatibility shim now returns a fresh random value each request rather than the signed cookie-bound value it used to. + +- **The `asgi-csrf` dependency has been dropped.** Any plugin that imported from `asgi_csrf` directly will need to be updated. + +- **The `csrf_error.html` template now receives a `reason` context variable** instead of `message_id` and `message_name`. Custom overrides of this template should be updated. + +#### Security properties + +For defense-in-depth the `ds_actor` and `ds_messages` cookies continue to be set with `SameSite=Lax` (Datasette's long-standing default). This means a genuine cross-site POST from an attacker's page would arrive without the user's authentication cookie even if the header check somehow failed. diff --git a/pyproject.toml b/pyproject.toml index 2ab2ce10..a0ee050c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ "uvicorn>=0.11", "aiofiles>=0.4", "janus>=0.6.2", - "asgi-csrf>=0.10", "PyYAML>=5.3", "mergedeep>=1.1.1", "itsdangerous>=1.1", diff --git a/tests/conftest.py b/tests/conftest.py index efa02c0a..1a9b940f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,6 +42,17 @@ def wait_until_responds(url, timeout=5.0, client=httpx, **kwargs): raise AssertionError("Timed out waiting for {} to respond".format(url)) +@pytest.fixture +def bare_ds(): + """ + Minimal Datasette with no plugins, data, metadata, or config - for tests + that want to exercise core behavior (e.g. middleware) in isolation. + """ + from datasette.app import Datasette + + return Datasette(memory=True) + + @pytest_asyncio.fixture async def ds_client(): from datasette.app import Datasette diff --git a/tests/fixtures.py b/tests/fixtures.py index 1f6c491d..713e6c17 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -54,7 +54,6 @@ EXPECTED_PLUGINS = [ "register_token_handler", "render_cell", "row_actions", - "skip_csrf", "startup", "table_actions", "view_actions", diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 77079557..4e401c07 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -444,11 +444,6 @@ def homepage_actions(datasette, actor, request): ] -@hookimpl -def skip_csrf(scope): - return scope["path"] == "/skip-csrf" - - @hookimpl def register_actions(datasette): extras_old = datasette.plugin_config("datasette-register-permissions") or {} diff --git a/tests/test_canned_queries.py b/tests/test_canned_queries.py index ed6202a4..5e36a87a 100644 --- a/tests/test_canned_queries.py +++ b/tests/test_canned_queries.py @@ -109,31 +109,12 @@ def test_insert(canned_write_client): assert response.headers["Location"] == "/data/add_name?success" -@pytest.mark.parametrize( - "query_name,expect_csrf_hidden_field", - [ - ("canned_read", False), - ("add_name_specify_id", True), - ("add_name", True), - ], -) -def test_canned_query_form_csrf_hidden_field( - canned_write_client, query_name, expect_csrf_hidden_field -): - response = canned_write_client.get(f"/data/{query_name}") - html = response.text - fragment = '' in response.text -def test_vary_header(canned_write_client): - # These forms embed a csrftoken so they should be served with Vary: Cookie +def test_canned_query_pages_no_vary_header(canned_write_client): + # These pages no longer embed per-cookie CSRF tokens, so they must not + # set Vary: Cookie - they should be cacheable across users. assert "vary" not in canned_write_client.get("/data").headers - assert "Cookie" == canned_write_client.get("/data/update_name").headers["vary"] + assert "vary" not in canned_write_client.get("/data/update_name").headers def test_json_post_body(canned_write_client): diff --git a/tests/test_csrf_middleware.py b/tests/test_csrf_middleware.py new file mode 100644 index 00000000..07ff598e --- /dev/null +++ b/tests/test_csrf_middleware.py @@ -0,0 +1,270 @@ +""" +Tests for the header-based CSRF (Cross-Origin) protection middleware. + +Datasette uses the Sec-Fetch-Site + Origin header approach described in +Filippo Valsorda's article (https://words.filippo.io/csrf/) and implemented +in Go 1.25's http.CrossOriginProtection. This replaces the previous +token-based asgi-csrf mechanism. +""" + +import pluggy +import pytest + +from datasette import hookimpl +from datasette.csrf import CrossOriginProtectionMiddleware, _install_legacy_csrftoken + + +async def _post(bare_ds, **kwargs): + kwargs.setdefault("data", {"message": "hello", "message_class": "info"}) + return await bare_ds.client.post("/-/messages", **kwargs) + + +async def _run_middleware(scope): + """ + Run CrossOriginProtectionMiddleware against a scope and return + ("allowed",) if the inner app was called, or ("blocked", status) + if the middleware sent a response itself. + """ + + class FakeDs: + async def render_template(self, name, ctx): + return "BLOCKED" + + inner_called = [] + + async def app(scope, receive, send): + inner_called.append(True) + + sent = [] + + async def send(msg): + sent.append(msg) + + mw = CrossOriginProtectionMiddleware(app, FakeDs()) + await mw(scope, None, send) + if inner_called: + return ("allowed",) + start = [m for m in sent if m["type"] == "http.response.start"][0] + return ("blocked", start["status"]) + + +def _http_scope(headers, method="POST"): + return { + "type": "http", + "method": method, + "headers": [(k.encode(), v.encode()) for k, v in headers.items()], + } + + +@pytest.mark.asyncio +@pytest.mark.parametrize("method", ["GET", "HEAD", "OPTIONS"]) +async def test_safe_methods_always_pass(bare_ds, method): + # Safe methods bypass CSRF entirely, even with hostile headers + response = await bare_ds.client.request( + method, + "/-/messages", + headers={"sec-fetch-site": "cross-site", "origin": "http://evil.example"}, + ) + assert response.status_code != 403 or "origin" not in response.text.lower() + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sec_fetch_site", ["same-origin", "none"]) +async def test_post_with_trusted_sec_fetch_site_allowed(bare_ds, sec_fetch_site): + # "same-origin" = first-party; "none" = user-initiated direct navigation + response = await _post(bare_ds, headers={"sec-fetch-site": sec_fetch_site}) + assert response.status_code != 403 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sec_fetch_site", ["cross-site", "same-site", "cross-origin"]) +async def test_post_with_untrusted_sec_fetch_site_blocked(bare_ds, sec_fetch_site): + # same-site is blocked too: different subdomains must not bypass CSRF + response = await _post( + bare_ds, data={"message": "hi"}, headers={"sec-fetch-site": sec_fetch_site} + ) + assert response.status_code == 403 + assert response.headers["content-type"].startswith("text/html") + + +@pytest.mark.asyncio +async def test_post_with_no_browser_headers_allowed(bare_ds): + # curl / requests / server-to-server: no Sec-Fetch-Site, no Origin. + # CSRF is browser-specific so these pass through. + response = await _post(bare_ds) + assert response.status_code != 403 + + +@pytest.mark.asyncio +async def test_post_with_matching_origin_allowed(bare_ds): + # Fallback for older browsers without Sec-Fetch-Site: Origin must match Host + response = await _post(bare_ds, headers={"origin": "http://localhost"}) + assert response.status_code != 403 + + +@pytest.mark.asyncio +async def test_post_with_mismatched_origin_blocked(bare_ds): + response = await _post( + bare_ds, data={"message": "hi"}, headers={"origin": "http://evil.example.com"} + ) + assert response.status_code == 403 + + +@pytest.mark.asyncio +async def test_csrf_error_page_renders(bare_ds): + response = await _post( + bare_ds, data={"message": "hi"}, headers={"sec-fetch-site": "cross-site"} + ) + assert response.status_code == 403 + assert "origin" in response.text.lower() + + +@pytest.mark.asyncio +async def test_csrf_error_page_title_has_no_typo(bare_ds): + response = await _post( + bare_ds, data={"message": "hi"}, headers={"sec-fetch-site": "cross-site"} + ) + assert "CSRF check failed" in response.text + assert "CSRF check failed)" not in response.text + + +@pytest.mark.asyncio +@pytest.mark.parametrize("scope_type", ["websocket", "lifespan"]) +async def test_non_http_scope_passes_through(scope_type): + called = [] + + async def app(scope, receive, send): + called.append(scope["type"]) + + mw = CrossOriginProtectionMiddleware(app, datasette=None) + await mw({"type": scope_type}, None, None) + assert called == [scope_type] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "label,headers,expected", + [ + ( + "plain cross-site blocked", + {"sec-fetch-site": "cross-site", "host": "example.com"}, + ("blocked", 403), + ), + ( + "basic auth does not bypass", + { + "sec-fetch-site": "cross-site", + "host": "example.com", + "authorization": "Basic dXNlcjpwYXNz", + }, + ("blocked", 403), + ), + ( + "bearer auth bypasses", + { + "sec-fetch-site": "cross-site", + "origin": "https://evil.example", + "host": "example.com", + "authorization": "Bearer dstok_abc", + }, + ("allowed",), + ), + ( + "bearer scheme case-insensitive", + { + "sec-fetch-site": "cross-site", + "host": "example.com", + "authorization": "bearer dstok_abc", + }, + ("allowed",), + ), + ( + "non-browser (no Sec-Fetch-Site, no Origin) allowed", + {"host": "example.com"}, + ("allowed",), + ), + ], +) +async def test_middleware_unit(label, headers, expected): + assert await _run_middleware(_http_scope(headers)) == expected + + +def test_legacy_csrftoken_scope_value_nonempty(app_client): + # GET /post/ calls request.scope["csrftoken"]() - must not 500 + response = app_client.get("/post/") + assert response.status == 200 + assert response.text.strip() != "" + assert len(response.text.strip()) >= 20 + + +def test_legacy_csrftoken_no_ds_csrftoken_cookie(app_client): + response = app_client.get("/post/") + assert "ds_csrftoken" not in response.cookies + + +def test_legacy_csrftoken_varies_across_requests(app_client): + r1 = app_client.get("/post/").text.strip() + r2 = app_client.get("/post/").text.strip() + assert r1 != r2 + + +def test_legacy_csrftoken_stable_within_request(): + # Two calls in the same request return the same value + scope = {} + _install_legacy_csrftoken(scope) + assert scope["csrftoken"]() == scope["csrftoken"]() + + +@pytest.mark.asyncio +async def test_cross_site_post_blocked_even_with_ds_csrftoken_cookie(bare_ds): + # A stale ds_csrftoken cookie + csrftoken body field must NOT bypass + # the header-based CSRF check. + response = await _post( + bare_ds, + data={"message": "hi", "message_class": "info", "csrftoken": "abc"}, + headers={"sec-fetch-site": "cross-site"}, + cookies={"ds_csrftoken": "abc"}, + ) + assert response.status_code == 403 + + +@pytest.mark.asyncio +async def test_bearer_invalid_token_not_csrf_error(bare_ds): + # Cross-site POST with bogus bearer must pass CSRF and be rejected + # by auth/permission handling, not by the CSRF middleware. + response = await _post( + bare_ds, + headers={ + "sec-fetch-site": "cross-site", + "authorization": "Bearer totally-invalid-token", + }, + ) + if response.status_code == 403: + assert "origin" not in response.text.lower() + assert "sec-fetch-site" not in response.text.lower() + + +@pytest.mark.asyncio +async def test_cross_site_post_without_auth_still_blocked(bare_ds): + response = await _post( + bare_ds, data={"message": "hi"}, headers={"sec-fetch-site": "cross-site"} + ) + assert response.status_code == 403 + + +def test_legacy_skip_csrf_hookimpl_does_not_break_loading(): + # Plugins that still define skip_csrf must load cleanly - pluggy ignores + # unknown hook implementations - even though the hook is no longer + # consulted by core. Use a throwaway PluginManager so that registering + # this hookimpl does not leak a _HookCaller onto the real datasette.pm. + class LegacyPlugin: + __name__ = "legacy-skip-csrf-plugin" + + @hookimpl + def skip_csrf(self, datasette, scope): + return True + + throwaway = pluggy.PluginManager("datasette") + plugin = LegacyPlugin() + throwaway.register(plugin, name=LegacyPlugin.__name__) + assert throwaway.is_registered(plugin) diff --git a/tests/test_html.py b/tests/test_html.py index 39249c19..4fc144ab 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1202,11 +1202,12 @@ async def test_custom_csrf_error(ds_client): data={ "message": "A message", }, - cookies={"csrftoken": "x"}, + headers={"sec-fetch-site": "cross-site"}, ) assert response.status_code == 403 assert response.headers["content-type"] == "text/html; charset=utf-8" - assert "Error code is FORM_URLENCODED_MISMATCH." in response.text + assert "Reason:" in response.text + assert "cross-site" in response.text @pytest.mark.asyncio diff --git a/tests/test_permissions.py b/tests/test_permissions.py index f9303759..04195f75 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -711,10 +711,6 @@ async def test_actor_restricted_permissions( perms_ds.pdb = True perms_ds.root_enabled = True # Allow root actor to access /-/permissions cookies = {"ds_actor": perms_ds.sign({"a": {"id": "root"}}, "actor")} - csrftoken = (await perms_ds.client.get("/-/permissions", cookies=cookies)).cookies[ - "ds_csrftoken" - ] - cookies["ds_csrftoken"] = csrftoken response = await perms_ds.client.post( "/-/permissions", data={ @@ -722,7 +718,6 @@ async def test_actor_restricted_permissions( "permission": permission, "resource_1": resource_1, "resource_2": resource_2, - "csrftoken": csrftoken, }, cookies=cookies, ) diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 4ce2c7c0..c9de1c57 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -812,21 +812,25 @@ def test_hook_register_routes_override(): def test_hook_register_routes_post(app_client): - response = app_client.post("/post/", {"this is": "post data"}, csrftoken_from=True) + response = app_client.post("/post/", {"this is": "post data"}) assert response.status_code == 200 - assert "csrftoken" in response.json assert response.json["this is"] == "post data" def test_hook_register_routes_csrftoken(restore_working_directory, tmpdir_factory): + # csrftoken() is a legacy compatibility shim that returns a + # per-request random value - it is no longer used for CSRF enforcement. templates = tmpdir_factory.mktemp("templates") (templates / "csrftoken_form.html").write_text( - "CSRFTOKEN: {{ csrftoken() }}", "utf-8" + "CSRFTOKEN:{{ csrftoken() }}:END", "utf-8" ) with make_app_client(template_dir=templates) as client: response = client.get("/csrftoken-form/") - expected_token = client.ds._last_request.scope["csrftoken"]() - assert f"CSRFTOKEN: {expected_token}" == response.text + assert response.text.startswith("CSRFTOKEN:") + assert response.text.endswith(":END") + token = response.text[len("CSRFTOKEN:") : -len(":END")] + assert len(token) >= 20 + assert "ds_csrftoken" not in response.cookies @pytest.mark.asyncio @@ -1125,31 +1129,6 @@ async def test_hook_homepage_actions(ds_client): ] -def test_hook_skip_csrf(app_client): - cookie = app_client.actor_cookie({"id": "test"}) - csrf_response = app_client.post( - "/post/", - post_data={"this is": "post data"}, - csrftoken_from=True, - cookies={"ds_actor": cookie}, - ) - assert csrf_response.status_code == 200 - missing_csrf_response = app_client.post( - "/post/", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert missing_csrf_response.status_code == 403 - # But "/skip-csrf" should allow - allow_csrf_response = app_client.post( - "/skip-csrf", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert allow_csrf_response.status_code == 405 # Method not allowed - # /skip-csrf-2 should not - second_missing_csrf_response = app_client.post( - "/skip-csrf-2", post_data={"this is": "post data"}, cookies={"ds_actor": cookie} - ) - assert second_missing_csrf_response.status_code == 403 - - def _extract_commands(output): lines = output.split("Commands:\n", 1)[1].split("\n") return {line.split()[0].replace("*", "") for line in lines if line.strip()}