From 7cdc55c6836fe246b1ca8a13a965a39991c9ffec Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 15 Jun 2019 14:27:11 -0700 Subject: [PATCH 01/33] AsgiRouter and AsgiView WIP --- datasette/views/base.py | 63 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 9db8cc76..69c0ba16 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -49,7 +49,68 @@ class DatasetteError(Exception): self.messagge_is_html = messagge_is_html -class BaseView(HTTPMethodView): +class AsgiRouter: + def __init__(self, routes=None): + routes = routes or [] + self.routes = [ + # Compile any strings to regular expressions + (re.compile(pattern) if isinstance(pattern, str) else pattern, view) + for pattern, view in routes + ] + + async def __call__(self, scope, receive, send): + for regex, view in self.routes: + match = regex.match(scope["path"]) + if match is not None: + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + return await view(new_scope, receive, send) + return await self.handle_404(scope, receive, send) + + async def handle_404(self, scope, receive, send): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"

404

"}) + + +async def hello_world(scope, receive, send): + assert scope['type'] == 'http' + await send({ + 'type': 'http.response.start', + 'status': 200, + 'headers': [ + [b'content-type', b'text/html'], + ] + }) + await send({ + 'type': 'http.response.body', + 'body': b'

Hello world!

', + }) + + + +app = AsgiRouter([ + ('/hello/', hello_world), +]) + + +class AsgiView(HTTPMethodView): + async def asgi(self, scope, receive, send): + # Uses scope to create a Sanic-compatible request object, + # then dispatches that to self.get(...) or self.options(...) + # along with keyword arguments that were already tucked + # into scope["url_route"]["kwargs"] by the router + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + pass + + +class BaseView(AsgiView): + ds = None + def _asset_urls(self, key, template, context): # Flatten list-of-lists from plugins: seen_urls = set() From d7364116991d85612c88ed0f800f1c36d83ba57b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 16 Jun 2019 08:43:58 -0700 Subject: [PATCH 02/33] Applied black --- datasette/views/base.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 69c0ba16..b278f3fb 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -78,24 +78,18 @@ class AsgiRouter: async def hello_world(scope, receive, send): - assert scope['type'] == 'http' - await send({ - 'type': 'http.response.start', - 'status': 200, - 'headers': [ - [b'content-type', b'text/html'], - ] - }) - await send({ - 'type': 'http.response.body', - 'body': b'

Hello world!

', - }) + assert scope["type"] == "http" + await send( + { + "type": "http.response.start", + "status": 200, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"

Hello world!

"}) - -app = AsgiRouter([ - ('/hello/', hello_world), -]) +app = AsgiRouter([("/hello/", hello_world)]) class AsgiView(HTTPMethodView): From 39d66f17c1a97c571998a1067fb8263509473a6f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 18 Jun 2019 17:22:26 -0700 Subject: [PATCH 03/33] Revert "New encode/decode_path_component functions" Refs #272 This reverts commit 9fdb47ca952b93b7b60adddb965ea6642b1ff523. Now that ASGI supports raw_path we don't need our own encoding scheme! --- datasette/utils.py | 21 --------------------- tests/test_utils.py | 16 ---------------- 2 files changed, 37 deletions(-) diff --git a/datasette/utils.py b/datasette/utils.py index 56fe2996..58746be4 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -261,27 +261,6 @@ def escape_sqlite(s): return "[{}]".format(s) -_decode_path_component_re = re.compile(r"U\+([\da-h]{4})", re.IGNORECASE) -_encode_path_component_re = re.compile( - "[{}]".format( - "".join( - re.escape(c) - for c in (";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "~") - ) - ) -) - - -def decode_path_component(table_name): - return _decode_path_component_re.sub(lambda m: chr(int(m.group(1), 16)), table_name) - - -def encode_path_component(table_name): - return _encode_path_component_re.sub( - lambda m: "U+{0:0{1}x}".format(ord(m.group(0)), 4).upper(), table_name - ) - - def make_dockerfile( files, metadata_file, diff --git a/tests/test_utils.py b/tests/test_utils.py index 73aee12a..a5f603e6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -381,19 +381,3 @@ def test_path_with_format(path, format, extra_qs, expected): ) def test_format_bytes(bytes, expected): assert expected == utils.format_bytes(bytes) - - -@pytest.mark.parametrize( - "name,expected", - [ - ("table", "table"), - ("table/and/slashes", "tableU+002FandU+002Fslashes"), - ("~table", "U+007Etable"), - ("+bobcats!", "U+002Bbobcats!"), - ("U+007Etable", "UU+002B007Etable"), - ], -) -def test_encode_decode_path_component(name, expected): - encoded = utils.encode_path_component(name) - assert encoded == expected - assert name == utils.decode_path_component(encoded) From 180d5be8118560d348dd546d51070fd703b37f02 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 18:06:24 -0700 Subject: [PATCH 04/33] First partially working version of ASGI-powered Datasette #272 Lots still to do: * Static files are not being served * Streaming CSV files don't work * Tests all fail * Some URLs (e.g. the 'next' link on tables) are incorrect But... the server does start up and you can browse databases/tables --- datasette/app.py | 196 +++++++++++++++------------------------- datasette/cli.py | 3 +- datasette/views/base.py | 76 ++++++++++------ setup.py | 3 +- 4 files changed, 123 insertions(+), 155 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 2ef7da41..fe4a8683 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -17,7 +17,7 @@ from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader from sanic import Sanic, response from sanic.exceptions import InvalidUsage, NotFound -from .views.base import DatasetteError, ureg +from .views.base import DatasetteError, ureg, AsgiRouter from .views.database import DatabaseDownload, DatabaseView from .views.index import IndexView from .views.special import JsonDataView @@ -126,8 +126,15 @@ CONFIG_OPTIONS = ( DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} -async def favicon(request): - return response.text("") +async def favicon(scope, recieve, send): + await send( + { + "type": "http.response.start", + "status": 200, + "headers": [[b"content-type", b"text/plain"]], + } + ) + await send({"type": "http.response.body", "body": b""}) class Datasette: @@ -543,21 +550,8 @@ class Datasette: self.renderers[renderer["extension"]] = renderer["callback"] def app(self): - class TracingSanic(Sanic): - async def handle_request(self, request, write_callback, stream_callback): - if request.args.get("_trace"): - request["traces"] = [] - request["trace_start"] = time.time() - with capture_traces(request["traces"]): - await super().handle_request( - request, write_callback, stream_callback - ) - else: - await super().handle_request( - request, write_callback, stream_callback - ) - - app = TracingSanic(__name__) + "Returns an ASGI app function that serves the whole of Datasette" + # TODO: re-implement ?_trace= mechanism, see class TracingSanic default_templates = str(app_root / "datasette" / "templates") template_paths = [] if self.template_dir: @@ -588,134 +582,86 @@ class Datasette: pm.hook.prepare_jinja2_environment(env=self.jinja_env) self.register_renderers() + + routes = [] + + def add_route(view, regex): + routes.append((regex, view)) + # Generate a regex snippet to match all registered renderer file extensions renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - app.add_route(IndexView.as_view(self), r"/") + add_route(IndexView.as_asgi(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires - app.add_route(favicon, "/favicon.ico") - app.static("/-/static/", str(app_root / "datasette" / "static")) - for path, dirname in self.static_mounts: - app.static(path, dirname) - # Mount any plugin static/ directories - for plugin in get_plugins(pm): - if plugin["static_path"]: - modpath = "/-/static-plugins/{}/".format(plugin["name"]) - app.static(modpath, plugin["static_path"]) - app.add_route( - JsonDataView.as_view(self, "metadata.json", lambda: self._metadata), - r"/-/metadata", + add_route(favicon, "/favicon.ico") + # # TODO: re-enable the static bits + # app.static("/-/static/", str(app_root / "datasette" / "static")) + # for path, dirname in self.static_mounts: + # app.static(path, dirname) + # # Mount any plugin static/ directories + # for plugin in get_plugins(pm): + # if plugin["static_path"]: + # modpath = "/-/static-plugins/{}/".format(plugin["name"]) + # app.static(modpath, plugin["static_path"]) + add_route( + JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata), + r"/-/metadata(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "versions.json", self.versions), - r"/-/versions", + add_route( + JsonDataView.as_asgi(self, "versions.json", self.versions), + r"/-/versions(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "plugins.json", self.plugins), - r"/-/plugins", + add_route( + JsonDataView.as_asgi(self, "plugins.json", self.plugins), + r"/-/plugins(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "config.json", lambda: self._config), - r"/-/config", + add_route( + JsonDataView.as_asgi(self, "config.json", lambda: self._config), + r"/-/config(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "databases.json", self.connected_databases), - r"/-/databases", + add_route( + JsonDataView.as_asgi(self, "databases.json", self.connected_databases), + r"/-/databases(?P(\.json)?)$", ) - app.add_route( - DatabaseDownload.as_view(self), r"/" + add_route( + DatabaseDownload.as_asgi(self), r"/(?P[^/]+?)(?P\.db)$" ) - app.add_route( - DatabaseView.as_view(self), - r"/", + add_route( + DatabaseView.as_asgi(self), + r"/(?P[^/]+?)(?P" + + renderer_regex + + r"|.jsono|\.csv)?$", ) - app.add_route( - TableView.as_view(self), r"//" + add_route( + TableView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?$)", ) - app.add_route( - RowView.as_view(self), + add_route( + RowView.as_asgi(self), r"///", ) self.register_custom_units() + app = AsgiRouter(routes) # On 404 with a trailing slash redirect to path without that slash: # pylint: disable=unused-variable - @app.middleware("response") - def redirect_on_404_with_trailing_slash(request, original_response): - if original_response.status == 404 and request.path.endswith("/"): - path = request.path.rstrip("/") - if request.query_string: - path = "{}?{}".format(path, request.query_string) - return response.redirect(path) - - @app.middleware("response") - async def add_traces_to_response(request, response): - if request.get("traces") is None: - return - traces = request["traces"] - trace_info = { - "request_duration_ms": 1000 * (time.time() - request["trace_start"]), - "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), - "num_traces": len(traces), - "traces": traces, - } - if "text/html" in response.content_type and b"" in response.body: - extra = json.dumps(trace_info, indent=2) - extra_html = "
{}
".format(extra).encode("utf8") - response.body = response.body.replace(b"", extra_html) - elif "json" in response.content_type and response.body.startswith(b"{"): - data = json.loads(response.body.decode("utf8")) - if "_trace" not in data: - data["_trace"] = trace_info - response.body = json.dumps(data).encode("utf8") - - @app.exception(Exception) - def on_exception(request, exception): - title = None - help = None - if isinstance(exception, NotFound): - status = 404 - info = {} - message = exception.args[0] - elif isinstance(exception, InvalidUsage): - status = 405 - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.messagge_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = ["500.html"] - if status != 500: - templates = ["{}.html".format(status)] + templates - info.update( - {"ok": False, "error": message, "status": status, "title": title} - ) - if request is not None and request.path.split("?")[0].endswith(".json"): - r = response.json(info, status=status) - - else: - template = self.jinja_env.select_template(templates) - r = response.html(template.render(info), status=status) - if self.cors: - r.headers["Access-Control-Allow-Origin"] = "*" - return r + # TODO: re-enable this + # @app.middleware("response") + # def redirect_on_404_with_trailing_slash(request, original_response): + # if original_response.status == 404 and request.path.endswith("/"): + # path = request.path.rstrip("/") + # if request.query_string: + # path = "{}?{}".format(path, request.query_string) + # return response.redirect(path) # First time server starts up, calculate table counts for immutable databases - @app.listener("before_server_start") - async def setup_db(app, loop): - for dbname, database in self.databases.items(): - if not database.is_mutable: - await database.table_counts(limit=60 * 60 * 1000) + # TODO: re-enable this mechanism + # @app.listener("before_server_start") + # async def setup_db(app, loop): + # for dbname, database in self.databases.items(): + # if not database.is_mutable: + # await database.table_counts(limit=60 * 60 * 1000) return app diff --git a/datasette/cli.py b/datasette/cli.py index 0d47f47a..181b281c 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -1,4 +1,5 @@ import asyncio +import uvicorn import click from click import formatting from click_default_group import DefaultGroup @@ -354,4 +355,4 @@ def serve( asyncio.get_event_loop().run_until_complete(ds.run_sanity_checks()) # Start the server - ds.app().run(host=host, port=port, debug=debug) + uvicorn.run(ds.app(), host=host, port=port, log_level="info") diff --git a/datasette/views/base.py b/datasette/views/base.py index b278f3fb..edf81266 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -10,6 +10,7 @@ import pint from sanic import response from sanic.exceptions import NotFound from sanic.views import HTTPMethodView +from sanic.request import Request as SanicRequest from datasette import __version__ from datasette.plugins import pm @@ -54,7 +55,7 @@ class AsgiRouter: routes = routes or [] self.routes = [ # Compile any strings to regular expressions - (re.compile(pattern) if isinstance(pattern, str) else pattern, view) + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) for pattern, view in routes ] @@ -77,29 +78,48 @@ class AsgiRouter: await send({"type": "http.response.body", "body": b"

404

"}) -async def hello_world(scope, receive, send): - assert scope["type"] == "http" - await send( - { - "type": "http.response.start", - "status": 200, - "headers": [[b"content-type", b"text/html"]], - } - ) - await send({"type": "http.response.body", "body": b"

Hello world!

"}) - - -app = AsgiRouter([("/hello/", hello_world)]) - - class AsgiView(HTTPMethodView): - async def asgi(self, scope, receive, send): - # Uses scope to create a Sanic-compatible request object, - # then dispatches that to self.get(...) or self.options(...) - # along with keyword arguments that were already tucked - # into scope["url_route"]["kwargs"] by the router - # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter - pass + @classmethod + def as_asgi(cls, *class_args, **class_kwargs): + async def view(scope, receive, send): + # Uses scope to create a Sanic-compatible request object, + # then dispatches that to self.get(...) or self.options(...) + # along with keyword arguments that were already tucked + # into scope["url_route"]["kwargs"] by the router + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + path = scope.get("raw_path", scope["path"].encode("utf8")) + if scope["query_string"]: + path = path + b"?" + scope["query_string"] + request = SanicRequest(path, {}, "1.1", scope["method"], None) + + class Woo: + def get_extra_info(self, key): + return False + + request.app = Woo() + request.app.websocket_enabled = False + request.transport = Woo() + self = view.view_class(*class_args, **class_kwargs) + response = await self.dispatch_request( + request, **scope["url_route"]["kwargs"] + ) + await send( + { + "type": "http.response.start", + "status": response.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in response.headers.items() + ], + } + ) + await send({"type": "http.response.body", "body": response.body}) + + view.view_class = cls + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.__name__ = cls.__name__ + return view class BaseView(AsgiView): @@ -250,17 +270,17 @@ class DataView(BaseView): kwargs["table"] = table if _format: kwargs["as_format"] = ".{}".format(_format) - elif "table" in kwargs: + elif kwargs.get("table"): kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"]) should_redirect = "/{}-{}".format(name, expected) - if "table" in kwargs: + if kwargs.get("table"): should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"]) - if "pk_path" in kwargs: + if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] - if "as_format" in kwargs: + if kwargs.get("as_format"): should_redirect += kwargs["as_format"] - if "as_db" in kwargs: + if kwargs.get("as_db"): should_redirect += kwargs["as_db"] if ( diff --git a/setup.py b/setup.py index 60c1bcc5..24535b24 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ setup( author="Simon Willison", license="Apache License, Version 2.0", url="https://github.com/simonw/datasette", - packages=find_packages(exclude='tests'), + packages=find_packages(exclude="tests"), package_data={"datasette": ["templates/*.html"]}, include_package_data=True, install_requires=[ @@ -48,6 +48,7 @@ setup( "hupper==1.0", "pint==0.8.1", "pluggy>=0.12.0", + "uvicorn>=0.8.1", ], entry_points=""" [console_scripts] From b53a75c460f21c87b8edf42c47dc2eaef037883b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 18:34:00 -0700 Subject: [PATCH 05/33] Test harness now uses ASGI, some tests pass #272 --- setup.py | 1 + tests/fixtures.py | 56 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 24535b24..39ebd21d 100644 --- a/setup.py +++ b/setup.py @@ -61,6 +61,7 @@ setup( "pytest-asyncio==0.10.0", "aiohttp==3.5.3", "beautifulsoup4==4.6.1", + "asgiref==3.1.2", ] + maybe_black }, diff --git a/tests/fixtures.py b/tests/fixtures.py index 04ac3c68..1be7dc23 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,5 +1,7 @@ from datasette.app import Datasette from datasette.utils import sqlite3 +from asgiref.testing import ApplicationCommunicator +from asgiref.sync import async_to_sync import itertools import json import os @@ -12,14 +14,52 @@ import tempfile import time -class TestClient: - def __init__(self, sanic_test_client): - self.sanic_test_client = sanic_test_client +class TestResponse: + def __init__(self, status, headers, body): + self.status = status + self.headers = headers + self.body = body - def get(self, path, allow_redirects=True): - return self.sanic_test_client.get( - path, allow_redirects=allow_redirects, gather_request=False + @property + def json(self): + return json.loads(self.body) + + +class TestClient: + def __init__(self, asgi_app): + self.asgi_app = asgi_app + + @async_to_sync + async def get(self, path, allow_redirects=True): + query_string = b"" + if "?" in path: + path, _, query_string = path.partition("?") + query_string = query_string.encode("utf8") + instance = ApplicationCommunicator( + self.asgi_app, + { + "type": "http", + "http_version": "1.0", + "method": "GET", + "path": path, + "query_string": query_string, + }, ) + await instance.send_input({"type": "http.request"}) + # First message back should be response.start with headers and status + start = await instance.receive_output(2) + assert start["type"] == "http.response.start" + headers = start["headers"] + status = start["status"] + # Now loop until we run out of response.body + body = b"" + while True: + message = await instance.receive_output(2) + assert message["type"] == "http.response.body" + body += message["body"] + if not message.get("more_body"): + break + return TestResponse(status, headers, body) def make_app_client( @@ -75,7 +115,7 @@ def make_app_client( inspect_data=inspect_data, ) ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n)))) - client = TestClient(ds.app().test_client) + client = TestClient(ds.app()) client.ds = ds yield client @@ -88,7 +128,7 @@ def app_client(): @pytest.fixture(scope="session") def app_client_no_files(): ds = Datasette([]) - client = TestClient(ds.app().test_client) + client = TestClient(ds.app()) client.ds = ds yield client From 55fc9936674e9d6b17c969ee75840b7e417898e9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 18:57:10 -0700 Subject: [PATCH 06/33] Implemented custom 404/500, more tests pass #272 --- datasette/app.py | 79 ++++++++++++++++++++++++++++++++++++++++- datasette/views/base.py | 18 +++++++++- tests/fixtures.py | 8 ++++- 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index fe4a8683..61a597ac 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -644,7 +644,50 @@ class Datasette: ) self.register_custom_units() - app = AsgiRouter(routes) + outer_self = self + + class DatasetteRouter(AsgiRouter): + async def handle_500(self, scope, receive, send, exception): + title = None + help = None + if isinstance(exception, NotFound): + status = 404 + info = {} + message = exception.args[0] + elif isinstance(exception, InvalidUsage): + status = 405 + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.messagge_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = ["500.html"] + if status != 500: + templates = ["{}.html".format(status)] + templates + info.update( + {"ok": False, "error": message, "status": status, "title": title} + ) + headers = {} + if outer_self.cors: + headers["Access-Control-Allow-Origin"] = "*" + if scope["path"].split("?")[0].endswith(".json"): + await asgi_send_json(send, info, status=status, headers=headers) + else: + template = outer_self.jinja_env.select_template(templates) + await asgi_send_html( + send, template.render(info), status=status, headers=headers + ) + + app = DatasetteRouter(routes) # On 404 with a trailing slash redirect to path without that slash: # pylint: disable=unused-variable # TODO: re-enable this @@ -665,3 +708,37 @@ class Datasette: # await database.table_counts(limit=60 * 60 * 1000) return app + + +async def asgi_send_json(send, info, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, + json.dumps(info), + status=status, + headers=headers, + content_type="application/json", + ) + + +async def asgi_send_html(send, html, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, html, status=status, headers=headers, content_type="text/html" + ) + + +async def asgi_send(send, content, status, headers, content_type="text/plain"): + # TODO: watch out for Content-Type due to mixed case: + headers["content-type"] = content_type + await send( + { + "type": "http.response.start", + "status": status, + "headers": [ + [key.encode("latin1"), value.encode("latin1")] + for key, value in headers.items() + ], + } + ) + await send({"type": "http.response.body", "body": content.encode("utf8")}) diff --git a/datasette/views/base.py b/datasette/views/base.py index edf81266..70c7f55f 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -12,6 +12,8 @@ from sanic.exceptions import NotFound from sanic.views import HTTPMethodView from sanic.request import Request as SanicRequest +from html import escape + from datasette import __version__ from datasette.plugins import pm from datasette.utils import ( @@ -64,7 +66,10 @@ class AsgiRouter: match = regex.match(scope["path"]) if match is not None: new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - return await view(new_scope, receive, send) + try: + return await view(new_scope, receive, send) + except Exception as exception: + return await self.handle_500(scope, receive, send, exception) return await self.handle_404(scope, receive, send) async def handle_404(self, scope, receive, send): @@ -77,6 +82,17 @@ class AsgiRouter: ) await send({"type": "http.response.body", "body": b"

404

"}) + async def handle_500(self, scope, receive, send, exception): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + html = "

500

".format(escape(repr(exception))) + await send({"type": "http.response.body", "body": html.encode("utf8")}) + class AsgiView(HTTPMethodView): @classmethod diff --git a/tests/fixtures.py b/tests/fixtures.py index 1be7dc23..5238f8fa 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -24,6 +24,10 @@ class TestResponse: def json(self): return json.loads(self.body) + @property + def text(self): + return self.body.decode("utf8") + class TestClient: def __init__(self, asgi_app): @@ -49,7 +53,9 @@ class TestClient: # First message back should be response.start with headers and status start = await instance.receive_output(2) assert start["type"] == "http.response.start" - headers = start["headers"] + headers = dict( + [(k.decode("utf8"), v.decode("utf8")) for k, v in start["headers"]] + ) status = start["status"] # Now loop until we run out of response.body body = b"" From d8dcc34e36da26ebbfadb36ed8fa15cc2ba01b59 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 20:21:23 -0700 Subject: [PATCH 07/33] All API tests now pass, refs #272 CSV tests still all fail. Also I marked test_trace() as skip because I have not yet re-implemented trace. --- datasette/app.py | 4 ++-- datasette/views/base.py | 12 +++++++++++- tests/fixtures.py | 1 + tests/test_api.py | 11 ++++++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 61a597ac..4252287e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -638,9 +638,9 @@ class Datasette: ) add_route( RowView.as_asgi(self), - r"///[^/]+)/(?P[^/]+?)/(?P[^/]+?)(?P" + renderer_regex - + r")?$>", + + r")?$", ) self.register_custom_units() diff --git a/datasette/views/base.py b/datasette/views/base.py index 70c7f55f..96bc996a 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -106,7 +106,17 @@ class AsgiView(HTTPMethodView): path = scope.get("raw_path", scope["path"].encode("utf8")) if scope["query_string"]: path = path + b"?" + scope["query_string"] - request = SanicRequest(path, {}, "1.1", scope["method"], None) + request = SanicRequest( + path, + { + "Host": dict(scope.get("headers") or []) + .get(b"host", b"") + .decode("utf8") + }, + "1.1", + scope["method"], + None, + ) class Woo: def get_extra_info(self, key): diff --git a/tests/fixtures.py b/tests/fixtures.py index 5238f8fa..afee2f54 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -47,6 +47,7 @@ class TestClient: "method": "GET", "path": path, "query_string": query_string, + "headers": [[b"host", b"localhost"]], }, ) await instance.send_input({"type": "http.request"}) diff --git a/tests/test_api.py b/tests/test_api.py index 5c1bff15..3655ed42 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -771,8 +771,8 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag fetched.extend(response.json["rows"]) path = response.json["next_url"] if path: - assert response.json["next"] assert urllib.parse.urlencode({"_next": response.json["next"]}) in path + path = path.replace("http://localhost", "") assert count < 30, "Possible infinite loop detected" assert expected_rows == len(fetched) @@ -812,6 +812,8 @@ def test_paginate_compound_keys(app_client): response = app_client.get(path) fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert page < 100 assert 1001 == len(fetched) assert 21 == page @@ -833,6 +835,8 @@ def test_paginate_compound_keys_with_extra_filters(app_client): response = app_client.get(path) fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert 2 == page expected = [r[3] for r in generate_compound_rows(1001) if "d" in r[3]] assert expected == [f["content"] for f in fetched] @@ -881,6 +885,8 @@ def test_sortable(app_client, query_string, sort_key, human_description_en): assert human_description_en == response.json["human_description_en"] fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert 5 == page expected = list(generate_sortable_rows(201)) expected.sort(key=sort_key) @@ -1236,6 +1242,8 @@ def test_page_size_matching_max_returned_rows( fetched.extend(response.json["rows"]) assert len(response.json["rows"]) in (1, 50) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert 201 == len(fetched) @@ -1603,6 +1611,7 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client): ] == response.json +@pytest.mark.skip def test_trace(app_client): response = app_client.get("/fixtures/simple_primary_key.json?_trace=1") data = response.json From ca03940f6d9313543e134e11c1e509f931be82c3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 20:58:31 -0700 Subject: [PATCH 08/33] Basic static files now work, refs #272 Not yet using aiofiles so will not correctly handle larger static assets. Still needs security tightening. Still needs tests. But the CSS and JS now work --- datasette/app.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 4252287e..d5f1b43f 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,4 +1,5 @@ import asyncio +from mimetypes import guess_type import collections import hashlib import json @@ -594,15 +595,18 @@ class Datasette: add_route(IndexView.as_asgi(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") - # # TODO: re-enable the static bits - # app.static("/-/static/", str(app_root / "datasette" / "static")) + + add_route( + asgi_static(app_root / "datasette" / "static"), r"/-/static/(?P.*)$" + ) # for path, dirname in self.static_mounts: # app.static(path, dirname) - # # Mount any plugin static/ directories - # for plugin in get_plugins(pm): - # if plugin["static_path"]: - # modpath = "/-/static-plugins/{}/".format(plugin["name"]) - # app.static(modpath, plugin["static_path"]) + + # Mount any plugin static/ directories + for plugin in get_plugins(pm): + if plugin["static_path"]: + modpath = "/-/static-plugins/{}/(?P.*)$".format(plugin["name"]) + add_route(asgi_static(plugin["static_path"]), modpath) add_route( JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata), r"/-/metadata(?P(\.json)?)$", @@ -742,3 +746,19 @@ async def asgi_send(send, content, status, headers, content_type="text/plain"): } ) await send({"type": "http.response.body", "body": content.encode("utf8")}) + + +def asgi_static(root_path): + async def inner_static(scope, receive, send): + path = scope["url_route"]["kwargs"]["path"] + # TODO: prevent ../../ style paths + full_path = Path(root_path) / path + await asgi_send( + send, + full_path.open().read(), + 200, + {}, + content_type=guess_type(str(full_path))[0] or "text/plain", + ) + + return inner_static From 8a1a15d7250fde86f9bf3a296861d1dd79fd9eca Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 22:07:41 -0700 Subject: [PATCH 09/33] Use aiofiles for static, refs #272 --- datasette/app.py | 50 +++++++++++++++++++++++++++++++++++----------- setup.py | 1 + tests/fixtures.py | 3 +++ tests/test_html.py | 8 ++++++++ 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index d5f1b43f..af89d7f6 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,4 +1,5 @@ import asyncio +import aiofiles from mimetypes import guess_type import collections import hashlib @@ -733,7 +734,13 @@ async def asgi_send_html(send, html, status=200, headers=None): async def asgi_send(send, content, status, headers, content_type="text/plain"): - # TODO: watch out for Content-Type due to mixed case: + await asgi_start(send, status, headers, content_type) + await send({"type": "http.response.body", "body": content.encode("utf8")}) + + +async def asgi_start(send, status, headers, content_type="text/plain"): + # Remove any existing content-type header + headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) headers["content-type"] = content_type await send( { @@ -745,20 +752,39 @@ async def asgi_send(send, content, status, headers, content_type="text/plain"): ], } ) - await send({"type": "http.response.body", "body": content.encode("utf8")}) -def asgi_static(root_path): +def asgi_static(root_path, chunk_size=4096): async def inner_static(scope, receive, send): path = scope["url_route"]["kwargs"]["path"] - # TODO: prevent ../../ style paths - full_path = Path(root_path) / path - await asgi_send( - send, - full_path.open().read(), - 200, - {}, - content_type=guess_type(str(full_path))[0] or "text/plain", - ) + full_path = (Path(root_path) / path).absolute() + # Ensure full_path is within root_path to avoid weird "../" tricks + try: + full_path.relative_to(root_path) + except ValueError: + await asgi_send_html(send, "404", 404) + return + first = True + try: + async with aiofiles.open(full_path, mode="rb") as fp: + if first: + await asgi_start( + send, 200, {}, guess_type(str(full_path))[0] or "text/plain" + ) + first = False + more_body = True + while more_body: + chunk = await fp.read(chunk_size) + more_body = len(chunk) == chunk_size + await send( + { + "type": "http.response.body", + "body": chunk, + "more_body": more_body, + } + ) + except FileNotFoundError: + await asgi_send_html(send, "404", 404) + return return inner_static diff --git a/setup.py b/setup.py index 39ebd21d..3a8201cb 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ setup( "pint==0.8.1", "pluggy>=0.12.0", "uvicorn>=0.8.1", + "aiofiles==0.4.0", ], entry_points=""" [console_scripts] diff --git a/tests/fixtures.py b/tests/fixtures.py index afee2f54..b1f54185 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -52,7 +52,9 @@ class TestClient: ) await instance.send_input({"type": "http.request"}) # First message back should be response.start with headers and status + messages = [] start = await instance.receive_output(2) + messages.append(start) assert start["type"] == "http.response.start" headers = dict( [(k.decode("utf8"), v.decode("utf8")) for k, v in start["headers"]] @@ -62,6 +64,7 @@ class TestClient: body = b"" while True: message = await instance.receive_output(2) + messages.append(message) assert message["type"] == "http.response.body" body += message["body"] if not message.get("more_body"): diff --git a/tests/test_html.py b/tests/test_html.py index 6b673c13..f9ff393f 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -44,6 +44,14 @@ def test_homepage(app_client_two_attached_databases): ] == table_links +def test_static(app_client): + response = app_client.get("/-/static/app2.css") + assert response.status == 404 + response = app_client.get("/-/static/app.css") + assert response.status == 200 + assert "text/css" == response.headers["content-type"] + + def test_memory_database_page(): for client in make_app_client(memory=True): response = client.get("/:memory:") From eb06e59332f16bb301f663df3d99eeeaa0d953d5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 22 Jun 2019 22:42:23 -0700 Subject: [PATCH 10/33] static_mounts mechanism works again, refs #272 --- datasette/app.py | 4 ++-- tests/fixtures.py | 2 ++ tests/test_html.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index af89d7f6..dfba438a 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -600,8 +600,8 @@ class Datasette: add_route( asgi_static(app_root / "datasette" / "static"), r"/-/static/(?P.*)$" ) - # for path, dirname in self.static_mounts: - # app.static(path, dirname) + for path, dirname in self.static_mounts: + add_route(asgi_static(dirname), r"/" + path + "/(?P.*)$") # Mount any plugin static/ directories for plugin in get_plugins(pm): diff --git a/tests/fixtures.py b/tests/fixtures.py index b1f54185..e98bf492 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -82,6 +82,7 @@ def make_app_client( is_immutable=False, extra_databases=None, inspect_data=None, + static_mounts=None, ): with tempfile.TemporaryDirectory() as tmpdir: filepath = os.path.join(tmpdir, filename) @@ -123,6 +124,7 @@ def make_app_client( plugins_dir=plugins_dir, config=config, inspect_data=inspect_data, + static_mounts=static_mounts, ) ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n)))) client = TestClient(ds.app()) diff --git a/tests/test_html.py b/tests/test_html.py index f9ff393f..60014691 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -8,6 +8,7 @@ from .fixtures import ( # noqa METADATA, ) import json +import pathlib import pytest import re import urllib.parse @@ -52,6 +53,16 @@ def test_static(app_client): assert "text/css" == response.headers["content-type"] +def test_static_mounts(): + for client in make_app_client( + static_mounts=[("custom-static", str(pathlib.Path(__file__).parent))] + ): + response = client.get("/custom-static/test_html.py") + assert response.status == 200 + response = client.get("/custom-static/not_exists.py") + assert response.status == 404 + + def test_memory_database_page(): for client in make_app_client(memory=True): response = client.get("/:memory:") From ff9efa668ebc33f17ef9b30139960e29906a18fb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 06:50:02 -0700 Subject: [PATCH 11/33] Implemente AsgiStream, CSV tests all now pass #272 --- datasette/utils.py | 4 +-- datasette/views/base.py | 77 ++++++++++++++++++++++++++++++++--------- tests/test_csv.py | 8 ++--- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/datasette/utils.py b/datasette/utils.py index 58746be4..5ed8dd12 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -697,13 +697,13 @@ class LimitedWriter: self.limit_bytes = limit_mb * 1024 * 1024 self.bytes_count = 0 - def write(self, bytes): + async def write(self, bytes): self.bytes_count += len(bytes) if self.limit_bytes and (self.bytes_count > self.limit_bytes): raise WriteLimitExceeded( "CSV contains more than {} bytes".format(self.limit_bytes) ) - self.writer.write(bytes) + await self.writer.write(bytes) _infinities = {float("inf"), float("-inf")} diff --git a/datasette/views/base.py b/datasette/views/base.py index 96bc996a..0b02a13b 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -129,17 +129,20 @@ class AsgiView(HTTPMethodView): response = await self.dispatch_request( request, **scope["url_route"]["kwargs"] ) - await send( - { - "type": "http.response.start", - "status": response.status, - "headers": [ - [key.encode("utf-8"), value.encode("utf-8")] - for key, value in response.headers.items() - ], - } - ) - await send({"type": "http.response.body", "body": response.body}) + if hasattr(response, "asgi_send"): + await response.asgi_send(send) + else: + await send( + { + "type": "http.response.start", + "status": response.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in response.headers.items() + ], + } + ) + await send({"type": "http.response.body", "body": response.body}) view.view_class = cls view.__doc__ = cls.__doc__ @@ -148,6 +151,48 @@ class AsgiView(HTTPMethodView): return view +class AsgiStream: + def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): + self.stream_fn = stream_fn + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + # Remove any existing content-type header + headers = dict( + [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] + ) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + w = AsgiWriter(send) + await self.stream_fn(w) + await send({"type": "http.response.body", "body": b""}) + + +class AsgiWriter: + def __init__(self, send): + self.send = send + + async def write(self, chunk): + await self.send( + { + "type": "http.response.body", + "body": chunk.encode("utf8"), + "more_body": True, + } + ) + + class BaseView(AsgiView): ds = None @@ -383,13 +428,13 @@ class DataView(BaseView): if not first: data, _, _ = await self.data(request, database, hash, **kwargs) if first: - writer.writerow(headings) + await writer.writerow(headings) first = False next = data.get("next") for row in data["rows"]: if not expanded_columns: # Simple path - writer.writerow(row) + await writer.writerow(row) else: # Look for {"value": "label": } dicts and expand new_row = [] @@ -399,10 +444,10 @@ class DataView(BaseView): new_row.append(cell["label"]) else: new_row.append(cell) - writer.writerow(new_row) + await writer.writerow(new_row) except Exception as e: print("caught this", e) - r.write(str(e)) + await r.write(str(e)) return content_type = "text/plain; charset=utf-8" @@ -416,7 +461,7 @@ class DataView(BaseView): ) headers["Content-Disposition"] = disposition - return response.stream(stream_fn, headers=headers, content_type=content_type) + return AsgiStream(stream_fn, headers=headers, content_type=content_type) async def get_format(self, request, database, args): """ Determine the format of the response from the request, from URL diff --git a/tests/test_csv.py b/tests/test_csv.py index cf0e6732..c3cdc241 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -46,7 +46,7 @@ def test_table_csv(app_client): response = app_client.get("/fixtures/simple_primary_key.csv") assert response.status == 200 assert not response.headers.get("Access-Control-Allow-Origin") - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_TABLE_CSV == response.text @@ -59,7 +59,7 @@ def test_table_csv_cors_headers(app_client_with_cors): def test_table_csv_with_labels(app_client): response = app_client.get("/fixtures/facetable.csv?_labels=1") assert response.status == 200 - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_TABLE_WITH_LABELS_CSV == response.text @@ -68,14 +68,14 @@ def test_custom_sql_csv(app_client): "/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2" ) assert response.status == 200 - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_CUSTOM_CSV == response.text def test_table_csv_download(app_client): response = app_client.get("/fixtures/simple_primary_key.csv?_dl=1") assert response.status == 200 - assert "text/csv; charset=utf-8" == response.headers["Content-Type"] + assert "text/csv; charset=utf-8" == response.headers["content-type"] expected_disposition = 'attachment; filename="simple_primary_key.csv"' assert expected_disposition == response.headers["Content-Disposition"] From b7a00dbde3b9a0b37381111329e462d4f213fd66 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 07:01:38 -0700 Subject: [PATCH 12/33] Include "asgi": "3.0" in /-/versions, refs #272 Mainly so you can tell if a Datasette instance is running on ASGI or not. --- datasette/app.py | 1 + tests/test_api.py | 1 + 2 files changed, 2 insertions(+) diff --git a/datasette/app.py b/datasette/app.py index dfba438a..6ea208e6 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -422,6 +422,7 @@ class Datasette: "full": sys.version, }, "datasette": datasette_version, + "asgi": "3.0", "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, diff --git a/tests/test_api.py b/tests/test_api.py index 3655ed42..6ed990f3 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1197,6 +1197,7 @@ def test_plugins_json(app_client): def test_versions_json(app_client): response = app_client.get("/-/versions.json") assert "python" in response.json + assert "3.0" == response.json.get("asgi") assert "version" in response.json["python"] assert "full" in response.json["python"] assert "datasette" in response.json From 2b5a644dd7b9c12974fbad8916aa943a43cc0fb4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 07:10:42 -0700 Subject: [PATCH 13/33] TestClient obeys allow_redirects again, refs #272 --- tests/fixtures.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index e98bf492..d625fbe5 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -30,11 +30,16 @@ class TestResponse: class TestClient: + max_redirects = 5 + def __init__(self, asgi_app): self.asgi_app = asgi_app @async_to_sync - async def get(self, path, allow_redirects=True): + async def get(self, path, allow_redirects=True, redirect_count=0): + return await self._get(path, allow_redirects, redirect_count) + + async def _get(self, path, allow_redirects=True, redirect_count=0): query_string = b"" if "?" in path: path, _, query_string = path.partition("?") @@ -69,7 +74,18 @@ class TestClient: body += message["body"] if not message.get("more_body"): break - return TestResponse(status, headers, body) + response = TestResponse(status, headers, body) + if allow_redirects and response.status in (301, 302): + assert ( + redirect_count < self.max_redirects + ), "Redirected {} times, max_redirects={}".format( + redirect_count, self.max_redirects + ) + location = response.headers["Location"] + return await self._get( + location, allow_redirects=True, redirect_count=redirect_count + 1 + ) + return response def make_app_client( From d2daa1b9f74ef33c4a819aa1d968e442328ec987 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 07:36:54 -0700 Subject: [PATCH 14/33] Database download works again, refactored utils.py #272 Refactored utils.py into a datasette/utils package, refactored some of the ASGI helper code into datasette/utils/asgi.py --- datasette/app.py | 78 +------ datasette/{utils.py => utils/__init__.py} | 0 datasette/utils/asgi.py | 244 ++++++++++++++++++++++ datasette/views/base.py | 143 +------------ datasette/views/database.py | 7 +- 5 files changed, 250 insertions(+), 222 deletions(-) rename datasette/{utils.py => utils/__init__.py} (100%) create mode 100644 datasette/utils/asgi.py diff --git a/datasette/app.py b/datasette/app.py index 6ea208e6..f86a14b5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,6 +1,4 @@ import asyncio -import aiofiles -from mimetypes import guess_type import collections import hashlib import json @@ -38,6 +36,7 @@ from .utils import ( sqlite_timelimit, to_css_class, ) +from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json from .tracer import capture_traces, trace from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -714,78 +713,3 @@ class Datasette: # await database.table_counts(limit=60 * 60 * 1000) return app - - -async def asgi_send_json(send, info, status=200, headers=None): - headers = headers or {} - await asgi_send( - send, - json.dumps(info), - status=status, - headers=headers, - content_type="application/json", - ) - - -async def asgi_send_html(send, html, status=200, headers=None): - headers = headers or {} - await asgi_send( - send, html, status=status, headers=headers, content_type="text/html" - ) - - -async def asgi_send(send, content, status, headers, content_type="text/plain"): - await asgi_start(send, status, headers, content_type) - await send({"type": "http.response.body", "body": content.encode("utf8")}) - - -async def asgi_start(send, status, headers, content_type="text/plain"): - # Remove any existing content-type header - headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) - headers["content-type"] = content_type - await send( - { - "type": "http.response.start", - "status": status, - "headers": [ - [key.encode("latin1"), value.encode("latin1")] - for key, value in headers.items() - ], - } - ) - - -def asgi_static(root_path, chunk_size=4096): - async def inner_static(scope, receive, send): - path = scope["url_route"]["kwargs"]["path"] - full_path = (Path(root_path) / path).absolute() - # Ensure full_path is within root_path to avoid weird "../" tricks - try: - full_path.relative_to(root_path) - except ValueError: - await asgi_send_html(send, "404", 404) - return - first = True - try: - async with aiofiles.open(full_path, mode="rb") as fp: - if first: - await asgi_start( - send, 200, {}, guess_type(str(full_path))[0] or "text/plain" - ) - first = False - more_body = True - while more_body: - chunk = await fp.read(chunk_size) - more_body = len(chunk) == chunk_size - await send( - { - "type": "http.response.body", - "body": chunk, - "more_body": more_body, - } - ) - except FileNotFoundError: - await asgi_send_html(send, "404", 404) - return - - return inner_static diff --git a/datasette/utils.py b/datasette/utils/__init__.py similarity index 100% rename from datasette/utils.py rename to datasette/utils/__init__.py diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py new file mode 100644 index 00000000..14ade563 --- /dev/null +++ b/datasette/utils/asgi.py @@ -0,0 +1,244 @@ +import json +from mimetypes import guess_type +from sanic.views import HTTPMethodView +from pathlib import Path +import re +import aiofiles + + +class AsgiRouter: + def __init__(self, routes=None): + routes = routes or [] + self.routes = [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def __call__(self, scope, receive, send): + for regex, view in self.routes: + match = regex.match(scope["path"]) + if match is not None: + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + try: + return await view(new_scope, receive, send) + except Exception as exception: + return await self.handle_500(scope, receive, send, exception) + return await self.handle_404(scope, receive, send) + + async def handle_404(self, scope, receive, send): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"

404

"}) + + async def handle_500(self, scope, receive, send, exception): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + html = "

500

".format(escape(repr(exception))) + await send({"type": "http.response.body", "body": html.encode("utf8")}) + + +class AsgiView(HTTPMethodView): + @classmethod + def as_asgi(cls, *class_args, **class_kwargs): + async def view(scope, receive, send): + # Uses scope to create a Sanic-compatible request object, + # then dispatches that to self.get(...) or self.options(...) + # along with keyword arguments that were already tucked + # into scope["url_route"]["kwargs"] by the router + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + path = scope.get("raw_path", scope["path"].encode("utf8")) + if scope["query_string"]: + path = path + b"?" + scope["query_string"] + request = SanicRequest( + path, + { + "Host": dict(scope.get("headers") or []) + .get(b"host", b"") + .decode("utf8") + }, + "1.1", + scope["method"], + None, + ) + + # TODO: Remove need for this + class Woo: + def get_extra_info(self, key): + return False + + request.app = Woo() + request.app.websocket_enabled = False + request.transport = Woo() + self = view.view_class(*class_args, **class_kwargs) + response = await self.dispatch_request( + request, **scope["url_route"]["kwargs"] + ) + if hasattr(response, "asgi_send"): + await response.asgi_send(send) + else: + await send( + { + "type": "http.response.start", + "status": response.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in response.headers.items() + ], + } + ) + await send({"type": "http.response.body", "body": response.body}) + + view.view_class = cls + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.__name__ = cls.__name__ + return view + + +class AsgiStream: + def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): + self.stream_fn = stream_fn + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + # Remove any existing content-type header + headers = dict( + [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] + ) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + w = AsgiWriter(send) + await self.stream_fn(w) + await send({"type": "http.response.body", "body": b""}) + + +class AsgiWriter: + def __init__(self, send): + self.send = send + + async def write(self, chunk): + await self.send( + { + "type": "http.response.body", + "body": chunk.encode("utf8"), + "more_body": True, + } + ) + + +async def asgi_send_json(send, info, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, + json.dumps(info), + status=status, + headers=headers, + content_type="application/json", + ) + + +async def asgi_send_html(send, html, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, html, status=status, headers=headers, content_type="text/html" + ) + + +async def asgi_send(send, content, status, headers, content_type="text/plain"): + await asgi_start(send, status, headers, content_type) + await send({"type": "http.response.body", "body": content.encode("utf8")}) + + +async def asgi_start(send, status, headers, content_type="text/plain"): + # Remove any existing content-type header + headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) + headers["content-type"] = content_type + await send( + { + "type": "http.response.start", + "status": status, + "headers": [ + [key.encode("latin1"), value.encode("latin1")] + for key, value in headers.items() + ], + } + ) + + +async def asgi_send_file( + send, filepath, filename=None, content_type=None, chunk_size=4096 +): + headers = {} + if filename: + headers["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) + first = True + async with aiofiles.open(filepath, mode="rb") as fp: + if first: + await asgi_start( + send, + 200, + headers, + content_type or guess_type(str(filepath))[0] or "text/plain", + ) + first = False + more_body = True + while more_body: + chunk = await fp.read(chunk_size) + more_body = len(chunk) == chunk_size + await send( + {"type": "http.response.body", "body": chunk, "more_body": more_body} + ) + + +def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None): + async def inner_static(scope, receive, send): + path = scope["url_route"]["kwargs"]["path"] + full_path = (Path(root_path) / path).absolute() + # Ensure full_path is within root_path to avoid weird "../" tricks + try: + full_path.relative_to(root_path) + except ValueError: + await asgi_send_html(send, "404", 404) + return + first = True + try: + await asgi_send_file(send, full_path, chunk_size=chunk_size) + except FileNotFoundError: + await asgi_send_html(send, "404", 404) + return + + return inner_static + + +class AsgiFileDownload: + def __init__( + self, filepath, filename=None, content_type="application/octet-stream" + ): + self.filepath = filepath + self.filename = filename + self.content_type = content_type + + async def asgi_send(self, send): + return await asgi_send_file(send, self.filepath, content_type=self.content_type) diff --git a/datasette/views/base.py b/datasette/views/base.py index 0b02a13b..a2d6571f 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -9,7 +9,6 @@ import jinja2 import pint from sanic import response from sanic.exceptions import NotFound -from sanic.views import HTTPMethodView from sanic.request import Request as SanicRequest from html import escape @@ -29,6 +28,7 @@ from datasette.utils import ( sqlite3, to_css_class, ) +from datasette.utils.asgi import AsgiStream, AsgiWriter, AsgiRouter, AsgiView ureg = pint.UnitRegistry() @@ -52,147 +52,6 @@ class DatasetteError(Exception): self.messagge_is_html = messagge_is_html -class AsgiRouter: - def __init__(self, routes=None): - routes = routes or [] - self.routes = [ - # Compile any strings to regular expressions - ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) - for pattern, view in routes - ] - - async def __call__(self, scope, receive, send): - for regex, view in self.routes: - match = regex.match(scope["path"]) - if match is not None: - new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) - try: - return await view(new_scope, receive, send) - except Exception as exception: - return await self.handle_500(scope, receive, send, exception) - return await self.handle_404(scope, receive, send) - - async def handle_404(self, scope, receive, send): - await send( - { - "type": "http.response.start", - "status": 404, - "headers": [[b"content-type", b"text/html"]], - } - ) - await send({"type": "http.response.body", "body": b"

404

"}) - - async def handle_500(self, scope, receive, send, exception): - await send( - { - "type": "http.response.start", - "status": 404, - "headers": [[b"content-type", b"text/html"]], - } - ) - html = "

500

".format(escape(repr(exception))) - await send({"type": "http.response.body", "body": html.encode("utf8")}) - - -class AsgiView(HTTPMethodView): - @classmethod - def as_asgi(cls, *class_args, **class_kwargs): - async def view(scope, receive, send): - # Uses scope to create a Sanic-compatible request object, - # then dispatches that to self.get(...) or self.options(...) - # along with keyword arguments that were already tucked - # into scope["url_route"]["kwargs"] by the router - # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter - path = scope.get("raw_path", scope["path"].encode("utf8")) - if scope["query_string"]: - path = path + b"?" + scope["query_string"] - request = SanicRequest( - path, - { - "Host": dict(scope.get("headers") or []) - .get(b"host", b"") - .decode("utf8") - }, - "1.1", - scope["method"], - None, - ) - - class Woo: - def get_extra_info(self, key): - return False - - request.app = Woo() - request.app.websocket_enabled = False - request.transport = Woo() - self = view.view_class(*class_args, **class_kwargs) - response = await self.dispatch_request( - request, **scope["url_route"]["kwargs"] - ) - if hasattr(response, "asgi_send"): - await response.asgi_send(send) - else: - await send( - { - "type": "http.response.start", - "status": response.status, - "headers": [ - [key.encode("utf-8"), value.encode("utf-8")] - for key, value in response.headers.items() - ], - } - ) - await send({"type": "http.response.body", "body": response.body}) - - view.view_class = cls - view.__doc__ = cls.__doc__ - view.__module__ = cls.__module__ - view.__name__ = cls.__name__ - return view - - -class AsgiStream: - def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): - self.stream_fn = stream_fn - self.status = status - self.headers = headers or {} - self.content_type = content_type - - async def asgi_send(self, send): - # Remove any existing content-type header - headers = dict( - [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] - ) - headers["content-type"] = self.content_type - await send( - { - "type": "http.response.start", - "status": self.status, - "headers": [ - [key.encode("utf-8"), value.encode("utf-8")] - for key, value in headers.items() - ], - } - ) - w = AsgiWriter(send) - await self.stream_fn(w) - await send({"type": "http.response.body", "body": b""}) - - -class AsgiWriter: - def __init__(self, send): - self.send = send - - async def write(self, chunk): - await self.send( - { - "type": "http.response.body", - "body": chunk.encode("utf8"), - "more_body": True, - } - ) - - class BaseView(AsgiView): ds = None diff --git a/datasette/views/database.py b/datasette/views/database.py index a5b606f1..4809fef0 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -3,8 +3,9 @@ import os from sanic import response from datasette.utils import to_css_class, validate_sql_select +from datasette.utils.asgi import AsgiFileDownload -from .base import DataView, DatasetteError +from .base import DatasetteError, DataView class DatabaseView(DataView): @@ -79,8 +80,8 @@ class DatabaseDownload(DataView): if not db.path: raise DatasetteError("Cannot download database", status=404) filepath = db.path - return await response.file_stream( + return AsgiFileDownload( filepath, filename=os.path.basename(filepath), - mime_type="application/octet-stream", + content_type="application/octet-stream", ) From 5bd510b01adae3f719e4426b9bfbc346a946ba5c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 07:55:55 -0700 Subject: [PATCH 15/33] Re-implemented redirect on 404 with trailing slash, refs #272 All of the tests now pass --- datasette/app.py | 24 ++++++++++++------------ datasette/utils/asgi.py | 11 +++++++++++ datasette/views/base.py | 1 - 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index f86a14b5..22584379 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -36,7 +36,7 @@ from .utils import ( sqlite_timelimit, to_css_class, ) -from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json +from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json, asgi_send_redirect from .tracer import capture_traces, trace from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -652,6 +652,17 @@ class Datasette: outer_self = self class DatasetteRouter(AsgiRouter): + async def handle_404(self, scope, receive, send): + # If URL has a trailing slash, redirect to URL without it + path = scope.get("raw_path", scope["path"].encode("utf8")) + if path.endswith(b"/"): + path = path.rstrip(b"/") + if scope["query_string"]: + path += b"?" + scope["query_string"] + await asgi_send_redirect(send, path.decode("latin1")) + else: + await super().handle_404(scope, receive, send) + async def handle_500(self, scope, receive, send, exception): title = None help = None @@ -693,17 +704,6 @@ class Datasette: ) app = DatasetteRouter(routes) - # On 404 with a trailing slash redirect to path without that slash: - # pylint: disable=unused-variable - # TODO: re-enable this - # @app.middleware("response") - # def redirect_on_404_with_trailing_slash(request, original_response): - # if original_response.status == 404 and request.path.endswith("/"): - # path = request.path.rstrip("/") - # if request.query_string: - # path = "{}?{}".format(path, request.query_string) - # return response.redirect(path) - # First time server starts up, calculate table counts for immutable databases # TODO: re-enable this mechanism # @app.listener("before_server_start") diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 14ade563..2aeeb836 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,6 +1,7 @@ import json from mimetypes import guess_type from sanic.views import HTTPMethodView +from sanic.request import Request as SanicRequest from pathlib import Path import re import aiofiles @@ -166,6 +167,16 @@ async def asgi_send_html(send, html, status=200, headers=None): ) +async def asgi_send_redirect(send, location, status=302): + await asgi_send( + send, + "", + status=status, + headers={"Location": location}, + content_type="text/html", + ) + + async def asgi_send(send, content, status, headers, content_type="text/plain"): await asgi_start(send, status, headers, content_type) await send({"type": "http.response.body", "body": content.encode("utf8")}) diff --git a/datasette/views/base.py b/datasette/views/base.py index a2d6571f..4bf251fb 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -9,7 +9,6 @@ import jinja2 import pint from sanic import response from sanic.exceptions import NotFound -from sanic.request import Request as SanicRequest from html import escape From b97cd53a48598a559e425c05cddcdf6d4d59a2e8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 08:03:42 -0700 Subject: [PATCH 16/33] Fix for Python 3.5 - refs #272 --- tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index d625fbe5..875382d0 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -22,7 +22,7 @@ class TestResponse: @property def json(self): - return json.loads(self.body) + return json.loads(self.text) @property def text(self): From 3bd5e14bc1c54716824b31e7fc3586c39076ca8c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 08:09:41 -0700 Subject: [PATCH 17/33] Fix for Python 3.5 --- datasette/utils/asgi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 2aeeb836..8add01b1 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -205,7 +205,7 @@ async def asgi_send_file( if filename: headers["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) first = True - async with aiofiles.open(filepath, mode="rb") as fp: + async with aiofiles.open(str(filepath), mode="rb") as fp: if first: await asgi_start( send, From cbd0c014eced9c816528115d0e37022e96460f84 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 08:29:02 -0700 Subject: [PATCH 18/33] Hoping this will allow github to resolve the merge conflict with master --- datasette/utils/__init__.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 5ed8dd12..1173d798 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -261,6 +261,27 @@ def escape_sqlite(s): return "[{}]".format(s) +_decode_path_component_re = re.compile(r"U\+([\da-h]{4})", re.IGNORECASE) +_encode_path_component_re = re.compile( + "[{}]".format( + "".join( + re.escape(c) + for c in (";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "~") + ) + ) +) + + +def decode_path_component(table_name): + return _decode_path_component_re.sub(lambda m: chr(int(m.group(1), 16)), table_name) + + +def encode_path_component(table_name): + return _encode_path_component_re.sub( + lambda m: "U+{0:0{1}x}".format(ord(m.group(0)), 4).upper(), table_name + ) + + def make_dockerfile( files, metadata_file, From 4b6b409d853a1fab1dd76226517171aa882c83c9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 08:59:10 -0700 Subject: [PATCH 19/33] Test harness simulates raw_path/path properly This causes tests to fail. --- tests/fixtures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 875382d0..c5f2bfd4 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,6 +12,7 @@ import sys import string import tempfile import time +from urllib.parse import unquote class TestResponse: @@ -50,7 +51,8 @@ class TestClient: "type": "http", "http_version": "1.0", "method": "GET", - "path": path, + "path": unquote(path), + "raw_path": path.encode("ascii"), "query_string": query_string, "headers": [[b"host", b"localhost"]], }, From 1208bcbfe84e852225c801a83cdb4c3b1d9956ca Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 09:06:11 -0700 Subject: [PATCH 20/33] Handle tables%2fwith%2fslashes --- datasette/utils/asgi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 8add01b1..c7afa21c 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -17,8 +17,10 @@ class AsgiRouter: ] async def __call__(self, scope, receive, send): + # Because we care about "foo/bar" v.s. "foo%2Fbar" we decode raw_path ourselves + path = scope["raw_path"].decode("ascii") for regex, view in self.routes: - match = regex.match(scope["path"]) + match = regex.match(path) if match is not None: new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) try: From 1e8419bde47ff5a287d054084e70ae0632a14456 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 12:52:10 -0700 Subject: [PATCH 21/33] Use correct content-type header, refs #272 --- datasette/renderer.py | 2 +- datasette/utils/asgi.py | 7 +++++-- datasette/views/index.py | 2 +- datasette/views/special.py | 4 +++- tests/test_api.py | 1 + tests/test_html.py | 1 + 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/datasette/renderer.py b/datasette/renderer.py index 417fecb5..349c2922 100644 --- a/datasette/renderer.py +++ b/datasette/renderer.py @@ -88,5 +88,5 @@ def json_renderer(args, data, view_name): content_type = "text/plain" else: body = json.dumps(data, cls=CustomJSONEncoder) - content_type = "application/json" + content_type = "application/json; charset=utf-8" return {"body": body, "status_code": status_code, "content_type": content_type} diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index c7afa21c..2ae8ab6e 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -90,13 +90,16 @@ class AsgiView(HTTPMethodView): if hasattr(response, "asgi_send"): await response.asgi_send(send) else: + headers = {} + headers.update(response.headers) + headers["content-type"] = response.content_type await send( { "type": "http.response.start", "status": response.status, "headers": [ [key.encode("utf-8"), value.encode("utf-8")] - for key, value in response.headers.items() + for key, value in headers.items() ], } ) @@ -158,7 +161,7 @@ async def asgi_send_json(send, info, status=200, headers=None): json.dumps(info), status=status, headers=headers, - content_type="application/json", + content_type="application/json; charset=utf-8", ) diff --git a/datasette/views/index.py b/datasette/views/index.py index c9d15c36..c01ae811 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -106,7 +106,7 @@ class IndexView(BaseView): headers["Access-Control-Allow-Origin"] = "*" return response.HTTPResponse( json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder), - content_type="application/json", + content_type="application/json; charset=utf-8", headers=headers, ) else: diff --git a/datasette/views/special.py b/datasette/views/special.py index 91b577fc..1e0c2032 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -18,7 +18,9 @@ class JsonDataView(BaseView): if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" return response.HTTPResponse( - json.dumps(data), content_type="application/json", headers=headers + json.dumps(data), + content_type="application/json; charset=utf-8", + headers=headers, ) else: diff --git a/tests/test_api.py b/tests/test_api.py index 6ed990f3..96c16175 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,6 +22,7 @@ import urllib def test_homepage(app_client): response = app_client.get("/.json") assert response.status == 200 + assert "application/json; charset=utf-8" == response.headers["content-type"] assert response.json.keys() == {"fixtures": 0}.keys() d = response.json["fixtures"] assert d["name"] == "fixtures" diff --git a/tests/test_html.py b/tests/test_html.py index 60014691..7a5c32c4 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -17,6 +17,7 @@ import urllib.parse def test_homepage(app_client_two_attached_databases): response = app_client_two_attached_databases.get("/") assert response.status == 200 + assert "text/html; charset=utf-8" == response.headers["content-type"] soup = Soup(response.body, "html.parser") assert "Datasette Fixtures" == soup.find("h1").text assert ( From b1c6db4b8f80b48a45ff9bd3dbba70ae427f8343 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 12:55:16 -0700 Subject: [PATCH 22/33] Re-implemented tracing, refs #272 --- datasette/app.py | 6 ++-- datasette/tracer.py | 75 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_api.py | 1 - 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 22584379..4c85a78e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -37,7 +37,7 @@ from .utils import ( to_css_class, ) from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json, asgi_send_redirect -from .tracer import capture_traces, trace +from .tracer import capture_traces, trace, AsgiTracer from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -127,7 +127,7 @@ CONFIG_OPTIONS = ( DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} -async def favicon(scope, recieve, send): +async def favicon(scope, receive, send): await send( { "type": "http.response.start", @@ -712,4 +712,4 @@ class Datasette: # if not database.is_mutable: # await database.table_counts(limit=60 * 60 * 1000) - return app + return AsgiTracer(app) diff --git a/datasette/tracer.py b/datasette/tracer.py index c6fe0a00..4a46f1e6 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -1,6 +1,7 @@ import asyncio from contextlib import contextmanager import time +import json import traceback tracers = {} @@ -53,3 +54,77 @@ def capture_traces(tracer): tracers[task_id] = tracer yield del tracers[task_id] + + +class AsgiTracer: + # If the body is larger than this we don't attempt to append the trace + max_body_bytes = 1024 * 256 # 256 KB + + def __init__(self, app): + self.app = app + + async def __call__(self, scope, receive, send): + if b"_trace=1" not in scope.get("query_string", b"").split(b"&"): + await self.app(scope, receive, send) + return + trace_start = time.time() + traces = [] + + accumulated_body = b"" + size_limit_exceeded = False + response_headers = [] + + async def wrapped_send(message): + nonlocal accumulated_body, size_limit_exceeded, response_headers + if message["type"] == "http.response.start": + response_headers = message["headers"] + await send(message) + return + + if message["type"] != "http.response.body" or size_limit_exceeded: + await send(message) + return + + # Accumulate body until the end or until size is exceeded + accumulated_body += message["body"] + if len(accumulated_body) > self.max_body_bytes: + await send( + { + "type": "http.response.body", + "body": accumulated_body, + "more_body": True, + } + ) + size_limit_exceeded = True + return + + if not message.get("more_body"): + # We have all the body - modify it and send the result + # TODO: What to do about Content-Type or other cases? + trace_info = { + "request_duration_ms": 1000 * (time.time() - trace_start), + "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), + "num_traces": len(traces), + "traces": traces, + } + try: + content_type = [ + v.decode("utf8") + for k, v in response_headers + if k.lower() == b"content-type" + ][0] + except IndexError: + content_type = "" + if "text/html" in content_type and b"" in accumulated_body: + extra = json.dumps(trace_info, indent=2) + extra_html = "
{}
".format(extra).encode("utf8") + accumulated_body = accumulated_body.replace(b"", extra_html) + elif "json" in content_type and accumulated_body.startswith(b"{"): + data = json.loads(accumulated_body.decode("utf8")) + if "_trace" not in data: + data["_trace"] = trace_info + accumulated_body = json.dumps(data).encode("utf8") + await send({"type": "http.response.body", "body": accumulated_body}) + + with capture_traces(traces): + await self.app(scope, receive, wrapped_send) diff --git a/tests/test_api.py b/tests/test_api.py index 96c16175..a32ed5e3 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1613,7 +1613,6 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client): ] == response.json -@pytest.mark.skip def test_trace(app_client): response = app_client.get("/fixtures/simple_primary_key.json?_trace=1") data = response.json From 28c31b228d93d22facff728d7199bcf9bc1310d4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 13:31:03 -0700 Subject: [PATCH 23/33] Implemented ASGI lifespan #272 Also did a little bit of lint cleanup --- datasette/app.py | 30 ++++++++++++++---------------- datasette/tracer.py | 6 +++--- datasette/utils/asgi.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 4c85a78e..4552d9d4 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,11 +1,9 @@ import asyncio import collections import hashlib -import json import os import sys import threading -import time import traceback import urllib.parse from concurrent import futures @@ -14,7 +12,6 @@ from pathlib import Path import click from markupsafe import Markup from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from sanic import Sanic, response from sanic.exceptions import InvalidUsage, NotFound from .views.base import DatasetteError, ureg, AsgiRouter @@ -36,8 +33,14 @@ from .utils import ( sqlite_timelimit, to_css_class, ) -from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json, asgi_send_redirect -from .tracer import capture_traces, trace, AsgiTracer +from .utils.asgi import ( + AsgiLifespan, + asgi_static, + asgi_send_html, + asgi_send_json, + asgi_send_redirect, +) +from .tracer import trace, AsgiTracer from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -553,7 +556,6 @@ class Datasette: def app(self): "Returns an ASGI app function that serves the whole of Datasette" - # TODO: re-implement ?_trace= mechanism, see class TracingSanic default_templates = str(app_root / "datasette" / "templates") template_paths = [] if self.template_dir: @@ -665,7 +667,6 @@ class Datasette: async def handle_500(self, scope, receive, send, exception): title = None - help = None if isinstance(exception, NotFound): status = 404 info = {} @@ -703,13 +704,10 @@ class Datasette: send, template.render(info), status=status, headers=headers ) - app = DatasetteRouter(routes) - # First time server starts up, calculate table counts for immutable databases - # TODO: re-enable this mechanism - # @app.listener("before_server_start") - # async def setup_db(app, loop): - # for dbname, database in self.databases.items(): - # if not database.is_mutable: - # await database.table_counts(limit=60 * 60 * 1000) + async def setup_db(): + # First time server starts up, calculate table counts for immutable databases + for dbname, database in self.databases.items(): + if not database.is_mutable: + await database.table_counts(limit=60 * 60 * 1000) - return AsgiTracer(app) + return AsgiLifespan(AsgiTracer(DatasetteRouter(routes)), on_startup=setup_db) diff --git a/datasette/tracer.py b/datasette/tracer.py index 4a46f1e6..e46a6fda 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -33,15 +33,15 @@ def trace(type, **kwargs): start = time.time() yield end = time.time() - trace = { + trace_info = { "type": type, "start": start, "end": end, "duration_ms": (end - start) * 1000, "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), } - trace.update(kwargs) - tracer.append(trace) + trace_info.update(kwargs) + tracer.append(trace_info) @contextmanager diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 2ae8ab6e..be53627f 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -3,6 +3,7 @@ from mimetypes import guess_type from sanic.views import HTTPMethodView from sanic.request import Request as SanicRequest from pathlib import Path +from html import escape import re import aiofiles @@ -51,6 +52,36 @@ class AsgiRouter: await send({"type": "http.response.body", "body": html.encode("utf8")}) +class AsgiLifespan: + def __init__(self, app, on_startup=None, on_shutdown=None): + print("Wrapping {}".format(app)) + self.app = app + on_startup = on_startup or [] + on_shutdown = on_shutdown or [] + if not isinstance(on_startup or [], list): + on_startup = [on_startup] + if not isinstance(on_shutdown or [], list): + on_shutdown = [on_shutdown] + self.on_startup = on_startup + self.on_shutdown = on_shutdown + + async def __call__(self, scope, receive, send): + if scope["type"] == "lifespan": + while True: + message = await receive() + if message["type"] == "lifespan.startup": + for fn in self.on_startup: + await fn() + await send({"type": "lifespan.startup.complete"}) + elif message["type"] == "lifespan.shutdown": + for fn in self.on_shutdown: + await fn() + await send({"type": "lifespan.shutdown.complete"}) + return + else: + await self.app(scope, receive, send) + + class AsgiView(HTTPMethodView): @classmethod def as_asgi(cls, *class_args, **class_kwargs): From 620f0aa4f8aeeadb6ef2e70238b1b746f87d65df Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 14:55:19 -0700 Subject: [PATCH 24/33] Cleaned up favicon() --- datasette/app.py | 10 ++-------- datasette/utils/asgi.py | 5 +++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 4552d9d4..28b5e857 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -36,6 +36,7 @@ from .utils import ( from .utils.asgi import ( AsgiLifespan, asgi_static, + asgi_send, asgi_send_html, asgi_send_json, asgi_send_redirect, @@ -131,14 +132,7 @@ DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} async def favicon(scope, receive, send): - await send( - { - "type": "http.response.start", - "status": 200, - "headers": [[b"content-type", b"text/plain"]], - } - ) - await send({"type": "http.response.body", "body": b""}) + await asgi_send(send, "", 200) class Datasette: diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index be53627f..ac7d62a4 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -213,12 +213,13 @@ async def asgi_send_redirect(send, location, status=302): ) -async def asgi_send(send, content, status, headers, content_type="text/plain"): +async def asgi_send(send, content, status, headers=None, content_type="text/plain"): await asgi_start(send, status, headers, content_type) await send({"type": "http.response.body", "body": content.encode("utf8")}) -async def asgi_start(send, status, headers, content_type="text/plain"): +async def asgi_start(send, status, headers=None, content_type="text/plain"): + headers = headers or {} # Remove any existing content-type header headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) headers["content-type"] = content_type From 79950c9643241791c2ff554db3b846d12b965c8a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 15:06:43 -0700 Subject: [PATCH 25/33] Implemented HEAD requests, removed Sanic InvalidUsage --- datasette/app.py | 6 +----- datasette/views/base.py | 5 +++++ tests/fixtures.py | 8 ++++---- tests/test_html.py | 5 +++++ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 28b5e857..978fb7e7 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -12,7 +12,7 @@ from pathlib import Path import click from markupsafe import Markup from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from sanic.exceptions import InvalidUsage, NotFound +from sanic.exceptions import NotFound from .views.base import DatasetteError, ureg, AsgiRouter from .views.database import DatabaseDownload, DatabaseView @@ -665,10 +665,6 @@ class Datasette: status = 404 info = {} message = exception.args[0] - elif isinstance(exception, InvalidUsage): - status = 405 - info = {} - message = exception.args[0] elif isinstance(exception, DatasetteError): status = exception.status info = exception.error_dict diff --git a/datasette/views/base.py b/datasette/views/base.py index 4bf251fb..8d514688 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -54,6 +54,11 @@ class DatasetteError(Exception): class BaseView(AsgiView): ds = None + async def head(self, *args, **kwargs): + response = await self.get(*args, **kwargs) + response.body = b"" + return response + def _asset_urls(self, key, template, context): # Flatten list-of-lists from plugins: seen_urls = set() diff --git a/tests/fixtures.py b/tests/fixtures.py index c5f2bfd4..00140f50 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -37,10 +37,10 @@ class TestClient: self.asgi_app = asgi_app @async_to_sync - async def get(self, path, allow_redirects=True, redirect_count=0): - return await self._get(path, allow_redirects, redirect_count) + async def get(self, path, allow_redirects=True, redirect_count=0, method="GET"): + return await self._get(path, allow_redirects, redirect_count, method) - async def _get(self, path, allow_redirects=True, redirect_count=0): + async def _get(self, path, allow_redirects=True, redirect_count=0, method="GET"): query_string = b"" if "?" in path: path, _, query_string = path.partition("?") @@ -50,7 +50,7 @@ class TestClient: { "type": "http", "http_version": "1.0", - "method": "GET", + "method": method, "path": unquote(path), "raw_path": path.encode("ascii"), "query_string": query_string, diff --git a/tests/test_html.py b/tests/test_html.py index 7a5c32c4..32fa2fe3 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -46,6 +46,11 @@ def test_homepage(app_client_two_attached_databases): ] == table_links +def test_http_head(app_client): + response = app_client.get("/", method="HEAD") + assert response.status == 200 + + def test_static(app_client): response = app_client.get("/-/static/app2.css") assert response.status == 404 From 1e0998ed2dc80d77b3d191ec09318f0f6fea5aff Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 15:13:25 -0700 Subject: [PATCH 26/33] Removed Sanic HTTPMethodView --- datasette/utils/asgi.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index ac7d62a4..1cbdd583 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,6 +1,5 @@ import json from mimetypes import guess_type -from sanic.views import HTTPMethodView from sanic.request import Request as SanicRequest from pathlib import Path from html import escape @@ -82,7 +81,11 @@ class AsgiLifespan: await self.app(scope, receive, send) -class AsgiView(HTTPMethodView): +class AsgiView: + def dispatch_request(self, request, *args, **kwargs): + handler = getattr(self, request.method.lower(), None) + return handler(request, *args, **kwargs) + @classmethod def as_asgi(cls, *class_args, **class_kwargs): async def view(scope, receive, send): From 979ae4f9164bdae6db100fa7835b2f432160e7fd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 15:23:53 -0700 Subject: [PATCH 27/33] Replaced sanic.request.RequestParameters --- datasette/utils/__init__.py | 13 +++++++++++++ datasette/views/table.py | 5 ++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 5ed8dd12..bdebfc30 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -741,3 +741,16 @@ def format_bytes(bytes): return "{} {}".format(int(current), unit) else: return "{:.1f} {}".format(current, unit) + + +class RequestParameters(dict): + def get(self, name, default=None): + "Return first value in the list, if available" + try: + return super().get(name)[0] + except KeyError: + return default + + def getlist(self, name, default=None): + "Return full list" + return super().get(name, default) diff --git a/datasette/views/table.py b/datasette/views/table.py index 14b8743a..b5765ca4 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -4,12 +4,12 @@ import json import jinja2 from sanic.exceptions import NotFound -from sanic.request import RequestParameters from datasette.plugins import pm from datasette.utils import ( CustomRow, QueryInterrupted, + RequestParameters, append_querystring, compound_keys_after_sql, escape_sqlite, @@ -219,8 +219,7 @@ class TableView(RowTableShared): if is_view: order_by = "" - # We roll our own query_string decoder because by default Sanic - # drops anything with an empty value e.g. ?name__exact= + # Ensure we don't drop anything with an empty value e.g. ?name__exact= args = RequestParameters( urllib.parse.parse_qs(request.query_string, keep_blank_values=True) ) From 3c4d4f35351b1691993984503a6380d2fc635681 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 15:28:14 -0700 Subject: [PATCH 28/33] Replaced sanic.exceptions.NotFound --- datasette/app.py | 2 +- datasette/utils/asgi.py | 4 ++++ datasette/views/base.py | 3 +-- datasette/views/table.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 978fb7e7..9debfa1e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -12,7 +12,6 @@ from pathlib import Path import click from markupsafe import Markup from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from sanic.exceptions import NotFound from .views.base import DatasetteError, ureg, AsgiRouter from .views.database import DatabaseDownload, DatabaseView @@ -35,6 +34,7 @@ from .utils import ( ) from .utils.asgi import ( AsgiLifespan, + NotFound, asgi_static, asgi_send, asgi_send_html, diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 1cbdd583..56a7a79a 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -7,6 +7,10 @@ import re import aiofiles +class NotFound(Exception): + pass + + class AsgiRouter: def __init__(self, routes=None): routes = routes or [] diff --git a/datasette/views/base.py b/datasette/views/base.py index 8d514688..69a17315 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -8,7 +8,6 @@ import urllib import jinja2 import pint from sanic import response -from sanic.exceptions import NotFound from html import escape @@ -27,7 +26,7 @@ from datasette.utils import ( sqlite3, to_css_class, ) -from datasette.utils.asgi import AsgiStream, AsgiWriter, AsgiRouter, AsgiView +from datasette.utils.asgi import AsgiStream, AsgiWriter, AsgiRouter, AsgiView, NotFound ureg = pint.UnitRegistry() diff --git a/datasette/views/table.py b/datasette/views/table.py index b5765ca4..06be5671 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -3,7 +3,6 @@ import itertools import json import jinja2 -from sanic.exceptions import NotFound from datasette.plugins import pm from datasette.utils import ( @@ -24,6 +23,7 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) +from datasette.utils.asgi import NotFound from datasette.filters import Filters from .base import DataView, DatasetteError, ureg From 176dd4f12a85bbfc642aeb257c910413d123756f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 16:02:33 -0700 Subject: [PATCH 29/33] DatasetteRouter is no longer a nested class --- datasette/app.py | 103 ++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 9debfa1e..4a8ead1d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -645,59 +645,62 @@ class Datasette: ) self.register_custom_units() - outer_self = self - - class DatasetteRouter(AsgiRouter): - async def handle_404(self, scope, receive, send): - # If URL has a trailing slash, redirect to URL without it - path = scope.get("raw_path", scope["path"].encode("utf8")) - if path.endswith(b"/"): - path = path.rstrip(b"/") - if scope["query_string"]: - path += b"?" + scope["query_string"] - await asgi_send_redirect(send, path.decode("latin1")) - else: - await super().handle_404(scope, receive, send) - - async def handle_500(self, scope, receive, send, exception): - title = None - if isinstance(exception, NotFound): - status = 404 - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.messagge_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = ["500.html"] - if status != 500: - templates = ["{}.html".format(status)] + templates - info.update( - {"ok": False, "error": message, "status": status, "title": title} - ) - headers = {} - if outer_self.cors: - headers["Access-Control-Allow-Origin"] = "*" - if scope["path"].split("?")[0].endswith(".json"): - await asgi_send_json(send, info, status=status, headers=headers) - else: - template = outer_self.jinja_env.select_template(templates) - await asgi_send_html( - send, template.render(info), status=status, headers=headers - ) - async def setup_db(): # First time server starts up, calculate table counts for immutable databases for dbname, database in self.databases.items(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - return AsgiLifespan(AsgiTracer(DatasetteRouter(routes)), on_startup=setup_db) + return AsgiLifespan( + AsgiTracer(DatasetteRouter(self, routes)), on_startup=setup_db + ) + + +class DatasetteRouter(AsgiRouter): + def __init__(self, datasette, routes): + self.ds = datasette + super().__init__(routes) + + async def handle_404(self, scope, receive, send): + # If URL has a trailing slash, redirect to URL without it + path = scope.get("raw_path", scope["path"].encode("utf8")) + if path.endswith(b"/"): + path = path.rstrip(b"/") + if scope["query_string"]: + path += b"?" + scope["query_string"] + await asgi_send_redirect(send, path.decode("latin1")) + else: + await super().handle_404(scope, receive, send) + + async def handle_500(self, scope, receive, send, exception): + title = None + if isinstance(exception, NotFound): + status = 404 + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.messagge_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = ["500.html"] + if status != 500: + templates = ["{}.html".format(status)] + templates + info.update({"ok": False, "error": message, "status": status, "title": title}) + headers = {} + if self.ds.cors: + headers["Access-Control-Allow-Origin"] = "*" + if scope["path"].split("?")[0].endswith(".json"): + await asgi_send_json(send, info, status=status, headers=headers) + else: + template = self.ds.jinja_env.select_template(templates) + await asgi_send_html( + send, template.render(info), status=status, headers=headers + ) From d0fc11769390660ee30aa3aafa5376703452af38 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 16:32:06 -0700 Subject: [PATCH 30/33] Removed rogue debug print --- datasette/utils/asgi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 56a7a79a..6e005ab3 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -57,7 +57,6 @@ class AsgiRouter: class AsgiLifespan: def __init__(self, app, on_startup=None, on_shutdown=None): - print("Wrapping {}".format(app)) self.app = app on_startup = on_startup or [] on_shutdown = on_shutdown or [] From 5e122394027fcc140c10e3c0d4d0bc80f8445a9a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 16:42:49 -0700 Subject: [PATCH 31/33] Replaced sanic.request.Request --- datasette/utils/__init__.py | 2 +- datasette/utils/asgi.py | 108 +++++++++++++++++++++++++----------- pytest.ini | 2 - tests/test_utils.py | 12 ++-- 4 files changed, 84 insertions(+), 40 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index bdebfc30..94ccc23e 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -748,7 +748,7 @@ class RequestParameters(dict): "Return first value in the list, if available" try: return super().get(name)[0] - except KeyError: + except (KeyError, TypeError): return default def getlist(self, name, default=None): diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 6e005ab3..08cb44aa 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,6 +1,7 @@ import json +from datasette.utils import RequestParameters from mimetypes import guess_type -from sanic.request import Request as SanicRequest +from urllib.parse import parse_qs, urlunparse from pathlib import Path from html import escape import re @@ -11,6 +12,73 @@ class NotFound(Exception): pass +class Request: + def __init__(self, scope): + self.scope = scope + + @property + def method(self): + return self.scope["method"] + + @property + def url(self): + return urlunparse( + (self.scheme, self.host, self.path, None, self.query_string, None) + ) + + @property + def scheme(self): + return self.scope.get("scheme") or "http" + + @property + def headers(self): + return dict( + [ + (k.decode("latin-1").lower(), v.decode("latin-1")) + for k, v in self.scope.get("headers") or [] + ] + ) + + @property + def host(self): + return self.headers.get("host") or "localhost" + + @property + def path(self): + return ( + self.scope.get("raw_path", self.scope["path"].encode("latin-1")) + ).decode("latin-1") + + @property + def query_string(self): + return (self.scope.get("query_string") or b"").decode("latin-1") + + @property + def args(self): + return RequestParameters(parse_qs(qs=self.query_string)) + + @property + def raw_args(self): + return {key: value[0] for key, value in self.args.items()} + + @classmethod + def from_path_with_query_string( + cls, path_with_query_string, method="GET", scheme="http" + ): + "Useful for constructing Request objects for tests" + path, _, query_string = path_with_query_string.partition("?") + scope = { + "http_version": "1.1", + "method": method, + "path": path, + "raw_path": path.encode("latin-1"), + "query_string": query_string.encode("latin-1"), + "scheme": scheme, + "type": "http", + } + return cls(scope) + + class AsgiRouter: def __init__(self, routes=None): routes = routes or [] @@ -52,7 +120,7 @@ class AsgiRouter: } ) html = "

500

".format(escape(repr(exception))) - await send({"type": "http.response.body", "body": html.encode("utf8")}) + await send({"type": "http.response.body", "body": html.encode("latin-1")}) class AsgiLifespan: @@ -92,34 +160,12 @@ class AsgiView: @classmethod def as_asgi(cls, *class_args, **class_kwargs): async def view(scope, receive, send): - # Uses scope to create a Sanic-compatible request object, - # then dispatches that to self.get(...) or self.options(...) - # along with keyword arguments that were already tucked - # into scope["url_route"]["kwargs"] by the router + # Uses scope to create a request object, then dispatches that to + # self.get(...) or self.options(...) along with keyword arguments + # that were already tucked into scope["url_route"]["kwargs"] by + # the router, similar to how Django Channels works: # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter - path = scope.get("raw_path", scope["path"].encode("utf8")) - if scope["query_string"]: - path = path + b"?" + scope["query_string"] - request = SanicRequest( - path, - { - "Host": dict(scope.get("headers") or []) - .get(b"host", b"") - .decode("utf8") - }, - "1.1", - scope["method"], - None, - ) - - # TODO: Remove need for this - class Woo: - def get_extra_info(self, key): - return False - - request.app = Woo() - request.app.websocket_enabled = False - request.transport = Woo() + request = Request(scope) self = view.view_class(*class_args, **class_kwargs) response = await self.dispatch_request( request, **scope["url_route"]["kwargs"] @@ -185,7 +231,7 @@ class AsgiWriter: await self.send( { "type": "http.response.body", - "body": chunk.encode("utf8"), + "body": chunk.encode("latin-1"), "more_body": True, } ) @@ -221,7 +267,7 @@ async def asgi_send_redirect(send, location, status=302): async def asgi_send(send, content, status, headers=None, content_type="text/plain"): await asgi_start(send, status, headers, content_type) - await send({"type": "http.response.body", "body": content.encode("utf8")}) + await send({"type": "http.response.body", "body": content.encode("latin-1")}) async def asgi_start(send, status, headers=None, content_type="text/plain"): diff --git a/pytest.ini b/pytest.ini index f2c8a6d2..aa292efc 100644 --- a/pytest.ini +++ b/pytest.ini @@ -4,7 +4,5 @@ filterwarnings= ignore:Using or importing the ABCs::jinja2 # https://bugs.launchpad.net/beautifulsoup/+bug/1778909 ignore:Using or importing the ABCs::bs4.element - # Sanic verify_ssl=True - ignore:verify_ssl is deprecated::sanic # Python 3.7 PendingDeprecationWarning: Task.current_task() ignore:.*current_task.*:PendingDeprecationWarning diff --git a/tests/test_utils.py b/tests/test_utils.py index a5f603e6..cbf9eba7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,11 +3,11 @@ Tests for various datasette helper functions. """ from datasette import utils +from datasette.utils.asgi import Request from datasette.filters import Filters import json import os import pytest -from sanic.request import Request import sqlite3 import tempfile from unittest.mock import patch @@ -53,7 +53,7 @@ def test_urlsafe_components(path, expected): ], ) def test_path_with_added_args(path, added_args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.from_path_with_query_string(path) actual = utils.path_with_added_args(request, added_args) assert expected == actual @@ -67,11 +67,11 @@ def test_path_with_added_args(path, added_args, expected): ], ) def test_path_with_removed_args(path, args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.from_path_with_query_string(path) actual = utils.path_with_removed_args(request, args) assert expected == actual # Run the test again but this time use the path= argument - request = Request("/".encode("utf8"), {}, "1.1", "GET", None) + request = Request.from_path_with_query_string("/") actual = utils.path_with_removed_args(request, args, path=path) assert expected == actual @@ -84,7 +84,7 @@ def test_path_with_removed_args(path, args, expected): ], ) def test_path_with_replaced_args(path, args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.from_path_with_query_string(path) actual = utils.path_with_replaced_args(request, args) assert expected == actual @@ -363,7 +363,7 @@ def test_table_columns(): ], ) def test_path_with_format(path, format, extra_qs, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.from_path_with_query_string(path) actual = utils.path_with_format(request, format, extra_qs) assert expected == actual From eba15fb5a8c8b155d1a33c1f3357fdfb29eb7041 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 16:49:57 -0700 Subject: [PATCH 32/33] Renamed Request.from_path_with_query_string() to Request.fake() --- datasette/utils/asgi.py | 4 +--- tests/test_utils.py | 10 +++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 08cb44aa..63034872 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -62,9 +62,7 @@ class Request: return {key: value[0] for key, value in self.args.items()} @classmethod - def from_path_with_query_string( - cls, path_with_query_string, method="GET", scheme="http" - ): + def fake(cls, path_with_query_string, method="GET", scheme="http"): "Useful for constructing Request objects for tests" path, _, query_string = path_with_query_string.partition("?") scope = { diff --git a/tests/test_utils.py b/tests/test_utils.py index cbf9eba7..e9e722b8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,7 +53,7 @@ def test_urlsafe_components(path, expected): ], ) def test_path_with_added_args(path, added_args, expected): - request = Request.from_path_with_query_string(path) + request = Request.fake(path) actual = utils.path_with_added_args(request, added_args) assert expected == actual @@ -67,11 +67,11 @@ def test_path_with_added_args(path, added_args, expected): ], ) def test_path_with_removed_args(path, args, expected): - request = Request.from_path_with_query_string(path) + request = Request.fake(path) actual = utils.path_with_removed_args(request, args) assert expected == actual # Run the test again but this time use the path= argument - request = Request.from_path_with_query_string("/") + request = Request.fake("/") actual = utils.path_with_removed_args(request, args, path=path) assert expected == actual @@ -84,7 +84,7 @@ def test_path_with_removed_args(path, args, expected): ], ) def test_path_with_replaced_args(path, args, expected): - request = Request.from_path_with_query_string(path) + request = Request.fake(path) actual = utils.path_with_replaced_args(request, args) assert expected == actual @@ -363,7 +363,7 @@ def test_table_columns(): ], ) def test_path_with_format(path, format, extra_qs, expected): - request = Request.from_path_with_query_string(path) + request = Request.fake(path) actual = utils.path_with_format(request, format, extra_qs) assert expected == actual From b794554a26ddc81bd772c4422d80d5ee863e92b0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 20:03:33 -0700 Subject: [PATCH 33/33] Replaced sanic.response and finished removing Sanic entirely in favour of ASGI --- datasette/utils/asgi.py | 69 ++++++++++++++++++++++++++++--------- datasette/views/base.py | 36 +++++++++---------- datasette/views/database.py | 2 -- datasette/views/index.py | 5 ++- datasette/views/special.py | 4 +-- setup.py | 1 - 6 files changed, 74 insertions(+), 43 deletions(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index 63034872..fdf330ae 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -168,23 +168,7 @@ class AsgiView: response = await self.dispatch_request( request, **scope["url_route"]["kwargs"] ) - if hasattr(response, "asgi_send"): - await response.asgi_send(send) - else: - headers = {} - headers.update(response.headers) - headers["content-type"] = response.content_type - await send( - { - "type": "http.response.start", - "status": response.status, - "headers": [ - [key.encode("utf-8"), value.encode("utf-8")] - for key, value in headers.items() - ], - } - ) - await send({"type": "http.response.body", "body": response.body}) + await response.asgi_send(send) view.view_class = cls view.__doc__ = cls.__doc__ @@ -330,6 +314,57 @@ def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None): return inner_static +class Response: + def __init__(self, body=None, status=200, headers=None, content_type="text/plain"): + self.body = body + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + headers = {} + headers.update(self.headers) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + body = self.body + if not isinstance(body, bytes): + body = body.encode("utf-8") + await send({"type": "http.response.body", "body": body}) + + @classmethod + def html(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/html; charset=utf-8", + ) + + @classmethod + def text(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/plain; charset=utf-8", + ) + + @classmethod + def redirect(cls, path, status=302, headers=None): + headers = headers or {} + headers["Location"] = path + return cls("", status=status, headers=headers) + + class AsgiFileDownload: def __init__( self, filepath, filename=None, content_type="application/octet-stream" diff --git a/datasette/views/base.py b/datasette/views/base.py index 69a17315..7acb7304 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -7,7 +7,6 @@ import urllib import jinja2 import pint -from sanic import response from html import escape @@ -26,7 +25,14 @@ from datasette.utils import ( sqlite3, to_css_class, ) -from datasette.utils.asgi import AsgiStream, AsgiWriter, AsgiRouter, AsgiView, NotFound +from datasette.utils.asgi import ( + AsgiStream, + AsgiWriter, + AsgiRouter, + AsgiView, + NotFound, + Response, +) ureg = pint.UnitRegistry() @@ -112,7 +118,7 @@ class BaseView(AsgiView): datasette=self.ds, ): body_scripts.append(jinja2.Markup(script)) - return response.html( + return Response.html( template.render( { **context, @@ -144,7 +150,7 @@ class DataView(BaseView): self.ds = datasette def options(self, request, *args, **kwargs): - r = response.text("ok") + r = Response.text("ok") if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" return r @@ -154,7 +160,7 @@ class DataView(BaseView): path = "{}?{}".format(path, request.query_string) if remove_args: path = path_with_removed_args(request, remove_args, path=path) - r = response.redirect(path) + r = Response.redirect(path) r.headers["Link"] = "<{}>; rel=preload".format(path) if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" @@ -254,7 +260,7 @@ class DataView(BaseView): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: data, _, _ = response_or_template_contexts @@ -371,7 +377,7 @@ class DataView(BaseView): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: @@ -422,17 +428,11 @@ class DataView(BaseView): if result is None: raise NotFound("No data") - response_args = { - "content_type": result.get("content_type", "text/plain"), - "status": result.get("status_code", 200), - } - - if type(result.get("body")) == bytes: - response_args["body_bytes"] = result.get("body") - else: - response_args["body"] = result.get("body") - - r = response.HTTPResponse(**response_args) + r = Response( + body=result.get("body"), + status=result.get("status_code", 200), + content_type=result.get("content_type", "text/plain"), + ) else: extras = {} if callable(extra_template_data): diff --git a/datasette/views/database.py b/datasette/views/database.py index 4809fef0..78af19c5 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,7 +1,5 @@ import os -from sanic import response - from datasette.utils import to_css_class, validate_sql_select from datasette.utils.asgi import AsgiFileDownload diff --git a/datasette/views/index.py b/datasette/views/index.py index c01ae811..2c1c017a 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -1,9 +1,8 @@ import hashlib import json -from sanic import response - from datasette.utils import CustomJSONEncoder +from datasette.utils.asgi import Response from datasette.version import __version__ from .base import BaseView @@ -104,7 +103,7 @@ class IndexView(BaseView): headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( + return Response( json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder), content_type="application/json; charset=utf-8", headers=headers, diff --git a/datasette/views/special.py b/datasette/views/special.py index 1e0c2032..c4976bb2 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,5 +1,5 @@ import json -from sanic import response +from datasette.utils.asgi import Response from .base import BaseView @@ -17,7 +17,7 @@ class JsonDataView(BaseView): headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( + return Response( json.dumps(data), content_type="application/json; charset=utf-8", headers=headers, diff --git a/setup.py b/setup.py index 3a8201cb..f66d03da 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ setup( install_requires=[ "click>=6.7", "click-default-group==1.2", - "Sanic==0.7.0", "Jinja2==2.10.1", "hupper==1.0", "pint==0.8.1",