Database download works again, refactored utils.py #272

Refactored utils.py into a datasette/utils package, refactored some of
the ASGI helper code into datasette/utils/asgi.py
This commit is contained in:
Simon Willison 2019-06-23 07:36:54 -07:00
commit d2daa1b9f7
6 changed files with 993 additions and 965 deletions

View file

@ -1,6 +1,4 @@
import asyncio import asyncio
import aiofiles
from mimetypes import guess_type
import collections import collections
import hashlib import hashlib
import json import json
@ -38,6 +36,7 @@ from .utils import (
sqlite_timelimit, sqlite_timelimit,
to_css_class, to_css_class,
) )
from .utils.asgi import asgi_static, asgi_send_html, asgi_send_json
from .tracer import capture_traces, trace from .tracer import capture_traces, trace
from .plugins import pm, DEFAULT_PLUGINS from .plugins import pm, DEFAULT_PLUGINS
from .version import __version__ from .version import __version__
@ -714,78 +713,3 @@ class Datasette:
# await database.table_counts(limit=60 * 60 * 1000) # await database.table_counts(limit=60 * 60 * 1000)
return app return app
async def asgi_send_json(send, info, status=200, headers=None):
    """Serialize ``info`` with ``json.dumps`` and send it as a JSON response.

    ``headers`` may be omitted; an empty dict is used in that case.
    """
    payload = json.dumps(info)
    await asgi_send(
        send,
        payload,
        status=status,
        headers=headers or {},
        content_type="application/json",
    )
async def asgi_send_html(send, html, status=200, headers=None):
    """Send the string ``html`` as a ``text/html`` response.

    ``headers`` may be omitted; an empty dict is used in that case.
    """
    await asgi_send(
        send,
        html,
        status=status,
        headers=headers or {},
        content_type="text/html",
    )
async def asgi_send(send, content, status, headers, content_type="text/plain"):
    """Send a complete response: the start message, then one body message.

    ``content`` is a str; it is encoded as UTF-8 for the body.
    """
    await asgi_start(send, status, headers, content_type)
    body_message = {"type": "http.response.body", "body": content.encode("utf8")}
    await send(body_message)
async def asgi_start(send, status, headers, content_type="text/plain"):
    """Send the ``http.response.start`` message.

    Any content-type entry in ``headers`` (matched case-insensitively) is
    dropped and replaced by ``content_type``. Header names and values are
    encoded as latin1.
    """
    # Rebuild the mapping rather than mutating the caller's dict.
    outgoing = {
        name: value
        for name, value in headers.items()
        if name.lower() != "content-type"
    }
    outgoing["content-type"] = content_type
    encoded = [
        [name.encode("latin1"), value.encode("latin1")]
        for name, value in outgoing.items()
    ]
    await send(
        {"type": "http.response.start", "status": status, "headers": encoded}
    )
def asgi_static(root_path, chunk_size=4096):
    """Build an ASGI app that serves static files from beneath ``root_path``.

    The returned coroutine reads the requested relative path from
    ``scope["url_route"]["kwargs"]["path"]`` and streams the file in
    ``chunk_size`` byte pieces. A "404" HTML response is sent when the path
    falls outside ``root_path``, does not exist, or is a directory.
    """

    async def inner_static(scope, receive, send):
        path = scope["url_route"]["kwargs"]["path"]
        try:
            # resolve() collapses ".." segments and follows symlinks.
            # absolute() alone keeps ".." in place, which lets "../" paths
            # pass the relative_to() containment check. Resolving the root
            # as well keeps the check valid for a relative root_path.
            resolved_root = Path(root_path).resolve()
            full_path = (resolved_root / path).resolve()
            full_path.relative_to(resolved_root)
        except (ValueError, FileNotFoundError):
            await asgi_send_html(send, "404", 404)
            return
        try:
            async with aiofiles.open(full_path, mode="rb") as fp:
                # Start the response only after the file opened, so a
                # missing file can still be answered with a 404.
                await asgi_start(
                    send, 200, {}, guess_type(str(full_path))[0] or "text/plain"
                )
                more_body = True
                while more_body:
                    chunk = await fp.read(chunk_size)
                    # A short (or empty) read means the end of the file.
                    more_body = len(chunk) == chunk_size
                    await send(
                        {
                            "type": "http.response.body",
                            "body": chunk,
                            "more_body": more_body,
                        }
                    )
        except (FileNotFoundError, IsADirectoryError):
            await asgi_send_html(send, "404", 404)
            return

    return inner_static

244
datasette/utils/asgi.py Normal file
View file

@ -0,0 +1,244 @@
import json
from mimetypes import guess_type
from sanic.views import HTTPMethodView
from pathlib import Path
import re
import aiofiles
class AsgiRouter:
    """Minimal ASGI router that dispatches on ``scope["path"]``.

    ``routes`` is an iterable of ``(pattern, view)`` pairs. String patterns
    are compiled with ``re.compile``; any other pattern object is used as
    given. The named groups of the first matching pattern are passed to the
    view as ``scope["url_route"]["kwargs"]``.
    """

    def __init__(self, routes=None):
        routes = routes or []
        self.routes = [
            # Compile any strings to regular expressions
            ((re.compile(pattern) if isinstance(pattern, str) else pattern), view)
            for pattern, view in routes
        ]

    async def __call__(self, scope, receive, send):
        """Run the first view whose pattern matches, otherwise send a 404.

        An exception raised by the view is reported through ``handle_500``.
        """
        for regex, view in self.routes:
            match = regex.match(scope["path"])
            if match is not None:
                new_scope = dict(scope, url_route={"kwargs": match.groupdict()})
                try:
                    return await view(new_scope, receive, send)
                except Exception as exception:
                    return await self.handle_500(scope, receive, send, exception)
        return await self.handle_404(scope, receive, send)

    async def handle_404(self, scope, receive, send):
        """Send a fixed HTML 404 response."""
        await send(
            {
                "type": "http.response.start",
                "status": 404,
                "headers": [[b"content-type", b"text/html"]],
            }
        )
        await send({"type": "http.response.body", "body": b"<h1>404</h1>"})

    async def handle_500(self, scope, receive, send, exception):
        """Send an HTML 500 response showing the escaped ``repr(exception)``."""
        # Imported locally: this module's top-level imports do not include it.
        from html import escape

        await send(
            {
                "type": "http.response.start",
                # Previously 404, which misreported server errors.
                "status": 500,
                "headers": [[b"content-type", b"text/html"]],
            }
        )
        # The exception text belongs between the tags, not inside "<pre ...>".
        html = "<h1>500</h1><pre>{}</pre>".format(escape(repr(exception)))
        await send({"type": "http.response.body", "body": html.encode("utf8")})
class AsgiView(HTTPMethodView):
    """Sanic ``HTTPMethodView`` that can be exposed as an ASGI callable."""

    @classmethod
    def as_asgi(cls, *class_args, **class_kwargs):
        """Return an ASGI callable that serves requests with this view class.

        A new instance is created from ``class_args`` / ``class_kwargs`` for
        every request. The returned function carries ``view_class``,
        ``__doc__``, ``__module__`` and ``__name__`` copied from ``cls``.
        """
        # Imported locally: the module's top-level imports do not bring in
        # the Sanic request class, so the name was undefined when view() ran.
        from sanic.request import Request as SanicRequest

        async def view(scope, receive, send):
            # Uses scope to create a Sanic-compatible request object,
            # then dispatches that to self.get(...) or self.options(...)
            # along with keyword arguments that were already tucked
            # into scope["url_route"]["kwargs"] by the router
            # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter
            path = scope.get("raw_path", scope["path"].encode("utf8"))
            if scope["query_string"]:
                path = path + b"?" + scope["query_string"]
            request = SanicRequest(
                path,
                {
                    "Host": dict(scope.get("headers") or [])
                    .get(b"host", b"")
                    .decode("utf8")
                },
                "1.1",
                scope["method"],
                None,
            )

            # TODO: Remove need for this
            class Woo:
                # Stand-in used for both request.app and request.transport.
                def get_extra_info(self, key):
                    return False

            request.app = Woo()
            request.app.websocket_enabled = False
            request.transport = Woo()
            self = view.view_class(*class_args, **class_kwargs)
            response = await self.dispatch_request(
                request, **scope["url_route"]["kwargs"]
            )
            if hasattr(response, "asgi_send"):
                # The response knows how to send itself (e.g. streaming).
                await response.asgi_send(send)
            else:
                await send(
                    {
                        "type": "http.response.start",
                        "status": response.status,
                        "headers": [
                            [key.encode("utf-8"), value.encode("utf-8")]
                            for key, value in response.headers.items()
                        ],
                    }
                )
                await send({"type": "http.response.body", "body": response.body})

        view.view_class = cls
        view.__doc__ = cls.__doc__
        view.__module__ = cls.__module__
        view.__name__ = cls.__name__
        return view
class AsgiStream:
    """Response object whose body is produced by an async ``stream_fn``.

    ``stream_fn`` is awaited with an ``AsgiWriter`` wrapped around ``send``.
    """

    def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"):
        self.stream_fn = stream_fn
        self.status = status
        self.headers = headers or {}
        self.content_type = content_type

    async def asgi_send(self, send):
        """Send the response start, run ``stream_fn``, then finish the body."""
        # Drop any supplied content-type (case-insensitively) so that
        # self.content_type is the only one sent.
        merged = {
            name: value
            for name, value in self.headers.items()
            if name.lower() != "content-type"
        }
        merged["content-type"] = self.content_type
        encoded = [
            [name.encode("utf-8"), value.encode("utf-8")]
            for name, value in merged.items()
        ]
        await send(
            {"type": "http.response.start", "status": self.status, "headers": encoded}
        )
        writer = AsgiWriter(send)
        await self.stream_fn(writer)
        # Final empty body message, sent without "more_body".
        await send({"type": "http.response.body", "body": b""})
class AsgiWriter:
    """Writes str chunks to an ASGI ``send`` callable as body messages."""

    def __init__(self, send):
        self.send = send

    async def write(self, chunk):
        """Encode ``chunk`` as UTF-8 and send it with ``more_body`` set."""
        message = {
            "type": "http.response.body",
            "body": chunk.encode("utf8"),
            "more_body": True,
        }
        await self.send(message)
async def asgi_send_json(send, info, status=200, headers=None):
    """Serialize ``info`` with ``json.dumps`` and send it as a JSON response.

    ``headers`` may be omitted; an empty dict is used in that case.
    """
    payload = json.dumps(info)
    await asgi_send(
        send,
        payload,
        status=status,
        headers=headers or {},
        content_type="application/json",
    )
async def asgi_send_html(send, html, status=200, headers=None):
    """Send the string ``html`` as a ``text/html`` response.

    ``headers`` may be omitted; an empty dict is used in that case.
    """
    await asgi_send(
        send,
        html,
        status=status,
        headers=headers or {},
        content_type="text/html",
    )
async def asgi_send(send, content, status, headers, content_type="text/plain"):
    """Send a complete response: the start message, then one body message.

    ``content`` is a str; it is encoded as UTF-8 for the body.
    """
    await asgi_start(send, status, headers, content_type)
    body_message = {"type": "http.response.body", "body": content.encode("utf8")}
    await send(body_message)
async def asgi_start(send, status, headers, content_type="text/plain"):
    """Send the ``http.response.start`` message.

    Any content-type entry in ``headers`` (matched case-insensitively) is
    dropped and replaced by ``content_type``. Header names and values are
    encoded as latin1.
    """
    # Rebuild the mapping rather than mutating the caller's dict.
    outgoing = {
        name: value
        for name, value in headers.items()
        if name.lower() != "content-type"
    }
    outgoing["content-type"] = content_type
    encoded = [
        [name.encode("latin1"), value.encode("latin1")]
        for name, value in outgoing.items()
    ]
    await send(
        {"type": "http.response.start", "status": status, "headers": encoded}
    )
async def asgi_send_file(
    send, filepath, filename=None, content_type=None, chunk_size=4096
):
    """Stream the file at ``filepath`` as a 200 response.

    When ``filename`` is given, a ``Content-Disposition: attachment`` header
    carrying it is added. The content type is ``content_type`` if provided,
    otherwise the ``guess_type`` result for the path, otherwise
    ``text/plain``. The file is read ``chunk_size`` bytes at a time.
    """
    response_headers = {}
    if filename:
        response_headers["Content-Disposition"] = (
            'attachment; filename="{}"'.format(filename)
        )
    async with aiofiles.open(filepath, mode="rb") as fp:
        # The response is started only once the file has been opened.
        await asgi_start(
            send,
            200,
            response_headers,
            content_type or guess_type(str(filepath))[0] or "text/plain",
        )
        while True:
            chunk = await fp.read(chunk_size)
            # A read shorter than chunk_size is treated as the last chunk.
            has_more = len(chunk) == chunk_size
            await send(
                {"type": "http.response.body", "body": chunk, "more_body": has_more}
            )
            if not has_more:
                break
def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None):
    """Build an ASGI app that serves static files from beneath ``root_path``.

    The returned coroutine reads the requested relative path from
    ``scope["url_route"]["kwargs"]["path"]`` and streams the file with
    ``asgi_send_file`` in ``chunk_size`` byte pieces. A "404" HTML response
    is sent when the path falls outside ``root_path``, does not exist, or is
    a directory.

    ``headers`` and ``content_type`` are accepted but not currently used.
    """

    async def inner_static(scope, receive, send):
        path = scope["url_route"]["kwargs"]["path"]
        try:
            # resolve() collapses ".." segments and follows symlinks.
            # absolute() alone keeps ".." in place, which lets "../" paths
            # pass the relative_to() containment check. Resolving the root
            # as well keeps the check valid for a relative root_path.
            resolved_root = Path(root_path).resolve()
            full_path = (resolved_root / path).resolve()
            full_path.relative_to(resolved_root)
        except (ValueError, FileNotFoundError):
            await asgi_send_html(send, "404", 404)
            return
        try:
            await asgi_send_file(send, full_path, chunk_size=chunk_size)
        except (FileNotFoundError, IsADirectoryError):
            await asgi_send_html(send, "404", 404)
            return

    return inner_static
class AsgiFileDownload:
    """Response object that sends a file from disk via ``asgi_send_file``.

    ``filename``, when set, is forwarded so that the response carries a
    ``Content-Disposition: attachment`` header with that name.
    """

    def __init__(
        self, filepath, filename=None, content_type="application/octet-stream"
    ):
        self.filepath = filepath
        self.filename = filename
        self.content_type = content_type

    async def asgi_send(self, send):
        """Stream the file to ``send``."""
        # filename was stored but never forwarded, so the attachment
        # header requested by the caller was silently dropped.
        return await asgi_send_file(
            send,
            self.filepath,
            filename=self.filename,
            content_type=self.content_type,
        )

View file

@ -9,7 +9,6 @@ import jinja2
import pint import pint
from sanic import response from sanic import response
from sanic.exceptions import NotFound from sanic.exceptions import NotFound
from sanic.views import HTTPMethodView
from sanic.request import Request as SanicRequest from sanic.request import Request as SanicRequest
from html import escape from html import escape
@ -29,6 +28,7 @@ from datasette.utils import (
sqlite3, sqlite3,
to_css_class, to_css_class,
) )
from datasette.utils.asgi import AsgiStream, AsgiWriter, AsgiRouter, AsgiView
ureg = pint.UnitRegistry() ureg = pint.UnitRegistry()
@ -52,147 +52,6 @@ class DatasetteError(Exception):
self.messagge_is_html = messagge_is_html self.messagge_is_html = messagge_is_html
class AsgiRouter:
    """Minimal ASGI router that dispatches on ``scope["path"]``.

    ``routes`` is an iterable of ``(pattern, view)`` pairs. String patterns
    are compiled with ``re.compile``; any other pattern object is used as
    given. The named groups of the first matching pattern are passed to the
    view as ``scope["url_route"]["kwargs"]``.
    """

    def __init__(self, routes=None):
        routes = routes or []
        self.routes = [
            # Compile any strings to regular expressions
            ((re.compile(pattern) if isinstance(pattern, str) else pattern), view)
            for pattern, view in routes
        ]

    async def __call__(self, scope, receive, send):
        """Run the first view whose pattern matches, otherwise send a 404.

        An exception raised by the view is reported through ``handle_500``.
        """
        for regex, view in self.routes:
            match = regex.match(scope["path"])
            if match is not None:
                new_scope = dict(scope, url_route={"kwargs": match.groupdict()})
                try:
                    return await view(new_scope, receive, send)
                except Exception as exception:
                    return await self.handle_500(scope, receive, send, exception)
        return await self.handle_404(scope, receive, send)

    async def handle_404(self, scope, receive, send):
        """Send a fixed HTML 404 response."""
        await send(
            {
                "type": "http.response.start",
                "status": 404,
                "headers": [[b"content-type", b"text/html"]],
            }
        )
        await send({"type": "http.response.body", "body": b"<h1>404</h1>"})

    async def handle_500(self, scope, receive, send, exception):
        """Send an HTML 500 response showing the escaped ``repr(exception)``."""
        await send(
            {
                "type": "http.response.start",
                # Previously 404, which misreported server errors.
                "status": 500,
                "headers": [[b"content-type", b"text/html"]],
            }
        )
        # The exception text belongs between the tags, not inside "<pre ...>".
        html = "<h1>500</h1><pre>{}</pre>".format(escape(repr(exception)))
        await send({"type": "http.response.body", "body": html.encode("utf8")})
class AsgiView(HTTPMethodView):
    """Sanic ``HTTPMethodView`` that can be exposed as an ASGI callable."""

    @classmethod
    def as_asgi(cls, *class_args, **class_kwargs):
        """Return an ASGI callable that serves requests with this view class.

        A new instance is created from ``class_args`` / ``class_kwargs`` for
        every request. The returned function carries ``view_class``,
        ``__doc__``, ``__module__`` and ``__name__`` copied from ``cls``.
        """

        async def view(scope, receive, send):
            # Build a Sanic-compatible request from the ASGI scope, then
            # dispatch it together with the keyword arguments the router
            # stored in scope["url_route"]["kwargs"].
            # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter
            raw_url = scope.get("raw_path", scope["path"].encode("utf8"))
            if scope["query_string"]:
                raw_url = raw_url + b"?" + scope["query_string"]
            host = dict(scope.get("headers") or []).get(b"host", b"").decode("utf8")
            request = SanicRequest(
                raw_url, {"Host": host}, "1.1", scope["method"], None
            )

            class Woo:
                # Stand-in used for both request.app and request.transport.
                def get_extra_info(self, key):
                    return False

            stub_app = Woo()
            stub_app.websocket_enabled = False
            request.app = stub_app
            request.transport = Woo()

            instance = view.view_class(*class_args, **class_kwargs)
            response = await instance.dispatch_request(
                request, **scope["url_route"]["kwargs"]
            )
            if hasattr(response, "asgi_send"):
                # The response knows how to send itself (e.g. streaming).
                await response.asgi_send(send)
                return
            encoded = [
                [key.encode("utf-8"), value.encode("utf-8")]
                for key, value in response.headers.items()
            ]
            await send(
                {
                    "type": "http.response.start",
                    "status": response.status,
                    "headers": encoded,
                }
            )
            await send({"type": "http.response.body", "body": response.body})

        view.view_class = cls
        view.__doc__ = cls.__doc__
        view.__module__ = cls.__module__
        view.__name__ = cls.__name__
        return view
class AsgiStream:
    """Response object whose body is produced by an async ``stream_fn``.

    ``stream_fn`` is awaited with an ``AsgiWriter`` wrapped around ``send``.
    """

    def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"):
        self.stream_fn = stream_fn
        self.status = status
        self.headers = headers or {}
        self.content_type = content_type

    async def asgi_send(self, send):
        """Send the response start, run ``stream_fn``, then finish the body."""
        # Drop any supplied content-type (case-insensitively) so that
        # self.content_type is the only one sent.
        merged = {
            name: value
            for name, value in self.headers.items()
            if name.lower() != "content-type"
        }
        merged["content-type"] = self.content_type
        encoded = [
            [name.encode("utf-8"), value.encode("utf-8")]
            for name, value in merged.items()
        ]
        await send(
            {"type": "http.response.start", "status": self.status, "headers": encoded}
        )
        writer = AsgiWriter(send)
        await self.stream_fn(writer)
        # Final empty body message, sent without "more_body".
        await send({"type": "http.response.body", "body": b""})
class AsgiWriter:
    """Writes str chunks to an ASGI ``send`` callable as body messages."""

    def __init__(self, send):
        self.send = send

    async def write(self, chunk):
        """Encode ``chunk`` as UTF-8 and send it with ``more_body`` set."""
        message = {
            "type": "http.response.body",
            "body": chunk.encode("utf8"),
            "more_body": True,
        }
        await self.send(message)
class BaseView(AsgiView): class BaseView(AsgiView):
ds = None ds = None

View file

@ -3,8 +3,9 @@ import os
from sanic import response from sanic import response
from datasette.utils import to_css_class, validate_sql_select from datasette.utils import to_css_class, validate_sql_select
from datasette.utils.asgi import AsgiFileDownload
from .base import DataView, DatasetteError from .base import DatasetteError, DataView
class DatabaseView(DataView): class DatabaseView(DataView):
@ -79,8 +80,8 @@ class DatabaseDownload(DataView):
if not db.path: if not db.path:
raise DatasetteError("Cannot download database", status=404) raise DatasetteError("Cannot download database", status=404)
filepath = db.path filepath = db.path
return await response.file_stream( return AsgiFileDownload(
filepath, filepath,
filename=os.path.basename(filepath), filename=os.path.basename(filepath),
mime_type="application/octet-stream", content_type="application/octet-stream",
) )