.blob output renderer

* _blob_hash= checking plus refactored to use new BadRequest class, refs #1050
* Replace BlobView with new .blob renderer, closes #1050
* .blob downloads on arbitrary queries, closes #1051
This commit is contained in:
Simon Willison 2020-10-29 15:01:38 -07:00 committed by GitHub
commit 78b3eeaad9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 165 additions and 121 deletions

View file

@ -38,7 +38,7 @@ from .views.special import (
PermissionsDebugView,
MessagesDebugView,
)
from .views.table import RowView, TableView, BlobView
from .views.table import RowView, TableView
from .renderer import json_renderer
from .database import Database, QueryInterrupted
@ -60,6 +60,7 @@ from .utils import (
)
from .utils.asgi import (
AsgiLifespan,
Base400,
Forbidden,
NotFound,
Request,
@ -923,10 +924,6 @@ class Datasette:
+ renderer_regex
+ r")?$",
)
add_route(
BlobView.as_view(self),
r"/(?P<db_name>[^/]+)/(?P<table>[^/]+?)/\-/blob/(?P<pk_path>[^/]+?)/(?P<column>[^/]+)\.blob$",
)
self._register_custom_units()
async def setup_db():
@ -1113,11 +1110,7 @@ class DatasetteRouter:
pdb.post_mortem(exception.__traceback__)
title = None
if isinstance(exception, NotFound):
status = 404
info = {}
message = exception.args[0]
elif isinstance(exception, Forbidden):
if isinstance(exception, Forbidden):
status = 403
info = {}
message = exception.args[0]
@ -1129,6 +1122,10 @@ class DatasetteRouter:
if custom_response is not None:
await custom_response.asgi_send(send)
return
elif isinstance(exception, Base400):
status = exception.status
info = {}
message = exception.args[0]
elif isinstance(exception, DatasetteError):
status = exception.status
info = exception.error_dict
@ -1308,6 +1305,6 @@ class Urls:
return "{}/{}".format(self.table(database, table), row_path)
def row_blob(self, database, table, row_path, column):
return self.table(database, table) + "/-/blob/{}/{}.blob".format(
return self.table(database, table) + "/{}.blob?_blob_column={}".format(
row_path, urllib.parse.quote_plus(column)
)

View file

@ -0,0 +1,61 @@
from datasette import hookimpl
from datasette.utils.asgi import Response, BadRequest
from datasette.utils import to_css_class
import hashlib
_BLOB_COLUMN = "_blob_column"
_BLOB_HASH = "_blob_hash"
async def render_blob(datasette, database, rows, columns, request, table, view_name):
    """Datasette output renderer for the ``.blob`` extension.

    Serves the binary content of a single BLOB column from the rendered
    query/table results as a file download.

    Query string parameters:
        ``_blob_column`` (required) - name of the column to download.
        ``_blob_hash`` (optional)   - SHA-256 hex digest of the expected
            value; used to pick the matching row and to detect links that
            have gone stale because the underlying content changed.

    Raises BadRequest (HTTP 400) if the column is missing or invalid, if
    the query returned no rows, or if no row matches the supplied hash.
    """
    if _BLOB_COLUMN not in request.args:
        raise BadRequest("?{}= is required".format(_BLOB_COLUMN))
    blob_column = request.args[_BLOB_COLUMN]
    if blob_column not in columns:
        raise BadRequest("{} is not a valid column".format(blob_column))
    # If ?_blob_hash= provided, use that to select the row - otherwise use first row
    blob_hash = None
    if _BLOB_HASH in request.args:
        blob_hash = request.args[_BLOB_HASH]
        for row in rows:
            value = row[blob_column]
            # Skip NULL cells - hashing None would raise a TypeError (500)
            if value is not None and hashlib.sha256(value).hexdigest() == blob_hash:
                break
        else:
            # Loop did not break: no row matched the hash
            raise BadRequest(
                "Link has expired - the requested binary content has changed or could not be found."
            )
    else:
        if not rows:
            # Previously fell through to rows[0] and raised IndexError (500)
            raise BadRequest(
                "Link has expired - the requested binary content has changed or could not be found."
            )
        row = rows[0]
        value = row[blob_column]
    # Build a download filename from the table, primary-key path and column
    filename_bits = []
    if table:
        filename_bits.append(to_css_class(table))
    if "pk_path" in request.url_vars:
        filename_bits.append(request.url_vars["pk_path"])
    filename_bits.append(to_css_class(blob_column))
    if blob_hash:
        # Six hex characters is plenty to disambiguate the filename
        filename_bits.append(blob_hash[:6])
    filename = "-".join(filename_bits) + ".blob"
    headers = {
        # Stop browsers sniffing the payload as HTML/JavaScript
        "X-Content-Type-Options": "nosniff",
        "Content-Disposition": 'attachment; filename="{}"'.format(filename),
    }
    return Response(
        body=value or b"",
        status=200,
        headers=headers,
        content_type="application/binary",
    )
@hookimpl
def register_output_renderer():
    """Register the ``.blob`` output renderer with Datasette's plugin hook."""
    renderer = {
        "extension": "blob",
        "render": render_blob,
        # Never advertised as a format link on table/query pages; the
        # renderer is only reachable via explicitly constructed .blob URLs.
        "can_render": lambda: False,
    }
    return renderer

View file

@ -12,6 +12,7 @@ DEFAULT_PLUGINS = (
"datasette.actor_auth_cookie",
"datasette.default_permissions",
"datasette.default_magic_parameters",
"datasette.blob_renderer",
)
pm = pluggy.PluginManager("datasette")

View file

@ -70,7 +70,7 @@
{% for row in display_rows %}
<tr>
{% for column, td in zip(columns, row) %}
<td class="col-{{ column|to_css_class }}">{% if td == None %}{{ "&nbsp;"|safe }}{% else %}{{ td }}{% endif %}</td>
<td class="col-{{ column|to_css_class }}">{{ td }}</td>
{% endfor %}
</tr>
{% endfor %}

View file

@ -1,4 +1,5 @@
import json
from os import EX_CANTCREAT
from datasette.utils import MultiParams
from mimetypes import guess_type
from urllib.parse import parse_qs, urlunparse, parse_qsl
@ -15,12 +16,20 @@ Morsel._reserved["samesite"] = "SameSite"
# https://github.com/encode/starlette/blob/519f575/starlette/responses.py#L17
class NotFound(Exception):
pass
class Base400(Exception):
status = 400
class Forbidden(Exception):
pass
class NotFound(Base400):
status = 404
class Forbidden(Base400):
status = 403
class BadRequest(Base400):
status = 400
SAMESITE_VALUES = ("strict", "lax", "none")

View file

@ -26,6 +26,7 @@ from datasette.utils.asgi import (
Forbidden,
NotFound,
Response,
BadRequest,
)
ureg = pint.UnitRegistry()
@ -260,9 +261,9 @@ class DataView(BaseView):
if stream:
# Some quick sanity checks
if not self.ds.config("allow_csv_stream"):
raise DatasetteError("CSV streaming is disabled", status=400)
raise BadRequest("CSV streaming is disabled")
if request.args.get("_next"):
raise DatasetteError("_next not allowed for CSV streaming", status=400)
raise BadRequest("_next not allowed for CSV streaming")
kwargs["_size"] = "max"
# Fetch the first page
try:

View file

@ -1,4 +1,5 @@
import os
import hashlib
import itertools
import jinja2
import json
@ -10,6 +11,7 @@ from datasette.utils import (
validate_sql_select,
is_url,
path_with_added_args,
path_with_format,
path_with_removed_args,
InvalidSql,
)
@ -342,6 +344,24 @@ class QueryView(DataView):
url=jinja2.escape(value.strip())
)
)
elif isinstance(display_value, bytes):
blob_url = path_with_format(
request,
"blob",
extra_qs={
"_blob_column": column,
"_blob_hash": hashlib.sha256(
display_value
).hexdigest(),
},
)
display_value = jinja2.Markup(
'<a class="blob-download" href="{}">&lt;Binary:&nbsp;{}&nbsp;byte{}&gt;</a>'.format(
blob_url,
len(display_value),
"" if len(value) == 1 else "s",
)
)
display_row.append(display_value)
display_rows.append(display_row)

View file

@ -23,9 +23,9 @@ from datasette.utils import (
urlsafe_components,
value_as_boolean,
)
from datasette.utils.asgi import NotFound, Response
from datasette.utils.asgi import BadRequest, NotFound
from datasette.filters import Filters
from .base import BaseView, DataView, DatasetteError, ureg
from .base import DataView, DatasetteError, ureg
from .database import QueryView
LINK_WITH_LABEL = (
@ -469,7 +469,7 @@ class TableView(RowTableShared):
for i, (key, search_text) in enumerate(search_args.items()):
search_col = key.split("_search_", 1)[1]
if search_col not in await db.table_columns(fts_table):
raise DatasetteError("Cannot search by that column", status=400)
raise BadRequest("Cannot search by that column")
where_clauses.append(
"rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format(
@ -614,11 +614,11 @@ class TableView(RowTableShared):
raise ValueError
except ValueError:
raise DatasetteError("_size must be a positive integer", status=400)
raise BadRequest("_size must be a positive integer")
if page_size > self.ds.max_returned_rows:
raise DatasetteError(
"_size must be <= {}".format(self.ds.max_returned_rows), status=400
raise BadRequest(
"_size must be <= {}".format(self.ds.max_returned_rows)
)
extra_args["page_size"] = page_size
@ -665,7 +665,7 @@ class TableView(RowTableShared):
if not self.ds.config("allow_facet") and any(
arg.startswith("_facet") for arg in request.args
):
raise DatasetteError("_facet= is not allowed", status=400)
raise BadRequest("_facet= is not allowed")
# pylint: disable=no-member
facet_classes = list(
@ -1041,50 +1041,3 @@ class RowView(RowTableShared):
)
foreign_key_tables.append({**fk, **{"count": count}})
return foreign_key_tables
class BlobView(BaseView):
    # Legacy view serving a single BLOB cell as a download at
    # /<db>/<table>/-/blob/<pk_path>/<column>.blob
    # (removed by this commit in favour of the .blob output renderer).
    async def get(self, request, db_name, table, pk_path, column):
        """Serve the binary content of one BLOB cell as a file download.

        Raises NotFound for an unknown database, table or column, for a
        column that is not of type BLOB, or for a missing row.
        """
        # Table-level permission implies the caller may read its cells
        await self.check_permissions(
            request,
            [
                ("view-table", (db_name, table)),
                ("view-database", db_name),
                "view-instance",
            ],
        )
        try:
            db = self.ds.get_database(db_name)
        except KeyError:
            raise NotFound("Database {} does not exist".format(db_name))
        if not await db.table_exists(table):
            raise NotFound("Table {} does not exist".format(table))
        # Ensure the column exists and is of type BLOB
        column_types = {c.name: c.type for c in await db.table_column_details(table)}
        if column not in column_types:
            raise NotFound("Table {} does not have column {}".format(table, column))
        # "" covers columns declared with no type affinity at all
        if column_types[column].upper() not in ("BLOB", ""):
            raise NotFound(
                "Table {} does not have column {} of type BLOB".format(table, column)
            )
        # Ensure the row exists for the pk_path
        pk_values = urlsafe_components(pk_path)
        sql, params, _ = await _sql_params_pks(db, table, pk_values)
        results = await db.execute(sql, params, truncate=True)
        rows = list(results.rows)
        if not rows:
            raise NotFound("Record not found: {}".format(pk_values))
        # Serve back the binary data
        filename_bits = [to_css_class(table), pk_path, to_css_class(column)]
        filename = "-".join(filename_bits) + ".blob"
        headers = {
            # Stop browsers sniffing the payload as HTML/JavaScript
            "X-Content-Type-Options": "nosniff",
            "Content-Disposition": 'attachment; filename="{}"'.format(filename),
        }
        return Response(
            # NULL blob is served as zero bytes rather than the string "None"
            body=rows[0][column] or b"",
            status=200,
            headers=headers,
            content_type="application/binary",
        )

View file

@ -77,14 +77,3 @@ Note that this URL includes the encoded primary key of the record.
Here's that same page as JSON:
`../people/uk.org.publicwhip%2Fperson%2F10001.json <https://register-of-members-interests.datasettes.com/regmem/people/uk.org.publicwhip%2Fperson%2F10001.json>`_
.. _BlobView:
Blob
====
SQLite databases can contain binary data, stored in a ``BLOB`` column. Datasette makes the content of these columns available to download directly, at URLs that look like the following::
/database-name/table-name/-/blob/row-identifier/column-name.blob
Binary content is also made available as a base64 encoded string in the ``.json`` representation of the row.

View file

@ -1,3 +1,5 @@
import textwrap
import pytest
from .fixtures import ( # noqa
app_client,
app_client_csv_max_mb_one,
@ -78,6 +80,22 @@ def test_table_csv_with_nullable_labels(app_client):
assert EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV == response.text
@pytest.mark.xfail
def test_table_csv_blob_columns(app_client):
    # Aspirational (xfail): BLOB columns in CSV output should be rendered
    # as URLs pointing at the .blob download for each row.
    response = app_client.get("/fixtures/binary_data.csv")
    assert response.status == 200
    assert "text/plain; charset=utf-8" == response.headers["content-type"]
    # Bug fix: the original asserted EXPECTED_TABLE_CSV == <literal>,
    # comparing an unrelated module constant against the inline string and
    # never inspecting the response body. Compare response.text instead.
    # Also fixed the row-2 URL, which repeated /blob/1/ (copy-paste typo).
    assert response.text == textwrap.dedent(
        """
        rowid,data
        1,/fixtures/binary_data/-/blob/1/data.blob
        2,/fixtures/binary_data/-/blob/2/data.blob
        """
    ).strip().replace("\n", "\r\n")
def test_custom_sql_csv(app_client):
response = app_client.get(
"/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2"

View file

@ -1223,7 +1223,7 @@ def test_extra_where_clauses(app_client):
]
def test_binary_data_display(app_client):
def test_binary_data_display_in_table(app_client):
response = app_client.get("/fixtures/binary_data")
assert response.status == 200
table = Soup(response.body, "html.parser").find("table")
@ -1231,12 +1231,12 @@ def test_binary_data_display(app_client):
[
'<td class="col-Link type-pk"><a href="/fixtures/binary_data/1">1</a></td>',
'<td class="col-rowid type-int">1</td>',
'<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/-/blob/1/data.blob">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
'<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/1.blob?_blob_column=data">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
],
[
'<td class="col-Link type-pk"><a href="/fixtures/binary_data/2">2</a></td>',
'<td class="col-rowid type-int">2</td>',
'<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/-/blob/2/data.blob">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
'<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/2.blob?_blob_column=data">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
],
[
'<td class="col-Link type-pk"><a href="/fixtures/binary_data/3">3</a></td>',
@ -1249,21 +1249,38 @@ def test_binary_data_display(app_client):
]
def test_binary_data_display_in_query(app_client):
    """Binary values in arbitrary query results render as .blob download links."""
    response = app_client.get("/fixtures?sql=select+*+from+binary_data")
    assert response.status == 200
    soup = Soup(response.body, "html.parser")
    actual_tds = [
        [str(td) for td in tr.select("td")]
        for tr in soup.find("table").select("tbody tr")
    ]
    assert actual_tds == [
        [
            '<td class="col-data"><a class="blob-download" href="/fixtures.blob?sql=select+*+from+binary_data&amp;_blob_column=data&amp;_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d">&lt;Binary:\xa07\xa0bytes&gt;</a></td>'
        ],
        [
            '<td class="col-data"><a class="blob-download" href="/fixtures.blob?sql=select+*+from+binary_data&amp;_blob_column=data&amp;_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724">&lt;Binary:\xa07\xa0bytes&gt;</a></td>'
        ],
        ['<td class="col-data">\xa0</td>'],
    ]
@pytest.mark.parametrize(
"path,expected_body,expected_filename",
"path,expected_filename",
[
("/fixtures/binary_data/1.blob?_blob_column=data", "binary_data-1-data.blob"),
(
"/fixtures/binary_data/-/blob/1/data.blob",
b"\x15\x1c\x02\xc7\xad\x05\xfe",
"binary_data-1-data.blob",
"/fixtures.blob?sql=select+*+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d",
"data-f30889.blob",
),
("/fixtures/binary_data/-/blob/3/data.blob", b"", "binary_data-3-data.blob"),
],
)
def test_blob_download(app_client, path, expected_body, expected_filename):
def test_blob_download(app_client, path, expected_filename):
response = app_client.get(path)
assert response.status == 200
assert response.body == expected_body
assert response.body == b"\x15\x1c\x02\xc7\xad\x05\xfe"
assert response.headers["x-content-type-options"] == "nosniff"
assert response.headers[
"content-disposition"
@ -1274,28 +1291,17 @@ def test_blob_download(app_client, path, expected_body, expected_filename):
@pytest.mark.parametrize(
"path,expected_message",
[
("/baddb/binary_data/-/blob/1/data.blob", "Database baddb does not exist"),
("/fixtures/binary_data/1.blob", "?_blob_column= is required"),
("/fixtures/binary_data/1.blob?_blob_column=foo", "foo is not a valid column"),
(
"/fixtures/binary_data_bad/-/blob/1/data.blob",
"Table binary_data_bad does not exist",
),
(
"/fixtures/binary_data/-/blob/1/bad.blob",
"Table binary_data does not have column bad",
),
(
"/fixtures/facetable/-/blob/1/state.blob",
"Table facetable does not have column state of type BLOB",
),
(
"/fixtures/binary_data/-/blob/101/data.blob",
"Record not found: [&#39;101&#39;]",
"/fixtures/binary_data/1.blob?_blob_column=data&_blob_hash=x",
"Link has expired - the requested binary content has changed or could not be found.",
),
],
)
def test_blob_download_not_found_messages(app_client, path, expected_message):
def test_blob_download_invalid_messages(app_client, path, expected_message):
response = app_client.get(path)
assert response.status == 404
assert response.status == 400
assert expected_message in response.text

View file

@ -417,17 +417,6 @@ def cascade_app_client():
("/fixtures/binary_data/1", ["table"], 200),
("/fixtures/binary_data/1", ["table", "database"], 200),
("/fixtures/binary_data/1", ["table", "database", "instance"], 200),
# ... and for binary blob
("/fixtures/binary_data/-/blob/1/data.blob", [], 403),
("/fixtures/binary_data/-/blob/1/data.blob", ["database"], 403),
("/fixtures/binary_data/-/blob/1/data.blob", ["instance"], 403),
("/fixtures/binary_data/-/blob/1/data.blob", ["table"], 200),
("/fixtures/binary_data/-/blob/1/data.blob", ["table", "database"], 200),
(
"/fixtures/binary_data/-/blob/1/data.blob",
["table", "database", "instance"],
200,
),
# Can view query even if not allowed database or instance
("/fixtures/magic_parameters", [], 403),
("/fixtures/magic_parameters", ["database"], 403),