.blob output renderer
* _blob_hash= checking plus refactored to use new BadRequest class, refs #1050
* Replace BlobView with new .blob renderer, closes #1050
* .blob downloads on arbitrary queries, closes #1051
parent d6f9ff7137
commit 78b3eeaad9
12 changed files with 165 additions and 121 deletions
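For orientation, the two URL shapes this commit introduces can be exercised like this (a minimal sketch: the local server address is an assumption, and the paths and _blob_hash value are copied from the tests below)::

    import urllib.request

    base = "http://127.0.0.1:8001"  # assumed local `datasette fixtures.db` instance

    # Row-level blob download, served by the new .blob output renderer:
    row_url = base + "/fixtures/binary_data/1.blob?_blob_column=data"

    # Blob download from an arbitrary SQL query; _blob_hash pins the exact value:
    query_url = (
        base + "/fixtures.blob?sql=select+*+from+binary_data"
        "&_blob_column=data"
        "&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d"
    )

    for url in (row_url, query_url):
        with urllib.request.urlopen(url) as response:
            print(response.headers["content-disposition"], len(response.read()))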
datasette/app.py
@@ -38,7 +38,7 @@ from .views.special import (
     PermissionsDebugView,
     MessagesDebugView,
 )
-from .views.table import RowView, TableView, BlobView
+from .views.table import RowView, TableView
 from .renderer import json_renderer
 from .database import Database, QueryInterrupted
@@ -60,6 +60,7 @@ from .utils import (
 )
 from .utils.asgi import (
     AsgiLifespan,
+    Base400,
     Forbidden,
     NotFound,
     Request,
@@ -923,10 +924,6 @@ class Datasette:
             + renderer_regex
             + r")?$",
         )
-        add_route(
-            BlobView.as_view(self),
-            r"/(?P<db_name>[^/]+)/(?P<table>[^/]+?)/\-/blob/(?P<pk_path>[^/]+?)/(?P<column>[^/]+)\.blob$",
-        )
         self._register_custom_units()
 
         async def setup_db():
@@ -1113,11 +1110,7 @@ class DatasetteRouter:
             pdb.post_mortem(exception.__traceback__)
 
         title = None
-        if isinstance(exception, NotFound):
-            status = 404
-            info = {}
-            message = exception.args[0]
-        elif isinstance(exception, Forbidden):
+        if isinstance(exception, Forbidden):
             status = 403
             info = {}
             message = exception.args[0]
@@ -1129,6 +1122,10 @@ class DatasetteRouter:
             if custom_response is not None:
                 await custom_response.asgi_send(send)
                 return
+        elif isinstance(exception, Base400):
+            status = exception.status
+            info = {}
+            message = exception.args[0]
         elif isinstance(exception, DatasetteError):
             status = exception.status
             info = exception.error_dict
@@ -1308,6 +1305,6 @@ class Urls:
         return "{}/{}".format(self.table(database, table), row_path)
 
     def row_blob(self, database, table, row_path, column):
-        return self.table(database, table) + "/-/blob/{}/{}.blob".format(
+        return self.table(database, table) + "/{}.blob?_blob_column={}".format(
             row_path, urllib.parse.quote_plus(column)
         )
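The new row_blob() URL shape, inlined as a standalone sketch (the table URL helper below is a simplified stand-in for Urls.table(), and the argument values are borrowed from the test fixtures)::

    import urllib.parse

    def row_blob(database, table, row_path, column):
        # Simplified stand-in for Urls.table()
        table_url = "/{}/{}".format(database, table)
        return table_url + "/{}.blob?_blob_column={}".format(
            row_path, urllib.parse.quote_plus(column)
        )

    print(row_blob("fixtures", "binary_data", "1", "data"))
    # /fixtures/binary_data/1.blob?_blob_column=data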
datasette/blob_renderer.py (new file, 61 additions)
@@ -0,0 +1,61 @@
+from datasette import hookimpl
+from datasette.utils.asgi import Response, BadRequest
+from datasette.utils import to_css_class
+import hashlib
+
+_BLOB_COLUMN = "_blob_column"
+_BLOB_HASH = "_blob_hash"
+
+
+async def render_blob(datasette, database, rows, columns, request, table, view_name):
+    if _BLOB_COLUMN not in request.args:
+        raise BadRequest("?{}= is required".format(_BLOB_COLUMN))
+    blob_column = request.args[_BLOB_COLUMN]
+    if blob_column not in columns:
+        raise BadRequest("{} is not a valid column".format(blob_column))
+
+    # If ?_blob_hash= provided, use that to select the row - otherwise use first row
+    blob_hash = None
+    if _BLOB_HASH in request.args:
+        blob_hash = request.args[_BLOB_HASH]
+        for row in rows:
+            value = row[blob_column]
+            if hashlib.sha256(value).hexdigest() == blob_hash:
+                break
+        else:
+            # Loop did not break
+            raise BadRequest(
+                "Link has expired - the requested binary content has changed or could not be found."
+            )
+    else:
+        row = rows[0]
+
+    value = row[blob_column]
+    filename_bits = []
+    if table:
+        filename_bits.append(to_css_class(table))
+    if "pk_path" in request.url_vars:
+        filename_bits.append(request.url_vars["pk_path"])
+    filename_bits.append(to_css_class(blob_column))
+    if blob_hash:
+        filename_bits.append(blob_hash[:6])
+    filename = "-".join(filename_bits) + ".blob"
+    headers = {
+        "X-Content-Type-Options": "nosniff",
+        "Content-Disposition": 'attachment; filename="{}"'.format(filename),
+    }
+    return Response(
+        body=value or b"",
+        status=200,
+        headers=headers,
+        content_type="application/binary",
+    )
+
+
+@hookimpl
+def register_output_renderer():
+    return {
+        "extension": "blob",
+        "render": render_blob,
+        "can_render": lambda: False,
+    }
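The dictionary returned above follows the register_output_renderer plugin hook contract, and "can_render": lambda: False keeps .blob out of the format links Datasette normally advertises on table and query pages, while the extension stays directly requestable. A minimal sketch of another plugin using the same contract (this .txt renderer is hypothetical, not part of this commit)::

    from datasette import hookimpl
    from datasette.utils.asgi import Response


    async def render_txt(rows, columns):
        # Hypothetical renderer: tab-separated plain-text dump of the rows
        lines = ["\t".join(str(row[column]) for column in columns) for row in rows]
        return Response.text("\n".join(lines))


    @hookimpl
    def register_output_renderer():
        return {
            "extension": "txt",
            "render": render_txt,
            # Unlike .blob, advertise this format on table and query pages
            "can_render": lambda: True,
        }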
datasette/plugins.py
@@ -12,6 +12,7 @@ DEFAULT_PLUGINS = (
     "datasette.actor_auth_cookie",
     "datasette.default_permissions",
     "datasette.default_magic_parameters",
+    "datasette.blob_renderer",
 )
 
 pm = pluggy.PluginManager("datasette")
datasette/templates/query.html
@@ -70,7 +70,7 @@
         {% for row in display_rows %}
             <tr>
                 {% for column, td in zip(columns, row) %}
-                    <td class="col-{{ column|to_css_class }}">{% if td == None %}{{ "&nbsp;"|safe }}{% else %}{{ td }}{% endif %}</td>
+                    <td class="col-{{ column|to_css_class }}">{{ td }}</td>
                {% endfor %}
            </tr>
        {% endfor %}
datasette/utils/asgi.py
@@ -1,4 +1,5 @@
 import json
+from os import EX_CANTCREAT
 from datasette.utils import MultiParams
 from mimetypes import guess_type
 from urllib.parse import parse_qs, urlunparse, parse_qsl
@@ -15,12 +16,20 @@ Morsel._reserved["samesite"] = "SameSite"
 # https://github.com/encode/starlette/blob/519f575/starlette/responses.py#L17
 
 
-class NotFound(Exception):
-    pass
+class Base400(Exception):
+    status = 400
 
 
-class Forbidden(Exception):
-    pass
+class NotFound(Base400):
+    status = 404
+
+
+class Forbidden(Base400):
+    status = 403
+
+
+class BadRequest(Base400):
+    status = 400
 
 
 SAMESITE_VALUES = ("strict", "lax", "none")
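With this hierarchy, DatasetteRouter (see the app.py hunk above) no longer needs one branch per 4xx exception: any Base400 subclass carries its own status code. A quick sketch of the invariant, assuming datasette is importable::

    from datasette.utils.asgi import Base400, BadRequest, Forbidden, NotFound

    for exc in (BadRequest("nope"), Forbidden("nope"), NotFound("nope")):
        # Each subclass supplies the status the router will report
        assert isinstance(exc, Base400)
        print(type(exc).__name__, exc.status, exc.args[0])
    # BadRequest 400 / Forbidden 403 / NotFound 404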
datasette/views/base.py
@@ -26,6 +26,7 @@ from datasette.utils.asgi import (
     Forbidden,
     NotFound,
     Response,
+    BadRequest,
 )
 
 ureg = pint.UnitRegistry()
@@ -260,9 +261,9 @@ class DataView(BaseView):
         if stream:
             # Some quick sanity checks
             if not self.ds.config("allow_csv_stream"):
-                raise DatasetteError("CSV streaming is disabled", status=400)
+                raise BadRequest("CSV streaming is disabled")
             if request.args.get("_next"):
-                raise DatasetteError("_next not allowed for CSV streaming", status=400)
+                raise BadRequest("_next not allowed for CSV streaming")
             kwargs["_size"] = "max"
             # Fetch the first page
             try:
datasette/views/database.py
@@ -1,4 +1,5 @@
 import os
+import hashlib
 import itertools
 import jinja2
 import json
@@ -10,6 +11,7 @@ from datasette.utils import (
     validate_sql_select,
     is_url,
     path_with_added_args,
+    path_with_format,
     path_with_removed_args,
     InvalidSql,
 )
@@ -342,6 +344,24 @@ class QueryView(DataView):
                                     url=jinja2.escape(value.strip())
                                 )
                             )
+                        elif isinstance(display_value, bytes):
+                            blob_url = path_with_format(
+                                request,
+                                "blob",
+                                extra_qs={
+                                    "_blob_column": column,
+                                    "_blob_hash": hashlib.sha256(
+                                        display_value
+                                    ).hexdigest(),
+                                },
+                            )
+                            display_value = jinja2.Markup(
+                                '<a class="blob-download" href="{}">&lt;Binary:&nbsp;{}&nbsp;byte{}&gt;</a>'.format(
+                                    blob_url,
+                                    len(display_value),
+                                    "" if len(value) == 1 else "s",
+                                )
+                            )
                         display_row.append(display_value)
                     display_rows.append(display_row)
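The _blob_hash parameter appended here is just the SHA-256 hex digest of the cell's bytes, which is what lets render_blob() refuse stale links when the underlying value changes. A standalone sketch using the 7-byte fixture value from the tests below::

    import hashlib

    cell = b"\x15\x1c\x02\xc7\xad\x05\xfe"  # row 1 of the binary_data fixture
    blob_hash = hashlib.sha256(cell).hexdigest()
    print(blob_hash)
    # f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d (per the tests)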
datasette/views/table.py
@@ -23,9 +23,9 @@ from datasette.utils import (
     urlsafe_components,
     value_as_boolean,
 )
-from datasette.utils.asgi import NotFound, Response
+from datasette.utils.asgi import BadRequest, NotFound
 from datasette.filters import Filters
-from .base import BaseView, DataView, DatasetteError, ureg
+from .base import DataView, DatasetteError, ureg
 from .database import QueryView
 
 LINK_WITH_LABEL = (
@@ -469,7 +469,7 @@ class TableView(RowTableShared):
             for i, (key, search_text) in enumerate(search_args.items()):
                 search_col = key.split("_search_", 1)[1]
                 if search_col not in await db.table_columns(fts_table):
-                    raise DatasetteError("Cannot search by that column", status=400)
+                    raise BadRequest("Cannot search by that column")
 
                 where_clauses.append(
                     "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format(
@@ -614,11 +614,11 @@ class TableView(RowTableShared):
                 raise ValueError
 
         except ValueError:
-            raise DatasetteError("_size must be a positive integer", status=400)
+            raise BadRequest("_size must be a positive integer")
 
         if page_size > self.ds.max_returned_rows:
-            raise DatasetteError(
-                "_size must be <= {}".format(self.ds.max_returned_rows), status=400
+            raise BadRequest(
+                "_size must be <= {}".format(self.ds.max_returned_rows)
             )
 
         extra_args["page_size"] = page_size
@@ -665,7 +665,7 @@ class TableView(RowTableShared):
         if not self.ds.config("allow_facet") and any(
             arg.startswith("_facet") for arg in request.args
         ):
-            raise DatasetteError("_facet= is not allowed", status=400)
+            raise BadRequest("_facet= is not allowed")
 
         # pylint: disable=no-member
         facet_classes = list(
@@ -1041,50 +1041,3 @@ class RowView(RowTableShared):
             )
             foreign_key_tables.append({**fk, **{"count": count}})
         return foreign_key_tables
-
-
-class BlobView(BaseView):
-    async def get(self, request, db_name, table, pk_path, column):
-        await self.check_permissions(
-            request,
-            [
-                ("view-table", (db_name, table)),
-                ("view-database", db_name),
-                "view-instance",
-            ],
-        )
-        try:
-            db = self.ds.get_database(db_name)
-        except KeyError:
-            raise NotFound("Database {} does not exist".format(db_name))
-        if not await db.table_exists(table):
-            raise NotFound("Table {} does not exist".format(table))
-        # Ensure the column exists and is of type BLOB
-        column_types = {c.name: c.type for c in await db.table_column_details(table)}
-        if column not in column_types:
-            raise NotFound("Table {} does not have column {}".format(table, column))
-        if column_types[column].upper() not in ("BLOB", ""):
-            raise NotFound(
-                "Table {} does not have column {} of type BLOB".format(table, column)
-            )
-        # Ensure the row exists for the pk_path
-        pk_values = urlsafe_components(pk_path)
-        sql, params, _ = await _sql_params_pks(db, table, pk_values)
-        results = await db.execute(sql, params, truncate=True)
-        rows = list(results.rows)
-        if not rows:
-            raise NotFound("Record not found: {}".format(pk_values))
-
-        # Serve back the binary data
-        filename_bits = [to_css_class(table), pk_path, to_css_class(column)]
-        filename = "-".join(filename_bits) + ".blob"
-        headers = {
-            "X-Content-Type-Options": "nosniff",
-            "Content-Disposition": 'attachment; filename="{}"'.format(filename),
-        }
-        return Response(
-            body=rows[0][column] or b"",
-            status=200,
-            headers=headers,
-            content_type="application/binary",
-        )
docs/pages.rst
@@ -77,14 +77,3 @@ Note that this URL includes the encoded primary key of the record.
 Here's that same page as JSON:
 
 `../people/uk.org.publicwhip%2Fperson%2F10001.json <https://register-of-members-interests.datasettes.com/regmem/people/uk.org.publicwhip%2Fperson%2F10001.json>`_
-
-.. _BlobView:
-
-Blob
-====
-
-SQLite databases can contain binary data, stored in a ``BLOB`` column. Datasette makes the content of these columns available to download directly, at URLs that look like the following::
-
-    /database-name/table-name/-/blob/row-identifier/column-name.blob
-
-Binary content is also made available as a base64 encoded string in the ``.json`` representation of the row.
tests/test_csv.py
@@ -1,3 +1,5 @@
+import textwrap
+
 import pytest
 from .fixtures import (  # noqa
     app_client,
     app_client_csv_max_mb_one,
@@ -78,6 +80,22 @@ def test_table_csv_with_nullable_labels(app_client):
     assert EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV == response.text
 
 
+@pytest.mark.xfail
+def test_table_csv_blob_columns(app_client):
+    response = app_client.get("/fixtures/binary_data.csv")
+    assert response.status == 200
+    assert "text/plain; charset=utf-8" == response.headers["content-type"]
+    assert EXPECTED_TABLE_CSV == textwrap.dedent(
+        """
+    rowid,data
+    1,/fixtures/binary_data/-/blob/1/data.blob
+    2,/fixtures/binary_data/-/blob/1/data.blob
+    """.strip().replace(
+            "\n", "\r\n"
+        )
+    )
+
+
 def test_custom_sql_csv(app_client):
     response = app_client.get(
         "/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2"
tests/test_html.py
@@ -1223,7 +1223,7 @@ def test_extra_where_clauses(app_client):
     ]
 
 
-def test_binary_data_display(app_client):
+def test_binary_data_display_in_table(app_client):
     response = app_client.get("/fixtures/binary_data")
     assert response.status == 200
     table = Soup(response.body, "html.parser").find("table")
@@ -1231,12 +1231,12 @@ def test_binary_data_display(app_client):
         [
             '<td class="col-Link type-pk"><a href="/fixtures/binary_data/1">1</a></td>',
             '<td class="col-rowid type-int">1</td>',
-            '<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/-/blob/1/data.blob">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
+            '<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/1.blob?_blob_column=data">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
         ],
         [
             '<td class="col-Link type-pk"><a href="/fixtures/binary_data/2">2</a></td>',
             '<td class="col-rowid type-int">2</td>',
-            '<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/-/blob/2/data.blob">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
+            '<td class="col-data type-bytes"><a class="blob-download" href="/fixtures/binary_data/2.blob?_blob_column=data">&lt;Binary:\xa07\xa0bytes&gt;</a></td>',
         ],
         [
             '<td class="col-Link type-pk"><a href="/fixtures/binary_data/3">3</a></td>',
@@ -1249,21 +1249,38 @@ def test_binary_data_display(app_client):
     ]
 
 
+def test_binary_data_display_in_query(app_client):
+    response = app_client.get("/fixtures?sql=select+*+from+binary_data")
+    assert response.status == 200
+    table = Soup(response.body, "html.parser").find("table")
+    expected_tds = [
+        [
+            '<td class="col-data"><a class="blob-download" href="/fixtures.blob?sql=select+*+from+binary_data&amp;_blob_column=data&amp;_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d">&lt;Binary:\xa07\xa0bytes&gt;</a></td>'
+        ],
+        [
+            '<td class="col-data"><a class="blob-download" href="/fixtures.blob?sql=select+*+from+binary_data&amp;_blob_column=data&amp;_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724">&lt;Binary:\xa07\xa0bytes&gt;</a></td>'
+        ],
+        ['<td class="col-data">\xa0</td>'],
+    ]
+    assert expected_tds == [
+        [str(td) for td in tr.select("td")] for tr in table.select("tbody tr")
+    ]
+
+
 @pytest.mark.parametrize(
-    "path,expected_body,expected_filename",
+    "path,expected_filename",
     [
+        ("/fixtures/binary_data/1.blob?_blob_column=data", "binary_data-1-data.blob"),
         (
-            "/fixtures/binary_data/-/blob/1/data.blob",
-            b"\x15\x1c\x02\xc7\xad\x05\xfe",
-            "binary_data-1-data.blob",
+            "/fixtures.blob?sql=select+*+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d",
+            "data-f30889.blob",
         ),
-        ("/fixtures/binary_data/-/blob/3/data.blob", b"", "binary_data-3-data.blob"),
     ],
 )
-def test_blob_download(app_client, path, expected_body, expected_filename):
+def test_blob_download(app_client, path, expected_filename):
     response = app_client.get(path)
     assert response.status == 200
-    assert response.body == expected_body
+    assert response.body == b"\x15\x1c\x02\xc7\xad\x05\xfe"
     assert response.headers["x-content-type-options"] == "nosniff"
     assert response.headers[
         "content-disposition"
@@ -1274,28 +1291,17 @@ def test_blob_download(app_client, path, expected_filename):
 @pytest.mark.parametrize(
     "path,expected_message",
     [
-        ("/baddb/binary_data/-/blob/1/data.blob", "Database baddb does not exist"),
+        ("/fixtures/binary_data/1.blob", "?_blob_column= is required"),
+        ("/fixtures/binary_data/1.blob?_blob_column=foo", "foo is not a valid column"),
         (
-            "/fixtures/binary_data_bad/-/blob/1/data.blob",
-            "Table binary_data_bad does not exist",
-        ),
-        (
-            "/fixtures/binary_data/-/blob/1/bad.blob",
-            "Table binary_data does not have column bad",
-        ),
-        (
-            "/fixtures/facetable/-/blob/1/state.blob",
-            "Table facetable does not have column state of type BLOB",
-        ),
-        (
-            "/fixtures/binary_data/-/blob/101/data.blob",
-            "Record not found: ['101']",
+            "/fixtures/binary_data/1.blob?_blob_column=data&_blob_hash=x",
+            "Link has expired - the requested binary content has changed or could not be found.",
         ),
     ],
 )
-def test_blob_download_not_found_messages(app_client, path, expected_message):
+def test_blob_download_invalid_messages(app_client, path, expected_message):
     response = app_client.get(path)
-    assert response.status == 404
+    assert response.status == 400
     assert expected_message in response.text
tests/test_permissions.py
@@ -417,17 +417,6 @@ def cascade_app_client():
     ("/fixtures/binary_data/1", ["table"], 200),
     ("/fixtures/binary_data/1", ["table", "database"], 200),
     ("/fixtures/binary_data/1", ["table", "database", "instance"], 200),
-    # ... and for binary blob
-    ("/fixtures/binary_data/-/blob/1/data.blob", [], 403),
-    ("/fixtures/binary_data/-/blob/1/data.blob", ["database"], 403),
-    ("/fixtures/binary_data/-/blob/1/data.blob", ["instance"], 403),
-    ("/fixtures/binary_data/-/blob/1/data.blob", ["table"], 200),
-    ("/fixtures/binary_data/-/blob/1/data.blob", ["table", "database"], 200),
-    (
-        "/fixtures/binary_data/-/blob/1/data.blob",
-        ["table", "database", "instance"],
-        200,
-    ),
     # Can view query even if not allowed database or instance
     ("/fixtures/magic_parameters", [], 403),
     ("/fixtures/magic_parameters", ["database"], 403),