diff --git a/datasette/app.py b/datasette/app.py index 5b50294f..3016043a 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -38,7 +38,7 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import RowView, TableView, BlobView +from .views.table import RowView, TableView from .renderer import json_renderer from .database import Database, QueryInterrupted @@ -60,6 +60,7 @@ from .utils import ( ) from .utils.asgi import ( AsgiLifespan, + Base400, Forbidden, NotFound, Request, @@ -923,10 +924,6 @@ class Datasette: + renderer_regex + r")?$", ) - add_route( - BlobView.as_view(self), - r"/(?P[^/]+)/(?P[^/]+?)/\-/blob/(?P[^/]+?)/(?P[^/]+)\.blob$", - ) self._register_custom_units() async def setup_db(): @@ -1113,11 +1110,7 @@ class DatasetteRouter: pdb.post_mortem(exception.__traceback__) title = None - if isinstance(exception, NotFound): - status = 404 - info = {} - message = exception.args[0] - elif isinstance(exception, Forbidden): + if isinstance(exception, Forbidden): status = 403 info = {} message = exception.args[0] @@ -1129,6 +1122,10 @@ class DatasetteRouter: if custom_response is not None: await custom_response.asgi_send(send) return + elif isinstance(exception, Base400): + status = exception.status + info = {} + message = exception.args[0] elif isinstance(exception, DatasetteError): status = exception.status info = exception.error_dict @@ -1308,6 +1305,6 @@ class Urls: return "{}/{}".format(self.table(database, table), row_path) def row_blob(self, database, table, row_path, column): - return self.table(database, table) + "/-/blob/{}/{}.blob".format( + return self.table(database, table) + "/{}.blob?_blob_column={}".format( row_path, urllib.parse.quote_plus(column) ) diff --git a/datasette/blob_renderer.py b/datasette/blob_renderer.py new file mode 100644 index 00000000..794b153e --- /dev/null +++ b/datasette/blob_renderer.py @@ -0,0 +1,61 @@ +from datasette import hookimpl +from datasette.utils.asgi import Response, BadRequest +from datasette.utils import to_css_class +import hashlib + +_BLOB_COLUMN = "_blob_column" +_BLOB_HASH = "_blob_hash" + + +async def render_blob(datasette, database, rows, columns, request, table, view_name): + if _BLOB_COLUMN not in request.args: + raise BadRequest("?{}= is required".format(_BLOB_COLUMN)) + blob_column = request.args[_BLOB_COLUMN] + if blob_column not in columns: + raise BadRequest("{} is not a valid column".format(blob_column)) + + # If ?_blob_hash= provided, use that to select the row - otherwise use first row + blob_hash = None + if _BLOB_HASH in request.args: + blob_hash = request.args[_BLOB_HASH] + for row in rows: + value = row[blob_column] + if hashlib.sha256(value).hexdigest() == blob_hash: + break + else: + # Loop did not break + raise BadRequest( + "Link has expired - the requested binary content has changed or could not be found." + ) + else: + row = rows[0] + + value = row[blob_column] + filename_bits = [] + if table: + filename_bits.append(to_css_class(table)) + if "pk_path" in request.url_vars: + filename_bits.append(request.url_vars["pk_path"]) + filename_bits.append(to_css_class(blob_column)) + if blob_hash: + filename_bits.append(blob_hash[:6]) + filename = "-".join(filename_bits) + ".blob" + headers = { + "X-Content-Type-Options": "nosniff", + "Content-Disposition": 'attachment; filename="{}"'.format(filename), + } + return Response( + body=value or b"", + status=200, + headers=headers, + content_type="application/binary", + ) + + +@hookimpl +def register_output_renderer(): + return { + "extension": "blob", + "render": render_blob, + "can_render": lambda: False, + } diff --git a/datasette/plugins.py b/datasette/plugins.py index cb3d2c34..1c2f392f 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -12,6 +12,7 @@ DEFAULT_PLUGINS = ( "datasette.actor_auth_cookie", "datasette.default_permissions", "datasette.default_magic_parameters", + "datasette.blob_renderer", ) pm = pluggy.PluginManager("datasette") diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 78a1c123..9b3fff25 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -70,7 +70,7 @@ {% for row in display_rows %} {% for column, td in zip(columns, row) %} - + {% endfor %} {% endfor %} diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index bd388390..f438f829 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -1,4 +1,5 @@ import json +from os import EX_CANTCREAT from datasette.utils import MultiParams from mimetypes import guess_type from urllib.parse import parse_qs, urlunparse, parse_qsl @@ -15,12 +16,20 @@ Morsel._reserved["samesite"] = "SameSite" # https://github.com/encode/starlette/blob/519f575/starlette/responses.py#L17 -class NotFound(Exception): - pass +class Base400(Exception): + status = 400 -class Forbidden(Exception): - pass +class NotFound(Base400): + status = 404 + + +class Forbidden(Base400): + status = 403 + + +class BadRequest(Base400): + status = 400 SAMESITE_VALUES = ("strict", "lax", "none") diff --git a/datasette/views/base.py b/datasette/views/base.py index f9bbe45d..4432ddca 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -26,6 +26,7 @@ from datasette.utils.asgi import ( Forbidden, NotFound, Response, + BadRequest, ) ureg = pint.UnitRegistry() @@ -260,9 +261,9 @@ class DataView(BaseView): if stream: # Some quick sanity checks if not self.ds.config("allow_csv_stream"): - raise DatasetteError("CSV streaming is disabled", status=400) + raise BadRequest("CSV streaming is disabled") if request.args.get("_next"): - raise DatasetteError("_next not allowed for CSV streaming", status=400) + raise BadRequest("_next not allowed for CSV streaming") kwargs["_size"] = "max" # Fetch the first page try: diff --git a/datasette/views/database.py b/datasette/views/database.py index 00fbc0b0..8b9e8833 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,4 +1,5 @@ import os +import hashlib import itertools import jinja2 import json @@ -10,6 +11,7 @@ from datasette.utils import ( validate_sql_select, is_url, path_with_added_args, + path_with_format, path_with_removed_args, InvalidSql, ) @@ -342,6 +344,24 @@ class QueryView(DataView): url=jinja2.escape(value.strip()) ) ) + elif isinstance(display_value, bytes): + blob_url = path_with_format( + request, + "blob", + extra_qs={ + "_blob_column": column, + "_blob_hash": hashlib.sha256( + display_value + ).hexdigest(), + }, + ) + display_value = jinja2.Markup( + '<Binary: {} byte{}>'.format( + blob_url, + len(display_value), + "" if len(value) == 1 else "s", + ) + ) display_row.append(display_value) display_rows.append(display_row) diff --git a/datasette/views/table.py b/datasette/views/table.py index d190b6af..079e0b0a 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -23,9 +23,9 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) -from datasette.utils.asgi import NotFound, Response +from datasette.utils.asgi import BadRequest, NotFound from datasette.filters import Filters -from .base import BaseView, DataView, DatasetteError, ureg +from .base import DataView, DatasetteError, ureg from .database import QueryView LINK_WITH_LABEL = ( @@ -469,7 +469,7 @@ class TableView(RowTableShared): for i, (key, search_text) in enumerate(search_args.items()): search_col = key.split("_search_", 1)[1] if search_col not in await db.table_columns(fts_table): - raise DatasetteError("Cannot search by that column", status=400) + raise BadRequest("Cannot search by that column") where_clauses.append( "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( @@ -614,11 +614,11 @@ class TableView(RowTableShared): raise ValueError except ValueError: - raise DatasetteError("_size must be a positive integer", status=400) + raise BadRequest("_size must be a positive integer") if page_size > self.ds.max_returned_rows: - raise DatasetteError( - "_size must be <= {}".format(self.ds.max_returned_rows), status=400 + raise BadRequest( + "_size must be <= {}".format(self.ds.max_returned_rows) ) extra_args["page_size"] = page_size @@ -665,7 +665,7 @@ class TableView(RowTableShared): if not self.ds.config("allow_facet") and any( arg.startswith("_facet") for arg in request.args ): - raise DatasetteError("_facet= is not allowed", status=400) + raise BadRequest("_facet= is not allowed") # pylint: disable=no-member facet_classes = list( @@ -1041,50 +1041,3 @@ class RowView(RowTableShared): ) foreign_key_tables.append({**fk, **{"count": count}}) return foreign_key_tables - - -class BlobView(BaseView): - async def get(self, request, db_name, table, pk_path, column): - await self.check_permissions( - request, - [ - ("view-table", (db_name, table)), - ("view-database", db_name), - "view-instance", - ], - ) - try: - db = self.ds.get_database(db_name) - except KeyError: - raise NotFound("Database {} does not exist".format(db_name)) - if not await db.table_exists(table): - raise NotFound("Table {} does not exist".format(table)) - # Ensure the column exists and is of type BLOB - column_types = {c.name: c.type for c in await db.table_column_details(table)} - if column not in column_types: - raise NotFound("Table {} does not have column {}".format(table, column)) - if column_types[column].upper() not in ("BLOB", ""): - raise NotFound( - "Table {} does not have column {} of type BLOB".format(table, column) - ) - # Ensure the row exists for the pk_path - pk_values = urlsafe_components(pk_path) - sql, params, _ = await _sql_params_pks(db, table, pk_values) - results = await db.execute(sql, params, truncate=True) - rows = list(results.rows) - if not rows: - raise NotFound("Record not found: {}".format(pk_values)) - - # Serve back the binary data - filename_bits = [to_css_class(table), pk_path, to_css_class(column)] - filename = "-".join(filename_bits) + ".blob" - headers = { - "X-Content-Type-Options": "nosniff", - "Content-Disposition": 'attachment; filename="{}"'.format(filename), - } - return Response( - body=rows[0][column] or b"", - status=200, - headers=headers, - content_type="application/binary", - ) diff --git a/docs/pages.rst b/docs/pages.rst index 3ad58565..db970ead 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -77,14 +77,3 @@ Note that this URL includes the encoded primary key of the record. Here's that same page as JSON: `../people/uk.org.publicwhip%2Fperson%2F10001.json `_ - -.. _BlobView: - -Blob -==== - -SQLite databases can contain binary data, stored in a ``BLOB`` column. Datasette makes the content of these columns available to download directly, at URLs that look like the following:: - - /database-name/table-name/-/blob/row-identifier/column-name.blob - -Binary content is also made available as a base64 encoded string in the ``.json`` representation of the row. diff --git a/tests/test_csv.py b/tests/test_csv.py index 86e402b5..863659f7 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -1,3 +1,5 @@ +import textwrap +import pytest from .fixtures import ( # noqa app_client, app_client_csv_max_mb_one, @@ -78,6 +80,22 @@ def test_table_csv_with_nullable_labels(app_client): assert EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV == response.text +@pytest.mark.xfail +def test_table_csv_blob_columns(app_client): + response = app_client.get("/fixtures/binary_data.csv") + assert response.status == 200 + assert "text/plain; charset=utf-8" == response.headers["content-type"] + assert EXPECTED_TABLE_CSV == textwrap.dedent( + """ + rowid,data + 1,/fixtures/binary_data/-/blob/1/data.blob + 2,/fixtures/binary_data/-/blob/1/data.blob + """.strip().replace( + "\n", "\r\n" + ) + ) + + def test_custom_sql_csv(app_client): response = app_client.get( "/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2" diff --git a/tests/test_html.py b/tests/test_html.py index 3c1101d2..95b5128a 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1223,7 +1223,7 @@ def test_extra_where_clauses(app_client): ] -def test_binary_data_display(app_client): +def test_binary_data_display_in_table(app_client): response = app_client.get("/fixtures/binary_data") assert response.status == 200 table = Soup(response.body, "html.parser").find("table") @@ -1231,12 +1231,12 @@ def test_binary_data_display(app_client): [ '', '', - '', + '', ], [ '', '', - '', + '', ], [ '', @@ -1249,21 +1249,38 @@ def test_binary_data_display(app_client): ] +def test_binary_data_display_in_query(app_client): + response = app_client.get("/fixtures?sql=select+*+from+binary_data") + assert response.status == 200 + table = Soup(response.body, "html.parser").find("table") + expected_tds = [ + [ + '' + ], + [ + '' + ], + [''], + ] + assert expected_tds == [ + [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") + ] + + @pytest.mark.parametrize( - "path,expected_body,expected_filename", + "path,expected_filename", [ + ("/fixtures/binary_data/1.blob?_blob_column=data", "binary_data-1-data.blob"), ( - "/fixtures/binary_data/-/blob/1/data.blob", - b"\x15\x1c\x02\xc7\xad\x05\xfe", - "binary_data-1-data.blob", + "/fixtures.blob?sql=select+*+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d", + "data-f30889.blob", ), - ("/fixtures/binary_data/-/blob/3/data.blob", b"", "binary_data-3-data.blob"), ], ) -def test_blob_download(app_client, path, expected_body, expected_filename): +def test_blob_download(app_client, path, expected_filename): response = app_client.get(path) assert response.status == 200 - assert response.body == expected_body + assert response.body == b"\x15\x1c\x02\xc7\xad\x05\xfe" assert response.headers["x-content-type-options"] == "nosniff" assert response.headers[ "content-disposition" @@ -1274,28 +1291,17 @@ def test_blob_download(app_client, path, expected_body, expected_filename): @pytest.mark.parametrize( "path,expected_message", [ - ("/baddb/binary_data/-/blob/1/data.blob", "Database baddb does not exist"), + ("/fixtures/binary_data/1.blob", "?_blob_column= is required"), + ("/fixtures/binary_data/1.blob?_blob_column=foo", "foo is not a valid column"), ( - "/fixtures/binary_data_bad/-/blob/1/data.blob", - "Table binary_data_bad does not exist", - ), - ( - "/fixtures/binary_data/-/blob/1/bad.blob", - "Table binary_data does not have column bad", - ), - ( - "/fixtures/facetable/-/blob/1/state.blob", - "Table facetable does not have column state of type BLOB", - ), - ( - "/fixtures/binary_data/-/blob/101/data.blob", - "Record not found: ['101']", + "/fixtures/binary_data/1.blob?_blob_column=data&_blob_hash=x", + "Link has expired - the requested binary content has changed or could not be found.", ), ], ) -def test_blob_download_not_found_messages(app_client, path, expected_message): +def test_blob_download_invalid_messages(app_client, path, expected_message): response = app_client.get(path) - assert response.status == 404 + assert response.status == 400 assert expected_message in response.text diff --git a/tests/test_permissions.py b/tests/test_permissions.py index a935a495..4d1b09b8 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -417,17 +417,6 @@ def cascade_app_client(): ("/fixtures/binary_data/1", ["table"], 200), ("/fixtures/binary_data/1", ["table", "database"], 200), ("/fixtures/binary_data/1", ["table", "database", "instance"], 200), - # ... and for binary blob - ("/fixtures/binary_data/-/blob/1/data.blob", [], 403), - ("/fixtures/binary_data/-/blob/1/data.blob", ["database"], 403), - ("/fixtures/binary_data/-/blob/1/data.blob", ["instance"], 403), - ("/fixtures/binary_data/-/blob/1/data.blob", ["table"], 200), - ("/fixtures/binary_data/-/blob/1/data.blob", ["table", "database"], 200), - ( - "/fixtures/binary_data/-/blob/1/data.blob", - ["table", "database", "instance"], - 200, - ), # Can view query even if not allowed database or instance ("/fixtures/magic_parameters", [], 403), ("/fixtures/magic_parameters", ["database"], 403),
{% if td == None %}{{ " "|safe }}{% else %}{{ td }}{% endif %}{{ td }}
1<Binary:\xa07\xa0bytes><Binary:\xa07\xa0bytes>2<Binary:\xa07\xa0bytes><Binary:\xa07\xa0bytes><Binary:\xa07\xa0bytes><Binary:\xa07\xa0bytes>\xa0