From d1d06ace49606da790a765689b4fbffa4c6deecb Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Tue, 1 Jun 2021 08:49:50 -0700
Subject: [PATCH] ?_trace=1 for CSV, plus ?_nofacets=1 when rendering CSV

Closes #1351, closes #1350
---
 datasette/utils/__init__.py |  9 +++++++++
 datasette/views/base.py     | 38 +++++++++++++++++++++++++++++++++----
 tests/test_csv.py           | 24 ++++++++++++++++++++---
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py
index 1fedb69c..dd47771f 100644
--- a/datasette/utils/__init__.py
+++ b/datasette/utils/__init__.py
@@ -7,6 +7,7 @@ import hashlib
 import inspect
 import itertools
 import json
+import markupsafe
 import mergedeep
 import os
 import re
@@ -777,6 +778,14 @@ class LimitedWriter:
         await self.writer.write(bytes)
 
 
+class EscapeHtmlWriter:
+    def __init__(self, writer):
+        self.writer = writer
+
+    async def write(self, content):
+        await self.writer.write(markupsafe.escape(content))
+
+
 _infinities = {float("inf"), float("-inf")}
 
 
diff --git a/datasette/views/base.py b/datasette/views/base.py
index ba0f7d4c..aefaec6c 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -13,6 +13,7 @@ from datasette.plugins import pm
 from datasette.database import QueryInterrupted
 from datasette.utils import (
     await_me_maybe,
+    EscapeHtmlWriter,
     InvalidSql,
     LimitedWriter,
     call_with_supported_arguments,
@@ -262,6 +263,16 @@ class DataView(BaseView):
 
     async def as_csv(self, request, database, hash, **kwargs):
         stream = request.args.get("_stream")
+        # Do not calculate facets:
+        if not request.args.get("_nofacets"):
+            if not request.query_string:
+                new_query_string = "_nofacets=1"
+            else:
+                new_query_string = request.query_string + "&_nofacets=1"
+            new_scope = dict(
+                request.scope, query_string=new_query_string.encode("latin-1")
+            )
+            request.scope = new_scope
         if stream:
             # Some quick sanity checks
             if not self.ds.setting("allow_csv_stream"):
@@ -298,9 +309,27 @@ class DataView(BaseView):
                 if column in expanded_columns:
                     headings.append(f"{column}_label")
 
+        content_type = "text/plain; charset=utf-8"
+        preamble = ""
+        postamble = ""
+
+        trace = request.args.get("_trace")
+        if trace:
+            content_type = "text/html; charset=utf-8"
+            preamble = (
+                "<html><head><title>CSV debug</title></head>"
+                '<body><textarea style="width: 90%; height: 70vh">'
+            )
+            postamble = "</textarea></body></html>"
+
         async def stream_fn(r):
-            nonlocal data
-            writer = csv.writer(LimitedWriter(r, self.ds.setting("max_csv_mb")))
+            nonlocal data, trace
+            limited_writer = LimitedWriter(r, self.ds.setting("max_csv_mb"))
+            if trace:
+                await limited_writer.write(preamble)
+                writer = csv.writer(EscapeHtmlWriter(limited_writer))
+            else:
+                writer = csv.writer(limited_writer)
             first = True
             next = None
             while first or (next and stream):
@@ -371,13 +400,14 @@ class DataView(BaseView):
                     sys.stderr.flush()
                     await r.write(str(e))
                     return
+            await limited_writer.write(postamble)
 
-        content_type = "text/plain; charset=utf-8"
         headers = {}
         if self.ds.cors:
             headers["Access-Control-Allow-Origin"] = "*"
         if request.args.get("_dl", None):
-            content_type = "text/csv; charset=utf-8"
+            if not trace:
+                content_type = "text/csv; charset=utf-8"
             disposition = 'attachment; filename="{}.csv"'.format(
                 kwargs.get("table", database)
             )
diff --git a/tests/test_csv.py b/tests/test_csv.py
index 6b17033c..30afbd9e 100644
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -1,3 +1,4 @@
+from bs4 import BeautifulSoup as Soup
 from .fixtures import (  # noqa
     app_client,
     app_client_csv_max_mb_one,
@@ -51,7 +52,7 @@ pk,foreign_key_with_label,foreign_key_with_label_label,foreign_key_with_blank_la
 
 
 def test_table_csv(app_client):
-    response = app_client.get("/fixtures/simple_primary_key.csv")
+    response = app_client.get("/fixtures/simple_primary_key.csv?_oh=1")
     assert response.status == 200
     assert not response.headers.get("Access-Control-Allow-Origin")
     assert "text/plain; charset=utf-8" == response.headers["content-type"]
@@ -104,8 +105,8 @@ def test_custom_sql_csv_blob_columns(app_client):
     assert "text/plain; charset=utf-8" == response.headers["content-type"]
     assert response.text == (
         "rowid,data\r\n"
-        '1,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d"\r\n'
-        '2,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_blob_column=data&_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724"\r\n'
+        '1,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_nofacets=1&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d"\r\n'
+        '2,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_nofacets=1&_blob_column=data&_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724"\r\n'
         "3,\r\n"
     )
 
@@ -157,3 +158,20 @@ def test_table_csv_stream(app_client):
     # With _stream=1 should return header + 1001 rows
     response = app_client.get("/fixtures/compound_three_primary_keys.csv?_stream=1")
     assert 1002 == len([b for b in response.body.split(b"\r\n") if b])
+
+
+def test_csv_trace(app_client):
+    response = app_client.get("/fixtures/simple_primary_key.csv?_trace=1")
+    assert response.headers["content-type"] == "text/html; charset=utf-8"
+    soup = Soup(response.text, "html.parser")
+    assert (
+        soup.find("textarea").text
+        == "id,content\r\n1,hello\r\n2,world\r\n3,\r\n4,RENDER_CELL_DEMO\r\n"
+    )
+    assert "select id, content from simple_primary_key" in soup.find("pre").text
+
+
+def test_table_csv_stream_does_not_calculate_facets(app_client):
+    response = app_client.get("/fixtures/simple_primary_key.csv?_trace=1")
+    soup = Soup(response.text, "html.parser")
+    assert "select content, count(*) as n" not in soup.find("pre").text
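
Usage note (not part of the patch above): a minimal sketch of how the two new parameters look from an HTTP client's point of view, assuming a local instance serving the standard fixtures database and started with the trace_debug setting enabled so trace output is appended to HTML pages; the host, port and table name here are illustrative.

    # Illustrative sketch only -- not part of the patch.
    # Assumes something like: datasette fixtures.db --setting trace_debug 1
    import httpx

    base = "http://localhost:8001/fixtures/simple_primary_key.csv"

    # Plain CSV output is unchanged by this patch.
    plain = httpx.get(base)
    assert plain.headers["content-type"] == "text/plain; charset=utf-8"

    # ?_trace=1 switches the response to an HTML debug page: the CSV rows are
    # HTML-escaped inside a <textarea>, followed by the trace output.
    traced = httpx.get(base + "?_trace=1")
    assert traced.headers["content-type"] == "text/html; charset=utf-8"
    assert "<textarea" in traced.text

    # Because _nofacets=1 is injected into the request scope before the data
    # is fetched, no facet queries should appear in the trace.
    assert "count(*) as n" not in traced.text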