diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py
index 1fedb69c..dd47771f 100644
--- a/datasette/utils/__init__.py
+++ b/datasette/utils/__init__.py
@@ -7,6 +7,7 @@ import hashlib
import inspect
import itertools
import json
+import markupsafe
import mergedeep
import os
import re
@@ -777,6 +778,14 @@ class LimitedWriter:
await self.writer.write(bytes)
+class EscapeHtmlWriter:
+ def __init__(self, writer):
+ self.writer = writer
+
+ async def write(self, content):
+ await self.writer.write(markupsafe.escape(content))
+
+
_infinities = {float("inf"), float("-inf")}
diff --git a/datasette/views/base.py b/datasette/views/base.py
index ba0f7d4c..aefaec6c 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -13,6 +13,7 @@ from datasette.plugins import pm
from datasette.database import QueryInterrupted
from datasette.utils import (
await_me_maybe,
+ EscapeHtmlWriter,
InvalidSql,
LimitedWriter,
call_with_supported_arguments,
@@ -262,6 +263,16 @@ class DataView(BaseView):
async def as_csv(self, request, database, hash, **kwargs):
stream = request.args.get("_stream")
+ # Do not calculate facets:
+ if not request.args.get("_nofacets"):
+ if not request.query_string:
+ new_query_string = "_nofacets=1"
+ else:
+ new_query_string = request.query_string + "&_nofacets=1"
+ new_scope = dict(
+ request.scope, query_string=new_query_string.encode("latin-1")
+ )
+ request.scope = new_scope
if stream:
# Some quick sanity checks
if not self.ds.setting("allow_csv_stream"):
@@ -298,9 +309,27 @@ class DataView(BaseView):
if column in expanded_columns:
headings.append(f"{column}_label")
+ content_type = "text/plain; charset=utf-8"
+ preamble = ""
+ postamble = ""
+
+ trace = request.args.get("_trace")
+ if trace:
+ content_type = "text/html; charset=utf-8"
+            preamble = (
+                "<html><head><title>CSV debug</title></head>"
+                '<body><textarea style="width: 90%; height: 70vh">'
+            )
+            postamble = "</textarea></body></html>"
+
async def stream_fn(r):
- nonlocal data
- writer = csv.writer(LimitedWriter(r, self.ds.setting("max_csv_mb")))
+ nonlocal data, trace
+ limited_writer = LimitedWriter(r, self.ds.setting("max_csv_mb"))
+ if trace:
+ await limited_writer.write(preamble)
+ writer = csv.writer(EscapeHtmlWriter(limited_writer))
+ else:
+ writer = csv.writer(limited_writer)
first = True
next = None
while first or (next and stream):
@@ -371,13 +400,14 @@ class DataView(BaseView):
sys.stderr.flush()
await r.write(str(e))
return
+ await limited_writer.write(postamble)
- content_type = "text/plain; charset=utf-8"
headers = {}
if self.ds.cors:
headers["Access-Control-Allow-Origin"] = "*"
if request.args.get("_dl", None):
- content_type = "text/csv; charset=utf-8"
+ if not trace:
+ content_type = "text/csv; charset=utf-8"
disposition = 'attachment; filename="{}.csv"'.format(
kwargs.get("table", database)
)
diff --git a/tests/test_csv.py b/tests/test_csv.py
index 6b17033c..30afbd9e 100644
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -1,3 +1,4 @@
+from bs4 import BeautifulSoup as Soup
from .fixtures import ( # noqa
app_client,
app_client_csv_max_mb_one,
@@ -51,7 +52,7 @@ pk,foreign_key_with_label,foreign_key_with_label_label,foreign_key_with_blank_la
def test_table_csv(app_client):
- response = app_client.get("/fixtures/simple_primary_key.csv")
+ response = app_client.get("/fixtures/simple_primary_key.csv?_oh=1")
assert response.status == 200
assert not response.headers.get("Access-Control-Allow-Origin")
assert "text/plain; charset=utf-8" == response.headers["content-type"]
@@ -104,8 +105,8 @@ def test_custom_sql_csv_blob_columns(app_client):
assert "text/plain; charset=utf-8" == response.headers["content-type"]
assert response.text == (
"rowid,data\r\n"
- '1,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d"\r\n'
- '2,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_blob_column=data&_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724"\r\n'
+ '1,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_nofacets=1&_blob_column=data&_blob_hash=f3088978da8f9aea479ffc7f631370b968d2e855eeb172bea7f6c7a04262bb6d"\r\n'
+ '2,"http://localhost/fixtures.blob?sql=select+rowid,+data+from+binary_data&_nofacets=1&_blob_column=data&_blob_hash=b835b0483cedb86130b9a2c280880bf5fadc5318ddf8c18d0df5204d40df1724"\r\n'
"3,\r\n"
)
@@ -157,3 +158,20 @@ def test_table_csv_stream(app_client):
# With _stream=1 should return header + 1001 rows
response = app_client.get("/fixtures/compound_three_primary_keys.csv?_stream=1")
assert 1002 == len([b for b in response.body.split(b"\r\n") if b])
+
+
+def test_csv_trace(app_client):
+ response = app_client.get("/fixtures/simple_primary_key.csv?_trace=1")
+ assert response.headers["content-type"] == "text/html; charset=utf-8"
+ soup = Soup(response.text, "html.parser")
+ assert (
+ soup.find("textarea").text
+ == "id,content\r\n1,hello\r\n2,world\r\n3,\r\n4,RENDER_CELL_DEMO\r\n"
+ )
+ assert "select id, content from simple_primary_key" in soup.find("pre").text
+
+
+def test_table_csv_stream_does_not_calculate_facets(app_client):
+ response = app_client.get("/fixtures/simple_primary_key.csv?_trace=1")
+ soup = Soup(response.text, "html.parser")
+ assert "select content, count(*) as n" not in soup.find("pre").text