From 5a0a82faf9cf9dd109d76181ed00eea19472087c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jun 2018 16:08:02 -0700 Subject: [PATCH 1/6] Streaming works locally, needs cleanup + _dl= option --- datasette/views/base.py | 28 ++++++++++++++++++++++++++++ datasette/views/table.py | 10 +++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 53ae08bd..7beff72e 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -149,7 +149,35 @@ class BaseView(RenderMixin): return await self.view_get(request, name, hash, **kwargs) + async def as_csv_stream(self, request, name, hash, **kwargs): + assert not request.args.get("_next") # TODO: real error + kwargs['_size'] = 'max' + + async def stream_fn(r): + first = True + next = None + writer = csv.writer(r) + while first or next: + if next: + kwargs['_next'] = next + data, extra_template_data, templates = await self.data( + request, name, hash, **kwargs + ) + if first: + writer.writerow(data["columns"]) + first = False + next = data["next"] + for row in data["rows"]: + writer.writerow(row) + + return response.stream( + stream_fn, + content_type="text/plain; charset=utf-8" + ) + async def as_csv(self, request, name, hash, **kwargs): + if request.args.get("_stream"): + return await self.as_csv_stream(request, name, hash, **kwargs) try: response_or_template_contexts = await self.data( request, name, hash, **kwargs diff --git a/datasette/views/table.py b/datasette/views/table.py index c57fd954..cb2c9ae5 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -220,7 +220,7 @@ class RowTableShared(BaseView): class TableView(RowTableShared): - async def data(self, request, name, hash, table, default_labels=False): + async def data(self, request, name, hash, table, default_labels=False, _next=None, _size=None): canned_query = self.ds.get_canned_query(name, table) if canned_query is not None: return await self.custom_sql( @@ -375,7 +375,7 @@ class TableView(RowTableShared): count_sql = "select count(*) {}".format(from_sql) - _next = special_args.get("_next") + _next = _next or special_args.get("_next") offset = "" if _next: if is_view: @@ -462,7 +462,7 @@ class TableView(RowTableShared): extra_args = {} # Handle ?_size=500 - page_size = request.raw_args.get("_size") + page_size = _size or request.raw_args.get("_size") if page_size: if page_size == "max": page_size = self.max_returned_rows @@ -512,6 +512,8 @@ class TableView(RowTableShared): facet_results = {} facets_timed_out = [] for column in facets: + if _next: + continue facet_sql = """ select {col} as value, count(*) as count {from_sql} {and_or_where} {col} is not null @@ -665,6 +667,8 @@ class TableView(RowTableShared): for facet_column in columns: if facet_column in facets: continue + if _next: + continue if not self.ds.config["suggest_facets"]: continue suggested_facet_sql = ''' From 619a9ddb338e2b11419477a6f16c6ef5bd57d32b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jun 2018 19:31:09 -0700 Subject: [PATCH 2/6] table.csv?_stream=1 to download all rows - refs #266 This option causes Datasette to serve ALL rows in the table, by internally following the _next= pagination links and serving everything out as a stream. Also added new config option, allow_csv_stream, which can be used to disable this feature. 
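As a rough idea of how the new option can be consumed from Python (an illustrative sketch only, not part of the patch; the host, database and table names are placeholders):

    import csv
    import io
    import urllib.request

    # Stream every row of a table as CSV without holding it all in memory.
    # The URL assumes a locally running Datasette instance.
    url = "http://localhost:8001/fixtures/compound_three_primary_keys.csv?_stream=1"
    with urllib.request.urlopen(url) as response:
        # Wrap the byte stream so csv.reader can consume rows as they arrive.
        lines = io.TextIOWrapper(response, encoding="utf-8", newline="")
        for row in csv.reader(lines):
            print(row)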
--- datasette/app.py | 3 ++ datasette/views/base.py | 84 ++++++++++++++++++++--------------------- docs/config.rst | 12 ++++++ tests/test_api.py | 1 + tests/test_csv.py | 13 +++++++ 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 70f2a93f..92327353 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -94,6 +94,9 @@ CONFIG_OPTIONS = ( ConfigOption("cache_size_kb", 0, """ SQLite cache size in KB (0 == use SQLite default) """.strip()), + ConfigOption("allow_csv_stream", True, """ + Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows) + """.strip()), ) DEFAULT_CONFIG = { option.name: option.default diff --git a/datasette/views/base.py b/datasette/views/base.py index 7beff72e..a572c330 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -149,42 +149,24 @@ class BaseView(RenderMixin): return await self.view_get(request, name, hash, **kwargs) - async def as_csv_stream(self, request, name, hash, **kwargs): - assert not request.args.get("_next") # TODO: real error - kwargs['_size'] = 'max' - - async def stream_fn(r): - first = True - next = None - writer = csv.writer(r) - while first or next: - if next: - kwargs['_next'] = next - data, extra_template_data, templates = await self.data( - request, name, hash, **kwargs - ) - if first: - writer.writerow(data["columns"]) - first = False - next = data["next"] - for row in data["rows"]: - writer.writerow(row) - - return response.stream( - stream_fn, - content_type="text/plain; charset=utf-8" - ) - async def as_csv(self, request, name, hash, **kwargs): - if request.args.get("_stream"): - return await self.as_csv_stream(request, name, hash, **kwargs) + stream = request.args.get("_stream") + if stream: + # Some quick sanity checks + if not self.ds.config["allow_csv_stream"]: + raise DatasetteError("CSV streaming is disabled", status=400) + if request.args.get("_next"): + raise DatasetteError( + "_next not allowed for CSV streaming", status=400 + ) + kwargs["_size"] = "max" + # Fetch the first page try: response_or_template_contexts = await self.data( request, name, hash, **kwargs ) if isinstance(response_or_template_contexts, response.HTTPResponse): return response_or_template_contexts - else: data, extra_template_data, templates = response_or_template_contexts except (sqlite3.OperationalError, InvalidSql) as e: @@ -195,6 +177,7 @@ class BaseView(RenderMixin): except DatasetteError: raise + # Convert rows and columns to CSV headings = data["columns"] # if there are expanded_columns we need to add additional headings @@ -207,22 +190,35 @@ class BaseView(RenderMixin): headings.append("{}_label".format(column)) async def stream_fn(r): + nonlocal data writer = csv.writer(r) - writer.writerow(headings) - for row in data["rows"]: - if not expanded_columns: - # Simple path - writer.writerow(row) - else: - # Look for {"value": "label": } dicts and expand - new_row = [] - for cell in row: - if isinstance(cell, dict): - new_row.append(cell["value"]) - new_row.append(cell["label"]) - else: - new_row.append(cell) - writer.writerow(new_row) + first = True + next = None + while first or (next and stream): + if next: + kwargs["_next"] = next + if not first: + data, extra_template_data, templates = await self.data( + request, name, hash, **kwargs + ) + if first: + writer.writerow(headings) + first = False + next = data.get("next") + for row in data["rows"]: + if not expanded_columns: + # Simple path + writer.writerow(row) + else: + # Look for {"value": "label": } dicts and 
expand + new_row = [] + for cell in row: + if isinstance(cell, dict): + new_row.append(cell["value"]) + new_row.append(cell["label"]) + else: + new_row.append(cell) + writer.writerow(new_row) content_type = "text/plain; charset=utf-8" headers = {} diff --git a/docs/config.rst b/docs/config.rst index 8f0cd246..36a046fc 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -125,3 +125,15 @@ Sets the amount of memory SQLite uses for its `per-connection cache Date: Sun, 17 Jun 2018 20:01:30 -0700 Subject: [PATCH 3/6] New config option max_csv_mb limiting size of CSV export - refs #266 --- datasette/app.py | 3 ++ datasette/utils.py | 19 ++++++++++++ datasette/views/base.py | 61 +++++++++++++++++++++---------------- datasette/views/database.py | 4 +-- docs/config.rst | 9 ++++++ tests/fixtures.py | 7 +++++ tests/test_api.py | 1 + tests/test_csv.py | 14 ++++++++- 8 files changed, 89 insertions(+), 29 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 92327353..fb389d73 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -97,6 +97,9 @@ CONFIG_OPTIONS = ( ConfigOption("allow_csv_stream", True, """ Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows) """.strip()), + ConfigOption("max_csv_mb", 100, """ + Maximum size allowed for CSV export in MB. Set 0 to disable this limit. + """.strip()), ) DEFAULT_CONFIG = { option.name: option.default diff --git a/datasette/utils.py b/datasette/utils.py index a179eddf..005db87f 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -832,3 +832,22 @@ def value_as_boolean(value): class ValueAsBooleanError(ValueError): pass + + +class WriteLimitExceeded(Exception): + pass + + +class LimitedWriter: + def __init__(self, writer, limit_mb): + self.writer = writer + self.limit_bytes = limit_mb * 1024 * 1024 + self.bytes_count = 0 + + def write(self, bytes): + self.bytes_count += len(bytes) + if self.limit_bytes and (self.bytes_count > self.limit_bytes): + raise WriteLimitExceeded("CSV contains more than {} bytes".format( + self.limit_bytes + )) + self.writer.write(bytes) diff --git a/datasette/views/base.py b/datasette/views/base.py index a572c330..0ca52e61 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -16,6 +16,7 @@ from datasette.utils import ( CustomJSONEncoder, InterruptedError, InvalidSql, + LimitedWriter, path_from_row_pks, path_with_added_args, path_with_format, @@ -191,34 +192,39 @@ class BaseView(RenderMixin): async def stream_fn(r): nonlocal data - writer = csv.writer(r) + writer = csv.writer(LimitedWriter(r, self.ds.config["max_csv_mb"])) first = True next = None while first or (next and stream): - if next: - kwargs["_next"] = next - if not first: - data, extra_template_data, templates = await self.data( - request, name, hash, **kwargs - ) - if first: - writer.writerow(headings) - first = False - next = data.get("next") - for row in data["rows"]: - if not expanded_columns: - # Simple path - writer.writerow(row) - else: - # Look for {"value": "label": } dicts and expand - new_row = [] - for cell in row: - if isinstance(cell, dict): - new_row.append(cell["value"]) - new_row.append(cell["label"]) - else: - new_row.append(cell) - writer.writerow(new_row) + try: + if next: + kwargs["_next"] = next + if not first: + data, extra_template_data, templates = await self.data( + request, name, hash, **kwargs + ) + if first: + writer.writerow(headings) + first = False + next = data.get("next") + for row in data["rows"]: + if not expanded_columns: + # Simple path + writer.writerow(row) + else: + # Look for 
{"value": "label": } dicts and expand + new_row = [] + for cell in row: + if isinstance(cell, dict): + new_row.append(cell["value"]) + new_row.append(cell["label"]) + else: + new_row.append(cell) + writer.writerow(new_row) + except Exception as e: + print('caught this', e) + r.write(str(e)) + return content_type = "text/plain; charset=utf-8" headers = {} @@ -417,7 +423,8 @@ class BaseView(RenderMixin): return r async def custom_sql( - self, request, name, hash, sql, editable=True, canned_query=None + self, request, name, hash, sql, editable=True, canned_query=None, + _size=None ): params = request.raw_args if "sql" in params: @@ -439,6 +446,8 @@ class BaseView(RenderMixin): extra_args = {} if params.get("_timelimit"): extra_args["custom_time_limit"] = int(params["_timelimit"]) + if _size: + extra_args["page_size"] = _size results = await self.ds.execute( name, sql, params, truncate=True, **extra_args ) diff --git a/datasette/views/database.py b/datasette/views/database.py index 2f3f41d3..a7df485b 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -9,13 +9,13 @@ from .base import BaseView, DatasetteError class DatabaseView(BaseView): - async def data(self, request, name, hash, default_labels=False): + async def data(self, request, name, hash, default_labels=False, _size=None): if request.args.get("sql"): if not self.ds.config["allow_sql"]: raise DatasetteError("sql= is not allowed", status=400) sql = request.raw_args.pop("sql") validate_sql_select(sql) - return await self.custom_sql(request, name, hash, sql) + return await self.custom_sql(request, name, hash, sql, _size=_size) info = self.ds.inspect()[name] metadata = self.ds.metadata.get("databases", {}).get(name, {}) diff --git a/docs/config.rst b/docs/config.rst index 36a046fc..e0013bf0 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -137,3 +137,12 @@ can turn it off like this:: :: datasette mydatabase.db --config allow_csv_stream:off + + +max_csv_mb +---------- + +The maximum size of CSV that can be exported, in megabytes. Defaults to 100MB. 
+You can disable the limit entirely by setting this to 0:: + + datasette mydatabase.db --config max_csv_mb:0
diff --git a/tests/fixtures.py b/tests/fixtures.py index 92fd5073..ea0b4e35 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -71,6 +71,13 @@ def app_client_larger_cache_size(): }) +@pytest.fixture(scope='session') +def app_client_csv_max_mb_one(): + yield from app_client(config={ + 'max_csv_mb': 1, + }) + + def generate_compound_rows(num): for a, b, c in itertools.islice( itertools.product(string.ascii_lowercase, repeat=3), num
diff --git a/tests/test_api.py b/tests/test_api.py index 1e889963..2187deb5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -902,6 +902,7 @@ def test_config_json(app_client): "num_sql_threads": 3, "cache_size_kb": 0, "allow_csv_stream": True, + "max_csv_mb": 100, } == response.json
diff --git a/tests/test_csv.py b/tests/test_csv.py index 65bc9c1d..fcbacc76 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -1,4 +1,4 @@ -from .fixtures import app_client # noqa +from .fixtures import app_client, app_client_csv_max_mb_one # noqa EXPECTED_TABLE_CSV = '''id,content 1,hello @@ -61,6 +61,18 @@ def test_table_csv_download(app_client): assert expected_disposition == response.headers['Content-Disposition'] +def test_max_csv_mb(app_client_csv_max_mb_one): + response = app_client_csv_max_mb_one.get( + "/fixtures.csv?sql=select+randomblob(10000)+" + "from+compound_three_primary_keys&_stream=1&_size=max" + ) + # It's a 200 because we started streaming before we knew the error + assert response.status == 200 + # Last line should be an error message + last_line = [line for line in response.body.split(b"\r\n") if line][-1] + assert last_line.startswith(b"CSV contains more than") + + def test_table_csv_stream(app_client): # Without _stream should return header + 100 rows: response = app_client.get(
From 9f9c737fc2618780dde46c071ad8b985ad6c567d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jun 2018 20:05:38 -0700 Subject: [PATCH 4/6] Fixed sphinx warning --- docs/metadata.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/metadata.rst b/docs/metadata.rst index 029e2960..7ae561af 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -122,6 +122,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and You can also disable sorting entirely by setting ``"sortable_columns": []`` .. _label_columns: + Specifying the label column for a table ---------------------------------------
From b15f412e04ce9ff21983986e661fbe4396f97b43 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jun 2018 20:14:32 -0700 Subject: [PATCH 5/6] Only deploy latest on push to master (not pull request) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9e92eee3..d32df307 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ script: jobs: include: - stage: deploy latest.datasette.io + if: branch = master AND type = push script: - pip install .
- npm install -g now @@ -23,7 +24,6 @@ jobs: - now alias --token=$NOW_TOKEN - echo "{\"name\":\"datasette-latest-$ALIAS\",\"alias\":\"$ALIAS.datasette.io\"}" > now.json - now alias --token=$NOW_TOKEN - on: master - stage: release tagged version if: tag IS present python: 3.6 From 6204ebb7d507b08b81f13cc5b9c176f62b499634 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 17 Jun 2018 23:03:22 -0700 Subject: [PATCH 6/6] Improved UI for CSV/JSON export, closes #266 --- datasette/static/app.css | 14 ++++++++++++++ datasette/templates/query.html | 2 +- datasette/templates/table.html | 23 ++++++++++++++++++++++- datasette/utils.py | 7 +++++++ datasette/views/base.py | 18 +++++++++--------- datasette/views/table.py | 2 ++ tests/test_html.py | 26 +++++++++++++++++++++++--- 7 files changed, 78 insertions(+), 14 deletions(-) diff --git a/datasette/static/app.css b/datasette/static/app.css index 9e95505f..2fc16940 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -118,6 +118,13 @@ form label { display: inline-block; width: 15%; } +.advanced-export form label { + width: auto; +} +.advanced-export input[type=submit] { + font-size: 0.6em; + margin-left: 1em; +} label.sort_by_desc { width: auto; padding-right: 1em; @@ -272,3 +279,10 @@ a.not-underlined { .facet-info a.cross:active { text-decoration: none; } +.advanced-export { + margin-top: 1em; + padding: 0.01em 2em 0.01em 1em; + width: auto; + display: inline-block; + box-shadow: 1px 2px 8px 2px rgba(0,0,0,0.08); +} diff --git a/datasette/templates/query.html b/datasette/templates/query.html index e04df160..8e2f9036 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -40,7 +40,7 @@ {% if rows %} - + diff --git a/datasette/templates/table.html b/datasette/templates/table.html index eda37bc7..bb2522d6 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -92,7 +92,7 @@

View and edit SQL

{% endif %} - + {% if suggested_facets %}

@@ -137,6 +137,27 @@

Next page

{% endif %} +{% if display_rows %} +
+

Advanced export

+

JSON shape: default, array{% if primary_keys %}, object{% endif %}

+
+

+ CSV options: + + {% if expandable_columns %}{% endif %} + {% if next_url %}{% endif %} + + {% for key, value in url_csv_args.items() %} + {% if key != "_labels" %} + + {% endif %} + {% endfor %} +

+ +
+{% endif %} + {% if table_definition %}
{{ table_definition }}
{% endif %} diff --git a/datasette/utils.py b/datasette/utils.py index 005db87f..6253fb7a 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -170,6 +170,13 @@ def validate_sql_select(sql): raise InvalidSql(msg) +def append_querystring(url, querystring): + op = "&" if ("?" in url) else "?" + return "{}{}{}".format( + url, op, querystring + ) + + def path_with_added_args(request, args, path=None): path = path or request.path if isinstance(args, dict): diff --git a/datasette/views/base.py b/datasette/views/base.py index 0ca52e61..c3da3ab7 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -382,6 +382,12 @@ class BaseView(RenderMixin): url_labels_extra = {} if data.get("expandable_columns"): url_labels_extra = {"_labels": "on"} + url_csv_args = { + "_size": "max", + **url_labels_extra + } + url_csv = path_with_format(request, "csv", url_csv_args) + url_csv_path = url_csv.split('?')[0] context = { **data, **extras, @@ -389,15 +395,9 @@ class BaseView(RenderMixin): "url_json": path_with_format(request, "json", { **url_labels_extra, }), - "url_csv": path_with_format(request, "csv", { - "_size": "max", - **url_labels_extra - }), - "url_csv_dl": path_with_format(request, "csv", { - "_dl": "1", - "_size": "max", - **url_labels_extra - }), + "url_csv": url_csv, + "url_csv_path": url_csv_path, + "url_csv_args": url_csv_args, "extra_css_urls": self.ds.extra_css_urls(), "extra_js_urls": self.ds.extra_js_urls(), "datasette_version": __version__, diff --git a/datasette/views/table.py b/datasette/views/table.py index cb2c9ae5..89dec455 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -10,6 +10,7 @@ from datasette.utils import ( CustomRow, Filters, InterruptedError, + append_querystring, compound_keys_after_sql, escape_sqlite, filters_should_redirect, @@ -748,6 +749,7 @@ class TableView(RowTableShared): "is_sortable": any(c["sortable"] for c in display_columns), "path_with_replaced_args": path_with_replaced_args, "path_with_removed_args": path_with_removed_args, + "append_querystring": append_querystring, "request": request, "sort": sort, "sort_desc": sort_desc, diff --git a/tests/test_html.py b/tests/test_html.py index 98e54998..11c5fd43 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -274,9 +274,10 @@ def test_table_html_simple_primary_key(app_client): ] == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')] -def test_table_csv_json_export_links(app_client): +def test_table_csv_json_export_interface(app_client): response = app_client.get('/fixtures/simple_primary_key') assert response.status == 200 + # The links at the top of the page links = Soup(response.body, "html.parser").find("p", { "class": "export-links" }).findAll("a") @@ -284,9 +285,28 @@ def test_table_csv_json_export_links(app_client): expected = [ "simple_primary_key.json", "simple_primary_key.csv?_size=max", - "simple_primary_key.csv?_dl=1&_size=max" + "#export" ] assert expected == actual + # And the advaced export box at the bottom: + div = Soup(response.body, "html.parser").find("div", { + "class": "advanced-export" + }) + json_links = [a["href"].split("/")[-1] for a in div.find("p").findAll("a")] + assert [ + "simple_primary_key.json", + "simple_primary_key.json?_shape=array", + "simple_primary_key.json?_shape=object" + ] == json_links + # And the CSV form + form = div.find("form") + assert form["action"].endswith("/simple_primary_key.csv") + inputs = [str(input) for input in form.findAll("input")] + assert [ + '', + '', + '' + ] == inputs def 
test_csv_json_export_links_include_labels_if_foreign_keys(app_client): @@ -299,7 +319,7 @@ def test_csv_json_export_links_include_labels_if_foreign_keys(app_client): expected = [ "facetable.json?_labels=on", "facetable.csv?_labels=on&_size=max", - "facetable.csv?_dl=1&_labels=on&_size=max" + "#export" ] assert expected == actual
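A closing note on the max_csv_mb guard introduced in PATCH 3/6: the sketch below (illustrative only, assuming the patches above are applied; not part of the series) shows how LimitedWriter trips once the configured limit is exceeded, which is why test_max_csv_mb expects the error text as the last line of the streamed body.

    import io

    from datasette.utils import LimitedWriter, WriteLimitExceeded

    buffer = io.StringIO()
    # 1 MB cap, mirroring the app_client_csv_max_mb_one test fixture
    limited = LimitedWriter(buffer, 1)

    try:
        while True:
            limited.write("x" * 1024)  # keep writing until the cap trips
    except WriteLimitExceeded as exc:
        # This message is what ends up as the final line of an over-sized
        # streamed CSV response.
        print(exc)  # CSV contains more than 1048576 bytes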