Improved UI for CSV/JSON export, closes #266

Only deploy latest on push to master (not pull request)
Fixed sphinx warning
2025-12-10 16:51:24 +01:00 · 2018-06-17 23:03:22 -07:00 · 2018-06-17 20:14:32 -07:00 · 2018-06-17 20:05:38 -07:00 · 2018-06-17 20:01:30 -07:00 · 2018-06-17 19:31:09 -07:00
15 changed files with 220 additions and 38 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -13,6 +13,7 @@ script:
 jobs:
  include:
    - stage: deploy latest.datasette.io
      if: branch = master AND type = push
      script:
        - pip install .
        - npm install -g now
@ -23,7 +24,6 @@ jobs:
        - now alias --token=$NOW_TOKEN
        - echo "{\"name\":\"datasette-latest-$ALIAS\",\"alias\":\"$ALIAS.datasette.io\"}" > now.json
        - now alias --token=$NOW_TOKEN
      on: master
    - stage: release tagged version
      if: tag IS present
      python: 3.6
--- a/datasette/app.py
+++ b/datasette/app.py
@ -94,6 +94,12 @@ CONFIG_OPTIONS = (
    ConfigOption("cache_size_kb", 0, """
        SQLite cache size in KB (0 == use SQLite default)
    """.strip()),
    ConfigOption("allow_csv_stream", True, """
        Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)
    """.strip()),
    ConfigOption("max_csv_mb", 100, """
        Maximum size allowed for CSV export in MB. Set 0 to disable this limit.
    """.strip()),
 )
 DEFAULT_CONFIG = {
    option.name: option.default
--- a/datasette/static/app.css
+++ b/datasette/static/app.css
@ -118,6 +118,13 @@ form label {
    display: inline-block;
    width: 15%;
 }
 .advanced-export form label {
    width: auto;
 }
 .advanced-export input[type=submit] {
    font-size: 0.6em;
    margin-left: 1em;
 }
 label.sort_by_desc {
    width: auto;
    padding-right: 1em;
@ -272,3 +279,10 @@ a.not-underlined {
 .facet-info a.cross:active {
    text-decoration: none;
 }
 .advanced-export {
    margin-top: 1em;
    padding: 0.01em 2em 0.01em 1em;
    width: auto;
    display: inline-block;
    box-shadow: 1px 2px 8px 2px rgba(0,0,0,0.08);
 }
--- a/datasette/templates/query.html
+++ b/datasette/templates/query.html
@ -40,7 +40,7 @@
 </form>
 {% if rows %}
-<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
+<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>)</p>
 <table class="rows-and-columns">
    <thead>
        <tr>
--- a/datasette/templates/table.html
+++ b/datasette/templates/table.html
@ -92,7 +92,7 @@
    <p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
 {% endif %}
-<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
+<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>){% endif %}</p>
 {% if suggested_facets %}
    <p class="suggested-facets">
@ -137,6 +137,27 @@
     <p><a href="{{ next_url }}">Next page</a></p>
 {% endif %}
 {% if display_rows %}
    <div id="export" class="advanced-export">
        <h3>Advanced export</h3>
        <p>JSON shape: <a href="{{ url_json }}">default</a>, <a href="{{ append_querystring(url_json, '_shape=array') }}">array</a>{% if primary_keys %}, <a href="{{ append_querystring(url_json, '_shape=object') }}">object</a>{% endif %}</p>
        <form action="{{ url_csv_path }}" method="get">
            <p>
                CSV options:
                <label><input type="checkbox" name="_dl"> download file</label>
                {% if expandable_columns %}<label><input type="checkbox" name="_labels"> expand labels</label>{% endif %}
                {% if next_url %}<label><input type="checkbox" name="_stream"> stream all records</label>{% endif %}
                <input type="submit" value="Export CSV">
                {% for key, value in url_csv_args.items() %}
                    {% if key != "_labels" %}
                        <input type="hidden" name="{{ key }}" value="{{ value }}">
                    {% endif %}
                {% endfor %}
            </p>
        </form>
    </div>
 {% endif %}
 {% if table_definition %}
    <pre>{{ table_definition }}</pre>
 {% endif %}
--- a/datasette/utils.py
+++ b/datasette/utils.py
@ -170,6 +170,13 @@ def validate_sql_select(sql):
            raise InvalidSql(msg)
 def append_querystring(url, querystring):
    """
    Return ``url`` with ``querystring`` attached to the end.

    The joining character is "&" when ``url`` already contains a "?",
    otherwise it is "?". ``querystring`` is appended as given.
    """
    if "?" in url:
        separator = "&"
    else:
        separator = "?"
    return "{base}{sep}{extra}".format(
        base=url, sep=separator, extra=querystring
    )
 def path_with_added_args(request, args, path=None):
    path = path or request.path
    if isinstance(args, dict):
@ -832,3 +839,22 @@ def value_as_boolean(value):
 class ValueAsBooleanError(ValueError):
    pass
 class WriteLimitExceeded(Exception):
    """Raised by LimitedWriter.write() once the size limit has been passed."""
 class LimitedWriter:
    """
    Wrap an object with a ``write()`` method and enforce a total size cap.

    ``limit_mb`` is multiplied by 1024 * 1024 to give ``limit_bytes``; a
    falsy limit (such as 0) turns the check off. Size is measured with
    ``len()`` of each chunk passed to ``write()``.
    """
    def __init__(self, writer, limit_mb):
        self.bytes_count = 0
        self.limit_bytes = limit_mb * 1024 * 1024
        self.writer = writer
    def write(self, bytes):
        """
        Forward ``bytes`` to the wrapped writer, or raise WriteLimitExceeded
        if the running total would go over ``limit_bytes``.
        """
        # The counter is updated before the check, so a rejected chunk is
        # still included in bytes_count even though it is never forwarded.
        self.bytes_count = self.bytes_count + len(bytes)
        limit = self.limit_bytes
        if limit and self.bytes_count > limit:
            message = "CSV contains more than {} bytes".format(limit)
            raise WriteLimitExceeded(message)
        self.writer.write(bytes)
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@ -16,6 +16,7 @@ from datasette.utils import (
    CustomJSONEncoder,
    InterruptedError,
    InvalidSql,
    LimitedWriter,
    path_from_row_pks,
    path_with_added_args,
    path_with_format,
@ -150,13 +151,23 @@ class BaseView(RenderMixin):
        return await self.view_get(request, name, hash, **kwargs)
    async def as_csv(self, request, name, hash, **kwargs):
        stream = request.args.get("_stream")
        if stream:
            # Some quick sanity checks
            if not self.ds.config["allow_csv_stream"]:
                raise DatasetteError("CSV streaming is disabled", status=400)
            if request.args.get("_next"):
                raise DatasetteError(
                    "_next not allowed for CSV streaming", status=400
                )
            kwargs["_size"] = "max"
        # Fetch the first page
        try:
            response_or_template_contexts = await self.data(
                request, name, hash, **kwargs
            )
            if isinstance(response_or_template_contexts, response.HTTPResponse):
                return response_or_template_contexts
            else:
                data, extra_template_data, templates = response_or_template_contexts
        except (sqlite3.OperationalError, InvalidSql) as e:
@ -167,6 +178,7 @@ class BaseView(RenderMixin):
        except DatasetteError:
            raise
        # Convert rows and columns to CSV
        headings = data["columns"]
        # if there are expanded_columns we need to add additional headings
@ -179,8 +191,22 @@ class BaseView(RenderMixin):
                    headings.append("{}_label".format(column))
        async def stream_fn(r):
-            writer = csv.writer(r)
+            nonlocal data
            writer = csv.writer(LimitedWriter(r, self.ds.config["max_csv_mb"]))
            first = True
            next = None
            while first or (next and stream):
                try:
                    if next:
                        kwargs["_next"] = next
                    if not first:
                        data, extra_template_data, templates = await self.data(
                            request, name, hash, **kwargs
                        )
                    if first:
                        writer.writerow(headings)
                        first = False
                    next = data.get("next")
                    for row in data["rows"]:
                        if not expanded_columns:
                            # Simple path
@ -195,6 +221,10 @@ class BaseView(RenderMixin):
                                else:
                                    new_row.append(cell)
                            writer.writerow(new_row)
                except Exception as e:
                    print('caught this', e)
                    r.write(str(e))
                    return
        content_type = "text/plain; charset=utf-8"
        headers = {}
@ -352,6 +382,12 @@ class BaseView(RenderMixin):
            url_labels_extra = {}
            if data.get("expandable_columns"):
                url_labels_extra = {"_labels": "on"}
            url_csv_args = {
                "_size": "max",
                **url_labels_extra
            }
            url_csv = path_with_format(request, "csv", url_csv_args)
            url_csv_path = url_csv.split('?')[0]
            context = {
                **data,
                **extras,
@ -359,15 +395,9 @@ class BaseView(RenderMixin):
                    "url_json": path_with_format(request, "json", {
                        **url_labels_extra,
                    }),
-                    "url_csv": path_with_format(request, "csv", {
+                    "url_csv": url_csv,
-                        "_size": "max",
+                    "url_csv_path": url_csv_path,
-                        **url_labels_extra
+                    "url_csv_args": url_csv_args,
                    }),
                    "url_csv_dl": path_with_format(request, "csv", {
                        "_dl": "1",
                        "_size": "max",
                        **url_labels_extra
                    }),
                    "extra_css_urls": self.ds.extra_css_urls(),
                    "extra_js_urls": self.ds.extra_js_urls(),
                    "datasette_version": __version__,
@ -393,7 +423,8 @@ class BaseView(RenderMixin):
        return r
    async def custom_sql(
-        self, request, name, hash, sql, editable=True, canned_query=None
+        self, request, name, hash, sql, editable=True, canned_query=None,
        _size=None
    ):
        params = request.raw_args
        if "sql" in params:
@ -415,6 +446,8 @@ class BaseView(RenderMixin):
        extra_args = {}
        if params.get("_timelimit"):
            extra_args["custom_time_limit"] = int(params["_timelimit"])
        if _size:
            extra_args["page_size"] = _size
        results = await self.ds.execute(
            name, sql, params, truncate=True, **extra_args
        )
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@ -9,13 +9,13 @@ from .base import BaseView, DatasetteError
 class DatabaseView(BaseView):
-    async def data(self, request, name, hash, default_labels=False):
+    async def data(self, request, name, hash, default_labels=False, _size=None):
        if request.args.get("sql"):
            if not self.ds.config["allow_sql"]:
                raise DatasetteError("sql= is not allowed", status=400)
            sql = request.raw_args.pop("sql")
            validate_sql_select(sql)
-            return await self.custom_sql(request, name, hash, sql)
+            return await self.custom_sql(request, name, hash, sql, _size=_size)
        info = self.ds.inspect()[name]
        metadata = self.ds.metadata.get("databases", {}).get(name, {})
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@ -10,6 +10,7 @@ from datasette.utils import (
    CustomRow,
    Filters,
    InterruptedError,
    append_querystring,
    compound_keys_after_sql,
    escape_sqlite,
    filters_should_redirect,
@ -220,7 +221,7 @@ class RowTableShared(BaseView):
 class TableView(RowTableShared):
-    async def data(self, request, name, hash, table, default_labels=False):
+    async def data(self, request, name, hash, table, default_labels=False,  _next=None, _size=None):
        canned_query = self.ds.get_canned_query(name, table)
        if canned_query is not None:
            return await self.custom_sql(
@ -375,7 +376,7 @@ class TableView(RowTableShared):
        count_sql = "select count(*) {}".format(from_sql)
-        _next = special_args.get("_next")
+        _next = _next or special_args.get("_next")
        offset = ""
        if _next:
            if is_view:
@ -462,7 +463,7 @@ class TableView(RowTableShared):
        extra_args = {}
        # Handle ?_size=500
-        page_size = request.raw_args.get("_size")
+        page_size = _size or request.raw_args.get("_size")
        if page_size:
            if page_size == "max":
                page_size = self.max_returned_rows
@ -512,6 +513,8 @@ class TableView(RowTableShared):
        facet_results = {}
        facets_timed_out = []
        for column in facets:
            if _next:
                continue
            facet_sql = """
                select {col} as value, count(*) as count
                {from_sql} {and_or_where} {col} is not null
@ -665,6 +668,8 @@ class TableView(RowTableShared):
                for facet_column in columns:
                    if facet_column in facets:
                        continue
                    if _next:
                        continue
                    if not self.ds.config["suggest_facets"]:
                        continue
                    suggested_facet_sql = '''
@ -744,6 +749,7 @@ class TableView(RowTableShared):
                "is_sortable": any(c["sortable"] for c in display_columns),
                "path_with_replaced_args": path_with_replaced_args,
                "path_with_removed_args": path_with_removed_args,
                "append_querystring": append_querystring,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
--- a/docs/config.rst
+++ b/docs/config.rst
@ -125,3 +125,24 @@ Sets the amount of memory SQLite uses for its `per-connection cache <https://www
 ::
    datasette mydatabase.db --config cache_size_kb:5000
 allow_csv_stream
 ----------------
 Enables the feature where an entire table (potentially hundreds of thousands of
 rows) can be exported as a single CSV file. This is turned on by default - you
 can turn it off like this::
    datasette mydatabase.db --config allow_csv_stream:off
 max_csv_mb
 ----------
 The maximum size of CSV that can be exported, in megabytes. Defaults to 100MB.
 You can disable the limit entirely by setting this to 0::
    datasette mydatabase.db --config max_csv_mb:0
--- a/docs/metadata.rst
+++ b/docs/metadata.rst
@ -122,6 +122,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and
 You can also disable sorting entirely by setting ``"sortable_columns": []``
 .. _label_columns:
 Specifying the label column for a table
 ---------------------------------------
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@ -71,6 +71,13 @@ def app_client_larger_cache_size():
    })
@pytest.fixture(scope='session')
 def app_client_csv_max_mb_one():
    """Session-scoped fixture: yields from app_client() with max_csv_mb set to 1."""
    yield from app_client(config={
        'max_csv_mb': 1,
    })
 def generate_compound_rows(num):
    for a, b, c in itertools.islice(
        itertools.product(string.ascii_lowercase, repeat=3), num
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -901,6 +901,8 @@ def test_config_json(app_client):
        "default_cache_ttl": 365 * 24 * 60 * 60,
        "num_sql_threads": 3,
        "cache_size_kb": 0,
        "allow_csv_stream": True,
        "max_csv_mb": 100,
    } == response.json
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@ -1,4 +1,4 @@
-from .fixtures import app_client # noqa
+from .fixtures import app_client, app_client_csv_max_mb_one # noqa
 EXPECTED_TABLE_CSV = '''id,content
 1,hello
@ -59,3 +59,28 @@ def test_table_csv_download(app_client):
    assert 'text/csv; charset=utf-8' == response.headers['Content-Type']
    expected_disposition = 'attachment; filename="simple_primary_key.csv"'
    assert expected_disposition == response.headers['Content-Disposition']
 def test_max_csv_mb(app_client_csv_max_mb_one):
    """A streamed CSV that exceeds the limit ends with an error message line."""
    path = (
        "/fixtures.csv?sql=select+randomblob(10000)+"
        "from+compound_three_primary_keys&_stream=1&_size=max"
    )
    response = app_client_csv_max_mb_one.get(path)
    # Streaming had already begun when the limit was hit, so the status
    # is still 200 rather than an error code
    assert response.status == 200
    # The final non-empty line of the body carries the error text
    non_empty_lines = list(filter(None, response.body.split(b"\r\n")))
    assert non_empty_lines[-1].startswith(b"CSV contains more than")
 def test_table_csv_stream(app_client):
    """Compare the CSV line count with and without ?_stream=1."""
    def count_lines(path):
        # Number of non-empty CRLF-separated lines in the response body
        body = app_client.get(path).body
        return sum(1 for chunk in body.split(b"\r\n") if chunk)
    # Without _stream: heading line + 100 rows
    assert 101 == count_lines(
        "/fixtures/compound_three_primary_keys.csv?_size=max"
    )
    # With _stream=1: heading line + 1001 rows
    assert 1002 == count_lines(
        "/fixtures/compound_three_primary_keys.csv?_stream=1"
    )
--- a/tests/test_html.py
+++ b/tests/test_html.py
@ -274,9 +274,10 @@ def test_table_html_simple_primary_key(app_client):
    ] == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
-def test_table_csv_json_export_links(app_client):
+def test_table_csv_json_export_interface(app_client):
    response = app_client.get('/fixtures/simple_primary_key')
    assert response.status == 200
    # The links at the top of the page
    links = Soup(response.body, "html.parser").find("p", {
        "class": "export-links"
    }).findAll("a")
@ -284,9 +285,28 @@ def test_table_csv_json_export_links(app_client):
    expected = [
        "simple_primary_key.json",
        "simple_primary_key.csv?_size=max",
-        "simple_primary_key.csv?_dl=1&_size=max"
+        "#export"
    ]
    assert expected == actual
    # And the advanced export box at the bottom:
    div = Soup(response.body, "html.parser").find("div", {
        "class": "advanced-export"
    })
    json_links = [a["href"].split("/")[-1] for a in div.find("p").findAll("a")]
    assert [
        "simple_primary_key.json",
        "simple_primary_key.json?_shape=array",
        "simple_primary_key.json?_shape=object"
    ] == json_links
    # And the CSV form
    form = div.find("form")
    assert form["action"].endswith("/simple_primary_key.csv")
    inputs = [str(input) for input in form.findAll("input")]
    assert [
        '<input name="_dl" type="checkbox"/>',
        '<input type="submit" value="Export CSV"/>',
        '<input name="_size" type="hidden" value="max"/>'
    ] == inputs
 def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
@ -299,7 +319,7 @@ def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
    expected = [
        "facetable.json?_labels=on",
        "facetable.csv?_labels=on&_size=max",
-        "facetable.csv?_dl=1&_labels=on&_size=max"
+        "#export"
    ]
    assert expected == actual
Author	SHA1	Message	Date
Simon Willison	6204ebb7d5	Improved UI for CSV/JSON export, closes #266	2018-06-17 23:03:22 -07:00
Simon Willison	b15f412e04	Only deploy latest on push to master (not pull request)	2018-06-17 20:14:32 -07:00
Simon Willison	9f9c737fc2	Fixed sphinx warning	2018-06-17 20:05:38 -07:00
Simon Willison	9d00718250	New config option max_csv_mb limiting size of CSV export - refs #266	2018-06-17 20:01:30 -07:00
Simon Willison	619a9ddb33	table.csv?_stream=1 to download all rows - refs #266 This option causes Datasette to serve ALL rows in the table, by internally following the _next= pagination links and serving everything out as a stream. Also added new config option, allow_csv_stream, which can be used to disable this feature.	2018-06-17 19:31:09 -07:00
Simon Willison	5a0a82faf9	Streaming works locally, needs cleanup + _dl= option	2018-06-17 16:08:02 -07:00