diff --git a/.travis.yml b/.travis.yml
index 9e92eee3..d32df307 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,6 +13,7 @@ script:
 jobs:
   include:
     - stage: deploy latest.datasette.io
+      if: branch = master AND type = push
       script:
       - pip install .
       - npm install -g now
@@ -23,7 +24,6 @@ jobs:
      - now alias --token=$NOW_TOKEN
      - echo "{\"name\":\"datasette-latest-$ALIAS\",\"alias\":\"$ALIAS.datasette.io\"}" > now.json
      - now alias --token=$NOW_TOKEN
-      on: master
    - stage: release tagged version
      if: tag IS present
      python: 3.6
diff --git a/datasette/app.py b/datasette/app.py
index 70f2a93f..fb389d73 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -94,6 +94,12 @@ CONFIG_OPTIONS = (
     ConfigOption("cache_size_kb", 0, """
         SQLite cache size in KB (0 == use SQLite default)
     """.strip()),
+    ConfigOption("allow_csv_stream", True, """
+        Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)
+    """.strip()),
+    ConfigOption("max_csv_mb", 100, """
+        Maximum size allowed for CSV export in MB. Set 0 to disable this limit.
+    """.strip()),
 )
 DEFAULT_CONFIG = {
     option.name: option.default
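Both new options flow through the existing ConfigOption machinery, so they pick up defaults from DEFAULT_CONFIG and can be overridden per instance. A minimal sketch of how they surface (the --config syntax in the comments is the standard override mechanism; nothing below is added by this patch beyond the two option names):

    from datasette.app import DEFAULT_CONFIG

    # The defaults declared in CONFIG_OPTIONS above:
    assert DEFAULT_CONFIG["allow_csv_stream"] is True
    assert DEFAULT_CONFIG["max_csv_mb"] == 100

    # At request time the views read these back through the config dict,
    # e.g. self.ds.config["max_csv_mb"]. Operators can override them with:
    #     datasette mydatabase.db --config max_csv_mb:10
    #     datasette mydatabase.db --config allow_csv_stream:off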

diff --git a/datasette/static/app.css b/datasette/static/app.css
index 9e95505f..2fc16940 100644
--- a/datasette/static/app.css
+++ b/datasette/static/app.css
@@ -118,6 +118,13 @@ form label {
     display: inline-block;
     width: 15%;
 }
+.advanced-export form label {
+    width: auto;
+}
+.advanced-export input[type=submit] {
+    font-size: 0.6em;
+    margin-left: 1em;
+}
 label.sort_by_desc {
     width: auto;
     padding-right: 1em;
@@ -272,3 +279,10 @@ a.not-underlined {
 .facet-info a.cross:active {
     text-decoration: none;
 }
+.advanced-export {
+    margin-top: 1em;
+    padding: 0.01em 2em 0.01em 1em;
+    width: auto;
+    display: inline-block;
+    box-shadow: 1px 2px 8px 2px rgba(0,0,0,0.08);
+}
diff --git a/datasette/templates/query.html b/datasette/templates/query.html
index e04df160..8e2f9036 100644
--- a/datasette/templates/query.html
+++ b/datasette/templates/query.html
@@ -40,7 +40,7 @@
 {% if rows %}
-
+
diff --git a/datasette/templates/table.html b/datasette/templates/table.html
index eda37bc7..bb2522d6 100644
--- a/datasette/templates/table.html
+++ b/datasette/templates/table.html
@@ -92,7 +92,7 @@
         View and edit SQL
     {% endif %}
-    <p class="export-links">This data as <a href="{{ url_json }}">.json</a>, <a href="{{ url_csv }}">.csv</a> (<a href="{{ url_csv_dl }}">download</a>)</p>
+    <p class="export-links">This data as <a href="{{ url_json }}">.json</a>, <a href="{{ url_csv }}">.csv</a> (<a href="#export">advanced</a>)</p>
 {% if suggested_facets %}
@@ -137,6 +137,27 @@
 {% if next_url %}
     <a href="{{ next_url }}">Next page</a>
 {% endif %}
+{% if display_rows %}
+<div class="advanced-export" id="export">
+    <h3>Advanced export</h3>
+    <p>JSON shape: <a href="{{ url_json }}">default</a>, <a href="{{ append_querystring(url_json, '_shape=array') }}">array</a>{% if primary_keys %}, <a href="{{ append_querystring(url_json, '_shape=object') }}">object</a>{% endif %}</p>
+    <form action="{{ url_csv_path }}" method="get">
+        <p>
+            CSV options:
+            <label><input type="checkbox" name="_dl"> download file</label>
+            {% if expandable_columns %}<label><input type="checkbox" name="_labels" checked> expand labels</label>{% endif %}
+            {% if next_url %}<label><input type="checkbox" name="_stream"> stream all rows</label>{% endif %}
+            <input type="submit" value="Export CSV">
+            {% for key, value in url_csv_args.items() %}
+                {% if key != "_labels" %}
+                    <input type="hidden" name="{{ key }}" value="{{ value }}">
+                {% endif %}
+            {% endfor %}
+        </p>
+    </form>
+</div>
+{% endif %}
+
 {% if table_definition %}
     {{ table_definition }}
 {% endif %}
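The hidden-input loop is the subtle part of that form: it replays the current querystring so the exported CSV keeps the table's filters, while _labels is excluded because its checkbox supplies it. A standalone sketch of just that loop (the jinja2 import and the example values are illustrative, not part of the patch):

    from jinja2 import Template

    snippet = Template(
        '{% for key, value in url_csv_args.items() %}'
        '{% if key != "_labels" %}'
        '<input type="hidden" name="{{ key }}" value="{{ value }}">'
        '{% endif %}'
        '{% endfor %}'
    )
    # _labels is skipped - the "expand labels" checkbox controls it instead
    print(snippet.render(url_csv_args={"_size": "max", "_labels": "on"}))
    # -> <input type="hidden" name="_size" value="max">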

diff --git a/datasette/utils.py b/datasette/utils.py
index a179eddf..6253fb7a 100644
--- a/datasette/utils.py
+++ b/datasette/utils.py
@@ -170,6 +170,13 @@ def validate_sql_select(sql):
         raise InvalidSql(msg)


+def append_querystring(url, querystring):
+    op = "&" if ("?" in url) else "?"
+    return "{}{}{}".format(
+        url, op, querystring
+    )
+
+
 def path_with_added_args(request, args, path=None):
     path = path or request.path
     if isinstance(args, dict):
@@ -832,3 +839,22 @@ def value_as_boolean(value):

 class ValueAsBooleanError(ValueError):
     pass
+
+
+class WriteLimitExceeded(Exception):
+    pass
+
+
+class LimitedWriter:
+    def __init__(self, writer, limit_mb):
+        self.writer = writer
+        self.limit_bytes = limit_mb * 1024 * 1024
+        self.bytes_count = 0
+
+    def write(self, bytes):
+        self.bytes_count += len(bytes)
+        if self.limit_bytes and (self.bytes_count > self.limit_bytes):
+            raise WriteLimitExceeded("CSV contains more than {} bytes".format(
+                self.limit_bytes
+            ))
+        self.writer.write(bytes)
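LimitedWriter is the enforcement point for max_csv_mb: it counts each chunk before passing it through and raises once the budget is spent (a limit of 0 disables the check, because self.limit_bytes is then falsy). A quick sketch of both new helpers - the StringIO stand-in is illustrative; in the view code below the wrapped object is the streaming HTTP response:

    import io

    from datasette.utils import LimitedWriter, WriteLimitExceeded, append_querystring

    # append_querystring picks "?" or "&" depending on the URL it is given
    assert append_querystring("/db/table.csv", "_stream=1") == "/db/table.csv?_stream=1"
    assert (
        append_querystring("/db/table.csv?_size=max", "_dl=1")
        == "/db/table.csv?_size=max&_dl=1"
    )

    writer = LimitedWriter(io.StringIO(), 1)  # 1MB budget
    writer.write("a" * 1024)  # fine: running total is well under the limit
    try:
        writer.write("b" * 1024 * 1024)  # pushes the running total past 1MB
    except WriteLimitExceeded as e:
        print(e)  # CSV contains more than 1048576 bytes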
diff --git a/datasette/views/base.py b/datasette/views/base.py
index 53ae08bd..c3da3ab7 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -16,6 +16,7 @@ from datasette.utils import (
     CustomJSONEncoder,
     InterruptedError,
     InvalidSql,
+    LimitedWriter,
     path_from_row_pks,
     path_with_added_args,
     path_with_format,
@@ -150,13 +151,23 @@ class BaseView(RenderMixin):
         return await self.view_get(request, name, hash, **kwargs)

     async def as_csv(self, request, name, hash, **kwargs):
+        stream = request.args.get("_stream")
+        if stream:
+            # Some quick sanity checks
+            if not self.ds.config["allow_csv_stream"]:
+                raise DatasetteError("CSV streaming is disabled", status=400)
+            if request.args.get("_next"):
+                raise DatasetteError(
+                    "_next not allowed for CSV streaming", status=400
+                )
+            kwargs["_size"] = "max"
+        # Fetch the first page
         try:
             response_or_template_contexts = await self.data(
                 request, name, hash, **kwargs
             )
             if isinstance(response_or_template_contexts, response.HTTPResponse):
                 return response_or_template_contexts
-
             else:
                 data, extra_template_data, templates = response_or_template_contexts
         except (sqlite3.OperationalError, InvalidSql) as e:
@@ -167,6 +178,7 @@ class BaseView(RenderMixin):
         except DatasetteError:
             raise

+        # Convert rows and columns to CSV
         headings = data["columns"]
         # if there are expanded_columns we need to add additional headings
@@ -179,22 +191,40 @@ class BaseView(RenderMixin):
                     headings.append("{}_label".format(column))

         async def stream_fn(r):
-            writer = csv.writer(r)
-            writer.writerow(headings)
-            for row in data["rows"]:
-                if not expanded_columns:
-                    # Simple path
-                    writer.writerow(row)
-                else:
-                    # Look for {"value": "label": } dicts and expand
-                    new_row = []
-                    for cell in row:
-                        if isinstance(cell, dict):
-                            new_row.append(cell["value"])
-                            new_row.append(cell["label"])
-                        else:
-                            new_row.append(cell)
-                    writer.writerow(new_row)
+            nonlocal data
+            writer = csv.writer(LimitedWriter(r, self.ds.config["max_csv_mb"]))
+            first = True
+            next = None
+            while first or (next and stream):
+                try:
+                    if next:
+                        kwargs["_next"] = next
+                    if not first:
+                        data, extra_template_data, templates = await self.data(
+                            request, name, hash, **kwargs
+                        )
+                    if first:
+                        writer.writerow(headings)
+                        first = False
+                    next = data.get("next")
+                    for row in data["rows"]:
+                        if not expanded_columns:
+                            # Simple path
+                            writer.writerow(row)
+                        else:
+                            # Look for {"value": "label": } dicts and expand
+                            new_row = []
+                            for cell in row:
+                                if isinstance(cell, dict):
+                                    new_row.append(cell["value"])
+                                    new_row.append(cell["label"])
+                                else:
+                                    new_row.append(cell)
+                            writer.writerow(new_row)
+                except Exception as e:
+                    print('caught this', e)
+                    r.write(str(e))
+                    return

         content_type = "text/plain; charset=utf-8"
         headers = {}
@@ -352,6 +382,12 @@ class BaseView(RenderMixin):
         url_labels_extra = {}
         if data.get("expandable_columns"):
             url_labels_extra = {"_labels": "on"}
+        url_csv_args = {
+            "_size": "max",
+            **url_labels_extra
+        }
+        url_csv = path_with_format(request, "csv", url_csv_args)
+        url_csv_path = url_csv.split('?')[0]
         context = {
             **data,
             **extras,
@@ -359,15 +395,9 @@ class BaseView(RenderMixin):
             "url_json": path_with_format(request, "json", {
                 **url_labels_extra,
             }),
-            "url_csv": path_with_format(request, "csv", {
-                "_size": "max",
-                **url_labels_extra
-            }),
-            "url_csv_dl": path_with_format(request, "csv", {
-                "_dl": "1",
-                "_size": "max",
-                **url_labels_extra
-            }),
+            "url_csv": url_csv,
+            "url_csv_path": url_csv_path,
+            "url_csv_args": url_csv_args,
             "extra_css_urls": self.ds.extra_css_urls(),
             "extra_js_urls": self.ds.extra_js_urls(),
             "datasette_version": __version__,
@@ -393,7 +423,8 @@ class BaseView(RenderMixin):
         return r

     async def custom_sql(
-        self, request, name, hash, sql, editable=True, canned_query=None
+        self, request, name, hash, sql, editable=True, canned_query=None,
+        _size=None
     ):
         params = request.raw_args
         if "sql" in params:
@@ -415,6 +446,8 @@ class BaseView(RenderMixin):
         extra_args = {}
         if params.get("_timelimit"):
             extra_args["custom_time_limit"] = int(params["_timelimit"])
+        if _size:
+            extra_args["page_size"] = _size
         results = await self.ds.execute(
             name, sql, params, truncate=True, **extra_args
         )
diff --git a/datasette/views/database.py b/datasette/views/database.py
index 2f3f41d3..a7df485b 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -9,13 +9,13 @@ from .base import BaseView, DatasetteError


 class DatabaseView(BaseView):
-    async def data(self, request, name, hash, default_labels=False):
+    async def data(self, request, name, hash, default_labels=False, _size=None):
         if request.args.get("sql"):
             if not self.ds.config["allow_sql"]:
                 raise DatasetteError("sql= is not allowed", status=400)
             sql = request.raw_args.pop("sql")
             validate_sql_select(sql)
-            return await self.custom_sql(request, name, hash, sql)
+            return await self.custom_sql(request, name, hash, sql, _size=_size)

         info = self.ds.inspect()[name]
         metadata = self.ds.metadata.get("databases", {}).get(name, {})
diff --git a/datasette/views/table.py b/datasette/views/table.py
index c57fd954..89dec455 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -10,6 +10,7 @@ from datasette.utils import (
     CustomRow,
     Filters,
     InterruptedError,
+    append_querystring,
     compound_keys_after_sql,
     escape_sqlite,
     filters_should_redirect,
@@ -220,7 +221,7 @@ class RowTableShared(BaseView):


 class TableView(RowTableShared):
-    async def data(self, request, name, hash, table, default_labels=False):
+    async def data(self, request, name, hash, table, default_labels=False, _next=None, _size=None):
         canned_query = self.ds.get_canned_query(name, table)
         if canned_query is not None:
             return await self.custom_sql(
@@ -375,7 +376,7 @@ class TableView(RowTableShared):
         count_sql = "select count(*) {}".format(from_sql)

-        _next = special_args.get("_next")
+        _next = _next or special_args.get("_next")
         offset = ""
         if _next:
             if is_view:
@@ -462,7 +463,7 @@ class TableView(RowTableShared):
         extra_args = {}
         # Handle ?_size=500
-        page_size = request.raw_args.get("_size")
+        page_size = _size or request.raw_args.get("_size")
         if page_size:
             if page_size == "max":
                 page_size = self.max_returned_rows
@@ -512,6 +513,8 @@ class TableView(RowTableShared):
         facet_results = {}
         facets_timed_out = []
         for column in facets:
+            if _next:
+                continue
             facet_sql = """
                 select {col} as value, count(*) as count
                 {from_sql} {and_or_where} {col} is not null
@@ -665,6 +668,8 @@ class TableView(RowTableShared):
         for facet_column in columns:
             if facet_column in facets:
                 continue
+            if _next:
+                continue
             if not self.ds.config["suggest_facets"]:
                 continue
             suggested_facet_sql = '''
@@ -744,6 +749,7 @@ class TableView(RowTableShared):
             "is_sortable": any(c["sortable"] for c in display_columns),
             "path_with_replaced_args": path_with_replaced_args,
             "path_with_removed_args": path_with_removed_args,
+            "append_querystring": append_querystring,
             "request": request,
             "sort": sort,
             "sort_desc": sort_desc,
diff --git a/docs/config.rst b/docs/config.rst
index 8f0cd246..e0013bf0 100644
--- a/docs/config.rst
+++ b/docs/config.rst
@@ -125,3 +125,24 @@
 Sets the amount of memory SQLite uses for its `per-connection cache
 <https://www.sqlite.org/pragma.html#pragma_cache_size>`__.
+
+allow_csv_stream
+----------------
+
+Allow ``.csv?_stream=1`` to download all rows, ignoring ``max_returned_rows``.
+This is on by default; to turn it off:
+
+::
+
+    datasette mydatabase.db --config allow_csv_stream:off
+
+max_csv_mb
+----------
+
+Maximum size allowed for CSV export in MB. Defaults to 100. Set it to 0 to
+disable this limit:
+
+::
+
+    datasette mydatabase.db --config max_csv_mb:0
diff --git a/tests/test_html.py b/tests/test_html.py
@@ ... @@
+        '',
+        '',
+        '',
+    ] == inputs


 def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
@@ -299,7 +319,7 @@ def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
     expected = [
         "facetable.json?_labels=on",
         "facetable.csv?_labels=on&_size=max",
-        "facetable.csv?_dl=1&_labels=on&_size=max"
+        "#export"
     ]
     assert expected == actual