Mirror of https://github.com/simonw/datasette.git (synced 2025-12-10 16:51:24 +01:00)
table.csv?_stream=1 to download all rows - refs #266
This option causes Datasette to serve ALL rows in the table by internally following the _next= pagination links and serving everything out as a stream. Also added a new config option, allow_csv_stream, which can be used to disable this feature.
parent 5a0a82faf9
commit 619a9ddb33
5 changed files with 69 additions and 44 deletions
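As the commit message describes, ?_stream=1 makes Datasette follow its own _next= pagination cursor internally and write every page into one continuous CSV response, so a client can read arbitrarily large tables row by row. A hedged client-side sketch (the host, port, database and table names are placeholders, and the requests library is used purely for illustration):

# Sketch: consume a streamed CSV export without loading it all into memory.
# Assumes a local Datasette at 127.0.0.1:8001 serving a database "mydatabase"
# with a table "mytable"; adjust for a real instance.
import csv
import requests

url = "http://127.0.0.1:8001/mydatabase/mytable.csv?_stream=1"

with requests.get(url, stream=True) as response:
    response.raise_for_status()
    reader = csv.reader(response.iter_lines(decode_unicode=True))
    headings = next(reader)              # first row is the column headings
    for row in reader:
        print(dict(zip(headings, row)))  # handle one row at a time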
@@ -94,6 +94,9 @@ CONFIG_OPTIONS = (
    ConfigOption("cache_size_kb", 0, """
        SQLite cache size in KB (0 == use SQLite default)
    """.strip()),
    ConfigOption("allow_csv_stream", True, """
        Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)
    """.strip()),
)
DEFAULT_CONFIG = {
    option.name: option.default
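The hunk's trailing context stops mid-comprehension, but the shape of the pattern is clear: every option is declared once with its name, default and help text, and DEFAULT_CONFIG is derived from that tuple. A minimal, self-contained sketch of the same pattern follows; the ConfigOption definition is assumed to be a plain namedtuple, since it is not part of this diff.

# Sketch of the config-registry pattern used above. ConfigOption is assumed
# to be a namedtuple of (name, default, help); its real definition is not
# shown in this diff.
from collections import namedtuple

ConfigOption = namedtuple("ConfigOption", ("name", "default", "help"))

CONFIG_OPTIONS = (
    ConfigOption("cache_size_kb", 0, "SQLite cache size in KB"),
    ConfigOption("allow_csv_stream", True, "Allow .csv?_stream=1 downloads"),
)

# Defaults are derived from the registry, as the hunk above begins to do
DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS}

assert DEFAULT_CONFIG["allow_csv_stream"] is True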
@@ -149,42 +149,24 @@ class BaseView(RenderMixin):
        return await self.view_get(request, name, hash, **kwargs)

    async def as_csv_stream(self, request, name, hash, **kwargs):
        assert not request.args.get("_next") # TODO: real error
        kwargs['_size'] = 'max'

        async def stream_fn(r):
            first = True
            next = None
            writer = csv.writer(r)
            while first or next:
                if next:
                    kwargs['_next'] = next
                data, extra_template_data, templates = await self.data(
                    request, name, hash, **kwargs
                )
                if first:
                    writer.writerow(data["columns"])
                    first = False
                next = data["next"]
                for row in data["rows"]:
                    writer.writerow(row)

        return response.stream(
            stream_fn,
            content_type="text/plain; charset=utf-8"
        )

    async def as_csv(self, request, name, hash, **kwargs):
        if request.args.get("_stream"):
            return await self.as_csv_stream(request, name, hash, **kwargs)
        stream = request.args.get("_stream")
        if stream:
            # Some quick sanity checks
            if not self.ds.config["allow_csv_stream"]:
                raise DatasetteError("CSV streaming is disabled", status=400)
            if request.args.get("_next"):
                raise DatasetteError(
                    "_next not allowed for CSV streaming", status=400
                )
            kwargs["_size"] = "max"
        # Fetch the first page
        try:
            response_or_template_contexts = await self.data(
                request, name, hash, **kwargs
            )
            if isinstance(response_or_template_contexts, response.HTTPResponse):
                return response_or_template_contexts

            else:
                data, extra_template_data, templates = response_or_template_contexts
        except (sqlite3.OperationalError, InvalidSql) as e:
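The two sanity checks in this hunk surface as HTTP 400 responses. The sketch below shows how that behaviour could be exercised in the same app_client fixture style used by the tests later in this commit; the test name and the _next value are illustrative, not part of the diff.

# Hypothetical check (not part of this commit's test suite): combining
# _stream=1 with an explicit _next= cursor should be rejected with a 400.
def test_csv_stream_rejects_explicit_next(app_client):
    response = app_client.get(
        "/fixtures/compound_three_primary_keys.csv?_stream=1&_next=anything"
    )
    assert 400 == response.status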
@@ -195,6 +177,7 @@ class BaseView(RenderMixin):
        except DatasetteError:
            raise

        # Convert rows and columns to CSV
        headings = data["columns"]
        # if there are expanded_columns we need to add additional headings
@@ -207,22 +190,35 @@ class BaseView(RenderMixin):
                    headings.append("{}_label".format(column))

        async def stream_fn(r):
            nonlocal data
            writer = csv.writer(r)
            writer.writerow(headings)
            for row in data["rows"]:
                if not expanded_columns:
                    # Simple path
                    writer.writerow(row)
                else:
                    # Look for {"value": "label": } dicts and expand
                    new_row = []
                    for cell in row:
                        if isinstance(cell, dict):
                            new_row.append(cell["value"])
                            new_row.append(cell["label"])
                        else:
                            new_row.append(cell)
                    writer.writerow(new_row)
            first = True
            next = None
            while first or (next and stream):
                if next:
                    kwargs["_next"] = next
                if not first:
                    data, extra_template_data, templates = await self.data(
                        request, name, hash, **kwargs
                    )
                if first:
                    writer.writerow(headings)
                    first = False
                next = data.get("next")
                for row in data["rows"]:
                    if not expanded_columns:
                        # Simple path
                        writer.writerow(row)
                    else:
                        # Look for {"value": "label": } dicts and expand
                        new_row = []
                        for cell in row:
                            if isinstance(cell, dict):
                                new_row.append(cell["value"])
                                new_row.append(cell["label"])
                            else:
                                new_row.append(cell)
                        writer.writerow(new_row)

        content_type = "text/plain; charset=utf-8"
        headers = {}
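Taken together with the allow_csv_stream and _size=max handling earlier, the rewritten stream_fn is the core of the feature: write the headings once, then keep requesting pages with the previous page's next cursor until none remains, expanding any {"value": ..., "label": ...} cells into a value column plus a _label column. Below is a self-contained sketch of that follow-the-cursor loop, decoupled from Datasette's view classes; fetch_page is a stand-in for self.data(...) that pages through an in-memory list, and unlike the real code (which fetches the first page before the loop) the sketch fetches every page inside it.

# Runnable sketch of the follow-the-cursor CSV streaming pattern used by
# stream_fn above. fetch_page() is a placeholder for Datasette's self.data().
import asyncio
import csv
import io

ROWS = [[i, {"value": i, "label": "label-{}".format(i)}] for i in range(10)]

async def fetch_page(next_token, size=4):
    # Keyset-style pagination faked with list slicing
    start = int(next_token or 0)
    page = ROWS[start:start + size]
    more = start + size < len(ROWS)
    return {"rows": page, "next": str(start + size) if more else None}

async def stream_csv(write):
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    first, next_token = True, None
    while first or next_token:
        data = await fetch_page(next_token)
        if first:
            # Expanded columns get an extra "<column>_label" heading
            writer.writerow(["id", "ref", "ref_label"])
            first = False
        next_token = data["next"]
        for row in data["rows"]:
            flat = []
            for cell in row:
                if isinstance(cell, dict):
                    # Expand {"value": ..., "label": ...} into two cells
                    flat.extend([cell["value"], cell["label"]])
                else:
                    flat.append(cell)
            writer.writerow(flat)
        write(buffer.getvalue())  # flush this page to the response
        buffer.seek(0)
        buffer.truncate(0)

asyncio.run(stream_csv(print))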
@@ -125,3 +125,15 @@ Sets the amount of memory SQLite uses for its `per-connection cache <https://www
::

    datasette mydatabase.db --config cache_size_kb:5000


allow_csv_stream
----------------

Enables the feature where an entire table (potentially hundreds of thousands of
rows) can be exported as a single CSV file. This is turned on by default - you
can turn it off like this::

::

    datasette mydatabase.db --config allow_csv_stream:off
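The new default should also show up in Datasette's configuration introspection JSON, which the test_config_json change below asserts against. A hedged sketch for checking a running instance, assuming the endpoint is served at /-/config.json and the server is on 127.0.0.1:8001 (both assumptions, not shown in this diff):

# Sketch only: the endpoint path and host/port are assumptions.
import requests

config = requests.get("http://127.0.0.1:8001/-/config.json").json()
assert config["allow_csv_stream"] is True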
@@ -901,6 +901,7 @@ def test_config_json(app_client):
        "default_cache_ttl": 365 * 24 * 60 * 60,
        "num_sql_threads": 3,
        "cache_size_kb": 0,
        "allow_csv_stream": True,
    } == response.json
@@ -59,3 +59,16 @@ def test_table_csv_download(app_client):
    assert 'text/csv; charset=utf-8' == response.headers['Content-Type']
    expected_disposition = 'attachment; filename="simple_primary_key.csv"'
    assert expected_disposition == response.headers['Content-Disposition']


def test_table_csv_stream(app_client):
    # Without _stream should return header + 100 rows:
    response = app_client.get(
        "/fixtures/compound_three_primary_keys.csv?_size=max"
    )
    assert 101 == len([b for b in response.body.split(b"\r\n") if b])
    # With _stream=1 should return header + 1001 rows
    response = app_client.get(
        "/fixtures/compound_three_primary_keys.csv?_stream=1"
    )
    assert 1002 == len([b for b in response.body.split(b"\r\n") if b])