Streaming mode for downloading all rows as a CSV (#315)

* table.csv?_stream=1 to download all rows - refs #266

This option causes Datasette to serve ALL rows in the table, by internally
following the _next= pagination links and serving everything out as a stream.

Also added a new config option, allow_csv_stream, which can be used to disable
this feature.

* New config option max_csv_mb limiting size of CSV export
This commit is contained in:
Simon Willison 2018-06-17 20:21:02 -07:00 committed by GitHub
commit fc3660cfad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 142 additions and 24 deletions

View file

@ -71,6 +71,13 @@ def app_client_larger_cache_size():
})
@pytest.fixture(scope='session')
def app_client_csv_max_mb_one():
    """Session-scoped fixture: an app client built with ``max_csv_mb`` set to 1."""
    # Passed straight through to app_client() as its ``config`` argument.
    csv_limit_config = {'max_csv_mb': 1}
    yield from app_client(config=csv_limit_config)
def generate_compound_rows(num):
for a, b, c in itertools.islice(
itertools.product(string.ascii_lowercase, repeat=3), num

View file

@ -901,6 +901,8 @@ def test_config_json(app_client):
"default_cache_ttl": 365 * 24 * 60 * 60,
"num_sql_threads": 3,
"cache_size_kb": 0,
"allow_csv_stream": True,
"max_csv_mb": 100,
} == response.json

View file

@ -1,4 +1,4 @@
from .fixtures import app_client # noqa
from .fixtures import app_client, app_client_csv_max_mb_one # noqa
EXPECTED_TABLE_CSV = '''id,content
1,hello
@ -59,3 +59,28 @@ def test_table_csv_download(app_client):
assert 'text/csv; charset=utf-8' == response.headers['Content-Type']
expected_disposition = 'attachment; filename="simple_primary_key.csv"'
assert expected_disposition == response.headers['Content-Disposition']
def test_max_csv_mb(app_client_csv_max_mb_one):
    """Check that an oversized streamed CSV ends with an error line.

    Requests a streamed CSV of ``randomblob(10000)`` values from a client
    whose ``max_csv_mb`` is 1 and inspects the tail of the response body.
    """
    path = (
        "/fixtures.csv?sql=select+randomblob(10000)+"
        "from+compound_three_primary_keys&_stream=1&_size=max"
    )
    response = app_client_csv_max_mb_one.get(path)
    # It's a 200 because we started streaming before we knew the error
    assert response.status == 200
    # Drop empty chunks so a trailing CRLF does not hide the final line.
    non_empty_lines = list(filter(None, response.body.split(b"\r\n")))
    # Last line should be an error message
    assert non_empty_lines[-1].startswith(b"CSV contains more than")
def test_table_csv_stream(app_client):
    """Compare the CSV line count with and without ``_stream=1``."""

    def count_csv_lines(path):
        # Count non-empty CRLF-separated lines in the response body.
        body = app_client.get(path).body
        return sum(1 for chunk in body.split(b"\r\n") if chunk)

    # Without _stream should return header + 100 rows:
    paginated_total = count_csv_lines(
        "/fixtures/compound_three_primary_keys.csv?_size=max"
    )
    assert 101 == paginated_total

    # With _stream=1 should return header + 1001 rows
    streamed_total = count_csv_lines(
        "/fixtures/compound_three_primary_keys.csv?_stream=1"
    )
    assert 1002 == streamed_total