Mirror of https://github.com/simonw/datasette.git

Compare commits: main...csv-stream (6 commits)

Commits:

- 6204ebb7d5
- b15f412e04
- 9f9c737fc2
- 9d00718250
- 619a9ddb33
- 5a0a82faf9

15 changed files with 220 additions and 38 deletions

@@ -13,6 +13,7 @@ script:
jobs:
  include:
    - stage: deploy latest.datasette.io
      if: branch = master AND type = push
      script:
      - pip install .
      - npm install -g now

@@ -23,7 +24,6 @@ jobs:
      - now alias --token=$NOW_TOKEN
      - echo "{\"name\":\"datasette-latest-$ALIAS\",\"alias\":\"$ALIAS.datasette.io\"}" > now.json
      - now alias --token=$NOW_TOKEN
      on: master
    - stage: release tagged version
      if: tag IS present
      python: 3.6

@@ -94,6 +94,12 @@ CONFIG_OPTIONS = (
     ConfigOption("cache_size_kb", 0, """
         SQLite cache size in KB (0 == use SQLite default)
     """.strip()),
+    ConfigOption("allow_csv_stream", True, """
+        Allow .csv?_stream=1 to download all rows (ignoring max_returned_rows)
+    """.strip()),
+    ConfigOption("max_csv_mb", 100, """
+        Maximum size allowed for CSV export in MB. Set 0 to disable this limit.
+    """.strip()),
 )
 DEFAULT_CONFIG = {
     option.name: option.default

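The two new options follow the existing ConfigOption pattern, so their defaults flow into DEFAULT_CONFIG and are read back out of the shared config dict at request time (self.ds.config[...] in the view code further down). A minimal sketch of that flow, assuming DEFAULT_CONFIG is completed with a comprehension over CONFIG_OPTIONS (the hunk is truncated) and using a plain dict in place of the real Datasette object:

    from collections import namedtuple

    # Assumed shape of the ConfigOption tuple used in the hunk above.
    ConfigOption = namedtuple("ConfigOption", ("name", "default", "help_text"))

    CONFIG_OPTIONS = (
        ConfigOption("allow_csv_stream", True,
                     "Allow .csv?_stream=1 to download all rows"),
        ConfigOption("max_csv_mb", 100,
                     "Maximum size allowed for CSV export in MB"),
    )

    # DEFAULT_CONFIG appears to collect each option's default, keyed by name.
    DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS}

    config = dict(DEFAULT_CONFIG)  # stand-in for self.ds.config
    if not config["allow_csv_stream"]:
        raise ValueError("CSV streaming is disabled")
    limit_bytes = config["max_csv_mb"] * 1024 * 1024  # 0 disables the limit
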
@@ -118,6 +118,13 @@ form label {
     display: inline-block;
     width: 15%;
 }
+.advanced-export form label {
+    width: auto;
+}
+.advanced-export input[type=submit] {
+    font-size: 0.6em;
+    margin-left: 1em;
+}
 label.sort_by_desc {
     width: auto;
     padding-right: 1em;

@@ -272,3 +279,10 @@ a.not-underlined {
 .facet-info a.cross:active {
     text-decoration: none;
 }
+.advanced-export {
+    margin-top: 1em;
+    padding: 0.01em 2em 0.01em 1em;
+    width: auto;
+    display: inline-block;
+    box-shadow: 1px 2px 8px 2px rgba(0,0,0,0.08);
+}

@@ -40,7 +40,7 @@
 </form>

 {% if rows %}
-    <p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
+    <p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>)</p>
     <table class="rows-and-columns">
         <thead>
             <tr>

@@ -92,7 +92,7 @@
 <p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&{{ query.params|urlencode|safe }}{% endif %}">✎ <span class="underlined">View and edit SQL</span></a></p>
 {% endif %}

-<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
+<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>){% endif %}</p>

 {% if suggested_facets %}
 <p class="suggested-facets">

@@ -137,6 +137,27 @@
 <p><a href="{{ next_url }}">Next page</a></p>
 {% endif %}

+{% if display_rows %}
+<div id="export" class="advanced-export">
+    <h3>Advanced export</h3>
+    <p>JSON shape: <a href="{{ url_json }}">default</a>, <a href="{{ append_querystring(url_json, '_shape=array') }}">array</a>{% if primary_keys %}, <a href="{{ append_querystring(url_json, '_shape=object') }}">object</a>{% endif %}</p>
+    <form action="{{ url_csv_path }}" method="get">
+        <p>
+            CSV options:
+            <label><input type="checkbox" name="_dl"> download file</label>
+            {% if expandable_columns %}<label><input type="checkbox" name="_labels"> expand labels</label>{% endif %}
+            {% if next_url %}<label><input type="checkbox" name="_stream"> stream all records</label>{% endif %}
+            <input type="submit" value="Export CSV">
+            {% for key, value in url_csv_args.items() %}
+                {% if key != "_labels" %}
+                <input type="hidden" name="{{ key }}" value="{{ value }}">
+                {% endif %}
+            {% endfor %}
+        </p>
+    </form>
+</div>
+{% endif %}
+
 {% if table_definition %}
 <pre>{{ table_definition }}</pre>
 {% endif %}

@@ -170,6 +170,13 @@ def validate_sql_select(sql):
         raise InvalidSql(msg)


+def append_querystring(url, querystring):
+    op = "&" if ("?" in url) else "?"
+    return "{}{}{}".format(
+        url, op, querystring
+    )
+
+
 def path_with_added_args(request, args, path=None):
     path = path or request.path
     if isinstance(args, dict):

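append_querystring just picks ? or & depending on whether the URL already carries a query string; the advanced export template above uses it to build the alternative JSON shape links. A quick illustration with made-up URLs:

    def append_querystring(url, querystring):
        # Same helper as added above, repeated so the example is self-contained.
        op = "&" if ("?" in url) else "?"
        return "{}{}{}".format(url, op, querystring)

    print(append_querystring("/fixtures/facetable.json", "_shape=array"))
    # -> /fixtures/facetable.json?_shape=array
    print(append_querystring("/fixtures/facetable.json?_labels=on", "_shape=array"))
    # -> /fixtures/facetable.json?_labels=on&_shape=array
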
@@ -832,3 +839,22 @@ def value_as_boolean(value):

 class ValueAsBooleanError(ValueError):
     pass
+
+
+class WriteLimitExceeded(Exception):
+    pass
+
+
+class LimitedWriter:
+    def __init__(self, writer, limit_mb):
+        self.writer = writer
+        self.limit_bytes = limit_mb * 1024 * 1024
+        self.bytes_count = 0
+
+    def write(self, bytes):
+        self.bytes_count += len(bytes)
+        if self.limit_bytes and (self.bytes_count > self.limit_bytes):
+            raise WriteLimitExceeded("CSV contains more than {} bytes".format(
+                self.limit_bytes
+            ))
+        self.writer.write(bytes)

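LimitedWriter wraps anything with a write() method, counts what passes through, and raises WriteLimitExceeded once the max_csv_mb budget is spent; a limit of 0 makes self.limit_bytes falsy, which disables the check. A small demonstration, assuming this branch of datasette is installed so the new classes are importable (it counts characters rather than encoded bytes here, which is close enough for illustration):

    import csv
    import io

    from datasette.utils import LimitedWriter, WriteLimitExceeded

    buffer = io.StringIO()
    writer = csv.writer(LimitedWriter(buffer, limit_mb=1))  # 1 MB budget

    try:
        for i in range(200000):
            writer.writerow([i, "x" * 50])
    except WriteLimitExceeded as e:
        print("stopped:", e)  # e.g. CSV contains more than 1048576 bytes
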
@@ -16,6 +16,7 @@ from datasette.utils import (
     CustomJSONEncoder,
     InterruptedError,
     InvalidSql,
+    LimitedWriter,
     path_from_row_pks,
     path_with_added_args,
     path_with_format,

@@ -150,13 +151,23 @@ class BaseView(RenderMixin):
         return await self.view_get(request, name, hash, **kwargs)

     async def as_csv(self, request, name, hash, **kwargs):
+        stream = request.args.get("_stream")
+        if stream:
+            # Some quick sanity checks
+            if not self.ds.config["allow_csv_stream"]:
+                raise DatasetteError("CSV streaming is disabled", status=400)
+            if request.args.get("_next"):
+                raise DatasetteError(
+                    "_next not allowed for CSV streaming", status=400
+                )
+            kwargs["_size"] = "max"
         # Fetch the first page
         try:
             response_or_template_contexts = await self.data(
                 request, name, hash, **kwargs
             )
             if isinstance(response_or_template_contexts, response.HTTPResponse):
                 return response_or_template_contexts

             else:
                 data, extra_template_data, templates = response_or_template_contexts
         except (sqlite3.OperationalError, InvalidSql) as e:

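With _stream set, the handler forces _size=max and rejects an explicit _next cursor, since the streaming loop below drives pagination itself. Assuming a local instance serving the standard fixtures database, the two kinds of CSV request look roughly like this:

    import urllib.request

    base = "http://127.0.0.1:8001"  # hypothetical local Datasette instance

    # One page only, capped by max_returned_rows:
    page = urllib.request.urlopen(
        base + "/fixtures/compound_three_primary_keys.csv?_size=max"
    )

    # Every row, paginated server-side and streamed out as one response
    # (requires allow_csv_stream, which is on by default):
    full = urllib.request.urlopen(
        base + "/fixtures/compound_three_primary_keys.csv?_stream=1"
    )
    print(full.headers["Content-Type"])  # text/plain here unless _dl is also used
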
@@ -167,6 +178,7 @@ class BaseView(RenderMixin):

        except DatasetteError:
            raise

        # Convert rows and columns to CSV
        headings = data["columns"]
        # if there are expanded_columns we need to add additional headings

@@ -179,22 +191,40 @@
                     headings.append("{}_label".format(column))

         async def stream_fn(r):
-            writer = csv.writer(r)
-            writer.writerow(headings)
-            for row in data["rows"]:
-                if not expanded_columns:
-                    # Simple path
-                    writer.writerow(row)
-                else:
-                    # Look for {"value": "label": } dicts and expand
-                    new_row = []
-                    for cell in row:
-                        if isinstance(cell, dict):
-                            new_row.append(cell["value"])
-                            new_row.append(cell["label"])
+            nonlocal data
+            writer = csv.writer(LimitedWriter(r, self.ds.config["max_csv_mb"]))
+            first = True
+            next = None
+            while first or (next and stream):
+                try:
+                    if next:
+                        kwargs["_next"] = next
+                    if not first:
+                        data, extra_template_data, templates = await self.data(
+                            request, name, hash, **kwargs
+                        )
+                    if first:
+                        writer.writerow(headings)
+                        first = False
+                    next = data.get("next")
+                    for row in data["rows"]:
+                        if not expanded_columns:
+                            # Simple path
+                            writer.writerow(row)
                         else:
-                            new_row.append(cell)
-                    writer.writerow(new_row)
+                            # Look for {"value": "label": } dicts and expand
+                            new_row = []
+                            for cell in row:
+                                if isinstance(cell, dict):
+                                    new_row.append(cell["value"])
+                                    new_row.append(cell["label"])
+                                else:
+                                    new_row.append(cell)
+                            writer.writerow(new_row)
+                except Exception as e:
+                    print('caught this', e)
+                    r.write(str(e))
+                    return

         content_type = "text/plain; charset=utf-8"
         headers = {}

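The rewritten stream_fn is the core of the feature: it wraps the response in LimitedWriter, emits the headings once, then keeps calling self.data() with the _next cursor from the previous page until no cursor remains (or _stream was not requested), expanding {"value": ..., "label": ...} cells into two columns when labels are enabled. Stripped of the Datasette plumbing, the control flow is essentially this simplified, synchronous sketch (not the code above):

    import csv
    import io

    def stream_csv(fetch_page, writer, headings, expanded_columns=()):
        """fetch_page(cursor) -> (rows, next_cursor); writer is a csv.writer()."""
        writer.writerow(headings)
        cursor = None
        while True:
            rows, cursor = fetch_page(cursor)
            for row in rows:
                if not expanded_columns:
                    writer.writerow(row)
                else:
                    expanded = []
                    for cell in row:
                        if isinstance(cell, dict):
                            # {"value": ..., "label": ...} becomes two columns
                            expanded.append(cell["value"])
                            expanded.append(cell["label"])
                        else:
                            expanded.append(cell)
                    writer.writerow(expanded)
            if not cursor:
                break

    # Tiny usage example with two fake pages of rows:
    out = io.StringIO()
    pages = iter([([[1, "hello"], [2, "world"]], "cursor-2"),
                  ([[3, "!"]], None)])
    stream_csv(lambda cursor: next(pages), csv.writer(out), ["id", "content"])
    print(out.getvalue())
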
@@ -352,6 +382,12 @@ class BaseView(RenderMixin):
         url_labels_extra = {}
         if data.get("expandable_columns"):
             url_labels_extra = {"_labels": "on"}
+        url_csv_args = {
+            "_size": "max",
+            **url_labels_extra
+        }
+        url_csv = path_with_format(request, "csv", url_csv_args)
+        url_csv_path = url_csv.split('?')[0]
         context = {
             **data,
             **extras,

@@ -359,15 +395,9 @@ class BaseView(RenderMixin):
             "url_json": path_with_format(request, "json", {
                 **url_labels_extra,
             }),
-            "url_csv": path_with_format(request, "csv", {
-                "_size": "max",
-                **url_labels_extra
-            }),
-            "url_csv_dl": path_with_format(request, "csv", {
-                "_dl": "1",
-                "_size": "max",
-                **url_labels_extra
-            }),
+            "url_csv": url_csv,
+            "url_csv_path": url_csv_path,
+            "url_csv_args": url_csv_args,
             "extra_css_urls": self.ds.extra_css_urls(),
             "extra_js_urls": self.ds.extra_js_urls(),
             "datasette_version": __version__,

@@ -393,7 +423,8 @@ class BaseView(RenderMixin):
         return r

     async def custom_sql(
-        self, request, name, hash, sql, editable=True, canned_query=None
+        self, request, name, hash, sql, editable=True, canned_query=None,
+        _size=None
     ):
         params = request.raw_args
         if "sql" in params:

@@ -415,6 +446,8 @@ class BaseView(RenderMixin):
         extra_args = {}
         if params.get("_timelimit"):
             extra_args["custom_time_limit"] = int(params["_timelimit"])
+        if _size:
+            extra_args["page_size"] = _size
         results = await self.ds.execute(
             name, sql, params, truncate=True, **extra_args
         )

@@ -9,13 +9,13 @@ from .base import BaseView, DatasetteError

 class DatabaseView(BaseView):

-    async def data(self, request, name, hash, default_labels=False):
+    async def data(self, request, name, hash, default_labels=False, _size=None):
         if request.args.get("sql"):
             if not self.ds.config["allow_sql"]:
                 raise DatasetteError("sql= is not allowed", status=400)
             sql = request.raw_args.pop("sql")
             validate_sql_select(sql)
-            return await self.custom_sql(request, name, hash, sql)
+            return await self.custom_sql(request, name, hash, sql, _size=_size)

         info = self.ds.inspect()[name]
         metadata = self.ds.metadata.get("databases", {}).get(name, {})

@@ -10,6 +10,7 @@ from datasette.utils import (
     CustomRow,
     Filters,
     InterruptedError,
+    append_querystring,
     compound_keys_after_sql,
     escape_sqlite,
     filters_should_redirect,

@@ -220,7 +221,7 @@ class RowTableShared(BaseView):

 class TableView(RowTableShared):

-    async def data(self, request, name, hash, table, default_labels=False):
+    async def data(self, request, name, hash, table, default_labels=False, _next=None, _size=None):
         canned_query = self.ds.get_canned_query(name, table)
         if canned_query is not None:
             return await self.custom_sql(

@@ -375,7 +376,7 @@ class TableView(RowTableShared):

         count_sql = "select count(*) {}".format(from_sql)

-        _next = special_args.get("_next")
+        _next = _next or special_args.get("_next")
         offset = ""
         if _next:
             if is_view:

@@ -462,7 +463,7 @@ class TableView(RowTableShared):

         extra_args = {}
         # Handle ?_size=500
-        page_size = request.raw_args.get("_size")
+        page_size = _size or request.raw_args.get("_size")
         if page_size:
             if page_size == "max":
                 page_size = self.max_returned_rows

@@ -512,6 +513,8 @@ class TableView(RowTableShared):
         facet_results = {}
         facets_timed_out = []
         for column in facets:
+            if _next:
+                continue
             facet_sql = """
                 select {col} as value, count(*) as count
                 {from_sql} {and_or_where} {col} is not null

@@ -665,6 +668,8 @@ class TableView(RowTableShared):
         for facet_column in columns:
             if facet_column in facets:
                 continue
+            if _next:
+                continue
             if not self.ds.config["suggest_facets"]:
                 continue
             suggested_facet_sql = '''

@@ -744,6 +749,7 @@ class TableView(RowTableShared):
             "is_sortable": any(c["sortable"] for c in display_columns),
             "path_with_replaced_args": path_with_replaced_args,
             "path_with_removed_args": path_with_removed_args,
+            "append_querystring": append_querystring,
             "request": request,
             "sort": sort,
             "sort_desc": sort_desc,

@@ -125,3 +125,24 @@ Sets the amount of memory SQLite uses for its `per-connection cache <https://www
 ::

     datasette mydatabase.db --config cache_size_kb:5000
+
+
+allow_csv_stream
+----------------
+
+Enables the feature where an entire table (potentially hundreds of thousands of
+rows) can be exported as a single CSV file. This is turned on by default - you
+can turn it off like this::
+
+    datasette mydatabase.db --config allow_csv_stream:off
+
+
+max_csv_mb
+----------
+
+The maximum size of CSV that can be exported, in megabytes. Defaults to 100MB.
+You can disable the limit entirely by setting this to 0::
+
+    datasette mydatabase.db --config max_csv_mb:0

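The same two settings can also be overridden programmatically when constructing the application, which is how the test fixture below (app_client_csv_max_mb_one) drops max_csv_mb to 1. A rough sketch, assuming the Datasette constructor accepts these option names in its config= argument:

    from datasette.app import Datasette

    ds = Datasette(
        ["mydatabase.db"],  # hypothetical database file
        config={
            "allow_csv_stream": True,  # the default; set False to refuse ?_stream=1
            "max_csv_mb": 0,           # 0 removes the export size cap
        },
    )
    app = ds.app()  # the underlying web application, ready to be served
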
@@ -122,6 +122,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and
 You can also disable sorting entirely by setting ``"sortable_columns": []``

+.. _label_columns:

 Specifying the label column for a table
 ---------------------------------------

@@ -71,6 +71,13 @@ def app_client_larger_cache_size():
     })


+@pytest.fixture(scope='session')
+def app_client_csv_max_mb_one():
+    yield from app_client(config={
+        'max_csv_mb': 1,
+    })
+
+
 def generate_compound_rows(num):
     for a, b, c in itertools.islice(
             itertools.product(string.ascii_lowercase, repeat=3), num

@@ -901,6 +901,8 @@ def test_config_json(app_client):
         "default_cache_ttl": 365 * 24 * 60 * 60,
         "num_sql_threads": 3,
         "cache_size_kb": 0,
+        "allow_csv_stream": True,
+        "max_csv_mb": 100,
     } == response.json

@@ -1,4 +1,4 @@
-from .fixtures import app_client  # noqa
+from .fixtures import app_client, app_client_csv_max_mb_one  # noqa

 EXPECTED_TABLE_CSV = '''id,content
 1,hello

@@ -59,3 +59,28 @@ def test_table_csv_download(app_client):
     assert 'text/csv; charset=utf-8' == response.headers['Content-Type']
     expected_disposition = 'attachment; filename="simple_primary_key.csv"'
     assert expected_disposition == response.headers['Content-Disposition']
+
+
+def test_max_csv_mb(app_client_csv_max_mb_one):
+    response = app_client_csv_max_mb_one.get(
+        "/fixtures.csv?sql=select+randomblob(10000)+"
+        "from+compound_three_primary_keys&_stream=1&_size=max"
+    )
+    # It's a 200 because we started streaming before we knew the error
+    assert response.status == 200
+    # Last line should be an error message
+    last_line = [line for line in response.body.split(b"\r\n") if line][-1]
+    assert last_line.startswith(b"CSV contains more than")
+
+
+def test_table_csv_stream(app_client):
+    # Without _stream should return header + 100 rows:
+    response = app_client.get(
+        "/fixtures/compound_three_primary_keys.csv?_size=max"
+    )
+    assert 101 == len([b for b in response.body.split(b"\r\n") if b])
+    # With _stream=1 should return header + 1001 rows
+    response = app_client.get(
+        "/fixtures/compound_three_primary_keys.csv?_stream=1"
+    )
+    assert 1002 == len([b for b in response.body.split(b"\r\n") if b])

@@ -274,9 +274,10 @@ def test_table_html_simple_primary_key(app_client):
     ] == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]


-def test_table_csv_json_export_links(app_client):
+def test_table_csv_json_export_interface(app_client):
     response = app_client.get('/fixtures/simple_primary_key')
     assert response.status == 200
+    # The links at the top of the page
     links = Soup(response.body, "html.parser").find("p", {
         "class": "export-links"
     }).findAll("a")

@@ -284,9 +285,28 @@
     expected = [
         "simple_primary_key.json",
         "simple_primary_key.csv?_size=max",
-        "simple_primary_key.csv?_dl=1&_size=max"
+        "#export"
     ]
     assert expected == actual
+    # And the advanced export box at the bottom:
+    div = Soup(response.body, "html.parser").find("div", {
+        "class": "advanced-export"
+    })
+    json_links = [a["href"].split("/")[-1] for a in div.find("p").findAll("a")]
+    assert [
+        "simple_primary_key.json",
+        "simple_primary_key.json?_shape=array",
+        "simple_primary_key.json?_shape=object"
+    ] == json_links
+    # And the CSV form
+    form = div.find("form")
+    assert form["action"].endswith("/simple_primary_key.csv")
+    inputs = [str(input) for input in form.findAll("input")]
+    assert [
+        '<input name="_dl" type="checkbox"/>',
+        '<input type="submit" value="Export CSV"/>',
+        '<input name="_size" type="hidden" value="max"/>'
+    ] == inputs


 def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):

@@ -299,7 +319,7 @@ def test_csv_json_export_links_include_labels_if_foreign_keys(app_client):
     expected = [
         "facetable.json?_labels=on",
         "facetable.csv?_labels=on&_size=max",
-        "facetable.csv?_dl=1&_labels=on&_size=max"
+        "#export"
     ]
     assert expected == actual