Basic CSV export, refs #266

Tables and custom SQL query results can now be exported as CSV.

The easiest way to do this is to use the .csv extension, e.g.

	/test_tables/facet_cities.csv

By default this is served as Content-Type: text/plain so you can see it in
your browser. If you want to download the file instead (served as text/csv, with an
appropriate Content-Disposition: attachment header) you can do so like this:

	/test_tables/facet_cities.csv?_dl=1

We link to the CSV and downloadable CSV URLs from the table and query pages.

The links use ?_size=max, so by default they will return up to 1,000 rows.
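
As a rough sketch of what this looks like from a client, assuming a local
Datasette instance serving the test_tables database on 127.0.0.1:8001 (the
host, port and use of the requests library are illustrative, not part of
this commit):

	import requests

	base = "http://127.0.0.1:8001"

	# Plain CSV: served as text/plain, so it renders directly in the browser
	r = requests.get(base + "/test_tables/facet_cities.csv")
	print(r.headers["Content-Type"])  # text/plain; charset=utf-8
	print(r.text.splitlines()[0])     # the CSV header row

	# Download variant: text/csv plus a Content-Disposition: attachment header
	r = requests.get(base + "/test_tables/facet_cities.csv", params={"_dl": 1})
	print(r.headers["Content-Type"])         # text/csv; charset=utf-8
	print(r.headers["Content-Disposition"])  # attachment; filename="facet_cities.csv"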

Also fixes #303: table names ending in .json or .csv are now detected, and the
URLs generated for them look like this instead:

	/test_tables/table%2Fwith%2Fslashes.csv?_format=csv

The ?_format= option is available for everything else too, but we link to the
.csv / .json versions in most cases because they are aesthetically pleasing.
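
To illustrate how that detection works, here is a sketch using the new
resolve_table_and_format() helper this commit adds to datasette.utils (the
lambda callbacks below stand in for the real table_exists lookup):

	from datasette.utils import resolve_table_and_format

	# A table literally named "table/with/slashes.csv" exists, so the name is
	# returned unchanged and no format is extracted - links for it fall back
	# to ?_format=csv instead:
	resolve_table_and_format(
	    "table/with/slashes.csv",
	    table_exists=lambda t: t == "table/with/slashes.csv",
	)
	# -> ("table/with/slashes.csv", None)

	# No table by that name, so the .csv suffix is treated as the format:
	resolve_table_and_format("facet_cities.csv", table_exists=lambda t: False)
	# -> ("facet_cities", "csv")
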
Simon Willison 2018-06-14 23:51:23 -07:00
commit 3a79ad98ea
12 changed files with 243 additions and 38 deletions


@@ -224,6 +224,9 @@ class Datasette:
             conn.execute('PRAGMA cache_size=-{}'.format(self.config["cache_size_kb"]))
         pm.hook.prepare_connection(conn=conn)
 
+    def table_exists(self, database, table):
+        return table in self.inspect().get(database, {}).get("tables")
+
     def inspect(self):
         " Inspect the database and return a dictionary of table metadata "
         if self._inspect:
@@ -395,7 +398,7 @@ class Datasette:
         self.jinja_env.filters["escape_sqlite"] = escape_sqlite
         self.jinja_env.filters["to_css_class"] = to_css_class
         pm.hook.prepare_jinja2_environment(env=self.jinja_env)
-        app.add_route(IndexView.as_view(self), "/<as_json:(\.jsono?)?$>")
+        app.add_route(IndexView.as_view(self), "/<as_format:(\.jsono?)?$>")
         # TODO: /favicon.ico and /-/static/ deserve far-future cache expires
         app.add_route(favicon, "/favicon.ico")
         app.static("/-/static/", str(app_root / "datasette" / "static"))
@@ -408,37 +411,37 @@ class Datasette:
             app.static(modpath, plugin["static_path"])
         app.add_route(
             JsonDataView.as_view(self, "inspect.json", self.inspect),
-            "/-/inspect<as_json:(\.json)?$>",
+            "/-/inspect<as_format:(\.json)?$>",
         )
         app.add_route(
             JsonDataView.as_view(self, "metadata.json", lambda: self.metadata),
-            "/-/metadata<as_json:(\.json)?$>",
+            "/-/metadata<as_format:(\.json)?$>",
         )
         app.add_route(
             JsonDataView.as_view(self, "versions.json", self.versions),
-            "/-/versions<as_json:(\.json)?$>",
+            "/-/versions<as_format:(\.json)?$>",
         )
         app.add_route(
             JsonDataView.as_view(self, "plugins.json", self.plugins),
-            "/-/plugins<as_json:(\.json)?$>",
+            "/-/plugins<as_format:(\.json)?$>",
         )
         app.add_route(
             JsonDataView.as_view(self, "config.json", lambda: self.config),
-            "/-/config<as_json:(\.json)?$>",
+            "/-/config<as_format:(\.json)?$>",
         )
         app.add_route(
-            DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_json:(\.jsono?)?$>"
+            DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_format:(\.jsono?|\.csv)?$>"
         )
         app.add_route(
             DatabaseDownload.as_view(self), "/<db_name:[^/]+?><as_db:(\.db)$>"
         )
         app.add_route(
             TableView.as_view(self),
-            "/<db_name:[^/]+>/<table:[^/]+?><as_json:(\.jsono?)?$>",
+            "/<db_name:[^/]+>/<table_and_format:[^/]+?$>",
         )
         app.add_route(
             RowView.as_view(self),
-            "/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_json:(\.jsono?)?$>",
+            "/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_format:(\.jsono?)?$>",
         )
         self.register_custom_units()


@@ -40,7 +40,7 @@
 </form>
 {% if rows %}
-    <p>This data as <a href="{{ url_json }}">.json</a></p>
+    <p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
     <table class="rows-and-columns">
         <thead>
             <tr>


@@ -92,7 +92,7 @@
     <p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
 {% endif %}
 
-<p>This data as <a href="{{ url_json }}">.json</a></p>
+<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
 
 {% if suggested_facets %}
     <p class="suggested-facets">


@@ -225,14 +225,6 @@ def path_with_replaced_args(request, args, path=None):
     return path + query_string
 
-
-def path_with_ext(request, ext):
-    path = request.path
-    path += ext
-    if request.query_string:
-        path += '?' + request.query_string
-    return path
-
 _css_re = re.compile(r'''['"\n\\]''')
 _boring_keyword_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
@@ -772,3 +764,39 @@ def get_plugins(pm):
             plugin_info['version'] = distinfo.version
         plugins.append(plugin_info)
     return plugins
+
+
+FORMATS = ('csv', 'json', 'jsono')
+
+
+def resolve_table_and_format(table_and_format, table_exists):
+    if '.' in table_and_format:
+        # Check if a table exists with this exact name
+        if table_exists(table_and_format):
+            return table_and_format, None
+        # Check if table ends with a known format
+        for _format in FORMATS:
+            if table_and_format.endswith(".{}".format(_format)):
+                table = table_and_format[:-(len(_format) + 1)]
+                return table, _format
+    return table_and_format, None
+
+
+def path_with_format(request, format, extra_qs=None):
+    qs = extra_qs or {}
+    path = request.path
+    if "." in request.path:
+        qs["_format"] = format
+    else:
+        path = "{}.{}".format(path, format)
+    if qs:
+        extra = urllib.parse.urlencode(sorted(qs.items()))
+        if request.query_string:
+            path = "{}?{}&{}".format(
+                path, request.query_string, extra
+            )
+        else:
+            path = "{}?{}".format(path, extra)
+    elif request.query_string:
+        path = "{}?{}".format(path, request.query_string)
+    return path


@@ -1,8 +1,10 @@
 import asyncio
+import csv
 import json
 import re
 import sqlite3
 import time
+import urllib
 
 import pint
 from sanic import response
@@ -16,7 +18,8 @@ from datasette.utils import (
     InvalidSql,
     path_from_row_pks,
     path_with_added_args,
-    path_with_ext,
+    path_with_format,
+    resolve_table_and_format,
     to_css_class
 )
@@ -113,13 +116,23 @@ class BaseView(RenderMixin):
         expected = info["hash"][:HASH_LENGTH]
         if expected != hash:
+            if "table_and_format" in kwargs:
+                table, _format = resolve_table_and_format(
+                    table_and_format=urllib.parse.unquote_plus(
+                        kwargs["table_and_format"]
+                    ),
+                    table_exists=lambda t: self.ds.table_exists(name, t)
+                )
+                kwargs["table"] = table
+                if _format:
+                    kwargs["as_format"] = ".{}".format(_format)
             should_redirect = "/{}-{}".format(name, expected)
             if "table" in kwargs:
-                should_redirect += "/" + kwargs["table"]
+                should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"])
             if "pk_path" in kwargs:
                 should_redirect += "/" + kwargs["pk_path"]
-            if "as_json" in kwargs:
-                should_redirect += kwargs["as_json"]
+            if "as_format" in kwargs:
+                should_redirect += kwargs["as_format"]
             if "as_db" in kwargs:
                 should_redirect += kwargs["as_db"]
             return name, expected, should_redirect
@@ -136,11 +149,65 @@ class BaseView(RenderMixin):
         return await self.view_get(request, name, hash, **kwargs)
 
-    async def view_get(self, request, name, hash, **kwargs):
-        try:
-            as_json = kwargs.pop("as_json")
-        except KeyError:
-            as_json = False
+    async def as_csv(self, request, name, hash, **kwargs):
+        try:
+            response_or_template_contexts = await self.data(
+                request, name, hash, **kwargs
+            )
+            if isinstance(response_or_template_contexts, response.HTTPResponse):
+                return response_or_template_contexts
+            else:
+                data, extra_template_data, templates = response_or_template_contexts
+        except (sqlite3.OperationalError, InvalidSql) as e:
+            raise DatasetteError(str(e), title="Invalid SQL", status=400)
+        except (sqlite3.OperationalError) as e:
+            raise DatasetteError(str(e))
+        except DatasetteError:
+            raise
+        # Convert rows and columns to CSV
+        async def stream_fn(r):
+            writer = csv.writer(r)
+            writer.writerow(data["columns"])
+            for row in data["rows"]:
+                writer.writerow(row)
+        content_type = "text/plain; charset=utf-8"
+        headers = {}
+        if request.args.get("_dl", None):
+            content_type = "text/csv; charset=utf-8"
+            disposition = 'attachment; filename="{}.csv"'.format(
+                kwargs.get('table', name)
+            )
+            headers["Content-Disposition"] = disposition
+        return response.stream(
+            stream_fn,
+            headers=headers,
+            content_type=content_type
+        )
+
+    async def view_get(self, request, name, hash, **kwargs):
+        # If ?_format= is provided, use that as the format
+        _format = request.args.get("_format", None)
+        if not _format:
+            _format = (kwargs.pop("as_format", None) or "").lstrip(".")
+        if "table_and_format" in kwargs:
+            table, _ext_format = resolve_table_and_format(
+                table_and_format=urllib.parse.unquote_plus(
+                    kwargs["table_and_format"]
+                ),
+                table_exists=lambda t: self.ds.table_exists(name, t)
+            )
+            _format = _format or _ext_format
+            kwargs["table"] = table
+            del kwargs["table_and_format"]
+        if _format == "csv":
+            return await self.as_csv(request, name, hash, **kwargs)
+
         extra_template_data = {}
         start = time.time()
         status_code = 200
@@ -175,9 +242,9 @@ class BaseView(RenderMixin):
             value = self.ds.metadata.get(key)
             if value:
                 data[key] = value
-        if as_json:
+        if _format in ("json", "jsono"):
             # Special case for .jsono extension - redirect to _shape=objects
-            if as_json == ".jsono":
+            if _format == "jsono":
                 return self.redirect(
                     request,
                     path_with_added_args(
@@ -260,8 +327,14 @@ class BaseView(RenderMixin):
                 **data,
                 **extras,
                 **{
-                    "url_json": path_with_ext(request, ".json"),
-                    "url_jsono": path_with_ext(request, ".jsono"),
+                    "url_json": path_with_format(request, "json"),
+                    "url_csv": path_with_format(request, "csv", {
+                        "_size": "max"
+                    }),
+                    "url_csv_dl": path_with_format(request, "csv", {
+                        "_dl": "1",
+                        "_size": "max"
+                    }),
                     "extra_css_urls": self.ds.extra_css_urls(),
                     "extra_js_urls": self.ds.extra_js_urls(),
                     "datasette_version": __version__,


@@ -16,7 +16,7 @@ class IndexView(RenderMixin):
         self.jinja_env = datasette.jinja_env
         self.executor = datasette.executor
 
-    async def get(self, request, as_json):
+    async def get(self, request, as_format):
         databases = []
         for key, info in sorted(self.ds.inspect().items()):
             tables = [t for t in info["tables"].values() if not t["hidden"]]
@@ -38,7 +38,7 @@ class IndexView(RenderMixin):
                 "views_count": len(info["views"]),
             }
             databases.append(database)
-        if as_json:
+        if as_format:
             headers = {}
             if self.ds.cors:
                 headers["Access-Control-Allow-Origin"] = "*"


@@ -10,9 +10,9 @@ class JsonDataView(RenderMixin):
         self.filename = filename
         self.data_callback = data_callback
 
-    async def get(self, request, as_json):
+    async def get(self, request, as_format):
         data = self.data_callback()
-        if as_json:
+        if as_format:
             headers = {}
             if self.ds.cors:
                 headers["Access-Control-Allow-Origin"] = "*"


@@ -232,7 +232,6 @@ class RowTableShared(BaseView):
 class TableView(RowTableShared):
 
     async def data(self, request, name, hash, table):
-        table = urllib.parse.unquote_plus(table)
         canned_query = self.ds.get_canned_query(name, table)
         if canned_query is not None:
             return await self.custom_sql(
@@ -780,7 +779,6 @@ class TableView(RowTableShared):
 class RowView(RowTableShared):
 
     async def data(self, request, name, hash, table, pk_path):
-        table = urllib.parse.unquote_plus(table)
         pk_values = urlsafe_components(pk_path)
         info = self.ds.inspect()[name]
         table_info = info["tables"].get(table) or {}


@@ -507,7 +507,7 @@ def test_table_shape_object_compound_primary_Key(app_client):
 
 def test_table_with_slashes_in_name(app_client):
-    response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.json?_shape=objects')
+    response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json')
     assert response.status == 200
     data = response.json
     assert data['rows'] == [{

tests/test_csv.py (new file)

@@ -0,0 +1,37 @@
+from .fixtures import app_client  # noqa
+
+EXPECTED_TABLE_CSV = '''id,content
+1,hello
+2,world
+3,
+'''.replace('\n', '\r\n')
+
+EXPECTED_CUSTOM_CSV = '''content
+hello
+world
+""
+'''.replace('\n', '\r\n')
+
+
+def test_table_csv(app_client):
+    response = app_client.get('/test_tables/simple_primary_key.csv')
+    assert response.status == 200
+    assert 'text/plain; charset=utf-8' == response.headers['Content-Type']
+    assert EXPECTED_TABLE_CSV == response.text
+
+
+def test_custom_sql_csv(app_client):
+    response = app_client.get(
+        '/test_tables.csv?sql=select+content+from+simple_primary_key'
+    )
+    assert response.status == 200
+    assert 'text/plain; charset=utf-8' == response.headers['Content-Type']
+    assert EXPECTED_CUSTOM_CSV == response.text
+
+
+def test_table_csv_download(app_client):
+    response = app_client.get('/test_tables/simple_primary_key.csv?_dl=1')
+    assert response.status == 200
+    assert 'text/csv; charset=utf-8' == response.headers['Content-Type']
+    expected_disposition = 'attachment; filename="simple_primary_key.csv"'
+    assert expected_disposition == response.headers['Content-Disposition']


@@ -274,6 +274,21 @@ def test_table_html_simple_primary_key(app_client):
     ] == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
 
+
+def test_table_csv_json_export_links(app_client):
+    response = app_client.get('/test_tables/simple_primary_key')
+    assert response.status == 200
+    links = Soup(response.body, "html.parser").find("p", {
+        "class": "export-links"
+    }).findAll("a")
+    actual = [l["href"].split("/")[-1] for l in links]
+    expected = [
+        "simple_primary_key.json",
+        "simple_primary_key.csv?_size=max",
+        "simple_primary_key.csv?_dl=1&_size=max"
+    ]
+    assert expected == actual
+
+
 def test_row_html_simple_primary_key(app_client):
     response = app_client.get('/test_tables/simple_primary_key/1')
     assert response.status == 200


@@ -299,3 +299,54 @@ def test_compound_keys_after_sql():
     or
     (a = :p0 and b = :p1 and c > :p2))
 '''.strip() == utils.compound_keys_after_sql(['a', 'b', 'c'])
+
+
+def table_exists(table):
+    return table == "exists.csv"
+
+
+@pytest.mark.parametrize(
+    "table_and_format,expected_table,expected_format",
+    [
+        ("blah", "blah", None),
+        ("blah.csv", "blah", "csv"),
+        ("blah.json", "blah", "json"),
+        ("blah.baz", "blah.baz", None),
+        ("exists.csv", "exists.csv", None),
+    ],
+)
+def test_resolve_table_and_format(
+    table_and_format, expected_table, expected_format
+):
+    actual_table, actual_format = utils.resolve_table_and_format(
+        table_and_format, table_exists
+    )
+    assert expected_table == actual_table
+    assert expected_format == actual_format
+
+
+@pytest.mark.parametrize(
+    "path,format,extra_qs,expected",
+    [
+        ("/foo?sql=select+1", "csv", {}, "/foo.csv?sql=select+1"),
+        ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"),
+        ("/foo/bar", "json", {}, "/foo/bar.json"),
+        ("/foo/bar", "csv", {}, "/foo/bar.csv"),
+        ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"),
+        ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"),
+        ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"),
+        (
+            "/sf-trees/Street_Tree_List?_search=cherry&_size=1000",
+            "csv",
+            {"_dl": 1},
+            "/sf-trees/Street_Tree_List.csv?_search=cherry&_size=1000&_dl=1",
+        ),
+    ],
+)
+def test_path_with_format(path, format, extra_qs, expected):
+    request = Request(
+        path.encode('utf8'),
+        {}, '1.1', 'GET', None
+    )
+    actual = utils.path_with_format(request, format, extra_qs)
+    assert expected == actual