Basic CSV export, refs #266

Tables and custom SQL query results can now be exported as CSV.

The easiest way to do this is to use the .csv extension, e.g.

	/test_tables/facet_cities.csv

By default this is served as Content-Type: text/plain so you can see it in
your browser. If you want to download the file (using text/csv and with an
appropriate Content-Disposition: attachment header) you can do so like this:

	/test_tables/facet_cities.csv?_dl=1

We link to the CSV and downloadable CSV URLs from the table and query pages.

The links use ?_size=max and so by default will return at most 1,000 rows.

Also fixes #303 - table names ending in .json or .csv are now detected and
URLs are generated that look like this instead:

	/test_tables/table%2Fwith%2Fslashes.csv?_format=csv

The ?_format= option is available for everything else too, but we link to the
.csv / .json versions in most cases because they are aesthetically pleasing.
This commit is contained in:
Simon Willison 2018-06-14 23:51:23 -07:00
commit 3a79ad98ea
No known key found for this signature in database
GPG key ID: 17E2DEA2588B7F52
12 changed files with 243 additions and 38 deletions

View file

@ -224,6 +224,9 @@ class Datasette:
conn.execute('PRAGMA cache_size=-{}'.format(self.config["cache_size_kb"]))
pm.hook.prepare_connection(conn=conn)
def table_exists(self, database, table):
    """Return True if ``table`` is listed under ``database`` in ``self.inspect()``.

    An unknown database is reported as "no such table" (False) instead of
    raising.
    """
    # Default "tables" to an empty dict: without the default, an unknown
    # database makes .get("tables") return None and ``in`` raises TypeError.
    return table in self.inspect().get(database, {}).get("tables", {})
def inspect(self):
" Inspect the database and return a dictionary of table metadata "
if self._inspect:
@ -395,7 +398,7 @@ class Datasette:
self.jinja_env.filters["escape_sqlite"] = escape_sqlite
self.jinja_env.filters["to_css_class"] = to_css_class
pm.hook.prepare_jinja2_environment(env=self.jinja_env)
app.add_route(IndexView.as_view(self), "/<as_json:(\.jsono?)?$>")
app.add_route(IndexView.as_view(self), "/<as_format:(\.jsono?)?$>")
# TODO: /favicon.ico and /-/static/ deserve far-future cache expires
app.add_route(favicon, "/favicon.ico")
app.static("/-/static/", str(app_root / "datasette" / "static"))
@ -408,37 +411,37 @@ class Datasette:
app.static(modpath, plugin["static_path"])
app.add_route(
JsonDataView.as_view(self, "inspect.json", self.inspect),
"/-/inspect<as_json:(\.json)?$>",
"/-/inspect<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "metadata.json", lambda: self.metadata),
"/-/metadata<as_json:(\.json)?$>",
"/-/metadata<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "versions.json", self.versions),
"/-/versions<as_json:(\.json)?$>",
"/-/versions<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "plugins.json", self.plugins),
"/-/plugins<as_json:(\.json)?$>",
"/-/plugins<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "config.json", lambda: self.config),
"/-/config<as_json:(\.json)?$>",
"/-/config<as_format:(\.json)?$>",
)
app.add_route(
DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_json:(\.jsono?)?$>"
DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_format:(\.jsono?|\.csv)?$>"
)
app.add_route(
DatabaseDownload.as_view(self), "/<db_name:[^/]+?><as_db:(\.db)$>"
)
app.add_route(
TableView.as_view(self),
"/<db_name:[^/]+>/<table:[^/]+?><as_json:(\.jsono?)?$>",
"/<db_name:[^/]+>/<table_and_format:[^/]+?$>",
)
app.add_route(
RowView.as_view(self),
"/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_json:(\.jsono?)?$>",
"/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_format:(\.jsono?)?$>",
)
self.register_custom_units()

View file

@ -40,7 +40,7 @@
</form>
{% if rows %}
<p>This data as <a href="{{ url_json }}">.json</a></p>
<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
<table class="rows-and-columns">
<thead>
<tr>

View file

@ -92,7 +92,7 @@
<p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
{% endif %}
<p>This data as <a href="{{ url_json }}">.json</a></p>
<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
{% if suggested_facets %}
<p class="suggested-facets">

View file

@ -225,14 +225,6 @@ def path_with_replaced_args(request, args, path=None):
return path + query_string
def path_with_ext(request, ext):
    """Return the request path with ``ext`` appended, keeping any query string."""
    new_path = request.path + ext
    query = request.query_string
    if not query:
        return new_path
    return new_path + '?' + query
_css_re = re.compile(r'''['"\n\\]''')
_boring_keyword_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
@ -772,3 +764,39 @@ def get_plugins(pm):
plugin_info['version'] = distinfo.version
plugins.append(plugin_info)
return plugins
FORMATS = ('csv', 'json', 'jsono')


def resolve_table_and_format(table_and_format, table_exists):
    """Split a URL path component into a ``(table, format)`` pair.

    ``table_exists`` is a callable that takes a table name and returns a
    truthy value if that table exists. A dotted name that matches an existing
    table is returned whole with a format of ``None``. Otherwise a trailing
    ``.<format>`` for any entry in ``FORMATS`` is split off. Names with no
    recognised extension are returned unchanged with a format of ``None``.
    """
    has_dot = '.' in table_and_format
    # A real table whose name contains a dot takes priority over treating
    # the text after the last dot as a format extension.
    if has_dot and table_exists(table_and_format):
        return table_and_format, None
    stem, dot, suffix = table_and_format.rpartition('.')
    if dot and suffix in FORMATS:
        return stem, suffix
    return table_and_format, None
def path_with_format(request, format, extra_qs=None):
    """Return the request's URL rewritten to ask for the given output format.

    If the request path contains no dot, the format is appended as an
    extension (``/db/table`` -> ``/db/table.csv``). If the path already
    contains a dot, the path is left alone and ``_format=<format>`` is added
    to the query string instead.

    ``request`` must expose ``path`` and ``query_string``. ``extra_qs`` is an
    optional dict of additional query-string arguments; they are appended
    after the existing query string, sorted by key. The dict passed in is
    never modified.

    The ``format`` parameter shadows the builtin of the same name; the name
    is kept so existing callers are unaffected.
    """
    # Work on a copy: adding "_format" below must not leak into the caller's
    # dictionary.
    qs = dict(extra_qs) if extra_qs else {}
    path = request.path
    if "." in path:
        qs["_format"] = format
    else:
        path = "{}.{}".format(path, format)
    # The existing query string comes first, then the extra arguments.
    query_parts = []
    if request.query_string:
        query_parts.append(request.query_string)
    if qs:
        query_parts.append(urllib.parse.urlencode(sorted(qs.items())))
    if query_parts:
        path = "{}?{}".format(path, "&".join(query_parts))
    return path

View file

@ -1,8 +1,10 @@
import asyncio
import csv
import json
import re
import sqlite3
import time
import urllib
import pint
from sanic import response
@ -16,7 +18,8 @@ from datasette.utils import (
InvalidSql,
path_from_row_pks,
path_with_added_args,
path_with_ext,
path_with_format,
resolve_table_and_format,
to_css_class
)
@ -113,13 +116,23 @@ class BaseView(RenderMixin):
expected = info["hash"][:HASH_LENGTH]
if expected != hash:
if "table_and_format" in kwargs:
table, _format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
kwargs["table_and_format"]
),
table_exists=lambda t: self.ds.table_exists(name, t)
)
kwargs["table"] = table
if _format:
kwargs["as_format"] = ".{}".format(_format)
should_redirect = "/{}-{}".format(name, expected)
if "table" in kwargs:
should_redirect += "/" + kwargs["table"]
should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"])
if "pk_path" in kwargs:
should_redirect += "/" + kwargs["pk_path"]
if "as_json" in kwargs:
should_redirect += kwargs["as_json"]
if "as_format" in kwargs:
should_redirect += kwargs["as_format"]
if "as_db" in kwargs:
should_redirect += kwargs["as_db"]
return name, expected, should_redirect
@ -136,11 +149,65 @@ class BaseView(RenderMixin):
return await self.view_get(request, name, hash, **kwargs)
async def view_get(self, request, name, hash, **kwargs):
# Build a CSV response from the columns and rows returned by self.data().
async def as_csv(self, request, name, hash, **kwargs):
try:
# NOTE(review): the next three lines set as_json, which nothing else in this
# method reads. They look like removed lines of the old view_get that this
# diff rendering interleaved here — confirm against the real file.
as_json = kwargs.pop("as_json")
except KeyError:
as_json = False
response_or_template_contexts = await self.data(
request, name, hash, **kwargs
)
# self.data() may hand back a ready-made HTTP response; pass it straight
# through. Otherwise it is unpacked as a 3-tuple, of which only `data` is
# used below.
if isinstance(response_or_template_contexts, response.HTTPResponse):
return response_or_template_contexts
else:
data, extra_template_data, templates = response_or_template_contexts
except (sqlite3.OperationalError, InvalidSql) as e:
raise DatasetteError(str(e), title="Invalid SQL", status=400)
# NOTE(review): this clause can never run — sqlite3.OperationalError is
# already caught by the clause above.
except (sqlite3.OperationalError) as e:
raise DatasetteError(str(e))
# DatasetteError is re-raised unchanged.
except DatasetteError:
raise
# Convert rows and columns to CSV
async def stream_fn(r):
# Header row first, then one CSV row per entry in data["rows"].
writer = csv.writer(r)
writer.writerow(data["columns"])
for row in data["rows"]:
writer.writerow(row)
# Served as plain text by default; a truthy ?_dl= argument switches to
# text/csv and adds an attachment Content-Disposition header.
content_type = "text/plain; charset=utf-8"
headers = {}
if request.args.get("_dl", None):
content_type = "text/csv; charset=utf-8"
# Filename is the table name when one was passed in kwargs, otherwise `name`.
# NOTE(review): the value is interpolated unescaped — check how a name
# containing a double quote should be handled.
disposition = 'attachment; filename="{}.csv"'.format(
kwargs.get('table', name)
)
headers["Content-Disposition"] = disposition
return response.stream(
stream_fn,
headers=headers,
content_type=content_type
)
async def view_get(self, request, name, hash, **kwargs):
# If ?_format= is provided, use that as the format
_format = request.args.get("_format", None)
if not _format:
_format = (kwargs.pop("as_format", None) or "").lstrip(".")
if "table_and_format" in kwargs:
table, _ext_format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
kwargs["table_and_format"]
),
table_exists=lambda t: self.ds.table_exists(name, t)
)
_format = _format or _ext_format
kwargs["table"] = table
del kwargs["table_and_format"]
if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs)
extra_template_data = {}
start = time.time()
status_code = 200
@ -175,9 +242,9 @@ class BaseView(RenderMixin):
value = self.ds.metadata.get(key)
if value:
data[key] = value
if as_json:
if _format in ("json", "jsono"):
# Special case for .jsono extension - redirect to _shape=objects
if as_json == ".jsono":
if _format == "jsono":
return self.redirect(
request,
path_with_added_args(
@ -260,8 +327,14 @@ class BaseView(RenderMixin):
**data,
**extras,
**{
"url_json": path_with_ext(request, ".json"),
"url_jsono": path_with_ext(request, ".jsono"),
"url_json": path_with_format(request, "json"),
"url_csv": path_with_format(request, "csv", {
"_size": "max"
}),
"url_csv_dl": path_with_format(request, "csv", {
"_dl": "1",
"_size": "max"
}),
"extra_css_urls": self.ds.extra_css_urls(),
"extra_js_urls": self.ds.extra_js_urls(),
"datasette_version": __version__,

View file

@ -16,7 +16,7 @@ class IndexView(RenderMixin):
self.jinja_env = datasette.jinja_env
self.executor = datasette.executor
async def get(self, request, as_json):
async def get(self, request, as_format):
databases = []
for key, info in sorted(self.ds.inspect().items()):
tables = [t for t in info["tables"].values() if not t["hidden"]]
@ -38,7 +38,7 @@ class IndexView(RenderMixin):
"views_count": len(info["views"]),
}
databases.append(database)
if as_json:
if as_format:
headers = {}
if self.ds.cors:
headers["Access-Control-Allow-Origin"] = "*"

View file

@ -10,9 +10,9 @@ class JsonDataView(RenderMixin):
self.filename = filename
self.data_callback = data_callback
async def get(self, request, as_json):
async def get(self, request, as_format):
data = self.data_callback()
if as_json:
if as_format:
headers = {}
if self.ds.cors:
headers["Access-Control-Allow-Origin"] = "*"

View file

@ -232,7 +232,6 @@ class RowTableShared(BaseView):
class TableView(RowTableShared):
async def data(self, request, name, hash, table):
table = urllib.parse.unquote_plus(table)
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(
@ -780,7 +779,6 @@ class TableView(RowTableShared):
class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path):
table = urllib.parse.unquote_plus(table)
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {}

View file

@ -507,7 +507,7 @@ def test_table_shape_object_compound_primary_Key(app_client):
def test_table_with_slashes_in_name(app_client):
response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.json?_shape=objects')
response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json')
assert response.status == 200
data = response.json
assert data['rows'] == [{

37
tests/test_csv.py Normal file
View file

@ -0,0 +1,37 @@
from .fixtures import app_client # noqa
# Expected body for the simple_primary_key table export. The replace() turns
# every line ending into \r\n, which is the csv module's default row
# terminator.
EXPECTED_TABLE_CSV = '''id,content
1,hello
2,world
3,
'''.replace('\n', '\r\n')
# Expected body for the single-column custom SQL export. The last row is
# written as "" — presumably how csv.writer emits a row whose only field is
# an empty string; confirm against the csv module documentation.
EXPECTED_CUSTOM_CSV = '''content
hello
world
""
'''.replace('\n', '\r\n')
def test_table_csv(app_client):
    """A table requested with the .csv extension is served as plain text."""
    resp = app_client.get('/test_tables/simple_primary_key.csv')
    assert 200 == resp.status
    assert resp.headers['Content-Type'] == 'text/plain; charset=utf-8'
    assert resp.text == EXPECTED_TABLE_CSV
def test_custom_sql_csv(app_client):
    """A custom SQL query against the database .csv URL is served as plain text."""
    url = '/test_tables.csv?sql=select+content+from+simple_primary_key'
    resp = app_client.get(url)
    assert 200 == resp.status
    assert resp.headers['Content-Type'] == 'text/plain; charset=utf-8'
    assert resp.text == EXPECTED_CUSTOM_CSV
def test_table_csv_download(app_client):
    """?_dl=1 switches to text/csv and adds an attachment disposition header."""
    resp = app_client.get('/test_tables/simple_primary_key.csv?_dl=1')
    assert 200 == resp.status
    response_headers = resp.headers
    assert response_headers['Content-Type'] == 'text/csv; charset=utf-8'
    assert (
        response_headers['Content-Disposition']
        == 'attachment; filename="simple_primary_key.csv"'
    )

View file

@ -274,6 +274,21 @@ def test_table_html_simple_primary_key(app_client):
] == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
def test_table_csv_json_export_links(app_client):
    """The table page links to the JSON, CSV and downloadable-CSV exports."""
    page = app_client.get('/test_tables/simple_primary_key')
    assert 200 == page.status
    export_paragraph = Soup(page.body, "html.parser").find(
        "p", {"class": "export-links"}
    )
    # Compare only the final path component of each link.
    hrefs = []
    for anchor in export_paragraph.findAll("a"):
        hrefs.append(anchor["href"].split("/")[-1])
    assert [
        "simple_primary_key.json",
        "simple_primary_key.csv?_size=max",
        "simple_primary_key.csv?_dl=1&_size=max",
    ] == hrefs
def test_row_html_simple_primary_key(app_client):
response = app_client.get('/test_tables/simple_primary_key/1')
assert response.status == 200

View file

@ -299,3 +299,54 @@ def test_compound_keys_after_sql():
or
(a = :p0 and b = :p1 and c > :p2))
'''.strip() == utils.compound_keys_after_sql(['a', 'b', 'c'])
def table_exists(table):
    """Stub table lookup: only "exists.csv" counts as an existing table."""
    known_table = "exists.csv"
    return known_table == table
@pytest.mark.parametrize(
    "table_and_format,expected_table,expected_format",
    [
        ("blah", "blah", None),
        ("blah.csv", "blah", "csv"),
        ("blah.json", "blah", "json"),
        ("blah.baz", "blah.baz", None),
        ("exists.csv", "exists.csv", None),
    ],
)
def test_resolve_table_and_format(
    table_and_format, expected_table, expected_format
):
    """Known extensions are split off unless the full name is a real table."""
    resolved = utils.resolve_table_and_format(table_and_format, table_exists)
    actual_table, actual_format = resolved
    assert actual_table == expected_table
    assert actual_format == expected_format
@pytest.mark.parametrize(
    "path,format,extra_qs,expected",
    [
        ("/foo?sql=select+1", "csv", {}, "/foo.csv?sql=select+1"),
        ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"),
        ("/foo/bar", "json", {}, "/foo/bar.json"),
        ("/foo/bar", "csv", {}, "/foo/bar.csv"),
        ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"),
        ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"),
        ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"),
        (
            "/sf-trees/Street_Tree_List?_search=cherry&_size=1000",
            "csv",
            {"_dl": 1},
            "/sf-trees/Street_Tree_List.csv?_search=cherry&_size=1000&_dl=1",
        ),
    ],
)
def test_path_with_format(path, format, extra_qs, expected):
    """path_with_format adds an extension or a ?_format= argument as needed."""
    raw_url = path.encode('utf8')
    request = Request(raw_url, {}, '1.1', 'GET', None)
    assert utils.path_with_format(request, format, extra_qs) == expected