Basic CSV export, refs #266

Tables and custom SQL query results can now be exported as CSV.

The easiest way to do this is to use the .csv extension, e.g.

	/test_tables/facet_cities.csv

By default this is served as Content-Type: text/plain so you can see it in
your browser. If you want to download the file (using text/csv and with an
appropriate Content-Disposition: attachment header) you can do so like this:

	/test_tables/facet_cities.csv?_dl=1

We link to the CSV and downloadable CSV URLs from the table and query pages.

The links use ?_size=max and so by default will return 1,000 rows.

Also fixes #303 - table names ending in .json or .csv are now detected and
URLs are generated that look like this instead:

	/test_tables/table%2Fwith%2Fslashes.csv?_format=csv

The ?_format= option is available for everything else too, but we link to the
.csv / .json versions in most cases because they are aesthetically pleasing.
This commit is contained in:
Simon Willison 2018-06-14 23:51:23 -07:00
commit 3a79ad98ea
No known key found for this signature in database
GPG key ID: 17E2DEA2588B7F52
12 changed files with 243 additions and 38 deletions

View file

@ -224,6 +224,9 @@ class Datasette:
conn.execute('PRAGMA cache_size=-{}'.format(self.config["cache_size_kb"]))
pm.hook.prepare_connection(conn=conn)
def table_exists(self, database, table):
    """Return True if ``table`` is listed for ``database`` in the inspect data.

    ``database`` is looked up in the dictionary returned by ``self.inspect()``
    and ``table`` is tested for membership in that entry's ``"tables"`` value.
    An unknown database (or an entry without a ``"tables"`` key) yields False
    instead of raising ``TypeError`` from ``table in None``.
    """
    # Default to an empty mapping at both levels so the membership test is
    # always performed against a container.
    return table in self.inspect().get(database, {}).get("tables", {})
def inspect(self):
" Inspect the database and return a dictionary of table metadata "
if self._inspect:
@ -395,7 +398,7 @@ class Datasette:
self.jinja_env.filters["escape_sqlite"] = escape_sqlite
self.jinja_env.filters["to_css_class"] = to_css_class
pm.hook.prepare_jinja2_environment(env=self.jinja_env)
app.add_route(IndexView.as_view(self), "/<as_json:(\.jsono?)?$>")
app.add_route(IndexView.as_view(self), "/<as_format:(\.jsono?)?$>")
# TODO: /favicon.ico and /-/static/ deserve far-future cache expires
app.add_route(favicon, "/favicon.ico")
app.static("/-/static/", str(app_root / "datasette" / "static"))
@ -408,37 +411,37 @@ class Datasette:
app.static(modpath, plugin["static_path"])
app.add_route(
JsonDataView.as_view(self, "inspect.json", self.inspect),
"/-/inspect<as_json:(\.json)?$>",
"/-/inspect<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "metadata.json", lambda: self.metadata),
"/-/metadata<as_json:(\.json)?$>",
"/-/metadata<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "versions.json", self.versions),
"/-/versions<as_json:(\.json)?$>",
"/-/versions<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "plugins.json", self.plugins),
"/-/plugins<as_json:(\.json)?$>",
"/-/plugins<as_format:(\.json)?$>",
)
app.add_route(
JsonDataView.as_view(self, "config.json", lambda: self.config),
"/-/config<as_json:(\.json)?$>",
"/-/config<as_format:(\.json)?$>",
)
app.add_route(
DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_json:(\.jsono?)?$>"
DatabaseView.as_view(self), "/<db_name:[^/\.]+?><as_format:(\.jsono?|\.csv)?$>"
)
app.add_route(
DatabaseDownload.as_view(self), "/<db_name:[^/]+?><as_db:(\.db)$>"
)
app.add_route(
TableView.as_view(self),
"/<db_name:[^/]+>/<table:[^/]+?><as_json:(\.jsono?)?$>",
"/<db_name:[^/]+>/<table_and_format:[^/]+?$>",
)
app.add_route(
RowView.as_view(self),
"/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_json:(\.jsono?)?$>",
"/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_format:(\.jsono?)?$>",
)
self.register_custom_units()

View file

@ -40,7 +40,7 @@
</form>
{% if rows %}
<p>This data as <a href="{{ url_json }}">.json</a></p>
<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
<table class="rows-and-columns">
<thead>
<tr>

View file

@ -92,7 +92,7 @@
<p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
{% endif %}
<p>This data as <a href="{{ url_json }}">.json</a></p>
<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>, <a href="{{ url_csv }}">CSV</a> (<a href="{{ url_csv_dl }}">download CSV</a>)</p>
{% if suggested_facets %}
<p class="suggested-facets">

View file

@ -225,14 +225,6 @@ def path_with_replaced_args(request, args, path=None):
return path + query_string
def path_with_ext(request, ext):
    """Return the request path with ``ext`` appended.

    If the request carries a query string it is re-attached after the
    extension, separated by ``?``.
    """
    extended = request.path + ext
    if not request.query_string:
        return extended
    return extended + '?' + request.query_string
# Matches any single quote, double quote, newline, or backslash character.
_css_re = re.compile(r'''['"\n\\]''')
# Matches a string, anchored at both ends, that starts with an ASCII letter or
# underscore and continues with only ASCII letters, digits, or underscores.
_boring_keyword_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
@ -772,3 +764,39 @@ def get_plugins(pm):
plugin_info['version'] = distinfo.version
plugins.append(plugin_info)
return plugins
# File-extension style format names recognised at the end of a table path.
FORMATS = ('csv', 'json', 'jsono')


def resolve_table_and_format(table_and_format, table_exists):
    """Split a ``table[.format]`` path component into ``(table, format)``.

    ``table_exists`` is a callable taking a table name and returning a truthy
    value if that table exists. A name containing a dot that matches an
    existing table exactly is returned unchanged with a format of ``None``.
    Otherwise, a trailing ``.csv`` / ``.json`` / ``.jsono`` is stripped and
    returned as the format. If neither applies, the input is returned with a
    format of ``None``.
    """
    # Without a dot there can be no extension, so nothing needs resolving.
    if '.' not in table_and_format:
        return table_and_format, None
    # An exact table match wins over interpreting the suffix as a format.
    if table_exists(table_and_format):
        return table_and_format, None
    for candidate in FORMATS:
        suffix = "." + candidate
        if table_and_format.endswith(suffix):
            return table_and_format[:-len(suffix)], candidate
    return table_and_format, None
def path_with_format(request, format, extra_qs=None):
    """Build a URL for the current request in the given output format.

    Parameters:
        request: object exposing ``path`` and ``query_string`` attributes.
        format: format name such as ``"csv"`` or ``"json"``.
        extra_qs: optional dict of extra query-string arguments to append.
            It is never modified.

    Returns:
        The path with ``.<format>`` appended. When the path already contains
        a dot, an extension would be ambiguous, so the path is left alone and
        ``_format=<format>`` is added to the query string instead. The
        request's existing query string comes first, followed by the extra
        arguments sorted by key.
    """
    # Copy so that adding "_format" below never mutates the caller's dict.
    qs = dict(extra_qs) if extra_qs else {}
    path = request.path
    if "." in request.path:
        qs["_format"] = format
    else:
        path = "{}.{}".format(path, format)

    query_parts = []
    if request.query_string:
        query_parts.append(request.query_string)
    if qs:
        # Sorted so the generated URL is deterministic.
        query_parts.append(urllib.parse.urlencode(sorted(qs.items())))
    if query_parts:
        path = "{}?{}".format(path, "&".join(query_parts))
    return path

View file

@ -1,8 +1,10 @@
import asyncio
import csv
import json
import re
import sqlite3
import time
import urllib
import pint
from sanic import response
@ -16,7 +18,8 @@ from datasette.utils import (
InvalidSql,
path_from_row_pks,
path_with_added_args,
path_with_ext,
path_with_format,
resolve_table_and_format,
to_css_class
)
@ -113,13 +116,23 @@ class BaseView(RenderMixin):
expected = info["hash"][:HASH_LENGTH]
if expected != hash:
if "table_and_format" in kwargs:
table, _format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
kwargs["table_and_format"]
),
table_exists=lambda t: self.ds.table_exists(name, t)
)
kwargs["table"] = table
if _format:
kwargs["as_format"] = ".{}".format(_format)
should_redirect = "/{}-{}".format(name, expected)
if "table" in kwargs:
should_redirect += "/" + kwargs["table"]
should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"])
if "pk_path" in kwargs:
should_redirect += "/" + kwargs["pk_path"]
if "as_json" in kwargs:
should_redirect += kwargs["as_json"]
if "as_format" in kwargs:
should_redirect += kwargs["as_format"]
if "as_db" in kwargs:
should_redirect += kwargs["as_db"]
return name, expected, should_redirect
@ -136,11 +149,65 @@ class BaseView(RenderMixin):
return await self.view_get(request, name, hash, **kwargs)
async def view_get(self, request, name, hash, **kwargs):
async def as_csv(self, request, name, hash, **kwargs):
    """Return the result of ``self.data()`` as a streamed CSV response.

    ``self.data()`` is awaited with the same arguments. If it returns an
    ``HTTPResponse`` that response is passed straight through; otherwise the
    first element of the returned 3-tuple supplies ``"columns"`` and
    ``"rows"`` for the CSV body.

    The response is served as ``text/plain`` unless the ``_dl`` query
    argument is truthy, in which case it is served as ``text/csv`` with a
    ``Content-Disposition: attachment`` header. The attachment is named after
    ``kwargs["table"]``, falling back to ``name``.

    Raises:
        DatasetteError: with status 400 and title "Invalid SQL" when
            ``self.data()`` raises ``sqlite3.OperationalError`` or
            ``InvalidSql``. A ``DatasetteError`` raised by ``self.data()``
            propagates unchanged.
    """
    try:
        response_or_template_contexts = await self.data(
            request, name, hash, **kwargs
        )
    except (sqlite3.OperationalError, InvalidSql) as e:
        # One handler suffices: a second OperationalError clause after this
        # one could never be reached.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    if isinstance(response_or_template_contexts, response.HTTPResponse):
        return response_or_template_contexts
    # Only the data dict is needed for CSV; the template extras are unused.
    data, _extra_template_data, _templates = response_or_template_contexts

    # Convert rows and columns to CSV
    async def stream_fn(r):
        writer = csv.writer(r)
        writer.writerow(data["columns"])
        for row in data["rows"]:
            writer.writerow(row)

    content_type = "text/plain; charset=utf-8"
    headers = {}
    if request.args.get("_dl", None):
        content_type = "text/csv; charset=utf-8"
        # NOTE(review): the table name is interpolated unescaped into the
        # quoted filename — confirm names containing '"' are handled upstream.
        disposition = 'attachment; filename="{}.csv"'.format(
            kwargs.get('table', name)
        )
        headers["Content-Disposition"] = disposition

    return response.stream(
        stream_fn,
        headers=headers,
        content_type=content_type
    )
async def view_get(self, request, name, hash, **kwargs):
# If ?_format= is provided, use that as the format
_format = request.args.get("_format", None)
if not _format:
_format = (kwargs.pop("as_format", None) or "").lstrip(".")
if "table_and_format" in kwargs:
table, _ext_format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
kwargs["table_and_format"]
),
table_exists=lambda t: self.ds.table_exists(name, t)
)
_format = _format or _ext_format
kwargs["table"] = table
del kwargs["table_and_format"]
if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs)
extra_template_data = {}
start = time.time()
status_code = 200
@ -175,9 +242,9 @@ class BaseView(RenderMixin):
value = self.ds.metadata.get(key)
if value:
data[key] = value
if as_json:
if _format in ("json", "jsono"):
# Special case for .jsono extension - redirect to _shape=objects
if as_json == ".jsono":
if _format == "jsono":
return self.redirect(
request,
path_with_added_args(
@ -260,8 +327,14 @@ class BaseView(RenderMixin):
**data,
**extras,
**{
"url_json": path_with_ext(request, ".json"),
"url_jsono": path_with_ext(request, ".jsono"),
"url_json": path_with_format(request, "json"),
"url_csv": path_with_format(request, "csv", {
"_size": "max"
}),
"url_csv_dl": path_with_format(request, "csv", {
"_dl": "1",
"_size": "max"
}),
"extra_css_urls": self.ds.extra_css_urls(),
"extra_js_urls": self.ds.extra_js_urls(),
"datasette_version": __version__,

View file

@ -16,7 +16,7 @@ class IndexView(RenderMixin):
self.jinja_env = datasette.jinja_env
self.executor = datasette.executor
async def get(self, request, as_json):
async def get(self, request, as_format):
databases = []
for key, info in sorted(self.ds.inspect().items()):
tables = [t for t in info["tables"].values() if not t["hidden"]]
@ -38,7 +38,7 @@ class IndexView(RenderMixin):
"views_count": len(info["views"]),
}
databases.append(database)
if as_json:
if as_format:
headers = {}
if self.ds.cors:
headers["Access-Control-Allow-Origin"] = "*"

View file

@ -10,9 +10,9 @@ class JsonDataView(RenderMixin):
self.filename = filename
self.data_callback = data_callback
async def get(self, request, as_json):
async def get(self, request, as_format):
data = self.data_callback()
if as_json:
if as_format:
headers = {}
if self.ds.cors:
headers["Access-Control-Allow-Origin"] = "*"

View file

@ -232,7 +232,6 @@ class RowTableShared(BaseView):
class TableView(RowTableShared):
async def data(self, request, name, hash, table):
table = urllib.parse.unquote_plus(table)
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(
@ -780,7 +779,6 @@ class TableView(RowTableShared):
class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path):
table = urllib.parse.unquote_plus(table)
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {}