?_labels= and ?_label=COL to expand foreign keys in JSON/CSV

These new querystring arguments can be used to request expanded foreign keys
in both JSON and CSV formats.

?_labels=on turns on expansions for ALL foreign key columns

?_label=COLUMN1&_label=COLUMN2 can be used to pick specific columns to expand

e.g. `Street_Tree_List.json?_label=qSpecies&_label=PlantType&_shape=array`

    [
      {
        "rowid": 1,
        "TreeID": 141565,
        "qLegalStatus": 1,
        "qSpecies": {
          "value": 1,
          "label": "Myoporum laetum :: Myoporum"
        },
        "qAddress": "501X Baker St",
        "SiteOrder": 1,
        "qSiteInfo": 1,
        "PlantType": {
          "value": 1,
          "label": "Tree"
        },
        "qCaretaker": 1,
        "qCareAssistant": null,
        "PlantDate": "07/21/1988 12:00:00 AM",
        "DBH": 21,
        "PlotSize": "Width 0ft",
        "PermitNotes": "Permit Number 25401",
        "XCoord": 6000609,
        "YCoord": 2110829,
        "Latitude": 37.7759676911831,
        "Longitude": -122.441396661871,
        "Location": "(37.7759676911831, -122.441396661871)"
      },
      {
        "rowid": 2,
        "TreeID": 232565,
        "qLegalStatus": 2,
        "qSpecies": {
          "value": 2,
          "label": "Metrosideros excelsa :: New Zealand Xmas Tree"
        },
        "qAddress": "940 Elizabeth St",
        "SiteOrder": 1,
        "qSiteInfo": 2,
        "PlantType": {
          "value": 1,
          "label": "Tree"
        },
        "qCaretaker": 1,
        "qCareAssistant": null,
        "PlantDate": "03/20/2017 12:00:00 AM",
        "DBH": 3,
        "PlotSize": "Width 4ft",
        "PermitNotes": "Permit Number 779625",
        "XCoord": 6000396.48544,
        "YCoord": 2101998.8644,
        "Latitude": 37.7517102172731,
        "Longitude": -122.441498017841,
        "Location": "(37.7517102172731, -122.441498017841)"
      }
    ]

The labels option also works for the HTML and CSV views.

HTML defaults to `?_labels=on`, so if you pass `?_labels=off` you can disable
foreign key expansion entirely - or you can use `?_label=COLUMN` to request
just specific columns.

If you expand labels on CSV you get additional columns in the output:

`/Street_Tree_List.csv?_label=qLegalStatus`

    rowid,TreeID,qLegalStatus,qLegalStatus_label...
    1,141565,1,Permitted Site...
    2,232565,2,Undocumented...

I also refactored the existing foreign key expansion code.

Closes #233. Refs #266.
This commit is contained in:
Simon Willison 2018-06-16 15:00:14 -07:00
commit 40287b1ba0
No known key found for this signature in database
GPG key ID: 17E2DEA2588B7F52
7 changed files with 126 additions and 79 deletions

View file

@ -168,11 +168,33 @@ class BaseView(RenderMixin):
except DatasetteError: except DatasetteError:
raise raise
# Convert rows and columns to CSV # Convert rows and columns to CSV
headings = data["columns"]
# if there are columns_expanded we need to add additional headings
columns_expanded = set(data.get("columns_expanded") or [])
if columns_expanded:
headings = []
for column in data["columns"]:
headings.append(column)
if column in columns_expanded:
headings.append("{}_label".format(column))
async def stream_fn(r): async def stream_fn(r):
writer = csv.writer(r) writer = csv.writer(r)
writer.writerow(data["columns"]) writer.writerow(headings)
for row in data["rows"]: for row in data["rows"]:
writer.writerow(row) if not columns_expanded:
# Simple path
writer.writerow(row)
else:
# Look for {"value": ..., "label": ...} dicts and expand
new_row = []
for cell in row:
if isinstance(cell, dict):
new_row.append(cell["value"])
new_row.append(cell["label"])
else:
new_row.append(cell)
writer.writerow(new_row)
content_type = "text/plain; charset=utf-8" content_type = "text/plain; charset=utf-8"
headers = {} headers = {}
@ -208,6 +230,10 @@ class BaseView(RenderMixin):
if _format == "csv": if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs) return await self.as_csv(request, name, hash, **kwargs)
if _format is None:
# HTML views default to expanding all foreign key labels
kwargs['default_labels'] = True
extra_template_data = {} extra_template_data = {}
start = time.time() start = time.time()
status_code = 200 status_code = 200

View file

@ -9,7 +9,7 @@ from .base import BaseView, DatasetteError
class DatabaseView(BaseView): class DatabaseView(BaseView):
async def data(self, request, name, hash): async def data(self, request, name, hash, default_labels=False):
if request.args.get("sql"): if request.args.get("sql"):
if not self.ds.config["allow_sql"]: if not self.ds.config["allow_sql"]:
raise DatasetteError("sql= is not allowed", status=400) raise DatasetteError("sql= is not allowed", status=400)

View file

@ -20,10 +20,14 @@ from datasette.utils import (
path_with_replaced_args, path_with_replaced_args,
to_css_class, to_css_class,
urlsafe_components, urlsafe_components,
value_as_boolean,
) )
from .base import BaseView, DatasetteError, ureg from .base import BaseView, DatasetteError, ureg
LINK_WITH_LABEL = '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'
LINK_WITH_VALUE = '<a href="/{database}/{table}/{link_id}">{id}</a>'
class RowTableShared(BaseView): class RowTableShared(BaseView):
@ -39,7 +43,7 @@ class RowTableShared(BaseView):
return sortable_columns return sortable_columns
def expandable_columns(self, database, table): def expandable_columns(self, database, table):
# Returns list of (fk_dict, label_column) pairs for that table # Returns list of (fk_dict, label_column-or-None) pairs for that table
tables = self.ds.inspect()[database].get("tables", {}) tables = self.ds.inspect()[database].get("tables", {})
table_info = tables.get(table) table_info = tables.get(table)
if not table_info: if not table_info:
@ -51,9 +55,8 @@ class RowTableShared(BaseView):
database, fk["other_table"] database, fk["other_table"]
).get("label_column") ).get("label_column")
or tables.get(fk["other_table"], {}).get("label_column") or tables.get(fk["other_table"], {}).get("label_column")
) ) or None
if label_column: expandables.append((fk, label_column))
expandables.append((fk, label_column))
return expandables return expandables
async def expand_foreign_keys(self, database, table, column, values): async def expand_foreign_keys(self, database, table, column, values):
@ -80,7 +83,10 @@ class RowTableShared(BaseView):
or tables_info.get(fk["other_table"], {}).get("label_column") or tables_info.get(fk["other_table"], {}).get("label_column")
) )
if not label_column: if not label_column:
return {} return {
(fk["column"], value): str(value)
for value in values
}
labeled_fks = {} labeled_fks = {}
sql = ''' sql = '''
select {other_column}, {label_column} select {other_column}, {label_column}
@ -110,7 +116,6 @@ class RowTableShared(BaseView):
description, description,
rows, rows,
link_column=False, link_column=False,
expand_foreign_keys=True,
): ):
"Returns columns, rows for specified table - including fancy foreign key treatment" "Returns columns, rows for specified table - including fancy foreign key treatment"
table_metadata = self.table_metadata(database, table) table_metadata = self.table_metadata(database, table)
@ -122,44 +127,12 @@ class RowTableShared(BaseView):
tables = info["tables"] tables = info["tables"]
table_info = tables.get(table) or {} table_info = tables.get(table) or {}
pks = table_info.get("primary_keys") or [] pks = table_info.get("primary_keys") or []
column_to_foreign_key_table = {
# Prefetch foreign key resolutions for later expansion: fk["column"]: fk["other_table"]
fks = {} for fk in table_info.get(
labeled_fks = {} "foreign_keys", {}
if table_info and expand_foreign_keys: ).get("outgoing", None) or []
foreign_keys = table_info["foreign_keys"]["outgoing"] }
for fk in foreign_keys:
label_column = (
# First look in metadata.json definition for this foreign key table:
self.table_metadata(database, fk["other_table"]).get("label_column")
# Fall back to label_column from .inspect() detection:
or tables.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
# No label for this FK
fks[fk["column"]] = fk["other_table"]
continue
ids_to_lookup = set([row[fk["column"]] for row in rows])
sql = '''
select {other_column}, {label_column}
from {other_table}
where {other_column} in ({placeholders})
'''.format(
other_column=escape_sqlite(fk["other_column"]),
label_column=escape_sqlite(label_column),
other_table=escape_sqlite(fk["other_table"]),
placeholders=", ".join(["?"] * len(ids_to_lookup)),
)
try:
results = await self.ds.execute(
database, sql, list(set(ids_to_lookup))
)
except InterruptedError:
pass
else:
for id, value in results:
labeled_fks[(fk["column"], id)] = (fk["other_table"], value)
cell_rows = [] cell_rows = []
for row in rows: for row in rows:
@ -192,26 +165,22 @@ class RowTableShared(BaseView):
# already shown in the link column. # already shown in the link column.
continue continue
if (column, value) in labeled_fks: if isinstance(value, dict):
other_table, label = labeled_fks[(column, value)] # It's an expanded foreign key - display link to other row
display_value = jinja2.Markup( label = value["label"]
'<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format( value = value["value"]
database=database, # The table we link to depends on the column
table=urllib.parse.quote_plus(other_table), other_table = column_to_foreign_key_table[column]
link_id=urllib.parse.quote_plus(str(value)), link_template = (
id=str(jinja2.escape(value)), LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
label=str(jinja2.escape(label)),
)
)
elif column in fks:
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
database=database,
table=urllib.parse.quote_plus(fks[column]),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
)
) )
display_value = jinja2.Markup(link_template.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
))
elif value is None: elif value is None:
display_value = jinja2.Markup("&nbsp;") display_value = jinja2.Markup("&nbsp;")
elif is_url(str(value).strip()): elif is_url(str(value).strip()):
@ -251,7 +220,7 @@ class RowTableShared(BaseView):
class TableView(RowTableShared): class TableView(RowTableShared):
async def data(self, request, name, hash, table): async def data(self, request, name, hash, table, default_labels=False):
canned_query = self.ds.get_canned_query(name, table) canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None: if canned_query is not None:
return await self.custom_sql( return await self.custom_sql(
@ -604,13 +573,29 @@ class TableView(RowTableShared):
filter_columns = filter_columns[1:] filter_columns = filter_columns[1:]
# Expand labeled columns if requested # Expand labeled columns if requested
labeled_columns = [] columns_expanded = []
if request.raw_args.get("_labels", None): expandable_columns = self.expandable_columns(name, table)
expandable_columns = self.expandable_columns(name, table) columns_to_expand = None
try:
all_labels = value_as_boolean(special_args.get("_labels", ""))
except ValueError:
all_labels = default_labels
# Check for explicit _label=
if "_label" in request.args:
columns_to_expand = request.args["_label"]
if columns_to_expand is None and all_labels:
# expand all columns with foreign keys
columns_to_expand = [
fk["column"] for fk, _ in expandable_columns
]
if columns_to_expand:
expanded_labels = {} expanded_labels = {}
for fk, label_column in expandable_columns: for fk, label_column in expandable_columns:
column = fk["column"] column = fk["column"]
labeled_columns.append(column) if column not in columns_to_expand:
continue
columns_expanded.append(column)
# Gather the values # Gather the values
column_index = columns.index(column) column_index = columns.index(column)
values = [row[column_index] for row in rows] values = [row[column_index] for row in rows]
@ -733,7 +718,6 @@ class TableView(RowTableShared):
results.description, results.description,
rows, rows,
link_column=not is_view, link_column=not is_view,
expand_foreign_keys=True,
) )
metadata = self.ds.metadata.get("databases", {}).get(name, {}).get( metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
"tables", {} "tables", {}
@ -787,7 +771,7 @@ class TableView(RowTableShared):
"truncated": results.truncated, "truncated": results.truncated,
"table_rows_count": table_rows_count, "table_rows_count": table_rows_count,
"filtered_table_rows_count": filtered_table_rows_count, "filtered_table_rows_count": filtered_table_rows_count,
"labeled_columns": labeled_columns, "columns_expanded": columns_expanded,
"columns": columns, "columns": columns,
"primary_keys": pks, "primary_keys": pks,
"units": units, "units": units,
@ -804,7 +788,7 @@ class TableView(RowTableShared):
class RowView(RowTableShared): class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path): async def data(self, request, name, hash, table, pk_path, default_labels=False):
pk_values = urlsafe_components(pk_path) pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name] info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {} table_info = info["tables"].get(table) or {}
@ -834,7 +818,6 @@ class RowView(RowTableShared):
results.description, results.description,
rows, rows,
link_column=False, link_column=False,
expand_foreign_keys=True,
) )
for column in display_columns: for column in display_columns:
column["sortable"] = False column["sortable"] = False

View file

@ -163,6 +163,12 @@ Special table arguments
The Datasette table view takes a number of special querystring arguments: The Datasette table view takes a number of special querystring arguments:
``?_labels=on/off``
Expand foreign key references for every possible column. See below.
``?_label=COLUMN1&_label=COLUMN2``
Expand foreign key references for one or more specified columns.
``?_size=1000`` or ``?_size=max`` ``?_size=1000`` or ``?_size=max``
Sets a custom page size. This cannot exceed the ``max_returned_rows`` limit Sets a custom page size. This cannot exceed the ``max_returned_rows`` limit
passed to ``datasette serve``. Use ``max`` to get ``max_returned_rows``. passed to ``datasette serve``. Use ``max`` to get ``max_returned_rows``.
@ -197,3 +203,34 @@ The Datasette table view takes a number of special querystring arguments:
``?_labels=1`` ``?_labels=1``
Indicates that you would like to expand any foreign key references. These Indicates that you would like to expand any foreign key references. These
will be exposed in the JSON as ``{"value": 3, "label": "Monterey"}``. will be exposed in the JSON as ``{"value": 3, "label": "Monterey"}``.
Expanding foreign key references
--------------------------------
Datasette can detect foreign key relationships and resolve those references into
labels. The HTML interface does this by default for every detected foreign key
column - you can turn that off using ``?_labels=off``.
You can request foreign keys be expanded in JSON using the ``_labels=on`` or
``_label=COLUMN`` special querystring parameters. Here's what an expanded row
looks like::
[
{
"rowid": 1,
"TreeID": 141565,
"qLegalStatus": {
"value": 1,
"label": "Permitted Site"
},
"qSpecies": {
"value": 1,
"label": "Myoporum laetum :: Myoporum"
},
"qAddress": "501X Baker St",
"SiteOrder": 1
}
]
The column in the foreign key table that is used for the label can be specified
in ``metadata.json`` - see :ref:`label_columns`.

View file

@ -121,6 +121,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and
You can also disable sorting entirely by setting ``"sortable_columns": []`` You can also disable sorting entirely by setting ``"sortable_columns": []``
.. _label_columns:
Specifying the label column for a table Specifying the label column for a table
--------------------------------------- ---------------------------------------

View file

@ -28,7 +28,7 @@ pk,planet_int,state,city_id,city_id_label,neighborhood
13,1,MI,3,Detroit,Corktown 13,1,MI,3,Detroit,Corktown
14,1,MI,3,Detroit,Mexicantown 14,1,MI,3,Detroit,Mexicantown
15,2,MC,4,Memnonia,Arcadia Planitia 15,2,MC,4,Memnonia,Arcadia Planitia
'''.strip().replace('\n', '\r\n') '''.lstrip().replace('\n', '\r\n')
def test_table_csv(app_client): def test_table_csv(app_client):
response = app_client.get('/test_tables/simple_primary_key.csv') response = app_client.get('/test_tables/simple_primary_key.csv')

View file

@ -394,9 +394,9 @@ def test_table_html_disable_foreign_key_links_with_labels(app_client):
table = Soup(response.body, 'html.parser').find('table') table = Soup(response.body, 'html.parser').find('table')
expected = [ expected = [
[ [
'<td no class="col-pk"><a href="/test_tables/foreign_key_references/1">1</a></td>', '<td class="col-pk"><a href="/test_tables/foreign_key_references/1">1</a></td>',
'<td class="col-foreign_key_with_label"><a href="/test_tables/simple_primary_key/1">hello</a>\xa0<em>1</em></td>', '<td class="col-foreign_key_with_label">1</td>',
'<td class="col-foreign_key_with_no_label"><a href="/test_tables/primary_key_multiple_columns/1">1</a></td>' '<td class="col-foreign_key_with_no_label">1</td>'
] ]
] ]
assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')] assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]