?_labels= and ?_label=COL to expand foreign keys in JSON/CSV

These new querystring arguments can be used to request expanded foreign keys
in both JSON and CSV formats.

?_labels=on turns on expansions for ALL foreign key columns

?_label=COLUMN1&_label=COLUMN2 can be used to pick specific columns to expand

e.g. `Street_Tree_List.json?_label=qSpecies&_label=qLegalStatus`

    {
        "rowid": 233,
        "TreeID": 121240,
        "qLegalStatus": {
            "value" 2,
            "label": "Private"
        }
        "qSpecies": {
            "value": 16,
            "label": "Sycamore"
        }
        "qAddress": "91 Commonwealth Ave",
        ...
    }

The labels option also works for the HTML and CSV views.

HTML defaults to `?_labels=on`, so if you pass `?_labels=off` you can disable
foreign key expansion entirely - or you can use `?_label=COLUMN` to request
just specific columns.

If you expand labels on CSV you get additional columns in the output:

`/Street_Tree_List.csv?_label=qLegalStatus`

    rowid,TreeID,qLegalStatus,qLegalStatus_label...
    1,141565,1,Permitted Site...
    2,232565,2,Undocumented...

I also refactored the existing foreign key expansion code.

Closes #233. Refs #266.
This commit is contained in:
Simon Willison 2018-06-16 15:18:57 -07:00
commit ed631e690b
No known key found for this signature in database
GPG key ID: 17E2DEA2588B7F52
9 changed files with 276 additions and 69 deletions

View file

@ -1,4 +1,5 @@
from contextlib import contextmanager
from collections import OrderedDict
import base64
import hashlib
import imp
@ -802,6 +803,23 @@ def path_with_format(request, format, extra_qs=None):
return path
class CustomRow(OrderedDict):
# Loose imitation of sqlite3.Row which offers
# both index-based AND key-based lookups
def __init__(self, columns):
self.columns = columns
def __getitem__(self, key):
if isinstance(key, int):
return super().__getitem__(self.columns[key])
else:
return super().__getitem__(key)
def __iter__(self):
for column in self.columns:
yield self[column]
def value_as_boolean(value):
if value.lower() not in ('on', 'off', 'true', 'false', '1', '0'):
raise ValueAsBooleanError

View file

@ -168,11 +168,33 @@ class BaseView(RenderMixin):
except DatasetteError:
raise
# Convert rows and columns to CSV
headings = data["columns"]
# if there are columns_expanded we need to add additional headings
columns_expanded = set(data.get("columns_expanded") or [])
if columns_expanded:
headings = []
for column in data["columns"]:
headings.append(column)
if column in columns_expanded:
headings.append("{}_label".format(column))
async def stream_fn(r):
writer = csv.writer(r)
writer.writerow(data["columns"])
writer.writerow(headings)
for row in data["rows"]:
writer.writerow(row)
if not columns_expanded:
# Simple path
writer.writerow(row)
else:
# Look for {"value": "label": } dicts and expand
new_row = []
for cell in row:
if isinstance(cell, dict):
new_row.append(cell["value"])
new_row.append(cell["label"])
else:
new_row.append(cell)
writer.writerow(new_row)
content_type = "text/plain; charset=utf-8"
headers = {}
@ -208,6 +230,10 @@ class BaseView(RenderMixin):
if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs)
if _format is None:
# HTML views default to expanding all forign key labels
kwargs['default_labels'] = True
extra_template_data = {}
start = time.time()
status_code = 200

View file

@ -9,7 +9,7 @@ from .base import BaseView, DatasetteError
class DatabaseView(BaseView):
async def data(self, request, name, hash):
async def data(self, request, name, hash, default_labels=False):
if request.args.get("sql"):
if not self.ds.config["allow_sql"]:
raise DatasetteError("sql= is not allowed", status=400)

View file

@ -1,3 +1,4 @@
from collections import namedtuple
import sqlite3
import urllib
@ -6,6 +7,7 @@ from sanic.exceptions import NotFound
from sanic.request import RequestParameters
from datasette.utils import (
CustomRow,
Filters,
InterruptedError,
compound_keys_after_sql,
@ -18,24 +20,45 @@ from datasette.utils import (
path_with_replaced_args,
to_css_class,
urlsafe_components,
value_as_boolean,
)
from .base import BaseView, DatasetteError, ureg
LINK_WITH_LABEL = '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'
LINK_WITH_VALUE = '<a href="/{database}/{table}/{link_id}">{id}</a>'
class RowTableShared(BaseView):
def sortable_columns_for_table(self, name, table, use_rowid):
table_metadata = self.table_metadata(name, table)
def sortable_columns_for_table(self, database, table, use_rowid):
table_metadata = self.table_metadata(database, table)
if "sortable_columns" in table_metadata:
sortable_columns = set(table_metadata["sortable_columns"])
else:
table_info = self.ds.inspect()[name]["tables"].get(table) or {}
table_info = self.ds.inspect()[database]["tables"].get(table) or {}
sortable_columns = set(table_info.get("columns", []))
if use_rowid:
sortable_columns.add("rowid")
return sortable_columns
def expandable_columns(self, database, table):
# Returns list of (fk_dict, label_column-or-None) pairs for that table
tables = self.ds.inspect()[database].get("tables", {})
table_info = tables.get(table)
if not table_info:
return []
expandables = []
for fk in table_info["foreign_keys"]["outgoing"]:
label_column = (
self.table_metadata(
database, fk["other_table"]
).get("label_column")
or tables.get(fk["other_table"], {}).get("label_column")
) or None
expandables.append((fk, label_column))
return expandables
async def expand_foreign_keys(self, database, table, column, values):
"Returns dict mapping (column, value) -> label"
labeled_fks = {}
@ -60,7 +83,10 @@ class RowTableShared(BaseView):
or tables_info.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
return {}
return {
(fk["column"], value): str(value)
for value in values
}
labeled_fks = {}
sql = '''
select {other_column}, {label_column}
@ -90,7 +116,6 @@ class RowTableShared(BaseView):
description,
rows,
link_column=False,
expand_foreign_keys=True,
):
"Returns columns, rows for specified table - including fancy foreign key treatment"
table_metadata = self.table_metadata(database, table)
@ -102,44 +127,12 @@ class RowTableShared(BaseView):
tables = info["tables"]
table_info = tables.get(table) or {}
pks = table_info.get("primary_keys") or []
# Prefetch foreign key resolutions for later expansion:
fks = {}
labeled_fks = {}
if table_info and expand_foreign_keys:
foreign_keys = table_info["foreign_keys"]["outgoing"]
for fk in foreign_keys:
label_column = (
# First look in metadata.json definition for this foreign key table:
self.table_metadata(database, fk["other_table"]).get("label_column")
# Fall back to label_column from .inspect() detection:
or tables.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
# No label for this FK
fks[fk["column"]] = fk["other_table"]
continue
ids_to_lookup = set([row[fk["column"]] for row in rows])
sql = '''
select {other_column}, {label_column}
from {other_table}
where {other_column} in ({placeholders})
'''.format(
other_column=escape_sqlite(fk["other_column"]),
label_column=escape_sqlite(label_column),
other_table=escape_sqlite(fk["other_table"]),
placeholders=", ".join(["?"] * len(ids_to_lookup)),
)
try:
results = await self.ds.execute(
database, sql, list(set(ids_to_lookup))
)
except InterruptedError:
pass
else:
for id, value in results:
labeled_fks[(fk["column"], id)] = (fk["other_table"], value)
column_to_foreign_key_table = {
fk["column"]: fk["other_table"]
for fk in table_info.get(
"foreign_keys", {}
).get("outgoing", None) or []
}
cell_rows = []
for row in rows:
@ -172,26 +165,22 @@ class RowTableShared(BaseView):
# already shown in the link column.
continue
if (column, value) in labeled_fks:
other_table, label = labeled_fks[(column, value)]
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
)
)
elif column in fks:
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
database=database,
table=urllib.parse.quote_plus(fks[column]),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
)
if isinstance(value, dict):
# It's an expanded foreign key - display link to other row
label = value["label"]
value = value["value"]
# The table we link to depends on the column
other_table = column_to_foreign_key_table[column]
link_template = (
LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
)
display_value = jinja2.Markup(link_template.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
))
elif value is None:
display_value = jinja2.Markup("&nbsp;")
elif is_url(str(value).strip()):
@ -231,7 +220,7 @@ class RowTableShared(BaseView):
class TableView(RowTableShared):
async def data(self, request, name, hash, table):
async def data(self, request, name, hash, table, default_labels=False):
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(
@ -583,6 +572,54 @@ class TableView(RowTableShared):
if use_rowid and filter_columns[0] == "rowid":
filter_columns = filter_columns[1:]
# Expand labeled columns if requested
columns_expanded = []
expandable_columns = self.expandable_columns(name, table)
columns_to_expand = None
try:
all_labels = value_as_boolean(special_args.get("_labels", ""))
except ValueError:
all_labels = default_labels
# Check for explicit _label=
if "_label" in request.args:
columns_to_expand = request.args["_label"]
if columns_to_expand is None and all_labels:
# expand all columns with foreign keys
columns_to_expand = [
fk["column"] for fk, _ in expandable_columns
]
if columns_to_expand:
expanded_labels = {}
for fk, label_column in expandable_columns:
column = fk["column"]
if column not in columns_to_expand:
continue
columns_expanded.append(column)
# Gather the values
column_index = columns.index(column)
values = [row[column_index] for row in rows]
# Expand them
expanded_labels.update(await self.expand_foreign_keys(
name, table, column, values
))
if expanded_labels:
# Rewrite the rows
new_rows = []
for row in rows:
new_row = CustomRow(columns)
for column in row.keys():
value = row[column]
if (column, value) in expanded_labels:
new_row[column] = {
'value': value,
'label': expanded_labels[(column, value)]
}
else:
new_row[column] = value
new_rows.append(new_row)
rows = new_rows
# Pagination next link
next_value = None
next_url = None
@ -681,7 +718,6 @@ class TableView(RowTableShared):
results.description,
rows,
link_column=not is_view,
expand_foreign_keys=True,
)
metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
"tables", {}
@ -735,6 +771,7 @@ class TableView(RowTableShared):
"truncated": results.truncated,
"table_rows_count": table_rows_count,
"filtered_table_rows_count": filtered_table_rows_count,
"columns_expanded": columns_expanded,
"columns": columns,
"primary_keys": pks,
"units": units,
@ -751,7 +788,7 @@ class TableView(RowTableShared):
class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path):
async def data(self, request, name, hash, table, pk_path, default_labels=False):
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {}
@ -781,7 +818,6 @@ class RowView(RowTableShared):
results.description,
rows,
link_column=False,
expand_foreign_keys=True,
)
for column in display_columns:
column["sortable"] = False