diff --git a/datasette/utils.py b/datasette/utils.py index 299d7a4e..033f2c22 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +from collections import OrderedDict import base64 import hashlib import imp @@ -802,6 +803,23 @@ def path_with_format(request, format, extra_qs=None): return path +class CustomRow(OrderedDict): + # Loose imitation of sqlite3.Row which offers + # both index-based AND key-based lookups + def __init__(self, columns): + self.columns = columns + + def __getitem__(self, key): + if isinstance(key, int): + return super().__getitem__(self.columns[key]) + else: + return super().__getitem__(key) + + def __iter__(self): + for column in self.columns: + yield self[column] + + def value_as_boolean(value): if value.lower() not in ('on', 'off', 'true', 'false', '1', '0'): raise ValueAsBooleanError diff --git a/datasette/views/base.py b/datasette/views/base.py index f44aa5ce..055d174a 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -168,11 +168,33 @@ class BaseView(RenderMixin): except DatasetteError: raise # Convert rows and columns to CSV + headings = data["columns"] + # if there are columns_expanded we need to add additional headings + columns_expanded = set(data.get("columns_expanded") or []) + if columns_expanded: + headings = [] + for column in data["columns"]: + headings.append(column) + if column in columns_expanded: + headings.append("{}_label".format(column)) + async def stream_fn(r): writer = csv.writer(r) - writer.writerow(data["columns"]) + writer.writerow(headings) for row in data["rows"]: - writer.writerow(row) + if not columns_expanded: + # Simple path + writer.writerow(row) + else: + # Look for {"value": "label": } dicts and expand + new_row = [] + for cell in row: + if isinstance(cell, dict): + new_row.append(cell["value"]) + new_row.append(cell["label"]) + else: + new_row.append(cell) + writer.writerow(new_row) content_type = "text/plain; charset=utf-8" 
headers = {} @@ -208,6 +230,10 @@ class BaseView(RenderMixin): if _format == "csv": return await self.as_csv(request, name, hash, **kwargs) + if _format is None: + # HTML views default to expanding all foreign key labels + kwargs['default_labels'] = True + extra_template_data = {} start = time.time() status_code = 200 diff --git a/datasette/views/database.py b/datasette/views/database.py index 56db1cdc..2f3f41d3 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -9,7 +9,7 @@ from .base import BaseView, DatasetteError class DatabaseView(BaseView): - async def data(self, request, name, hash): + async def data(self, request, name, hash, default_labels=False): if request.args.get("sql"): if not self.ds.config["allow_sql"]: raise DatasetteError("sql= is not allowed", status=400) diff --git a/datasette/views/table.py b/datasette/views/table.py index 359bc81b..d5a2cfb0 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1,3 +1,4 @@ +from collections import namedtuple import sqlite3 import urllib @@ -6,6 +7,7 @@ from sanic.exceptions import NotFound from sanic.request import RequestParameters from datasette.utils import ( + CustomRow, Filters, InterruptedError, compound_keys_after_sql, @@ -18,24 +20,45 @@ from datasette.utils import ( path_with_replaced_args, to_css_class, urlsafe_components, + value_as_boolean, ) from .base import BaseView, DatasetteError, ureg +LINK_WITH_LABEL = '{label} {id}' +LINK_WITH_VALUE = '{id}' + class RowTableShared(BaseView): - def sortable_columns_for_table(self, name, table, use_rowid): - table_metadata = self.table_metadata(name, table) + def sortable_columns_for_table(self, database, table, use_rowid): + table_metadata = self.table_metadata(database, table) if "sortable_columns" in table_metadata: sortable_columns = set(table_metadata["sortable_columns"]) else: - table_info = self.ds.inspect()[name]["tables"].get(table) or {} + table_info = self.ds.inspect()[database]["tables"].get(table) or {} 
sortable_columns = set(table_info.get("columns", [])) if use_rowid: sortable_columns.add("rowid") return sortable_columns + def expandable_columns(self, database, table): + # Returns list of (fk_dict, label_column-or-None) pairs for that table + tables = self.ds.inspect()[database].get("tables", {}) + table_info = tables.get(table) + if not table_info: + return [] + expandables = [] + for fk in table_info["foreign_keys"]["outgoing"]: + label_column = ( + self.table_metadata( + database, fk["other_table"] + ).get("label_column") + or tables.get(fk["other_table"], {}).get("label_column") + ) or None + expandables.append((fk, label_column)) + return expandables + async def expand_foreign_keys(self, database, table, column, values): "Returns dict mapping (column, value) -> label" labeled_fks = {} @@ -60,7 +83,10 @@ class RowTableShared(BaseView): or tables_info.get(fk["other_table"], {}).get("label_column") ) if not label_column: - return {} + return { + (fk["column"], value): str(value) + for value in values + } labeled_fks = {} sql = ''' select {other_column}, {label_column} @@ -90,7 +116,6 @@ class RowTableShared(BaseView): description, rows, link_column=False, - expand_foreign_keys=True, ): "Returns columns, rows for specified table - including fancy foreign key treatment" table_metadata = self.table_metadata(database, table) @@ -102,44 +127,12 @@ class RowTableShared(BaseView): tables = info["tables"] table_info = tables.get(table) or {} pks = table_info.get("primary_keys") or [] - - # Prefetch foreign key resolutions for later expansion: - fks = {} - labeled_fks = {} - if table_info and expand_foreign_keys: - foreign_keys = table_info["foreign_keys"]["outgoing"] - for fk in foreign_keys: - label_column = ( - # First look in metadata.json definition for this foreign key table: - self.table_metadata(database, fk["other_table"]).get("label_column") - # Fall back to label_column from .inspect() detection: - or tables.get(fk["other_table"], {}).get("label_column") - ) 
- if not label_column: - # No label for this FK - fks[fk["column"]] = fk["other_table"] - continue - - ids_to_lookup = set([row[fk["column"]] for row in rows]) - sql = ''' - select {other_column}, {label_column} - from {other_table} - where {other_column} in ({placeholders}) - '''.format( - other_column=escape_sqlite(fk["other_column"]), - label_column=escape_sqlite(label_column), - other_table=escape_sqlite(fk["other_table"]), - placeholders=", ".join(["?"] * len(ids_to_lookup)), - ) - try: - results = await self.ds.execute( - database, sql, list(set(ids_to_lookup)) - ) - except InterruptedError: - pass - else: - for id, value in results: - labeled_fks[(fk["column"], id)] = (fk["other_table"], value) + column_to_foreign_key_table = { + fk["column"]: fk["other_table"] + for fk in table_info.get( + "foreign_keys", {} + ).get("outgoing", None) or [] + } cell_rows = [] for row in rows: @@ -172,26 +165,22 @@ class RowTableShared(BaseView): # already shown in the link column. continue - if (column, value) in labeled_fks: - other_table, label = labeled_fks[(column, value)] - display_value = jinja2.Markup( - '{label} {id}'.format( - database=database, - table=urllib.parse.quote_plus(other_table), - link_id=urllib.parse.quote_plus(str(value)), - id=str(jinja2.escape(value)), - label=str(jinja2.escape(label)), - ) - ) - elif column in fks: - display_value = jinja2.Markup( - '{id}'.format( - database=database, - table=urllib.parse.quote_plus(fks[column]), - link_id=urllib.parse.quote_plus(str(value)), - id=str(jinja2.escape(value)), - ) + if isinstance(value, dict): + # It's an expanded foreign key - display link to other row + label = value["label"] + value = value["value"] + # The table we link to depends on the column + other_table = column_to_foreign_key_table[column] + link_template = ( + LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE ) + display_value = jinja2.Markup(link_template.format( + database=database, + table=urllib.parse.quote_plus(other_table), + 
link_id=urllib.parse.quote_plus(str(value)), + id=str(jinja2.escape(value)), + label=str(jinja2.escape(label)), + )) elif value is None: display_value = jinja2.Markup(" ") elif is_url(str(value).strip()): @@ -231,7 +220,7 @@ class RowTableShared(BaseView): class TableView(RowTableShared): - async def data(self, request, name, hash, table): + async def data(self, request, name, hash, table, default_labels=False): canned_query = self.ds.get_canned_query(name, table) if canned_query is not None: return await self.custom_sql( @@ -583,6 +572,54 @@ class TableView(RowTableShared): if use_rowid and filter_columns[0] == "rowid": filter_columns = filter_columns[1:] + # Expand labeled columns if requested + columns_expanded = [] + expandable_columns = self.expandable_columns(name, table) + columns_to_expand = None + try: + all_labels = value_as_boolean(special_args.get("_labels", "")) + except ValueError: + all_labels = default_labels + # Check for explicit _label= + if "_label" in request.args: + columns_to_expand = request.args["_label"] + if columns_to_expand is None and all_labels: + # expand all columns with foreign keys + columns_to_expand = [ + fk["column"] for fk, _ in expandable_columns + ] + + if columns_to_expand: + expanded_labels = {} + for fk, label_column in expandable_columns: + column = fk["column"] + if column not in columns_to_expand: + continue + columns_expanded.append(column) + # Gather the values + column_index = columns.index(column) + values = [row[column_index] for row in rows] + # Expand them + expanded_labels.update(await self.expand_foreign_keys( + name, table, column, values + )) + if expanded_labels: + # Rewrite the rows + new_rows = [] + for row in rows: + new_row = CustomRow(columns) + for column in row.keys(): + value = row[column] + if (column, value) in expanded_labels: + new_row[column] = { + 'value': value, + 'label': expanded_labels[(column, value)] + } + else: + new_row[column] = value + new_rows.append(new_row) + rows = new_rows + # 
Pagination next link next_value = None next_url = None @@ -681,7 +718,6 @@ class TableView(RowTableShared): results.description, rows, link_column=not is_view, - expand_foreign_keys=True, ) metadata = self.ds.metadata.get("databases", {}).get(name, {}).get( "tables", {} @@ -735,6 +771,7 @@ class TableView(RowTableShared): "truncated": results.truncated, "table_rows_count": table_rows_count, "filtered_table_rows_count": filtered_table_rows_count, + "columns_expanded": columns_expanded, "columns": columns, "primary_keys": pks, "units": units, @@ -751,7 +788,7 @@ class TableView(RowTableShared): class RowView(RowTableShared): - async def data(self, request, name, hash, table, pk_path): + async def data(self, request, name, hash, table, pk_path, default_labels=False): pk_values = urlsafe_components(pk_path) info = self.ds.inspect()[name] table_info = info["tables"].get(table) or {} @@ -781,7 +818,6 @@ class RowView(RowTableShared): results.description, rows, link_column=False, - expand_foreign_keys=True, ) for column in display_columns: column["sortable"] = False diff --git a/docs/json_api.rst b/docs/json_api.rst index 379186a1..97029bb3 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -163,6 +163,12 @@ Special table arguments The Datasette table view takes a number of special querystring arguments: +``?_labels=on/off`` + Expand foreign key references for every possible column. See below. + +``?_label=COLUMN1&_label=COLUMN2`` + Expand foreign key references for one or more specified columns. + ``?_size=1000`` or ``?_size=max`` Sets a custom page size. This cannot exceed the ``max_returned_rows`` limit passed to ``datasette serve``. Use ``max`` to get ``max_returned_rows``. @@ -193,3 +199,38 @@ The Datasette table view takes a number of special querystring arguments: ``?_next=TOKEN`` Pagination by continuation token - pass the token that was returned in the ``"next"`` property by the previous page. 
+ +``?_labels=1`` + Indicates that you would like to expand any foreign key references. These + will be exposed in the JSON as ``{"value": 3, "label": "Monterey"}``. + +Expanding foreign key references +-------------------------------- + +Datasette can detect foreign key relationships and resolve those references into +labels. The HTML interface does this by default for every detected foreign key +column - you can turn that off using ``?_labels=off``. + +You can request foreign keys be expanded in JSON using the ``_labels=on`` or +``_label=COLUMN`` special querystring parameters. Here's what an expanded row +looks like:: + + [ + { + "rowid": 1, + "TreeID": 141565, + "qLegalStatus": { + "value": 1, + "label": "Permitted Site" + }, + "qSpecies": { + "value": 1, + "label": "Myoporum laetum :: Myoporum" + }, + "qAddress": "501X Baker St", + "SiteOrder": 1 + } + ] + +The column in the foreign key table that is used for the label can be specified +in ``metadata.json`` - see :ref:`label_columns`. diff --git a/docs/metadata.rst b/docs/metadata.rst index 6781ba83..029e2960 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -121,6 +121,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and You can also disable sorting entirely by setting ``"sortable_columns": []`` +.. 
_label_columns: Specifying the label column for a table --------------------------------------- diff --git a/tests/test_api.py b/tests/test_api.py index af1fe4c5..e9d1b94e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1096,6 +1096,52 @@ def test_suggest_facets_off(): ).json["suggested_facets"] +def test_expand_labels(app_client): + response = app_client.get( + "/test_tables/facetable.json?_shape=object&_labels=1&_size=2" + "&neighborhood__contains=c" + ) + assert { + "2": { + "pk": 2, + "planet_int": 1, + "state": "CA", + "city_id": { + "value": 1, + "label": "San Francisco" + }, + "neighborhood": "Dogpatch" + }, + "13": { + "pk": 13, + "planet_int": 1, + "state": "MI", + "city_id": { + "value": 3, + "label": "Detroit" + }, + "neighborhood": "Corktown" + } + } == response.json + + +def test_expand_label(app_client): + response = app_client.get( + "/test_tables/foreign_key_references.json?_shape=object" + "&_label=foreign_key_with_label" + ) + assert { + "1": { + "pk": "1", + "foreign_key_with_label": { + "value": "1", + "label": "hello" + }, + "foreign_key_with_no_label": "1" + } + } == response.json + + @pytest.mark.parametrize('path,expected_cache_control', [ ("/test_tables/facetable.json", "max-age=31536000"), ("/test_tables/facetable.json?_ttl=invalid", "max-age=31536000"), diff --git a/tests/test_csv.py b/tests/test_csv.py index d2475f67..2353b9ce 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -11,6 +11,24 @@ hello world '''.replace('\n', '\r\n') +EXPECTED_TABLE_WITH_LABELS_CSV = ''' +pk,planet_int,state,city_id,city_id_label,neighborhood +1,1,CA,1,San Francisco,Mission +2,1,CA,1,San Francisco,Dogpatch +3,1,CA,1,San Francisco,SOMA +4,1,CA,1,San Francisco,Tenderloin +5,1,CA,1,San Francisco,Bernal Heights +6,1,CA,1,San Francisco,Hayes Valley +7,1,CA,2,Los Angeles,Hollywood +8,1,CA,2,Los Angeles,Downtown +9,1,CA,2,Los Angeles,Los Feliz +10,1,CA,2,Los Angeles,Koreatown +11,1,MI,3,Detroit,Downtown +12,1,MI,3,Detroit,Greektown 
+13,1,MI,3,Detroit,Corktown +14,1,MI,3,Detroit,Mexicantown +15,2,MC,4,Memnonia,Arcadia Planitia +'''.lstrip().replace('\n', '\r\n') def test_table_csv(app_client): response = app_client.get('/test_tables/simple_primary_key.csv') @@ -19,6 +37,13 @@ def test_table_csv(app_client): assert EXPECTED_TABLE_CSV == response.text +def test_table_csv_with_labels(app_client): + response = app_client.get('/test_tables/facetable.csv?_labels=1') + assert response.status == 200 + assert 'text/plain; charset=utf-8' == response.headers['Content-Type'] + assert EXPECTED_TABLE_WITH_LABELS_CSV == response.text + + def test_custom_sql_csv(app_client): response = app_client.get( '/test_tables.csv?sql=select+content+from+simple_primary_key+limit+2' diff --git a/tests/test_html.py b/tests/test_html.py index c6ac54a5..3f5815d4 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -388,6 +388,20 @@ def test_table_html_foreign_key_links(app_client): assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')] +def test_table_html_disable_foreign_key_links_with_labels(app_client): + response = app_client.get('/test_tables/foreign_key_references?_labels=off') + assert response.status == 200 + table = Soup(response.body, 'html.parser').find('table') + expected = [ + [ + '