From d923d847545e829bf946bb9170bebfc7c3f9d993 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 25 May 2019 09:05:52 -0700
Subject: [PATCH] Facet by many-to-many, closes #365

---
 datasette/facets.py  | 190 ++++++++++++++++++++++++++++++++++++++++++-
 docs/facets.rst      |  17 +++-
 tests/test_facets.py |  59 +++++++++++++-
 3 files changed, 262 insertions(+), 4 deletions(-)

diff --git a/datasette/facets.py b/datasette/facets.py
index 0b07d0d4..97ddfa07 100644
--- a/datasette/facets.py
+++ b/datasette/facets.py
@@ -61,7 +61,7 @@ def load_facet_configs(request, table_metadata):
 
 @hookimpl
 def register_facet_classes():
-    classes = [ColumnFacet, DateFacet]
+    classes = [ColumnFacet, DateFacet, ManyToManyFacet]
     if detect_json1():
         classes.append(ArrayFacet)
     return classes
@@ -477,3 +477,191 @@ class DateFacet(Facet):
                 facets_timed_out.append(column)
 
         return facet_results, facets_timed_out
+
+
+class ManyToManyFacet(Facet):
+    type = "m2m"
+
+    async def suggest(self):
+        # This is calculated based on foreign key relationships to this table
+        # Are there any many-to-many tables pointing here?
+        suggested_facets = []
+        all_foreign_keys = await self.ds.execute_against_connection_in_thread(
+            self.database, get_all_foreign_keys
+        )
+        if not all_foreign_keys.get(self.table):
+            # It's probably a view
+            return []
+        args = set(self.get_querystring_pairs())
+        incoming = all_foreign_keys[self.table]["incoming"]
+        # Do any of these incoming tables have exactly two outgoing keys?
+        for fk in incoming:
+            other_table = fk["other_table"]
+            other_table_outgoing_foreign_keys = all_foreign_keys[other_table][
+                "outgoing"
+            ]
+            if len(other_table_outgoing_foreign_keys) == 2:
+                destinations = [
+                    t["other_table"]
+                    for t in other_table_outgoing_foreign_keys
+                    if t["other_table"] != self.table
+                ]
+                # Skip self-referential tables and already-selected facets
+                if not destinations or ("_facet_m2m", destinations[0]) in args:
+                    continue
+                suggested_facets.append(
+                    {
+                        "name": destinations[0],
+                        "type": "m2m",
+                        "toggle_url": self.ds.absolute_url(
+                            self.request,
+                            path_with_added_args(
+                                self.request, {"_facet_m2m": destinations[0]}
+                            ),
+                        ),
+                    }
+                )
+        return suggested_facets
+
+    async def facet_results(self):
+        facet_results = {}
+        facets_timed_out = []
+        args = set(self.get_querystring_pairs())
+        facet_size = self.ds.config("default_facet_size")
+        all_foreign_keys = await self.ds.execute_against_connection_in_thread(
+            self.database, get_all_foreign_keys
+        )
+        if not all_foreign_keys.get(self.table):
+            return facet_results, facets_timed_out
+        # We care about three tables: self.table, middle_table and destination_table
+        incoming = all_foreign_keys[self.table]["incoming"]
+        for source_and_config in self.get_configs():
+            config = source_and_config["config"]
+            source = source_and_config["source"]
+            # The destination_table is specified in the _facet_m2m=xxx parameter
+            destination_table = config.get("column") or config["simple"]
+            # Find middle table - it has fks to self.table AND destination_table
+            fks = None
+            middle_table = None
+            for fk in incoming:
+                other_table = fk["other_table"]
+                other_table_outgoing_foreign_keys = all_foreign_keys[other_table][
+                    "outgoing"
+                ]
+                if (
+                    any(
+                        o
+                        for o in other_table_outgoing_foreign_keys
+                        if o["other_table"] == destination_table
+                    )
+                    and len(other_table_outgoing_foreign_keys) == 2
+                ):
+                    fks = other_table_outgoing_foreign_keys
+                    middle_table = other_table
+                    break
+            if middle_table is None or fks is None:
+                continue
+            # Now that we have determined the middle_table, we need to figure out the three
+            # columns on that table which are relevant to us. These are:
+            # column_to_table - the middle_table column with a foreign key to self.table
+            # table_pk - the primary key column on self.table that is referenced
+            # column_to_destination - the column with a foreign key to destination_table
+            #
+            # It turns out we don't actually need the fourth obvious column:
+            # destination_pk = the primary key column on destination_table which is referenced
+            #
+            # These are all in the fks array - which now contains 2 foreign key relationships, e.g:
+            # [
+            #   {'other_table': 'characteristic', 'column': 'characteristic_id', 'other_column': 'pk'},
+            #   {'other_table': 'attractions', 'column': 'attraction_id', 'other_column': 'pk'}
+            # ]
+            column_to_table = None
+            table_pk = None
+            column_to_destination = None
+            for fk in fks:
+                if fk["other_table"] == self.table:
+                    table_pk = fk["other_column"]
+                    column_to_table = fk["column"]
+                elif fk["other_table"] == destination_table:
+                    column_to_destination = fk["column"]
+            assert all((column_to_table, table_pk, column_to_destination))
+            facet_sql = """
+                select
+                    {middle_table}.{column_to_destination} as value,
+                    count(distinct {middle_table}.{column_to_table}) as count
+                from {middle_table}
+                where {middle_table}.{column_to_table} in (
+                    select {table_pk} from ({sql})
+                )
+                group by {middle_table}.{column_to_destination}
+                order by count desc limit {limit}
+            """.format(
+                sql=self.sql,
+                limit=facet_size + 1,
+                middle_table=escape_sqlite(middle_table),
+                column_to_destination=escape_sqlite(column_to_destination),
+                column_to_table=escape_sqlite(column_to_table),
+                table_pk=escape_sqlite(table_pk),
+            )
+            try:
+                facet_rows_results = await self.ds.execute(
+                    self.database,
+                    facet_sql,
+                    self.params,
+                    truncate=False,
+                    custom_time_limit=self.ds.config("facet_time_limit_ms"),
+                )
+                facet_results_values = []
+                facet_results[destination_table] = {
+                    "name": destination_table,
+                    "type": self.type,
+                    "results": facet_results_values,
+                    "hideable": source != "metadata",
+                    "toggle_url": path_with_removed_args(
+                        self.request, {"_facet_m2m": destination_table}
+                    ),
+                    "truncated": len(facet_rows_results) > facet_size,
+                }
+                facet_rows = facet_rows_results.rows[:facet_size]
+
+                # Attempt to expand foreign keys into labels
+                values = [row["value"] for row in facet_rows]
+                expanded = await self.ds.expand_foreign_keys(
+                    self.database, middle_table, column_to_destination, values
+                )
+
+                for row in facet_rows:
+                    through = json.dumps(
+                        {
+                            "table": middle_table,
+                            "column": column_to_destination,
+                            "value": str(row["value"]),
+                        },
+                        separators=(",", ":"),
+                    )
+                    selected = ("_through", through) in args
+                    if selected:
+                        toggle_path = path_with_removed_args(
+                            self.request, {"_through": through}
+                        )
+                    else:
+                        toggle_path = path_with_added_args(
+                            self.request, {"_through": through}
+                        )
+                    facet_results_values.append(
+                        {
+                            "value": row["value"],
+                            "label": expanded.get(
+                                (column_to_destination, row["value"]), row["value"]
+                            ),
+                            "count": row["count"],
+                            "toggle_url": self.ds.absolute_url(
+                                self.request, toggle_path
+                            ),
+                            "selected": selected,
+                        }
+                    )
+            except InterruptedError:
+                facets_timed_out.append(destination_table)
+
+        return facet_results, facets_timed_out
diff --git a/docs/facets.rst b/docs/facets.rst
index 4e03a4fe..ddf69cb4 100644
--- a/docs/facets.rst
+++ b/docs/facets.rst
@@ -129,6 +129,17 @@ The performance of facets can be greatly improved by adding indexes on the colum
     Enter ".help" for usage hints.
     sqlite> CREATE INDEX Food_Trucks_state ON Food_Trucks("state");
 
+.. _facet_by_m2m:
+
+Facet by many-to-many
+---------------------
+
+Datasette can detect many-to-many SQL tables - defined as SQL tables which have foreign key relationships to two other tables.
+
+If a many-to-many table exists pointing at the table you are currently viewing, Datasette will suggest you facet the table based on that relationship.
+
+Example here: `latest.datasette.io/fixtures/roadside_attractions?_facet_m2m=attraction_characteristic <https://latest.datasette.io/fixtures/roadside_attractions?_facet_m2m=attraction_characteristic>`__
+
 .. _facet_by_json_array:
 
 Facet by JSON array
@@ -138,11 +149,13 @@ If your SQLite installation provides the ``json1`` extension (you can check usin
 
 This is useful for modelling things like tags without needing to break them out into a new table.
 
-You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
+Example here: `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
+
+.. _facet_by_date:
 
 Facet by date
 -------------
 
 If Datasette finds any columns that contain dates in the first 100 values, it will offer a faceting interface against the dates of those values. This works especially well against timestamp values such as ``2019-03-01 12:44:00``.
 
-Demo here: `latest.datasette.io/fixtures/facetable?_facet_date=created <https://latest.datasette.io/fixtures/facetable?_facet_date=created>`__
+Example here: `latest.datasette.io/fixtures/facetable?_facet_date=created <https://latest.datasette.io/fixtures/facetable?_facet_date=created>`__
diff --git a/tests/test_facets.py b/tests/test_facets.py
index 1efc9c63..d2387d5c 100644
--- a/tests/test_facets.py
+++ b/tests/test_facets.py
@@ -1,4 +1,4 @@
-from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
+from datasette.facets import ColumnFacet, ArrayFacet, DateFacet, ManyToManyFacet
 from datasette.utils import detect_json1
 from .fixtures import app_client  # noqa
 from .utils import MockRequest
@@ -303,3 +303,60 @@ async def test_date_facet_results(app_client):
             "truncated": False,
         }
     } == buckets
+
+
+@pytest.mark.asyncio
+async def test_m2m_facet_suggest(app_client):
+    facet = ManyToManyFacet(
+        app_client.ds,
+        MockRequest("http://localhost/"),
+        database="fixtures",
+        sql="select * from roadside_attractions",
+        table="roadside_attractions",
+    )
+    suggestions = await facet.suggest()
+    assert [
+        {
+            "name": "attraction_characteristic",
+            "type": "m2m",
+            "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic",
+        }
+    ] == suggestions
+
+
+@pytest.mark.asyncio
+async def test_m2m_facet_results(app_client):
+    facet = ManyToManyFacet(
+        app_client.ds,
+        MockRequest("http://localhost/?_facet_m2m=attraction_characteristic"),
+        database="fixtures",
+        sql="select * from roadside_attractions",
+        table="roadside_attractions",
+    )
+    buckets, timed_out = await facet.facet_results()
+    assert [] == timed_out
+    assert {
+        "attraction_characteristic": {
+            "name": "attraction_characteristic",
+            "type": "m2m",
+            "results": [
+                {
+                    "value": 2,
+                    "label": "Paranormal",
+                    "count": 3,
+                    "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic&_through=%7B%22table%22%3A%22roadside_attraction_characteristics%22%2C%22column%22%3A%22characteristic_id%22%2C%22value%22%3A%222%22%7D",
+                    "selected": False,
+                },
+                {
+                    "value": 1,
+                    "label": "Museum",
+                    "count": 2,
+                    "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic&_through=%7B%22table%22%3A%22roadside_attraction_characteristics%22%2C%22column%22%3A%22characteristic_id%22%2C%22value%22%3A%221%22%7D",
+                    "selected": False,
+                },
+            ],
+            "hideable": True,
+            "toggle_url": "/",
+            "truncated": False,
+        }
+    } == buckets