Facet by many-to-many, closes #365

This commit is contained in:
Simon Willison 2019-05-25 09:05:52 -07:00
commit d923d84754
3 changed files with 262 additions and 4 deletions

View file

@ -61,7 +61,7 @@ def load_facet_configs(request, table_metadata):
@hookimpl
def register_facet_classes():
    """Return the facet classes registered by default.

    ColumnFacet, DateFacet and ManyToManyFacet are always included.
    ArrayFacet is appended only when detect_json1() returns a truthy value.
    """
    classes = [ColumnFacet, DateFacet, ManyToManyFacet]
    if detect_json1():
        classes.append(ArrayFacet)
    return classes
@ -477,3 +477,191 @@ class DateFacet(Facet):
facets_timed_out.append(column)
return facet_results, facets_timed_out
class ManyToManyFacet(Facet):
    """Facet a table by rows linked to it through a many-to-many table.

    Three tables are involved:

    * ``self.table`` - the table being faceted
    * the *middle table* - a table with a foreign key to ``self.table`` and
      exactly two outgoing foreign keys in total
    * the *destination table* - the table referenced by the middle table's
      other foreign key; this is the name used in ``?_facet_m2m=``
    """

    type = "m2m"

    async def suggest(self):
        """Suggest one m2m facet per qualifying middle table.

        Returns a list of dicts with ``name`` (the destination table),
        ``type`` and ``toggle_url`` keys. Destinations already selected via
        ``_facet_m2m`` in the querystring are not suggested again. Returns
        an empty list if ``self.table`` has no foreign key information.
        """
        # This is calculated based on foreign key relationships to this table
        all_foreign_keys = await self.ds.execute_against_connection_in_thread(
            self.database, get_all_foreign_keys
        )
        if not all_foreign_keys.get(self.table):
            # It's probably a view
            return []
        args = set(self.get_querystring_pairs())
        incoming = all_foreign_keys[self.table]["incoming"]
        suggested_facets = []
        # Do any of these incoming tables have exactly two outgoing keys?
        for fk in incoming:
            outgoing = all_foreign_keys[fk["other_table"]]["outgoing"]
            if len(outgoing) != 2:
                continue
            destinations = [
                o["other_table"] for o in outgoing if o["other_table"] != self.table
            ]
            if not destinations:
                # Both foreign keys point back at self.table, so there is no
                # other table to facet by.
                continue
            destination_table = destinations[0]
            # Only suggest if it's not selected already
            if ("_facet_m2m", destination_table) in args:
                continue
            suggested_facets.append(
                {
                    "name": destination_table,
                    "type": "m2m",
                    "toggle_url": self.ds.absolute_url(
                        self.request,
                        path_with_added_args(
                            self.request, {"_facet_m2m": destination_table}
                        ),
                    ),
                }
            )
        return suggested_facets

    async def facet_results(self):
        """Compute results for every configured ``_facet_m2m`` facet.

        Returns a ``(facet_results, facets_timed_out)`` tuple:
        ``facet_results`` is a dict keyed by destination table name and
        ``facets_timed_out`` is a list of destination tables whose query
        raised InterruptedError. A configured destination that cannot be
        resolved to a middle table is skipped; facets computed for other
        configurations are still returned.
        """
        facet_results = {}
        facets_timed_out = []
        args = set(self.get_querystring_pairs())
        facet_size = self.ds.config("default_facet_size")
        all_foreign_keys = await self.ds.execute_against_connection_in_thread(
            self.database, get_all_foreign_keys
        )
        if not all_foreign_keys.get(self.table):
            # No foreign key information (e.g. a view): nothing to facet on.
            return facet_results, facets_timed_out
        # We care about three tables: self.table, middle_table and destination_table
        incoming = all_foreign_keys[self.table]["incoming"]
        for source_and_config in self.get_configs():
            config = source_and_config["config"]
            source = source_and_config["source"]
            # The destination_table is specified in the _facet_m2m=xxx parameter
            destination_table = config.get("column") or config["simple"]
            # Find middle table - it has fks to self.table AND destination_table
            fks = None
            middle_table = None
            for fk in incoming:
                other_table = fk["other_table"]
                outgoing = all_foreign_keys[other_table]["outgoing"]
                if len(outgoing) == 2 and any(
                    o["other_table"] == destination_table for o in outgoing
                ):
                    fks = outgoing
                    middle_table = other_table
                    break
            if middle_table is None or fks is None:
                # Unknown destination: skip it, but keep any facets that
                # were already computed for earlier configurations.
                continue
            # Now that we have determined the middle_table, we need to figure out
            # the three columns on that table which are relevant to us:
            #   column_to_table - the middle_table column with a foreign key
            #       to self.table
            #   table_pk - the column on self.table that is referenced
            #   column_to_destination - the middle_table column with a foreign
            #       key to destination_table
            #
            # The referenced column on destination_table is not needed.
            #
            # fks contains the 2 foreign key relationships, e.g:
            # [
            #   {'other_table': 'characteristic', 'column': 'characteristic_id', 'other_column': 'pk'},
            #   {'other_table': 'attractions', 'column': 'attraction_id', 'other_column': 'pk'}
            # ]
            column_to_table = None
            table_pk = None
            column_to_destination = None
            for fk in fks:
                if fk["other_table"] == self.table:
                    table_pk = fk["other_column"]
                    column_to_table = fk["column"]
                elif fk["other_table"] == destination_table:
                    column_to_destination = fk["column"]
            if not all((column_to_table, table_pk, column_to_destination)):
                # Reachable from the querystring, e.g. when destination_table
                # is self.table itself. Skip rather than raise, and never
                # pass None into the SQL below.
                continue
            facet_sql = """
                select
                    {middle_table}.{column_to_destination} as value,
                    count(distinct {middle_table}.{column_to_table}) as count
                from {middle_table}
                    where {middle_table}.{column_to_table} in (
                        select {table_pk} from ({sql})
                    )
                group by {middle_table}.{column_to_destination}
                order by count desc limit {limit}
            """.format(
                sql=self.sql,
                # Fetch one extra row so truncation can be detected
                limit=facet_size + 1,
                middle_table=escape_sqlite(middle_table),
                column_to_destination=escape_sqlite(column_to_destination),
                column_to_table=escape_sqlite(column_to_table),
                table_pk=escape_sqlite(table_pk),
            )
            try:
                facet_rows_results = await self.ds.execute(
                    self.database,
                    facet_sql,
                    self.params,
                    truncate=False,
                    custom_time_limit=self.ds.config("facet_time_limit_ms"),
                )
                facet_results_values = []
                facet_results[destination_table] = {
                    "name": destination_table,
                    "type": self.type,
                    "results": facet_results_values,
                    "hideable": source != "metadata",
                    "toggle_url": path_with_removed_args(
                        self.request, {"_facet_m2m": destination_table}
                    ),
                    "truncated": len(facet_rows_results) > facet_size,
                }
                facet_rows = facet_rows_results.rows[:facet_size]
                # Attempt to expand foreign keys into labels
                values = [row["value"] for row in facet_rows]
                expanded = await self.ds.expand_foreign_keys(
                    self.database, middle_table, column_to_destination, values
                )
                for row in facet_rows:
                    # Compact JSON describing the filter this value applies
                    through = json.dumps(
                        {
                            "table": middle_table,
                            "column": column_to_destination,
                            "value": str(row["value"]),
                        },
                        separators=(",", ":"),
                    )
                    selected = ("_through", through) in args
                    if selected:
                        toggle_path = path_with_removed_args(
                            self.request, {"_through": through}
                        )
                    else:
                        toggle_path = path_with_added_args(
                            self.request, {"_through": through}
                        )
                    facet_results_values.append(
                        {
                            "value": row["value"],
                            # Fall back to the raw value if no label was found
                            "label": expanded.get(
                                (column_to_destination, row["value"]), row["value"]
                            ),
                            "count": row["count"],
                            "toggle_url": self.ds.absolute_url(
                                self.request, toggle_path
                            ),
                            "selected": selected,
                        }
                    )
            except InterruptedError:
                facets_timed_out.append(destination_table)
        return facet_results, facets_timed_out

View file

@ -129,6 +129,17 @@ The performance of facets can be greatly improved by adding indexes on the colum
Enter ".help" for usage hints.
sqlite> CREATE INDEX Food_Trucks_state ON Food_Trucks("state");
.. _facet_by_m2m:
Facet by many-to-many
---------------------
Datasette can detect many-to-many SQL tables - defined as SQL tables which have exactly two foreign keys, each referencing another table.
If a many-to-many table has a foreign key pointing at the table you are currently viewing, Datasette will suggest that you facet by the table at the other end of that relationship.
Example here: `latest.datasette.io/fixtures/roadside_attractions?_facet_m2m=attraction_characteristic <https://latest.datasette.io/fixtures/roadside_attractions?_facet_m2m=attraction_characteristic>`__
.. _facet_by_json_array:
Facet by JSON array
@ -138,11 +149,13 @@ If your SQLite installation provides the ``json1`` extension (you can check usin
This is useful for modelling things like tags without needing to break them out into a new table.
You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
Example here: `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
.. _facet_by_date:
Facet by date
-------------
If Datasette finds any columns that contain dates in the first 100 values, it will offer a faceting interface against the dates of those values. This works especially well against timestamp values such as ``2019-03-01 12:44:00``.
Demo here: `latest.datasette.io/fixtures/facetable?_facet_date=created <https://latest.datasette.io/fixtures/facetable?_facet_date=created>`__
Example here: `latest.datasette.io/fixtures/facetable?_facet_date=created <https://latest.datasette.io/fixtures/facetable?_facet_date=created>`__

View file

@ -1,4 +1,4 @@
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet, ManyToManyFacet
from datasette.utils import detect_json1
from .fixtures import app_client # noqa
from .utils import MockRequest
@ -303,3 +303,60 @@ async def test_date_facet_results(app_client):
"truncated": False,
}
} == buckets
@pytest.mark.asyncio
async def test_m2m_facet_suggest(app_client):
    """suggest() offers a single m2m facet for roadside_attractions."""
    request = MockRequest("http://localhost/")
    m2m_facet = ManyToManyFacet(
        app_client.ds,
        request,
        database="fixtures",
        sql="select * from roadside_attractions",
        table="roadside_attractions",
    )
    expected = [
        {
            "name": "attraction_characteristic",
            "type": "m2m",
            "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic",
        }
    ]
    actual = await m2m_facet.suggest()
    assert expected == actual
@pytest.mark.asyncio
async def test_m2m_facet_results(app_client):
    """facet_results() returns labelled, counted buckets with toggle URLs."""
    request = MockRequest("http://localhost/?_facet_m2m=attraction_characteristic")
    m2m_facet = ManyToManyFacet(
        app_client.ds,
        request,
        database="fixtures",
        sql="select * from roadside_attractions",
        table="roadside_attractions",
    )
    # Buckets are expected in descending count order
    paranormal = {
        "value": 2,
        "label": "Paranormal",
        "count": 3,
        "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic&_through=%7B%22table%22%3A%22roadside_attraction_characteristics%22%2C%22column%22%3A%22characteristic_id%22%2C%22value%22%3A%222%22%7D",
        "selected": False,
    }
    museum = {
        "value": 1,
        "label": "Museum",
        "count": 2,
        "toggle_url": "http://localhost/?_facet_m2m=attraction_characteristic&_through=%7B%22table%22%3A%22roadside_attraction_characteristics%22%2C%22column%22%3A%22characteristic_id%22%2C%22value%22%3A%221%22%7D",
        "selected": False,
    }
    expected_buckets = {
        "attraction_characteristic": {
            "name": "attraction_characteristic",
            "type": "m2m",
            "results": [paranormal, museum],
            "hideable": True,
            "toggle_url": "/",
            "truncated": False,
        }
    }
    buckets, timed_out = await m2m_facet.facet_results()
    assert [] == timed_out
    assert expected_buckets == buckets