Extract facet code out into a new plugin hook, closes #427 (#445)

Datasette previously only supported one type of faceting: exact column value counting.

With this change, faceting logic is extracted into one or more separate classes, each of which can implement a different pattern of faceting. This is discussed in #427; potential upcoming facet types include facet-by-date, facet-by-JSON-array, facet-by-many-2-many and more.

A new plugin hook, register_facet_classes, can be used by plugins to register additional facet classes.

Each class must implement two methods: suggest(), which scans columns in the table to decide if they might be worth suggesting for faceting, and facet_results(), which executes the facet operation and returns results ready to be displayed in the UI.
This commit is contained in:
Simon Willison 2019-05-02 17:11:26 -07:00 committed by GitHub
commit ea66c45df9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 600 additions and 132 deletions

View file

@ -1,9 +1,11 @@
import urllib
import itertools
import jinja2
from sanic.exceptions import NotFound
from sanic.request import RequestParameters
from datasette.facets import load_facet_configs
from datasette.plugins import pm
from datasette.utils import (
CustomRow,
@ -344,9 +346,8 @@ class TableView(RowTableShared):
"where {} ".format(" and ".join(where_clauses))
) if where_clauses else "",
)
# Store current params and where_clauses for later:
# Copy of params so we can mutate them later:
from_sql_params = dict(**params)
from_sql_where_clauses = where_clauses[:]
count_sql = "select count(*) {}".format(from_sql)
@ -458,11 +459,14 @@ class TableView(RowTableShared):
else:
page_size = self.ds.page_size
sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
sql_no_limit = "select {select} from {table_name} {where}{order_by}".format(
select=select,
table_name=escape_sqlite(table),
where=where_clause,
order_by=order_by,
)
sql = "{sql_no_limit} limit {limit}{offset}".format(
sql_no_limit=sql_no_limit.rstrip(),
limit=page_size + 1,
offset=offset,
)
@ -474,72 +478,46 @@ class TableView(RowTableShared):
database, sql, params, truncate=True, **extra_args
)
# Number of filtered rows in whole set:
filtered_table_rows_count = None
if count_sql:
try:
count_rows = list(await self.ds.execute(
database, count_sql, from_sql_params
))
filtered_table_rows_count = count_rows[0][0]
except InterruptedError:
pass
# facets support
facet_size = self.ds.config("default_facet_size")
metadata_facets = table_metadata.get("facets", [])
facets = metadata_facets[:]
if request.args.get("_facet") and not self.ds.config("allow_facet"):
if not self.ds.config("allow_facet") and any(arg.startswith("_facet") for arg in request.args):
raise DatasetteError("_facet= is not allowed", status=400)
try:
facets.extend(request.args["_facet"])
except KeyError:
pass
# pylint: disable=no-member
facet_classes = list(
itertools.chain.from_iterable(pm.hook.register_facet_classes())
)
facet_results = {}
facets_timed_out = []
for column in facets:
if _next:
continue
facet_sql = """
select {col} as value, count(*) as count
{from_sql} {and_or_where} {col} is not null
group by {col} order by count desc limit {limit}
""".format(
col=escape_sqlite(column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1,
)
try:
facet_rows_results = await self.ds.execute(
database, facet_sql, params,
truncate=False,
custom_time_limit=self.ds.config("facet_time_limit_ms"),
)
facet_results_values = []
facet_results[column] = {
"name": column,
"results": facet_results_values,
"truncated": len(facet_rows_results) > facet_size,
}
facet_rows = facet_rows_results.rows[:facet_size]
# Attempt to expand foreign keys into labels
values = [row["value"] for row in facet_rows]
expanded = (await self.ds.expand_foreign_keys(
database, table, column, values
))
for row in facet_rows:
selected = (column, str(row["value"])) in other_args
if selected:
toggle_path = path_with_removed_args(
request, {column: str(row["value"])}
)
else:
toggle_path = path_with_added_args(
request, {column: row["value"]}
)
facet_results_values.append({
"value": row["value"],
"label": expanded.get(
(column, row["value"]),
row["value"]
),
"count": row["count"],
"toggle_url": self.ds.absolute_url(request, toggle_path),
"selected": selected,
})
except InterruptedError:
facets_timed_out.append(column)
facet_instances = []
for klass in facet_classes:
facet_instances.append(klass(
self.ds,
request,
database,
sql=sql_no_limit,
params=params,
table=table,
metadata=table_metadata,
row_count=filtered_table_rows_count,
))
for facet in facet_instances:
instance_facet_results, instance_facets_timed_out = await facet.facet_results()
facet_results.update(instance_facet_results)
facets_timed_out.extend(instance_facets_timed_out)
# Figure out columns and rows for the query
columns = [r[0] for r in results.description]
rows = list(results.rows)
@ -623,61 +601,14 @@ class TableView(RowTableShared):
)
rows = rows[:page_size]
# Number of filtered rows in whole set:
filtered_table_rows_count = None
if count_sql:
try:
count_rows = list(await self.ds.execute(
database, count_sql, from_sql_params
))
filtered_table_rows_count = count_rows[0][0]
except InterruptedError:
pass
# Detect suggested facets
suggested_facets = []
# Detect suggested facets
suggested_facets = []
if self.ds.config("suggest_facets") and self.ds.config("allow_facet"):
for facet_column in columns:
if facet_column in facets:
continue
if _next:
continue
if not self.ds.config("suggest_facets"):
continue
suggested_facet_sql = '''
select distinct {column} {from_sql}
{and_or_where} {column} is not null
limit {limit}
'''.format(
column=escape_sqlite(facet_column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1
)
distinct_values = None
try:
distinct_values = await self.ds.execute(
database, suggested_facet_sql, from_sql_params,
truncate=False,
custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
)
num_distinct_values = len(distinct_values)
if (
num_distinct_values and
num_distinct_values > 1 and
num_distinct_values <= facet_size and
num_distinct_values < filtered_table_rows_count
):
suggested_facets.append({
'name': facet_column,
'toggle_url': self.ds.absolute_url(
request, path_with_added_args(
request, {"_facet": facet_column}
)
),
})
except InterruptedError:
pass
if self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next:
for facet in facet_instances:
# TODO: ensure facet is not suggested if it is already active
# used to use 'if facet_column in facets' for this
suggested_facets.extend(await facet.suggest())
# human_description_en combines filters AND search, if provided
human_description_en = filters.human_description_en(extra=search_descriptions)
@ -725,7 +656,6 @@ class TableView(RowTableShared):
),
"extra_wheres_for_ui": extra_wheres_for_ui,
"form_hidden_args": form_hidden_args,
"facet_hideable": lambda facet: facet not in metadata_facets,
"is_sortable": any(c["sortable"] for c in display_columns),
"path_with_replaced_args": path_with_replaced_args,
"path_with_removed_args": path_with_removed_args,