WIP refactoring facets to plugin, refs #427

This commit is contained in:
Simon Willison 2019-04-13 13:03:59 -07:00
commit 538d91c44a
4 changed files with 199 additions and 97 deletions

171
datasette/facets.py Normal file
View file

@ -0,0 +1,171 @@
from sanic.request import RequestParameters
import urllib
from datasette import hookimpl
from datasette.utils import (
escape_sqlite,
path_with_added_args,
path_with_removed_args,
detect_json1
)
@hookimpl
def register_facet_classes():
    """Return the Facet subclasses this module contributes as a plugin."""
    # TODO: once the other facet types are implemented, register them too:
    #   facet_classes = [ColumnFacet, ManyToManyFacet]
    #   if detect_json1():
    #       facet_classes.append(ArrayFacet)
    facet_classes = [ColumnFacet]
    return facet_classes
class Facet:
    """Base class for facet implementations.

    Subclasses set ``type`` and override ``suggest`` and ``facet_results``;
    both raise NotImplementedError here.
    """

    # Identifier for the kind of facet; overridden by each subclass
    type = None

    def __init__(self, ds, request, database, table, configs):
        """Store the context the facet runs against.

        ds -- object used by subclasses to execute SQL and read config
        request -- the current request, used to build toggle URLs
        database -- name of the database the queries run against
        table -- table being faceted; can be None
        configs -- the facet configurations to calculate
        """
        self.ds = ds
        self.request = request
        self.database = database
        self.table = table  # can be None
        self.configs = configs

    async def suggest(self, sql, params, filtered_table_rows_count=None):
        """Return a list of suggested facets for the query ``sql``.

        filtered_table_rows_count is accepted (defaulting to None) so the
        base signature matches how the table view invokes suggest() and how
        subclasses override it.
        """
        raise NotImplementedError

    async def facet_results(self, sql, params):
        """Calculate the configured facets for the query ``sql``.

        Returns a pair (results, timed_out): the facet results and a list
        of the facets that could not be calculated within the time limit.
        """
        raise NotImplementedError
class ColumnFacet(Facet):
    """Facet on the distinct values of a single column."""

    # This is the default so type=""
    type = ""

    async def suggest(self, sql, params, filtered_table_rows_count):
        """Suggest columns of ``sql`` that would make useful facets.

        A column is suggested when it has more than one distinct non-null
        value, no more than the configured ``default_facet_size``, and fewer
        than ``filtered_table_rows_count``. Columns already listed in
        ``self.configs`` are skipped so an active facet is never suggested
        again. Columns whose probe query is interrupted by the time limit
        are silently skipped.

        Returns a list of {"name": ..., "toggle_url": ...} dictionaries.
        """
        # "limit 0" fetches no rows but still exposes the column names
        columns = (
            await self.ds.execute(
                self.database, "select * from ({}) limit 0".format(sql), params
            )
        ).columns
        facet_size = self.ds.config("default_facet_size")
        time_limit_ms = self.ds.config("facet_suggest_time_limit_ms")
        # Plain membership test (not a set) so unhashable configs cannot break it
        already_active = self.configs or ()
        suggested_facets = []
        for column in columns:
            if column in already_active:
                continue
            # Ask for one more than facet_size so "too many values" is detectable
            suggested_facet_sql = '''
                select distinct {column} from (
                    {sql}
                ) where {column} is not null
                limit {limit}
            '''.format(
                column=escape_sqlite(column),
                sql=sql,
                limit=facet_size + 1,
            )
            try:
                distinct_values = await self.ds.execute(
                    self.database, suggested_facet_sql, params,
                    truncate=False,
                    custom_time_limit=time_limit_ms,
                )
            except InterruptedError:
                # Probe exceeded its time limit: just don't suggest this column
                continue
            num_distinct_values = len(distinct_values)
            if (
                1 < num_distinct_values <= facet_size
                and num_distinct_values < filtered_table_rows_count
            ):
                suggested_facets.append({
                    'name': column,
                    'toggle_url': self.ds.absolute_url(
                        self.request, path_with_added_args(
                            self.request, {"_facet": column}
                        )
                    ),
                })
        return suggested_facets

    async def facet_results(self, sql, params):
        """Calculate value counts for every column in ``self.configs``.

        Returns (facet_results, facets_timed_out). facet_results maps each
        column to a dictionary with "name", "results" and "truncated" keys;
        each entry of "results" carries the value, its label, its count, a
        toggle URL and whether it is currently selected. facets_timed_out
        lists the columns whose calculation was interrupted.
        """
        # self.configs should be a plain list of columns
        facet_results = {}
        facets_timed_out = []
        # TODO: refactor this
        args = RequestParameters(
            urllib.parse.parse_qs(self.request.query_string, keep_blank_values=True)
        )
        # Keep filter arguments only: drop "_special" keys unless they contain
        # "__" (e.g. "_col__exact"), and take the first value of each
        other_args = {
            key: value[0]
            for key, value in args.items()
            if not (key.startswith("_") and "__" not in key)
        }
        facet_size = self.ds.config("default_facet_size")
        time_limit_ms = self.ds.config("facet_time_limit_ms")
        for column in self.configs:
            # Fetch one extra row so truncation can be reported
            facet_sql = """
                select {col} as value, count(*) as count from (
                    {sql}
                )
                where {col} is not null
                group by {col} order by count desc limit {limit}
            """.format(
                col=escape_sqlite(column),
                sql=sql,
                limit=facet_size + 1,
            )
            # The whole calculation stays inside the try: label expansion
            # also goes through self.ds and may be interrupted as well
            try:
                facet_rows_results = await self.ds.execute(
                    self.database, facet_sql, params,
                    truncate=False,
                    custom_time_limit=time_limit_ms,
                )
                facet_results_values = []
                facet_results[column] = {
                    "name": column,
                    "results": facet_results_values,
                    "truncated": len(facet_rows_results) > facet_size,
                }
                facet_rows = facet_rows_results.rows[:facet_size]
                if self.table:
                    # Attempt to expand foreign keys into labels
                    values = [row["value"] for row in facet_rows]
                    expanded = (await self.ds.expand_foreign_keys(
                        self.database, self.table, column, values
                    ))
                else:
                    expanded = {}
                for row in facet_rows:
                    # Check presence explicitly: comparing str(None) would
                    # wrongly select a facet value that is the text "None"
                    selected = (
                        column in other_args
                        and other_args[column] == str(row["value"])
                    )
                    if selected:
                        toggle_path = path_with_removed_args(
                            self.request, {column: str(row["value"])}
                        )
                    else:
                        toggle_path = path_with_added_args(
                            self.request, {column: row["value"]}
                        )
                    facet_results_values.append({
                        "value": row["value"],
                        "label": expanded.get(
                            (column, row["value"]),
                            row["value"]
                        ),
                        "count": row["count"],
                        "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                        "selected": selected,
                    })
            except InterruptedError:
                facets_timed_out.append(column)
        return facet_results, facets_timed_out
class ManyToManyFacet(Facet):
    """Facet type "m2m"; it defines no behaviour of its own yet."""

    type = "m2m"
class ArrayFacet(Facet):
    """Facet type "array"; it defines no behaviour of its own yet."""

    type = "array"

View file

@ -38,3 +38,8 @@ def publish_subcommand(publish):
@hookspec(firstresult=True)
def render_cell(value, column, table, database, datasette):
    """Customize rendering of HTML table cell values.

    Declared with firstresult=True, so a single implementation's result
    is used rather than a list of results from every plugin.
    """
@hookspec
def register_facet_classes():
    """Register Facet subclasses.

    Implementations return a list of Facet subclasses.
    """

View file

@ -5,6 +5,7 @@ from . import hookspecs
# Dotted module paths of the plugins that ship inside the datasette package.
# NOTE(review): presumably these are registered with the plugin manager
# below -- confirm against the registration code outside this hunk.
DEFAULT_PLUGINS = (
    "datasette.publish.heroku",
    "datasette.publish.now",
    "datasette.facets",
)
pm = pluggy.PluginManager("datasette")

View file

@ -1,4 +1,5 @@
import urllib
import itertools
import jinja2
from sanic.exceptions import NotFound
@ -478,7 +479,7 @@ class TableView(RowTableShared):
)
# facets support
facet_size = self.ds.config("default_facet_size")
# pylint: disable=no-member
metadata_facets = table_metadata.get("facets", [])
facets = metadata_facets[:]
if request.args.get("_facet") and not self.ds.config("allow_facet"):
@ -487,61 +488,21 @@ class TableView(RowTableShared):
facets.extend(request.args["_facet"])
except KeyError:
pass
facet_classes = list(
itertools.chain.from_iterable(pm.hook.register_facet_classes())
)
facet_results = {}
facets_timed_out = []
for column in facets:
if _next:
continue
facet_sql = """
select {col} as value, count(*) as count
{from_sql} {and_or_where} {col} is not null
group by {col} order by count desc limit {limit}
""".format(
col=escape_sqlite(column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1,
facet_instances = []
for klass in facet_classes:
facet_instances.append(klass(self.ds, request, database, table, configs=facets))
for facet in facet_instances:
instance_facet_results, instance_facets_timed_out = await facet.facet_results(
sql, params,
)
try:
facet_rows_results = await self.ds.execute(
database, facet_sql, params,
truncate=False,
custom_time_limit=self.ds.config("facet_time_limit_ms"),
)
facet_results_values = []
facet_results[column] = {
"name": column,
"results": facet_results_values,
"truncated": len(facet_rows_results) > facet_size,
}
facet_rows = facet_rows_results.rows[:facet_size]
# Attempt to expand foreign keys into labels
values = [row["value"] for row in facet_rows]
expanded = (await self.ds.expand_foreign_keys(
database, table, column, values
))
for row in facet_rows:
selected = str(other_args.get(column)) == str(row["value"])
if selected:
toggle_path = path_with_removed_args(
request, {column: str(row["value"])}
)
else:
toggle_path = path_with_added_args(
request, {column: row["value"]}
)
facet_results_values.append({
"value": row["value"],
"label": expanded.get(
(column, row["value"]),
row["value"]
),
"count": row["count"],
"toggle_url": self.ds.absolute_url(request, toggle_path),
"selected": selected,
})
except InterruptedError:
facets_timed_out.append(column)
facet_results.update(instance_facet_results)
facets_timed_out.extend(instance_facets_timed_out)
columns = [r[0] for r in results.description]
rows = list(results.rows)
@ -637,50 +598,14 @@ class TableView(RowTableShared):
except InterruptedError:
pass
# Detect suggested facets
suggested_facets = []
if self.ds.config("suggest_facets") and self.ds.config("allow_facet"):
for facet_column in columns:
if facet_column in facets:
continue
if _next:
continue
if not self.ds.config("suggest_facets"):
continue
suggested_facet_sql = '''
select distinct {column} {from_sql}
{and_or_where} {column} is not null
limit {limit}
'''.format(
column=escape_sqlite(facet_column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1
)
distinct_values = None
try:
distinct_values = await self.ds.execute(
database, suggested_facet_sql, from_sql_params,
truncate=False,
custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
)
num_distinct_values = len(distinct_values)
if (
num_distinct_values and
num_distinct_values > 1 and
num_distinct_values <= facet_size and
num_distinct_values < filtered_table_rows_count
):
suggested_facets.append({
'name': facet_column,
'toggle_url': self.ds.absolute_url(
request, path_with_added_args(
request, {"_facet": facet_column}
)
),
})
except InterruptedError:
pass
# Detect suggested facets
suggested_facets = []
if self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next:
for facet in facet_instances:
# TODO: ensure facet is not suggested if it is already active
# used to use 'if facet_column in facets' for this
suggested_facets.extend(await facet.suggest(sql, params, filtered_table_rows_count))
# human_description_en combines filters AND search, if provided
human_description_en = filters.human_description_en(extra=search_descriptions)