diff --git a/datasette/facets.py b/datasette/facets.py
new file mode 100644
index 00000000..a1467d4d
--- /dev/null
+++ b/datasette/facets.py
@@ -0,0 +1,199 @@
+from sanic.request import RequestParameters
+import urllib.parse
+from datasette import hookimpl
+# InterruptedError must be Datasette's own exception: without this import the
+# except clauses below would only catch Python's unrelated builtin of that name
+from datasette.utils import (
+    escape_sqlite,
+    path_with_added_args,
+    path_with_removed_args,
+    detect_json1,
+    InterruptedError,
+)
+
+
+@hookimpl
+def register_facet_classes():
+    "Return the Facet subclasses that ship with Datasette"
+    return [ColumnFacet]
+    # classes = [ColumnFacet, ManyToManyFacet]
+    # if detect_json1():
+    #     classes.append(ArrayFacet)
+    # return classes
+
+
+class Facet:
+    "Base class for facets: subclasses set type and implement both coroutines"
+    type = None
+
+    def __init__(self, ds, request, database, table, configs):
+        self.ds = ds
+        self.request = request
+        self.database = database
+        self.table = table  # can be None
+        self.configs = configs
+
+    async def suggest(self, sql, params, filtered_table_rows_count):
+        "Return a list of suggested facets; subclasses must implement this"
+        raise NotImplementedError
+
+    async def facet_results(self, sql, params):
+        "Return (facet_results, facets_timed_out); subclasses must implement this"
+        raise NotImplementedError
+
+
+class ColumnFacet(Facet):
+    "Facet by the distinct values of a single column"
+    # This is the default so type=""
+    type = ""
+
+    async def suggest(self, sql, params, filtered_table_rows_count):
+        """
+        Return a list of {"name", "toggle_url"} dicts for columns of sql that
+        are not already in self.configs and have more than one, but no more
+        than default_facet_size, distinct non-null values.
+        """
+        # Detect column names
+        columns = (
+            await self.ds.execute(
+                self.database, "select * from ({}) limit 0".format(sql),
+                params
+            )
+        ).columns
+        facet_size = self.ds.config("default_facet_size")
+        time_limit_ms = self.ds.config("facet_suggest_time_limit_ms")
+        suggested_facets = []
+        for column in columns:
+            if column in self.configs:
+                # Already an active facet, so there is nothing to suggest
+                continue
+            suggested_facet_sql = '''
+                select distinct {column} from (
+                    {sql}
+                ) where {column} is not null
+                limit {limit}
+            '''.format(
+                column=escape_sqlite(column),
+                sql=sql,
+                limit=facet_size+1
+            )
+            try:
+                distinct_values = await self.ds.execute(
+                    self.database, suggested_facet_sql, params,
+                    truncate=False,
+                    custom_time_limit=time_limit_ms,
+                )
+                num_distinct_values = len(distinct_values)
+                if (
+                    1 < num_distinct_values <= facet_size and
+                    num_distinct_values < filtered_table_rows_count
+                ):
+                    suggested_facets.append({
+                        'name': column,
+                        'toggle_url': self.ds.absolute_url(
+                            self.request, path_with_added_args(
+                                self.request, {"_facet": column}
+                            )
+                        ),
+                    })
+            except InterruptedError:
+                # The probe was interrupted: leave this column unsuggested
+                pass
+        return suggested_facets
+
+    async def facet_results(self, sql, params):
+        """
+        Count the values of each column in self.configs across the rows of sql.
+
+        Returns (facet_results, facets_timed_out): a dict keyed by column name
+        and a list of the columns whose query raised InterruptedError.
+        """
+        # self.configs should be a plain list of columns
+        facet_results = {}
+        facets_timed_out = []
+
+        # TODO: refactor this
+        args = RequestParameters(
+            urllib.parse.parse_qs(self.request.query_string, keep_blank_values=True)
+        )
+        # Ignore keys that start with "_" unless they also contain "__"
+        other_args = {
+            key: value[0]
+            for key, value in args.items()
+            if not key.startswith("_") or "__" in key
+        }
+
+        facet_size = self.ds.config("default_facet_size")
+        time_limit_ms = self.ds.config("facet_time_limit_ms")
+        for column in self.configs:
+            facet_sql = """
+                select {col} as value, count(*) as count from (
+                    {sql}
+                )
+                where {col} is not null
+                group by {col} order by count desc limit {limit}
+            """.format(
+                col=escape_sqlite(column),
+                sql=sql,
+                limit=facet_size+1,
+            )
+            # None means the querystring has no argument named after this column
+            current_value = other_args.get(column)
+            try:
+                facet_rows_results = await self.ds.execute(
+                    self.database, facet_sql, params,
+                    truncate=False,
+                    custom_time_limit=time_limit_ms,
+                )
+                facet_results_values = []
+                facet_results[column] = {
+                    "name": column,
+                    "results": facet_results_values,
+                    "truncated": len(facet_rows_results) > facet_size,
+                }
+                facet_rows = facet_rows_results.rows[:facet_size]
+                if self.table:
+                    # Attempt to expand foreign keys into labels
+                    values = [row["value"] for row in facet_rows]
+                    expanded = (await self.ds.expand_foreign_keys(
+                        self.database, self.table, column, values
+                    ))
+                else:
+                    expanded = {}
+                for row in facet_rows:
+                    selected = (
+                        current_value is not None and
+                        str(current_value) == str(row["value"])
+                    )
+                    if selected:
+                        toggle_path = path_with_removed_args(
+                            self.request, {column: str(row["value"])}
+                        )
+                    else:
+                        toggle_path = path_with_added_args(
+                            self.request, {column: row["value"]}
+                        )
+                    facet_results_values.append({
+                        "value": row["value"],
+                        "label": expanded.get(
+                            (column, row["value"]),
+                            row["value"]
+                        ),
+                        "count": row["count"],
+                        "toggle_url": self.ds.absolute_url(self.request, toggle_path),
+                        "selected": selected,
+                    })
+            except InterruptedError:
+                facets_timed_out.append(column)
+
+        return facet_results, facets_timed_out
+
+
+class ManyToManyFacet(Facet):
+    # Not implemented yet: suggest() and facet_results() raise NotImplementedError
+    type = "m2m"
+
+
+class ArrayFacet(Facet):
+    # Not implemented yet: suggest() and facet_results() raise NotImplementedError
+    type = "array"
diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py
index 6db95344..d244ba70 100644
--- a/datasette/hookspecs.py
+++ b/datasette/hookspecs.py
@@ -38,3 +38,8 @@ def publish_subcommand(publish):
 @hookspec(firstresult=True)
 def render_cell(value, column, table, database, datasette):
     "Customize rendering of HTML table cell values"
+
+
+@hookspec
+def register_facet_classes():
+    "Register Facet subclasses"
diff --git a/datasette/plugins.py b/datasette/plugins.py
index 2d2c62e4..245df6b3 100644
--- a/datasette/plugins.py
+++ b/datasette/plugins.py
@@ -5,6 +5,7 @@ from . import hookspecs
 DEFAULT_PLUGINS = (
     "datasette.publish.heroku",
     "datasette.publish.now",
+    "datasette.facets",
 )
 
 pm = pluggy.PluginManager("datasette")
diff --git a/datasette/views/table.py b/datasette/views/table.py
index 5923ac92..3a26d247 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -1,4 +1,5 @@
 import urllib
+import itertools
 
 import jinja2
 from sanic.exceptions import NotFound
@@ -478,7 +479,7 @@ class TableView(RowTableShared):
         )
 
         # facets support
-        facet_size = self.ds.config("default_facet_size")
+        # pylint: disable=no-member
         metadata_facets = table_metadata.get("facets", [])
         facets = metadata_facets[:]
         if request.args.get("_facet") and not self.ds.config("allow_facet"):
@@ -487,61 +488,21 @@ class TableView(RowTableShared):
             facets.extend(request.args["_facet"])
         except KeyError:
             pass
+        facet_classes = list(
+            itertools.chain.from_iterable(pm.hook.register_facet_classes())
+        )
         facet_results = {}
         facets_timed_out = []
-        for column in facets:
-            if _next:
-                continue
-            facet_sql = """
-                select {col} as value, count(*) as count
-                {from_sql} {and_or_where} {col} is not null
-                group by {col} order by count desc limit {limit}
-            """.format(
-                col=escape_sqlite(column),
-                from_sql=from_sql,
-                and_or_where='and' if from_sql_where_clauses else 'where',
-                limit=facet_size+1,
+        facet_instances = []
+        for klass in facet_classes:
+            facet_instances.append(klass(self.ds, request, database, table, configs=facets))
+
+        for facet in facet_instances:
+            instance_facet_results, instance_facets_timed_out = await facet.facet_results(
+                sql, params,
             )
-            try:
-                facet_rows_results = await self.ds.execute(
-                    database, facet_sql, params,
-                    truncate=False,
-                    custom_time_limit=self.ds.config("facet_time_limit_ms"),
-                )
-                facet_results_values = []
-                facet_results[column] = {
-                    "name": column,
-                    "results": facet_results_values,
-                    "truncated": len(facet_rows_results) > facet_size,
-                }
-                facet_rows = facet_rows_results.rows[:facet_size]
-                # Attempt to expand foreign keys into labels
-                values = [row["value"] for row in facet_rows]
-                expanded = (await self.ds.expand_foreign_keys(
-                    database, table, column, values
-                ))
-                for row in facet_rows:
-                    selected = str(other_args.get(column)) == str(row["value"])
-                    if selected:
-                        toggle_path = path_with_removed_args(
-                            request, {column: str(row["value"])}
-                        )
-                    else:
-                        toggle_path = path_with_added_args(
-                            request, {column: row["value"]}
-                        )
-                    facet_results_values.append({
-                        "value": row["value"],
-                        "label": expanded.get(
-                            (column, row["value"]),
-                            row["value"]
-                        ),
-                        "count": row["count"],
-                        "toggle_url": self.ds.absolute_url(request, toggle_path),
-                        "selected": selected,
-                    })
-            except InterruptedError:
-                facets_timed_out.append(column)
+            facet_results.update(instance_facet_results)
+            facets_timed_out.extend(instance_facets_timed_out)
 
         columns = [r[0] for r in results.description]
         rows = list(results.rows)
@@ -637,50 +598,14 @@ class TableView(RowTableShared):
             except InterruptedError:
                 pass
 
-            # Detect suggested facets
-            suggested_facets = []
-            if self.ds.config("suggest_facets") and self.ds.config("allow_facet"):
-                for facet_column in columns:
-                    if facet_column in facets:
-                        continue
-                    if _next:
-                        continue
-                    if not self.ds.config("suggest_facets"):
-                        continue
-                    suggested_facet_sql = '''
-                        select distinct {column} {from_sql}
-                        {and_or_where} {column} is not null
-                        limit {limit}
-                    '''.format(
-                        column=escape_sqlite(facet_column),
-                        from_sql=from_sql,
-                        and_or_where='and' if from_sql_where_clauses else 'where',
-                        limit=facet_size+1
-                    )
-                    distinct_values = None
-                    try:
-                        distinct_values = await self.ds.execute(
-                            database, suggested_facet_sql, from_sql_params,
-                            truncate=False,
-                            custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
-                        )
-                        num_distinct_values = len(distinct_values)
-                        if (
-                            num_distinct_values and
-                            num_distinct_values > 1 and
-                            num_distinct_values <= facet_size and
-                            num_distinct_values < filtered_table_rows_count
-                        ):
-                            suggested_facets.append({
-                                'name': facet_column,
-                                'toggle_url': self.ds.absolute_url(
-                                    request, path_with_added_args(
-                                        request, {"_facet": facet_column}
-                                    )
-                                ),
-                            })
-                    except InterruptedError:
-                        pass
+            # Detect suggested facets
+            suggested_facets = []
+
+            if self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next:
+                for facet in facet_instances:
+                    # ColumnFacet.suggest() skips columns that are already active
+                    # facets (replaces the old 'if facet_column in facets' check)
+                    suggested_facets.extend(await facet.suggest(sql, params, filtered_table_rows_count))
 
         # human_description_en combines filters AND search, if provided
         human_description_en = filters.human_description_en(extra=search_descriptions)