From 53d2f00b732488e0204332da181fc256aa00f41c Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Thu, 2 May 2019 20:21:27 -0400
Subject: [PATCH] Implemented ArrayFacet, closes #359

---
 datasette/facets.py  | 134 ++++++++++++++++++++++++++++++++++++++++++-
 tests/test_api.py    |  10 +++-
 tests/test_facets.py |  69 +++++++++++++++++++++-
 3 files changed, 209 insertions(+), 4 deletions(-)

diff --git a/datasette/facets.py b/datasette/facets.py
index 9e3b3044..73d959c5 100644
--- a/datasette/facets.py
+++ b/datasette/facets.py
@@ -62,7 +62,10 @@ def load_facet_configs(request, table_metadata):

 @hookimpl
 def register_facet_classes():
-    return [ColumnFacet]
+    classes = [ColumnFacet]
+    if detect_json1():
+        classes.append(ArrayFacet)
+    return classes


 class Facet:
@@ -249,3 +252,132 @@ class ColumnFacet(Facet):
                 facets_timed_out.append(column)

         return facet_results, facets_timed_out
+
+
+class ArrayFacet(Facet):
+    """Facet on columns whose values are JSON arrays.
+
+    register_facet_classes() only registers this class when detect_json1()
+    is true; the queries here use the json_type() and json_each() SQL
+    functions.
+    """
+
+    type = "array"
+
+    async def suggest(self):
+        """Suggest columns whose values are all JSON arrays or null.
+
+        Returns a list of dicts with "name", "type" and "toggle_url" keys.
+        Columns already present in self.get_configs() are skipped, and so
+        are columns whose probe query is interrupted or raises an
+        sqlite3.OperationalError.
+        """
+        columns = await self.get_columns(self.sql, self.params)
+        suggested_facets = []
+        # Read configs the same way facet_results() does, so an entry keyed
+        # by "column" instead of "simple" does not raise KeyError here
+        already_enabled = [
+            c["config"].get("column") or c["config"]["simple"]
+            for c in self.get_configs()
+        ]
+        for column in columns:
+            if column in already_enabled:
+                continue
+            # Is every value in this column either null or a JSON array?
+            suggested_facet_sql = """
+                select distinct json_type({column})
+                from ({sql})
+            """.format(
+                column=escape_sqlite(column),
+                sql=self.sql,
+            )
+            try:
+                results = await self.ds.execute(
+                    self.database, suggested_facet_sql, self.params,
+                    truncate=False,
+                    custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
+                    log_sql_errors=False,
+                )
+                # The query has no "order by", so null may be returned before
+                # or after "array" - compare as a set rather than a tuple
+                types = {r[0] for r in results.rows}
+                if types in ({"array"}, {"array", None}):
+                    suggested_facets.append({
+                        "name": column,
+                        "type": "array",
+                        "toggle_url": self.ds.absolute_url(
+                            self.request, path_with_added_args(
+                                self.request, {"_facet_array": column}
+                            )
+                        ),
+                    })
+            except (InterruptedError, sqlite3.OperationalError):
+                continue
+        return suggested_facets
+
+    async def facet_results(self):
+        """Count array item occurrences for each configured column.
+
+        Returns a (facet_results, facets_timed_out) tuple: a dict keyed
+        by column name, and a list of columns whose query raised
+        InterruptedError.
+        """
+        # self.configs should be a plain list of columns
+        facet_results = {}
+        facets_timed_out = []
+
+        facet_size = self.ds.config("default_facet_size")
+        for source_and_config in self.get_configs():
+            config = source_and_config["config"]
+            source = source_and_config["source"]
+            column = config.get("column") or config["simple"]
+            # Ask for one extra row so that truncation can be detected below
+            facet_sql = """
+                select j.value as value, count(*) as count from (
+                    {sql}
+                ) join json_each({col}) j
+                group by j.value order by count desc limit {limit}
+            """.format(
+                col=escape_sqlite(column),
+                sql=self.sql,
+                limit=facet_size+1,
+            )
+            try:
+                facet_rows_results = await self.ds.execute(
+                    self.database, facet_sql, self.params,
+                    truncate=False,
+                    custom_time_limit=self.ds.config("facet_time_limit_ms"),
+                )
+                facet_results_values = []
+                facet_results[column] = {
+                    "name": column,
+                    "type": self.type,
+                    "results": facet_results_values,
+                    "hideable": source != "metadata",
+                    "toggle_url": path_with_removed_args(self.request, {"_facet_array": column}),
+                    "truncated": len(facet_rows_results) > facet_size,
+                }
+                facet_rows = facet_rows_results.rows[:facet_size]
+                pairs = self.get_querystring_pairs()
+                for row in facet_rows:
+                    value = str(row["value"])
+                    selected = ("{}__arraycontains".format(column), value) in pairs
+                    if selected:
+                        toggle_path = path_with_removed_args(
+                            self.request, {"{}__arraycontains".format(column): value}
+                        )
+                    else:
+                        toggle_path = path_with_added_args(
+                            self.request, {"{}__arraycontains".format(column): value}
+                        )
+                    facet_results_values.append({
+                        "value": value,
+                        "label": value,
+                        "count": row["count"],
+                        "toggle_url": self.ds.absolute_url(self.request, toggle_path),
+                        "selected": selected,
+                    })
+            except InterruptedError:
+                facets_timed_out.append(column)
+
+        return facet_results, facets_timed_out
diff --git a/tests/test_api.py b/tests/test_api.py
index 132f6958..c1687c01 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1300,14 +1300,20 @@ def test_suggested_facets(app_client):
     } for suggestion in app_client.get(
         "/fixtures/facetable.json"
     ).json["suggested_facets"]]
-    assert [
+    expected = [
         {"name": "planet_int", "querystring": "_facet=planet_int"},
         {"name": "on_earth", "querystring": "_facet=on_earth"},
         {"name": "state", "querystring": "_facet=state"},
         {"name": "city_id", "querystring": "_facet=city_id"},
         {"name": "neighborhood", "querystring": "_facet=neighborhood"},
         {"name": "tags", "querystring": "_facet=tags"}
-    ] == suggestions
+    ]
+    if detect_json1():
+        expected.append({
+            "name": "tags",
+            "querystring": "_facet_array=tags"
+        })
+    assert expected == suggestions


 def test_allow_facet_off():
diff --git a/tests/test_facets.py b/tests/test_facets.py
index 4a8e3619..280194f5 100644
--- a/tests/test_facets.py
+++ b/tests/test_facets.py
@@ -1,4 +1,5 @@
-from datasette.facets import ColumnFacet
+from datasette.facets import ColumnFacet, ArrayFacet
+from datasette.utils import detect_json1
 from .fixtures import app_client # noqa
 from .utils import MockRequest
 from collections import namedtuple
@@ -172,3 +173,69 @@ async def test_column_facet_from_metadata_cannot_be_hidden(app_client):
             "truncated": False,
         }
     } == buckets
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module")
+async def test_array_facet_suggest(app_client):
+    facet = ArrayFacet(
+        app_client.ds,
+        MockRequest("http://localhost/"),
+        database="fixtures",
+        sql="select * from facetable",
+        table="facetable",
+    )
+    suggestions = await facet.suggest()
+    assert [
+        {
+            "name": "tags",
+            "type": "array",
+            "toggle_url": "http://localhost/?_facet_array=tags",
+        }
+    ] == suggestions
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module")
+async def test_array_facet_results(app_client):
+    facet = ArrayFacet(
+        app_client.ds,
+        MockRequest("http://localhost/?_facet_array=tags"),
+        database="fixtures",
+        sql="select * from facetable",
+        table="facetable",
+    )
+    buckets, timed_out = await facet.facet_results()
+    assert [] == timed_out
+    assert {
+        "tags": {
+            "name": "tags",
+            "type": "array",
+            "results": [
+                {
+                    "value": "tag1",
+                    "label": "tag1",
+                    "count": 2,
+                    "toggle_url": "http://localhost/?_facet_array=tags&tags__arraycontains=tag1",
+                    "selected": False,
+                },
+                {
+                    "value": "tag2",
+                    "label": "tag2",
+                    "count": 1,
+                    "toggle_url": "http://localhost/?_facet_array=tags&tags__arraycontains=tag2",
+                    "selected": False,
+                },
+                {
+                    "value": "tag3",
+                    "label": "tag3",
+                    "count": 1,
+                    "toggle_url": "http://localhost/?_facet_array=tags&tags__arraycontains=tag3",
+                    "selected": False,
+                },
+            ],
+            "hideable": True,
+            "toggle_url": "/",
+            "truncated": False,
+        }
+    } == buckets