From 967230c90e7467d4adb5df5a5732ece0247cf536 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 20 May 2019 23:09:22 -0700 Subject: [PATCH] Facet by date, closes #481 --- datasette/facets.py | 116 ++++++++++++++++++++++++++++++++++++++++++- docs/facets.rst | 9 +++- tests/fixtures.py | 33 ++++++------ tests/test_api.py | 13 +++-- tests/test_csv.py | 32 ++++++------ tests/test_facets.py | 68 ++++++++++++++++++++++++- 6 files changed, 231 insertions(+), 40 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index 90f4fca9..0b07d0d4 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -30,7 +30,6 @@ def load_facet_configs(request, table_metadata): type = "column" metadata_config = {"simple": metadata_config} else: - # This should have a single key and a single value assert ( len(metadata_config.values()) == 1 ), "Metadata config dicts should be {type: config}" @@ -62,7 +61,7 @@ def load_facet_configs(request, table_metadata): @hookimpl def register_facet_classes(): - classes = [ColumnFacet] + classes = [ColumnFacet, DateFacet] if detect_json1(): classes.append(ArrayFacet) return classes @@ -365,3 +364,116 @@ class ArrayFacet(Facet): facets_timed_out.append(column) return facet_results, facets_timed_out + + +class DateFacet(Facet): + type = "date" + + async def suggest(self): + columns = await self.get_columns(self.sql, self.params) + already_enabled = [c["config"]["simple"] for c in self.get_configs()] + suggested_facets = [] + for column in columns: + if column in already_enabled: + continue + # Does this column contain any dates in the first 100 rows? + suggested_facet_sql = """ + select date({column}) from ( + {sql} + ) where {column} glob "????-??-*" limit 100; + """.format( + column=escape_sqlite(column), sql=self.sql + ) + try: + results = await self.ds.execute( + self.database, + suggested_facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + log_sql_errors=False, + ) + values = tuple(r[0] for r in results.rows) + if any(values): + suggested_facets.append( + { + "name": column, + "type": "date", + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet_date": column} + ), + ), + } + ) + except (InterruptedError, sqlite3.OperationalError): + continue + return suggested_facets + + async def facet_results(self): + facet_results = {} + facets_timed_out = [] + args = dict(self.get_querystring_pairs()) + facet_size = self.ds.config("default_facet_size") + for source_and_config in self.get_configs(): + config = source_and_config["config"] + source = source_and_config["source"] + column = config.get("column") or config["simple"] + # TODO: does this query break if inner sql produces value or count columns? + facet_sql = """ + select date({col}) as value, count(*) as count from ( + {sql} + ) + where date({col}) is not null + group by date({col}) order by count desc limit {limit} + """.format( + col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + ) + try: + facet_rows_results = await self.ds.execute( + self.database, + facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_time_limit_ms"), + ) + facet_results_values = [] + facet_results[column] = { + "name": column, + "type": self.type, + "results": facet_results_values, + "hideable": source != "metadata", + "toggle_url": path_with_removed_args( + self.request, {"_facet_date": column} + ), + "truncated": len(facet_rows_results) > facet_size, + } + facet_rows = facet_rows_results.rows[:facet_size] + for row in facet_rows: + selected = str(args.get("{}__date".format(column))) == str( + row["value"] + ) + if selected: + toggle_path = path_with_removed_args( + self.request, {"{}__date".format(column): str(row["value"])} + ) + else: + toggle_path = path_with_added_args( + self.request, {"{}__date".format(column): row["value"]} + ) + facet_results_values.append( + { + "value": row["value"], + "label": row["value"], + "count": row["count"], + "toggle_url": self.ds.absolute_url( + self.request, toggle_path + ), + "selected": selected, + } + ) + except InterruptedError: + facets_timed_out.append(column) + + return facet_results, facets_timed_out diff --git a/docs/facets.rst b/docs/facets.rst index c3a7e50b..4e03a4fe 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -138,4 +138,11 @@ If your SQLite installation provides the ``json1`` extension (you can check usin This is useful for modelling things like tags without needing to break them out into a new table. -You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags `__ \ No newline at end of file +You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags `__ + +Facet by date +------------- + +If Datasette finds any columns that contain dates in the first 100 values, it will offer a faceting interface against the dates of those values. This works especially well against timestamp values such as ``2019-03-01 12:44:00``. + +Demo here: `latest.datasette.io/fixtures/facetable?_facet_date=created `__ diff --git a/tests/fixtures.py b/tests/fixtures.py index 7b9c09f1..315e306a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -518,6 +518,7 @@ INSERT INTO facet_cities (id, name) VALUES CREATE TABLE facetable ( pk integer primary key, + created text, planet_int integer, on_earth integer, state text, @@ -527,23 +528,23 @@ CREATE TABLE facetable ( FOREIGN KEY ("city_id") REFERENCES [facet_cities](id) ); INSERT INTO facetable - (planet_int, on_earth, state, city_id, neighborhood, tags) + (created, planet_int, on_earth, state, city_id, neighborhood, tags) VALUES - (1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'), - (1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'), - (1, 1, 'CA', 1, 'SOMA', '[]'), - (1, 1, 'CA', 1, 'Tenderloin', '[]'), - (1, 1, 'CA', 1, 'Bernal Heights', '[]'), - (1, 1, 'CA', 1, 'Hayes Valley', '[]'), - (1, 1, 'CA', 2, 'Hollywood', '[]'), - (1, 1, 'CA', 2, 'Downtown', '[]'), - (1, 1, 'CA', 2, 'Los Feliz', '[]'), - (1, 1, 'CA', 2, 'Koreatown', '[]'), - (1, 1, 'MI', 3, 'Downtown', '[]'), - (1, 1, 'MI', 3, 'Greektown', '[]'), - (1, 1, 'MI', 3, 'Corktown', '[]'), - (1, 1, 'MI', 3, 'Mexicantown', '[]'), - (2, 0, 'MC', 4, 'Arcadia Planitia', '[]') + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]'), + ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]') ; CREATE TABLE binary_data ( diff --git a/tests/test_api.py b/tests/test_api.py index 6fedc118..339cecde 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -167,6 +167,7 @@ def test_database_page(app_client): { "columns": [ "pk", + "created", "planet_int", "on_earth", "state", @@ -955,14 +956,16 @@ def test_table_filter_queries_multiple_of_same_type(app_client): def test_table_filter_json_arraycontains(app_client): response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1") assert [ - [1, 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], - [2, 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], + [1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], ] == response.json["rows"] def test_table_filter_extra_where(app_client): response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") - assert [[2, 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]']] == response.json["rows"] + assert [ + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'] + ] == response.json["rows"] def test_table_filter_extra_where_invalid(app_client): @@ -1331,12 +1334,14 @@ def test_suggested_facets(app_client): ] ] expected = [ + {"name": "created", "querystring": "_facet=created"}, {"name": "planet_int", "querystring": "_facet=planet_int"}, {"name": "on_earth", "querystring": "_facet=on_earth"}, {"name": "state", "querystring": "_facet=state"}, {"name": "city_id", "querystring": "_facet=city_id"}, {"name": "neighborhood", "querystring": "_facet=neighborhood"}, {"name": "tags", "querystring": "_facet=tags"}, + {"name": "created", "querystring": "_facet_date=created"}, ] if detect_json1(): expected.append({"name": "tags", "querystring": "_facet_array=tags"}) @@ -1364,6 +1369,7 @@ def test_expand_labels(app_client): assert { "2": { "pk": 2, + "created": "2019-01-14 08:00:00", "planet_int": 1, "on_earth": 1, "state": "CA", @@ -1373,6 +1379,7 @@ def test_expand_labels(app_client): }, "13": { "pk": 13, + "created": "2019-01-17 08:00:00", "planet_int": 1, "on_earth": 1, "state": "MI", diff --git a/tests/test_csv.py b/tests/test_csv.py index cc3c908f..cf0e6732 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -21,22 +21,22 @@ world ) EXPECTED_TABLE_WITH_LABELS_CSV = """ -pk,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags -1,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]" -2,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]" -3,1,1,CA,1,San Francisco,SOMA,[] -4,1,1,CA,1,San Francisco,Tenderloin,[] -5,1,1,CA,1,San Francisco,Bernal Heights,[] -6,1,1,CA,1,San Francisco,Hayes Valley,[] -7,1,1,CA,2,Los Angeles,Hollywood,[] -8,1,1,CA,2,Los Angeles,Downtown,[] -9,1,1,CA,2,Los Angeles,Los Feliz,[] -10,1,1,CA,2,Los Angeles,Koreatown,[] -11,1,1,MI,3,Detroit,Downtown,[] -12,1,1,MI,3,Detroit,Greektown,[] -13,1,1,MI,3,Detroit,Corktown,[] -14,1,1,MI,3,Detroit,Mexicantown,[] -15,2,0,MC,4,Memnonia,Arcadia Planitia,[] +pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags +1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]" +2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]" +3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[] +4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[] +5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[] +6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[] +7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[] +8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[] +9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[] +10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[] +11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[] +12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[] +13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[] +14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[] +15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[] """.lstrip().replace( "\n", "\r\n" ) diff --git a/tests/test_facets.py b/tests/test_facets.py index 280194f5..1efc9c63 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -1,4 +1,4 @@ -from datasette.facets import ColumnFacet, ArrayFacet +from datasette.facets import ColumnFacet, ArrayFacet, DateFacet from datasette.utils import detect_json1 from .fixtures import app_client # noqa from .utils import MockRequest @@ -17,6 +17,7 @@ async def test_column_facet_suggest(app_client): ) suggestions = await facet.suggest() assert [ + {"name": "created", "toggle_url": "http://localhost/?_facet=created"}, {"name": "planet_int", "toggle_url": "http://localhost/?_facet=planet_int"}, {"name": "on_earth", "toggle_url": "http://localhost/?_facet=on_earth"}, {"name": "state", "toggle_url": "http://localhost/?_facet=state"}, @@ -37,6 +38,10 @@ async def test_column_facet_suggest_skip_if_already_selected(app_client): ) suggestions = await facet.suggest() assert [ + { + "name": "created", + "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=created", + }, { "name": "state", "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=state", @@ -67,7 +72,14 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client): metadata={"facets": ["city_id"]}, ) suggestions = [s["name"] for s in await facet.suggest()] - assert ["planet_int", "on_earth", "state", "neighborhood", "tags"] == suggestions + assert [ + "created", + "planet_int", + "on_earth", + "state", + "neighborhood", + "tags", + ] == suggestions @pytest.mark.asyncio @@ -239,3 +251,55 @@ async def test_array_facet_results(app_client): "truncated": False, } } == buckets + + +@pytest.mark.asyncio +async def test_date_facet_results(app_client): + facet = DateFacet( + app_client.ds, + MockRequest("http://localhost/?_facet_date=created"), + database="fixtures", + sql="select * from facetable", + table="facetable", + ) + buckets, timed_out = await facet.facet_results() + assert [] == timed_out + assert { + "created": { + "name": "created", + "type": "date", + "results": [ + { + "value": "2019-01-14", + "label": "2019-01-14", + "count": 4, + "toggle_url": "http://localhost/?_facet_date=created&created__date=2019-01-14", + "selected": False, + }, + { + "value": "2019-01-15", + "label": "2019-01-15", + "count": 4, + "toggle_url": "http://localhost/?_facet_date=created&created__date=2019-01-15", + "selected": False, + }, + { + "value": "2019-01-17", + "label": "2019-01-17", + "count": 4, + "toggle_url": "http://localhost/?_facet_date=created&created__date=2019-01-17", + "selected": False, + }, + { + "value": "2019-01-16", + "label": "2019-01-16", + "count": 3, + "toggle_url": "http://localhost/?_facet_date=created&created__date=2019-01-16", + "selected": False, + }, + ], + "hideable": True, + "toggle_url": "/", + "truncated": False, + } + } == buckets