From fd137da7f83c117b18e189707a1039e319dd5c91 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 21 Nov 2019 16:56:55 -0800 Subject: [PATCH] Suggest column facet only if at least one count > 1 Fixes #638 --- datasette/facets.py | 5 ++++- tests/fixtures.py | 33 +++++++++++++++++---------------- tests/test_api.py | 30 ++++++++++++++++++++++++++++-- tests/test_csv.py | 32 ++++++++++++++++---------------- 4 files changed, 65 insertions(+), 35 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index 0c6459d6..a314faaf 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -143,9 +143,10 @@ class ColumnFacet(Facet): if column in already_enabled: continue suggested_facet_sql = """ - select distinct {column} from ( + select {column}, count(*) as n from ( {sql} ) where {column} is not null + group by {column} limit {limit} """.format( column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 @@ -165,6 +166,8 @@ class ColumnFacet(Facet): and num_distinct_values > 1 and num_distinct_values <= facet_size and num_distinct_values < row_count + # And at least one has n > 1 + and any(r["n"] > 1 for r in distinct_values) ): suggested_facets.append( { diff --git a/tests/fixtures.py b/tests/fixtures.py index 3e4203f7..bb01d171 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -669,26 +669,27 @@ CREATE TABLE facetable ( neighborhood text, tags text, complex_array text, + distinct_some_null, FOREIGN KEY ("city_id") REFERENCES [facet_cities](id) ); INSERT INTO facetable - (created, planet_int, on_earth, state, city_id, neighborhood, tags, complex_array) + (created, planet_int, on_earth, state, city_id, neighborhood, tags, complex_array, distinct_some_null) VALUES - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]'), - ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]') + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]', 'one'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]', 'two'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]', null), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]', null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]', null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]', null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]', null), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]', null), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]', null), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]', null), + ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]', null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]', null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]', null), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]', null), + ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]', null) ; CREATE TABLE binary_data ( diff --git a/tests/test_api.py b/tests/test_api.py index 1fa8642f..34eef4ce 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -197,6 +197,7 @@ def test_database_page(app_client): "neighborhood", "tags", "complex_array", + "distinct_some_null", ], "primary_keys": ["pk"], "count": 15, @@ -1042,15 +1043,38 @@ def test_table_filter_json_arraycontains(app_client): "Mission", '["tag1", "tag2"]', '[{"foo": "bar"}]', + "one", + ], + [ + 2, + "2019-01-14 08:00:00", + 1, + 1, + "CA", + 1, + "Dogpatch", + '["tag1", "tag3"]', + "[]", + "two", ], - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"], ] == response.json["rows"] def test_table_filter_extra_where(app_client): response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") assert [ - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"] + [ + 2, + "2019-01-14 08:00:00", + 1, + 1, + "CA", + 1, + "Dogpatch", + '["tag1", "tag3"]', + "[]", + "two", + ] ] == response.json["rows"] @@ -1503,6 +1527,7 @@ def test_expand_labels(app_client): "neighborhood": "Dogpatch", "tags": '["tag1", "tag3"]', "complex_array": "[]", + "distinct_some_null": "two", }, "13": { "pk": 13, @@ -1514,6 +1539,7 @@ def test_expand_labels(app_client): "neighborhood": "Corktown", "tags": "[]", "complex_array": "[]", + "distinct_some_null": None, }, } == response.json diff --git a/tests/test_csv.py b/tests/test_csv.py index 13aca489..1030c2bb 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -21,22 +21,22 @@ world ) EXPECTED_TABLE_WITH_LABELS_CSV = """ -pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,complex_array -1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]" -2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[] -3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[] -4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[] -5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[] -6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[] -7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[] -8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[] -9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[] -10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[] -11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[] -12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[] -13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[] -14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[] -15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[] +pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,complex_array,distinct_some_null +1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]",one +2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[],two +3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[], +4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[], +5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[], +6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[], +7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[], +8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[], +9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[], +10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[], +11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[], +12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[], +13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[], +14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[], +15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[], """.lstrip().replace( "\n", "\r\n" )