diff --git a/datasette/facets.py b/datasette/facets.py index 365d9c65..9b5baaa2 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -257,6 +257,16 @@ class ColumnFacet(Facet): class ArrayFacet(Facet): type = "array" + def _is_json_array_of_strings(self, json_string): + try: + array = json.loads(json_string) + except ValueError: + return False + for item in array: + if not isinstance(item, str): + return False + return True + async def suggest(self): columns = await self.get_columns(self.sql, self.params) suggested_facets = [] @@ -282,18 +292,30 @@ class ArrayFacet(Facet): ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - suggested_facets.append( - { - "name": column, - "type": "array", - "toggle_url": self.ds.absolute_url( - self.request, - path_with_added_args( - self.request, {"_facet_array": column} - ), - ), - } + # Now sanity check that first 100 arrays contain only strings + first_100 = await self.ds.execute( + self.database, + "select {column} from ({sql}) where {column} is not null".format( + column=escape_sqlite(column), sql=self.sql + ), + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + log_sql_errors=False, ) + if all(self._is_json_array_of_strings(r[0]) for r in first_100): + suggested_facets.append( + { + "name": column, + "type": "array", + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet_array": column} + ), + ), + } + ) except (QueryInterrupted, sqlite3.OperationalError): continue return suggested_facets diff --git a/tests/fixtures.py b/tests/fixtures.py index a4c32f36..93c3da9f 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -661,26 +661,27 @@ CREATE TABLE facetable ( city_id integer, neighborhood text, tags text, + complex_array text, FOREIGN KEY ("city_id") REFERENCES [facet_cities](id) ); INSERT INTO facetable - (created, planet_int, on_earth, state, city_id, neighborhood, tags) + (created, planet_int, on_earth, state, city_id, neighborhood, tags, complex_array) VALUES - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]'), - ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]') + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]'), + ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]') ; CREATE TABLE binary_data ( diff --git a/tests/test_api.py b/tests/test_api.py index 4ea95e84..41557bcf 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -195,6 +195,7 @@ def test_database_page(app_client): "city_id", "neighborhood", "tags", + "complex_array", ], "primary_keys": ["pk"], "count": 15, @@ -1029,15 +1030,25 @@ def test_table_filter_queries_multiple_of_same_type(app_client): def test_table_filter_json_arraycontains(app_client): response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1") assert [ - [1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], + [ + 1, + "2019-01-14 08:00:00", + 1, + 1, + "CA", + 1, + "Mission", + '["tag1", "tag2"]', + '[{"foo": "bar"}]', + ], + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"], ] == response.json["rows"] def test_table_filter_extra_where(app_client): response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") assert [ - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'] + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"] ] == response.json["rows"] @@ -1453,6 +1464,7 @@ def test_suggested_facets(app_client): {"name": "city_id", "querystring": "_facet=city_id"}, {"name": "neighborhood", "querystring": "_facet=neighborhood"}, {"name": "tags", "querystring": "_facet=tags"}, + {"name": "complex_array", "querystring": "_facet=complex_array"}, {"name": "created", "querystring": "_facet_date=created"}, ] if detect_json1(): @@ -1488,6 +1500,7 @@ def test_expand_labels(app_client): "city_id": {"value": 1, "label": "San Francisco"}, "neighborhood": "Dogpatch", "tags": '["tag1", "tag3"]', + "complex_array": "[]", }, "13": { "pk": 13, @@ -1498,6 +1511,7 @@ def test_expand_labels(app_client): "city_id": {"value": 3, "label": "Detroit"}, "neighborhood": "Corktown", "tags": "[]", + "complex_array": "[]", }, } == response.json diff --git a/tests/test_csv.py b/tests/test_csv.py index 1d5d2df2..b148b6db 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -21,22 +21,22 @@ world ) EXPECTED_TABLE_WITH_LABELS_CSV = """ -pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags -1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]" -2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]" -3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[] -4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[] -5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[] -6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[] -7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[] -8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[] -9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[] -10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[] -11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[] -12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[] -13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[] -14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[] -15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[] +pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,complex_array +1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]" +2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[] +3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[] +4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[] +5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[] +6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[] +7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[] +8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[] +9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[] +10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[] +11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[] +12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[] +13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[] +14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[] +15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[] """.lstrip().replace( "\n", "\r\n" ) diff --git a/tests/test_facets.py b/tests/test_facets.py index 9169f666..402c155b 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -23,6 +23,10 @@ async def test_column_facet_suggest(app_client): {"name": "city_id", "toggle_url": "http://localhost/?_facet=city_id"}, {"name": "neighborhood", "toggle_url": "http://localhost/?_facet=neighborhood"}, {"name": "tags", "toggle_url": "http://localhost/?_facet=tags"}, + { + "name": "complex_array", + "toggle_url": "http://localhost/?_facet=complex_array", + }, ] == suggestions @@ -57,6 +61,10 @@ async def test_column_facet_suggest_skip_if_already_selected(app_client): "name": "tags", "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=tags", }, + { + "name": "complex_array", + "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=complex_array", + }, ] == suggestions @@ -78,6 +86,7 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client): "state", "neighborhood", "tags", + "complex_array", ] == suggestions