Only suggest array facet for arrays of strings - closes #562

This commit is contained in:
Simon Willison 2019-11-01 12:37:46 -07:00
commit 0dde00e7bb
5 changed files with 92 additions and 46 deletions

View file

@ -257,6 +257,16 @@ class ColumnFacet(Facet):
class ArrayFacet(Facet): class ArrayFacet(Facet):
type = "array" type = "array"
def _is_json_array_of_strings(self, json_string):
try:
array = json.loads(json_string)
except ValueError:
return False
for item in array:
if not isinstance(item, str):
return False
return True
async def suggest(self): async def suggest(self):
columns = await self.get_columns(self.sql, self.params) columns = await self.get_columns(self.sql, self.params)
suggested_facets = [] suggested_facets = []
@ -282,18 +292,30 @@ class ArrayFacet(Facet):
) )
types = tuple(r[0] for r in results.rows) types = tuple(r[0] for r in results.rows)
if types in (("array",), ("array", None)): if types in (("array",), ("array", None)):
suggested_facets.append( # Now sanity check that first 100 arrays contain only strings
{ first_100 = await self.ds.execute(
"name": column, self.database,
"type": "array", "select {column} from ({sql}) where {column} is not null".format(
"toggle_url": self.ds.absolute_url( column=escape_sqlite(column), sql=self.sql
self.request, ),
path_with_added_args( self.params,
self.request, {"_facet_array": column} truncate=False,
), custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
), log_sql_errors=False,
}
) )
if all(self._is_json_array_of_strings(r[0]) for r in first_100):
suggested_facets.append(
{
"name": column,
"type": "array",
"toggle_url": self.ds.absolute_url(
self.request,
path_with_added_args(
self.request, {"_facet_array": column}
),
),
}
)
except (QueryInterrupted, sqlite3.OperationalError): except (QueryInterrupted, sqlite3.OperationalError):
continue continue
return suggested_facets return suggested_facets

View file

@ -661,26 +661,27 @@ CREATE TABLE facetable (
city_id integer, city_id integer,
neighborhood text, neighborhood text,
tags text, tags text,
complex_array text,
FOREIGN KEY ("city_id") REFERENCES [facet_cities](id) FOREIGN KEY ("city_id") REFERENCES [facet_cities](id)
); );
INSERT INTO facetable INSERT INTO facetable
(created, planet_int, on_earth, state, city_id, neighborhood, tags) (created, planet_int, on_earth, state, city_id, neighborhood, tags, complex_array)
VALUES VALUES
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'), ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'), ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]'), ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]'), ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]'), ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]'), ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]'), ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]'), ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]'),
("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]'), ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]'),
("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]'), ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]'),
("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]'), ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]'), ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]'), ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]'), ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]'),
("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]') ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]')
; ;
CREATE TABLE binary_data ( CREATE TABLE binary_data (

View file

@ -195,6 +195,7 @@ def test_database_page(app_client):
"city_id", "city_id",
"neighborhood", "neighborhood",
"tags", "tags",
"complex_array",
], ],
"primary_keys": ["pk"], "primary_keys": ["pk"],
"count": 15, "count": 15,
@ -1029,15 +1030,25 @@ def test_table_filter_queries_multiple_of_same_type(app_client):
def test_table_filter_json_arraycontains(app_client): def test_table_filter_json_arraycontains(app_client):
response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1") response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1")
assert [ assert [
[1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], [
[2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], 1,
"2019-01-14 08:00:00",
1,
1,
"CA",
1,
"Mission",
'["tag1", "tag2"]',
'[{"foo": "bar"}]',
],
[2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"],
] == response.json["rows"] ] == response.json["rows"]
def test_table_filter_extra_where(app_client): def test_table_filter_extra_where(app_client):
response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'")
assert [ assert [
[2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'] [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"]
] == response.json["rows"] ] == response.json["rows"]
@ -1453,6 +1464,7 @@ def test_suggested_facets(app_client):
{"name": "city_id", "querystring": "_facet=city_id"}, {"name": "city_id", "querystring": "_facet=city_id"},
{"name": "neighborhood", "querystring": "_facet=neighborhood"}, {"name": "neighborhood", "querystring": "_facet=neighborhood"},
{"name": "tags", "querystring": "_facet=tags"}, {"name": "tags", "querystring": "_facet=tags"},
{"name": "complex_array", "querystring": "_facet=complex_array"},
{"name": "created", "querystring": "_facet_date=created"}, {"name": "created", "querystring": "_facet_date=created"},
] ]
if detect_json1(): if detect_json1():
@ -1488,6 +1500,7 @@ def test_expand_labels(app_client):
"city_id": {"value": 1, "label": "San Francisco"}, "city_id": {"value": 1, "label": "San Francisco"},
"neighborhood": "Dogpatch", "neighborhood": "Dogpatch",
"tags": '["tag1", "tag3"]', "tags": '["tag1", "tag3"]',
"complex_array": "[]",
}, },
"13": { "13": {
"pk": 13, "pk": 13,
@ -1498,6 +1511,7 @@ def test_expand_labels(app_client):
"city_id": {"value": 3, "label": "Detroit"}, "city_id": {"value": 3, "label": "Detroit"},
"neighborhood": "Corktown", "neighborhood": "Corktown",
"tags": "[]", "tags": "[]",
"complex_array": "[]",
}, },
} == response.json } == response.json

View file

@ -21,22 +21,22 @@ world
) )
EXPECTED_TABLE_WITH_LABELS_CSV = """ EXPECTED_TABLE_WITH_LABELS_CSV = """
pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,complex_array
1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]" 1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]"
2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]" 2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[]
3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[] 3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[]
4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[] 4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[]
5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[] 5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[]
6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[] 6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[]
7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[] 7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[]
8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[] 8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[]
9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[] 9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[]
10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[] 10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[]
11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[] 11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[]
12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[] 12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[]
13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[] 13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[]
14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[] 14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[]
15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[] 15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[]
""".lstrip().replace( """.lstrip().replace(
"\n", "\r\n" "\n", "\r\n"
) )

View file

@ -23,6 +23,10 @@ async def test_column_facet_suggest(app_client):
{"name": "city_id", "toggle_url": "http://localhost/?_facet=city_id"}, {"name": "city_id", "toggle_url": "http://localhost/?_facet=city_id"},
{"name": "neighborhood", "toggle_url": "http://localhost/?_facet=neighborhood"}, {"name": "neighborhood", "toggle_url": "http://localhost/?_facet=neighborhood"},
{"name": "tags", "toggle_url": "http://localhost/?_facet=tags"}, {"name": "tags", "toggle_url": "http://localhost/?_facet=tags"},
{
"name": "complex_array",
"toggle_url": "http://localhost/?_facet=complex_array",
},
] == suggestions ] == suggestions
@ -57,6 +61,10 @@ async def test_column_facet_suggest_skip_if_already_selected(app_client):
"name": "tags", "name": "tags",
"toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=tags", "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=tags",
}, },
{
"name": "complex_array",
"toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=complex_array",
},
] == suggestions ] == suggestions
@ -78,6 +86,7 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client):
"state", "state",
"neighborhood", "neighborhood",
"tags", "tags",
"complex_array",
] == suggestions ] == suggestions