Facet by date, closes #481

This commit is contained in:
Simon Willison 2019-05-20 23:09:22 -07:00
commit 967230c90e
6 changed files with 231 additions and 40 deletions

View file

@ -30,7 +30,6 @@ def load_facet_configs(request, table_metadata):
type = "column"
metadata_config = {"simple": metadata_config}
else:
# This should have a single key and a single value
assert (
len(metadata_config.values()) == 1
), "Metadata config dicts should be {type: config}"
@ -62,7 +61,7 @@ def load_facet_configs(request, table_metadata):
@hookimpl
def register_facet_classes():
    """Return the facet classes registered by this module.

    ``ColumnFacet`` and ``DateFacet`` are always returned; ``ArrayFacet`` is
    added only when ``detect_json1()`` is truthy.
    """
    classes = [ColumnFacet, DateFacet]
    if detect_json1():
        classes.append(ArrayFacet)
    return classes
@ -365,3 +364,116 @@ class ArrayFacet(Facet):
facets_timed_out.append(column)
return facet_results, facets_timed_out
class DateFacet(Facet):
    """Facet that buckets rows by the calendar date of a column.

    Column values are passed through SQLite's ``date()`` function, so a
    timestamp such as ``2019-01-14 08:00:00`` is counted under
    ``2019-01-14``. The facet is toggled with the ``_facet_date`` query
    string argument and individual buckets with ``<column>__date``.
    """

    type = "date"

    async def suggest(self):
        """Suggest columns that appear to contain dates.

        Every column that is not already configured as a date facet is probed
        with a query that takes up to 100 values matching the ``????-??-*``
        glob pattern and passes them through ``date()``. The column is
        suggested when at least one result is truthy. Columns whose probe
        raises ``InterruptedError`` or ``sqlite3.OperationalError`` are
        silently skipped.

        Returns:
            A list of dicts with ``name``, ``type`` and ``toggle_url`` keys.
        """
        columns = await self.get_columns(self.sql, self.params)
        # Accept both {"column": ...} and {"simple": ...} configs, the same
        # way facet_results() does; indexing ["simple"] directly would raise
        # KeyError for a config that only provides "column".
        already_enabled = [
            c["config"].get("column") or c["config"].get("simple")
            for c in self.get_configs()
        ]
        time_limit_ms = self.ds.config("facet_suggest_time_limit_ms")
        suggested_facets = []
        for column in columns:
            if column in already_enabled:
                continue
            # Does this column contain any date-like values? The pattern is a
            # single-quoted SQL string literal: double quotes denote an
            # identifier in SQL and only work as strings through a legacy
            # SQLite fallback.
            suggested_facet_sql = """
                select date({column}) from (
                    {sql}
                ) where {column} glob '????-??-*' limit 100;
            """.format(
                column=escape_sqlite(column), sql=self.sql
            )
            try:
                results = await self.ds.execute(
                    self.database,
                    suggested_facet_sql,
                    self.params,
                    truncate=False,
                    custom_time_limit=time_limit_ms,
                    log_sql_errors=False,
                )
            except (InterruptedError, sqlite3.OperationalError):
                # Suggestions are best-effort: skip columns we cannot probe.
                continue
            # date() yields NULL for values it cannot parse, so at least one
            # truthy result means the column holds real dates.
            if any(row[0] for row in results.rows):
                suggested_facets.append(
                    {
                        "name": column,
                        "type": self.type,
                        "toggle_url": self.ds.absolute_url(
                            self.request,
                            path_with_added_args(
                                self.request, {"_facet_date": column}
                            ),
                        ),
                    }
                )
        return suggested_facets

    async def facet_results(self):
        """Compute the date buckets for every configured date facet.

        Returns:
            A ``(facet_results, facets_timed_out)`` tuple. ``facet_results``
            maps each column name to a dict with ``name``, ``type``,
            ``results``, ``hideable``, ``toggle_url`` and ``truncated`` keys;
            ``facets_timed_out`` lists the columns whose query raised
            ``InterruptedError``.
        """
        facet_results = {}
        facets_timed_out = []
        args = dict(self.get_querystring_pairs())
        facet_size = self.ds.config("default_facet_size")
        time_limit_ms = self.ds.config("facet_time_limit_ms")
        for source_and_config in self.get_configs():
            config = source_and_config["config"]
            source = source_and_config["source"]
            column = config.get("column") or config["simple"]
            date_arg = "{}__date".format(column)
            # TODO: does this query break if inner sql produces value or count columns?
            # One row more than facet_size is requested so truncation can be
            # detected; ties on count are broken by date so the bucket order
            # is deterministic.
            facet_sql = """
                select date({col}) as value, count(*) as count from (
                    {sql}
                )
                where date({col}) is not null
                group by date({col}) order by count desc, value limit {limit}
            """.format(
                col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
            )
            try:
                facet_rows_results = await self.ds.execute(
                    self.database,
                    facet_sql,
                    self.params,
                    truncate=False,
                    custom_time_limit=time_limit_ms,
                )
            except InterruptedError:
                facets_timed_out.append(column)
                continue
            facet_results_values = []
            facet_results[column] = {
                "name": column,
                "type": self.type,
                "results": facet_results_values,
                # Facets configured in metadata cannot be hidden by the user.
                "hideable": source != "metadata",
                "toggle_url": path_with_removed_args(
                    self.request, {"_facet_date": column}
                ),
                "truncated": len(facet_rows_results) > facet_size,
            }
            for row in facet_rows_results.rows[:facet_size]:
                value = row["value"]
                selected = str(args.get(date_arg)) == str(value)
                if selected:
                    # Clicking a selected bucket removes its filter.
                    toggle_path = path_with_removed_args(
                        self.request, {date_arg: str(value)}
                    )
                else:
                    toggle_path = path_with_added_args(
                        self.request, {date_arg: value}
                    )
                facet_results_values.append(
                    {
                        "value": value,
                        "label": value,
                        "count": row["count"],
                        "toggle_url": self.ds.absolute_url(
                            self.request, toggle_path
                        ),
                        "selected": selected,
                    }
                )
        return facet_results, facets_timed_out

View file

@ -138,4 +138,11 @@ If your SQLite installation provides the ``json1`` extension (you can check usin
This is useful for modelling things like tags without needing to break them out into a new table.
You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
You can try this functionality out at `latest.datasette.io/fixtures/facetable?_facet_array=tags <https://latest.datasette.io/fixtures/facetable?_facet_array=tags>`__
Facet by date
-------------
If Datasette detects a column whose first 100 values include dates, it will suggest faceting that column by date. Values are grouped by their date portion, so this works especially well for timestamp values such as ``2019-03-01 12:44:00``.
Demo here: `latest.datasette.io/fixtures/facetable?_facet_date=created <https://latest.datasette.io/fixtures/facetable?_facet_date=created>`__

View file

@ -518,6 +518,7 @@ INSERT INTO facet_cities (id, name) VALUES
CREATE TABLE facetable (
pk integer primary key,
created text,
planet_int integer,
on_earth integer,
state text,
@ -527,23 +528,23 @@ CREATE TABLE facetable (
FOREIGN KEY ("city_id") REFERENCES [facet_cities](id)
);
INSERT INTO facetable
(planet_int, on_earth, state, city_id, neighborhood, tags)
(created, planet_int, on_earth, state, city_id, neighborhood, tags)
VALUES
(1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'),
(1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'),
(1, 1, 'CA', 1, 'SOMA', '[]'),
(1, 1, 'CA', 1, 'Tenderloin', '[]'),
(1, 1, 'CA', 1, 'Bernal Heights', '[]'),
(1, 1, 'CA', 1, 'Hayes Valley', '[]'),
(1, 1, 'CA', 2, 'Hollywood', '[]'),
(1, 1, 'CA', 2, 'Downtown', '[]'),
(1, 1, 'CA', 2, 'Los Feliz', '[]'),
(1, 1, 'CA', 2, 'Koreatown', '[]'),
(1, 1, 'MI', 3, 'Downtown', '[]'),
(1, 1, 'MI', 3, 'Greektown', '[]'),
(1, 1, 'MI', 3, 'Corktown', '[]'),
(1, 1, 'MI', 3, 'Mexicantown', '[]'),
(2, 0, 'MC', 4, 'Arcadia Planitia', '[]')
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]'),
("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]'),
("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]'),
("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]'),
("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]'),
("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]'),
("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]'),
("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]')
;
CREATE TABLE binary_data (

View file

@ -167,6 +167,7 @@ def test_database_page(app_client):
{
"columns": [
"pk",
"created",
"planet_int",
"on_earth",
"state",
@ -955,14 +956,16 @@ def test_table_filter_queries_multiple_of_same_type(app_client):
def test_table_filter_json_arraycontains(app_client):
    """``tags__arraycontains=tag1`` returns only the rows tagged ``tag1``."""
    response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1")
    # Rows include the ``created`` column that follows ``pk`` in facetable.
    assert [
        [1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'],
        [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'],
    ] == response.json["rows"]
def test_table_filter_extra_where(app_client):
    """``_where=`` restricts the table to rows matching the extra clause."""
    response = app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'")
    # The single matching row includes the ``created`` column after ``pk``.
    assert [
        [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]']
    ] == response.json["rows"]
def test_table_filter_extra_where_invalid(app_client):
@ -1331,12 +1334,14 @@ def test_suggested_facets(app_client):
]
]
expected = [
{"name": "created", "querystring": "_facet=created"},
{"name": "planet_int", "querystring": "_facet=planet_int"},
{"name": "on_earth", "querystring": "_facet=on_earth"},
{"name": "state", "querystring": "_facet=state"},
{"name": "city_id", "querystring": "_facet=city_id"},
{"name": "neighborhood", "querystring": "_facet=neighborhood"},
{"name": "tags", "querystring": "_facet=tags"},
{"name": "created", "querystring": "_facet_date=created"},
]
if detect_json1():
expected.append({"name": "tags", "querystring": "_facet_array=tags"})
@ -1364,6 +1369,7 @@ def test_expand_labels(app_client):
assert {
"2": {
"pk": 2,
"created": "2019-01-14 08:00:00",
"planet_int": 1,
"on_earth": 1,
"state": "CA",
@ -1373,6 +1379,7 @@ def test_expand_labels(app_client):
},
"13": {
"pk": 13,
"created": "2019-01-17 08:00:00",
"planet_int": 1,
"on_earth": 1,
"state": "MI",

View file

@ -21,22 +21,22 @@ world
)
# Expected CSV for the facetable rows with the city_id label column included:
# one header plus 15 data rows, each carrying the ``created`` timestamp.
# The leading newline is stripped and line endings are converted to CRLF.
EXPECTED_TABLE_WITH_LABELS_CSV = """
pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags
1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]"
2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]"
3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[]
4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[]
5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[]
6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[]
7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[]
8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[]
9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[]
10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[]
11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[]
12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[]
13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[]
14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[]
15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[]
""".lstrip().replace(
    "\n", "\r\n"
)

View file

@ -1,4 +1,4 @@
from datasette.facets import ColumnFacet, ArrayFacet
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
from datasette.utils import detect_json1
from .fixtures import app_client # noqa
from .utils import MockRequest
@ -17,6 +17,7 @@ async def test_column_facet_suggest(app_client):
)
suggestions = await facet.suggest()
assert [
{"name": "created", "toggle_url": "http://localhost/?_facet=created"},
{"name": "planet_int", "toggle_url": "http://localhost/?_facet=planet_int"},
{"name": "on_earth", "toggle_url": "http://localhost/?_facet=on_earth"},
{"name": "state", "toggle_url": "http://localhost/?_facet=state"},
@ -37,6 +38,10 @@ async def test_column_facet_suggest_skip_if_already_selected(app_client):
)
suggestions = await facet.suggest()
assert [
{
"name": "created",
"toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=created",
},
{
"name": "state",
"toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=state",
@ -67,7 +72,14 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client):
metadata={"facets": ["city_id"]},
)
suggestions = [s["name"] for s in await facet.suggest()]
assert ["planet_int", "on_earth", "state", "neighborhood", "tags"] == suggestions
assert [
"created",
"planet_int",
"on_earth",
"state",
"neighborhood",
"tags",
] == suggestions
@pytest.mark.asyncio
@ -239,3 +251,55 @@ async def test_array_facet_results(app_client):
"truncated": False,
}
} == buckets
@pytest.mark.asyncio
async def test_date_facet_results(app_client):
    """DateFacet buckets ``facetable.created`` by day, most frequent first."""
    request = MockRequest("http://localhost/?_facet_date=created")
    date_facet = DateFacet(
        app_client.ds,
        request,
        database="fixtures",
        sql="select * from facetable",
        table="facetable",
    )
    buckets, timed_out = await date_facet.facet_results()

    # (date, row count) pairs in the order the facet is expected to list them.
    expected_days = [
        ("2019-01-14", 4),
        ("2019-01-15", 4),
        ("2019-01-17", 4),
        ("2019-01-16", 3),
    ]
    expected_results = []
    for day, count in expected_days:
        expected_results.append(
            {
                "value": day,
                "label": day,
                "count": count,
                "toggle_url": "http://localhost/?_facet_date=created&created__date={}".format(
                    day
                ),
                "selected": False,
            }
        )
    expected_buckets = {
        "created": {
            "name": "created",
            "type": "date",
            "results": expected_results,
            "hideable": True,
            "toggle_url": "/",
            "truncated": False,
        }
    }

    assert [] == timed_out
    assert expected_buckets == buckets