Initial implementation of facets, plus tests and docs

Refs #255
This commit is contained in:
Simon Willison 2018-05-12 19:29:06 -03:00
commit eaaa3ea149
No known key found for this signature in database
GPG key ID: 17E2DEA2588B7F52
7 changed files with 217 additions and 4 deletions

View file

@ -733,8 +733,8 @@ class TableView(RowTableShared):
forward_querystring=False
)
units = self.table_metadata(name, table).get('units', {})
table_metadata = self.table_metadata(name, table)
units = table_metadata.get('units', {})
filters = Filters(sorted(other_args.items()), units, ureg)
where_clauses, params = filters.build_where_clauses()
@ -799,12 +799,13 @@ class TableView(RowTableShared):
raise DatasetteError('Cannot use _sort and _sort_desc at the same time')
order_by = '{} desc'.format(escape_sqlite(sort_desc))
count_sql = 'select count(*) from {table_name} {where}'.format(
from_sql = 'from {table_name} {where}'.format(
table_name=escape_sqlite(table),
where=(
'where {} '.format(' and '.join(where_clauses))
) if where_clauses else '',
)
count_sql = 'select count(*) {}'.format(from_sql)
_next = special_args.get('_next')
offset = ''
@ -933,6 +934,39 @@ class TableView(RowTableShared):
name, sql, params, truncate=True, **extra_args
)
# facets support
try:
facets = request.args['_facet']
except KeyError:
facets = table_metadata.get('facets', [])
facet_results = {}
for column in facets:
facet_sql = '''
select {col} as value, count(*) as count
{from_sql}
group by {col} order by count desc limit 20
'''.format(col=escape_sqlite(column), from_sql=from_sql)
try:
facet_rows = await self.execute(
name,
facet_sql,
params,
truncate=False,
custom_time_limit=200
)
facet_results[column] = [{
'value': row['value'],
'count': row['count'],
'toggle_url': urllib.parse.urljoin(
request.url, path_with_added_args(
request, {column: row['value']}
)
)
} for row in facet_rows]
except sqlite3.OperationalError:
# Hit time limit
pass
columns = [r[0] for r in description]
rows = list(rows)
@ -1043,6 +1077,7 @@ class TableView(RowTableShared):
'sql': sql,
'params': params,
},
'facet_results': facet_results,
'next': next_value and str(next_value) or None,
'next_url': next_url,
}, extra_template, (

View file

@ -91,6 +91,15 @@
<p>This data as <a href="{{ url_json }}">.json</a></p>
{% for facet_name, facet_values in facet_results.items() %}
<p><strong>{{ facet_name }}</strong></p>
<ul>
{% for facet_value in facet_values %}
<li><a href="{{ facet_value.toggle_url }}">{{ facet_value.value }}</a> ({{ facet_value.count }})</li>
{% endfor %}
</ul>
{% endfor %}
{% include custom_rows_and_columns_templates %}
{% if next_url %}

70
docs/facets.rst Normal file
View file

@ -0,0 +1,70 @@
.. _facets:
Facets
======
This feature is currently under development, see `#255 <https://github.com/simonw/datasette/issues/255>`_
Datasette facets can be used to add a faceted browse interface to any Datasette table. With facets, tables are displayed along with a summary showing the most common values in specified columns. These values can be selected to further filter the table.
Facets can be specified in two ways: using queryset parameters, or in ``metadata.json`` configuration for the table.
Facets in querystrings
----------------------
To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. For example, if you want to turn on facets for the ``city`` and ``state`` columns, construct a URL that looks like this::
/dbname/tablename?_facet=state&_facet=city
This works for both the HTML interface and the ``.json`` view. When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this::
"facet_results": {
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "http://...&state=CA"
},
{
"value": "MI",
"count": 4,
"toggle_url": "http://...&state=MI"
}
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "http://...=San+Francisco"
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "http://...&city=Detroit"
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "http://...=Los+Angeles"
}
]
}
Facets in metadata.json
-----------------------
You can turn facets on by default for specific tables by adding them to a ``"facets"`` key in a Datasette :ref:`metadata` file.
Here's an example that turns on faceting by default for the ``qLegalStatus`` column in the ``Street_Tree_List`` table in the ``sf-trees`` database::
{
"databases": {
"sf-trees": {
"tables": {
"Street_Tree_List": {
"facets": ["qLegalStatus"]
}
}
}
}
}

View file

@ -19,6 +19,7 @@ Contents
getting_started
json_api
sql_queries
facets
full_text_search
metadata
custom_templates

View file

@ -1,3 +1,5 @@
.. _metadata:
Metadata
========

View file

@ -267,6 +267,29 @@ CREATE TABLE [select] (
);
INSERT INTO [select] VALUES ('group', 'having', 'and');
CREATE TABLE facetable (
pk integer primary key,
state text,
city text,
neighborhood text
);
INSERT INTO facetable (state, city, neighborhood) VALUES
('CA', 'San Francisco', 'Mission'),
('CA', 'San Francisco', 'Dogpatch'),
('CA', 'San Francisco', 'SOMA'),
('CA', 'San Francisco', 'Tenderloin'),
('CA', 'San Francisco', 'Bernal Heights'),
('CA', 'San Francisco', 'Hayes Valley'),
('CA', 'Los Angeles', 'Hollywood'),
('CA', 'Los Angeles', 'Downtown'),
('CA', 'Los Angeles', 'Los Feliz'),
('CA', 'Los Angeles', 'Koreatown'),
('MI', 'Detroit', 'Downtown'),
('MI', 'Detroit', 'Greektown'),
('MI', 'Detroit', 'Corktown'),
('MI', 'Detroit', 'Mexicantown')
;
INSERT INTO simple_primary_key VALUES (1, 'hello');
INSERT INTO simple_primary_key VALUES (2, 'world');
INSERT INTO simple_primary_key VALUES (3, '');

View file

@ -19,7 +19,7 @@ def test_homepage(app_client):
assert response.json.keys() == {'test_tables': 0}.keys()
d = response.json['test_tables']
assert d['name'] == 'test_tables'
assert d['tables_count'] == 15
assert d['tables_count'] == 16
def test_database_page(app_client):
@ -104,6 +104,15 @@ def test_database_page(app_client):
'label_column': None,
'fts_table': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'state', 'city', 'neighborhood'],
'name': 'facetable',
'count': 14,
'foreign_keys': {'incoming': [], 'outgoing': []},
'fts_table': None,
'hidden': False,
'label_column': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'foreign_key_with_label', 'foreign_key_with_no_label'],
'name': 'foreign_key_references',
@ -878,3 +887,67 @@ def test_page_size_matching_max_returned_rows(app_client_returend_rows_matches_p
assert len(response.json['rows']) in (1, 50)
path = response.json['next_url']
assert 201 == len(fetched)
@pytest.mark.parametrize('path,expected_facet_results', [
(
"/test_tables/facetable.json?_facet=state&_facet=city",
{
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "_facet=state&_facet=city&state=CA",
},
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "_facet=state&_facet=city&city=San+Francisco",
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Detroit",
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Los+Angeles",
},
],
},
), (
"/test_tables/facetable.json?_facet=state&_facet=city&state=MI",
{
"state": [
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI&city=Detroit",
},
],
},
)
])
def test_facets(app_client, path, expected_facet_results):
response = app_client.get(path, gather_request=False)
facet_results = response.json['facet_results']
# We only compare the querystring portion of the taggle_url
for facet_name, facet_values in facet_results.items():
for facet_value in facet_values:
facet_value['toggle_url'] = facet_value['toggle_url'].split('?')[1]
assert expected_facet_results == facet_results