Compare commits

...

18 commits

Author SHA1 Message Date
Simon Willison
5c198f7ca5 Include table metadata when calculating applied facets
Still need to ensure facets from metadata cannot be hidden in the UI
2019-04-18 09:29:43 -07:00
Simon Willison
f5e7db07aa Removed table_count() method - I don't need it after all 2019-04-18 09:03:52 -07:00
Simon Willison
938e072ece Facet class now entirely configured in constructor 2019-04-18 09:02:46 -07:00
Simon Willison
8310ad2336 Moved MockRequest to tests/utils.py 2019-04-18 07:55:56 -07:00
Simon Willison
dae6f0c4d8 Removed rogue debug code 2019-04-18 07:53:47 -07:00
Simon Willison
ce0510f937
Merge branch 'master' into facet-refactor 2019-04-18 07:52:21 -07:00
Simon Willison
c7a11ab4e6 Only doing ColumnFacet for the moment - tidied up and added tests 2019-04-18 07:45:49 -07:00
Simon Willison
a64a39648d Cleaned up rebase mess I made 2019-04-16 21:16:09 -07:00
Simon Willison
f6cdca3f6e Tests should now pass for facets branch 2019-04-16 21:10:49 -07:00
Simon Willison
63e52c0936 WIP refactoring facets to plugin, refs #427 2019-04-16 21:10:49 -07:00
Simon Willison
b495839e60 Support multiple filters of the same type
Closes #288
2019-04-16 21:09:27 -07:00
Simon Willison
661488e964 New ?column__date=yyyy-mm-dd filter 2019-04-15 16:02:20 -07:00
Simon Willison
78e9972b46 New colname__in=x,y,z filter, closes #433 2019-04-15 16:02:20 -07:00
Simon Willison
c4645c0f2b Documentation for filters, plus new documentation unit test
https://simonwillison.net/2018/Jul/28/documentation-unit-tests/
2019-04-15 16:02:20 -07:00
Simon Willison
65e913fbbc Extract and refactor filters into filters.py
This will help in implementing __in as a filter, refs #433
2019-04-15 16:02:20 -07:00
Simon Willison
458f858712 Slightly more interesting example link 2019-04-15 16:02:20 -07:00
Simon Willison
62810f8f7a Continue sketching out new facet design, refs #427
Includes various new partially implemented facet classes, to help exercise the API design.
2019-04-15 07:01:39 -07:00
Simon Willison
538d91c44a WIP refactoring facets to plugin, refs #427 2019-04-13 13:03:59 -07:00
9 changed files with 396 additions and 125 deletions

View file

@ -579,6 +579,7 @@ class Datasette:
truncate=False,
custom_time_limit=None,
page_size=None,
log_sql_errors=True,
):
"""Executes sql against db_name in a thread"""
page_size = page_size or self.page_size
@ -604,12 +605,13 @@ class Datasette:
truncated = False
except sqlite3.OperationalError as e:
if e.args == ('interrupted',):
raise InterruptedError(e)
print(
"ERROR: conn={}, sql = {}, params = {}: {}".format(
conn, repr(sql), params, e
raise InterruptedError(e, sql, params)
if log_sql_errors:
print(
"ERROR: conn={}, sql = {}, params = {}: {}".format(
conn, repr(sql), params, e
)
)
)
raise
if truncate:

238
datasette/facets.py Normal file
View file

@ -0,0 +1,238 @@
import json
import urllib
import re
from datasette import hookimpl
from datasette.utils import (
escape_sqlite,
get_all_foreign_keys,
path_with_added_args,
path_with_removed_args,
detect_json1,
InterruptedError,
InvalidSql,
sqlite3,
)
def load_facet_configs(request, table_metadata):
    """Collect the facet configurations selected for a table.

    Facets can be configured in two places, and both are merged here:

    * the ``"facets"`` list in the table metadata, where each entry is
      either a plain string (shorthand for a ``"column"`` facet) or a
      single-item ``{type: config}`` dictionary;
    * the query string, where ``_facet=...`` selects a ``"column"`` facet
      and ``_facet_<type>=...`` selects a facet of ``<type>``. A value
      starting with ``{`` is parsed as JSON; anything else is wrapped as
      ``{"simple": value}``.

    Args:
        request: Object exposing the raw query string as ``query_string``.
        table_metadata: Metadata dictionary for the table. ``None`` is
            treated the same as an empty dictionary.

    Returns:
        A dictionary mapping each facet type to its list of entries, with
        metadata entries first::

            {type: [
                {"source": "metadata", "config": config1},
                {"source": "request", "config": config2}]}

    Raises:
        AssertionError: If a metadata entry is a dictionary that does not
            hold exactly one item.
    """
    facet_configs = {}
    metadata_facets = (table_metadata or {}).get("facets", [])
    for metadata_config in metadata_facets:
        if isinstance(metadata_config, str):
            facet_type = "column"
            metadata_config = {"simple": metadata_config}
        else:
            # This should have a single key and a single value
            assert len(metadata_config) == 1, "Metadata config dicts should be {type: config}"
            # Dictionary views cannot be indexed, so unpack the only item
            ((facet_type, metadata_config),) = metadata_config.items()
            if isinstance(metadata_config, str):
                metadata_config = {"simple": metadata_config}
        facet_configs.setdefault(facet_type, []).append(
            {"source": "metadata", "config": metadata_config}
        )

    qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
    for key, values in qs_pairs.items():
        # Figure out the facet type
        if key == "_facet":
            facet_type = "column"
        elif key.startswith("_facet_"):
            facet_type = key[len("_facet_"):]
        else:
            # Not a facet argument. This also rejects look-alike keys such
            # as "_facetable", which have no facet type to file them under.
            continue
        for value in values:
            # The value is the config - either JSON or not
            if value.startswith("{"):
                config = json.loads(value)
            else:
                config = {"simple": value}
            facet_configs.setdefault(facet_type, []).append(
                {"source": "request", "config": config}
            )
    return facet_configs
@hookimpl
def register_facet_classes():
    """Hook implementation: return the facet classes defined in this module."""
    built_in_facet_classes = [ColumnFacet]
    return built_in_facet_classes
class Facet:
    """Base class for facet implementations.

    An instance is fully configured by its constructor and is bound to one
    request and one SQL query. Subclasses set ``type`` and override
    ``suggest()`` and ``facet_results()``.
    """

    # Identifier of the facet kind; subclasses override it.
    type = None

    def __init__(
        self,
        ds,
        request,
        database,
        sql=None,
        table=None,
        params=None,
        configs=None,
        row_count=None,
    ):
        """Store the query and configuration this facet operates on.

        Args:
            ds: Object whose ``execute()`` coroutine is used to run SQL.
            request: Request object; its ``query_string`` is read.
            database: Database name handed to ``ds.execute()``.
            sql: Query to facet over. When omitted, a ``select *`` over
                ``table`` is used instead.
            table: Table name, used for foreign key expansion. Can be
                None for e.g. canned SQL queries.
            params: Parameters for ``sql``; an empty list when omitted.
            configs: Facet configurations, stored unchanged.
            row_count: Number of rows returned by ``sql`` if already
                known; when None it is calculated on demand.

        Raises:
            AssertionError: If neither ``table`` nor ``sql`` is given.
        """
        assert table or sql, "Must provide either table= or sql="
        self.ds = ds
        self.request = request
        self.database = database
        self.table = table
        if sql:
            self.sql = sql
        else:
            self.sql = "select * from [{}]".format(table)
        self.params = params if params else []
        self.configs = configs
        self.row_count = row_count

    def get_querystring_pairs(self):
        """Return the request query string as a list of (key, value) pairs.

        ``?_foo=bar&_foo=2&empty=`` becomes
        ``[('_foo', 'bar'), ('_foo', '2'), ('empty', '')]``.
        """
        raw_query = self.request.query_string
        return urllib.parse.parse_qsl(raw_query, keep_blank_values=True)

    async def suggest(self):
        """Return suggested facets; the base class suggests none."""
        return []

    async def facet_results(self):
        """Return ``([results], [timed_out])``; subclasses must implement it.

        TODO: Include "hideable" with each one somehow, which indicates if
        it was defined in metadata (in which case you cannot turn it off)
        """
        raise NotImplementedError

    async def get_columns(self, sql, params=None):
        """Return the column names produced by ``sql``.

        Uses the "limit 0" trick: the query is wrapped so that it returns
        no rows but still reports its columns.
        """
        probe_sql = "select * from ({}) limit 0".format(sql)
        probe = await self.ds.execute(self.database, probe_sql, params or [])
        return probe.columns

    async def get_row_count(self):
        """Return the row count of ``self.sql``, counting at most once."""
        if self.row_count is not None:
            return self.row_count
        count_sql = "select count(*) from ({})".format(self.sql)
        counted = await self.ds.execute(self.database, count_sql, self.params)
        self.row_count = counted.rows[0][0]
        return self.row_count
class ColumnFacet(Facet):
    """Facet that groups the rows of a query by the values of one column."""

    type = "column"

    async def suggest(self):
        """Suggest columns that look worth faceting by.

        A column is suggested when it has more than one distinct non-null
        value, no more than ``default_facet_size`` of them, and fewer than
        the total number of rows. Columns that are already configured as
        facets on this instance are not suggested again, and columns whose
        check exceeds ``facet_suggest_time_limit_ms`` are skipped.

        Returns:
            A list of ``{"name": column, "toggle_url": url}`` dictionaries,
            where the URL adds ``_facet=column`` to the current request.
        """
        row_count = await self.get_row_count()
        columns = await self.get_columns(self.sql, self.params)
        facet_size = self.ds.config("default_facet_size")
        # Columns already being faceted should not be offered a second time
        already_enabled = [
            config.get("column") or config.get("simple")
            for config in self.configs or []
        ]
        suggested_facets = []
        for column in columns:
            if column in already_enabled:
                continue
            # Fetch one value more than facet_size so that "too many
            # distinct values" can be told apart from "exactly facet_size"
            suggested_facet_sql = (
                "\n"
                "select distinct {column} from (\n"
                "{sql}\n"
                ") where {column} is not null\n"
                "limit {limit}\n"
            ).format(column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1)
            try:
                distinct_values = await self.ds.execute(
                    self.database,
                    suggested_facet_sql,
                    self.params,
                    truncate=False,
                    custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
                )
            except InterruptedError:
                # Could not be evaluated within the time limit - skip it
                continue
            num_distinct_values = len(distinct_values)
            if (
                1 < num_distinct_values <= facet_size
                and num_distinct_values < row_count
            ):
                suggested_facets.append(
                    {
                        "name": column,
                        "toggle_url": self.ds.absolute_url(
                            self.request,
                            path_with_added_args(self.request, {"_facet": column}),
                        ),
                    }
                )
        return suggested_facets

    async def facet_results(self):
        """Calculate the value counts for every configured column.

        Each config names its column under ``"column"`` or ``"simple"``.
        At most ``default_facet_size`` values are returned per column,
        ordered by descending count.

        Returns:
            A ``(facet_results, facets_timed_out)`` tuple. ``facet_results``
            maps a column name to a dictionary with ``"name"``,
            ``"results"`` and ``"truncated"``; every result carries
            ``"value"``, ``"label"``, ``"count"``, ``"toggle_url"`` and
            ``"selected"``. ``facets_timed_out`` lists the columns whose
            query was interrupted.
        """
        facet_results = {}
        facets_timed_out = []
        qs_pairs = self.get_querystring_pairs()
        facet_size = self.ds.config("default_facet_size")
        for config in self.configs or []:
            column = config.get("column") or config["simple"]
            # One extra row is requested so truncation can be detected
            facet_sql = (
                "\n"
                "select {col} as value, count(*) as count from (\n"
                "{sql}\n"
                ")\n"
                "where {col} is not null\n"
                "group by {col} order by count desc limit {limit}\n"
            ).format(col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1)
            try:
                facet_rows_results = await self.ds.execute(
                    self.database,
                    facet_sql,
                    self.params,
                    truncate=False,
                    custom_time_limit=self.ds.config("facet_time_limit_ms"),
                )
                facet_results_values = []
                facet_results[column] = {
                    "name": column,
                    "results": facet_results_values,
                    "truncated": len(facet_rows_results) > facet_size,
                }
                facet_rows = facet_rows_results.rows[:facet_size]
                if self.table:
                    # Attempt to expand foreign keys into labels
                    values = [row["value"] for row in facet_rows]
                    expanded = await self.ds.expand_foreign_keys(
                        self.database, self.table, column, values
                    )
                else:
                    expanded = {}
                for row in facet_rows:
                    # Query string values are text, so compare as strings
                    selected = (column, str(row["value"])) in qs_pairs
                    if selected:
                        toggle_path = path_with_removed_args(
                            self.request, {column: str(row["value"])}
                        )
                    else:
                        toggle_path = path_with_added_args(
                            self.request, {column: row["value"]}
                        )
                    facet_results_values.append(
                        {
                            "value": row["value"],
                            "label": expanded.get((column, row["value"]), row["value"]),
                            "count": row["count"],
                            "toggle_url": self.ds.absolute_url(
                                self.request, toggle_path
                            ),
                            "selected": selected,
                        }
                    )
            except InterruptedError:
                facets_timed_out.append(column)
        return facet_results, facets_timed_out

View file

@ -38,3 +38,8 @@ def publish_subcommand(publish):
@hookspec(firstresult=True)
def render_cell(value, column, table, database, datasette):
    """Customize rendering of HTML table cell values.

    This is a hook specification only: it declares the arguments and has
    no implementation of its own.
    """
@hookspec
def register_facet_classes():
    """Register Facet subclasses.

    This is a hook specification only. Implementations return a list of
    ``Facet`` subclasses.
    """

View file

@ -5,6 +5,7 @@ from . import hookspecs
DEFAULT_PLUGINS = (
"datasette.publish.heroku",
"datasette.publish.now",
"datasette.facets",
)
pm = pluggy.PluginManager("datasette")

View file

@ -110,7 +110,7 @@
{% if suggested_facets %}
<p class="suggested-facets">
Suggested facets: {% for facet in suggested_facets %}<a href="{{ facet.toggle_url }}#facet-{{ facet.name|to_css_class }}">{{ facet.name }}</a>{% if not loop.last %}, {% endif %}{% endfor %}
Suggested facets: {% for facet in suggested_facets %}<a href="{{ facet.toggle_url }}#facet-{{ facet.name|to_css_class }}">{{ facet.name }}</a>{% if facet.type %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %}
</p>
{% endif %}

View file

@ -1,9 +1,11 @@
import urllib
import itertools
import jinja2
from sanic.exceptions import NotFound
from sanic.request import RequestParameters
from datasette.facets import load_facet_configs
from datasette.plugins import pm
from datasette.utils import (
CustomRow,
@ -348,9 +350,8 @@ class TableView(RowTableShared):
"where {} ".format(" and ".join(where_clauses))
) if where_clauses else "",
)
# Store current params and where_clauses for later:
# Copy of params so we can mutate them later:
from_sql_params = dict(**params)
from_sql_where_clauses = where_clauses[:]
count_sql = "select count(*) {}".format(from_sql)
@ -462,11 +463,14 @@ class TableView(RowTableShared):
else:
page_size = self.ds.page_size
sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
sql_no_limit = "select {select} from {table_name} {where}{order_by}".format(
select=select,
table_name=escape_sqlite(table),
where=where_clause,
order_by=order_by,
)
sql = "{sql_no_limit} limit {limit}{offset}".format(
sql_no_limit=sql_no_limit.rstrip(),
limit=page_size + 1,
offset=offset,
)
@ -478,72 +482,49 @@ class TableView(RowTableShared):
database, sql, params, truncate=True, **extra_args
)
# Number of filtered rows in whole set:
filtered_table_rows_count = None
if count_sql:
try:
count_rows = list(await self.ds.execute(
database, count_sql, from_sql_params
))
filtered_table_rows_count = count_rows[0][0]
except InterruptedError:
pass
# facets support
facet_size = self.ds.config("default_facet_size")
metadata_facets = table_metadata.get("facets", [])
facets = metadata_facets[:]
if request.args.get("_facet") and not self.ds.config("allow_facet"):
if not self.ds.config("allow_facet") and any(arg.startswith("_facet") for arg in request.args):
raise DatasetteError("_facet= is not allowed", status=400)
try:
facets.extend(request.args["_facet"])
except KeyError:
pass
facet_configs = load_facet_configs(request, table_metadata)
# pylint: disable=no-member
facet_classes = list(
itertools.chain.from_iterable(pm.hook.register_facet_classes())
)
facet_results = {}
facets_timed_out = []
for column in facets:
if _next:
continue
facet_sql = """
select {col} as value, count(*) as count
{from_sql} {and_or_where} {col} is not null
group by {col} order by count desc limit {limit}
""".format(
col=escape_sqlite(column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1,
)
try:
facet_rows_results = await self.ds.execute(
database, facet_sql, params,
truncate=False,
custom_time_limit=self.ds.config("facet_time_limit_ms"),
)
facet_results_values = []
facet_results[column] = {
"name": column,
"results": facet_results_values,
"truncated": len(facet_rows_results) > facet_size,
}
facet_rows = facet_rows_results.rows[:facet_size]
# Attempt to expand foreign keys into labels
values = [row["value"] for row in facet_rows]
expanded = (await self.ds.expand_foreign_keys(
database, table, column, values
))
for row in facet_rows:
selected = (column, str(row["value"])) in other_args
if selected:
toggle_path = path_with_removed_args(
request, {column: str(row["value"])}
)
else:
toggle_path = path_with_added_args(
request, {column: row["value"]}
)
facet_results_values.append({
"value": row["value"],
"label": expanded.get(
(column, row["value"]),
row["value"]
),
"count": row["count"],
"toggle_url": self.ds.absolute_url(request, toggle_path),
"selected": selected,
})
except InterruptedError:
facets_timed_out.append(column)
facet_instances = []
for klass in facet_classes:
facet_instances.append(klass(
self.ds,
request,
database,
sql=sql_no_limit,
params=params,
table=table,
configs=[
fc["config"] for fc in facet_configs.get(klass.type, [])
],
row_count=filtered_table_rows_count,
))
for facet in facet_instances:
instance_facet_results, instance_facets_timed_out = await facet.facet_results()
facet_results.update(instance_facet_results)
facets_timed_out.extend(instance_facets_timed_out)
# Figure out columns and rows for the query
columns = [r[0] for r in results.description]
rows = list(results.rows)
@ -627,61 +608,14 @@ class TableView(RowTableShared):
)
rows = rows[:page_size]
# Number of filtered rows in whole set:
filtered_table_rows_count = None
if count_sql:
try:
count_rows = list(await self.ds.execute(
database, count_sql, from_sql_params
))
filtered_table_rows_count = count_rows[0][0]
except InterruptedError:
pass
# Detect suggested facets
suggested_facets = []
# Detect suggested facets
suggested_facets = []
if self.ds.config("suggest_facets") and self.ds.config("allow_facet"):
for facet_column in columns:
if facet_column in facets:
continue
if _next:
continue
if not self.ds.config("suggest_facets"):
continue
suggested_facet_sql = '''
select distinct {column} {from_sql}
{and_or_where} {column} is not null
limit {limit}
'''.format(
column=escape_sqlite(facet_column),
from_sql=from_sql,
and_or_where='and' if from_sql_where_clauses else 'where',
limit=facet_size+1
)
distinct_values = None
try:
distinct_values = await self.ds.execute(
database, suggested_facet_sql, from_sql_params,
truncate=False,
custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
)
num_distinct_values = len(distinct_values)
if (
num_distinct_values and
num_distinct_values > 1 and
num_distinct_values <= facet_size and
num_distinct_values < filtered_table_rows_count
):
suggested_facets.append({
'name': facet_column,
'toggle_url': self.ds.absolute_url(
request, path_with_added_args(
request, {"_facet": facet_column}
)
),
})
except InterruptedError:
pass
if self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next:
for facet in facet_instances:
# TODO: ensure facet is not suggested if it is already active
# used to use 'if facet_column in facets' for this
suggested_facets.extend(await facet.suggest())
# human_description_en combines filters AND search, if provided
human_description_en = filters.human_description_en(extra=search_descriptions)
@ -729,7 +663,7 @@ class TableView(RowTableShared):
),
"extra_wheres_for_ui": extra_wheres_for_ui,
"form_hidden_args": form_hidden_args,
"facet_hideable": lambda facet: facet not in metadata_facets,
"facet_hideable": lambda facet: facet not in [], # TODO: used to be metadata_facets fix this
"is_sortable": any(c["sortable"] for c in display_columns),
"path_with_replaced_args": path_with_replaced_args,
"path_with_removed_args": path_with_removed_args,

View file

@ -551,3 +551,12 @@ The ``template``, ``database`` and ``table`` options can be used to return diffe
The ``datasette`` instance is provided primarily so that you can consult any plugin configuration options that may have been set, using the ``datasette.plugin_config(plugin_name)`` method documented above.
The string that you return from this function will be treated as "safe" for inclusion in a ``<script>`` block directly in the page, so it is up to you to apply any necessary escaping.
.. _plugin_hook_register_facet_classes:
register_facet_classes()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Return a list of additional ``Facet`` subclasses that can be used to suggest and render facets.
For examples of how this can be used, see `datasette/facets.py <https://github.com/simonw/datasette/blob/master/datasette/facets.py>`__.

75
tests/test_facets.py Normal file
View file

@ -0,0 +1,75 @@
from datasette.facets import ColumnFacet
from .fixtures import app_client # noqa
from .utils import MockRequest
from collections import namedtuple
import pytest
@pytest.mark.asyncio
async def test_column_facet_suggest(app_client):
    """suggest() proposes the expected columns of the facetable table."""
    column_facet = ColumnFacet(
        app_client.ds,
        MockRequest("http://localhost/"),
        database="fixtures",
        sql="select * from facetable",
        table="facetable",
    )
    # (column name, URL that switches the facet on), in suggestion order
    expected_pairs = (
        ("planet_int", "http://localhost/?_facet=planet_int"),
        ("on_earth", "http://localhost/?_facet=on_earth"),
        ("state", "http://localhost/?_facet=state"),
        ("city_id", "http://localhost/?_facet=city_id"),
        ("neighborhood", "http://localhost/?_facet=neighborhood"),
        ("tags", "http://localhost/?_facet=tags"),
    )
    expected_suggestions = [
        {"name": name, "toggle_url": toggle_url}
        for name, toggle_url in expected_pairs
    ]
    actual_suggestions = await column_facet.suggest()
    assert expected_suggestions == actual_suggestions
@pytest.mark.asyncio
async def test_column_facet_results(app_client):
    """facet_results() returns labelled city_id buckets and no timeouts."""
    column_facet = ColumnFacet(
        app_client.ds,
        MockRequest("http://localhost/?_facet=city_id"),
        database="fixtures",
        sql="select * from facetable",
        table="facetable",
        configs=[{"simple": "city_id"}],
    )
    # (value, label, count, toggle_url) for each expected bucket, in order
    expected_rows = (
        (1, "San Francisco", 6, "http://localhost/?_facet=city_id?_facet=city_id&city_id=1"),
        (2, "Los Angeles", 4, "http://localhost/?_facet=city_id?_facet=city_id&city_id=2"),
        (3, "Detroit", 4, "http://localhost/?_facet=city_id?_facet=city_id&city_id=3"),
        (4, "Memnonia", 1, "http://localhost/?_facet=city_id?_facet=city_id&city_id=4"),
    )
    expected_buckets = {
        "city_id": {
            "name": "city_id",
            "results": [
                {
                    "value": value,
                    "label": label,
                    "count": count,
                    "toggle_url": toggle_url,
                    "selected": False,
                }
                for value, label, count, toggle_url in expected_rows
            ],
            "truncated": False,
        }
    }
    buckets, timed_out = await column_facet.facet_results()
    assert [] == timed_out
    assert expected_buckets == buckets

7
tests/utils.py Normal file
View file

@ -0,0 +1,7 @@
class MockRequest:
    """Minimal stand-in for a request object, built from a URL string.

    Attributes set by the constructor:
        url: The URL exactly as given.
        path: "/" followed by everything after the first "/" that comes
            after the scheme. Any "?query" suffix is kept as part of it.
        query_string: The text after the first "?", or "" if there is none.
    """

    def __init__(self, url):
        self.url = url
        # Split off the scheme once only, so a "://" later in the URL
        # (for example inside a query value) is left untouched
        after_scheme = url.split("://", 1)[-1]
        # partition() copes with URLs that have nothing after the host,
        # such as "http://localhost", yielding the root path
        _, _, after_host = after_scheme.partition("/")
        self.path = "/" + after_host
        self.query_string = url.partition("?")[2]