diff --git a/datasette/app.py b/datasette/app.py index 460464ab..d092e1ad 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -579,6 +579,7 @@ class Datasette: truncate=False, custom_time_limit=None, page_size=None, + log_sql_errors=True, ): """Executes sql against db_name in a thread""" page_size = page_size or self.page_size @@ -604,12 +605,13 @@ class Datasette: truncated = False except sqlite3.OperationalError as e: if e.args == ('interrupted',): - raise InterruptedError(e) - print( - "ERROR: conn={}, sql = {}, params = {}: {}".format( - conn, repr(sql), params, e + raise InterruptedError(e, sql, params) + if log_sql_errors: + print( + "ERROR: conn={}, sql = {}, params = {}: {}".format( + conn, repr(sql), params, e + ) ) - ) raise if truncate: diff --git a/datasette/facets.py b/datasette/facets.py new file mode 100644 index 00000000..1ba8a67c --- /dev/null +++ b/datasette/facets.py @@ -0,0 +1,238 @@ +import json +import urllib +import re +from datasette import hookimpl +from datasette.utils import ( + escape_sqlite, + get_all_foreign_keys, + path_with_added_args, + path_with_removed_args, + detect_json1, + InterruptedError, + InvalidSql, + sqlite3, +) + + +def load_facet_configs(request, table_metadata): + # Given a request and the metadata configuration for a table, return + # a dictionary of selected facets, their lists of configs and for each + # config whether it came from the request or the metadata. + # + # return {type: [ + # {"source": "metadata", "config": config1}, + # {"source": "request", "config": config2}]} + facet_configs = {} + metadata_facets = table_metadata.get("facets", []) + for metadata_config in metadata_facets: + if isinstance(metadata_config, str): + type = "column" + metadata_config = {"simple": metadata_config} + else: + # This should have a single key and a single value + assert len(metadata_config.values()) == 1, "Metadata config dicts should be {type: config}" + type, metadata_config = metadata_config.items()[0] + if isinstance(metadata_config, str): + metadata_config = {"simple": metadata_config} + facet_configs.setdefault(type, []).append({ + "source": "metadata", + "config": metadata_config + }) + qs_pairs = urllib.parse.parse_qs(request.query_string, keep_blank_values=True) + for key, values in qs_pairs.items(): + if key.startswith("_facet"): + # Figure out the facet type + if key == "_facet": + type = "column" + elif key.startswith("_facet_"): + type = key[len("_facet_") :] + for value in values: + # The value is the config - either JSON or not + if value.startswith("{"): + config = json.loads(value) + else: + config = {"simple": value} + facet_configs.setdefault(type, []).append({ + "source": "request", + "config": config + }) + return facet_configs + + +@hookimpl +def register_facet_classes(): + return [ColumnFacet] + + +class Facet: + type = None + + def __init__( + self, + ds, + request, + database, + sql=None, + table=None, + params=None, + configs=None, + row_count=None, + ): + assert table or sql, "Must provide either table= or sql=" + self.ds = ds + self.request = request + self.database = database + # For foreign key expansion. Can be None for e.g. canned SQL queries: + self.table = table + self.sql = sql or "select * from [{}]".format(table) + self.params = params or [] + self.configs = configs + # row_count can be None, in which case we calculate it ourselves: + self.row_count = row_count + + def get_querystring_pairs(self): + # ?_foo=bar&_foo=2&empty= becomes: + # [('_foo', 'bar'), ('_foo', '2'), ('empty', '')] + return urllib.parse.parse_qsl(self.request.query_string, keep_blank_values=True) + + async def suggest(self): + return [] + + async def facet_results(self): + # returns ([results], [timed_out]) + # TODO: Include "hideable" with each one somehow, which indicates if it was + # defined in metadata (in which case you cannot turn it off) + raise NotImplementedError + + async def get_columns(self, sql, params=None): + # Detect column names using the "limit 0" trick + return ( + await self.ds.execute( + self.database, "select * from ({}) limit 0".format(sql), params or [] + ) + ).columns + + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + "select count(*) from ({})".format(self.sql), + self.params, + ) + ).rows[0][0] + return self.row_count + + +class ColumnFacet(Facet): + type = "column" + + async def suggest(self): + row_count = await self.get_row_count() + columns = await self.get_columns(self.sql, self.params) + facet_size = self.ds.config("default_facet_size") + suggested_facets = [] + for column in columns: + suggested_facet_sql = """ + select distinct {column} from ( + {sql} + ) where {column} is not null + limit {limit} + """.format( + column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + ) + distinct_values = None + try: + distinct_values = await self.ds.execute( + self.database, + suggested_facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + ) + num_distinct_values = len(distinct_values) + if ( + num_distinct_values + and num_distinct_values > 1 + and num_distinct_values <= facet_size + and num_distinct_values < row_count + ): + suggested_facets.append( + { + "name": column, + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args(self.request, {"_facet": column}), + ), + } + ) + except InterruptedError: + continue + return suggested_facets + + async def facet_results(self): + facet_results = {} + facets_timed_out = [] + + qs_pairs = self.get_querystring_pairs() + + facet_size = self.ds.config("default_facet_size") + for config in self.configs or []: + column = config.get("column") or config["simple"] + facet_sql = """ + select {col} as value, count(*) as count from ( + {sql} + ) + where {col} is not null + group by {col} order by count desc limit {limit} + """.format( + col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + ) + try: + facet_rows_results = await self.ds.execute( + self.database, + facet_sql, + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_time_limit_ms"), + ) + facet_results_values = [] + facet_results[column] = { + "name": column, + "results": facet_results_values, + "truncated": len(facet_rows_results) > facet_size, + } + facet_rows = facet_rows_results.rows[:facet_size] + if self.table: + # Attempt to expand foreign keys into labels + values = [row["value"] for row in facet_rows] + expanded = await self.ds.expand_foreign_keys( + self.database, self.table, column, values + ) + else: + expanded = {} + for row in facet_rows: + selected = (column, str(row["value"])) in qs_pairs + if selected: + toggle_path = path_with_removed_args( + self.request, {column: str(row["value"])} + ) + else: + toggle_path = path_with_added_args( + self.request, {column: row["value"]} + ) + facet_results_values.append( + { + "value": row["value"], + "label": expanded.get((column, row["value"]), row["value"]), + "count": row["count"], + "toggle_url": self.ds.absolute_url( + self.request, toggle_path + ), + "selected": selected, + } + ) + except InterruptedError: + facets_timed_out.append(column) + + return facet_results, facets_timed_out diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 6db95344..d244ba70 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -38,3 +38,8 @@ def publish_subcommand(publish): @hookspec(firstresult=True) def render_cell(value, column, table, database, datasette): "Customize rendering of HTML table cell values" + + +@hookspec +def register_facet_classes(): + "Register Facet subclasses" diff --git a/datasette/plugins.py b/datasette/plugins.py index 2d2c62e4..245df6b3 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -5,6 +5,7 @@ from . import hookspecs DEFAULT_PLUGINS = ( "datasette.publish.heroku", "datasette.publish.now", + "datasette.facets", ) pm = pluggy.PluginManager("datasette") diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 1c65aa10..730a78ff 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -110,7 +110,7 @@ {% if suggested_facets %}
- Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if not loop.last %}, {% endif %}{% endfor %} + Suggested facets: {% for facet in suggested_facets %}{{ facet.name }}{% if facet.type %} ({{ facet.type }}){% endif %}{% if not loop.last %}, {% endif %}{% endfor %}
{% endif %} diff --git a/datasette/views/table.py b/datasette/views/table.py index bc5e775e..41d99ae9 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1,9 +1,11 @@ import urllib +import itertools import jinja2 from sanic.exceptions import NotFound from sanic.request import RequestParameters +from datasette.facets import load_facet_configs from datasette.plugins import pm from datasette.utils import ( CustomRow, @@ -348,9 +350,8 @@ class TableView(RowTableShared): "where {} ".format(" and ".join(where_clauses)) ) if where_clauses else "", ) - # Store current params and where_clauses for later: + # Copy of params so we can mutate them later: from_sql_params = dict(**params) - from_sql_where_clauses = where_clauses[:] count_sql = "select count(*) {}".format(from_sql) @@ -462,11 +463,14 @@ class TableView(RowTableShared): else: page_size = self.ds.page_size - sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format( + sql_no_limit = "select {select} from {table_name} {where}{order_by}".format( select=select, table_name=escape_sqlite(table), where=where_clause, order_by=order_by, + ) + sql = "{sql_no_limit} limit {limit}{offset}".format( + sql_no_limit=sql_no_limit.rstrip(), limit=page_size + 1, offset=offset, ) @@ -478,72 +482,49 @@ class TableView(RowTableShared): database, sql, params, truncate=True, **extra_args ) + # Number of filtered rows in whole set: + filtered_table_rows_count = None + if count_sql: + try: + count_rows = list(await self.ds.execute( + database, count_sql, from_sql_params + )) + filtered_table_rows_count = count_rows[0][0] + except InterruptedError: + pass + # facets support - facet_size = self.ds.config("default_facet_size") - metadata_facets = table_metadata.get("facets", []) - facets = metadata_facets[:] - if request.args.get("_facet") and not self.ds.config("allow_facet"): + if not self.ds.config("allow_facet") and any(arg.startswith("_facet") for arg in request.args): raise DatasetteError("_facet= is not allowed", status=400) - try: - facets.extend(request.args["_facet"]) - except KeyError: - pass + facet_configs = load_facet_configs(request, table_metadata) + + # pylint: disable=no-member + facet_classes = list( + itertools.chain.from_iterable(pm.hook.register_facet_classes()) + ) facet_results = {} facets_timed_out = [] - for column in facets: - if _next: - continue - facet_sql = """ - select {col} as value, count(*) as count - {from_sql} {and_or_where} {col} is not null - group by {col} order by count desc limit {limit} - """.format( - col=escape_sqlite(column), - from_sql=from_sql, - and_or_where='and' if from_sql_where_clauses else 'where', - limit=facet_size+1, - ) - try: - facet_rows_results = await self.ds.execute( - database, facet_sql, params, - truncate=False, - custom_time_limit=self.ds.config("facet_time_limit_ms"), - ) - facet_results_values = [] - facet_results[column] = { - "name": column, - "results": facet_results_values, - "truncated": len(facet_rows_results) > facet_size, - } - facet_rows = facet_rows_results.rows[:facet_size] - # Attempt to expand foreign keys into labels - values = [row["value"] for row in facet_rows] - expanded = (await self.ds.expand_foreign_keys( - database, table, column, values - )) - for row in facet_rows: - selected = (column, str(row["value"])) in other_args - if selected: - toggle_path = path_with_removed_args( - request, {column: str(row["value"])} - ) - else: - toggle_path = path_with_added_args( - request, {column: row["value"]} - ) - facet_results_values.append({ - "value": row["value"], - "label": expanded.get( - (column, row["value"]), - row["value"] - ), - "count": row["count"], - "toggle_url": self.ds.absolute_url(request, toggle_path), - "selected": selected, - }) - except InterruptedError: - facets_timed_out.append(column) + facet_instances = [] + for klass in facet_classes: + facet_instances.append(klass( + self.ds, + request, + database, + sql=sql_no_limit, + params=params, + table=table, + configs=[ + fc["config"] for fc in facet_configs.get(klass.type, []) + ], + row_count=filtered_table_rows_count, + )) + for facet in facet_instances: + instance_facet_results, instance_facets_timed_out = await facet.facet_results() + facet_results.update(instance_facet_results) + facets_timed_out.extend(instance_facets_timed_out) + + # Figure out columns and rows for the query columns = [r[0] for r in results.description] rows = list(results.rows) @@ -627,61 +608,14 @@ class TableView(RowTableShared): ) rows = rows[:page_size] - # Number of filtered rows in whole set: - filtered_table_rows_count = None - if count_sql: - try: - count_rows = list(await self.ds.execute( - database, count_sql, from_sql_params - )) - filtered_table_rows_count = count_rows[0][0] - except InterruptedError: - pass + # Detect suggested facets + suggested_facets = [] - # Detect suggested facets - suggested_facets = [] - if self.ds.config("suggest_facets") and self.ds.config("allow_facet"): - for facet_column in columns: - if facet_column in facets: - continue - if _next: - continue - if not self.ds.config("suggest_facets"): - continue - suggested_facet_sql = ''' - select distinct {column} {from_sql} - {and_or_where} {column} is not null - limit {limit} - '''.format( - column=escape_sqlite(facet_column), - from_sql=from_sql, - and_or_where='and' if from_sql_where_clauses else 'where', - limit=facet_size+1 - ) - distinct_values = None - try: - distinct_values = await self.ds.execute( - database, suggested_facet_sql, from_sql_params, - truncate=False, - custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), - ) - num_distinct_values = len(distinct_values) - if ( - num_distinct_values and - num_distinct_values > 1 and - num_distinct_values <= facet_size and - num_distinct_values < filtered_table_rows_count - ): - suggested_facets.append({ - 'name': facet_column, - 'toggle_url': self.ds.absolute_url( - request, path_with_added_args( - request, {"_facet": facet_column} - ) - ), - }) - except InterruptedError: - pass + if self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next: + for facet in facet_instances: + # TODO: ensure facet is not suggested if it is already active + # used to use 'if facet_column in facets' for this + suggested_facets.extend(await facet.suggest()) # human_description_en combines filters AND search, if provided human_description_en = filters.human_description_en(extra=search_descriptions) @@ -729,7 +663,7 @@ class TableView(RowTableShared): ), "extra_wheres_for_ui": extra_wheres_for_ui, "form_hidden_args": form_hidden_args, - "facet_hideable": lambda facet: facet not in metadata_facets, + "facet_hideable": lambda facet: facet not in [], # TODO: used to be metadata_facets fix this "is_sortable": any(c["sortable"] for c in display_columns), "path_with_replaced_args": path_with_replaced_args, "path_with_removed_args": path_with_removed_args, diff --git a/docs/plugins.rst b/docs/plugins.rst index 984e5c95..103e8a2b 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -551,3 +551,12 @@ The ``template``, ``database`` and ``table`` options can be used to return diffe The ``datasette`` instance is provided primarily so that you can consult any plugin configuration options that may have been set, using the ``datasette.plugin_config(plugin_name)`` method documented above. The string that you return from this function will be treated as "safe" for inclusion in a ``