--limit= mechanism plus new limits for facets

Replaced the --max_returned_rows and --sql_time_limit_ms options to
"datasette serve" with a new --limit option, which supports a larger
list of limits.

Example usage:

	datasette serve --limit max_returned_rows:1000 \
		--limit sql_time_limit_ms:2500 \
		--limit default_facet_size:50 \
		--limit facet_time_limit_ms:1000 \
		--limit facet_suggest_time_limit_ms:500

New docs: https://datasette.readthedocs.io/en/latest/limits.html

Closes #270
Closes #264
Simon Willison 2018-05-17 22:08:26 -07:00
commit cef9a9a870
10 changed files with 118 additions and 72 deletions
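
As a rough sketch of the programmatic equivalent of the CLI example above, the --limit name:value pairs arrive at the new limits= keyword argument of the Datasette constructor, shown in the first diff below ("fixtures.db" is a hypothetical filename):

    from datasette.app import Datasette

    # Limits passed here override DEFAULT_LIMITS; anything omitted
    # keeps its built-in default value.
    ds = Datasette(
        ["fixtures.db"],  # hypothetical database file
        limits={
            "sql_time_limit_ms": 2500,
            "default_facet_size": 50,
        },
    )
    assert ds.sql_time_limit_ms == 2500            # overridden
    assert ds.limits["max_returned_rows"] == 1000  # default kept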

View file

@@ -45,6 +45,15 @@ pm.add_hookspecs(hookspecs)
 pm.load_setuptools_entrypoints("datasette")
 
 
+DEFAULT_LIMITS = {
+    "max_returned_rows": 1000,
+    "sql_time_limit_ms": 1000,
+    "default_facet_size": 30,
+    "facet_time_limit_ms": 200,
+    "facet_suggest_time_limit_ms": 50,
+}
+
+
 class JsonDataView(RenderMixin):
     def __init__(self, datasette, filename, data_callback):
@@ -79,8 +88,6 @@ class Datasette:
         num_threads=3,
         cache_headers=True,
         page_size=100,
-        max_returned_rows=1000,
-        sql_time_limit_ms=1000,
         cors=False,
         inspect_data=None,
         metadata=None,
@@ -88,14 +95,13 @@ class Datasette:
         template_dir=None,
         plugins_dir=None,
         static_mounts=None,
+        limits=None,
     ):
         self.files = files
         self.num_threads = num_threads
         self.executor = futures.ThreadPoolExecutor(max_workers=num_threads)
         self.cache_headers = cache_headers
         self.page_size = page_size
-        self.max_returned_rows = max_returned_rows
-        self.sql_time_limit_ms = sql_time_limit_ms
         self.cors = cors
         self._inspect = inspect_data
         self.metadata = metadata or {}
@@ -104,6 +110,9 @@ class Datasette:
         self.template_dir = template_dir
         self.plugins_dir = plugins_dir
         self.static_mounts = static_mounts or []
+        self.limits = dict(DEFAULT_LIMITS, **(limits or {}))
+        self.max_returned_rows = self.limits["max_returned_rows"]
+        self.sql_time_limit_ms = self.limits["sql_time_limit_ms"]
         # Execute plugins in constructor, to ensure they are available
         # when the rest of `datasette inspect` executes
         if self.plugins_dir:
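
A minimal standalone sketch of the merge rule used in the constructor above: dict(DEFAULT_LIMITS, **(limits or {})) lets user-supplied values win while every unspecified limit keeps its default.

    DEFAULT_LIMITS = {
        "max_returned_rows": 1000,
        "sql_time_limit_ms": 1000,
        "default_facet_size": 30,
        "facet_time_limit_ms": 200,
        "facet_suggest_time_limit_ms": 50,
    }

    limits = {"sql_time_limit_ms": 2500}            # e.g. from --limit
    merged = dict(DEFAULT_LIMITS, **(limits or {}))
    assert merged["sql_time_limit_ms"] == 2500      # override applied
    assert merged["default_facet_size"] == 30       # default preserved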

View file

@@ -5,7 +5,7 @@ import os
 import shutil
 from subprocess import call, check_output
 import sys
-from .app import Datasette
+from .app import Datasette, DEFAULT_LIMITS
 from .utils import temporary_docker_directory, temporary_heroku_directory
@@ -15,7 +15,8 @@ class StaticMount(click.ParamType):
     def convert(self, value, param, ctx):
         if ":" not in value:
             self.fail(
-                '"%s" should be of format mountpoint:directory' % value, param, ctx
+                '"{}" should be of format mountpoint:directory'.format(value),
+                param, ctx
             )
         path, dirpath = value.split(":")
         if not os.path.exists(dirpath) or not os.path.isdir(dirpath):
@@ -23,6 +24,26 @@ class StaticMount(click.ParamType):
         return path, dirpath
 
 
+class Limit(click.ParamType):
+    name = "limit"
+
+    def convert(self, value, param, ctx):
+        ok = True
+        if ":" not in value:
+            ok = False
+        else:
+            name, intvalue = value.split(":")
+            ok = intvalue.isdigit()
+        if not ok:
+            self.fail(
+                '"{}" should be of format name:integer'.format(value),
+                param, ctx
+            )
+        if name not in DEFAULT_LIMITS:
+            self.fail("{} is not a valid limit".format(name), param, ctx)
+        return name, int(intvalue)
+
+
 @click.group(cls=DefaultGroup, default="serve", default_if_no_args=True)
 @click.version_option()
 def cli():
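
To illustrate the new Limit param type in isolation (a sketch only: param and ctx are None here, where click would normally supply real objects), convert() returns a (name, integer) tuple on success and calls self.fail() otherwise:

    limit_type = Limit()

    # Well-formed value: parsed into a (name, int) pair.
    assert limit_type.convert("max_returned_rows:500", None, None) == (
        "max_returned_rows", 500
    )

    # "foo:10" would fail because "foo" is not a key in DEFAULT_LIMITS;
    # "max_returned_rows:abc" would fail the isdigit() check.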
@@ -364,14 +385,6 @@ def package(
     "--cors", is_flag=True, help="Enable CORS by serving Access-Control-Allow-Origin: *"
 )
 @click.option("--page_size", default=100, help="Page size - default is 100")
-@click.option(
-    "--max_returned_rows",
-    default=1000,
-    help="Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.",
-)
-@click.option(
-    "--sql_time_limit_ms", default=1000, help="Max time allowed for SQL queries in ms"
-)
 @click.option(
     "sqlite_extensions",
     "--load-extension",
@@ -405,6 +418,12 @@ def package(
     help="mountpoint:path-to-directory for serving static files",
     multiple=True,
 )
+@click.option(
+    "--limit",
+    type=Limit(),
+    help="Set a limit using limitname:integer datasette.readthedocs.io/en/latest/limits.html",
+    multiple=True,
+)
 def serve(
     files,
     host,
@@ -413,14 +432,13 @@ def serve(
     reload,
     cors,
     page_size,
-    max_returned_rows,
-    sql_time_limit_ms,
     sqlite_extensions,
     inspect_file,
     metadata,
     template_dir,
     plugins_dir,
     static,
+    limit,
 ):
     """Serve up specified SQLite database files with a web UI"""
     if reload:
@@ -444,14 +462,13 @@ def serve(
         cache_headers=not debug and not reload,
         cors=cors,
         page_size=page_size,
-        max_returned_rows=max_returned_rows,
-        sql_time_limit_ms=sql_time_limit_ms,
         inspect_data=inspect_data,
         metadata=metadata_data,
         sqlite_extensions=sqlite_extensions,
         template_dir=template_dir,
         plugins_dir=plugins_dir,
         static_mounts=static,
+        limits=dict(limit),
     )
     # Force initial hashing/table counting
     ds.inspect()
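
Because --limit is declared with multiple=True and type=Limit(), click hands serve() a tuple of (name, integer) pairs, and dict(limit) collapses them into the mapping passed to Datasette. A small sketch of that conversion:

    limit = (
        ("max_returned_rows", 1000),
        ("sql_time_limit_ms", 2500),
    )
    assert dict(limit) == {
        "max_returned_rows": 1000,
        "sql_time_limit_ms": 2500,
    }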

View file

@@ -536,7 +536,7 @@ class TableView(RowTableShared):
         )
 
         # facets support
-        FACET_SIZE = 20
+        facet_size = self.ds.limits["default_facet_size"]
         metadata_facets = table_metadata.get("facets", [])
         facets = metadata_facets[:]
         try:
@@ -553,20 +553,21 @@ class TableView(RowTableShared):
                 col=escape_sqlite(column),
                 from_sql=from_sql,
                 and_or_where='and' if where_clauses else 'where',
-                limit=FACET_SIZE+1,
+                limit=facet_size+1,
             )
             try:
                 facet_rows = await self.execute(
                     name, facet_sql, params,
-                    truncate=False, custom_time_limit=200
+                    truncate=False,
+                    custom_time_limit=self.ds.limits["facet_time_limit_ms"],
                 )
                 facet_results_values = []
                 facet_results[column] = {
                     "name": column,
                     "results": facet_results_values,
-                    "truncated": len(facet_rows) > FACET_SIZE,
+                    "truncated": len(facet_rows) > facet_size,
                 }
-                facet_rows = facet_rows[:FACET_SIZE]
+                facet_rows = facet_rows[:facet_size]
                 # Attempt to expand foreign keys into labels
                 values = [row["value"] for row in facet_rows]
                 expanded = (await self.expand_foreign_keys(
@@ -644,7 +645,6 @@ class TableView(RowTableShared):
                 pass
 
         # Detect suggested facets
-        FACET_LIMIT = 30
         suggested_facets = []
         for facet_column in columns:
             if facet_column in facets:
@@ -657,19 +657,20 @@ class TableView(RowTableShared):
                 column=escape_sqlite(facet_column),
                 from_sql=from_sql,
                 and_or_where='and' if where_clauses else 'where',
-                limit=FACET_LIMIT+1
+                limit=facet_size+1
             )
             distinct_values = None
             try:
                 distinct_values = await self.execute(
                     name, suggested_facet_sql, params,
-                    truncate=False, custom_time_limit=50
+                    truncate=False,
+                    custom_time_limit=self.ds.limits["facet_suggest_time_limit_ms"],
                 )
                 num_distinct_values = len(distinct_values)
                 if (
                     num_distinct_values and
                     num_distinct_values > 1 and
-                    num_distinct_values <= FACET_LIMIT and
+                    num_distinct_values <= facet_size and
                     num_distinct_values < filtered_table_rows_count
                 ):
                     suggested_facets.append({
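
The facet queries above deliberately fetch facet_size + 1 rows: if the extra row comes back, more distinct values exist than will be shown, so the result is marked truncated without a second count query. A standalone sketch of the pattern, with made-up values:

    facet_size = 30
    rows = list(range(facet_size + 1))   # pretend the query returned 31 rows
    truncated = len(rows) > facet_size   # True: at least one value was cut off
    rows = rows[:facet_size]             # keep only the rows to display
    assert truncated and len(rows) == facet_size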