Consider just 1000 rows for suggest facet, closes #2406

This commit is contained in:
Simon Willison 2024-08-21 13:36:42 -07:00
commit f28ff8e4f0
2 changed files with 65 additions and 19 deletions

View file

@ -65,6 +65,8 @@ def register_facet_classes():
class Facet: class Facet:
type = None type = None
# How many rows to consider when suggesting facets:
suggest_consider = 1000
def __init__( def __init__(
self, self,
@ -145,17 +147,6 @@ class Facet:
) )
).columns ).columns
async def get_row_count(self):
    """Return the number of rows produced by ``self.sql``.

    The count is computed once by wrapping ``self.sql`` in a
    ``select count(*)`` query run through ``self.ds.execute`` and is then
    cached on ``self.row_count``; later calls return the cached value
    without running another query.
    """
    # Guard clause: a previously computed count is reused as-is.
    if self.row_count is not None:
        return self.row_count
    count_sql = f"select count(*) from ({self.sql})"
    result = await self.ds.execute(self.database, count_sql, self.params)
    # The query yields a single row with a single column: the count.
    self.row_count = result.rows[0][0]
    return self.row_count
class ColumnFacet(Facet): class ColumnFacet(Facet):
type = "column" type = "column"
@ -170,13 +161,16 @@ class ColumnFacet(Facet):
if column in already_enabled: if column in already_enabled:
continue continue
suggested_facet_sql = """ suggested_facet_sql = """
select {column} as value, count(*) as n from ( with limited as (select * from ({sql}) limit {suggest_consider})
{sql} select {column} as value, count(*) as n from limited
) where value is not null where value is not null
group by value group by value
limit {limit} limit {limit}
""".format( """.format(
column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 column=escape_sqlite(column),
sql=self.sql,
limit=facet_size + 1,
suggest_consider=self.suggest_consider,
) )
distinct_values = None distinct_values = None
try: try:
@ -211,6 +205,17 @@ class ColumnFacet(Facet):
continue continue
return suggested_facets return suggested_facets
async def get_row_count(self):
    """Return the row count for ``self.sql``, capped at ``self.suggest_consider``.

    The inner query is limited to ``self.suggest_consider`` rows before
    counting, so the result never exceeds that limit. The value is computed
    once through ``self.ds.execute`` and cached on ``self.row_count``; later
    calls return the cached value without running another query.
    """
    # Guard clause: a previously computed count is reused as-is.
    if self.row_count is not None:
        return self.row_count
    count_sql = f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})"
    result = await self.ds.execute(self.database, count_sql, self.params)
    # The query yields a single row with a single column: the count.
    self.row_count = result.rows[0][0]
    return self.row_count
async def facet_results(self): async def facet_results(self):
facet_results = [] facet_results = []
facets_timed_out = [] facets_timed_out = []
@ -313,11 +318,14 @@ class ArrayFacet(Facet):
continue continue
# Is every value in this column either null or a JSON array? # Is every value in this column either null or a JSON array?
suggested_facet_sql = """ suggested_facet_sql = """
with limited as (select * from ({sql}) limit {suggest_consider})
select distinct json_type({column}) select distinct json_type({column})
from ({sql}) from limited
where {column} is not null and {column} != '' where {column} is not null and {column} != ''
""".format( """.format(
column=escape_sqlite(column), sql=self.sql column=escape_sqlite(column),
sql=self.sql,
suggest_consider=self.suggest_consider,
) )
try: try:
results = await self.ds.execute( results = await self.ds.execute(
@ -402,7 +410,9 @@ class ArrayFacet(Facet):
order by order by
count(*) desc, value limit {limit} count(*) desc, value limit {limit}
""".format( """.format(
col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 col=escape_sqlite(column),
sql=self.sql,
limit=facet_size + 1,
) )
try: try:
facet_rows_results = await self.ds.execute( facet_rows_results = await self.ds.execute(

View file

@ -1,6 +1,6 @@
from datasette.app import Datasette from datasette.app import Datasette
from datasette.database import Database from datasette.database import Database
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet
from datasette.utils.asgi import Request from datasette.utils.asgi import Request
from datasette.utils import detect_json1 from datasette.utils import detect_json1
from .fixtures import make_app_client from .fixtures import make_app_client
@ -662,3 +662,39 @@ async def test_facet_against_in_memory_database():
assert response1.status_code == 200 assert response1.status_code == 200
response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2") response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2")
assert response2.status_code == 200 assert response2.status_code == 200
@pytest.mark.asyncio
async def test_facet_only_considers_first_x_rows():
    """Suggested facets are derived from the first ``Facet.suggest_consider`` rows.

    The table holds 100 rows: the first 50 alternate between two values of
    ``col`` and the last 50 each carry a distinct value. With
    ``suggest_consider`` at 40 the ``col`` facet is suggested; with it at
    100 no facet is suggested.
    """
    # This test works by manually fiddling with Facet.suggest_consider,
    # a class attribute, so it is always restored in the finally block.
    ds = Datasette()
    original_suggest_consider = Facet.suggest_consider
    try:
        Facet.suggest_consider = 40
        db = ds.add_memory_database("test_facet_only_x_rows")
        await db.execute_write("create table t (id integer primary key, col text)")
        # First 50 rows alternate between two values, so col looks facetable
        to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)]
        await db.execute_write_many("insert into t (col) values (:col)", to_insert)
        # Next 50 rows each have a different value, breaking that assumption
        to_insert2 = [{"col": f"x{i}"} for i in range(50)]
        await db.execute_write_many("insert into t (col) values (:col)", to_insert2)
        response = await ds.client.get(
            "/test_facet_only_x_rows/t.json?_extra=suggested_facets"
        )
        # Fail on the HTTP status first so a broken request is not reported
        # as a confusing payload mismatch.
        assert response.status_code == 200
        data = response.json()
        assert data["suggested_facets"] == [
            {
                "name": "col",
                "toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col",
            }
        ]
        # But if suggest_consider covers all 100 rows, col is not suggested
        Facet.suggest_consider = 100
        response2 = await ds.client.get(
            "/test_facet_only_x_rows/t.json?_extra=suggested_facets"
        )
        assert response2.status_code == 200
        data2 = response2.json()
        assert data2["suggested_facets"] == []
    finally:
        Facet.suggest_consider = original_suggest_consider