mirror of
https://github.com/simonw/datasette.git
synced 2025-12-10 16:51:24 +01:00
Consider just 1000 rows for suggest facet, closes #2406
This commit is contained in:
parent
8a63cdccc7
commit
f28ff8e4f0
2 changed files with 65 additions and 19 deletions
|
|
@ -65,6 +65,8 @@ def register_facet_classes():
|
|||
|
||||
class Facet:
|
||||
type = None
|
||||
# How many rows to consider when suggesting facets:
|
||||
suggest_consider = 1000
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -145,17 +147,6 @@ class Facet:
|
|||
)
|
||||
).columns
|
||||
|
||||
async def get_row_count(self):
|
||||
if self.row_count is None:
|
||||
self.row_count = (
|
||||
await self.ds.execute(
|
||||
self.database,
|
||||
f"select count(*) from ({self.sql})",
|
||||
self.params,
|
||||
)
|
||||
).rows[0][0]
|
||||
return self.row_count
|
||||
|
||||
|
||||
class ColumnFacet(Facet):
|
||||
type = "column"
|
||||
|
|
@ -170,13 +161,16 @@ class ColumnFacet(Facet):
|
|||
if column in already_enabled:
|
||||
continue
|
||||
suggested_facet_sql = """
|
||||
select {column} as value, count(*) as n from (
|
||||
{sql}
|
||||
) where value is not null
|
||||
with limited as (select * from ({sql}) limit {suggest_consider})
|
||||
select {column} as value, count(*) as n from limited
|
||||
where value is not null
|
||||
group by value
|
||||
limit {limit}
|
||||
""".format(
|
||||
column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
|
||||
column=escape_sqlite(column),
|
||||
sql=self.sql,
|
||||
limit=facet_size + 1,
|
||||
suggest_consider=self.suggest_consider,
|
||||
)
|
||||
distinct_values = None
|
||||
try:
|
||||
|
|
@ -211,6 +205,17 @@ class ColumnFacet(Facet):
|
|||
continue
|
||||
return suggested_facets
|
||||
|
||||
async def get_row_count(self):
|
||||
if self.row_count is None:
|
||||
self.row_count = (
|
||||
await self.ds.execute(
|
||||
self.database,
|
||||
f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})",
|
||||
self.params,
|
||||
)
|
||||
).rows[0][0]
|
||||
return self.row_count
|
||||
|
||||
async def facet_results(self):
|
||||
facet_results = []
|
||||
facets_timed_out = []
|
||||
|
|
@ -313,11 +318,14 @@ class ArrayFacet(Facet):
|
|||
continue
|
||||
# Is every value in this column either null or a JSON array?
|
||||
suggested_facet_sql = """
|
||||
with limited as (select * from ({sql}) limit {suggest_consider})
|
||||
select distinct json_type({column})
|
||||
from ({sql})
|
||||
from limited
|
||||
where {column} is not null and {column} != ''
|
||||
""".format(
|
||||
column=escape_sqlite(column), sql=self.sql
|
||||
column=escape_sqlite(column),
|
||||
sql=self.sql,
|
||||
suggest_consider=self.suggest_consider,
|
||||
)
|
||||
try:
|
||||
results = await self.ds.execute(
|
||||
|
|
@ -402,7 +410,9 @@ class ArrayFacet(Facet):
|
|||
order by
|
||||
count(*) desc, value limit {limit}
|
||||
""".format(
|
||||
col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
|
||||
col=escape_sqlite(column),
|
||||
sql=self.sql,
|
||||
limit=facet_size + 1,
|
||||
)
|
||||
try:
|
||||
facet_rows_results = await self.ds.execute(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from datasette.app import Datasette
|
||||
from datasette.database import Database
|
||||
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
|
||||
from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet
|
||||
from datasette.utils.asgi import Request
|
||||
from datasette.utils import detect_json1
|
||||
from .fixtures import make_app_client
|
||||
|
|
@ -662,3 +662,39 @@ async def test_facet_against_in_memory_database():
|
|||
assert response1.status_code == 200
|
||||
response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2")
|
||||
assert response2.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_facet_only_considers_first_x_rows():
|
||||
# This test works by manually fiddling with Facet.suggest_consider
|
||||
ds = Datasette()
|
||||
original_suggest_consider = Facet.suggest_consider
|
||||
try:
|
||||
Facet.suggest_consider = 40
|
||||
db = ds.add_memory_database("test_facet_only_x_rows")
|
||||
await db.execute_write("create table t (id integer primary key, col text)")
|
||||
# First 50 rows make it look like col and col_json should be faceted
|
||||
to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)]
|
||||
await db.execute_write_many("insert into t (col) values (:col)", to_insert)
|
||||
# Next 50 break that assumption
|
||||
to_insert2 = [{"col": f"x{i}"} for i in range(50)]
|
||||
await db.execute_write_many("insert into t (col) values (:col)", to_insert2)
|
||||
response = await ds.client.get(
|
||||
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
|
||||
)
|
||||
data = response.json()
|
||||
assert data["suggested_facets"] == [
|
||||
{
|
||||
"name": "col",
|
||||
"toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col",
|
||||
}
|
||||
]
|
||||
# But if we set suggest_consider to 100 they are not suggested
|
||||
Facet.suggest_consider = 100
|
||||
response2 = await ds.client.get(
|
||||
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
|
||||
)
|
||||
data2 = response2.json()
|
||||
assert data2["suggested_facets"] == []
|
||||
finally:
|
||||
Facet.suggest_consider = original_suggest_consider
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue