mirror of
https://github.com/simonw/datasette.git
synced 2025-12-10 16:51:24 +01:00
Consider just 1000 rows for suggest facet, closes #2406
This commit is contained in:
parent
8a63cdccc7
commit
f28ff8e4f0
2 changed files with 65 additions and 19 deletions
|
|
@@ -65,6 +65,8 @@ def register_facet_classes():
|
||||||
|
|
||||||
class Facet:
|
class Facet:
|
||||||
type = None
|
type = None
|
||||||
|
# How many rows to consider when suggesting facets:
|
||||||
|
suggest_consider = 1000
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@@ -145,17 +147,6 @@ class Facet:
|
||||||
)
|
)
|
||||||
).columns
|
).columns
|
||||||
|
|
||||||
async def get_row_count(self):
|
|
||||||
if self.row_count is None:
|
|
||||||
self.row_count = (
|
|
||||||
await self.ds.execute(
|
|
||||||
self.database,
|
|
||||||
f"select count(*) from ({self.sql})",
|
|
||||||
self.params,
|
|
||||||
)
|
|
||||||
).rows[0][0]
|
|
||||||
return self.row_count
|
|
||||||
|
|
||||||
|
|
||||||
class ColumnFacet(Facet):
|
class ColumnFacet(Facet):
|
||||||
type = "column"
|
type = "column"
|
||||||
|
|
@@ -170,13 +161,16 @@ class ColumnFacet(Facet):
|
||||||
if column in already_enabled:
|
if column in already_enabled:
|
||||||
continue
|
continue
|
||||||
suggested_facet_sql = """
|
suggested_facet_sql = """
|
||||||
select {column} as value, count(*) as n from (
|
with limited as (select * from ({sql}) limit {suggest_consider})
|
||||||
{sql}
|
select {column} as value, count(*) as n from limited
|
||||||
) where value is not null
|
where value is not null
|
||||||
group by value
|
group by value
|
||||||
limit {limit}
|
limit {limit}
|
||||||
""".format(
|
""".format(
|
||||||
column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
|
column=escape_sqlite(column),
|
||||||
|
sql=self.sql,
|
||||||
|
limit=facet_size + 1,
|
||||||
|
suggest_consider=self.suggest_consider,
|
||||||
)
|
)
|
||||||
distinct_values = None
|
distinct_values = None
|
||||||
try:
|
try:
|
||||||
|
|
@@ -211,6 +205,17 @@ class ColumnFacet(Facet):
|
||||||
continue
|
continue
|
||||||
return suggested_facets
|
return suggested_facets
|
||||||
|
|
||||||
|
async def get_row_count(self):
|
||||||
|
if self.row_count is None:
|
||||||
|
self.row_count = (
|
||||||
|
await self.ds.execute(
|
||||||
|
self.database,
|
||||||
|
f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})",
|
||||||
|
self.params,
|
||||||
|
)
|
||||||
|
).rows[0][0]
|
||||||
|
return self.row_count
|
||||||
|
|
||||||
async def facet_results(self):
|
async def facet_results(self):
|
||||||
facet_results = []
|
facet_results = []
|
||||||
facets_timed_out = []
|
facets_timed_out = []
|
||||||
|
|
@@ -313,11 +318,14 @@ class ArrayFacet(Facet):
|
||||||
continue
|
continue
|
||||||
# Is every value in this column either null or a JSON array?
|
# Is every value in this column either null or a JSON array?
|
||||||
suggested_facet_sql = """
|
suggested_facet_sql = """
|
||||||
|
with limited as (select * from ({sql}) limit {suggest_consider})
|
||||||
select distinct json_type({column})
|
select distinct json_type({column})
|
||||||
from ({sql})
|
from limited
|
||||||
where {column} is not null and {column} != ''
|
where {column} is not null and {column} != ''
|
||||||
""".format(
|
""".format(
|
||||||
column=escape_sqlite(column), sql=self.sql
|
column=escape_sqlite(column),
|
||||||
|
sql=self.sql,
|
||||||
|
suggest_consider=self.suggest_consider,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
results = await self.ds.execute(
|
results = await self.ds.execute(
|
||||||
|
|
@@ -402,7 +410,9 @@ class ArrayFacet(Facet):
|
||||||
order by
|
order by
|
||||||
count(*) desc, value limit {limit}
|
count(*) desc, value limit {limit}
|
||||||
""".format(
|
""".format(
|
||||||
col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
|
col=escape_sqlite(column),
|
||||||
|
sql=self.sql,
|
||||||
|
limit=facet_size + 1,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
facet_rows_results = await self.ds.execute(
|
facet_rows_results = await self.ds.execute(
|
||||||
|
|
|
||||||
|
|
@@ -1,6 +1,6 @@
|
||||||
from datasette.app import Datasette
|
from datasette.app import Datasette
|
||||||
from datasette.database import Database
|
from datasette.database import Database
|
||||||
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
|
from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet
|
||||||
from datasette.utils.asgi import Request
|
from datasette.utils.asgi import Request
|
||||||
from datasette.utils import detect_json1
|
from datasette.utils import detect_json1
|
||||||
from .fixtures import make_app_client
|
from .fixtures import make_app_client
|
||||||
|
|
@@ -662,3 +662,39 @@ async def test_facet_against_in_memory_database():
|
||||||
assert response1.status_code == 200
|
assert response1.status_code == 200
|
||||||
response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2")
|
response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2")
|
||||||
assert response2.status_code == 200
|
assert response2.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_facet_only_considers_first_x_rows():
|
||||||
|
# This test works by manually fiddling with Facet.suggest_consider
|
||||||
|
ds = Datasette()
|
||||||
|
original_suggest_consider = Facet.suggest_consider
|
||||||
|
try:
|
||||||
|
Facet.suggest_consider = 40
|
||||||
|
db = ds.add_memory_database("test_facet_only_x_rows")
|
||||||
|
await db.execute_write("create table t (id integer primary key, col text)")
|
||||||
|
# First 50 rows make it look like col and col_json should be faceted
|
||||||
|
to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)]
|
||||||
|
await db.execute_write_many("insert into t (col) values (:col)", to_insert)
|
||||||
|
# Next 50 break that assumption
|
||||||
|
to_insert2 = [{"col": f"x{i}"} for i in range(50)]
|
||||||
|
await db.execute_write_many("insert into t (col) values (:col)", to_insert2)
|
||||||
|
response = await ds.client.get(
|
||||||
|
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
|
||||||
|
)
|
||||||
|
data = response.json()
|
||||||
|
assert data["suggested_facets"] == [
|
||||||
|
{
|
||||||
|
"name": "col",
|
||||||
|
"toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# But if we set suggest_consider to 100 they are not suggested
|
||||||
|
Facet.suggest_consider = 100
|
||||||
|
response2 = await ds.client.get(
|
||||||
|
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
|
||||||
|
)
|
||||||
|
data2 = response2.json()
|
||||||
|
assert data2["suggested_facets"] == []
|
||||||
|
finally:
|
||||||
|
Facet.suggest_consider = original_suggest_consider
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue