_facet_array no longer confused by duplicate array items, closes #448

This commit is contained in:
Simon Willison 2021-11-15 17:19:33 -08:00
commit 55024b5301
2 changed files with 77 additions and 4 deletions

View file

@ -354,11 +354,26 @@ class ArrayFacet(Facet):
config = source_and_config["config"]
source = source_and_config["source"]
column = config.get("column") or config["simple"]
# https://github.com/simonw/datasette/issues/448
facet_sql = """
select j.value as value, count(*) as count from (
{sql}
) join json_each({col}) j
group by j.value order by count desc, value limit {limit}
with inner as ({sql}),
deduped_array_items as (
select
distinct j.value,
inner.*
from
json_each([inner].{col}) j
join inner
)
select
value as value,
count(*) as count
from
deduped_array_items
group by
value
order by
count(*) desc limit {limit}
""".format(
col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
)

View file

@ -4,6 +4,7 @@ from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
from datasette.utils.asgi import Request
from datasette.utils import detect_json1
from .fixtures import app_client # noqa
import json
import pytest
@ -402,6 +403,63 @@ async def test_array_facet_results(app_client):
} == buckets
@pytest.mark.asyncio
@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module")
async def test_array_facet_handle_duplicate_tags():
    """Array facet counts must not be inflated by repeated items in one row.

    "Charles" lists "friendly" twice in his tags array; the facet is still
    expected to report a count of 2 for "friendly" (one per otter that has
    the tag), not 3.
    """
    # (name, tags) fixture rows; the first row deliberately repeats a tag.
    otters = (
        ("Charles", ["friendly", "cunning", "friendly"]),
        ("Shaun", ["cunning", "empathetic", "friendly"]),
        ("Tracy", ["empathetic", "eager"]),
    )
    # Full facet payload expected back for the "tags" column.
    expected = {
        "name": "tags",
        "type": "array",
        "results": [
            {
                "value": "cunning",
                "label": "cunning",
                "count": 2,
                "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=cunning",
                "selected": False,
            },
            {
                "value": "empathetic",
                "label": "empathetic",
                "count": 2,
                "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=empathetic",
                "selected": False,
            },
            {
                "value": "friendly",
                "label": "friendly",
                "count": 2,
                "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=friendly",
                "selected": False,
            },
            {
                "value": "eager",
                "label": "eager",
                "count": 1,
                "toggle_url": "http://localhost/test_array_facet/otters.json?_facet_array=tags&tags__arraycontains=eager",
                "selected": False,
            },
        ],
        "hideable": True,
        "toggle_url": "/test_array_facet/otters.json",
        "truncated": False,
    }

    # Build an in-memory database holding the fixture rows, tags stored as JSON text.
    ds = Datasette([], memory=True)
    db = ds.add_database(Database(ds, memory_name="test_array_facet"))
    await db.execute_write("create table otters(name text, tags text)", block=True)
    for name, tags in otters:
        await db.execute_write(
            "insert into otters (name, tags) values (?, ?)",
            [name, json.dumps(tags)],
            block=True,
        )

    response = await ds.client.get("/test_array_facet/otters.json?_facet_array=tags")
    tags_facet = response.json()["facet_results"]["tags"]
    assert tags_facet == expected
@pytest.mark.asyncio
async def test_date_facet_results(app_client):
facet = DateFacet(