From 751abbcc57c2b80275a9d507149dfe829a00493b Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 14 Aug 2024 15:39:49 -0700 Subject: [PATCH 001/266] don't hide virtual table, hide shadow tables. --- datasette/database.py | 76 ++++++++++++++++++++------------ tests/test_api.py | 46 +++++++++---------- tests/test_html.py | 3 +- tests/test_internals_database.py | 42 ++++++++++++++++++ 4 files changed, 116 insertions(+), 51 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 71c134d1..8b55f8f3 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -20,6 +20,7 @@ from .utils import ( table_columns, table_column_details, ) +from .utils.sqlite import sqlite_version from .inspect import inspect_hash connections = threading.local() @@ -459,22 +460,56 @@ class Database: ) async def hidden_table_names(self): - # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r[0] - for r in ( - await self.execute( + hidden_tables = [] + # Add any tables marked as hidden in config + db_config = self.ds.config.get("databases", {}).get(self.name, {}) + if "tables" in db_config: + hidden_tables += [ + t for t in db_config["tables"] if db_config["tables"][t].get("hidden") + ] + + if sqlite_version()[1] >= 37: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with shadow_tables as ( + select name + from pragma_table_list + where [type] = 'shadow' + order by name + ), + core_tables as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + combined as ( + select name from shadow_tables + union all + select name from core_tables + ) + select name from combined order by 1 """ - select name from sqlite_master - where rootpage = 0 - and ( - sql like '%VIRTUAL TABLE%USING FTS%' - ) or name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - or name like '\\_%' escape '\\' - """ ) - ).rows - ] + ] + 
else: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with final as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + select name from final order by 1 + """ + ) + ] + has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: # Also hide Spatialite internal tables @@ -503,19 +538,6 @@ class Database: ) ).rows ] - # Add any tables marked as hidden in config - db_config = self.ds.config.get("databases", {}).get(self.name, {}) - if "tables" in db_config: - hidden_tables += [ - t for t in db_config["tables"] if db_config["tables"][t].get("hidden") - ] - # Also mark as hidden any tables which start with the name of a hidden table - # e.g. "searchable_fts" implies "searchable_fts_content" should be hidden - for table_name in await self.table_names(): - for hidden_table in hidden_tables[:]: - if table_name.startswith(hidden_table): - hidden_tables.append(table_name) - continue return hidden_tables diff --git a/tests/test_api.py b/tests/test_api.py index 431ab5ce..01c9bb79 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -389,6 +389,29 @@ async def test_database_page(ds_client): }, "private": False, }, + { + "name": "searchable_fts", + "columns": [ + "text1", + "text2", + "name with . and spaces", + ] + + ( + [ + "searchable_fts", + "docid", + "__langid", + ] + if supports_table_xinfo() + else [] + ), + "primary_keys": [], + "count": 2, + "hidden": False, + "fts_table": "searchable_fts", + "foreign_keys": {"incoming": [], "outgoing": []}, + "private": False, + }, { "name": "searchable_tags", "columns": ["searchable_id", "tag"], @@ -525,29 +548,6 @@ async def test_database_page(ds_client): "foreign_keys": {"incoming": [], "outgoing": []}, "private": False, }, - { - "name": "searchable_fts", - "columns": [ - "text1", - "text2", - "name with . 
and spaces", - ] - + ( - [ - "searchable_fts", - "docid", - "__langid", - ] - if supports_table_xinfo() - else [] - ), - "primary_keys": [], - "count": 2, - "hidden": True, - "fts_table": "searchable_fts", - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, { "name": "searchable_fts_docsize", "columns": ["docid", "size"], diff --git a/tests/test_html.py b/tests/test_html.py index 5b60d2f5..735a7ef7 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -39,13 +39,14 @@ def test_homepage(app_client_two_attached_databases): assert "extra database" == h2.text.strip() counts_p, links_p = h2.find_all_next("p")[:2] assert ( - "2 rows in 1 table, 5 rows in 4 hidden tables, 1 view" == counts_p.text.strip() + "4 rows in 2 tables, 3 rows in 3 hidden tables, 1 view" == counts_p.text.strip() ) # We should only show visible, not hidden tables here: table_links = [ {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ + {"href": r"/extra+database/searchable_fts", "text": "searchable_fts"}, {"href": r"/extra+database/searchable", "text": "searchable"}, {"href": r"/extra+database/searchable_view", "text": "searchable_view"}, ] == table_links diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 1c155cf3..70be0f4e 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -664,3 +664,45 @@ async def test_in_memory_databases_forbid_writes(app_client): # Using db.execute_write() should work: await db.execute_write("create table foo (t text)") assert await db.table_names() == ["foo"] + + +@pytest.mark.asyncio +async def test_hidden_tables(app_client): + ds = app_client.ds + db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) + assert await db.hidden_table_names() == [] + await db.execute("create virtual table f using fts5(a)") + assert await db.hidden_table_names() == [ + 'f_config', + 'f_content', + 'f_data', + 'f_docsize', + 'f_idx', + ] + + 
await db.execute("create virtual table r using rtree(id, amin, amax)") + assert await db.hidden_table_names() == [ + 'f_config', + 'f_content', + 'f_data', + 'f_docsize', + 'f_idx', + 'r_node', + 'r_parent', + 'r_rowid' + ] + + await db.execute("create table _hideme(_)") + assert await db.hidden_table_names() == [ + '_hideme', + 'f_config', + 'f_content', + 'f_data', + 'f_docsize', + 'f_idx', + 'r_node', + 'r_parent', + 'r_rowid' + ] + + From 86c5203451fa4d10fafc88c02b9b1b8c4e652112 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 14 Aug 2024 15:45:22 -0700 Subject: [PATCH 002/266] fmt --- tests/test_internals_database.py | 52 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 70be0f4e..2e591cd0 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -673,36 +673,34 @@ async def test_hidden_tables(app_client): assert await db.hidden_table_names() == [] await db.execute("create virtual table f using fts5(a)") assert await db.hidden_table_names() == [ - 'f_config', - 'f_content', - 'f_data', - 'f_docsize', - 'f_idx', - ] + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + ] await db.execute("create virtual table r using rtree(id, amin, amax)") assert await db.hidden_table_names() == [ - 'f_config', - 'f_content', - 'f_data', - 'f_docsize', - 'f_idx', - 'r_node', - 'r_parent', - 'r_rowid' - ] + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + "r_node", + "r_parent", + "r_rowid", + ] await db.execute("create table _hideme(_)") assert await db.hidden_table_names() == [ - '_hideme', - 'f_config', - 'f_content', - 'f_data', - 'f_docsize', - 'f_idx', - 'r_node', - 'r_parent', - 'r_rowid' - ] - - + "_hideme", + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + "r_node", + "r_parent", + "r_rowid", + ] From 93067668fe3cba5576fe430c1e547635989ebb9a Mon Sep 17 00:00:00 2001 
From: Simon Willison Date: Wed, 14 Aug 2024 17:57:13 -0700 Subject: [PATCH 003/266] /-/ alternative URL for homepage, closes #2393 --- datasette/app.py | 2 ++ datasette/templates/index.html | 4 ++++ datasette/views/index.py | 4 +++- docs/pages.rst | 2 ++ tests/test_html.py | 22 ++++++++++++++++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 1f9e9d30..8f69ee98 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1476,6 +1476,8 @@ class Datasette: routes.append((regex, view)) add_route(IndexView.as_view(self), r"/(\.(?Pjsono?))?$") + add_route(IndexView.as_view(self), r"/-/(\.(?Pjsono?))?$") + add_route(permanent_redirect("/-/"), r"/-$") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires add_route(favicon, "/favicon.ico") diff --git a/datasette/templates/index.html b/datasette/templates/index.html index 6e95126d..a3595a39 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -2,6 +2,10 @@ {% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %} +{% block extra_head %} +{% if noindex %}{% endif %} +{% endblock %} + {% block body_class %}index{% endblock %} {% block content %} diff --git a/datasette/views/index.py b/datasette/views/index.py index a3178f53..63cc067d 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -152,8 +152,9 @@ class IndexView(BaseView): extra_links = await await_me_maybe(hook) if extra_links: homepage_actions.extend(extra_links) + alternative_homepage = request.path == "/-/" return await self.render( - ["index.html"], + ["default:index.html" if alternative_homepage else "index.html"], request=request, context={ "databases": databases, @@ -166,5 +167,6 @@ class IndexView(BaseView): "top_homepage", self.ds, request ), "homepage_actions": homepage_actions, + "noindex": request.path == "/-/", }, ) diff --git 
a/docs/pages.rst b/docs/pages.rst index 239c9f80..78d5520f 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -23,6 +23,8 @@ Add ``/.json`` to the end of the URL for the JSON version of the underlying data * `global-power-plants.datasettes.com/.json `_ * `register-of-members-interests.datasettes.com/.json `_ +The index page can also be accessed at ``/-/``, useful for if the default index page has been replaced using an :ref:`index.html custom template `. The ``/-/`` page will always render the default Datasette ``index.html`` template. + .. _DatabaseView: Database diff --git a/tests/test_html.py b/tests/test_html.py index 5b60d2f5..d648bdf0 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,4 +1,5 @@ from bs4 import BeautifulSoup as Soup +from datasette.app import Datasette from datasette.utils import allowed_pragmas from .fixtures import ( # noqa app_client, @@ -51,6 +52,27 @@ def test_homepage(app_client_two_attached_databases): ] == table_links +@pytest.mark.asyncio +@pytest.mark.parametrize("path", ("/", "/-/")) +async def test_homepage_alternative_location(path, tmp_path_factory): + template_dir = tmp_path_factory.mktemp("templates") + (template_dir / "index.html").write_text("Custom homepage", "utf-8") + datasette = Datasette(template_dir=str(template_dir)) + response = await datasette.client.get(path) + assert response.status_code == 200 + html = response.text + if path == "/": + assert html == "Custom homepage" + else: + assert '' in html + + +@pytest.mark.asyncio +async def test_homepage_alternative_redirect(ds_client): + response = await ds_client.get("/-") + assert response.status_code == 301 + + @pytest.mark.asyncio async def test_http_head(ds_client): response = await ds_client.head("/") From 06d4ffb92e768fe7b088bb18281718dd5e42ad18 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 14 Aug 2024 21:29:16 -0700 Subject: [PATCH 004/266] Custom error on CSRF failures, closes #2390 Uses https://github.com/simonw/asgi-csrf/issues/28 --- 
datasette/app.py | 12 ++++++++++++ setup.py | 2 +- tests/test_html.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 8f69ee98..1363bc5c 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,3 +1,4 @@ +from asgi_csrf import Errors import asyncio from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union import asgi_csrf @@ -1657,6 +1658,16 @@ class Datasette: if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) + async def custom_csrf_error(scope, send, message_id): + await asgi_send( + send, + await self.render_template( + "csrf_error.html", + {"message_id": message_id, "message_name": Errors(message_id).name}, + ), + 403, + ) + asgi = asgi_csrf.asgi_csrf( DatasetteRouter(self, routes), signing_secret=self._secret, @@ -1664,6 +1675,7 @@ class Datasette: skip_if_scope=lambda scope: any( pm.hook.skip_csrf(datasette=self, scope=scope) ), + send_csrf_failed=custom_csrf_error, ) if self.setting("trace_debug"): asgi = AsgiTracer(asgi) diff --git a/setup.py b/setup.py index c69404f8..923bc826 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ setup( "uvicorn>=0.11", "aiofiles>=0.4", "janus>=0.6.2", - "asgi-csrf>=0.9", + "asgi-csrf>=0.10", "PyYAML>=5.3", "mergedeep>=1.1.1", "itsdangerous>=1.1", diff --git a/tests/test_html.py b/tests/test_html.py index d648bdf0..c559f0c2 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,3 +1,4 @@ +from asgi_csrf import Errors from bs4 import BeautifulSoup as Soup from datasette.app import Datasette from datasette.utils import allowed_pragmas @@ -1158,3 +1159,16 @@ async def test_database_color(ds_client): pdb.set_trace() assert any(fragment in response.text for fragment in expected_fragments) + + +@pytest.mark.asyncio +async def test_custom_csrf_error(ds_client): + response = await ds_client.post( + "/-/messages", + data={ + "message": "A message", + }, + cookies={"csrftoken": "x"}, + ) + assert 
response.status_code == 403 + assert "Error code is FORM_URLENCODED_MISMATCH." in response.text From e9d34a99b84762bef4ed6a5dafd86e18b507b32d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 14 Aug 2024 21:32:57 -0700 Subject: [PATCH 005/266] Missing template from previous commit, refs #2389 --- datasette/templates/csrf_error.html | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 datasette/templates/csrf_error.html diff --git a/datasette/templates/csrf_error.html b/datasette/templates/csrf_error.html new file mode 100644 index 00000000..7cd4b42b --- /dev/null +++ b/datasette/templates/csrf_error.html @@ -0,0 +1,13 @@ +{% extends "base.html" %} +{% block title %}CSRF check failed){% endblock %} +{% block content %} +

<h1>Form origin check failed</h1>
+
+<p>Your request's origin could not be validated. Please return to the form and submit it again.</p>
+
+<details><summary>Technical details</summary>
+<p>Developers: consult <a href="https://docs.datasette.io/en/latest/internals.html#csrf-protection">Datasette's CSRF protection documentation</a>.</p>
+<p>Error code is <code>{{ message_name }}</code>.</p>
+</details>
+ +{% endblock %} From cf4274f2a3a0e6cddfdd7fe03f526cf9fe8b21a4 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 14 Aug 2024 21:33:58 -0700 Subject: [PATCH 006/266] less strict requirements to content-type=application/json (#2392) --- datasette/views/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index fa2c80de..ba0dd4f3 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -364,7 +364,7 @@ class TableInsertView(BaseView): def _errors(errors): return None, errors, {} - if request.headers.get("content-type") != "application/json": + if not request.headers.get("content-type").startswith("application/json"): # TODO: handle form-encoded data return _errors(["Invalid content-type, must be application/json"]) body = await request.post_body() From 492378c2a081fedabc1e2fd26bcc0b4ed50f2f83 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 14 Aug 2024 21:37:40 -0700 Subject: [PATCH 007/266] Test for application/json; charset=utf-8 Refs #2384, #2392 --- tests/test_api_write.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_api_write.py b/tests/test_api_write.py index b442113b..9c2b9b45 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -37,12 +37,22 @@ def _headers(token): @pytest.mark.asyncio -async def test_insert_row(ds_write): +@pytest.mark.parametrize( + "content_type", + ( + "application/json", + "application/json; charset=utf-8", + ), +) +async def test_insert_row(ds_write, content_type): token = write_token(ds_write) response = await ds_write.client.post( "/data/docs/-/insert", json={"row": {"title": "Test", "score": 1.2, "age": 5}}, - headers=_headers(token), + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": content_type, + }, ) expected_row = {"id": 1, "title": "Test", "score": 1.2, "age": 5} assert response.status_code == 201 From 
160d82f06e670c4d3f780a382faf1e8e7b6263a8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:38:33 -0700 Subject: [PATCH 008/266] Bump furo and black (#2385) Updates `furo` from 2024.7.18 to 2024.8.6 - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2024.07.18...2024.08.06) Updates `black` from 24.4.2 to 24.8.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.4.2...24.8.0) --- updated-dependencies: dependency-group: python-packages - dependency-name: furo dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] * Pin Sphinx==7.4.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Simon Willison --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 923bc826..22ec7963 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ setup( extras_require={ "docs": [ "Sphinx==7.4.7", - "furo==2024.7.18", + "furo==2024.8.6", "sphinx-autobuild", "codespell>=2.2.5", "blacken-docs", @@ -84,7 +84,7 @@ setup( "pytest-xdist>=2.2.1", "pytest-asyncio>=0.17", "beautifulsoup4>=4.8.1", - "black==24.4.2", + "black==24.8.0", "blacken-docs==1.18.0", "pytest-timeout>=1.4.2", "trustme>=0.7", From 05dfd34fd0dff34b64fb47e0dd1716c8bdbddfac Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Aug 2024 08:48:47 -0700 Subject: [PATCH 009/266] Use text/html for CSRF error page, refs #2390 --- datasette/app.py | 5 +++-- tests/test_html.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 1363bc5c..fa5e90e3 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1661,11 +1661,12 @@ class Datasette: async def custom_csrf_error(scope, send, message_id): await asgi_send( send, - await self.render_template( + content=await self.render_template( "csrf_error.html", {"message_id": message_id, "message_name": Errors(message_id).name}, ), - 403, + status=403, + content_type="text/html; charset=utf-8", ) asgi = asgi_csrf.asgi_csrf( diff --git a/tests/test_html.py b/tests/test_html.py index c559f0c2..ae270486 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1171,4 +1171,5 @@ async def test_custom_csrf_error(ds_client): cookies={"csrftoken": "x"}, ) assert response.status_code == 403 + assert response.headers["content-type"] == "text/html; charset=utf-8" assert "Error code is FORM_URLENCODED_MISMATCH." 
in response.text From 999b9f03539fc78f1f24d773a4259d0420b63519 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Thu, 15 Aug 2024 09:37:54 -0700 Subject: [PATCH 010/266] test only on SQLite 3.37 and above --- tests/test_internals_database.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 2e591cd0..0602e5cc 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -4,7 +4,7 @@ Tests for the datasette.database.Database class from datasette.app import Datasette from datasette.database import Database, Results, MultipleValues -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import sqlite3, sqlite_version from datasette.utils import Column from .fixtures import app_client, app_client_two_attached_databases_crossdb_enabled import pytest @@ -666,7 +666,12 @@ async def test_in_memory_databases_forbid_writes(app_client): assert await db.table_names() == ["foo"] +def pragma_table_list_supported(): + return sqlite_version()[1] >= 37 + + @pytest.mark.asyncio +@pytest.mark.skipif(not pragma_table_list_supported(), reason="Requires PRAGMA table_list support") async def test_hidden_tables(app_client): ds = app_client.ds db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) From 6d91d082e0a7b1a275fe72549c7e132382986342 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Thu, 15 Aug 2024 13:19:22 -0700 Subject: [PATCH 011/266] Hide shadow tables, don't hide virtual tables Closes #2296 --- datasette/database.py | 115 +++++++++++++++++++++++-------- tests/test_api.py | 46 ++++++------- tests/test_html.py | 3 +- tests/test_internals_database.py | 49 ++++++++++++- 4 files changed, 161 insertions(+), 52 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 71c134d1..8d51befd 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -20,6 +20,7 @@ from .utils import ( table_columns, 
table_column_details, ) +from .utils.sqlite import sqlite_version from .inspect import inspect_hash connections = threading.local() @@ -459,22 +460,95 @@ class Database: ) async def hidden_table_names(self): - # Mark tables 'hidden' if they relate to FTS virtual tables - hidden_tables = [ - r[0] - for r in ( - await self.execute( + hidden_tables = [] + # Add any tables marked as hidden in config + db_config = self.ds.config.get("databases", {}).get(self.name, {}) + if "tables" in db_config: + hidden_tables += [ + t for t in db_config["tables"] if db_config["tables"][t].get("hidden") + ] + + if sqlite_version()[1] >= 37: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + with shadow_tables as ( + select name + from pragma_table_list + where [type] = 'shadow' + order by name + ), + core_tables as ( + select name + from sqlite_master + WHERE name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + combined as ( + select name from shadow_tables + union all + select name from core_tables + ) + select name from combined order by 1 """ - select name from sqlite_master - where rootpage = 0 - and ( - sql like '%VIRTUAL TABLE%USING FTS%' - ) or name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') - or name like '\\_%' escape '\\' - """ ) - ).rows - ] + ] + else: + hidden_tables += [ + x[0] + for x in await self.execute( + """ + WITH base AS ( + SELECT name + FROM sqlite_master + WHERE name IN ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4') + OR substr(name, 1, 1) == '_' + ), + fts_suffixes AS ( + SELECT column1 AS suffix + FROM (VALUES ('_data'), ('_idx'), ('_docsize'), ('_content'), ('_config')) + ), + fts5_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS%' + ), + fts5_shadow_tables AS ( + SELECT + printf('%s%s', fts5_names.name, fts_suffixes.suffix) AS name + FROM fts5_names + JOIN fts_suffixes + ), + fts3_suffixes AS ( + SELECT 
column1 AS suffix + FROM (VALUES ('_content'), ('_segdir'), ('_segments'), ('_stat'), ('_docsize')) + ), + fts3_names AS ( + SELECT name + FROM sqlite_master + WHERE sql LIKE '%VIRTUAL TABLE%USING FTS3%' + OR sql LIKE '%VIRTUAL TABLE%USING FTS4%' + ), + fts3_shadow_tables AS ( + SELECT + printf('%s%s', fts3_names.name, fts3_suffixes.suffix) AS name + FROM fts3_names + JOIN fts3_suffixes + ), + final AS ( + SELECT name FROM base + UNION ALL + SELECT name FROM fts5_shadow_tables + UNION ALL + SELECT name FROM fts3_shadow_tables + ) + SELECT name FROM final ORDER BY 1 + + """ + ) + ] + has_spatialite = await self.execute_fn(detect_spatialite) if has_spatialite: # Also hide Spatialite internal tables @@ -503,19 +577,6 @@ class Database: ) ).rows ] - # Add any tables marked as hidden in config - db_config = self.ds.config.get("databases", {}).get(self.name, {}) - if "tables" in db_config: - hidden_tables += [ - t for t in db_config["tables"] if db_config["tables"][t].get("hidden") - ] - # Also mark as hidden any tables which start with the name of a hidden table - # e.g. "searchable_fts" implies "searchable_fts_content" should be hidden - for table_name in await self.table_names(): - for hidden_table in hidden_tables[:]: - if table_name.startswith(hidden_table): - hidden_tables.append(table_name) - continue return hidden_tables diff --git a/tests/test_api.py b/tests/test_api.py index 431ab5ce..01c9bb79 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -389,6 +389,29 @@ async def test_database_page(ds_client): }, "private": False, }, + { + "name": "searchable_fts", + "columns": [ + "text1", + "text2", + "name with . 
and spaces", + ] + + ( + [ + "searchable_fts", + "docid", + "__langid", + ] + if supports_table_xinfo() + else [] + ), + "primary_keys": [], + "count": 2, + "hidden": False, + "fts_table": "searchable_fts", + "foreign_keys": {"incoming": [], "outgoing": []}, + "private": False, + }, { "name": "searchable_tags", "columns": ["searchable_id", "tag"], @@ -525,29 +548,6 @@ async def test_database_page(ds_client): "foreign_keys": {"incoming": [], "outgoing": []}, "private": False, }, - { - "name": "searchable_fts", - "columns": [ - "text1", - "text2", - "name with . and spaces", - ] - + ( - [ - "searchable_fts", - "docid", - "__langid", - ] - if supports_table_xinfo() - else [] - ), - "primary_keys": [], - "count": 2, - "hidden": True, - "fts_table": "searchable_fts", - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, { "name": "searchable_fts_docsize", "columns": ["docid", "size"], diff --git a/tests/test_html.py b/tests/test_html.py index ae270486..4d95a8fa 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -41,13 +41,14 @@ def test_homepage(app_client_two_attached_databases): assert "extra database" == h2.text.strip() counts_p, links_p = h2.find_all_next("p")[:2] assert ( - "2 rows in 1 table, 5 rows in 4 hidden tables, 1 view" == counts_p.text.strip() + "4 rows in 2 tables, 3 rows in 3 hidden tables, 1 view" == counts_p.text.strip() ) # We should only show visible, not hidden tables here: table_links = [ {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ + {"href": r"/extra+database/searchable_fts", "text": "searchable_fts"}, {"href": r"/extra+database/searchable", "text": "searchable"}, {"href": r"/extra+database/searchable_view", "text": "searchable_view"}, ] == table_links diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 1c155cf3..bc3c8fcf 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -4,7 +4,7 @@ Tests for the 
datasette.database.Database class from datasette.app import Datasette from datasette.database import Database, Results, MultipleValues -from datasette.utils.sqlite import sqlite3 +from datasette.utils.sqlite import sqlite3, sqlite_version from datasette.utils import Column from .fixtures import app_client, app_client_two_attached_databases_crossdb_enabled import pytest @@ -664,3 +664,50 @@ async def test_in_memory_databases_forbid_writes(app_client): # Using db.execute_write() should work: await db.execute_write("create table foo (t text)") assert await db.table_names() == ["foo"] + + +def pragma_table_list_supported(): + return sqlite_version()[1] >= 37 + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not pragma_table_list_supported(), reason="Requires PRAGMA table_list support" +) +async def test_hidden_tables(app_client): + ds = app_client.ds + db = ds.add_database(Database(ds, is_memory=True, is_mutable=True)) + assert await db.hidden_table_names() == [] + await db.execute("create virtual table f using fts5(a)") + assert await db.hidden_table_names() == [ + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + ] + + await db.execute("create virtual table r using rtree(id, amin, amax)") + assert await db.hidden_table_names() == [ + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + "r_node", + "r_parent", + "r_rowid", + ] + + await db.execute("create table _hideme(_)") + assert await db.hidden_table_names() == [ + "_hideme", + "f_config", + "f_content", + "f_data", + "f_docsize", + "f_idx", + "r_node", + "r_parent", + "r_rowid", + ] From 9cb5700d605b8c72930c8bce8b3eaa8a0763cca6 Mon Sep 17 00:00:00 2001 From: Seb Bacon Date: Thu, 15 Aug 2024 21:20:26 +0100 Subject: [PATCH 012/266] bugfix: correctly detect json1 in versions.json (#2327) Fixes #2326 --- datasette/app.py | 4 ++-- tests/test_api.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index fa5e90e3..c2e685ee 100644 
--- a/datasette/app.py +++ b/datasette/app.py @@ -69,6 +69,7 @@ from .utils import ( async_call_with_supported_arguments, await_me_maybe, call_with_supported_arguments, + detect_json1, display_actor, escape_css_string, escape_sqlite, @@ -1172,9 +1173,8 @@ class Datasette: conn = sqlite3.connect(":memory:") self._prepare_connection(conn, "_memory") sqlite_version = conn.execute("select sqlite_version()").fetchone()[0] - sqlite_extensions = {} + sqlite_extensions = {"json1": detect_json1(conn)} for extension, testsql, hasversion in ( - ("json1", "SELECT json('{}')", False), ("spatialite", "SELECT spatialite_version()", True), ): try: diff --git a/tests/test_api.py b/tests/test_api.py index 01c9bb79..fbbe3f67 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -835,6 +835,10 @@ async def test_versions_json(ds_client): assert "version" in data["sqlite"] assert "fts_versions" in data["sqlite"] assert "compile_options" in data["sqlite"] + # By default, the json1 extension is enabled in the SQLite + # provided by the `ubuntu-latest` github actions runner, and + # all versions of SQLite from 3.38.0 onwards + assert data["sqlite"]["extensions"]["json1"] @pytest.mark.asyncio From 53a8ae1871f9650559e0281fd86c6ca8779deec4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Aug 2024 17:16:47 -0700 Subject: [PATCH 013/266] Applied Black, refs #2327, #2326 --- tests/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index fbbe3f67..8a3fcc92 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -835,8 +835,8 @@ async def test_versions_json(ds_client): assert "version" in data["sqlite"] assert "fts_versions" in data["sqlite"] assert "compile_options" in data["sqlite"] - # By default, the json1 extension is enabled in the SQLite - # provided by the `ubuntu-latest` github actions runner, and + # By default, the json1 extension is enabled in the SQLite + # provided by the `ubuntu-latest` github 
actions runner, and # all versions of SQLite from 3.38.0 onwards assert data["sqlite"]["extensions"]["json1"] From 0dd41efce6a41d19d52308dbd191c94098250b7a Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Thu, 15 Aug 2024 21:48:07 -0700 Subject: [PATCH 014/266] skip over "queries" blocks when processing database-level metadata items (#2386) --- datasette/app.py | 2 +- tests/test_internals_datasette.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index c2e685ee..1c730a73 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -456,7 +456,7 @@ class Datasette: # step 2: database-level metadata for dbname, db in self._metadata_local.get("databases", {}).items(): for key, value in db.items(): - if key == "tables": + if key in ("tables", "queries"): continue await self.set_database_metadata(dbname, key, value) diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 2614e02e..135a9099 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -173,3 +173,23 @@ async def test_get_permission(ds_client): # And test KeyError with pytest.raises(KeyError): ds.get_permission("missing-permission") + + +@pytest.mark.asyncio +async def test_apply_metadata_json(): + ds = Datasette( + metadata={ + "databases": { + "legislators": { + "tables": {"offices": {"summary": "office address or sumtin"}}, + "queries": { + "millenntial_represetatives": { + "summary": "Social media accounts for current legislators" + } + }, + } + } + }, + ) + await ds.invoke_startup() + assert (await ds.client.get("/")).status_code == 200 From 7d8dd2ac7fbbc53f484e8d383ac7f3656b28aa86 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Aug 2024 22:03:41 -0700 Subject: [PATCH 015/266] Release 1.0a15 Refs #2296, #2326, #2384, #2386, #2389, #2390, #2393, #2394 --- datasette/version.py | 2 +- docs/changelog.rst | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 
deletion(-) diff --git a/datasette/version.py b/datasette/version.py index 30f98bd5..cb5d34bf 100644 --- a/datasette/version.py +++ b/datasette/version.py @@ -1,2 +1,2 @@ -__version__ = "1.0a14" +__version__ = "1.0a15" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8ffa66bd..089ae425 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,19 @@ Changelog ========= +.. _v1_0_a15: + +1.0a15 (2024-08-15) +------------------- + +- Datasette now defaults to hiding SQLite "shadow" tables, as seen in extensions such as SQLite FTS and `sqlite-vec `__. Virtual tables that it makes sense to display, such as FTS core tables, are no longer hidden. Thanks, `Alex Garcia `__. (:issue:`2296`) +- Fixed bug where running Datasette with one or more ``-s/--setting`` options could over-ride settings that were present in ``datasette.yml``. (:issue:`2389`) +- The Datasette homepage is now duplicated at ``/-/``, using the default ``index.html`` template. This ensures that the information on that page is still accessible even if the Datasette homepage has been customized using a custom ``index.html`` template, for example on sites like `datasette.io `__. (:issue:`2393`) +- Failed CSRF checks now display a more user-friendly error page. (:issue:`2390`) +- Fixed a bug where the ``json1`` extension was not correctly detected on the ``/-/versions`` page. Thanks, `Seb Bacon `__. (:issue:`2326`) +- Fixed a bug where the Datasette write API did not correctly accept ``Content-Type: application/json; charset=utf-8``. (:issue:`2384`) +- Fixed a bug where Datasette would fail to start if ``metadata.yml`` contained a ``queries`` block. (`#2386 `__) + .. 
_v1_0_a14: 1.0a14 (2024-08-05) From d444b6aad568e3743199b44d4ae978f5a9ce36a4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 20 Aug 2024 09:34:53 -0700 Subject: [PATCH 016/266] Fix for spacing on index page, closes #2399 --- datasette/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/templates/index.html b/datasette/templates/index.html index a3595a39..03349279 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -21,7 +21,7 @@ {% for database in databases %}

{{ database.name }}{% if database.private %} 🔒{% endif %}

- {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif -%} + {% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.hidden_tables_count %}, {% endif -%} {% if database.hidden_tables_count -%} {% if database.show_table_row_counts %}{{ "{:,}".format(database.hidden_table_rows_sum) }} rows in {% endif %}{{ database.hidden_tables_count }} hidden table{% if database.hidden_tables_count != 1 %}s{% endif -%} {% endif -%} From 39dfc7d7d77b901d7fef5481e91465fa48b88799 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 20 Aug 2024 19:03:33 -0700 Subject: [PATCH 017/266] Removed units functionality and Pint dependency Closes #2400, unblocks #2320 --- datasette/app.py | 1 - datasette/filters.py | 24 +------- datasette/utils/__init__.py | 1 - datasette/views/base.py | 4 -- datasette/views/row.py | 1 - datasette/views/table.py | 13 +---- docs/metadata.rst | 94 -------------------------------- setup.py | 1 - tests/fixtures.py | 11 ---- tests/plugins/my_plugin.py | 13 +++-- tests/test_api.py | 12 ---- tests/test_internals_database.py | 1 - tests/test_plugins.py | 4 +- tests/test_table_api.py | 16 ------ 14 files changed, 14 insertions(+), 182 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 1c730a73..d7d20016 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -37,7 +37,6 @@ from jinja2.exceptions import TemplateNotFound from .events import Event from .views import Context -from .views.base import ureg from .views.database import database_download, DatabaseView, TableCreateView, QueryView from .views.index import IndexView from .views.special import ( diff --git a/datasette/filters.py b/datasette/filters.py 
index 585d4865..67d4170b 100644 --- a/datasette/filters.py +++ b/datasette/filters.py @@ -368,12 +368,8 @@ class Filters: ) _filters_by_key = {f.key: f for f in _filters} - def __init__(self, pairs, units=None, ureg=None): - if units is None: - units = {} + def __init__(self, pairs): self.pairs = pairs - self.units = units - self.ureg = ureg def lookups(self): """Yields (lookup, display, no_argument) pairs""" @@ -413,20 +409,6 @@ class Filters: def has_selections(self): return bool(self.pairs) - def convert_unit(self, column, value): - """If the user has provided a unit in the query, convert it into the column unit, if present.""" - if column not in self.units: - return value - - # Try to interpret the value as a unit - value = self.ureg(value) - if isinstance(value, numbers.Number): - # It's just a bare number, assume it's the column unit - return value - - column_unit = self.ureg(self.units[column]) - return value.to(column_unit).magnitude - def build_where_clauses(self, table): sql_bits = [] params = {} @@ -434,9 +416,7 @@ class Filters: for column, lookup, value in self.selections(): filter = self._filters_by_key.get(lookup, None) if filter: - sql_bit, param = filter.where_clause( - table, column, self.convert_unit(column, value), i - ) + sql_bit, param = filter.where_clause(table, column, value, i) sql_bits.append(sql_bit) if param is not None: if not isinstance(param, list): diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 073d6e86..7d248ee5 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1368,7 +1368,6 @@ _table_config_keys = ( "fts_table", "fts_pk", "searchmode", - "units", ) diff --git a/datasette/views/base.py b/datasette/views/base.py index 2e78b0a5..aee06b01 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -8,8 +8,6 @@ import urllib from markupsafe import escape -import pint - from datasette.database import QueryInterrupted from datasette.utils.asgi import Request from 
datasette.utils import ( @@ -32,8 +30,6 @@ from datasette.utils.asgi import ( BadRequest, ) -ureg = pint.UnitRegistry() - class DatasetteError(Exception): def __init__( diff --git a/datasette/views/row.py b/datasette/views/row.py index 6180446f..d802994e 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -103,7 +103,6 @@ class RowView(DataView): "columns": columns, "primary_keys": resolved.pks, "primary_key_values": pk_values, - "units": (await self.ds.table_config(database, table)).get("units", {}), } if "foreign_key_tables" in (request.args.get("_extras") or "").split(","): diff --git a/datasette/views/table.py b/datasette/views/table.py index ba0dd4f3..d71efeb0 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -43,7 +43,7 @@ from datasette.utils import ( from datasette.utils.asgi import BadRequest, Forbidden, NotFound, Response from datasette.filters import Filters import sqlite_utils -from .base import BaseView, DatasetteError, ureg, _error, stream_csv +from .base import BaseView, DatasetteError, _error, stream_csv from .database import QueryView LINK_WITH_LABEL = ( @@ -292,14 +292,6 @@ async def display_columns_and_rows( ), ) ) - elif column in table_config.get("units", {}) and value != "": - # Interpret units using pint - value = value * ureg(table_config["units"][column]) - # Pint uses floating point which sometimes introduces errors in the compact - # representation, which we have to round off to avoid ugliness. In the vast - # majority of cases this rounding will be inconsequential. I hope. 
- value = round(value.to_compact(), 6) - display_value = markupsafe.Markup(f"{value:~P}".replace(" ", " ")) else: display_value = str(value) if truncate_cells and len(display_value) > truncate_cells: @@ -1017,7 +1009,6 @@ async def table_view_data( nofacet = True table_metadata = await datasette.table_config(database_name, table_name) - units = table_metadata.get("units", {}) # Arguments that start with _ and don't contain a __ are # special - things like ?_search= - and should not be @@ -1029,7 +1020,7 @@ async def table_view_data( filter_args.append((key, v)) # Build where clauses from query string arguments - filters = Filters(sorted(filter_args), units, ureg) + filters = Filters(sorted(filter_args)) where_clauses, params = filters.build_where_clauses(table_name) # Execute filters_from_request plugin hooks - including the default diff --git a/docs/metadata.rst b/docs/metadata.rst index f3ca68ac..a3fa4040 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -205,100 +205,6 @@ These will be displayed at the top of the table page, and will also show in the You can see an example of how these look at `latest.datasette.io/fixtures/roadside_attractions `__. -Specifying units for a column ------------------------------ - -Datasette supports attaching units to a column, which will be used when displaying -values from that column. SI prefixes will be used where appropriate. - -Column units are configured in the metadata like so: - -.. [[[cog - metadata_example(cog, { - "databases": { - "database1": { - "tables": { - "example_table": { - "units": { - "column1": "metres", - "column2": "Hz" - } - } - } - } - } - }) -.. ]]] - -.. tab:: metadata.yaml - - .. code-block:: yaml - - databases: - database1: - tables: - example_table: - units: - column1: metres - column2: Hz - - -.. tab:: metadata.json - - .. 
code-block:: json - - { - "databases": { - "database1": { - "tables": { - "example_table": { - "units": { - "column1": "metres", - "column2": "Hz" - } - } - } - } - } - } -.. [[[end]]] - - -Units are interpreted using Pint_, and you can see the full list of available units in -Pint's `unit registry`_. You can also add `custom units`_ to the metadata, which will be -registered with Pint: - -.. [[[cog - metadata_example(cog, { - "custom_units": [ - "decibel = [] = dB" - ] - }) -.. ]]] - -.. tab:: metadata.yaml - - .. code-block:: yaml - - custom_units: - - decibel = [] = dB - - -.. tab:: metadata.json - - .. code-block:: json - - { - "custom_units": [ - "decibel = [] = dB" - ] - } -.. [[[end]]] - -.. _Pint: https://pint.readthedocs.io/ -.. _unit registry: https://github.com/hgrecco/pint/blob/master/pint/default_en.txt -.. _custom units: http://pint.readthedocs.io/en/latest/defining.html - .. _metadata_default_sort: Setting a default sort order diff --git a/setup.py b/setup.py index 22ec7963..47d796a3 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,6 @@ setup( "httpx>=0.20", 'importlib_resources>=1.3.1; python_version < "3.9"', 'importlib_metadata>=4.6; python_version < "3.10"', - "pint>=0.9", "pluggy>=1.0", "uvicorn>=0.11", "aiofiles>=0.4", diff --git a/tests/fixtures.py b/tests/fixtures.py index af6b610b..0539b7c8 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -379,7 +379,6 @@ METADATA = { ], }, "no_primary_key": {"sortable_columns": [], "hidden": True}, - "units": {"units": {"distance": "m", "frequency": "Hz"}}, "primary_key_multiple_columns_explicit_label": { "label_column": "content2" }, @@ -507,16 +506,6 @@ CREATE TABLE "custom_foreign_key_label" ( FOREIGN KEY ("foreign_key_with_custom_label") REFERENCES [primary_key_multiple_columns_explicit_label](id) ); -CREATE TABLE units ( - pk integer primary key, - distance int, - frequency int -); - -INSERT INTO units VALUES (1, 1, 100); -INSERT INTO units VALUES (2, 5000, 2500); -INSERT INTO units VALUES 
(3, 100000, 75000); - CREATE TABLE tags ( tag TEXT PRIMARY KEY ); diff --git a/tests/plugins/my_plugin.py b/tests/plugins/my_plugin.py index 4ca4f989..e87353ea 100644 --- a/tests/plugins/my_plugin.py +++ b/tests/plugins/my_plugin.py @@ -5,18 +5,21 @@ from datasette import tracer from datasette.utils import path_with_added_args from datasette.utils.asgi import asgi_send_json, Response import base64 -import pint import json -import urllib - -ureg = pint.UnitRegistry() +import urllib.parse @hookimpl def prepare_connection(conn, database, datasette): def convert_units(amount, from_, to_): """select convert_units(100, 'm', 'ft');""" - return (amount * ureg(from_)).to(to_).to_tuple()[0] + # Convert meters to feet + if from_ == "m" and to_ == "ft": + return amount * 3.28084 + # Convert feet to meters + if from_ == "ft" and to_ == "m": + return amount / 3.28084 + assert False, "Unsupported conversion" conn.create_function("convert_units", 3, convert_units) diff --git a/tests/test_api.py b/tests/test_api.py index 8a3fcc92..91f07563 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -528,16 +528,6 @@ async def test_database_page(ds_client): }, "private": False, }, - { - "name": "units", - "columns": ["pk", "distance", "frequency"], - "primary_keys": ["pk"], - "count": 3, - "hidden": False, - "fts_table": None, - "foreign_keys": {"incoming": [], "outgoing": []}, - "private": False, - }, { "name": "no_primary_key", "columns": ["content", "a", "b", "c"], @@ -1133,7 +1123,6 @@ async def test_config_json(config, expected): ], }, "no_primary_key": {"sortable_columns": [], "hidden": True}, - "units": {"units": {"distance": "m", "frequency": "Hz"}}, "primary_key_multiple_columns_explicit_label": { "label_column": "content2" }, @@ -1168,7 +1157,6 @@ async def test_config_json(config, expected): "text", ] }, - "units": {"units": {"distance": "m", "frequency": "Hz"}}, # These one get redacted: "no_primary_key": "***", "primary_key_multiple_columns_explicit_label": "***", diff 
--git a/tests/test_internals_database.py b/tests/test_internals_database.py index bc3c8fcf..0020668a 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -422,7 +422,6 @@ async def test_table_names(db): "table/with/slashes.csv", "complex_foreign_keys", "custom_foreign_key_label", - "units", "tags", "searchable", "searchable_tags", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 5fad03ad..aa8f1578 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -424,8 +424,8 @@ def view_names_client(tmp_path_factory): ( ("/", "index"), ("/fixtures", "database"), - ("/fixtures/units", "table"), - ("/fixtures/units/1", "row"), + ("/fixtures/facetable", "table"), + ("/fixtures/facetable/1", "row"), ("/-/versions", "json_data"), ("/fixtures/-/query?sql=select+1", "database"), ), diff --git a/tests/test_table_api.py b/tests/test_table_api.py index 11542cb0..615b36eb 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -720,22 +720,6 @@ async def test_view(ds_client): ] -@pytest.mark.xfail -@pytest.mark.asyncio -async def test_unit_filters(ds_client): - response = await ds_client.get( - "/fixtures/units.json?_shape=arrays&distance__lt=75km&frequency__gt=1kHz" - ) - assert response.status_code == 200 - data = response.json() - - assert data["units"]["distance"] == "m" - assert data["units"]["frequency"] == "Hz" - - assert len(data["rows"]) == 1 - assert data["rows"][0][0] == 2 - - def test_page_size_matching_max_returned_rows( app_client_returned_rows_matches_page_size, ): From 4efcc29d02d594106e8e9f5206aa5a740b45eccb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 20 Aug 2024 19:15:36 -0700 Subject: [PATCH 018/266] Test against Python "3.13-dev" Refs: - #2320 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3ac8756d..0e217ac3 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From 1f3fb5f96b3f6e773b8c1b8ec5d8f7516c6860b0 Mon Sep 17 00:00:00 2001 From: Tiago Ilieve Date: Wed, 21 Aug 2024 00:02:35 -0300 Subject: [PATCH 019/266] debugger: load 'ipdb' if present * debugger: load 'ipdb' if present Transparently chooses between the IPython-enhanced 'ipdb' or the standard 'pdb'. * datasette install ipdb --------- Co-authored-by: Simon Willison --- datasette/handle_exception.py | 6 +++++- docs/contributing.rst | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/datasette/handle_exception.py b/datasette/handle_exception.py index 1a0ac979..96398a4c 100644 --- a/datasette/handle_exception.py +++ b/datasette/handle_exception.py @@ -5,9 +5,13 @@ from .utils.asgi import ( ) from .views.base import DatasetteError from markupsafe import Markup -import pdb import traceback +try: + import ipdb as pdb +except ImportError: + import pdb + try: import rich except ImportError: diff --git a/docs/contributing.rst b/docs/contributing.rst index 45330a83..c1268321 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -111,10 +111,14 @@ Debugging Any errors that occur while Datasette is running while display a stack trace on the console. -You can tell Datasette to open an interactive ``pdb`` debugger session if an error occurs using the ``--pdb`` option:: +You can tell Datasette to open an interactive ``pdb`` (or ``ipdb``, if present) debugger session if an error occurs using the ``--pdb`` option:: datasette --pdb fixtures.db +For `ipdb `__, first run this:: + + datasette install ipdb + .. 
_contributing_formatting: Code formatting @@ -349,4 +353,4 @@ Datasette bundles `CodeMirror `__ for the SQL editing i -p @rollup/plugin-node-resolve \ -p @rollup/plugin-terser -* Update the version reference in the ``codemirror.html`` template. \ No newline at end of file +* Update the version reference in the ``codemirror.html`` template. From 9028d7f80527b696330805ef7921de14ab40b129 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 09:53:52 -0700 Subject: [PATCH 020/266] Support nested JSON in metadata.json, closes #2403 --- datasette/app.py | 5 ++++- tests/test_internals_datasette.py | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index d7d20016..3a53afa5 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -450,7 +450,10 @@ class Datasette: for key in self._metadata_local or {}: if key == "databases": continue - await self.set_instance_metadata(key, self._metadata_local[key]) + value = self._metadata_local[key] + if not isinstance(value, str): + value = json.dumps(value) + await self.set_instance_metadata(key, value) # step 2: database-level metadata for dbname, db in self._metadata_local.get("databases", {}).items(): diff --git a/tests/test_internals_datasette.py b/tests/test_internals_datasette.py index 135a9099..fc4e42cb 100644 --- a/tests/test_internals_datasette.py +++ b/tests/test_internals_datasette.py @@ -183,13 +183,16 @@ async def test_apply_metadata_json(): "legislators": { "tables": {"offices": {"summary": "office address or sumtin"}}, "queries": { - "millenntial_represetatives": { + "millennial_representatives": { "summary": "Social media accounts for current legislators" } }, } - } + }, + "weird_instance_value": {"nested": [1, 2, 3]}, }, ) await ds.invoke_startup() assert (await ds.client.get("/")).status_code == 200 + value = (await ds.get_instance_metadata()).get("weird_instance_value") + assert value == '{"nested": [1, 2, 3]}' From 
34a6b2ac844a0784fae1f36e0243336a48413594 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 10:58:17 -0700 Subject: [PATCH 021/266] Fixed bug with ?_trace=1 and large responses, closes #2404 --- datasette/tracer.py | 4 +++- datasette/utils/testing.py | 3 +++ tests/test_tracer.py | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/datasette/tracer.py b/datasette/tracer.py index fc7338b0..29dd4556 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -90,6 +90,7 @@ class AsgiTracer: async def wrapped_send(message): nonlocal accumulated_body, size_limit_exceeded, response_headers + if message["type"] == "http.response.start": response_headers = message["headers"] await send(message) @@ -102,11 +103,12 @@ class AsgiTracer: # Accumulate body until the end or until size is exceeded accumulated_body += message["body"] if len(accumulated_body) > self.max_body_bytes: + # Send what we have accumulated so far await send( { "type": "http.response.body", "body": accumulated_body, - "more_body": True, + "more_body": bool(message.get("more_body")), } ) size_limit_exceeded = True diff --git a/datasette/utils/testing.py b/datasette/utils/testing.py index d4990784..1606da05 100644 --- a/datasette/utils/testing.py +++ b/datasette/utils/testing.py @@ -62,10 +62,13 @@ class TestClient: follow_redirects=False, redirect_count=0, method="GET", + params=None, cookies=None, if_none_match=None, headers=None, ): + if params: + path += "?" 
+ urlencode(params, doseq=True) return await self._request( path=path, follow_redirects=follow_redirects, diff --git a/tests/test_tracer.py b/tests/test_tracer.py index ceadee50..1a4074b0 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -53,6 +53,23 @@ def test_trace(trace_debug): assert all(isinstance(trace["count"], int) for trace in execute_manys) +def test_trace_silently_fails_for_large_page(): + # Max HTML size is 256KB + with make_app_client(settings={"trace_debug": True}) as client: + # Small response should have trace + small_response = client.get("/fixtures/simple_primary_key.json?_trace=1") + assert small_response.status == 200 + assert "_trace" in small_response.json + + # Big response should not + big_response = client.get( + "/fixtures/-/query.json", + params={"_trace": 1, "sql": "select zeroblob(1024 * 256)"}, + ) + assert big_response.status == 200 + assert "_trace" not in big_response.json + + def test_trace_parallel_queries(): with make_app_client(settings={"trace_debug": True}) as client: response = client.get("/parallel-queries?_trace=1") From 8a63cdccc7744e6c6969edf47f7c519bf4c25fa6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 12:19:18 -0700 Subject: [PATCH 022/266] Tracer now catches errors, closes #2405 --- datasette/database.py | 3 +++ datasette/tracer.py | 31 +++++++++++++++++++------------ tests/test_tracer.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index 8d51befd..c761dad7 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -644,6 +644,9 @@ class QueryInterrupted(Exception): self.sql = sql self.params = params + def __str__(self): + return "QueryInterrupted: {}".format(self.e) + class MultipleValues(Exception): pass diff --git a/datasette/tracer.py b/datasette/tracer.py index 29dd4556..9e66613b 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -32,7 +32,7 @@ def trace_child_tasks(): 
@contextmanager -def trace(type, **kwargs): +def trace(trace_type, **kwargs): assert not TRACE_RESERVED_KEYS.intersection( kwargs.keys() ), f".trace() keyword parameters cannot include {TRACE_RESERVED_KEYS}" @@ -45,17 +45,24 @@ def trace(type, **kwargs): yield kwargs return start = time.perf_counter() - yield kwargs - end = time.perf_counter() - trace_info = { - "type": type, - "start": start, - "end": end, - "duration_ms": (end - start) * 1000, - "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), - } - trace_info.update(kwargs) - tracer.append(trace_info) + captured_error = None + try: + yield kwargs + except Exception as ex: + captured_error = ex + raise + finally: + end = time.perf_counter() + trace_info = { + "type": trace_type, + "start": start, + "end": end, + "duration_ms": (end - start) * 1000, + "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), + "error": str(captured_error) if captured_error else None, + } + trace_info.update(kwargs) + tracer.append(trace_info) @contextmanager diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 1a4074b0..1e0d7001 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -70,6 +70,20 @@ def test_trace_silently_fails_for_large_page(): assert "_trace" not in big_response.json +def test_trace_query_errors(): + with make_app_client(settings={"trace_debug": True}) as client: + response = client.get( + "/fixtures/-/query.json", + params={"_trace": 1, "sql": "select * from non_existent_table"}, + ) + assert response.status == 400 + + data = response.json + assert "_trace" in data + trace_info = data["_trace"] + assert trace_info["traces"][-1]["error"] == "no such table: non_existent_table" + + def test_trace_parallel_queries(): with make_app_client(settings={"trace_debug": True}) as client: response = client.get("/parallel-queries?_trace=1") From f28ff8e4f0eb89bb67a6d8336e4a3e2655f3b983 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 13:36:42 
-0700 Subject: [PATCH 023/266] Consider just 1000 rows for suggest facet, closes #2406 --- datasette/facets.py | 46 +++++++++++++++++++++++++++----------------- tests/test_facets.py | 38 +++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index ccd85461..f49575d9 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -65,6 +65,8 @@ def register_facet_classes(): class Facet: type = None + # How many rows to consider when suggesting facets: + suggest_consider = 1000 def __init__( self, @@ -145,17 +147,6 @@ class Facet: ) ).columns - async def get_row_count(self): - if self.row_count is None: - self.row_count = ( - await self.ds.execute( - self.database, - f"select count(*) from ({self.sql})", - self.params, - ) - ).rows[0][0] - return self.row_count - class ColumnFacet(Facet): type = "column" @@ -170,13 +161,16 @@ class ColumnFacet(Facet): if column in already_enabled: continue suggested_facet_sql = """ - select {column} as value, count(*) as n from ( - {sql} - ) where value is not null + with limited as (select * from ({sql}) limit {suggest_consider}) + select {column} as value, count(*) as n from limited + where value is not null group by value limit {limit} """.format( - column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + column=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, + suggest_consider=self.suggest_consider, ) distinct_values = None try: @@ -211,6 +205,17 @@ class ColumnFacet(Facet): continue return suggested_facets + async def get_row_count(self): + if self.row_count is None: + self.row_count = ( + await self.ds.execute( + self.database, + f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})", + self.params, + ) + ).rows[0][0] + return self.row_count + async def facet_results(self): facet_results = [] facets_timed_out = [] @@ -313,11 +318,14 @@ class ArrayFacet(Facet): continue # Is every value in 
this column either null or a JSON array? suggested_facet_sql = """ + with limited as (select * from ({sql}) limit {suggest_consider}) select distinct json_type({column}) - from ({sql}) + from limited where {column} is not null and {column} != '' """.format( - column=escape_sqlite(column), sql=self.sql + column=escape_sqlite(column), + sql=self.sql, + suggest_consider=self.suggest_consider, ) try: results = await self.ds.execute( @@ -402,7 +410,9 @@ class ArrayFacet(Facet): order by count(*) desc, value limit {limit} """.format( - col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 + col=escape_sqlite(column), + sql=self.sql, + limit=facet_size + 1, ) try: facet_rows_results = await self.ds.execute( diff --git a/tests/test_facets.py b/tests/test_facets.py index 023efcf0..a2b505ec 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -1,6 +1,6 @@ from datasette.app import Datasette from datasette.database import Database -from datasette.facets import ColumnFacet, ArrayFacet, DateFacet +from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet from datasette.utils.asgi import Request from datasette.utils import detect_json1 from .fixtures import make_app_client @@ -662,3 +662,39 @@ async def test_facet_against_in_memory_database(): assert response1.status_code == 200 response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2") assert response2.status_code == 200 + + +@pytest.mark.asyncio +async def test_facet_only_considers_first_x_rows(): + # This test works by manually fiddling with Facet.suggest_consider + ds = Datasette() + original_suggest_consider = Facet.suggest_consider + try: + Facet.suggest_consider = 40 + db = ds.add_memory_database("test_facet_only_x_rows") + await db.execute_write("create table t (id integer primary key, col text)") + # First 50 rows make it look like col and col_json should be faceted + to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)] + await db.execute_write_many("insert into t 
(col) values (:col)", to_insert) + # Next 50 break that assumption + to_insert2 = [{"col": f"x{i}"} for i in range(50)] + await db.execute_write_many("insert into t (col) values (:col)", to_insert2) + response = await ds.client.get( + "/test_facet_only_x_rows/t.json?_extra=suggested_facets" + ) + data = response.json() + assert data["suggested_facets"] == [ + { + "name": "col", + "toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col", + } + ] + # But if we set suggest_consider to 100 they are not suggested + Facet.suggest_consider = 100 + response2 = await ds.client.get( + "/test_facet_only_x_rows/t.json?_extra=suggested_facets" + ) + data2 = response2.json() + assert data2["suggested_facets"] == [] + finally: + Facet.suggest_consider = original_suggest_consider From bc46066f9d96550286ac7694f18f44c8169a83a7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 14:38:11 -0700 Subject: [PATCH 024/266] Fix huge performance bug in DateFacet, refs #2407 --- datasette/facets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index f49575d9..dd149424 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -480,8 +480,8 @@ class DateFacet(Facet): # Does this column contain any dates in the first 100 rows? 
suggested_facet_sql = """ select date({column}) from ( - {sql} - ) where {column} glob "????-??-*" limit 100; + select * from ({sql}) limit 100 + ) where {column} glob "????-??-*" """.format( column=escape_sqlite(column), sql=self.sql ) From dc1d15247647c350ac73ad520229467180b8433c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 14:58:29 -0700 Subject: [PATCH 025/266] Stop counting at 10,000 rows when listing tables, refs #2398 --- datasette/database.py | 5 ++++- datasette/templates/database.html | 2 +- datasette/views/database.py | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index c761dad7..da0ab1de 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -29,6 +29,9 @@ AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) class Database: + # For table counts stop at this many rows: + count_limit = 10000 + def __init__( self, ds, @@ -376,7 +379,7 @@ class Database: try: table_count = ( await self.execute( - f"select count(*) from [{table}]", + f"select count(*) from (select * from [{table}] limit {self.count_limit + 1})", custom_time_limit=limit, ) ).rows[0][0] diff --git a/datasette/templates/database.html b/datasette/templates/database.html index f921bc2d..c6f3da99 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -60,7 +60,7 @@

{{ table.name }}{% if table.private %} 🔒{% endif %}{% if table.hidden %} (hidden){% endif %}

{% for column in table.columns %}{{ column }}{% if not loop.last %}, {% endif %}{% endfor %}

-

{% if table.count is none %}Many rows{% else %}{{ "{:,}".format(table.count) }} row{% if table.count == 1 %}{% else %}s{% endif %}{% endif %}

+

{% if table.count is none %}Many rows{% elif table.count == count_limit + 1 %}>{{ "{:,}".format(count_limit) }} rows{% else %}{{ "{:,}".format(table.count) }} row{% if table.count == 1 %}{% else %}s{% endif %}{% endif %}

{% endif %} {% endfor %} diff --git a/datasette/views/database.py b/datasette/views/database.py index 9ab061a1..61fe15e4 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -159,6 +159,7 @@ class DatabaseView(View): "show_hidden": request.args.get("_show_hidden"), "editable": True, "metadata": metadata, + "count_limit": db.count_limit, "allow_download": datasette.setting("allow_download") and not db.is_mutable and not db.is_memory, @@ -272,7 +273,7 @@ class QueryContext: async def get_tables(datasette, request, db): tables = [] database = db.name - table_counts = await db.table_counts(5) + table_counts = await db.table_counts(100) hidden_table_names = set(await db.hidden_table_names()) all_foreign_keys = await db.get_all_foreign_keys() From 9ecce07b083824f56bd96966a1f63e18d44489b1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 19:09:25 -0700 Subject: [PATCH 026/266] count all rows button on table page, refs #2408 --- datasette/templates/table.html | 33 ++++++++++++++++++++++++++++++++- datasette/url_builder.py | 6 ++++++ datasette/views/table.py | 11 ++++++++++- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 35e0b9c1..187f0143 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -40,7 +40,10 @@ {% endif %} {% if count or human_description_en %} -

{% if count or count == 0 %}{{ "{:,}".format(count) }} row{% if count == 1 %}{% else %}s{% endif %}{% endif %} +

+ {% if count == count_limit + 1 %}>{{ "{:,}".format(count_limit) }} rows + {% if allow_execute_sql and query.sql %} count all rows{% endif %} + {% elif count or count == 0 %}{{ "{:,}".format(count) }} row{% if count == 1 %}{% else %}s{% endif %}{% endif %} {% if human_description_en %}{{ human_description_en }}{% endif %}

{% endif %} @@ -172,4 +175,32 @@
{{ view_definition }}
{% endif %} +{% if allow_execute_sql and query.sql %} + +{% endif %} + {% endblock %} diff --git a/datasette/url_builder.py b/datasette/url_builder.py index 9c6bbde0..16b3d42b 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -31,6 +31,12 @@ class Urls: db = self.ds.get_database(database) return self.path(tilde_encode(db.route), format=format) + def database_query(self, database, sql, format=None): + path = f"{self.database(database)}/-/query?" + urllib.parse.urlencode( + {"sql": sql} + ) + return self.path(path, format=format) + def table(self, database, table, format=None): path = f"{self.database(database)}/{tilde_encode(table)}" if format is not None: diff --git a/datasette/views/table.py b/datasette/views/table.py index d71efeb0..ea044b36 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -929,6 +929,7 @@ async def table_view_traced(datasette, request): database=resolved.db.name, table=resolved.table, ), + count_limit=resolved.db.count_limit, ), request=request, view_name="table", @@ -1280,6 +1281,9 @@ async def table_view_data( if extra_extras: extras.update(extra_extras) + async def extra_count_sql(): + return count_sql + async def extra_count(): "Total count of rows matching these filters" # Calculate the total count for this query @@ -1299,8 +1303,11 @@ async def table_view_data( # Otherwise run a select count(*) ... 
if count_sql and count is None and not nocount: + count_sql_limited = ( + f"select count(*) from (select * {from_sql} limit 10001)" + ) try: - count_rows = list(await db.execute(count_sql, from_sql_params)) + count_rows = list(await db.execute(count_sql_limited, from_sql_params)) count = count_rows[0][0] except QueryInterrupted: pass @@ -1615,6 +1622,7 @@ async def table_view_data( "facet_results", "facets_timed_out", "count", + "count_sql", "human_description_en", "next_url", "metadata", @@ -1647,6 +1655,7 @@ async def table_view_data( registry = Registry( extra_count, + extra_count_sql, extra_facet_results, extra_facets_timed_out, extra_suggested_facets, From dc288056b81a3635bdb02a6d0121887db2720e5e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 21 Aug 2024 19:56:02 -0700 Subject: [PATCH 027/266] Better handling of errors for count all button, refs #2408 --- datasette/templates/table.html | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 187f0143..7246ff5d 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -42,7 +42,7 @@ {% if count or human_description_en %}

{% if count == count_limit + 1 %}>{{ "{:,}".format(count_limit) }} rows - {% if allow_execute_sql and query.sql %} count all rows{% endif %} + {% if allow_execute_sql and query.sql %} count all{% endif %} {% elif count or count == 0 %}{{ "{:,}".format(count) }} row{% if count == 1 %}{% else %}s{% endif %}{% endif %} {% if human_description_en %}{{ human_description_en }}{% endif %}

@@ -180,7 +180,7 @@ document.addEventListener('DOMContentLoaded', function() { const countLink = document.querySelector('a.count-sql'); if (countLink) { - countLink.addEventListener('click', function(ev) { + countLink.addEventListener('click', async function(ev) { ev.preventDefault(); // Replace countLink with span with same style attribute const span = document.createElement('span'); @@ -189,14 +189,23 @@ document.addEventListener('DOMContentLoaded', function() { countLink.replaceWith(span); countLink.setAttribute('disabled', 'disabled'); let url = countLink.href.replace(/(\?|$)/, '.json$1'); - fetch(url) - .then(response => response.json()) - .then(data => { - const count = data['rows'][0]['count(*)']; - const formattedCount = count.toLocaleString(); - span.closest('h3').textContent = formattedCount + ' rows'; - }) - .catch(error => countLink.textContent = 'error'); + try { + const response = await fetch(url); + console.log({response}); + const data = await response.json(); + console.log({data}); + if (!response.ok) { + console.log('throw error'); + throw new Error(data.title || data.error); + } + const count = data['rows'][0]['count(*)']; + const formattedCount = count.toLocaleString(); + span.closest('h3').textContent = formattedCount + ' rows'; + } catch (error) { + console.log('Update', span, 'with error message', error); + span.textContent = error.message; + span.style.color = 'red'; + } }); } }); From 92c4d41ca605e0837a2711ee52fde9cf1eea74d0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 1 Sep 2024 17:20:41 -0700 Subject: [PATCH 028/266] results.dicts() method, closes #2414 --- datasette/database.py | 3 +++ datasette/views/row.py | 3 +-- datasette/views/table.py | 2 +- docs/internals.rst | 3 +++ tests/test_api_write.py | 23 +++++++++-------------- tests/test_internals_database.py | 11 +++++++++++ 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/datasette/database.py b/datasette/database.py index da0ab1de..a2e899bc 100644 --- 
a/datasette/database.py +++ b/datasette/database.py @@ -677,6 +677,9 @@ class Results: else: raise MultipleValues + def dicts(self): + return [dict(row) for row in self.rows] + def __iter__(self): return iter(self.rows) diff --git a/datasette/views/row.py b/datasette/views/row.py index d802994e..f374fd94 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -277,8 +277,7 @@ class RowUpdateView(BaseView): results = await resolved.db.execute( resolved.sql, resolved.params, truncate=True ) - rows = list(results.rows) - result["row"] = dict(rows[0]) + result["row"] = results.dicts()[0] await self.ds.track_event( UpdateRowEvent( diff --git a/datasette/views/table.py b/datasette/views/table.py index ea044b36..82dab613 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -558,7 +558,7 @@ class TableInsertView(BaseView): ), args, ) - result["rows"] = [dict(r) for r in fetched_rows.rows] + result["rows"] = fetched_rows.dicts() else: result["rows"] = rows # We track the number of rows requested, but do not attempt to show which were actually diff --git a/docs/internals.rst b/docs/internals.rst index 4289c815..facbc224 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1093,6 +1093,9 @@ The ``Results`` object also has the following properties and methods: ``.rows`` - list of ``sqlite3.Row`` This property provides direct access to the list of rows returned by the database. You can access specific rows by index using ``results.rows[0]``. +``.dicts()`` - list of ``dict`` + This method returns a list of Python dictionaries, one for each row. + ``.first()`` - row or None Returns the first row in the results, or ``None`` if no rows were returned. 
diff --git a/tests/test_api_write.py b/tests/test_api_write.py index 9c2b9b45..04e61261 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -58,8 +58,8 @@ async def test_insert_row(ds_write, content_type): assert response.status_code == 201 assert response.json()["ok"] is True assert response.json()["rows"] == [expected_row] - rows = (await ds_write.get_database("data").execute("select * from docs")).rows - assert dict(rows[0]) == expected_row + rows = (await ds_write.get_database("data").execute("select * from docs")).dicts() + assert rows[0] == expected_row # Analytics event event = last_event(ds_write) assert event.name == "insert-rows" @@ -118,12 +118,9 @@ async def test_insert_rows(ds_write, return_rows): assert not event.ignore assert not event.replace - actual_rows = [ - dict(r) - for r in ( - await ds_write.get_database("data").execute("select * from docs") - ).rows - ] + actual_rows = ( + await ds_write.get_database("data").execute("select * from docs") + ).dicts() assert len(actual_rows) == 20 assert actual_rows == [ {"id": i + 1, "title": "Test {}".format(i), "score": 1.0, "age": 5} @@ -469,12 +466,10 @@ async def test_insert_ignore_replace( assert event.ignore == ignore assert event.replace == replace - actual_rows = [ - dict(r) - for r in ( - await ds_write.get_database("data").execute("select * from docs") - ).rows - ] + actual_rows = ( + await ds_write.get_database("data").execute("select * from docs") + ).dicts() + assert actual_rows == expected_rows assert response.json()["ok"] is True if should_return: diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py index 0020668a..edfc6bc7 100644 --- a/tests/test_internals_database.py +++ b/tests/test_internals_database.py @@ -40,6 +40,17 @@ async def test_results_bool(db, expected): assert bool(results) is expected +@pytest.mark.asyncio +async def test_results_dicts(db): + results = await db.execute("select pk, name from roadside_attractions") + assert 
results.dicts() == [ + {"pk": 1, "name": "The Mystery Spot"}, + {"pk": 2, "name": "Winchester Mystery House"}, + {"pk": 3, "name": "Burlingame Museum of PEZ Memorabilia"}, + {"pk": 4, "name": "Bigfoot Discovery Museum"}, + ] + + @pytest.mark.parametrize( "query,expected", [ From 2170269258d1de38f4e518aa3e55e6b3ed202841 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 3 Sep 2024 08:37:26 -0700 Subject: [PATCH 029/266] New .core CSS class for inputs and buttons * Initial .core input/button classes, refs #2415 * Docs for the new .core CSS class, refs #2415 * Applied .core class everywhere that needs it, closes #2415 --- datasette/static/app.css | 33 +++++++++++++++------- datasette/templates/allow_debug.html | 2 +- datasette/templates/api_explorer.html | 4 +-- datasette/templates/create_token.html | 2 +- datasette/templates/database.html | 2 +- datasette/templates/logout.html | 2 +- datasette/templates/messages_debug.html | 2 +- datasette/templates/permissions_debug.html | 2 +- datasette/templates/query.html | 2 +- datasette/templates/table.html | 4 +-- docs/custom_templates.rst | 9 ++++++ docs/writing_plugins.rst | 3 +- tests/test_permissions.py | 2 +- 13 files changed, 46 insertions(+), 23 deletions(-) diff --git a/datasette/static/app.css b/datasette/static/app.css index 562d6adb..f975f0ad 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -528,8 +528,11 @@ label.sort_by_desc { pre#sql-query { margin-bottom: 1em; } -form input[type=text], -form input[type=search] { + +.core input[type=text], +input.core[type=text], +.core input[type=search], +input.core[type=search] { border: 1px solid #ccc; border-radius: 3px; width: 60%; @@ -540,17 +543,25 @@ form input[type=search] { } /* Stop Webkit from styling search boxes in an inconsistent way */ /* https://css-tricks.com/webkit-html5-search-inputs/ comments */ -input[type=search] { +.core input[type=search], +input.core[type=search] { -webkit-appearance: textfield; } 
-input[type="search"]::-webkit-search-decoration, -input[type="search"]::-webkit-search-cancel-button, -input[type="search"]::-webkit-search-results-button, -input[type="search"]::-webkit-search-results-decoration { +.core input[type="search"]::-webkit-search-decoration, +input.core[type="search"]::-webkit-search-decoration, +.core input[type="search"]::-webkit-search-cancel-button, +input.core[type="search"]::-webkit-search-cancel-button, +.core input[type="search"]::-webkit-search-results-button, +input.core[type="search"]::-webkit-search-results-button, +.core input[type="search"]::-webkit-search-results-decoration, +input.core[type="search"]::-webkit-search-results-decoration { display: none; } -form input[type=submit], form button[type=button] { +.core input[type=submit], +.core button[type=button], +input.core[type=submit], +button.core[type=button] { font-weight: 400; cursor: pointer; text-align: center; @@ -563,14 +574,16 @@ form input[type=submit], form button[type=button] { border-radius: .25rem; } -form input[type=submit] { +.core input[type=submit], +input.core[type=submit] { color: #fff; background: linear-gradient(180deg, #007bff 0%, #4E79C7 100%); border-color: #007bff; -webkit-appearance: button; } -form button[type=button] { +.core button[type=button], +button.core[type=button] { color: #007bff; background-color: #fff; border-color: #007bff; diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html index 04181531..610417d2 100644 --- a/datasette/templates/allow_debug.html +++ b/datasette/templates/allow_debug.html @@ -35,7 +35,7 @@ p.message-warning {

Use this tool to try out different actor and allow combinations. See Defining permissions with "allow" blocks for documentation.

-
+

diff --git a/datasette/templates/api_explorer.html b/datasette/templates/api_explorer.html index 109fb1e9..dc393c20 100644 --- a/datasette/templates/api_explorer.html +++ b/datasette/templates/api_explorer.html @@ -19,7 +19,7 @@

GET - +
@@ -29,7 +29,7 @@
POST - +
diff --git a/datasette/templates/create_token.html b/datasette/templates/create_token.html index 2be98d38..409fb8a9 100644 --- a/datasette/templates/create_token.html +++ b/datasette/templates/create_token.html @@ -39,7 +39,7 @@ {% endfor %} {% endif %} - +

diff --git a/datasette/templates/logout.html b/datasette/templates/logout.html index 4c4a7d11..c8fc642a 100644 --- a/datasette/templates/logout.html +++ b/datasette/templates/logout.html @@ -8,7 +8,7 @@

You are logged in as {{ display_actor(actor) }}

- +
diff --git a/datasette/templates/messages_debug.html b/datasette/templates/messages_debug.html index e0ab9a40..2940cd69 100644 --- a/datasette/templates/messages_debug.html +++ b/datasette/templates/messages_debug.html @@ -8,7 +8,7 @@

Set a message:

- +
diff --git a/datasette/templates/permissions_debug.html b/datasette/templates/permissions_debug.html index 5a5c1aa6..83891181 100644 --- a/datasette/templates/permissions_debug.html +++ b/datasette/templates/permissions_debug.html @@ -47,7 +47,7 @@ textarea {

This tool lets you simulate an actor and a permission check for that actor.

- +

diff --git a/datasette/templates/query.html b/datasette/templates/query.html index f7c8d0a3..a6e9a3aa 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -36,7 +36,7 @@ {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} - +

Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %}{% if not query_error %} ({{ show_hide_text }}) {% endif %}

diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 7246ff5d..c9e0e87b 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -48,7 +48,7 @@ {% endif %} - + {% if supports_search %}
{% endif %} @@ -152,7 +152,7 @@ object {% endif %}

- +

CSV options: diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 534d8b33..8cc40f0f 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -83,6 +83,15 @@ database column they are representing, for example: +.. _customization_css: + +Writing custom CSS +~~~~~~~~~~~~~~~~~~ + +Custom templates need to take Datasette's default CSS into account. The pattern portfolio at ``/-/patterns`` (`example here `__) is a useful reference for understanding the available CSS classes. + +The ``core`` class is particularly useful - you can apply this directly to a ```` or ``