From f274f9004302c5ca75ce89d0abfd648457957e31 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 6 Jun 2019 01:37:46 -0700 Subject: [PATCH 01/88] Test against Python 3.8-dev using Travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 1553ef1e..b37ae967 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.6" - "3.7" - "3.5" + - "3.8-dev" # Executed for 3.5 AND 3.5 as the first "test" stage: script: From 1e95ed0fa4388e2450fa4d130ebd983c50dd2dfb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 8 Jun 2019 19:11:59 -0700 Subject: [PATCH 02/88] Added datasette-bplist plugin to ecosystem --- docs/ecosystem.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst index 6842aca5..17631e2a 100644 --- a/docs/ecosystem.rst +++ b/docs/ecosystem.rst @@ -99,3 +99,8 @@ datasette-sqlite-fts4 --------------------- `datasette-sqlite-fts4 `__ provides search relevance ranking algorithms that can be used with SQLite's FTS4 search module. You can read more about it in `Exploring search relevance algorithms with SQLite `__. + +datasette-bplist +---------------- + +`datasette-bplist `__ provides tools for working with Apple's binary plist format embedded in SQLite database tables. If you use OS X you already have dozens of SQLite databases hidden away in your ``~/Library`` folder that include data in this format - this plugin allows you to view the decoded data and run SQL queries against embedded values using a ``bplist_to_json(value)`` custom SQL function. From 276e36c79555957c9881849f282f5de36db4902a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 9 Jun 2019 09:49:06 -0700 Subject: [PATCH 03/88] Added datasette-render-binary plugin to ecosystem --- docs/ecosystem.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst index 17631e2a..844fe207 100644 --- a/docs/ecosystem.rst +++ b/docs/ecosystem.rst @@ -90,6 +90,11 @@ datasette-render-images `datasette-render-images `__ works with SQLite tables that contain binary image data in BLOB columns. It converts any images it finds into ``data-uri`` image elements, allowing you to view them directly in the Datasette interface. +datasette-render-binary +----------------------- + +`datasette-render-binary `__ renders binary data in a slightly more readable fashion: it shows ASCII characters as they are, and shows all other data as monospace octets. Useful as a tool for exploring new unfamiliar databases as it makes it easier to spot if a binary column may contain a decipherable binary format. 
+ datasette-pretty-json --------------------- From 3f20e7debc9e1a7b59ce66049fd5080f9aafb606 Mon Sep 17 00:00:00 2001 From: Tom MacWright Date: Tue, 11 Jun 2019 11:48:40 -0700 Subject: [PATCH 04/88] Fix typo in install step: should be install -e (#500) --- docs/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 27237a2f..993d01d8 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -38,7 +38,7 @@ The next step is to create a virtual environment for your project and use it to # Now activate the virtual environment, so pip can install into it source venv/bin/activate # Install Datasette and its testing dependencies - python3 -m pip -e .[test] + python3 -m pip install -e .[test] That last line does most of the work: ``pip install -e`` means "install this package in a way that allows me to edit the source code in place". The ``.[test]`` option means "use the setup.py in this directory and install the optional testing dependencies as well". From 425b4717381db2b90316d87785e84210093c8d53 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 15 Jun 2019 12:41:34 -0700 Subject: [PATCH 05/88] Refactored view class hierarchy, refs #272 See https://github.com/simonw/datasette/issues/272#issuecomment-502393107 --- datasette/views/base.py | 4 ++-- datasette/views/database.py | 6 +++--- datasette/views/index.py | 4 ++-- datasette/views/special.py | 4 ++-- datasette/views/table.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index b8863ff3..9db8cc76 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -49,7 +49,7 @@ class DatasetteError(Exception): self.messagge_is_html = messagge_is_html -class RenderMixin(HTTPMethodView): +class BaseView(HTTPMethodView): def _asset_urls(self, key, template, context): # Flatten list-of-lists from plugins: seen_urls = set() @@ -128,7 +128,7 @@ class RenderMixin(HTTPMethodView): ) -class BaseView(RenderMixin): +class DataView(BaseView): name = "" re_named_parameter = re.compile(":([a-zA-Z0-9_]+)") diff --git a/datasette/views/database.py b/datasette/views/database.py index 859a271f..a5b606f1 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -4,10 +4,10 @@ from sanic import response from datasette.utils import to_css_class, validate_sql_select -from .base import BaseView, DatasetteError +from .base import DataView, DatasetteError -class DatabaseView(BaseView): +class DatabaseView(DataView): name = "database" async def data(self, request, database, hash, default_labels=False, _size=None): @@ -65,7 +65,7 @@ class DatabaseView(BaseView): ) -class DatabaseDownload(BaseView): +class DatabaseDownload(DataView): name = "database_download" async def view_get(self, request, database, hash, correct_hash_present, **kwargs): diff --git a/datasette/views/index.py b/datasette/views/index.py index 30c77b41..c9d15c36 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -6,7 +6,7 @@ from sanic import response from datasette.utils import CustomJSONEncoder from datasette.version import __version__ -from .base import RenderMixin +from .base import BaseView # Truncate table list on homepage at: @@ -16,7 +16,7 @@ TRUNCATE_AT = 5 COUNT_TABLE_LIMIT = 30 -class IndexView(RenderMixin): +class IndexView(BaseView): name = "index" def __init__(self, datasette): diff --git a/datasette/views/special.py b/datasette/views/special.py index b93a330b..91b577fc 100644 --- 
a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,9 +1,9 @@ import json from sanic import response -from .base import RenderMixin +from .base import BaseView -class JsonDataView(RenderMixin): +class JsonDataView(BaseView): name = "json_data" def __init__(self, datasette, filename, data_callback): diff --git a/datasette/views/table.py b/datasette/views/table.py index 04100dc6..14b8743a 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -25,7 +25,7 @@ from datasette.utils import ( value_as_boolean, ) from datasette.filters import Filters -from .base import BaseView, DatasetteError, ureg +from .base import DataView, DatasetteError, ureg LINK_WITH_LABEL = ( '{label} {id}' @@ -33,7 +33,7 @@ LINK_WITH_LABEL = ( LINK_WITH_VALUE = '{id}' -class RowTableShared(BaseView): +class RowTableShared(DataView): async def sortable_columns_for_table(self, database, table, use_rowid): db = self.ds.databases[database] table_metadata = self.ds.table_metadata(database, table) From 3cf5830bc6523443b6a0d24e3b4488cfdac9545b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 18 Jun 2019 17:22:26 -0700 Subject: [PATCH 06/88] Revert "New encode/decode_path_component functions" Refs #272 This reverts commit 9fdb47ca952b93b7b60adddb965ea6642b1ff523. Now that ASGI supports raw_path we don't need our own encoding scheme! --- datasette/utils.py | 21 --------------------- tests/test_utils.py | 16 ---------------- 2 files changed, 37 deletions(-) diff --git a/datasette/utils.py b/datasette/utils.py index 56fe2996..58746be4 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -261,27 +261,6 @@ def escape_sqlite(s): return "[{}]".format(s) -_decode_path_component_re = re.compile(r"U\+([\da-h]{4})", re.IGNORECASE) -_encode_path_component_re = re.compile( - "[{}]".format( - "".join( - re.escape(c) - for c in (";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "~") - ) - ) -) - - -def decode_path_component(table_name): - return _decode_path_component_re.sub(lambda m: chr(int(m.group(1), 16)), table_name) - - -def encode_path_component(table_name): - return _encode_path_component_re.sub( - lambda m: "U+{0:0{1}x}".format(ord(m.group(0)), 4).upper(), table_name - ) - - def make_dockerfile( files, metadata_file, diff --git a/tests/test_utils.py b/tests/test_utils.py index 73aee12a..a5f603e6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -381,19 +381,3 @@ def test_path_with_format(path, format, extra_qs, expected): ) def test_format_bytes(bytes, expected): assert expected == utils.format_bytes(bytes) - - -@pytest.mark.parametrize( - "name,expected", - [ - ("table", "table"), - ("table/and/slashes", "tableU+002FandU+002Fslashes"), - ("~table", "U+007Etable"), - ("+bobcats!", "U+002Bbobcats!"), - ("U+007Etable", "UU+002B007Etable"), - ], -) -def test_encode_decode_path_component(name, expected): - encoded = utils.encode_path_component(name) - assert encoded == expected - assert name == utils.decode_path_component(encoded) From 51c39ac398901b7bcf958154f831d17240dfda73 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 20:13:09 -0700 Subject: [PATCH 07/88] Port Datasette from Sanic to ASGI + Uvicorn (#518) Datasette now uses ASGI internally, and no longer depends on Sanic. It now uses Uvicorn as the underlying HTTP server. This was thirteen months in the making... 
for full details see the issue: https://github.com/simonw/datasette/issues/272 And for a full sequence of commits plus commentary, see the pull request: https://github.com/simonw/datasette/pull/518 --- datasette/app.py | 249 +++++++------- datasette/cli.py | 3 +- datasette/renderer.py | 2 +- datasette/tracer.py | 81 ++++- datasette/{utils.py => utils/__init__.py} | 17 +- datasette/utils/asgi.py | 377 ++++++++++++++++++++++ datasette/views/base.py | 68 ++-- datasette/views/database.py | 9 +- datasette/views/index.py | 7 +- datasette/views/special.py | 8 +- datasette/views/table.py | 7 +- pytest.ini | 2 - setup.py | 6 +- tests/fixtures.py | 84 ++++- tests/test_api.py | 12 +- tests/test_csv.py | 8 +- tests/test_html.py | 25 ++ tests/test_utils.py | 12 +- 18 files changed, 770 insertions(+), 207 deletions(-) rename datasette/{utils.py => utils/__init__.py} (98%) create mode 100644 datasette/utils/asgi.py diff --git a/datasette/app.py b/datasette/app.py index 2ef7da41..4a8ead1d 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,11 +1,9 @@ import asyncio import collections import hashlib -import json import os import sys import threading -import time import traceback import urllib.parse from concurrent import futures @@ -14,10 +12,8 @@ from pathlib import Path import click from markupsafe import Markup from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from sanic import Sanic, response -from sanic.exceptions import InvalidUsage, NotFound -from .views.base import DatasetteError, ureg +from .views.base import DatasetteError, ureg, AsgiRouter from .views.database import DatabaseDownload, DatabaseView from .views.index import IndexView from .views.special import JsonDataView @@ -36,7 +32,16 @@ from .utils import ( sqlite_timelimit, to_css_class, ) -from .tracer import capture_traces, trace +from .utils.asgi import ( + AsgiLifespan, + NotFound, + asgi_static, + asgi_send, + asgi_send_html, + asgi_send_json, + asgi_send_redirect, +) +from .tracer import trace, AsgiTracer from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -126,8 +131,8 @@ CONFIG_OPTIONS = ( DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} -async def favicon(request): - return response.text("") +async def favicon(scope, receive, send): + await asgi_send(send, "", 200) class Datasette: @@ -413,6 +418,7 @@ class Datasette: "full": sys.version, }, "datasette": datasette_version, + "asgi": "3.0", "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, @@ -543,21 +549,7 @@ class Datasette: self.renderers[renderer["extension"]] = renderer["callback"] def app(self): - class TracingSanic(Sanic): - async def handle_request(self, request, write_callback, stream_callback): - if request.args.get("_trace"): - request["traces"] = [] - request["trace_start"] = time.time() - with capture_traces(request["traces"]): - await super().handle_request( - request, write_callback, stream_callback - ) - else: - await super().handle_request( - request, write_callback, stream_callback - ) - - app = TracingSanic(__name__) + "Returns an ASGI app function that serves the whole of Datasette" default_templates = str(app_root / "datasette" / "templates") template_paths = [] if self.template_dir: @@ -588,134 +580,127 @@ class Datasette: pm.hook.prepare_jinja2_environment(env=self.jinja_env) self.register_renderers() + + routes = [] + + def add_route(view, regex): + routes.append((regex, view)) + # Generate a regex snippet to match all registered renderer file extensions 
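+        # e.g. with "json" and "csv" renderers registered this produces r"\.json|\.csv"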
renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - app.add_route(IndexView.as_view(self), r"/") + add_route(IndexView.as_asgi(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires - app.add_route(favicon, "/favicon.ico") - app.static("/-/static/", str(app_root / "datasette" / "static")) + add_route(favicon, "/favicon.ico") + + add_route( + asgi_static(app_root / "datasette" / "static"), r"/-/static/(?P.*)$" + ) for path, dirname in self.static_mounts: - app.static(path, dirname) + add_route(asgi_static(dirname), r"/" + path + "/(?P.*)$") + # Mount any plugin static/ directories for plugin in get_plugins(pm): if plugin["static_path"]: - modpath = "/-/static-plugins/{}/".format(plugin["name"]) - app.static(modpath, plugin["static_path"]) - app.add_route( - JsonDataView.as_view(self, "metadata.json", lambda: self._metadata), - r"/-/metadata", + modpath = "/-/static-plugins/{}/(?P.*)$".format(plugin["name"]) + add_route(asgi_static(plugin["static_path"]), modpath) + add_route( + JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata), + r"/-/metadata(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "versions.json", self.versions), - r"/-/versions", + add_route( + JsonDataView.as_asgi(self, "versions.json", self.versions), + r"/-/versions(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "plugins.json", self.plugins), - r"/-/plugins", + add_route( + JsonDataView.as_asgi(self, "plugins.json", self.plugins), + r"/-/plugins(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "config.json", lambda: self._config), - r"/-/config", + add_route( + JsonDataView.as_asgi(self, "config.json", lambda: self._config), + r"/-/config(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "databases.json", self.connected_databases), - r"/-/databases", + add_route( + JsonDataView.as_asgi(self, "databases.json", self.connected_databases), + r"/-/databases(?P(\.json)?)$", ) - app.add_route( - DatabaseDownload.as_view(self), r"/" + add_route( + DatabaseDownload.as_asgi(self), r"/(?P[^/]+?)(?P\.db)$" ) - app.add_route( - DatabaseView.as_view(self), - r"/", - ) - app.add_route( - TableView.as_view(self), r"//" - ) - app.add_route( - RowView.as_view(self), - r"///[^/]+?)(?P" + renderer_regex - + r")?$>", + + r"|.jsono|\.csv)?$", + ) + add_route( + TableView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?$)", + ) + add_route( + RowView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?)/(?P[^/]+?)(?P" + + renderer_regex + + r")?$", ) self.register_custom_units() - # On 404 with a trailing slash redirect to path without that slash: - # pylint: disable=unused-variable - @app.middleware("response") - def redirect_on_404_with_trailing_slash(request, original_response): - if original_response.status == 404 and request.path.endswith("/"): - path = request.path.rstrip("/") - if request.query_string: - path = "{}?{}".format(path, request.query_string) - return response.redirect(path) - - @app.middleware("response") - async def add_traces_to_response(request, response): - if request.get("traces") is None: - return - traces = request["traces"] - trace_info = { - "request_duration_ms": 1000 * (time.time() - request["trace_start"]), - "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), - "num_traces": len(traces), - "traces": traces, - } - if "text/html" in response.content_type and b"" in response.body: - extra = json.dumps(trace_info, indent=2) - extra_html = "
<pre>{}</pre></body>
".format(extra).encode("utf8") - response.body = response.body.replace(b"", extra_html) - elif "json" in response.content_type and response.body.startswith(b"{"): - data = json.loads(response.body.decode("utf8")) - if "_trace" not in data: - data["_trace"] = trace_info - response.body = json.dumps(data).encode("utf8") - - @app.exception(Exception) - def on_exception(request, exception): - title = None - help = None - if isinstance(exception, NotFound): - status = 404 - info = {} - message = exception.args[0] - elif isinstance(exception, InvalidUsage): - status = 405 - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.messagge_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = ["500.html"] - if status != 500: - templates = ["{}.html".format(status)] + templates - info.update( - {"ok": False, "error": message, "status": status, "title": title} - ) - if request is not None and request.path.split("?")[0].endswith(".json"): - r = response.json(info, status=status) - - else: - template = self.jinja_env.select_template(templates) - r = response.html(template.render(info), status=status) - if self.cors: - r.headers["Access-Control-Allow-Origin"] = "*" - return r - - # First time server starts up, calculate table counts for immutable databases - @app.listener("before_server_start") - async def setup_db(app, loop): + async def setup_db(): + # First time server starts up, calculate table counts for immutable databases for dbname, database in self.databases.items(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - return app + return AsgiLifespan( + AsgiTracer(DatasetteRouter(self, routes)), on_startup=setup_db + ) + + +class DatasetteRouter(AsgiRouter): + def __init__(self, datasette, routes): + self.ds = datasette + super().__init__(routes) + + async def handle_404(self, scope, receive, send): + # If URL has a trailing slash, redirect to URL without it + path = scope.get("raw_path", scope["path"].encode("utf8")) + if path.endswith(b"/"): + path = path.rstrip(b"/") + if scope["query_string"]: + path += b"?" 
+ scope["query_string"] + await asgi_send_redirect(send, path.decode("latin1")) + else: + await super().handle_404(scope, receive, send) + + async def handle_500(self, scope, receive, send, exception): + title = None + if isinstance(exception, NotFound): + status = 404 + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.messagge_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = ["500.html"] + if status != 500: + templates = ["{}.html".format(status)] + templates + info.update({"ok": False, "error": message, "status": status, "title": title}) + headers = {} + if self.ds.cors: + headers["Access-Control-Allow-Origin"] = "*" + if scope["path"].split("?")[0].endswith(".json"): + await asgi_send_json(send, info, status=status, headers=headers) + else: + template = self.ds.jinja_env.select_template(templates) + await asgi_send_html( + send, template.render(info), status=status, headers=headers + ) diff --git a/datasette/cli.py b/datasette/cli.py index 0d47f47a..181b281c 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -1,4 +1,5 @@ import asyncio +import uvicorn import click from click import formatting from click_default_group import DefaultGroup @@ -354,4 +355,4 @@ def serve( asyncio.get_event_loop().run_until_complete(ds.run_sanity_checks()) # Start the server - ds.app().run(host=host, port=port, debug=debug) + uvicorn.run(ds.app(), host=host, port=port, log_level="info") diff --git a/datasette/renderer.py b/datasette/renderer.py index 417fecb5..349c2922 100644 --- a/datasette/renderer.py +++ b/datasette/renderer.py @@ -88,5 +88,5 @@ def json_renderer(args, data, view_name): content_type = "text/plain" else: body = json.dumps(data, cls=CustomJSONEncoder) - content_type = "application/json" + content_type = "application/json; charset=utf-8" return {"body": body, "status_code": status_code, "content_type": content_type} diff --git a/datasette/tracer.py b/datasette/tracer.py index c6fe0a00..e46a6fda 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -1,6 +1,7 @@ import asyncio from contextlib import contextmanager import time +import json import traceback tracers = {} @@ -32,15 +33,15 @@ def trace(type, **kwargs): start = time.time() yield end = time.time() - trace = { + trace_info = { "type": type, "start": start, "end": end, "duration_ms": (end - start) * 1000, "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), } - trace.update(kwargs) - tracer.append(trace) + trace_info.update(kwargs) + tracer.append(trace_info) @contextmanager @@ -53,3 +54,77 @@ def capture_traces(tracer): tracers[task_id] = tracer yield del tracers[task_id] + + +class AsgiTracer: + # If the body is larger than this we don't attempt to append the trace + max_body_bytes = 1024 * 256 # 256 KB + + def __init__(self, app): + self.app = app + + async def __call__(self, scope, receive, send): + if b"_trace=1" not in scope.get("query_string", b"").split(b"&"): + await self.app(scope, receive, send) + return + trace_start = time.time() + traces = [] + + accumulated_body = b"" + size_limit_exceeded = False + response_headers = [] + + async def wrapped_send(message): + nonlocal accumulated_body, size_limit_exceeded, response_headers + if message["type"] == "http.response.start": + response_headers = message["headers"] + await send(message) + return 
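+            # Once the accumulated body passes max_body_bytes it is flushed with more_body=True and every subsequent chunk is passed straight through, so oversized responses are sent unmodified with no trace appended.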
+ + if message["type"] != "http.response.body" or size_limit_exceeded: + await send(message) + return + + # Accumulate body until the end or until size is exceeded + accumulated_body += message["body"] + if len(accumulated_body) > self.max_body_bytes: + await send( + { + "type": "http.response.body", + "body": accumulated_body, + "more_body": True, + } + ) + size_limit_exceeded = True + return + + if not message.get("more_body"): + # We have all the body - modify it and send the result + # TODO: What to do about Content-Type or other cases? + trace_info = { + "request_duration_ms": 1000 * (time.time() - trace_start), + "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), + "num_traces": len(traces), + "traces": traces, + } + try: + content_type = [ + v.decode("utf8") + for k, v in response_headers + if k.lower() == b"content-type" + ][0] + except IndexError: + content_type = "" + if "text/html" in content_type and b"</body>" in accumulated_body: + extra = json.dumps(trace_info, indent=2) + extra_html = "
<pre>{}</pre></body>
".format(extra).encode("utf8") + accumulated_body = accumulated_body.replace(b"", extra_html) + elif "json" in content_type and accumulated_body.startswith(b"{"): + data = json.loads(accumulated_body.decode("utf8")) + if "_trace" not in data: + data["_trace"] = trace_info + accumulated_body = json.dumps(data).encode("utf8") + await send({"type": "http.response.body", "body": accumulated_body}) + + with capture_traces(traces): + await self.app(scope, receive, wrapped_send) diff --git a/datasette/utils.py b/datasette/utils/__init__.py similarity index 98% rename from datasette/utils.py rename to datasette/utils/__init__.py index 58746be4..94ccc23e 100644 --- a/datasette/utils.py +++ b/datasette/utils/__init__.py @@ -697,13 +697,13 @@ class LimitedWriter: self.limit_bytes = limit_mb * 1024 * 1024 self.bytes_count = 0 - def write(self, bytes): + async def write(self, bytes): self.bytes_count += len(bytes) if self.limit_bytes and (self.bytes_count > self.limit_bytes): raise WriteLimitExceeded( "CSV contains more than {} bytes".format(self.limit_bytes) ) - self.writer.write(bytes) + await self.writer.write(bytes) _infinities = {float("inf"), float("-inf")} @@ -741,3 +741,16 @@ def format_bytes(bytes): return "{} {}".format(int(current), unit) else: return "{:.1f} {}".format(current, unit) + + +class RequestParameters(dict): + def get(self, name, default=None): + "Return first value in the list, if available" + try: + return super().get(name)[0] + except (KeyError, TypeError): + return default + + def getlist(self, name, default=None): + "Return full list" + return super().get(name, default) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py new file mode 100644 index 00000000..fdf330ae --- /dev/null +++ b/datasette/utils/asgi.py @@ -0,0 +1,377 @@ +import json +from datasette.utils import RequestParameters +from mimetypes import guess_type +from urllib.parse import parse_qs, urlunparse +from pathlib import Path +from html import escape +import re +import aiofiles + + +class NotFound(Exception): + pass + + +class Request: + def __init__(self, scope): + self.scope = scope + + @property + def method(self): + return self.scope["method"] + + @property + def url(self): + return urlunparse( + (self.scheme, self.host, self.path, None, self.query_string, None) + ) + + @property + def scheme(self): + return self.scope.get("scheme") or "http" + + @property + def headers(self): + return dict( + [ + (k.decode("latin-1").lower(), v.decode("latin-1")) + for k, v in self.scope.get("headers") or [] + ] + ) + + @property + def host(self): + return self.headers.get("host") or "localhost" + + @property + def path(self): + return ( + self.scope.get("raw_path", self.scope["path"].encode("latin-1")) + ).decode("latin-1") + + @property + def query_string(self): + return (self.scope.get("query_string") or b"").decode("latin-1") + + @property + def args(self): + return RequestParameters(parse_qs(qs=self.query_string)) + + @property + def raw_args(self): + return {key: value[0] for key, value in self.args.items()} + + @classmethod + def fake(cls, path_with_query_string, method="GET", scheme="http"): + "Useful for constructing Request objects for tests" + path, _, query_string = path_with_query_string.partition("?") + scope = { + "http_version": "1.1", + "method": method, + "path": path, + "raw_path": path.encode("latin-1"), + "query_string": query_string.encode("latin-1"), + "scheme": scheme, + "type": "http", + } + return cls(scope) + + +class AsgiRouter: + def __init__(self, routes=None): + routes = routes or 
[] + self.routes = [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def __call__(self, scope, receive, send): + # Because we care about "foo/bar" v.s. "foo%2Fbar" we decode raw_path ourselves + path = scope["raw_path"].decode("ascii") + for regex, view in self.routes: + match = regex.match(path) + if match is not None: + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + try: + return await view(new_scope, receive, send) + except Exception as exception: + return await self.handle_500(scope, receive, send, exception) + return await self.handle_404(scope, receive, send) + + async def handle_404(self, scope, receive, send): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"

<h1>404</h1>
"}) + + async def handle_500(self, scope, receive, send, exception): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + html = "

<h1>500</h1><pre>{}</pre>
".format(escape(repr(exception))) + await send({"type": "http.response.body", "body": html.encode("latin-1")}) + + +class AsgiLifespan: + def __init__(self, app, on_startup=None, on_shutdown=None): + self.app = app + on_startup = on_startup or [] + on_shutdown = on_shutdown or [] + if not isinstance(on_startup or [], list): + on_startup = [on_startup] + if not isinstance(on_shutdown or [], list): + on_shutdown = [on_shutdown] + self.on_startup = on_startup + self.on_shutdown = on_shutdown + + async def __call__(self, scope, receive, send): + if scope["type"] == "lifespan": + while True: + message = await receive() + if message["type"] == "lifespan.startup": + for fn in self.on_startup: + await fn() + await send({"type": "lifespan.startup.complete"}) + elif message["type"] == "lifespan.shutdown": + for fn in self.on_shutdown: + await fn() + await send({"type": "lifespan.shutdown.complete"}) + return + else: + await self.app(scope, receive, send) + + +class AsgiView: + def dispatch_request(self, request, *args, **kwargs): + handler = getattr(self, request.method.lower(), None) + return handler(request, *args, **kwargs) + + @classmethod + def as_asgi(cls, *class_args, **class_kwargs): + async def view(scope, receive, send): + # Uses scope to create a request object, then dispatches that to + # self.get(...) or self.options(...) along with keyword arguments + # that were already tucked into scope["url_route"]["kwargs"] by + # the router, similar to how Django Channels works: + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + request = Request(scope) + self = view.view_class(*class_args, **class_kwargs) + response = await self.dispatch_request( + request, **scope["url_route"]["kwargs"] + ) + await response.asgi_send(send) + + view.view_class = cls + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.__name__ = cls.__name__ + return view + + +class AsgiStream: + def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): + self.stream_fn = stream_fn + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + # Remove any existing content-type header + headers = dict( + [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] + ) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + w = AsgiWriter(send) + await self.stream_fn(w) + await send({"type": "http.response.body", "body": b""}) + + +class AsgiWriter: + def __init__(self, send): + self.send = send + + async def write(self, chunk): + await self.send( + { + "type": "http.response.body", + "body": chunk.encode("latin-1"), + "more_body": True, + } + ) + + +async def asgi_send_json(send, info, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, + json.dumps(info), + status=status, + headers=headers, + content_type="application/json; charset=utf-8", + ) + + +async def asgi_send_html(send, html, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, html, status=status, headers=headers, content_type="text/html" + ) + + +async def asgi_send_redirect(send, location, status=302): + await asgi_send( + send, + "", + status=status, + headers={"Location": location}, + content_type="text/html", + ) + + +async def asgi_send(send, content, status, headers=None, 
content_type="text/plain"): + await asgi_start(send, status, headers, content_type) + await send({"type": "http.response.body", "body": content.encode("latin-1")}) + + +async def asgi_start(send, status, headers=None, content_type="text/plain"): + headers = headers or {} + # Remove any existing content-type header + headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) + headers["content-type"] = content_type + await send( + { + "type": "http.response.start", + "status": status, + "headers": [ + [key.encode("latin1"), value.encode("latin1")] + for key, value in headers.items() + ], + } + ) + + +async def asgi_send_file( + send, filepath, filename=None, content_type=None, chunk_size=4096 +): + headers = {} + if filename: + headers["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) + first = True + async with aiofiles.open(str(filepath), mode="rb") as fp: + if first: + await asgi_start( + send, + 200, + headers, + content_type or guess_type(str(filepath))[0] or "text/plain", + ) + first = False + more_body = True + while more_body: + chunk = await fp.read(chunk_size) + more_body = len(chunk) == chunk_size + await send( + {"type": "http.response.body", "body": chunk, "more_body": more_body} + ) + + +def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None): + async def inner_static(scope, receive, send): + path = scope["url_route"]["kwargs"]["path"] + full_path = (Path(root_path) / path).absolute() + # Ensure full_path is within root_path to avoid weird "../" tricks + try: + full_path.relative_to(root_path) + except ValueError: + await asgi_send_html(send, "404", 404) + return + first = True + try: + await asgi_send_file(send, full_path, chunk_size=chunk_size) + except FileNotFoundError: + await asgi_send_html(send, "404", 404) + return + + return inner_static + + +class Response: + def __init__(self, body=None, status=200, headers=None, content_type="text/plain"): + self.body = body + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + headers = {} + headers.update(self.headers) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + body = self.body + if not isinstance(body, bytes): + body = body.encode("utf-8") + await send({"type": "http.response.body", "body": body}) + + @classmethod + def html(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/html; charset=utf-8", + ) + + @classmethod + def text(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/plain; charset=utf-8", + ) + + @classmethod + def redirect(cls, path, status=302, headers=None): + headers = headers or {} + headers["Location"] = path + return cls("", status=status, headers=headers) + + +class AsgiFileDownload: + def __init__( + self, filepath, filename=None, content_type="application/octet-stream" + ): + self.filepath = filepath + self.filename = filename + self.content_type = content_type + + async def asgi_send(self, send): + return await asgi_send_file(send, self.filepath, content_type=self.content_type) diff --git a/datasette/views/base.py b/datasette/views/base.py index 9db8cc76..7acb7304 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -7,9 +7,8 @@ 
import urllib import jinja2 import pint -from sanic import response -from sanic.exceptions import NotFound -from sanic.views import HTTPMethodView + +from html import escape from datasette import __version__ from datasette.plugins import pm @@ -26,6 +25,14 @@ from datasette.utils import ( sqlite3, to_css_class, ) +from datasette.utils.asgi import ( + AsgiStream, + AsgiWriter, + AsgiRouter, + AsgiView, + NotFound, + Response, +) ureg = pint.UnitRegistry() @@ -49,7 +56,14 @@ class DatasetteError(Exception): self.messagge_is_html = messagge_is_html -class BaseView(HTTPMethodView): +class BaseView(AsgiView): + ds = None + + async def head(self, *args, **kwargs): + response = await self.get(*args, **kwargs) + response.body = b"" + return response + def _asset_urls(self, key, template, context): # Flatten list-of-lists from plugins: seen_urls = set() @@ -104,7 +118,7 @@ class BaseView(HTTPMethodView): datasette=self.ds, ): body_scripts.append(jinja2.Markup(script)) - return response.html( + return Response.html( template.render( { **context, @@ -136,7 +150,7 @@ class DataView(BaseView): self.ds = datasette def options(self, request, *args, **kwargs): - r = response.text("ok") + r = Response.text("ok") if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" return r @@ -146,7 +160,7 @@ class DataView(BaseView): path = "{}?{}".format(path, request.query_string) if remove_args: path = path_with_removed_args(request, remove_args, path=path) - r = response.redirect(path) + r = Response.redirect(path) r.headers["Link"] = "<{}>; rel=preload".format(path) if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" @@ -195,17 +209,17 @@ class DataView(BaseView): kwargs["table"] = table if _format: kwargs["as_format"] = ".{}".format(_format) - elif "table" in kwargs: + elif kwargs.get("table"): kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"]) should_redirect = "/{}-{}".format(name, expected) - if "table" in kwargs: + if kwargs.get("table"): should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"]) - if "pk_path" in kwargs: + if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] - if "as_format" in kwargs: + if kwargs.get("as_format"): should_redirect += kwargs["as_format"] - if "as_db" in kwargs: + if kwargs.get("as_db"): should_redirect += kwargs["as_db"] if ( @@ -246,7 +260,7 @@ class DataView(BaseView): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: data, _, _ = response_or_template_contexts @@ -282,13 +296,13 @@ class DataView(BaseView): if not first: data, _, _ = await self.data(request, database, hash, **kwargs) if first: - writer.writerow(headings) + await writer.writerow(headings) first = False next = data.get("next") for row in data["rows"]: if not expanded_columns: # Simple path - writer.writerow(row) + await writer.writerow(row) else: # Look for {"value": "label": } dicts and expand new_row = [] @@ -298,10 +312,10 @@ class DataView(BaseView): new_row.append(cell["label"]) else: new_row.append(cell) - writer.writerow(new_row) + await writer.writerow(new_row) except Exception as e: print("caught this", e) - r.write(str(e)) + await r.write(str(e)) return content_type = "text/plain; charset=utf-8" @@ -315,7 +329,7 @@ class DataView(BaseView): ) headers["Content-Disposition"] = disposition - return response.stream(stream_fn, headers=headers, 
content_type=content_type) + return AsgiStream(stream_fn, headers=headers, content_type=content_type) async def get_format(self, request, database, args): """ Determine the format of the response from the request, from URL @@ -363,7 +377,7 @@ class DataView(BaseView): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: @@ -414,17 +428,11 @@ class DataView(BaseView): if result is None: raise NotFound("No data") - response_args = { - "content_type": result.get("content_type", "text/plain"), - "status": result.get("status_code", 200), - } - - if type(result.get("body")) == bytes: - response_args["body_bytes"] = result.get("body") - else: - response_args["body"] = result.get("body") - - r = response.HTTPResponse(**response_args) + r = Response( + body=result.get("body"), + status=result.get("status_code", 200), + content_type=result.get("content_type", "text/plain"), + ) else: extras = {} if callable(extra_template_data): diff --git a/datasette/views/database.py b/datasette/views/database.py index a5b606f1..78af19c5 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,10 +1,9 @@ import os -from sanic import response - from datasette.utils import to_css_class, validate_sql_select +from datasette.utils.asgi import AsgiFileDownload -from .base import DataView, DatasetteError +from .base import DatasetteError, DataView class DatabaseView(DataView): @@ -79,8 +78,8 @@ class DatabaseDownload(DataView): if not db.path: raise DatasetteError("Cannot download database", status=404) filepath = db.path - return await response.file_stream( + return AsgiFileDownload( filepath, filename=os.path.basename(filepath), - mime_type="application/octet-stream", + content_type="application/octet-stream", ) diff --git a/datasette/views/index.py b/datasette/views/index.py index c9d15c36..2c1c017a 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -1,9 +1,8 @@ import hashlib import json -from sanic import response - from datasette.utils import CustomJSONEncoder +from datasette.utils.asgi import Response from datasette.version import __version__ from .base import BaseView @@ -104,9 +103,9 @@ class IndexView(BaseView): headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( + return Response( json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder), - content_type="application/json", + content_type="application/json; charset=utf-8", headers=headers, ) else: diff --git a/datasette/views/special.py b/datasette/views/special.py index 91b577fc..c4976bb2 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,5 +1,5 @@ import json -from sanic import response +from datasette.utils.asgi import Response from .base import BaseView @@ -17,8 +17,10 @@ class JsonDataView(BaseView): headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( - json.dumps(data), content_type="application/json", headers=headers + return Response( + json.dumps(data), + content_type="application/json; charset=utf-8", + headers=headers, ) else: diff --git a/datasette/views/table.py b/datasette/views/table.py index 14b8743a..06be5671 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -3,13 +3,12 @@ import itertools import json import jinja2 -from 
sanic.exceptions import NotFound -from sanic.request import RequestParameters from datasette.plugins import pm from datasette.utils import ( CustomRow, QueryInterrupted, + RequestParameters, append_querystring, compound_keys_after_sql, escape_sqlite, @@ -24,6 +23,7 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) +from datasette.utils.asgi import NotFound from datasette.filters import Filters from .base import DataView, DatasetteError, ureg @@ -219,8 +219,7 @@ class TableView(RowTableShared): if is_view: order_by = "" - # We roll our own query_string decoder because by default Sanic - # drops anything with an empty value e.g. ?name__exact= + # Ensure we don't drop anything with an empty value e.g. ?name__exact= args = RequestParameters( urllib.parse.parse_qs(request.query_string, keep_blank_values=True) ) diff --git a/pytest.ini b/pytest.ini index f2c8a6d2..aa292efc 100644 --- a/pytest.ini +++ b/pytest.ini @@ -4,7 +4,5 @@ filterwarnings= ignore:Using or importing the ABCs::jinja2 # https://bugs.launchpad.net/beautifulsoup/+bug/1778909 ignore:Using or importing the ABCs::bs4.element - # Sanic verify_ssl=True - ignore:verify_ssl is deprecated::sanic # Python 3.7 PendingDeprecationWarning: Task.current_task() ignore:.*current_task.*:PendingDeprecationWarning diff --git a/setup.py b/setup.py index 60c1bcc5..f66d03da 100644 --- a/setup.py +++ b/setup.py @@ -37,17 +37,18 @@ setup( author="Simon Willison", license="Apache License, Version 2.0", url="https://github.com/simonw/datasette", - packages=find_packages(exclude='tests'), + packages=find_packages(exclude="tests"), package_data={"datasette": ["templates/*.html"]}, include_package_data=True, install_requires=[ "click>=6.7", "click-default-group==1.2", - "Sanic==0.7.0", "Jinja2==2.10.1", "hupper==1.0", "pint==0.8.1", "pluggy>=0.12.0", + "uvicorn>=0.8.1", + "aiofiles==0.4.0", ], entry_points=""" [console_scripts] @@ -60,6 +61,7 @@ setup( "pytest-asyncio==0.10.0", "aiohttp==3.5.3", "beautifulsoup4==4.6.1", + "asgiref==3.1.2", ] + maybe_black }, diff --git a/tests/fixtures.py b/tests/fixtures.py index 04ac3c68..00140f50 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,5 +1,7 @@ from datasette.app import Datasette from datasette.utils import sqlite3 +from asgiref.testing import ApplicationCommunicator +from asgiref.sync import async_to_sync import itertools import json import os @@ -10,16 +12,82 @@ import sys import string import tempfile import time +from urllib.parse import unquote + + +class TestResponse: + def __init__(self, status, headers, body): + self.status = status + self.headers = headers + self.body = body + + @property + def json(self): + return json.loads(self.text) + + @property + def text(self): + return self.body.decode("utf8") class TestClient: - def __init__(self, sanic_test_client): - self.sanic_test_client = sanic_test_client + max_redirects = 5 - def get(self, path, allow_redirects=True): - return self.sanic_test_client.get( - path, allow_redirects=allow_redirects, gather_request=False + def __init__(self, asgi_app): + self.asgi_app = asgi_app + + @async_to_sync + async def get(self, path, allow_redirects=True, redirect_count=0, method="GET"): + return await self._get(path, allow_redirects, redirect_count, method) + + async def _get(self, path, allow_redirects=True, redirect_count=0, method="GET"): + query_string = b"" + if "?" 
in path: + path, _, query_string = path.partition("?") + query_string = query_string.encode("utf8") + instance = ApplicationCommunicator( + self.asgi_app, + { + "type": "http", + "http_version": "1.0", + "method": method, + "path": unquote(path), + "raw_path": path.encode("ascii"), + "query_string": query_string, + "headers": [[b"host", b"localhost"]], + }, ) + await instance.send_input({"type": "http.request"}) + # First message back should be response.start with headers and status + messages = [] + start = await instance.receive_output(2) + messages.append(start) + assert start["type"] == "http.response.start" + headers = dict( + [(k.decode("utf8"), v.decode("utf8")) for k, v in start["headers"]] + ) + status = start["status"] + # Now loop until we run out of response.body + body = b"" + while True: + message = await instance.receive_output(2) + messages.append(message) + assert message["type"] == "http.response.body" + body += message["body"] + if not message.get("more_body"): + break + response = TestResponse(status, headers, body) + if allow_redirects and response.status in (301, 302): + assert ( + redirect_count < self.max_redirects + ), "Redirected {} times, max_redirects={}".format( + redirect_count, self.max_redirects + ) + location = response.headers["Location"] + return await self._get( + location, allow_redirects=True, redirect_count=redirect_count + 1 + ) + return response def make_app_client( @@ -32,6 +100,7 @@ def make_app_client( is_immutable=False, extra_databases=None, inspect_data=None, + static_mounts=None, ): with tempfile.TemporaryDirectory() as tmpdir: filepath = os.path.join(tmpdir, filename) @@ -73,9 +142,10 @@ def make_app_client( plugins_dir=plugins_dir, config=config, inspect_data=inspect_data, + static_mounts=static_mounts, ) ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n)))) - client = TestClient(ds.app().test_client) + client = TestClient(ds.app()) client.ds = ds yield client @@ -88,7 +158,7 @@ def app_client(): @pytest.fixture(scope="session") def app_client_no_files(): ds = Datasette([]) - client = TestClient(ds.app().test_client) + client = TestClient(ds.app()) client.ds = ds yield client diff --git a/tests/test_api.py b/tests/test_api.py index 5c1bff15..a32ed5e3 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,6 +22,7 @@ import urllib def test_homepage(app_client): response = app_client.get("/.json") assert response.status == 200 + assert "application/json; charset=utf-8" == response.headers["content-type"] assert response.json.keys() == {"fixtures": 0}.keys() d = response.json["fixtures"] assert d["name"] == "fixtures" @@ -771,8 +772,8 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag fetched.extend(response.json["rows"]) path = response.json["next_url"] if path: - assert response.json["next"] assert urllib.parse.urlencode({"_next": response.json["next"]}) in path + path = path.replace("http://localhost", "") assert count < 30, "Possible infinite loop detected" assert expected_rows == len(fetched) @@ -812,6 +813,8 @@ def test_paginate_compound_keys(app_client): response = app_client.get(path) fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert page < 100 assert 1001 == len(fetched) assert 21 == page @@ -833,6 +836,8 @@ def test_paginate_compound_keys_with_extra_filters(app_client): response = app_client.get(path) fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = 
path.replace("http://localhost", "") assert 2 == page expected = [r[3] for r in generate_compound_rows(1001) if "d" in r[3]] assert expected == [f["content"] for f in fetched] @@ -881,6 +886,8 @@ def test_sortable(app_client, query_string, sort_key, human_description_en): assert human_description_en == response.json["human_description_en"] fetched.extend(response.json["rows"]) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert 5 == page expected = list(generate_sortable_rows(201)) expected.sort(key=sort_key) @@ -1191,6 +1198,7 @@ def test_plugins_json(app_client): def test_versions_json(app_client): response = app_client.get("/-/versions.json") assert "python" in response.json + assert "3.0" == response.json.get("asgi") assert "version" in response.json["python"] assert "full" in response.json["python"] assert "datasette" in response.json @@ -1236,6 +1244,8 @@ def test_page_size_matching_max_returned_rows( fetched.extend(response.json["rows"]) assert len(response.json["rows"]) in (1, 50) path = response.json["next_url"] + if path: + path = path.replace("http://localhost", "") assert 201 == len(fetched) diff --git a/tests/test_csv.py b/tests/test_csv.py index cf0e6732..c3cdc241 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -46,7 +46,7 @@ def test_table_csv(app_client): response = app_client.get("/fixtures/simple_primary_key.csv") assert response.status == 200 assert not response.headers.get("Access-Control-Allow-Origin") - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_TABLE_CSV == response.text @@ -59,7 +59,7 @@ def test_table_csv_cors_headers(app_client_with_cors): def test_table_csv_with_labels(app_client): response = app_client.get("/fixtures/facetable.csv?_labels=1") assert response.status == 200 - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_TABLE_WITH_LABELS_CSV == response.text @@ -68,14 +68,14 @@ def test_custom_sql_csv(app_client): "/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2" ) assert response.status == 200 - assert "text/plain; charset=utf-8" == response.headers["Content-Type"] + assert "text/plain; charset=utf-8" == response.headers["content-type"] assert EXPECTED_CUSTOM_CSV == response.text def test_table_csv_download(app_client): response = app_client.get("/fixtures/simple_primary_key.csv?_dl=1") assert response.status == 200 - assert "text/csv; charset=utf-8" == response.headers["Content-Type"] + assert "text/csv; charset=utf-8" == response.headers["content-type"] expected_disposition = 'attachment; filename="simple_primary_key.csv"' assert expected_disposition == response.headers["Content-Disposition"] diff --git a/tests/test_html.py b/tests/test_html.py index 6b673c13..32fa2fe3 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -8,6 +8,7 @@ from .fixtures import ( # noqa METADATA, ) import json +import pathlib import pytest import re import urllib.parse @@ -16,6 +17,7 @@ import urllib.parse def test_homepage(app_client_two_attached_databases): response = app_client_two_attached_databases.get("/") assert response.status == 200 + assert "text/html; charset=utf-8" == response.headers["content-type"] soup = Soup(response.body, "html.parser") assert "Datasette Fixtures" == soup.find("h1").text assert ( @@ -44,6 +46,29 @@ def 
test_homepage(app_client_two_attached_databases): ] == table_links +def test_http_head(app_client): + response = app_client.get("/", method="HEAD") + assert response.status == 200 + + +def test_static(app_client): + response = app_client.get("/-/static/app2.css") + assert response.status == 404 + response = app_client.get("/-/static/app.css") + assert response.status == 200 + assert "text/css" == response.headers["content-type"] + + +def test_static_mounts(): + for client in make_app_client( + static_mounts=[("custom-static", str(pathlib.Path(__file__).parent))] + ): + response = client.get("/custom-static/test_html.py") + assert response.status == 200 + response = client.get("/custom-static/not_exists.py") + assert response.status == 404 + + def test_memory_database_page(): for client in make_app_client(memory=True): response = client.get("/:memory:") diff --git a/tests/test_utils.py b/tests/test_utils.py index a5f603e6..e9e722b8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,11 +3,11 @@ Tests for various datasette helper functions. """ from datasette import utils +from datasette.utils.asgi import Request from datasette.filters import Filters import json import os import pytest -from sanic.request import Request import sqlite3 import tempfile from unittest.mock import patch @@ -53,7 +53,7 @@ def test_urlsafe_components(path, expected): ], ) def test_path_with_added_args(path, added_args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.fake(path) actual = utils.path_with_added_args(request, added_args) assert expected == actual @@ -67,11 +67,11 @@ def test_path_with_added_args(path, added_args, expected): ], ) def test_path_with_removed_args(path, args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.fake(path) actual = utils.path_with_removed_args(request, args) assert expected == actual # Run the test again but this time use the path= argument - request = Request("/".encode("utf8"), {}, "1.1", "GET", None) + request = Request.fake("/") actual = utils.path_with_removed_args(request, args, path=path) assert expected == actual @@ -84,7 +84,7 @@ def test_path_with_removed_args(path, args, expected): ], ) def test_path_with_replaced_args(path, args, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.fake(path) actual = utils.path_with_replaced_args(request, args) assert expected == actual @@ -363,7 +363,7 @@ def test_table_columns(): ], ) def test_path_with_format(path, format, extra_qs, expected): - request = Request(path.encode("utf8"), {}, "1.1", "GET", None) + request = Request.fake(path) actual = utils.path_with_format(request, format, extra_qs) assert expected == actual From e3dac311adea34b892eb2b67f3c46701d64489b2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 20:23:07 -0700 Subject: [PATCH 08/88] Install test dependencies so deploy can work python tests/fixtures.py needs asgiref or it fails with an error --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b37ae967..c868291d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ jobs: - stage: deploy latest.datasette.io if: branch = master AND type = push script: - - pip install . 
+ - pip install .[test] - npm install -g now - python tests/fixtures.py fixtures.db fixtures.json - export ALIAS=`echo $TRAVIS_COMMIT | cut -c 1-7` From 2f4def62e0c98c2cdfe27051fdfd375e3626e89b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 21:23:24 -0700 Subject: [PATCH 09/88] Added datasette-doublemetaphone to list of plugins --- docs/ecosystem.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst index 844fe207..7d87fa85 100644 --- a/docs/ecosystem.rst +++ b/docs/ecosystem.rst @@ -80,6 +80,11 @@ datasette-jellyfish `datasette-jellyfish `__ exposes custom SQL functions for a range of common fuzzy string matching functions, including soundex, porter stemming and levenshtein distance. It builds on top of the `Jellyfish Python library `__. +datasette-doublemetaphone +------------------------- + +`datasette-doublemetaphone `__ by Matthew Somerville adds custom SQL functions for applying the Double Metaphone fuzzy "sounds like" algorithm. + datasette-jq ------------ From 8e25aaa6f34e6ffe3b3511d2dff488b5f25353d3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 23 Jun 2019 22:28:37 -0700 Subject: [PATCH 10/88] Porting Datasette to ASGI, and Turtles all the way down --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 638dcd1c..91b42753 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover ## News + * 23rd June 2019: [Porting Datasette to ASGI, and Turtles all the way down](https://simonwillison.net/2019/Jun/23/datasette-asgi/) * 21st May 2019: The anonymized raw data from [the Stack Overflow Developer Survey 2019](https://stackoverflow.blog/2019/05/21/public-data-release-of-stack-overflows-2019-developer-survey/) has been [published in partnership with Glitch](https://glitch.com/culture/discover-insights-explore-developer-survey-results-2019/), powered by Datasette. * 19th May 2019: [Datasette 0.28](https://datasette.readthedocs.io/en/stable/changelog.html#v0-28) - a salmagundi of new features! * No longer immutable! Datasette now supports [databases that change](https://datasette.readthedocs.io/en/stable/changelog.html#supporting-databases-that-change). From 8b11788231df456d82402ca588601b52f032ce73 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 24 Jun 2019 09:28:42 -0700 Subject: [PATCH 11/88] Better coverage of sqlite-utils in FTS docs, closes #525 --- docs/full_text_search.rst | 32 ++++++++++++++++++++++---------- setup.py | 4 ++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/docs/full_text_search.rst b/docs/full_text_search.rst index 97656bb8..138b8e0b 100644 --- a/docs/full_text_search.rst +++ b/docs/full_text_search.rst @@ -28,7 +28,28 @@ To set up full-text search for a table, you need to do two things: * Create a new FTS virtual table associated with your table * Populate that FTS table with the data that you would like to be able to run searches against -To enable full-text search for a table called ``items`` that works against the ``name`` and ``description`` columns, you would run the following SQL to create a new ``items_fts`` FTS virtual table: +Configuring FTS using sqlite-utils +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`sqlite-utils `__ is a CLI utility and Python library for manipulating SQLite databases. You can use `it from Python code `__ to configure FTS search, or you can achieve the same goal `using the accompanying command-line tool `__. 
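+For example, a rough sketch of doing the same thing from Python code (this assumes a ``mydatabase.db`` file that already contains an ``items`` table)::
+
+    import sqlite_utils
+
+    db = sqlite_utils.Database("mydatabase.db")
+    # Creates and populates an items_fts virtual table indexing these columns
+    db["items"].enable_fts(["name", "description"])
+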
+ +Here's how to use ``sqlite-utils`` to enable full-text search for an ``items`` table across the ``name`` and ``description`` columns:: + + $ sqlite-utils enable-fts mydatabase.db items name description + +Configuring FTS using csvs-to-sqlite +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your data starts out in CSV files, you can use Datasette's companion tool `csvs-to-sqlite `__ to convert that file into a SQLite database and enable full-text search on specific columns. For a file called ``items.csv`` where you want full-text search to operate against the ``name`` and ``description`` columns you would run the following:: + + $ csvs-to-sqlite items.csv items.db -f name -f description + +Configuring FTS by hand +~~~~~~~~~~~~~~~~~~~~~~~ + +We recommend using `sqlite-utils `__, but if you want to hand-roll a SQLite full-text search table you can do so using the following SQL. + +To enable full-text search for a table called ``items`` that works against the ``name`` and ``description`` columns, you would run this SQL to create a new ``items_fts`` FTS virtual table: .. code-block:: sql @@ -71,8 +92,6 @@ And then populate it like this: You can use this technique to populate the full-text search index from any combination of tables and joins that makes sense for your project. -The `sqlite-utils tool `__ provides a command-line mechanism that can be used to implement the above steps. - .. _full_text_search_table_or_view: Configuring full-text search for a table or view @@ -103,13 +122,6 @@ Here is an example which enables full-text search for a ``display_ads`` view whi } } -Setting up full-text search using csvs-to-sqlite ------------------------------------------------- - -If your data starts out in CSV files, you can use Datasette's companion tool `csvs-to-sqlite `_ to convert that file into a SQLite database and enable full-text search on specific columns. For a file called ``items.csv`` where you want full-text search to operate against the ``name`` and ``description`` columns you would run the following:: - - csvs-to-sqlite items.csv items.db -f name -f description - The table view API ------------------ diff --git a/setup.py b/setup.py index f66d03da..edb8d51e 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,10 @@ setup( """, setup_requires=["pytest-runner"], extras_require={ + "docs": [ + "sphinx_rtd_theme", + "sphinx-autobuild", + ], "test": [ "pytest==4.6.1", "pytest-asyncio==0.10.0", From a7befda136fc6274fece7b35603439b38868448a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Jun 2019 05:08:04 -0700 Subject: [PATCH 12/88] pip install -e .[docs] for docs dependencies --- docs/contributing.rst | 12 +++++------- setup.py | 7 ++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 993d01d8..27e3b0db 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -91,7 +91,7 @@ You can build it locally by installing ``sphinx`` and ``sphinx_rtd_theme`` in yo source venv/bin/activate # Install the dependencies needed to build the docs - pip install sphinx sphinx_rtd_theme + pip install -e .[docs] # Now build the docs cd docs/ @@ -103,16 +103,14 @@ This will create the HTML version of the documentation in ``docs/_build/html``. Any time you make changes to a ``.rst`` file you can re-run ``make html`` to update the built documents, then refresh them in your browser. -For added productivity, you can run Sphinx in auto-build mode. 
This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. +For added productivity, you can use use `sphinx-autobuild `__ to run Sphinx in auto-build mode. This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. -To enable auto-build mode, first install `sphinx-autobuild `__:: - - pip install sphinx-autobuild - -Now start the server by running:: +``sphinx-autobuild`` will have been installed when you ran ``pip install -e .[docs]``. In your ``docs/`` directory you can start the server by running the following:: make livehtml +Now browse to ``http://localhost:8000/`` to view the documentation. Any edits you make should be instantly relected in your browser. + .. _contributing_release: Release process diff --git a/setup.py b/setup.py index edb8d51e..fdbb948e 100644 --- a/setup.py +++ b/setup.py @@ -56,10 +56,7 @@ setup( """, setup_requires=["pytest-runner"], extras_require={ - "docs": [ - "sphinx_rtd_theme", - "sphinx-autobuild", - ], + "docs": ["sphinx_rtd_theme", "sphinx-autobuild"], "test": [ "pytest==4.6.1", "pytest-asyncio==0.10.0", @@ -67,7 +64,7 @@ setup( "beautifulsoup4==4.6.1", "asgiref==3.1.2", ] - + maybe_black + + maybe_black, }, tests_require=["datasette[test]"], classifiers=[ From 2c94fdcdbddcc3e4c7e017ad5aed44821bb0b4cc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Jun 2019 08:36:39 -0700 Subject: [PATCH 13/88] Typo --- docs/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 27e3b0db..43834edc 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -109,7 +109,7 @@ For added productivity, you can use use `sphinx-autobuild Date: Tue, 25 Jun 2019 05:02:42 -0700 Subject: [PATCH 14/88] New experimental Row() for templates, refs #521 --- datasette/views/table.py | 43 +++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 06be5671..f2f5fda0 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -33,6 +33,35 @@ LINK_WITH_LABEL = ( LINK_WITH_VALUE = '{id}' +class Row: + def __init__(self, cells): + self.cells = cells + + def __iter__(self): + return iter(self.cells) + + def __getitem__(self, key): + for cell in self.cells: + if cell["column"] == key: + return cell["value"] + raise KeyError + + def raw(self, key): + for cell in self.cells: + if cell["column"] == key: + return cell["raw"] + return None + + def __str__(self): + d = { + key: self[key] + for key in [ + c["column"] for c in self.cells if not c.get("is_special_link_column") + ] + } + return json.dumps(d, default=repr, indent=2) + + class RowTableShared(DataView): async def sortable_columns_for_table(self, database, table, use_rowid): db = self.ds.databases[database] @@ -76,18 +105,18 @@ class RowTableShared(DataView): # Unless we are a view, the first column is a link - either to the rowid # or to the simple or compound primary key if link_column: + is_special_link_column = len(pks) != 1 + pk_path = path_from_row_pks(row, pks, not pks, False) cells.append( { "column": pks[0] if len(pks) == 1 else "Link", + "is_special_link_column": is_special_link_column, + "raw": pk_path, "value": jinja2.Markup( '{flat_pks}'.format( database=database, table=urllib.parse.quote_plus(table), - flat_pks=str( - jinja2.escape( - path_from_row_pks(row, 
pks, not pks, False) - ) - ), + flat_pks=str(jinja2.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) ), @@ -159,8 +188,8 @@ class RowTableShared(DataView): if truncate_cells and len(display_value) > truncate_cells: display_value = display_value[:truncate_cells] + u"\u2026" - cells.append({"column": column, "value": display_value}) - cell_rows.append(cells) + cells.append({"column": column, "value": display_value, "raw": value}) + cell_rows.append(Row(cells)) if link_column: # Add the link column header. From 7d3783fda100e4743ed795cb9236218915a3ccdb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 25 Jun 2019 05:21:10 -0700 Subject: [PATCH 15/88] Default to raw value, use Row.display(key) for display, refs #521 --- datasette/views/table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index f2f5fda0..c41bc305 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -43,13 +43,13 @@ class Row: def __getitem__(self, key): for cell in self.cells: if cell["column"] == key: - return cell["value"] + return cell["raw"] raise KeyError - def raw(self, key): + def display(self, key): for cell in self.cells: if cell["column"] == key: - return cell["raw"] + return cell["value"] return None def __str__(self): From 55637ef9948f875617c724c9ce511e21610367ca Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Jul 2019 17:50:45 -0700 Subject: [PATCH 16/88] Rename _rows_and_columns.html to _table.html, refs #521 --- .../{_rows_and_columns.html => _table.html} | 0 datasette/templates/row.html | 2 +- datasette/templates/table.html | 2 +- datasette/views/table.py | 16 +++++++-------- docs/custom_templates.rst | 20 +++++++++---------- 5 files changed, 20 insertions(+), 20 deletions(-) rename datasette/templates/{_rows_and_columns.html => _table.html} (100%) diff --git a/datasette/templates/_rows_and_columns.html b/datasette/templates/_table.html similarity index 100% rename from datasette/templates/_rows_and_columns.html rename to datasette/templates/_table.html diff --git a/datasette/templates/row.html b/datasette/templates/row.html index baffaf96..bda1e4e2 100644 --- a/datasette/templates/row.html +++ b/datasette/templates/row.html @@ -24,7 +24,7 @@

This data as {% for name, url in renderers.items() %}<a href="{{ url }}">{{ name }}</a>{{ ", " if not loop.last }}{% endfor %}

-{% include custom_rows_and_columns_templates %} +{% include custom_table_templates %} {% if foreign_key_tables %}

<h2>Links from other tables</h2>

diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 5ba3ff6d..2287e901 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -145,7 +145,7 @@ {% endif %} -{% include custom_rows_and_columns_templates %} +{% include custom_table_templates %} {% if next_url %}

<a href="{{ next_url }}">Next page</a>

diff --git a/datasette/views/table.py b/datasette/views/table.py index c41bc305..8ba3abe4 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -744,14 +744,14 @@ class TableView(RowTableShared): "sort": sort, "sort_desc": sort_desc, "disable_sort": is_view, - "custom_rows_and_columns_templates": [ - "_rows_and_columns-{}-{}.html".format( + "custom_table_templates": [ + "_table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns-table-{}-{}.html".format( + "_table-table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns.html", + "_table.html", ], "metadata": metadata, "view_definition": await db.get_view_definition(table), @@ -828,14 +828,14 @@ class RowView(RowTableShared): ), "display_columns": display_columns, "display_rows": display_rows, - "custom_rows_and_columns_templates": [ - "_rows_and_columns-{}-{}.html".format( + "custom_table_templates": [ + "_table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns-row-{}-{}.html".format( + "_table-row-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns.html", + "_table.html", ], "metadata": (self.ds.metadata("databases") or {}) .get(database, {}) diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index b0863381..1dfaf892 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -145,14 +145,14 @@ The lookup rules Datasette uses are as follows:: row.html Rows and columns include on table page: - _rows_and_columns-table-mydatabase-mytable.html - _rows_and_columns-mydatabase-mytable.html - _rows_and_columns.html + _table-table-mydatabase-mytable.html + _table-mydatabase-mytable.html + _table.html Rows and columns include on row page: - _rows_and_columns-row-mydatabase-mytable.html - _rows_and_columns-mydatabase-mytable.html - _rows_and_columns.html + _table-row-mydatabase-mytable.html + _table-mydatabase-mytable.html + _table.html If a table name has spaces or other unexpected characters in it, the template filename will follow the same rules as our custom ```` CSS classes - for @@ -189,16 +189,16 @@ content you can do so by creating a ``row.html`` template like this:: Note the ``default:row.html`` template name, which ensures Jinja will inherit from the default template. -The ``_rows_and_columns.html`` template is included on both the row and the table -page, and displays the content of the row. The default ``_rows_and_columns.html`` template -`can be seen here `_. +The ``_table.html`` template is included on both the row and the table +page, and displays the content of the row. The default ``_table.html`` template +`can be seen here `_. You can provide a custom template that applies to all of your databases and tables, or you can provide custom templates for specific tables using the template naming scheme described above. Say for example you want to output a certain column as unescaped HTML. You could -provide a custom ``_rows_and_columns.html`` template like this:: +provide a custom ``_table.html`` template like this::
From 986919aa03edc2adfe1dd3133560cb8bba816f67 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Jul 2019 20:06:22 -0700 Subject: [PATCH 17/88] Unit test for _table custom template, refs #521 --- tests/fixtures.py | 2 ++ tests/test_html.py | 13 +++++++++++++ tests/test_templates/_table.html | 3 +++ 3 files changed, 18 insertions(+) create mode 100644 tests/test_templates/_table.html diff --git a/tests/fixtures.py b/tests/fixtures.py index 00140f50..0330c8ed 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -101,6 +101,7 @@ def make_app_client( extra_databases=None, inspect_data=None, static_mounts=None, + template_dir=None, ): with tempfile.TemporaryDirectory() as tmpdir: filepath = os.path.join(tmpdir, filename) @@ -143,6 +144,7 @@ def make_app_client( config=config, inspect_data=inspect_data, static_mounts=static_mounts, + template_dir=template_dir, ) ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n)))) client = TestClient(ds.app()) diff --git a/tests/test_html.py b/tests/test_html.py index 32fa2fe3..f76f98b9 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -964,3 +964,16 @@ def test_metadata_json_html(app_client): assert response.status == 200 pre = Soup(response.body, "html.parser").find("pre") assert METADATA == json.loads(pre.text) + + +def test_custom_table_include(): + for client in make_app_client( + template_dir=str(pathlib.Path(__file__).parent / "test_templates") + ): + response = client.get("/fixtures/complex_foreign_keys") + assert response.status == 200 + assert ( + '
' + '1 - 2 - hello 1' + "
" + ) == str(Soup(response.text, "html.parser").select_one("div.custom-table-row")) diff --git a/tests/test_templates/_table.html b/tests/test_templates/_table.html new file mode 100644 index 00000000..14f635a8 --- /dev/null +++ b/tests/test_templates/_table.html @@ -0,0 +1,3 @@ +{% for row in display_rows %} +
{{ row["f1"] }} - {{ row["f2"] }} - {{ row.display("f3") }}
+{% endfor %} From 16f0ef9054a542f10627d864e479bd1598e4bf1b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Jul 2019 20:13:34 -0700 Subject: [PATCH 18/88] Updated custom template docs, refs #521 --- docs/custom_templates.rst | 52 ++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 1dfaf892..47271542 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -144,12 +144,12 @@ The lookup rules Datasette uses are as follows:: row-mydatabase-mytable.html row.html - Rows and columns include on table page: + Table of rows and columns include on table page: _table-table-mydatabase-mytable.html _table-mydatabase-mytable.html _table.html - Rows and columns include on row page: + Table of rows and columns include on row page: _table-row-mydatabase-mytable.html _table-mydatabase-mytable.html _table.html @@ -189,38 +189,28 @@ content you can do so by creating a ``row.html`` template like this:: Note the ``default:row.html`` template name, which ensures Jinja will inherit from the default template. -The ``_table.html`` template is included on both the row and the table -page, and displays the content of the row. The default ``_table.html`` template -`can be seen here `_. +The ``_table.html`` template is included by both the row and the table pages, +and a list of rows. The default ``_table.html`` template renders them as an +HTML template and `can be seen here `_. You can provide a custom template that applies to all of your databases and tables, or you can provide custom templates for specific tables using the template naming scheme described above. -Say for example you want to output a certain column as unescaped HTML. You could -provide a custom ``_table.html`` template like this:: +If you want to present your data in a format other than an HTML table, you +can do so by looping through ``display_rows`` in your own ``_table.html`` +template. You can use ``{{ row["column_name"] }}`` to output the raw value +of a specific column. -
-    <table>
-        <thead>
-            <tr>
-                {% for column in display_columns %}
-                    <th>{{ column }}</th>
-                {% endfor %}
-            </tr>
-        </thead>
-        <tbody>
-            {% for row in display_rows %}
-                <tr>
-                    {% for cell in row %}
-                        <td>
-                            {% if cell.column == 'description' %}
-                                {{ cell.value|safe }}
-                            {% else %}
-                                {{ cell.value }}
-                            {% endif %}
-                        </td>
-                    {% endfor %}
-                </tr>
-            {% endfor %}
-        </tbody>
-    </table>
+If you want to present your data in a format other than an HTML table, you
+can do so by looping through ``display_rows`` in your own ``_table.html``
+template. You can use ``{{ row["column_name"] }}`` to output the raw value
+of a specific column.
+
+If you want to output the rendered HTML version of a column, including any
+links to foreign keys, you can use ``{{ row.display("column_name") }}``.
+
+Here is an example of a custom ``_table.html`` template::
+
+    {% for row in display_rows %}
+    <div>
+        <h2>{{ row["title"] }}</h2>
+        <p>{{ row["description"] }}</p>
+        <p>Category: {{ row.display("category_id") }}</p>
+    </div>
+ {% endfor %} From a2531730086d4c73ddb48fe3b66c18dc43b7c99f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Jul 2019 20:57:28 -0700 Subject: [PATCH 19/88] Added asgi_wrapper plugin hook, closes #520 --- datasette/app.py | 5 ++++- datasette/hookspecs.py | 5 +++++ docs/plugins.rst | 41 +++++++++++++++++++++++++++++++++++++++++ tests/fixtures.py | 23 +++++++++++++++++++++++ tests/test_plugins.py | 5 +++++ 5 files changed, 78 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 4a8ead1d..16a29e20 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -651,9 +651,12 @@ class Datasette: if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - return AsgiLifespan( + asgi = AsgiLifespan( AsgiTracer(DatasetteRouter(self, routes)), on_startup=setup_db ) + for wrapper in pm.hook.asgi_wrapper(datasette=self): + asgi = wrapper(asgi) + return asgi class DatasetteRouter(AsgiRouter): diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 61523a31..42adaae8 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -5,6 +5,11 @@ hookspec = HookspecMarker("datasette") hookimpl = HookimplMarker("datasette") +@hookspec +def asgi_wrapper(datasette): + "Returns an ASGI middleware callable to wrap our ASGI application with" + + @hookspec def prepare_connection(conn): "Modify SQLite connection in some way e.g. register custom SQL functions" diff --git a/docs/plugins.rst b/docs/plugins.rst index bd32b3a6..be335546 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -666,3 +666,44 @@ The plugin hook can then be used to register the new facet class like this: @hookimpl def register_facet_classes(): return [SpecialFacet] + + +.. _plugin_asgi_wrapper: + +asgi_wrapper(datasette) +~~~~~~~~~~~~~~~~~~~~~~~ + +Return an `ASGI `__ middleware wrapper function that will be applied to the Datasette ASGI application. + +This is a very powerful hook. You can use it to manipulate the entire Datasette response, or even to configure new URL routes that will be handled by your own custom code. + +You can write your ASGI code directly against the low-level specification, or you can use the middleware utilites provided by an ASGI framework such as `Starlette `__. + +This example plugin adds a ``x-databases`` HTTP header listing the currently attached databases: + +.. 
code-block:: python + + from datasette import hookimpl + from functools import wraps + + + @hookimpl + def asgi_wrapper(datasette): + def wrap_with_databases_header(app): + @wraps(app) + async def add_x_databases_header(scope, recieve, send): + async def wrapped_send(event): + if event["type"] == "http.response.start": + original_headers = event.get("headers") or [] + event = { + "type": event["type"], + "status": event["status"], + "headers": original_headers + [ + [b"x-databases", + ", ".join(datasette.databases.keys()).encode("utf-8")] + ], + } + await send(event) + await app(scope, recieve, wrapped_send) + return add_x_databases_header + return wrap_with_databases_header diff --git a/tests/fixtures.py b/tests/fixtures.py index 0330c8ed..fab6509e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -372,6 +372,7 @@ def render_cell(value, column, table, database, datasette): PLUGIN2 = """ from datasette import hookimpl +from functools import wraps import jinja2 import json @@ -413,6 +414,28 @@ def render_cell(value, database): label=jinja2.escape(data["label"] or "") or " " ) ) + + +@hookimpl +def asgi_wrapper(datasette): + def wrap_with_databases_header(app): + @wraps(app) + async def add_x_databases_header(scope, recieve, send): + async def wrapped_send(event): + if event["type"] == "http.response.start": + original_headers = event.get("headers") or [] + event = { + "type": event["type"], + "status": event["status"], + "headers": original_headers + [ + [b"x-databases", + ", ".join(datasette.databases.keys()).encode("utf-8")] + ], + } + await send(event) + await app(scope, recieve, wrapped_send) + return add_x_databases_header + return wrap_with_databases_header """ TABLES = ( diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 56033bdd..9bdd491a 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -162,3 +162,8 @@ def test_plugins_extra_body_script(app_client, path, expected_extra_body_script) json_data = r.search(app_client.get(path).body.decode("utf8")).group(1) actual_data = json.loads(json_data) assert expected_extra_body_script == actual_data + + +def test_plugins_asgi_wrapper(app_client): + response = app_client.get("/fixtures") + assert "fixtures" == response.headers["x-databases"] From dea9f94742f795473440701edbac22c87141e6c0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 2 Jul 2019 21:32:55 -0700 Subject: [PATCH 20/88] Switch to ~= dependencies, closes #532 (#536) * Switch to ~= dependencies, closes #532 * Bump click and click-default-group * imp. 
is deprecated, use types.ModuleType instead - thanks https://stackoverflow.com/a/32175781 * Upgrade to pytest 5 --- datasette/utils/__init__.py | 4 ++-- setup.py | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 94ccc23e..17a4d595 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -3,7 +3,6 @@ from collections import OrderedDict import base64 import click import hashlib -import imp import json import os import pkg_resources @@ -11,6 +10,7 @@ import re import shlex import tempfile import time +import types import shutil import urllib import numbers @@ -588,7 +588,7 @@ def link_or_copy_directory(src, dst): def module_from_path(path, name): # Adapted from http://sayspy.blogspot.com/2011/07/how-to-import-module-from-just-file.html - mod = imp.new_module(name) + mod = types.ModuleType(name) mod.__file__ = path with open(path, "r") as file: code = compile(file.read(), path, "exec", dont_inherit=True) diff --git a/setup.py b/setup.py index fdbb948e..254859b0 100644 --- a/setup.py +++ b/setup.py @@ -41,14 +41,14 @@ setup( package_data={"datasette": ["templates/*.html"]}, include_package_data=True, install_requires=[ - "click>=6.7", - "click-default-group==1.2", - "Jinja2==2.10.1", - "hupper==1.0", - "pint==0.8.1", - "pluggy>=0.12.0", - "uvicorn>=0.8.1", - "aiofiles==0.4.0", + "click~=7.0", + "click-default-group~=1.2.1", + "Jinja2~=2.10.1", + "hupper~=1.0", + "pint~=0.8.1", + "pluggy~=0.12.0", + "uvicorn~=0.8.1", + "aiofiles~=0.4.0", ], entry_points=""" [console_scripts] @@ -58,11 +58,11 @@ setup( extras_require={ "docs": ["sphinx_rtd_theme", "sphinx-autobuild"], "test": [ - "pytest==4.6.1", - "pytest-asyncio==0.10.0", - "aiohttp==3.5.3", - "beautifulsoup4==4.6.1", - "asgiref==3.1.2", + "pytest~=5.0.0", + "pytest-asyncio~=0.10.0", + "aiohttp~=3.5.3", + "beautifulsoup4~=4.6.1", + "asgiref~=3.1.2", ] + maybe_black, }, From ec758527b684225739ae2be369d4f2e1f37223cc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 3 Jul 2019 22:36:44 -0700 Subject: [PATCH 21/88] Secret plugin configuration options (#539) Closes #538 --- datasette/app.py | 11 ++++++++++- docs/plugins.rst | 33 +++++++++++++++++++++++++++++++++ tests/fixtures.py | 10 +++++++++- tests/test_plugins.py | 15 ++++++++++++++- 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 16a29e20..70bd3c12 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -268,7 +268,16 @@ class Datasette: ) if plugins is None: return None - return plugins.get(plugin_name) + plugin_config = plugins.get(plugin_name) + # Resolve any $file and $env keys + if isinstance(plugin_config, dict): + for key, value in plugin_config.items(): + if isinstance(value, dict): + if list(value.keys()) == ["$env"]: + plugin_config[key] = os.environ.get(list(value.values())[0]) + elif list(value.keys()) == ["$file"]: + plugin_config[key] = open(list(value.values())[0]).read() + return plugin_config def app_css_hash(self): if not hasattr(self, "_app_css_hash"): diff --git a/docs/plugins.rst b/docs/plugins.rst index be335546..609fa844 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -219,6 +219,39 @@ Here is an example of some plugin configuration for a specific table:: This tells the ``datasette-cluster-map`` column which latitude and longitude columns should be used for a table called ``Street_Tree_List`` inside a database file called ``sf-trees.db``. 
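
A rough sketch of how a plugin might read that table-scoped configuration at runtime (the ``datasette.plugin_config()`` method is introduced below; the ``latitude_column`` key is illustrative rather than taken from this patch)::

    # Inside a plugin hook that receives the datasette instance:
    plugin_config = datasette.plugin_config(
        "datasette-cluster-map", database="sf-trees", table="Street_Tree_List"
    )
    # plugin_config is the table-scoped dict from metadata.json, or None
    latitude_column = (plugin_config or {}).get("latitude_column")
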
+Secret configuration values +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Any values embedded in ``metadata.json`` will be visible to anyone who views the ``/-/metadata`` page of your Datasette instance. Some plugins may need configuration that should stay secret - API keys for example. There are two ways in which you can store secret configuration values. + +**As environment variables**. If your secret lives in an environment variable that is available to the Datasette process, you can indicate that the configuration value should be read from that environment variable like so:: + + { + "plugins": { + "datasette-auth-github": { + "client_secret": { + "$env": "GITHUB_CLIENT_SECRET" + } + } + } + } + + +**As values in separate files**. Your secrets can also live in files on disk. To specify a secret should be read from a file, provide the full file path like this:: + + { + "plugins": { + "datasette-auth-github": { + "client_secret": { + "$file": "/secrets/client-secret" + } + } + } + } + +Writing plugins that accept configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + When you are writing plugins, you can access plugin configuration like this using the ``datasette.plugin_config()`` method. If you know you need plugin configuration for a specific table, you can access it like this:: plugin_config = datasette.plugin_config( diff --git a/tests/fixtures.py b/tests/fixtures.py index fab6509e..db5f06e2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -15,6 +15,10 @@ import time from urllib.parse import unquote +# This temp file is used by one of the plugin config tests +TEMP_PLUGIN_SECRET_FILE = os.path.join(tempfile.gettempdir(), "plugin-secret") + + class TestResponse: def __init__(self, status, headers, body): self.status = status @@ -246,7 +250,11 @@ METADATA = { "source_url": "https://github.com/simonw/datasette/blob/master/tests/fixtures.py", "about": "About Datasette", "about_url": "https://github.com/simonw/datasette", - "plugins": {"name-of-plugin": {"depth": "root"}}, + "plugins": { + "name-of-plugin": {"depth": "root"}, + "env-plugin": {"foo": {"$env": "FOO_ENV"}}, + "file-plugin": {"foo": {"$file": TEMP_PLUGIN_SECRET_FILE}}, + }, "databases": { "fixtures": { "description": "Test tables description", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 9bdd491a..f42eebd7 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -1,7 +1,8 @@ from bs4 import BeautifulSoup as Soup -from .fixtures import app_client # noqa +from .fixtures import app_client, make_app_client, TEMP_PLUGIN_SECRET_FILE # noqa import base64 import json +import os import re import pytest import urllib @@ -125,6 +126,18 @@ def test_plugin_config(app_client): assert None is app_client.ds.plugin_config("unknown-plugin") +def test_plugin_config_env(app_client): + os.environ["FOO_ENV"] = "FROM_ENVIRONMENT" + assert {"foo": "FROM_ENVIRONMENT"} == app_client.ds.plugin_config("env-plugin") + del os.environ["FOO_ENV"] + + +def test_plugin_config_file(app_client): + open(TEMP_PLUGIN_SECRET_FILE, "w").write("FROM_FILE") + assert {"foo": "FROM_FILE"} == app_client.ds.plugin_config("file-plugin") + os.remove(TEMP_PLUGIN_SECRET_FILE) + + @pytest.mark.parametrize( "path,expected_extra_body_script", [ From ac0a18dbb234aecf794df7f2cbd5c365febb4f04 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 3 Jul 2019 22:47:45 -0700 Subject: [PATCH 22/88] Fix for accidentally leaking secrets in /-/metadata, closes #538 --- datasette/app.py | 9 ++++++--- tests/test_plugins.py | 8 ++++++++ 2 files changed, 14 
insertions(+), 3 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 70bd3c12..56b60533 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -271,12 +271,15 @@ class Datasette: plugin_config = plugins.get(plugin_name) # Resolve any $file and $env keys if isinstance(plugin_config, dict): - for key, value in plugin_config.items(): + # Create a copy so we don't mutate the version visible at /-/metadata.json + plugin_config_copy = dict(plugin_config) + for key, value in plugin_config_copy.items(): if isinstance(value, dict): if list(value.keys()) == ["$env"]: - plugin_config[key] = os.environ.get(list(value.values())[0]) + plugin_config_copy[key] = os.environ.get(list(value.values())[0]) elif list(value.keys()) == ["$file"]: - plugin_config[key] = open(list(value.values())[0]).read() + plugin_config_copy[key] = open(list(value.values())[0]).read() + return plugin_config_copy return plugin_config def app_css_hash(self): diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f42eebd7..9af2a430 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -129,12 +129,20 @@ def test_plugin_config(app_client): def test_plugin_config_env(app_client): os.environ["FOO_ENV"] = "FROM_ENVIRONMENT" assert {"foo": "FROM_ENVIRONMENT"} == app_client.ds.plugin_config("env-plugin") + # Ensure secrets aren't visible in /-/metadata.json + metadata = app_client.get("/-/metadata.json") + assert {"foo": {"$env": "FOO_ENV"}} == metadata.json["plugins"]["env-plugin"] del os.environ["FOO_ENV"] def test_plugin_config_file(app_client): open(TEMP_PLUGIN_SECRET_FILE, "w").write("FROM_FILE") assert {"foo": "FROM_FILE"} == app_client.ds.plugin_config("file-plugin") + # Ensure secrets aren't visible in /-/metadata.json + metadata = app_client.get("/-/metadata.json") + assert {"foo": {"$file": TEMP_PLUGIN_SECRET_FILE}} == metadata.json["plugins"][ + "file-plugin" + ] os.remove(TEMP_PLUGIN_SECRET_FILE) From a81312c0432c620d825043af113fc6a27d316111 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 3 Jul 2019 22:56:13 -0700 Subject: [PATCH 23/88] Black --- datasette/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 56b60533..1a41c1c6 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -276,7 +276,9 @@ class Datasette: for key, value in plugin_config_copy.items(): if isinstance(value, dict): if list(value.keys()) == ["$env"]: - plugin_config_copy[key] = os.environ.get(list(value.values())[0]) + plugin_config_copy[key] = os.environ.get( + list(value.values())[0] + ) elif list(value.keys()) == ["$file"]: plugin_config_copy[key] = open(list(value.values())[0]).read() return plugin_config_copy From 8abc81319694ae56bf96e88ec2a5a1417af60f90 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 4 Jul 2019 07:03:02 -0700 Subject: [PATCH 24/88] Better robustness in face of missing raw_path --- datasette/utils/asgi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index fdf330ae..38ffc072 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -88,7 +88,10 @@ class AsgiRouter: async def __call__(self, scope, receive, send): # Because we care about "foo/bar" v.s. 
"foo%2Fbar" we decode raw_path ourselves - path = scope["raw_path"].decode("ascii") + path = scope["path"] + raw_path = scope.get("raw_path") + if raw_path: + path = raw_path.decode("ascii") for regex, view in self.routes: match = regex.match(path) if match is not None: From 859c79f115511beacdf6509721ab19122882f2a8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 5 Jul 2019 13:34:41 -0700 Subject: [PATCH 25/88] Refactor templates for better top nav customization, refs #540 --- datasette/static/app.css | 18 +++++++++++++++++- datasette/templates/_footer.html | 21 +++++++++++++++++++++ datasette/templates/base.html | 28 +++++----------------------- datasette/templates/database.html | 8 +++++++- datasette/templates/index.html | 3 ++- datasette/templates/row.html | 11 +++++++++-- datasette/templates/table.html | 9 ++++++++- 7 files changed, 69 insertions(+), 29 deletions(-) create mode 100644 datasette/templates/_footer.html diff --git a/datasette/static/app.css b/datasette/static/app.css index 468c15f6..76ecdd8d 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -1,5 +1,6 @@ body { - margin: 0 1em; + margin: 0; + padding: 0; font-family: "Helvetica Neue", sans-serif; font-size: 1rem; font-weight: 400; @@ -8,6 +9,9 @@ body { text-align: left; background-color: #fff; } +.bd { + margin: 0 1em; +} table { border-collapse: collapse; } @@ -82,9 +86,21 @@ table a:visited { .hd { border-bottom: 2px solid #ccc; + padding: 0.2em 1em; + background-color: #eee; + overflow: hidden; + box-sizing: border-box; +} +.hd p { + margin: 0; + padding: 0; +} +.hd .crumbs { + float: left; } .ft { margin: 1em 0; + padding: 0.5em 1em 0 1em; border-top: 1px solid #ccc; font-size: 0.8em; } diff --git a/datasette/templates/_footer.html b/datasette/templates/_footer.html new file mode 100644 index 00000000..f930f445 --- /dev/null +++ b/datasette/templates/_footer.html @@ -0,0 +1,21 @@ +Powered by Datasette +{% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} +{% if metadata %} + {% if metadata.license or metadata.license_url %}· Data license: + {% if metadata.license_url %} + {{ metadata.license or metadata.license_url }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source or metadata.source_url %}· + Data source: {% if metadata.source_url %} + + {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} + {% endif %} + {% if metadata.about or metadata.about_url %}· + About: {% if metadata.about_url %} + + {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} + {% endif %} +{% endif %} diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 0ea41d7e..d26043f8 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -14,33 +14,15 @@ + + +
{% block content %} {% endblock %} - -
- Powered by Datasette - {% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} - {% if metadata %} - {% if metadata.license or metadata.license_url %}· Data license: - {% if metadata.license_url %} - {{ metadata.license or metadata.license_url }} - {% else %} - {{ metadata.license }} - {% endif %} - {% endif %} - {% if metadata.source or metadata.source_url %}· - Data source: {% if metadata.source_url %} - - {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} - {% endif %} - {% if metadata.about or metadata.about_url %}· - About: {% if metadata.about_url %} - - {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} - {% endif %} - {% endif %}
+
{% block footer %}{% include "_footer.html" %}{% endblock %}
+ {% for body_script in body_scripts %} {% endfor %} diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 9fb4d6eb..f168db97 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -9,8 +9,14 @@ {% block body_class %}db db-{{ database|to_css_class }}{% endblock %} +{% block nav %} +

+ home +

+ {{ super() }} +{% endblock %} + {% block content %} -

{{ metadata.title or database }}

diff --git a/datasette/templates/index.html b/datasette/templates/index.html index c8ad4148..b394564a 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -21,7 +21,8 @@ {{ "{:,}".format(database.views_count) }} view{% if database.views_count != 1 %}s{% endif %} {% endif %}

-

{% for table in database.tables_and_views_truncated %}{{ table.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

+

{% for table in database.tables_and_views_truncated %}{{ table.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

{% endfor %} {% endblock %} diff --git a/datasette/templates/row.html b/datasette/templates/row.html index bda1e4e2..5703900d 100644 --- a/datasette/templates/row.html +++ b/datasette/templates/row.html @@ -15,9 +15,16 @@ {% block body_class %}row db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %} -{% block content %} - +{% block nav %} +

+ home / + {{ database }} / + {{ table }} +

+ {{ super() }} +{% endblock %} +{% block content %}

{{ table }}: {{ ', '.join(primary_key_values) }}

{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 2287e901..c7913f60 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -16,8 +16,15 @@ {% block body_class %}table db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %} +{% block nav %} +

+ home / + {{ database }} +

+ {{ super() }} +{% endblock %} + {% block content %} -

{{ metadata.title or table }}{% if is_view %} (view){% endif %}

From 42d6877784f9ce392277ecbbebd1f8c759eaf0a2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 5 Jul 2019 17:05:56 -0700 Subject: [PATCH 26/88] extra_template_vars plugin hook (#542) * extra_template_vars plugin hook Closes #541 * Workaround for cwd bug Based on https://github.com/pytest-dev/pytest/issues/1235#issuecomment-175295691 --- datasette/hookspecs.py | 5 ++ datasette/views/base.py | 25 ++++++++- datasette/views/index.py | 11 ++-- datasette/views/special.py | 6 +- docs/plugins.rst | 86 +++++++++++++++++++++++++---- tests/conftest.py | 15 +++++ tests/fixtures.py | 23 ++++++++ tests/test_plugins.py | 26 +++++++++ tests/test_templates/show_json.html | 8 +++ 9 files changed, 186 insertions(+), 19 deletions(-) create mode 100644 tests/test_templates/show_json.html diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 42adaae8..3c6726b7 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -35,6 +35,11 @@ def extra_body_script(template, database, table, view_name, datasette): "Extra JavaScript code to be included in diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html index 4b55bf8d..9bc6d97f 100644 --- a/datasette/templates/_codemirror_foot.html +++ b/datasette/templates/_codemirror_foot.html @@ -1,5 +1,18 @@ diff --git a/datasette/templates/database.html b/datasette/templates/database.html index a934f336..a0d0fcf6 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -26,7 +26,10 @@

Custom SQL query

-

+

+ + +

{% endif %} diff --git a/datasette/templates/query.html b/datasette/templates/query.html index 7c6c59f3..34fa78a5 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -37,7 +37,7 @@ {% if editable and config.allow_sql %}

{% else %} -
{% if query %}{{ query.sql }}{% endif %}
+
{% if query %}{{ query.sql }}{% endif %}
{% endif %} {% else %} @@ -49,7 +49,10 @@

{% endfor %} {% endif %} -

+

+ + +

{% if display_rows %} From 2ad1f0d34e1517faea44fccb844ac225feef14fb Mon Sep 17 00:00:00 2001 From: Tobias Kunze Date: Mon, 14 Oct 2019 05:52:33 +0200 Subject: [PATCH 56/88] Sort databases on homepage by argument order - #591 Closes #585 - thanks, @rixx! --- datasette/app.py | 2 +- datasette/views/index.py | 2 -- tests/test_html.py | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 41a4eb37..935b1730 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -159,7 +159,7 @@ class Datasette: self.files = [MEMORY] elif memory: self.files = (MEMORY,) + self.files - self.databases = {} + self.databases = collections.OrderedDict() self.inspect_data = inspect_data for file in self.files: path = file diff --git a/datasette/views/index.py b/datasette/views/index.py index fddb04d9..f2e5f774 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -97,8 +97,6 @@ class IndexView(BaseView): } ) - databases.sort(key=lambda database: database["name"]) - if as_format: headers = {} if self.ds.cors: diff --git a/tests/test_html.py b/tests/test_html.py index 0a6df984..ec7765f6 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -26,11 +26,11 @@ def test_homepage(app_client_two_attached_databases): ) # Should be two attached databases assert [ - {"href": "/extra_database", "text": "extra_database"}, {"href": "/fixtures", "text": "fixtures"}, + {"href": "/extra_database", "text": "extra_database"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] # The first attached database should show count text and attached tables - h2 = soup.select("h2")[0] + h2 = soup.select("h2")[1] assert "extra_database" == h2.text.strip() counts_p, links_p = h2.find_all_next("p")[:2] assert ( From 2bd116234b767bf99c01440cff41eea75524ff48 Mon Sep 17 00:00:00 2001 From: Tobias Kunze Date: Mon, 14 Oct 2019 05:53:21 +0200 Subject: [PATCH 57/88] Display metadata footer on custom SQL queries (#589) Closes #408 - thanks, @rixx! 
--- datasette/views/database.py | 10 ++++++---- tests/test_html.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/datasette/views/database.py b/datasette/views/database.py index 78af19c5..31d6af59 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -10,12 +10,17 @@ class DatabaseView(DataView): name = "database" async def data(self, request, database, hash, default_labels=False, _size=None): + metadata = (self.ds.metadata("databases") or {}).get(database, {}) + self.ds.update_with_inherited_metadata(metadata) + if request.args.get("sql"): if not self.ds.config("allow_sql"): raise DatasetteError("sql= is not allowed", status=400) sql = request.raw_args.pop("sql") validate_sql_select(sql) - return await self.custom_sql(request, database, hash, sql, _size=_size) + return await self.custom_sql( + request, database, hash, sql, _size=_size, metadata=metadata + ) db = self.ds.databases[database] @@ -24,9 +29,6 @@ class DatabaseView(DataView): hidden_table_names = set(await db.hidden_table_names()) all_foreign_keys = await db.get_all_foreign_keys() - metadata = (self.ds.metadata("databases") or {}).get(database, {}) - self.ds.update_with_inherited_metadata(metadata) - tables = [] for table in table_counts: table_columns = await db.table_columns(table) diff --git a/tests/test_html.py b/tests/test_html.py index ec7765f6..0bb1c163 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -737,6 +737,18 @@ def test_database_metadata(app_client): assert_footer_links(soup) +def test_database_metadata_with_custom_sql(app_client): + response = app_client.get("/fixtures?sql=select+*+from+simple_primary_key") + assert response.status == 200 + soup = Soup(response.body, "html.parser") + # Page title should be the default + assert "fixtures" == soup.find("h1").text + # Description should be custom + assert "Custom SQL query returning" in soup.find("h3").text + # The source/license should be inherited + assert_footer_links(soup) + + def test_table_metadata(app_client): response = app_client.get("/fixtures/simple_primary_key") assert response.status == 200 From b2d0ca3a1debc2f5fb0a0f7d4b1c34c522e9b829 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 14 Oct 2019 15:29:16 -0700 Subject: [PATCH 58/88] Add Python versions badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 59a6649e..a4db6611 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Datasette [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) +[![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) [![Travis CI](https://travis-ci.org/simonw/datasette.svg?branch=master)](https://travis-ci.org/simonw/datasette) [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](http://datasette.readthedocs.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/master/LICENSE) From e5308c1ec218f25e2825a10a4bbb1901039457bf Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 17 Oct 2019 14:51:45 -0700 Subject: [PATCH 59/88] Use --platform=managed for publish cloudrun, closes #587 --- datasette/publish/cloudrun.py | 2 +- tests/test_publish_cloudrun.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index 32c9cd2a..c2d77746 100644 
--- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -110,7 +110,7 @@ def publish_subcommand(publish): image_id = "gcr.io/{project}/{name}".format(project=project, name=name) check_call("gcloud builds submit --tag {}".format(image_id), shell=True) check_call( - "gcloud beta run deploy --allow-unauthenticated --image {}{}".format( + "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {}{}".format( image_id, " {}".format(service) if service else "" ), shell=True, diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 1e9bb830..481ac04d 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -40,7 +40,7 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which): [ mock.call("gcloud builds submit --tag {}".format(tag), shell=True), mock.call( - "gcloud beta run deploy --allow-unauthenticated --image {}".format( + "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {}".format( tag ), shell=True, From a9f877f7bf2fbca604576ef1352efaa97742c609 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 17 Oct 2019 22:23:01 -0700 Subject: [PATCH 60/88] Fixed bug returning non-ascii characters in CSV, closes #584 --- datasette/utils/asgi.py | 2 +- tests/test_csv.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py index eaf3428d..bafcfb4a 100644 --- a/datasette/utils/asgi.py +++ b/datasette/utils/asgi.py @@ -217,7 +217,7 @@ class AsgiWriter: await self.send( { "type": "http.response.body", - "body": chunk.encode("latin-1"), + "body": chunk.encode("utf-8"), "more_body": True, } ) diff --git a/tests/test_csv.py b/tests/test_csv.py index c3cdc241..1d5d2df2 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -80,6 +80,15 @@ def test_table_csv_download(app_client): assert expected_disposition == response.headers["Content-Disposition"] +def test_csv_with_non_ascii_characters(app_client): + response = app_client.get( + "/fixtures.csv?sql=select%0D%0A++%27%F0%9D%90%9C%F0%9D%90%A2%F0%9D%90%AD%F0%9D%90%A2%F0%9D%90%9E%F0%9D%90%AC%27+as+text%2C%0D%0A++1+as+number%0D%0Aunion%0D%0Aselect%0D%0A++%27bob%27+as+text%2C%0D%0A++2+as+number%0D%0Aorder+by%0D%0A++number" + ) + assert response.status == 200 + assert "text/plain; charset=utf-8" == response.headers["content-type"] + assert "text,number\r\n𝐜𝐢𝐭𝐢𝐞𝐬,1\r\nbob,2\r\n" == response.body.decode("utf8") + + def test_max_csv_mb(app_client_csv_max_mb_one): response = app_client_csv_max_mb_one.get( "/fixtures.csv?sql=select+randomblob(10000)+" From c387b47ec6b4703c8bf833d20d126ea056d7dc3c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Oct 2019 15:51:07 -0700 Subject: [PATCH 61/88] Fix for /foo v.s. 
/foo-bar issue, closes #597

Pull request #599
---
 datasette/views/base.py | 16 ++++++++--------
 tests/fixtures.py       |  7 +++++++
 tests/test_api.py       | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/datasette/views/base.py b/datasette/views/base.py
index db1d69d9..219630af 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -193,14 +193,14 @@ class DataView(BaseView):
     async def resolve_db_name(self, request, db_name, **kwargs):
         hash = None
         name = None
-        if "-" in db_name:
-            # Might be name-and-hash, or might just be
-            # a name with a hyphen in it
-            name, hash = db_name.rsplit("-", 1)
-            if name not in self.ds.databases:
-                # Try the whole name
-                name = db_name
-                hash = None
+        if db_name not in self.ds.databases and "-" in db_name:
+            # No matching DB found, maybe it's a name-hash?
+            name_bit, hash_bit = db_name.rsplit("-", 1)
+            if name_bit not in self.ds.databases:
+                raise NotFound("Database not found: {}".format(db_name))
+            else:
+                name = name_bit
+                hash = hash_bit
         else:
             name = db_name
         # Verify the hash
diff --git a/tests/fixtures.py b/tests/fixtures.py
index dac28dc0..a4c32f36 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -178,6 +178,13 @@ def app_client_two_attached_databases():
     )


+@pytest.fixture(scope="session")
+def app_client_conflicting_database_names():
+    yield from make_app_client(
+        extra_databases={"foo.db": EXTRA_DATABASE_SQL, "foo-bar.db": EXTRA_DATABASE_SQL}
+    )
+
+
 @pytest.fixture(scope="session")
 def app_client_two_attached_databases_one_immutable():
     yield from make_app_client(
diff --git a/tests/test_api.py b/tests/test_api.py
index cc00b780..826c00f3 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -7,6 +7,7 @@ from .fixtures import (  # noqa
     app_client_larger_cache_size,
     app_client_returned_rows_matches_page_size,
     app_client_two_attached_databases_one_immutable,
+    app_client_conflicting_database_names,
     app_client_with_cors,
     app_client_with_dot,
     generate_compound_rows,
@@ -1652,3 +1653,20 @@ def test_cors(app_client_with_cors, path, status_code):
     response = app_client_with_cors.get(path)
     assert response.status == status_code
     assert "*" == response.headers["Access-Control-Allow-Origin"]
+
+
+def test_common_prefix_database_names(app_client_conflicting_database_names):
+    # https://github.com/simonw/datasette/issues/597
+    assert ["fixtures", "foo", "foo-bar"] == [
+        d["name"]
+        for d in json.loads(
+            app_client_conflicting_database_names.get("/-/databases.json").body.decode(
+                "utf8"
+            )
+        )
+    ]
+    for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-bar.json")):
+        data = json.loads(
+            app_client_conflicting_database_names.get(path).body.decode("utf8")
+        )
+        assert db_name == data["database"]

From 32d9e2fbc67dfcec5301a813de14969b0ec36e2e Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 18 Oct 2019 16:56:44 -0700
Subject: [PATCH 62/88] Don't auto-format SQL on page load (#601)

Closes #600
---
 datasette/templates/_codemirror_foot.html | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html
index 9bc6d97f..9aba61ab 100644
--- a/datasette/templates/_codemirror_foot.html
+++ b/datasette/templates/_codemirror_foot.html
@@ -6,12 +6,6 @@ window.onload = () => {
     if (sqlFormat && !readOnly) {
         sqlFormat.hidden = false;
     }
-    if (readOnly) {
-        readOnly.innerHTML = sqlFormatter.format(readOnly.innerHTML);
-    }
-    if (sqlInput) {
-        sqlInput.value = sqlFormatter.format(sqlInput.value);
-    }
     var editor = 
CodeMirror.fromTextArea(sqlInput, { lineNumbers: true, mode: "text/x-sql", From 7cbc51e92ead3b010235efc4f9bf2431315576c4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Oct 2019 18:05:47 -0700 Subject: [PATCH 63/88] Release 0.30 --- docs/changelog.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 26d0f75c..e8dafa35 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,20 @@ Changelog ========= +.. _v0_30: + +0.30 (2019-10-18) +----------------- + +- Added ``/-/threads`` debugging page +- Allow ``EXPLAIN WITH...`` (`#583 `__) +- Button to format SQL - thanks, Tobias Kunze (`#136 `__) +- Sort databases on homepage by argument order - thanks, Tobias Kunze (`#585 `__) +- Display metadata footer on custom SQL queries - thanks, Tobias Kunze (`#589 `__) +- Use ``--platform=managed`` for ``publish cloudrun`` (`#587 `__) +- Fixed bug returning non-ASCII characters in CSV (`#584 `__) +- Fix for ``/foo`` v.s. ``/foo-bar`` bug (`#601 `__) + .. _v0_29_3: 0.29.3 (2019-09-02) From a5d4f166a5abb6fdcfc457055655824fcc6ac1a1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 18 Oct 2019 18:08:04 -0700 Subject: [PATCH 64/88] Update news in README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a4db6611..5894017e 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover ## News + * 18th October 2019: [Datasette 0.30](https://datasette.readthedocs.io/en/stable/changelog.html#v0-30) * 13th July 2019: [Single sign-on against GitHub using ASGI middleware](https://simonwillison.net/2019/Jul/14/sso-asgi/) talks about the implementation of [datasette-auth-github](https://github.com/simonw/datasette-auth-github) in more detail. * 7th July 2019: [Datasette 0.29](https://datasette.readthedocs.io/en/stable/changelog.html#v0-29) - ASGI, new plugin hooks, facet by date and much, much more... * [datasette-auth-github](https://github.com/simonw/datasette-auth-github) - a new plugin for Datasette 0.29 that lets you require users to authenticate against GitHub before accessing your Datasette instance. You can whitelist specific users, or you can restrict access to members of specific GitHub organizations or teams. From ca44cc03e3d84cb91c791c30960610d06fe91fa5 Mon Sep 17 00:00:00 2001 From: chris48s Date: Mon, 21 Oct 2019 03:03:08 +0100 Subject: [PATCH 65/88] Always pop as_format off args dict (#603) Closes #563. 
Thanks, @chris48s --- datasette/views/base.py | 2 ++ tests/test_api.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/datasette/views/base.py b/datasette/views/base.py index 219630af..348f0c03 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -362,6 +362,8 @@ class DataView(BaseView): _format = request.args.get("_format", None) if not _format: _format = (args.pop("as_format", None) or "").lstrip(".") + else: + args.pop("as_format", None) if "table_and_format" in args: db = self.ds.databases[database] diff --git a/tests/test_api.py b/tests/test_api.py index 826c00f3..a734b8de 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1107,6 +1107,15 @@ def test_row(app_client): assert [{"id": "1", "content": "hello"}] == response.json["rows"] +def test_row_format_in_querystring(app_client): + # regression test for https://github.com/simonw/datasette/issues/563 + response = app_client.get( + "/fixtures/simple_primary_key/1?_format=json&_shape=objects" + ) + assert response.status == 200 + assert [{"id": "1", "content": "hello"}] == response.json["rows"] + + def test_row_strange_table_name(app_client): response = app_client.get( "/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects" From 48054b358a49a6a8ffb379813a24409808468f51 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 30 Oct 2019 11:49:01 -0700 Subject: [PATCH 66/88] Update to latest black (#609) --- datasette/views/base.py | 9 ++++++--- datasette/views/table.py | 7 ++++--- setup.py | 2 +- tests/test_api.py | 2 +- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 348f0c03..1568b084 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -257,9 +257,12 @@ class DataView(BaseView): assert NotImplemented async def get(self, request, db_name, **kwargs): - database, hash, correct_hash_provided, should_redirect = await self.resolve_db_name( - request, db_name, **kwargs - ) + ( + database, + hash, + correct_hash_provided, + should_redirect, + ) = await self.resolve_db_name(request, db_name, **kwargs) if should_redirect: return self.redirect(request, should_redirect, remove_args={"_hash"}) diff --git a/datasette/views/table.py b/datasette/views/table.py index 8ba3abe4..e0362e53 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -586,9 +586,10 @@ class TableView(RowTableShared): ) for facet in facet_instances: - instance_facet_results, instance_facets_timed_out = ( - await facet.facet_results() - ) + ( + instance_facet_results, + instance_facets_timed_out, + ) = await facet.facet_results() facet_results.update(instance_facet_results) facets_timed_out.extend(instance_facets_timed_out) diff --git a/setup.py b/setup.py index cbe545a1..9ae56306 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def get_version(): # Only install black on Python 3.6 or higher maybe_black = [] if sys.version_info > (3, 6): - maybe_black = ["black"] + maybe_black = ["black~=19.10b0"] setup( name="datasette", diff --git a/tests/test_api.py b/tests/test_api.py index a734b8de..4ea95e84 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1245,7 +1245,7 @@ def test_config_json(app_client): def test_page_size_matching_max_returned_rows( - app_client_returned_rows_matches_page_size + app_client_returned_rows_matches_page_size, ): fetched = [] path = "/fixtures/no_primary_key.json" From 76eb6047d4d6d85493a2929e7025d773a53532e9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 30 Oct 2019 11:49:26 -0700 
Subject: [PATCH 67/88] Persist _where= in hidden fields, closes #604 --- datasette/views/table.py | 3 +++ tests/test_html.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/datasette/views/table.py b/datasette/views/table.py index e0362e53..652ce994 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -721,6 +721,9 @@ class TableView(RowTableShared): for arg in ("_fts_table", "_fts_pk"): if arg in special_args: form_hidden_args.append((arg, special_args[arg])) + if request.args["_where"]: + for where_text in request.args["_where"]: + form_hidden_args.append(("_where", where_text)) return { "supports_search": bool(fts_table), "search": search or "", diff --git a/tests/test_html.py b/tests/test_html.py index 0bb1c163..aa628dec 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -955,6 +955,12 @@ def test_extra_where_clauses(app_client): "/fixtures/facetable?_where=city_id%3D1", "/fixtures/facetable?_where=neighborhood%3D%27Dogpatch%27", ] == hrefs + # These should also be persisted as hidden fields + inputs = soup.find("form").findAll("input") + hiddens = [i for i in inputs if i["type"] == "hidden"] + assert [("_where", "neighborhood='Dogpatch'"), ("_where", "city_id=1")] == [ + (hidden["name"], hidden["value"]) for hidden in hiddens + ] def test_binary_data_display(app_client): From f081bb818d9cee2b083cbaf8b6194a88e683d243 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 30 Oct 2019 11:56:04 -0700 Subject: [PATCH 68/88] Release 0.30.1 --- docs/changelog.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index e8dafa35..8ac32c45 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog ========= +.. _v0_30_1: + +0.30.1 (2019-10-30) +------------------- + +- Fixed bug where ``?_where=`` parameter was not persisted in hidden form fields (`#604 `__) +- Fixed bug with .JSON representation of row pages - thanks, Chris Shaw (`#603 `__) + .. 
_v0_30: 0.30 (2019-10-18) From 66ac40be70d91a2f1f381d56e8855d65ec47e7a9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 30 Oct 2019 12:00:21 -0700 Subject: [PATCH 69/88] Fixed dumb error --- datasette/views/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 652ce994..44b186cf 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -721,7 +721,7 @@ class TableView(RowTableShared): for arg in ("_fts_table", "_fts_pk"): if arg in special_args: form_hidden_args.append((arg, special_args[arg])) - if request.args["_where"]: + if request.args.get("_where"): for where_text in request.args["_where"]: form_hidden_args.append(("_where", where_text)) return { From cd0984af2d7175a987d66d5030f9d8937f829e4b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 31 Oct 2019 22:39:59 -0700 Subject: [PATCH 70/88] Use distinfo.project_name for plugin name if available, closes #606 --- datasette/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index 449217b5..3d28a36b 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -633,6 +633,7 @@ def get_plugins(pm): distinfo = plugin_to_distinfo.get(plugin) if distinfo: plugin_info["version"] = distinfo.version + plugin_info["name"] = distinfo.project_name plugins.append(plugin_info) return plugins From 0dde00e7bb61a940e2a3e5fdf298346ca15cdff3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 1 Nov 2019 12:37:46 -0700 Subject: [PATCH 71/88] Only suggest array facet for arrays of strings - closes #562 --- datasette/facets.py | 44 +++++++++++++++++++++++++++++++++----------- tests/fixtures.py | 33 +++++++++++++++++---------------- tests/test_api.py | 20 +++++++++++++++++--- tests/test_csv.py | 32 ++++++++++++++++---------------- tests/test_facets.py | 9 +++++++++ 5 files changed, 92 insertions(+), 46 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index 365d9c65..9b5baaa2 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -257,6 +257,16 @@ class ColumnFacet(Facet): class ArrayFacet(Facet): type = "array" + def _is_json_array_of_strings(self, json_string): + try: + array = json.loads(json_string) + except ValueError: + return False + for item in array: + if not isinstance(item, str): + return False + return True + async def suggest(self): columns = await self.get_columns(self.sql, self.params) suggested_facets = [] @@ -282,18 +292,30 @@ class ArrayFacet(Facet): ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - suggested_facets.append( - { - "name": column, - "type": "array", - "toggle_url": self.ds.absolute_url( - self.request, - path_with_added_args( - self.request, {"_facet_array": column} - ), - ), - } + # Now sanity check that first 100 arrays contain only strings + first_100 = await self.ds.execute( + self.database, + "select {column} from ({sql}) where {column} is not null".format( + column=escape_sqlite(column), sql=self.sql + ), + self.params, + truncate=False, + custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), + log_sql_errors=False, ) + if all(self._is_json_array_of_strings(r[0]) for r in first_100): + suggested_facets.append( + { + "name": column, + "type": "array", + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet_array": column} + ), + ), + } + ) except (QueryInterrupted, sqlite3.OperationalError): continue 
return suggested_facets diff --git a/tests/fixtures.py b/tests/fixtures.py index a4c32f36..93c3da9f 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -661,26 +661,27 @@ CREATE TABLE facetable ( city_id integer, neighborhood text, tags text, + complex_array text, FOREIGN KEY ("city_id") REFERENCES [facet_cities](id) ); INSERT INTO facetable - (created, planet_int, on_earth, state, city_id, neighborhood, tags) + (created, planet_int, on_earth, state, city_id, neighborhood, tags, complex_array) VALUES - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]'), - ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]'), - ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]'), - ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]'), - ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]'), - ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]') + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Mission', '["tag1", "tag2"]', '[{"foo": "bar"}]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Dogpatch', '["tag1", "tag3"]', '[]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'SOMA', '[]', '[]'), + ("2019-01-14 08:00:00", 1, 1, 'CA', 1, 'Tenderloin', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Bernal Heights', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 1, 'Hayes Valley', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Hollywood', '[]', '[]'), + ("2019-01-15 08:00:00", 1, 1, 'CA', 2, 'Downtown', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Los Feliz', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'CA', 2, 'Koreatown', '[]', '[]'), + ("2019-01-16 08:00:00", 1, 1, 'MI', 3, 'Downtown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Greektown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Corktown', '[]', '[]'), + ("2019-01-17 08:00:00", 1, 1, 'MI', 3, 'Mexicantown', '[]', '[]'), + ("2019-01-17 08:00:00", 2, 0, 'MC', 4, 'Arcadia Planitia', '[]', '[]') ; CREATE TABLE binary_data ( diff --git a/tests/test_api.py b/tests/test_api.py index 4ea95e84..41557bcf 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -195,6 +195,7 @@ def test_database_page(app_client): "city_id", "neighborhood", "tags", + "complex_array", ], "primary_keys": ["pk"], "count": 15, @@ -1029,15 +1030,25 @@ def test_table_filter_queries_multiple_of_same_type(app_client): def test_table_filter_json_arraycontains(app_client): response = app_client.get("/fixtures/facetable.json?tags__arraycontains=tag1") assert [ - [1, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Mission", '["tag1", "tag2"]'], - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'], + [ + 1, + "2019-01-14 08:00:00", + 1, + 1, + "CA", + 1, + "Mission", + '["tag1", "tag2"]', + '[{"foo": "bar"}]', + ], + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"], ] == response.json["rows"] def test_table_filter_extra_where(app_client): response = 
app_client.get("/fixtures/facetable.json?_where=neighborhood='Dogpatch'") assert [ - [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]'] + [2, "2019-01-14 08:00:00", 1, 1, "CA", 1, "Dogpatch", '["tag1", "tag3"]', "[]"] ] == response.json["rows"] @@ -1453,6 +1464,7 @@ def test_suggested_facets(app_client): {"name": "city_id", "querystring": "_facet=city_id"}, {"name": "neighborhood", "querystring": "_facet=neighborhood"}, {"name": "tags", "querystring": "_facet=tags"}, + {"name": "complex_array", "querystring": "_facet=complex_array"}, {"name": "created", "querystring": "_facet_date=created"}, ] if detect_json1(): @@ -1488,6 +1500,7 @@ def test_expand_labels(app_client): "city_id": {"value": 1, "label": "San Francisco"}, "neighborhood": "Dogpatch", "tags": '["tag1", "tag3"]', + "complex_array": "[]", }, "13": { "pk": 13, @@ -1498,6 +1511,7 @@ def test_expand_labels(app_client): "city_id": {"value": 3, "label": "Detroit"}, "neighborhood": "Corktown", "tags": "[]", + "complex_array": "[]", }, } == response.json diff --git a/tests/test_csv.py b/tests/test_csv.py index 1d5d2df2..b148b6db 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -21,22 +21,22 @@ world ) EXPECTED_TABLE_WITH_LABELS_CSV = """ -pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags -1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]" -2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]" -3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[] -4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[] -5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[] -6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[] -7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[] -8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[] -9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[] -10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[] -11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[] -12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[] -13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[] -14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[] -15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[] +pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,complex_array +1,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Mission,"[""tag1"", ""tag2""]","[{""foo"": ""bar""}]" +2,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Dogpatch,"[""tag1"", ""tag3""]",[] +3,2019-01-14 08:00:00,1,1,CA,1,San Francisco,SOMA,[],[] +4,2019-01-14 08:00:00,1,1,CA,1,San Francisco,Tenderloin,[],[] +5,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Bernal Heights,[],[] +6,2019-01-15 08:00:00,1,1,CA,1,San Francisco,Hayes Valley,[],[] +7,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Hollywood,[],[] +8,2019-01-15 08:00:00,1,1,CA,2,Los Angeles,Downtown,[],[] +9,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Los Feliz,[],[] +10,2019-01-16 08:00:00,1,1,CA,2,Los Angeles,Koreatown,[],[] +11,2019-01-16 08:00:00,1,1,MI,3,Detroit,Downtown,[],[] +12,2019-01-17 08:00:00,1,1,MI,3,Detroit,Greektown,[],[] +13,2019-01-17 08:00:00,1,1,MI,3,Detroit,Corktown,[],[] +14,2019-01-17 08:00:00,1,1,MI,3,Detroit,Mexicantown,[],[] +15,2019-01-17 08:00:00,2,0,MC,4,Memnonia,Arcadia Planitia,[],[] """.lstrip().replace( "\n", "\r\n" ) diff --git a/tests/test_facets.py b/tests/test_facets.py index 9169f666..402c155b 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -23,6 +23,10 @@ async def test_column_facet_suggest(app_client): {"name": "city_id", 
"toggle_url": "http://localhost/?_facet=city_id"}, {"name": "neighborhood", "toggle_url": "http://localhost/?_facet=neighborhood"}, {"name": "tags", "toggle_url": "http://localhost/?_facet=tags"}, + { + "name": "complex_array", + "toggle_url": "http://localhost/?_facet=complex_array", + }, ] == suggestions @@ -57,6 +61,10 @@ async def test_column_facet_suggest_skip_if_already_selected(app_client): "name": "tags", "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=tags", }, + { + "name": "complex_array", + "toggle_url": "http://localhost/?_facet=planet_int&_facet=on_earth&_facet=complex_array", + }, ] == suggestions @@ -78,6 +86,7 @@ async def test_column_facet_suggest_skip_if_enabled_by_metadata(app_client): "state", "neighborhood", "tags", + "complex_array", ] == suggestions From 1fa1c88aece64922b1c52e5bb7d0fb9db7150573 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 1 Nov 2019 12:38:15 -0700 Subject: [PATCH 72/88] Only inspect first 100 records for #562 --- datasette/facets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/facets.py b/datasette/facets.py index 9b5baaa2..7f350dfe 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -295,7 +295,7 @@ class ArrayFacet(Facet): # Now sanity check that first 100 arrays contain only strings first_100 = await self.ds.execute( self.database, - "select {column} from ({sql}) where {column} is not null".format( + "select {column} from ({sql}) where {column} is not null limit 100".format( column=escape_sqlite(column), sql=self.sql ), self.params, From 18ba0c27b5f7a02c6506262ba92baca84c7e0af9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 1 Nov 2019 14:45:59 -0700 Subject: [PATCH 73/88] Don't suggest array facet if column is only [], closes #610 --- datasette/facets.py | 29 ++++++++++++++++++----------- tests/test_facets.py | 14 ++++++++++++++ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/datasette/facets.py b/datasette/facets.py index 7f350dfe..0c6459d6 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -293,17 +293,24 @@ class ArrayFacet(Facet): types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): # Now sanity check that first 100 arrays contain only strings - first_100 = await self.ds.execute( - self.database, - "select {column} from ({sql}) where {column} is not null limit 100".format( - column=escape_sqlite(column), sql=self.sql - ), - self.params, - truncate=False, - custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"), - log_sql_errors=False, - ) - if all(self._is_json_array_of_strings(r[0]) for r in first_100): + first_100 = [ + v[0] + for v in await self.ds.execute( + self.database, + "select {column} from ({sql}) where {column} is not null and json_array_length({column}) > 0 limit 100".format( + column=escape_sqlite(column), sql=self.sql + ), + self.params, + truncate=False, + custom_time_limit=self.ds.config( + "facet_suggest_time_limit_ms" + ), + log_sql_errors=False, + ) + ] + if first_100 and all( + self._is_json_array_of_strings(r) for r in first_100 + ): suggested_facets.append( { "name": column, diff --git a/tests/test_facets.py b/tests/test_facets.py index 402c155b..e3dc3df3 100644 --- a/tests/test_facets.py +++ b/tests/test_facets.py @@ -215,6 +215,20 @@ async def test_array_facet_suggest(app_client): ] == suggestions +@pytest.mark.asyncio +@pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") +async def 
test_array_facet_suggest_not_if_all_empty_arrays(app_client): + facet = ArrayFacet( + app_client.ds, + MockRequest("http://localhost/"), + database="fixtures", + sql="select * from facetable where tags = '[]'", + table="facetable", + ) + suggestions = await facet.suggest() + assert [] == suggestions + + @pytest.mark.asyncio @pytest.mark.skipif(not detect_json1(), reason="Requires the SQLite json1 module") async def test_array_facet_results(app_client): From 566496c14663872c7e51e25af66dfde9fa496f5b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 1 Nov 2019 14:57:49 -0700 Subject: [PATCH 74/88] Better documentation of --host, closes #574 --- README.md | 25 +++++++++++++++---------- datasette/cli.py | 11 +++++++++-- docs/datasette-serve-help.txt | 7 +++++-- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 5894017e..9f85f1ba 100644 --- a/README.md +++ b/README.md @@ -89,26 +89,31 @@ Now visiting http://localhost:8001/History/downloads will show you a web interfa ## datasette serve options - $ datasette serve --help - Usage: datasette serve [OPTIONS] [FILES]... Serve up specified SQLite database files with a web UI Options: -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT host for server, defaults to 127.0.0.1 - -p, --port INTEGER port for server, defaults to 8001 + -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means + only connections from the local machine will be + allowed. Use 0.0.0.0 to listen to all IPs and + allow access from other machines. + -p, --port INTEGER Port for server, defaults to 8001 --debug Enable debug mode - useful for development - --reload Automatically reload if database or code change detected - - useful for development - --cors Enable CORS by serving Access-Control-Allow-Origin: * + --reload Automatically reload if database or code change + detected - useful for development + --cors Enable CORS by serving Access-Control-Allow- + Origin: * --load-extension PATH Path to a SQLite extension to load - --inspect-file TEXT Path to JSON file created using "datasette inspect" - -m, --metadata FILENAME Path to JSON file containing license/source metadata + --inspect-file TEXT Path to JSON file created using "datasette + inspect" + -m, --metadata FILENAME Path to JSON file containing license/source + metadata --template-dir DIRECTORY Path to directory containing custom templates --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --static STATIC MOUNT mountpoint:path-to-directory for serving static + files --memory Make :memory: database available --config CONFIG Set config option using configname:value datasette.readthedocs.io/en/latest/config.html diff --git a/datasette/cli.py b/datasette/cli.py index 181b281c..67c2fe71 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -230,9 +230,16 @@ def package( multiple=True, ) @click.option( - "-h", "--host", default="127.0.0.1", help="host for server, defaults to 127.0.0.1" + "-h", + "--host", + default="127.0.0.1", + help=( + "Host for server. Defaults to 127.0.0.1 which means only connections " + "from the local machine will be allowed. Use 0.0.0.0 to listen to " + "all IPs and allow access from other machines." 
+    ),
 )
-@click.option("-p", "--port", default=8001, help="port for server, defaults to 8001")
+@click.option("-p", "--port", default=8001, help="Port for server, defaults to 8001")
 @click.option(
     "--debug", is_flag=True, help="Enable debug mode - useful for development"
 )
diff --git a/docs/datasette-serve-help.txt b/docs/datasette-serve-help.txt
index 7b7c3b09..1447e84d 100644
--- a/docs/datasette-serve-help.txt
+++ b/docs/datasette-serve-help.txt
@@ -6,8 +6,11 @@ Usage: datasette serve [OPTIONS] [FILES]...

 Options:
   -i, --immutable PATH      Database files to open in immutable mode
-  -h, --host TEXT           host for server, defaults to 127.0.0.1
-  -p, --port INTEGER        port for server, defaults to 8001
+  -h, --host TEXT           Host for server. Defaults to 127.0.0.1 which means only
+                            connections from the local machine will be allowed. Use
+                            0.0.0.0 to listen to all IPs and allow access from other
+                            machines.
+  -p, --port INTEGER        Port for server, defaults to 8001
   --debug                   Enable debug mode - useful for development
   --reload                  Automatically reload if database or code change detected -
                             useful for development

From 872284d355a4e28c67c15f223a86e3407a483f45 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Fri, 1 Nov 2019 15:15:10 -0700
Subject: [PATCH 75/88] Plugin static assets support both hyphens and underscores in names

Closes #611
---
 datasette/app.py | 13 +++++++++++--
 docs/plugins.rst |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index 935b1730..203e0991 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -612,8 +612,17 @@ class Datasette:
         # Mount any plugin static/ directories
         for plugin in get_plugins(pm):
             if plugin["static_path"]:
-                modpath = "/-/static-plugins/{}/(?P<path>.*)$".format(plugin["name"])
-                add_route(asgi_static(plugin["static_path"]), modpath)
+                add_route(
+                    asgi_static(plugin["static_path"]),
+                    "/-/static-plugins/{}/(?P<path>.*)$".format(plugin["name"]),
+                )
+                # Support underscores in name in addition to hyphens, see https://github.com/simonw/datasette/issues/611
+                add_route(
+                    asgi_static(plugin["static_path"]),
+                    "/-/static-plugins/{}/(?P<path>.*)$".format(
+                        plugin["name"].replace("-", "_")
+                    ),
+                )
         add_route(
             JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata),
             r"/-/metadata(?P<as_format>(\.json)?)$",
diff --git a/docs/plugins.rst b/docs/plugins.rst
index 1d4f1e1a..6df7ff6a 100644
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@@ -442,7 +442,7 @@ you have one:
     @hookimpl
     def extra_js_urls():
         return [
-            '/-/static-plugins/your_plugin/app.js'
+            '/-/static-plugins/your-plugin/app.js'
         ]

..
_plugin_hook_publish_subcommand: From 28ac836b587e4535fb779f2e8dece8954b552362 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 2 Nov 2019 15:29:40 -0700 Subject: [PATCH 76/88] Don't show 'None' as label for nullable foreign key, closes #406 --- datasette/views/table.py | 2 +- tests/fixtures.py | 1 + tests/test_api.py | 18 ++++++++++++++++-- tests/test_html.py | 9 +++++++-- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 44b186cf..326c11ae 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -637,7 +637,7 @@ class TableView(RowTableShared): new_row = CustomRow(columns) for column in row.keys(): value = row[column] - if (column, value) in expanded_labels: + if (column, value) in expanded_labels and value is not None: new_row[column] = { "value": value, "label": expanded_labels[(column, value)], diff --git a/tests/fixtures.py b/tests/fixtures.py index 93c3da9f..8aa44687 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -754,6 +754,7 @@ INSERT INTO primary_key_multiple_columns VALUES (1, 'hey', 'world'); INSERT INTO primary_key_multiple_columns_explicit_label VALUES (1, 'hey', 'world2'); INSERT INTO foreign_key_references VALUES (1, 1, 1); +INSERT INTO foreign_key_references VALUES (2, null, null); INSERT INTO complex_foreign_keys VALUES (1, 1, 2, 1); INSERT INTO custom_foreign_key_label VALUES (1, 1); diff --git a/tests/test_api.py b/tests/test_api.py index 41557bcf..c6acbab1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -216,7 +216,7 @@ def test_database_page(app_client): "name": "foreign_key_references", "columns": ["pk", "foreign_key_with_label", "foreign_key_with_no_label"], "primary_keys": ["pk"], - "count": 1, + "count": 2, "hidden": False, "fts_table": None, "foreign_keys": { @@ -1519,7 +1519,7 @@ def test_expand_labels(app_client): def test_expand_label(app_client): response = app_client.get( "/fixtures/foreign_key_references.json?_shape=object" - "&_label=foreign_key_with_label" + "&_label=foreign_key_with_label&_size=1" ) assert { "1": { @@ -1693,3 +1693,17 @@ def test_common_prefix_database_names(app_client_conflicting_database_names): app_client_conflicting_database_names.get(path).body.decode("utf8") ) assert db_name == data["database"] + + +def test_null_foreign_keys_are_not_expanded(app_client): + response = app_client.get( + "/fixtures/foreign_key_references.json?_shape=array&_labels=on" + ) + assert [ + { + "pk": "1", + "foreign_key_with_label": {"value": "1", "label": "hello"}, + "foreign_key_with_no_label": {"value": "1", "label": "1"}, + }, + {"pk": "2", "foreign_key_with_label": None, "foreign_key_with_no_label": None,}, + ] == response.json diff --git a/tests/test_html.py b/tests/test_html.py index aa628dec..f63e595b 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -603,7 +603,12 @@ def test_table_html_foreign_key_links(app_client): '1', 'hello\xa01', '1', - ] + ], + [ + '2', + '\xa0', + '\xa0', + ], ] assert expected == [ [str(td) for td in tr.select("td")] for tr in table.select("tbody tr") @@ -611,7 +616,7 @@ def test_table_html_foreign_key_links(app_client): def test_table_html_disable_foreign_key_links_with_labels(app_client): - response = app_client.get("/fixtures/foreign_key_references?_labels=off") + response = app_client.get("/fixtures/foreign_key_references?_labels=off&_size=1") assert response.status == 200 table = Soup(response.body, "html.parser").find("table") expected = [ From 66dd9e00c7c0a4f88cc165f566259bbe099fc9ae Mon Sep 
17 00:00:00 2001 From: Simon Willison Date: Sat, 2 Nov 2019 15:47:20 -0700 Subject: [PATCH 77/88] Release notes for 0.30.2 --- docs/changelog.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8ac32c45..f4761efe 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,16 @@ Changelog ========= +.. _v0_30_2: + +0.30.2 (2019-11-02) +------------------- + +- ``/-/plugins`` page now uses distribution name e.g. ``datasette-cluster-map`` instead of the name of the underlying Python package (``datasette_cluster_map``) (`#606 `__) +- Array faceting is now only suggested for columns that contain arrays of strings (`#562 `__) +- Better documentation for the ``--host`` argument (`#574 `__) +- Don't show ``None`` with a broken link for the label on a nullable foreign key (`#406 `__) + .. _v0_30_1: 0.30.1 (2019-10-30) @@ -14,6 +24,7 @@ Changelog .. _v0_30: + 0.30 (2019-10-18) ----------------- @@ -82,7 +93,7 @@ Two new plugins take advantage of this hook: New plugin hook: extra_template_vars ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :ref:`plugin_extra_template_vars` plugin hook allows plugins to inject their own additional variables into the Datasette template context. This can be used in conjunction with custom templates to customize the Datasette interface. `datasette-auth-github `__ uses this hook to add custom HTML to the new top navigation bar (which is designed to be modified by plugins, see `#540 `__). +The :ref:`plugin_hook_extra_template_vars` plugin hook allows plugins to inject their own additional variables into the Datasette template context. This can be used in conjunction with custom templates to customize the Datasette interface. `datasette-auth-github `__ uses this hook to add custom HTML to the new top navigation bar (which is designed to be modified by plugins, see `#540 `__). 
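For readers new to this hook, a minimal sketch of a plugin using it (illustrative only, adapted from the plugin documentation; not part of this release's changes)::

    from datasette import hookimpl

    @hookimpl
    def extra_template_vars(request):
        # Make the incoming User-Agent available to every template
        return {"user_agent": request.headers.get("user-agent")}

Each key in the returned dictionary becomes a variable that custom templates can reference directly.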
Secret plugin configuration options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 5eaf398592248c5c44d4d1bc588637432426ec88 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 2 Nov 2019 16:12:46 -0700 Subject: [PATCH 78/88] Fix CSV export for nullable foreign keys, closes #612 --- datasette/views/base.py | 12 ++++++++---- tests/test_csv.py | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 1568b084..94945304 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -330,10 +330,14 @@ class DataView(BaseView): else: # Look for {"value": "label": } dicts and expand new_row = [] - for cell in row: - if isinstance(cell, dict): - new_row.append(cell["value"]) - new_row.append(cell["label"]) + for heading, cell in zip(data["columns"], row): + if heading in expanded_columns: + if cell is None: + new_row.extend(("", "")) + else: + assert isinstance(cell, dict) + new_row.append(cell["value"]) + new_row.append(cell["label"]) else: new_row.append(cell) await writer.writerow(new_row) diff --git a/tests/test_csv.py b/tests/test_csv.py index b148b6db..13aca489 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -41,6 +41,14 @@ pk,created,planet_int,on_earth,state,city_id,city_id_label,neighborhood,tags,com "\n", "\r\n" ) +EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV = """ +pk,foreign_key_with_label,foreign_key_with_label_label,foreign_key_with_no_label,foreign_key_with_no_label_label +1,1,hello,1,1 +2,,,, +""".lstrip().replace( + "\n", "\r\n" +) + def test_table_csv(app_client): response = app_client.get("/fixtures/simple_primary_key.csv") @@ -63,6 +71,13 @@ def test_table_csv_with_labels(app_client): assert EXPECTED_TABLE_WITH_LABELS_CSV == response.text +def test_table_csv_with_nullable_labels(app_client): + response = app_client.get("/fixtures/foreign_key_references.csv?_labels=1") + assert response.status == 200 + assert "text/plain; charset=utf-8" == response.headers["content-type"] + assert EXPECTED_TABLE_WITH_NULLABLE_LABELS_CSV == response.text + + def test_custom_sql_csv(app_client): response = app_client.get( "/fixtures.csv?sql=select+content+from+simple_primary_key+limit+2" From fa4d77b01ee268c97989adff2dda73fc6dd2800a Mon Sep 17 00:00:00 2001 From: Tobias Kunze Date: Mon, 4 Nov 2019 03:39:55 +0100 Subject: [PATCH 79/88] Offer to format readonly SQL (#602) Following discussion in #601, this PR adds a "Format SQL" button to read-only SQL (if the SQL actually differs from the formatting result). It also removes a console error on readonly SQL queries. Thanks, @rixx! 
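In essence, the new branch (sketched here; the full diff follows) only reveals the button when formatting would actually change the displayed SQL::

    const formatted = sqlFormatter.format(readOnly.innerHTML);
    if (formatted != readOnly.innerHTML) {
        // Only offer "Format SQL" when it would have a visible effect
        sqlFormat.hidden = false;
    }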
--- datasette/templates/_codemirror_foot.html | 41 ++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html index 9aba61ab..4019d448 100644 --- a/datasette/templates/_codemirror_foot.html +++ b/datasette/templates/_codemirror_foot.html @@ -6,21 +6,32 @@ window.onload = () => { if (sqlFormat && !readOnly) { sqlFormat.hidden = false; } - var editor = CodeMirror.fromTextArea(sqlInput, { - lineNumbers: true, - mode: "text/x-sql", - lineWrapping: true, - }); - editor.setOption("extraKeys", { - "Shift-Enter": function() { - document.getElementsByClassName("sql")[0].submit(); - }, - Tab: false - }); - if (sqlInput && sqlFormat) { - sqlFormat.addEventListener("click", ev => { - editor.setValue(sqlFormatter.format(editor.getValue())); - }) + if (sqlInput) { + var editor = CodeMirror.fromTextArea(sqlInput, { + lineNumbers: true, + mode: "text/x-sql", + lineWrapping: true, + }); + editor.setOption("extraKeys", { + "Shift-Enter": function() { + document.getElementsByClassName("sql")[0].submit(); + }, + Tab: false + }); + if (sqlFormat) { + sqlFormat.addEventListener("click", ev => { + editor.setValue(sqlFormatter.format(editor.getValue())); + }) + } + } + if (sqlFormat && readOnly) { + const formatted = sqlFormatter.format(readOnly.innerHTML); + if (formatted != readOnly.innerHTML) { + sqlFormat.hidden = false; + sqlFormat.addEventListener("click", ev => { + readOnly.innerHTML = formatted; + }) + } } } From d3e9387466e01a418fe346d906d3142763f0b73e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 3 Nov 2019 20:11:55 -0800 Subject: [PATCH 80/88] pk__notin= filter, closes #614 --- datasette/filters.py | 15 +++++++++++++++ docs/json_api.rst | 3 +++ tests/test_filters.py | 3 +++ 3 files changed, 21 insertions(+) diff --git a/datasette/filters.py b/datasette/filters.py index efe014ae..5897a3ed 100644 --- a/datasette/filters.py +++ b/datasette/filters.py @@ -77,6 +77,20 @@ class InFilter(Filter): return "{} in {}".format(column, json.dumps(self.split_value(value))) +class NotInFilter(InFilter): + key = "notin" + display = "not in" + + def where_clause(self, table, column, value, param_counter): + values = self.split_value(value) + params = [":p{}".format(param_counter + i) for i in range(len(values))] + sql = "{} not in ({})".format(escape_sqlite(column), ", ".join(params)) + return sql, values + + def human_clause(self, column, value): + return "{} not in {}".format(column, json.dumps(self.split_value(value))) + + class Filters: _filters = ( [ @@ -125,6 +139,7 @@ class Filters: TemplatedFilter("like", "like", '"{c}" like :{p}', '{c} like "{v}"'), TemplatedFilter("glob", "glob", '"{c}" glob :{p}', '{c} glob "{v}"'), InFilter(), + NotInFilter(), ] + ( [ diff --git a/docs/json_api.rst b/docs/json_api.rst index 4b365e14..de70362c 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -228,6 +228,9 @@ You can filter the data returned by the table based on column values using a que ``?column__in=["value","value,with,commas"]`` +``?column__notin=value1,value2,value3`` + Rows where column does not match any of the provided values. The inverse of ``__in=``. Also supports JSON arrays. + ``?column__arraycontains=value`` Works against columns that contain JSON arrays - matches if any of the values in that array match. 
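To make the new ``__notin`` filter concrete, a sketch using the ``fixtures`` database from the tests (the column and values here are illustrative)::

    /fixtures/facetable.json?state__notin=CA,MI

Per ``NotInFilter.where_clause`` above, this contributes a clause of the form ``state not in (:p0, :p1)`` with ``:p0`` bound to ``'CA'`` and ``:p1`` to ``'MI'``; the JSON form ``?state__notin=["CA","MI"]`` is also accepted.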
diff --git a/tests/test_filters.py b/tests/test_filters.py index fd682cd9..8598087f 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -47,6 +47,9 @@ import pytest ["foo in (:p0, :p1)"], ["dog,cat", "cat[dog]"], ), + # Not in, and JSON array not in + ((("foo__notin", "1,2,3"),), ["foo not in (:p0, :p1, :p2)"], ["1", "2", "3"]), + ((("foo__notin", "[1,2,3]"),), ["foo not in (:p0, :p1, :p2)"], [1, 2, 3]), ], ) def test_build_where(args, expected_where, expected_params): From daab48aaf548e2edeba0276f957ec4434123dd7b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 4 Nov 2019 15:03:48 -0800 Subject: [PATCH 81/88] Use select colnames, not select * for table view - refs #615 --- datasette/views/table.py | 8 ++++++-- tests/test_api.py | 3 ++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 326c11ae..139ff80b 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -235,13 +235,17 @@ class TableView(RowTableShared): raise NotFound("Table not found: {}".format(table)) pks = await db.primary_keys(table) + table_columns = await db.table_columns(table) + + select_columns = ", ".join(escape_sqlite(t) for t in table_columns) + use_rowid = not pks and not is_view if use_rowid: - select = "rowid, *" + select = "rowid, {}".format(select_columns) order_by = "rowid" order_by_pks = "rowid" else: - select = "*" + select = select_columns order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks diff --git a/tests/test_api.py b/tests/test_api.py index c6acbab1..4a09b238 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -610,7 +610,8 @@ def test_table_json(app_client): assert response.status == 200 data = response.json assert ( - data["query"]["sql"] == "select * from simple_primary_key order by id limit 51" + data["query"]["sql"] + == "select id, content from simple_primary_key order by id limit 51" ) assert data["query"]["params"] == {} assert data["rows"] == [ From e5dc89a58b5d3b4a57a4d5829235a2da13fbaa01 Mon Sep 17 00:00:00 2001 From: Tobias Kunze Date: Tue, 5 Nov 2019 00:16:30 +0100 Subject: [PATCH 82/88] Handle spaces in DB names (#590) Closes #503 - thanks, @rixx --- datasette/views/base.py | 3 ++- tests/fixtures.py | 4 ++-- tests/test_api.py | 19 ++++++++++++++++++- tests/test_html.py | 8 ++++---- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index 94945304..062c6956 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -203,12 +203,13 @@ class DataView(BaseView): hash = hash_bit else: name = db_name - # Verify the hash + name = urllib.parse.unquote_plus(name) try: db = self.ds.databases[name] except KeyError: raise NotFound("Database not found: {}".format(name)) + # Verify the hash expected = "000" if db.hash is not None: expected = db.hash[:HASH_LENGTH] diff --git a/tests/fixtures.py b/tests/fixtures.py index 8aa44687..dcc414bf 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -174,7 +174,7 @@ def app_client_no_files(): @pytest.fixture(scope="session") def app_client_two_attached_databases(): yield from make_app_client( - extra_databases={"extra_database.db": EXTRA_DATABASE_SQL} + extra_databases={"extra database.db": EXTRA_DATABASE_SQL} ) @@ -188,7 +188,7 @@ def app_client_conflicting_database_names(): @pytest.fixture(scope="session") def app_client_two_attached_databases_one_immutable(): yield from make_app_client( - is_immutable=True, 
extra_databases={"extra_database.db": EXTRA_DATABASE_SQL} + is_immutable=True, extra_databases={"extra database.db": EXTRA_DATABASE_SQL} ) diff --git a/tests/test_api.py b/tests/test_api.py index 4a09b238..1fa8642f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -6,6 +6,7 @@ from .fixtures import ( # noqa app_client_shorter_time_limit, app_client_larger_cache_size, app_client_returned_rows_matches_page_size, + app_client_two_attached_databases, app_client_two_attached_databases_one_immutable, app_client_conflicting_database_names, app_client_with_cors, @@ -1188,7 +1189,7 @@ def test_databases_json(app_client_two_attached_databases_one_immutable): databases = response.json assert 2 == len(databases) extra_database, fixtures_database = databases - assert "extra_database" == extra_database["name"] + assert "extra database" == extra_database["name"] assert None == extra_database["hash"] assert True == extra_database["is_mutable"] assert False == extra_database["is_memory"] @@ -1679,6 +1680,22 @@ def test_cors(app_client_with_cors, path, status_code): assert "*" == response.headers["Access-Control-Allow-Origin"] +@pytest.mark.parametrize( + "path", + ( + "/", + ".json", + "/searchable", + "/searchable.json", + "/searchable_view", + "/searchable_view.json", + ), +) +def test_database_with_space_in_name(app_client_two_attached_databases, path): + response = app_client_two_attached_databases.get("/extra database" + path) + assert response.status == 200 + + def test_common_prefix_database_names(app_client_conflicting_database_names): # https://github.com/simonw/datasette/issues/597 assert ["fixtures", "foo", "foo-bar"] == [ diff --git a/tests/test_html.py b/tests/test_html.py index f63e595b..7f1af86e 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -27,11 +27,11 @@ def test_homepage(app_client_two_attached_databases): # Should be two attached databases assert [ {"href": "/fixtures", "text": "fixtures"}, - {"href": "/extra_database", "text": "extra_database"}, + {"href": "/extra database", "text": "extra database"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] # The first attached database should show count text and attached tables h2 = soup.select("h2")[1] - assert "extra_database" == h2.text.strip() + assert "extra database" == h2.text.strip() counts_p, links_p = h2.find_all_next("p")[:2] assert ( "2 rows in 1 table, 5 rows in 4 hidden tables, 1 view" == counts_p.text.strip() @@ -41,8 +41,8 @@ def test_homepage(app_client_two_attached_databases): {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") ] assert [ - {"href": "/extra_database/searchable", "text": "searchable"}, - {"href": "/extra_database/searchable_view", "text": "searchable_view"}, + {"href": "/extra database/searchable", "text": "searchable"}, + {"href": "/extra database/searchable_view", "text": "searchable_view"}, ] == table_links From bccf474abdeacd9bd649448b01cc1f42e09fe9ef Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 5 Nov 2019 21:12:55 -0800 Subject: [PATCH 83/88] Removed _group_count=col feature, closes #504 --- datasette/views/table.py | 12 ------------ docs/json_api.rst | 9 --------- 2 files changed, 21 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 139ff80b..920693d7 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -499,18 +499,6 @@ class TableView(RowTableShared): if order_by: order_by = "order by {} ".format(order_by) - # _group_count=col1&_group_count=col2 - group_count = 
special_args_lists.get("_group_count") or [] - if group_count: - sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format( - group_cols=", ".join( - '"{}"'.format(group_count_col) for group_count_col in group_count - ), - table_name=escape_sqlite(table), - where=where_clause, - ) - return await self.custom_sql(request, database, hash, sql, editable=True) - extra_args = {} # Handle ?_size=500 page_size = _size or request.raw_args.get("_size") diff --git a/docs/json_api.rst b/docs/json_api.rst index de70362c..e369bee7 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -321,15 +321,6 @@ Special table arguments Here's `an example `__. - -``?_group_count=COLUMN`` - Executes a SQL query that returns a count of the number of rows matching - each unique value in that column, with the most common ordered first. - -``?_group_count=COLUMN1&_group_count=column2`` - You can pass multiple ``_group_count`` columns to return counts against - unique combinations of those columns. - ``?_next=TOKEN`` Pagination by continuation token - pass the token that was returned in the ``"next"`` property by the previous page. From b0b7c80571110ff5d79ca819dc1822eb4bd0ceb3 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 6 Nov 2019 16:55:44 -0800 Subject: [PATCH 84/88] Removed unused special_args_lists variable --- datasette/views/table.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 920693d7..a60a3941 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -261,12 +261,10 @@ class TableView(RowTableShared): # That's so if there is a column that starts with _ # it can still be queried using ?_col__exact=blah special_args = {} - special_args_lists = {} other_args = [] for key, value in args.items(): if key.startswith("_") and "__" not in key: special_args[key] = value[0] - special_args_lists[key] = value else: for v in value: other_args.append((key, v)) From 5481bf6da638d32e0749cb6f88c773ed8f447fee Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 7 Nov 2019 18:48:39 -0800 Subject: [PATCH 85/88] Improved UI for publish cloudrun, closes #608 --- datasette/publish/cloudrun.py | 39 ++++++++++++++++++++++-- tests/test_publish_cloudrun.py | 55 ++++++++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/datasette/publish/cloudrun.py b/datasette/publish/cloudrun.py index c2d77746..a833a32b 100644 --- a/datasette/publish/cloudrun.py +++ b/datasette/publish/cloudrun.py @@ -60,6 +60,23 @@ def publish_subcommand(publish): "gcloud config get-value project", shell=True, universal_newlines=True ).strip() + if not service: + # Show the user their current services, then prompt for one + click.echo("Please provide a service name for this deployment\n") + click.echo("Using an existing service name will over-write it") + click.echo("") + existing_services = get_existing_services() + if existing_services: + click.echo("Your existing services:\n") + for existing_service in existing_services: + click.echo( + " {name} - created {created} - {url}".format( + **existing_service + ) + ) + click.echo("") + service = click.prompt("Service name", type=str) + extra_metadata = { "title": title, "license": license, @@ -110,8 +127,26 @@ def publish_subcommand(publish): image_id = "gcr.io/{project}/{name}".format(project=project, name=name) check_call("gcloud builds submit --tag {}".format(image_id), shell=True) check_call( - "gcloud beta 
run deploy --allow-unauthenticated --platform=managed --image {}{}".format( - image_id, " {}".format(service) if service else "" + "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {} {}".format( + image_id, service, ), shell=True, ) + + +def get_existing_services(): + services = json.loads( + check_output( + "gcloud beta run services list --platform=managed --format json", + shell=True, + universal_newlines=True, + ) + ) + return [ + { + "name": service["metadata"]["name"], + "created": service["metadata"]["creationTimestamp"], + "url": service["status"]["address"]["url"], + } + for service in services + ] diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index 481ac04d..a038b60e 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -24,6 +24,53 @@ def test_publish_cloudrun_invalid_database(mock_which): assert 'Path "woop.db" does not exist' in result.output +@mock.patch("shutil.which") +@mock.patch("datasette.publish.cloudrun.check_output") +@mock.patch("datasette.publish.cloudrun.check_call") +@mock.patch("datasette.publish.cloudrun.get_existing_services") +def test_publish_cloudrun_prompts_for_service( + mock_get_existing_services, mock_call, mock_output, mock_which +): + mock_get_existing_services.return_value = [ + {"name": "existing", "created": "2019-01-01", "url": "http://www.example.com/"} + ] + mock_output.return_value = "myproject" + mock_which.return_value = True + runner = CliRunner() + with runner.isolated_filesystem(): + open("test.db", "w").write("data") + result = runner.invoke( + cli.cli, ["publish", "cloudrun", "test.db"], input="input-service" + ) + assert ( + """ +Please provide a service name for this deployment + +Using an existing service name will over-write it + +Your existing services: + + existing - created 2019-01-01 - http://www.example.com/ + +Service name: input-service +""".strip() + == result.output.strip() + ) + assert 0 == result.exit_code + tag = "gcr.io/myproject/datasette" + mock_call.assert_has_calls( + [ + mock.call("gcloud builds submit --tag {}".format(tag), shell=True), + mock.call( + "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {} input-service".format( + tag + ), + shell=True, + ), + ] + ) + + @mock.patch("shutil.which") @mock.patch("datasette.publish.cloudrun.check_output") @mock.patch("datasette.publish.cloudrun.check_call") @@ -33,14 +80,16 @@ def test_publish_cloudrun(mock_call, mock_output, mock_which): runner = CliRunner() with runner.isolated_filesystem(): open("test.db", "w").write("data") - result = runner.invoke(cli.cli, ["publish", "cloudrun", "test.db"]) + result = runner.invoke( + cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"] + ) assert 0 == result.exit_code tag = "gcr.io/{}/datasette".format(mock_output.return_value) mock_call.assert_has_calls( [ mock.call("gcloud builds submit --tag {}".format(tag), shell=True), mock.call( - "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {}".format( + "gcloud beta run deploy --allow-unauthenticated --platform=managed --image {} test".format( tag ), shell=True, @@ -65,6 +114,8 @@ def test_publish_cloudrun_plugin_secrets(mock_call, mock_output, mock_which): "publish", "cloudrun", "test.db", + "--service", + "datasette", "--plugin-secret", "datasette-auth-github", "client_id", From 40f9682b232fdd008adbd820735528c60ea09af4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 8 Nov 2019 18:12:20 -0800 Subject: [PATCH 86/88] Improved 
documentation for "publish cloudrun" --- docs/publish.rst | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/publish.rst b/docs/publish.rst index 304be8ef..89d33085 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -43,14 +43,16 @@ You will first need to install and configure the Google Cloud CLI tools by follo You can then publish a database to Google Cloud Run using the following command:: - datasette publish cloudrun mydatabase.db + datasette publish cloudrun mydatabase.db --service=my-database + +A Cloud Run **service** is a single hosted application. The service name you specify will be used as part of the Cloud Run URL. If you deploy to a service name that you have used in the past your new deployment will replace the previous one. + +If you omit the ``--service`` option you will be asked to pick a service name interactively during the deploy. You may need to interact with prompts from the tool. Once it has finished it will output a URL like this one:: - Service [datasette] revision [datasette-00001] has been deployed - and is serving traffic at https://datasette-j7hipcg4aq-uc.a.run.app - -During the deployment the tool will prompt you for the name of your service. You can reuse an existing name to replace your previous deployment with your new version, or pick a new name to deploy to a new URL. + Service [my-service] revision [my-service-00001] has been deployed + and is serving traffic at https://my-service-j7hipcg4aq-uc.a.run.app .. literalinclude:: datasette-publish-cloudrun-help.txt @@ -90,18 +92,18 @@ Custom metadata and plugins You can define your own :ref:`metadata` and deploy that with your instance like so:: - datasette publish nowv1 mydatabase.db -m metadata.json + datasette publish cloudrun --service=my-service mydatabase.db -m metadata.json If you just want to set the title, license or source information you can do that directly using extra options to ``datasette publish``:: - datasette publish nowv1 mydatabase.db \ + datasette publish cloudrun mydatabase.db --service=my-service \ --title="Title of my database" \ --source="Where the data originated" \ --source_url="http://www.example.com/" You can also specify plugins you would like to install. For example, if you want to include the `datasette-vega `_ visualization plugin you can use the following:: - datasette publish nowv1 mydatabase.db --install=datasette-vega + datasette publish cloudrun mydatabase.db --service=my-service --install=datasette-vega If a plugin has any :ref:`plugins_configuration_secret` you can use the ``--plugin-secret`` option to set those secrets at publish time. For example, using Heroku with `datasette-auth-github `__ you might run the following command:: From deeef8da96aae83954a2c96b3d3b247fd9443b50 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 8 Nov 2019 18:15:13 -0800 Subject: [PATCH 87/88] datasette-csvs on Glitch now uses sqlite-utils It previously used csvs-to-sqlite but that had heavy dependencies. See https://support.glitch.com/t/can-you-upgrade-python-to-latest-version/7980/33 --- docs/getting_started.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index d0c22583..fdf7d23c 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -25,7 +25,7 @@ Glitch allows you to "remix" any project to create your own copy and start editi .. 
image:: https://cdn.glitch.com/2703baf2-b643-4da7-ab91-7ee2a2d00b5b%2Fremix-button.svg :target: https://glitch.com/edit/#!/remix/datasette-csvs -Find a CSV file and drag it onto the Glitch file explorer panel - ``datasette-csvs`` will automatically convert it to a SQLite database (using `csvs-to-sqlite `__) and allow you to start exploring it using Datasette. +Find a CSV file and drag it onto the Glitch file explorer panel - ``datasette-csvs`` will automatically convert it to a SQLite database (using `sqlite-utils `__) and allow you to start exploring it using Datasette. If your CSV file has a ``latitude`` and ``longitude`` column you can visualize it on a map by uncommenting the ``datasette-cluster-map`` line in the ``requirements.txt`` file using the Glitch file editor. From a5defb684fcc734f6325ca08beef9f49c3e7a298 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 9 Nov 2019 17:29:36 -0800 Subject: [PATCH 88/88] CREATE INDEX statements on table page, closes #618 --- datasette/database.py | 13 ++++++++++++- tests/fixtures.py | 1 + tests/test_html.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/datasette/database.py b/datasette/database.py index 7e6f7245..3a1cea94 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -232,7 +232,18 @@ class Database: ) if not table_definition_rows: return None - return table_definition_rows[0][0] + bits = [table_definition_rows[0][0] + ";"] + # Add on any indexes + index_rows = list( + await self.ds.execute( + self.name, + "select sql from sqlite_master where tbl_name = :n and type='index' and sql is not null", + {"n": table}, + ) + ) + for index_row in index_rows: + bits.append(index_row[0] + ";") + return "\n".join(bits) async def get_view_definition(self, view): return await self.get_table_definition(view, "view") diff --git a/tests/fixtures.py b/tests/fixtures.py index dcc414bf..87e66f99 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -514,6 +514,7 @@ CREATE TABLE compound_three_primary_keys ( content text, PRIMARY KEY (pk1, pk2, pk3) ); +CREATE INDEX idx_compound_three_primary_keys_content ON compound_three_primary_keys(content); CREATE TABLE foreign_key_references ( pk varchar(30) primary key, diff --git a/tests/test_html.py b/tests/test_html.py index 7f1af86e..44627cdc 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -119,6 +119,39 @@ def test_row_strange_table_name_with_url_hash(app_client_with_hash): assert response.status == 200 +@pytest.mark.parametrize( + "path,expected_definition_sql", + [ + ( + "/fixtures/facet_cities", + """ +CREATE TABLE facet_cities ( + id integer primary key, + name text +); + """.strip(), + ), + ( + "/fixtures/compound_three_primary_keys", + """ +CREATE TABLE compound_three_primary_keys ( + pk1 varchar(30), + pk2 varchar(30), + pk3 varchar(30), + content text, + PRIMARY KEY (pk1, pk2, pk3) +); +CREATE INDEX idx_compound_three_primary_keys_content ON compound_three_primary_keys(content); + """.strip(), + ), + ], +) +def test_definition_sql(path, expected_definition_sql, app_client): + response = app_client.get(path) + pre = Soup(response.body, "html.parser").select_one("pre.wrapped-sql") + assert expected_definition_sql == pre.string + + def test_table_cell_truncation(): for client in make_app_client(config={"truncate_cells_html": 5}): response = client.get("/fixtures/facetable")