diff --git a/.travis.yml b/.travis.yml index 1553ef1e..40799d84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.6" - "3.7" - "3.5" + - "3.8-dev" # Executed for 3.5 AND 3.5 as the first "test" stage: script: @@ -23,12 +24,23 @@ jobs: - stage: deploy latest.datasette.io if: branch = master AND type = push script: - - pip install . + - pip install .[test] - npm install -g now - python tests/fixtures.py fixtures.db fixtures.json - export ALIAS=`echo $TRAVIS_COMMIT | cut -c 1-7` - datasette publish nowv1 fixtures.db -m fixtures.json --token=$NOW_TOKEN --branch=$TRAVIS_COMMIT --version-note=$TRAVIS_COMMIT --name=datasette-latest-$ALIAS --alias=latest.datasette.io --alias=$ALIAS.datasette.io - stage: release tagged version + if: tag IS present + python: 3.6 + deploy: + - provider: pypi + user: simonw + distributions: bdist_wheel + password: ${PYPI_PASSWORD} + on: + branch: master + tags: true + - stage: publish docker image if: tag IS present python: 3.6 script: @@ -42,11 +54,3 @@ jobs: - docker build -f Dockerfile -t $REPO:$TRAVIS_TAG . - docker tag $REPO:$TRAVIS_TAG $REPO:latest - docker push $REPO - deploy: - - provider: pypi - user: simonw - distributions: bdist_wheel - password: ${PYPI_PASSWORD} - on: - branch: master - tags: true diff --git a/README.md b/README.md index 638dcd1c..9f85f1ba 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Datasette [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.org/project/datasette/) +[![Python 3.x](https://img.shields.io/pypi/pyversions/datasette.svg?logo=python&logoColor=white)](https://pypi.org/project/datasette/) [![Travis CI](https://travis-ci.org/simonw/datasette.svg?branch=master)](https://travis-ci.org/simonw/datasette) [![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](http://datasette.readthedocs.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette/blob/master/LICENSE) @@ -20,6 +21,12 @@ Datasette is aimed at data journalists, museum curators, archivists, local gover ## News + * 18th October 2019: [Datasette 0.30](https://datasette.readthedocs.io/en/stable/changelog.html#v0-30) + * 13th July 2019: [Single sign-on against GitHub using ASGI middleware](https://simonwillison.net/2019/Jul/14/sso-asgi/) talks about the implementation of [datasette-auth-github](https://github.com/simonw/datasette-auth-github) in more detail. + * 7th July 2019: [Datasette 0.29](https://datasette.readthedocs.io/en/stable/changelog.html#v0-29) - ASGI, new plugin hooks, facet by date and much, much more... + * [datasette-auth-github](https://github.com/simonw/datasette-auth-github) - a new plugin for Datasette 0.29 that lets you require users to authenticate against GitHub before accessing your Datasette instance. You can whitelist specific users, or you can restrict access to members of specific GitHub organizations or teams. + * [datasette-cors](https://github.com/simonw/datasette-cors) - a plugin that lets you configure CORS access from a list of domains (or a set of domain wildcards) so you can make JavaScript calls to a Datasette instance from a specific set of other hosts. 
+ * 23rd June 2019: [Porting Datasette to ASGI, and Turtles all the way down](https://simonwillison.net/2019/Jun/23/datasette-asgi/) * 21st May 2019: The anonymized raw data from [the Stack Overflow Developer Survey 2019](https://stackoverflow.blog/2019/05/21/public-data-release-of-stack-overflows-2019-developer-survey/) has been [published in partnership with Glitch](https://glitch.com/culture/discover-insights-explore-developer-survey-results-2019/), powered by Datasette. * 19th May 2019: [Datasette 0.28](https://datasette.readthedocs.io/en/stable/changelog.html#v0-28) - a salmagundi of new features! * No longer immutable! Datasette now supports [databases that change](https://datasette.readthedocs.io/en/stable/changelog.html#supporting-databases-that-change). @@ -82,26 +89,31 @@ Now visiting http://localhost:8001/History/downloads will show you a web interfa ## datasette serve options - $ datasette serve --help - Usage: datasette serve [OPTIONS] [FILES]... Serve up specified SQLite database files with a web UI Options: -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT host for server, defaults to 127.0.0.1 - -p, --port INTEGER port for server, defaults to 8001 + -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means + only connections from the local machine will be + allowed. Use 0.0.0.0 to listen to all IPs and + allow access from other machines. + -p, --port INTEGER Port for server, defaults to 8001 --debug Enable debug mode - useful for development - --reload Automatically reload if database or code change detected - - useful for development - --cors Enable CORS by serving Access-Control-Allow-Origin: * + --reload Automatically reload if database or code change + detected - useful for development + --cors Enable CORS by serving Access-Control-Allow- + Origin: * --load-extension PATH Path to a SQLite extension to load - --inspect-file TEXT Path to JSON file created using "datasette inspect" - -m, --metadata FILENAME Path to JSON file containing license/source metadata + --inspect-file TEXT Path to JSON file created using "datasette + inspect" + -m, --metadata FILENAME Path to JSON file containing license/source + metadata --template-dir DIRECTORY Path to directory containing custom templates --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --static STATIC MOUNT mountpoint:path-to-directory for serving static + files --memory Make :memory: database available --config CONFIG Set config option using configname:value datasette.readthedocs.io/en/latest/config.html diff --git a/datasette/__main__.py b/datasette/__main__.py new file mode 100644 index 00000000..4adef844 --- /dev/null +++ b/datasette/__main__.py @@ -0,0 +1,4 @@ +from datasette.cli import cli + +if __name__ == "__main__": + cli() diff --git a/datasette/_version.py b/datasette/_version.py index a12f24aa..5783f30f 100644 --- a/datasette/_version.py +++ b/datasette/_version.py @@ -409,7 +409,7 @@ def render_pep440_old(pieces): The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: diff --git a/datasette/app.py b/datasette/app.py index 2ef7da41..203e0991 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1,11 +1,9 @@ import asyncio import collections import hashlib -import json import os import sys import threading -import time import traceback import urllib.parse from concurrent import futures @@ -14,10 +12,8 @@ from pathlib import Path import click from markupsafe import Markup from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PrefixLoader -from sanic import Sanic, response -from sanic.exceptions import InvalidUsage, NotFound -from .views.base import DatasetteError, ureg +from .views.base import DatasetteError, ureg, AsgiRouter from .views.database import DatabaseDownload, DatabaseView from .views.index import IndexView from .views.special import JsonDataView @@ -36,7 +32,16 @@ from .utils import ( sqlite_timelimit, to_css_class, ) -from .tracer import capture_traces, trace +from .utils.asgi import ( + AsgiLifespan, + NotFound, + asgi_static, + asgi_send, + asgi_send_html, + asgi_send_json, + asgi_send_redirect, +) +from .tracer import trace, AsgiTracer from .plugins import pm, DEFAULT_PLUGINS from .version import __version__ @@ -126,8 +131,8 @@ CONFIG_OPTIONS = ( DEFAULT_CONFIG = {option.name: option.default for option in CONFIG_OPTIONS} -async def favicon(request): - return response.text("") +async def favicon(scope, receive, send): + await asgi_send(send, "", 200) class Datasette: @@ -154,7 +159,7 @@ class Datasette: self.files = [MEMORY] elif memory: self.files = (MEMORY,) + self.files - self.databases = {} + self.databases = collections.OrderedDict() self.inspect_data = inspect_data for file in self.files: path = file @@ -263,7 +268,21 @@ class Datasette: ) if plugins is None: return None - return plugins.get(plugin_name) + plugin_config = plugins.get(plugin_name) + # Resolve any $file and $env keys + if isinstance(plugin_config, dict): + # Create a copy so we don't mutate the version visible at /-/metadata.json + plugin_config_copy = dict(plugin_config) + for key, value in plugin_config_copy.items(): + if isinstance(value, dict): + if list(value.keys()) == ["$env"]: + plugin_config_copy[key] = os.environ.get( + list(value.values())[0] + ) + elif list(value.keys()) == ["$file"]: + plugin_config_copy[key] = open(list(value.values())[0]).read() + return plugin_config_copy + return plugin_config def app_css_hash(self): if not hasattr(self, "_app_css_hash"): @@ -413,6 +432,7 @@ class Datasette: "full": sys.version, }, "datasette": datasette_version, + "asgi": "3.0", "sqlite": { "version": sqlite_version, "fts_versions": fts_versions, @@ -437,6 +457,15 @@ class Datasette: for p in ps ] + def threads(self): + threads = list(threading.enumerate()) + return { + "num_threads": len(threads), + "threads": [ + {"name": t.name, "ident": t.ident, "daemon": t.daemon} for t in threads + ], + } + def table_metadata(self, database, table): "Fetch table-specific metadata." return ( @@ -450,20 +479,7 @@ class Datasette: def in_thread(): conn = getattr(connections, db_name, None) if not conn: - db = self.databases[db_name] - if db.is_memory: - conn = sqlite3.connect(":memory:") - else: - # mode=ro or immutable=1? 
- if db.is_mutable: - qs = "mode=ro" - else: - qs = "immutable=1" - conn = sqlite3.connect( - "file:{}?{}".format(db.path, qs), - uri=True, - check_same_thread=False, - ) + conn = self.databases[db_name].connect() self.prepare_connection(conn) setattr(connections, db_name, conn) return fn(conn) @@ -543,21 +559,7 @@ class Datasette: self.renderers[renderer["extension"]] = renderer["callback"] def app(self): - class TracingSanic(Sanic): - async def handle_request(self, request, write_callback, stream_callback): - if request.args.get("_trace"): - request["traces"] = [] - request["trace_start"] = time.time() - with capture_traces(request["traces"]): - await super().handle_request( - request, write_callback, stream_callback - ) - else: - await super().handle_request( - request, write_callback, stream_callback - ) - - app = TracingSanic(__name__) + "Returns an ASGI app function that serves the whole of Datasette" default_templates = str(app_root / "datasette" / "templates") template_paths = [] if self.template_dir: @@ -588,134 +590,143 @@ class Datasette: pm.hook.prepare_jinja2_environment(env=self.jinja_env) self.register_renderers() + + routes = [] + + def add_route(view, regex): + routes.append((regex, view)) + # Generate a regex snippet to match all registered renderer file extensions renderer_regex = "|".join(r"\." + key for key in self.renderers.keys()) - app.add_route(IndexView.as_view(self), r"/") + add_route(IndexView.as_asgi(self), r"/(?P(\.jsono?)?$)") # TODO: /favicon.ico and /-/static/ deserve far-future cache expires - app.add_route(favicon, "/favicon.ico") - app.static("/-/static/", str(app_root / "datasette" / "static")) + add_route(favicon, "/favicon.ico") + + add_route( + asgi_static(app_root / "datasette" / "static"), r"/-/static/(?P.*)$" + ) for path, dirname in self.static_mounts: - app.static(path, dirname) + add_route(asgi_static(dirname), r"/" + path + "/(?P.*)$") + # Mount any plugin static/ directories for plugin in get_plugins(pm): if plugin["static_path"]: - modpath = "/-/static-plugins/{}/".format(plugin["name"]) - app.static(modpath, plugin["static_path"]) - app.add_route( - JsonDataView.as_view(self, "metadata.json", lambda: self._metadata), - r"/-/metadata", + add_route( + asgi_static(plugin["static_path"]), + "/-/static-plugins/{}/(?P.*)$".format(plugin["name"]), + ) + # Support underscores in name in addition to hyphens, see https://github.com/simonw/datasette/issues/611 + add_route( + asgi_static(plugin["static_path"]), + "/-/static-plugins/{}/(?P.*)$".format( + plugin["name"].replace("-", "_") + ), + ) + add_route( + JsonDataView.as_asgi(self, "metadata.json", lambda: self._metadata), + r"/-/metadata(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "versions.json", self.versions), - r"/-/versions", + add_route( + JsonDataView.as_asgi(self, "versions.json", self.versions), + r"/-/versions(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "plugins.json", self.plugins), - r"/-/plugins", + add_route( + JsonDataView.as_asgi(self, "plugins.json", self.plugins), + r"/-/plugins(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "config.json", lambda: self._config), - r"/-/config", + add_route( + JsonDataView.as_asgi(self, "config.json", lambda: self._config), + r"/-/config(?P(\.json)?)$", ) - app.add_route( - JsonDataView.as_view(self, "databases.json", self.connected_databases), - r"/-/databases", + add_route( + JsonDataView.as_asgi(self, "threads.json", self.threads), + r"/-/threads(?P(\.json)?)$", ) - app.add_route( - 
DatabaseDownload.as_view(self), r"/" + add_route( + JsonDataView.as_asgi(self, "databases.json", self.connected_databases), + r"/-/databases(?P(\.json)?)$", ) - app.add_route( - DatabaseView.as_view(self), - r"/", + add_route( + DatabaseDownload.as_asgi(self), r"/(?P[^/]+?)(?P\.db)$" ) - app.add_route( - TableView.as_view(self), r"//" - ) - app.add_route( - RowView.as_view(self), - r"///[^/]+?)(?P" + renderer_regex - + r")?$>", + + r"|.jsono|\.csv)?$", + ) + add_route( + TableView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?$)", + ) + add_route( + RowView.as_asgi(self), + r"/(?P[^/]+)/(?P[^/]+?)/(?P[^/]+?)(?P" + + renderer_regex + + r")?$", ) self.register_custom_units() - # On 404 with a trailing slash redirect to path without that slash: - # pylint: disable=unused-variable - @app.middleware("response") - def redirect_on_404_with_trailing_slash(request, original_response): - if original_response.status == 404 and request.path.endswith("/"): - path = request.path.rstrip("/") - if request.query_string: - path = "{}?{}".format(path, request.query_string) - return response.redirect(path) - - @app.middleware("response") - async def add_traces_to_response(request, response): - if request.get("traces") is None: - return - traces = request["traces"] - trace_info = { - "request_duration_ms": 1000 * (time.time() - request["trace_start"]), - "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), - "num_traces": len(traces), - "traces": traces, - } - if "text/html" in response.content_type and b"" in response.body: - extra = json.dumps(trace_info, indent=2) - extra_html = "
<pre>{}</pre></body>
".format(extra).encode("utf8") - response.body = response.body.replace(b"", extra_html) - elif "json" in response.content_type and response.body.startswith(b"{"): - data = json.loads(response.body.decode("utf8")) - if "_trace" not in data: - data["_trace"] = trace_info - response.body = json.dumps(data).encode("utf8") - - @app.exception(Exception) - def on_exception(request, exception): - title = None - help = None - if isinstance(exception, NotFound): - status = 404 - info = {} - message = exception.args[0] - elif isinstance(exception, InvalidUsage): - status = 405 - info = {} - message = exception.args[0] - elif isinstance(exception, DatasetteError): - status = exception.status - info = exception.error_dict - message = exception.message - if exception.messagge_is_html: - message = Markup(message) - title = exception.title - else: - status = 500 - info = {} - message = str(exception) - traceback.print_exc() - templates = ["500.html"] - if status != 500: - templates = ["{}.html".format(status)] + templates - info.update( - {"ok": False, "error": message, "status": status, "title": title} - ) - if request is not None and request.path.split("?")[0].endswith(".json"): - r = response.json(info, status=status) - - else: - template = self.jinja_env.select_template(templates) - r = response.html(template.render(info), status=status) - if self.cors: - r.headers["Access-Control-Allow-Origin"] = "*" - return r - - # First time server starts up, calculate table counts for immutable databases - @app.listener("before_server_start") - async def setup_db(app, loop): + async def setup_db(): + # First time server starts up, calculate table counts for immutable databases for dbname, database in self.databases.items(): if not database.is_mutable: await database.table_counts(limit=60 * 60 * 1000) - return app + asgi = AsgiLifespan( + AsgiTracer(DatasetteRouter(self, routes)), on_startup=setup_db + ) + for wrapper in pm.hook.asgi_wrapper(datasette=self): + asgi = wrapper(asgi) + return asgi + + +class DatasetteRouter(AsgiRouter): + def __init__(self, datasette, routes): + self.ds = datasette + super().__init__(routes) + + async def handle_404(self, scope, receive, send): + # If URL has a trailing slash, redirect to URL without it + path = scope.get("raw_path", scope["path"].encode("utf8")) + if path.endswith(b"/"): + path = path.rstrip(b"/") + if scope["query_string"]: + path += b"?" 
+ scope["query_string"] + await asgi_send_redirect(send, path.decode("latin1")) + else: + await super().handle_404(scope, receive, send) + + async def handle_500(self, scope, receive, send, exception): + title = None + if isinstance(exception, NotFound): + status = 404 + info = {} + message = exception.args[0] + elif isinstance(exception, DatasetteError): + status = exception.status + info = exception.error_dict + message = exception.message + if exception.messagge_is_html: + message = Markup(message) + title = exception.title + else: + status = 500 + info = {} + message = str(exception) + traceback.print_exc() + templates = ["500.html"] + if status != 500: + templates = ["{}.html".format(status)] + templates + info.update({"ok": False, "error": message, "status": status, "title": title}) + headers = {} + if self.ds.cors: + headers["Access-Control-Allow-Origin"] = "*" + if scope["path"].split("?")[0].endswith(".json"): + await asgi_send_json(send, info, status=status, headers=headers) + else: + template = self.ds.jinja_env.select_template(templates) + await asgi_send_html( + send, template.render(info), status=status, headers=headers + ) diff --git a/datasette/cli.py b/datasette/cli.py index 0d47f47a..67c2fe71 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -1,4 +1,5 @@ import asyncio +import uvicorn import click from click import formatting from click_default_group import DefaultGroup @@ -229,9 +230,16 @@ def package( multiple=True, ) @click.option( - "-h", "--host", default="127.0.0.1", help="host for server, defaults to 127.0.0.1" + "-h", + "--host", + default="127.0.0.1", + help=( + "Host for server. Defaults to 127.0.0.1 which means only connections " + "from the local machine will be allowed. Use 0.0.0.0 to listen to " + "all IPs and allow access from other machines." + ), ) -@click.option("-p", "--port", default=8001, help="port for server, defaults to 8001") +@click.option("-p", "--port", default=8001, help="Port for server, defaults to 8001") @click.option( "--debug", is_flag=True, help="Enable debug mode - useful for development" ) @@ -354,4 +362,4 @@ def serve( asyncio.get_event_loop().run_until_complete(ds.run_sanity_checks()) # Start the server - ds.app().run(host=host, port=port, debug=debug) + uvicorn.run(ds.app(), host=host, port=port, log_level="info") diff --git a/datasette/database.py b/datasette/database.py index e4915770..3a1cea94 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -33,6 +33,18 @@ class Database: for key, value in self.ds.inspect_data[self.name]["tables"].items() } + def connect(self): + if self.is_memory: + return sqlite3.connect(":memory:") + # mode=ro or immutable=1? 
+ if self.is_mutable: + qs = "mode=ro" + else: + qs = "immutable=1" + return sqlite3.connect( + "file:{}?{}".format(self.path, qs), uri=True, check_same_thread=False + ) + @property def size(self): if self.is_memory: @@ -220,7 +232,18 @@ class Database: ) if not table_definition_rows: return None - return table_definition_rows[0][0] + bits = [table_definition_rows[0][0] + ";"] + # Add on any indexes + index_rows = list( + await self.ds.execute( + self.name, + "select sql from sqlite_master where tbl_name = :n and type='index' and sql is not null", + {"n": table}, + ) + ) + for index_row in index_rows: + bits.append(index_row[0] + ";") + return "\n".join(bits) async def get_view_definition(self, view): return await self.get_table_definition(view, "view") diff --git a/datasette/facets.py b/datasette/facets.py index 76d73e51..0c6459d6 100644 --- a/datasette/facets.py +++ b/datasette/facets.py @@ -60,7 +60,7 @@ def load_facet_configs(request, table_metadata): @hookimpl def register_facet_classes(): - classes = [ColumnFacet, DateFacet, ManyToManyFacet] + classes = [ColumnFacet, DateFacet] if detect_json1(): classes.append(ArrayFacet) return classes @@ -257,6 +257,16 @@ class ColumnFacet(Facet): class ArrayFacet(Facet): type = "array" + def _is_json_array_of_strings(self, json_string): + try: + array = json.loads(json_string) + except ValueError: + return False + for item in array: + if not isinstance(item, str): + return False + return True + async def suggest(self): columns = await self.get_columns(self.sql, self.params) suggested_facets = [] @@ -282,18 +292,37 @@ class ArrayFacet(Facet): ) types = tuple(r[0] for r in results.rows) if types in (("array",), ("array", None)): - suggested_facets.append( - { - "name": column, - "type": "array", - "toggle_url": self.ds.absolute_url( - self.request, - path_with_added_args( - self.request, {"_facet_array": column} - ), + # Now sanity check that first 100 arrays contain only strings + first_100 = [ + v[0] + for v in await self.ds.execute( + self.database, + "select {column} from ({sql}) where {column} is not null and json_array_length({column}) > 0 limit 100".format( + column=escape_sqlite(column), sql=self.sql ), - } - ) + self.params, + truncate=False, + custom_time_limit=self.ds.config( + "facet_suggest_time_limit_ms" + ), + log_sql_errors=False, + ) + ] + if first_100 and all( + self._is_json_array_of_strings(r) for r in first_100 + ): + suggested_facets.append( + { + "name": column, + "type": "array", + "toggle_url": self.ds.absolute_url( + self.request, + path_with_added_args( + self.request, {"_facet_array": column} + ), + ), + } + ) except (QueryInterrupted, sqlite3.OperationalError): continue return suggested_facets @@ -476,190 +505,3 @@ class DateFacet(Facet): facets_timed_out.append(column) return facet_results, facets_timed_out - - -class ManyToManyFacet(Facet): - type = "m2m" - - async def suggest(self): - # This is calculated based on foreign key relationships to this table - # Are there any many-to-many tables pointing here? - suggested_facets = [] - db = self.ds.databases[self.database] - all_foreign_keys = await db.get_all_foreign_keys() - if not all_foreign_keys.get(self.table): - # It's probably a view - return [] - args = set(self.get_querystring_pairs()) - incoming = all_foreign_keys[self.table]["incoming"] - # Do any of these incoming tables have exactly two outgoing keys? 
- for fk in incoming: - other_table = fk["other_table"] - other_table_outgoing_foreign_keys = all_foreign_keys[other_table][ - "outgoing" - ] - if len(other_table_outgoing_foreign_keys) == 2: - destination_table = [ - t - for t in other_table_outgoing_foreign_keys - if t["other_table"] != self.table - ][0]["other_table"] - # Only suggest if it's not selected already - if ("_facet_m2m", destination_table) in args: - continue - suggested_facets.append( - { - "name": destination_table, - "type": "m2m", - "toggle_url": self.ds.absolute_url( - self.request, - path_with_added_args( - self.request, {"_facet_m2m": destination_table} - ), - ), - } - ) - return suggested_facets - - async def facet_results(self): - facet_results = {} - facets_timed_out = [] - args = set(self.get_querystring_pairs()) - facet_size = self.ds.config("default_facet_size") - db = self.ds.databases[self.database] - all_foreign_keys = await db.get_all_foreign_keys() - if not all_foreign_keys.get(self.table): - return [], [] - # We care about three tables: self.table, middle_table and destination_table - incoming = all_foreign_keys[self.table]["incoming"] - for source_and_config in self.get_configs(): - config = source_and_config["config"] - source = source_and_config["source"] - # The destination_table is specified in the _facet_m2m=xxx parameter - destination_table = config.get("column") or config["simple"] - # Find middle table - it has fks to self.table AND destination_table - fks = None - middle_table = None - for fk in incoming: - other_table = fk["other_table"] - other_table_outgoing_foreign_keys = all_foreign_keys[other_table][ - "outgoing" - ] - if ( - any( - o - for o in other_table_outgoing_foreign_keys - if o["other_table"] == destination_table - ) - and len(other_table_outgoing_foreign_keys) == 2 - ): - fks = other_table_outgoing_foreign_keys - middle_table = other_table - break - if middle_table is None or fks is None: - return [], [] - # Now that we have determined the middle_table, we need to figure out the three - # columns on that table which are relevant to us. 
These are: - # column_to_table - the middle_table column with a foreign key to self.table - # table_pk - the primary key column on self.table that is referenced - # column_to_destination - the column with a foreign key to destination_table - # - # It turns out we don't actually need the fourth obvious column: - # destination_pk = the primary key column on destination_table which is referenced - # - # These are both in the fks array - which now contains 2 foreign key relationships, e.g: - # [ - # {'other_table': 'characteristic', 'column': 'characteristic_id', 'other_column': 'pk'}, - # {'other_table': 'attractions', 'column': 'attraction_id', 'other_column': 'pk'} - # ] - column_to_table = None - table_pk = None - column_to_destination = None - for fk in fks: - if fk["other_table"] == self.table: - table_pk = fk["other_column"] - column_to_table = fk["column"] - elif fk["other_table"] == destination_table: - column_to_destination = fk["column"] - assert all((column_to_table, table_pk, column_to_destination)) - facet_sql = """ - select - {middle_table}.{column_to_destination} as value, - count(distinct {middle_table}.{column_to_table}) as count - from {middle_table} - where {middle_table}.{column_to_table} in ( - select {table_pk} from ({sql}) - ) - group by {middle_table}.{column_to_destination} - order by count desc limit {limit} - """.format( - sql=self.sql, - limit=facet_size + 1, - middle_table=escape_sqlite(middle_table), - column_to_destination=escape_sqlite(column_to_destination), - column_to_table=escape_sqlite(column_to_table), - table_pk=escape_sqlite(table_pk), - ) - try: - facet_rows_results = await self.ds.execute( - self.database, - facet_sql, - self.params, - truncate=False, - custom_time_limit=self.ds.config("facet_time_limit_ms"), - ) - facet_results_values = [] - facet_results[destination_table] = { - "name": destination_table, - "type": self.type, - "results": facet_results_values, - "hideable": source != "metadata", - "toggle_url": path_with_removed_args( - self.request, {"_facet_m2m": destination_table} - ), - "truncated": len(facet_rows_results) > facet_size, - } - facet_rows = facet_rows_results.rows[:facet_size] - - # Attempt to expand foreign keys into labels - values = [row["value"] for row in facet_rows] - expanded = await self.ds.expand_foreign_keys( - self.database, middle_table, column_to_destination, values - ) - - for row in facet_rows: - through = json.dumps( - { - "table": middle_table, - "column": column_to_destination, - "value": str(row["value"]), - }, - separators=(",", ":"), - sort_keys=True, - ) - selected = ("_through", through) in args - if selected: - toggle_path = path_with_removed_args( - self.request, {"_through": through} - ) - else: - toggle_path = path_with_added_args( - self.request, {"_through": through} - ) - facet_results_values.append( - { - "value": row["value"], - "label": expanded.get( - (column_to_destination, row["value"]), row["value"] - ), - "count": row["count"], - "toggle_url": self.ds.absolute_url( - self.request, toggle_path - ), - "selected": selected, - } - ) - except QueryInterrupted: - facets_timed_out.append(destination_table) - - return facet_results, facets_timed_out diff --git a/datasette/filters.py b/datasette/filters.py index efe014ae..5897a3ed 100644 --- a/datasette/filters.py +++ b/datasette/filters.py @@ -77,6 +77,20 @@ class InFilter(Filter): return "{} in {}".format(column, json.dumps(self.split_value(value))) +class NotInFilter(InFilter): + key = "notin" + display = "not in" + + def where_clause(self, table, 
column, value, param_counter): + values = self.split_value(value) + params = [":p{}".format(param_counter + i) for i in range(len(values))] + sql = "{} not in ({})".format(escape_sqlite(column), ", ".join(params)) + return sql, values + + def human_clause(self, column, value): + return "{} not in {}".format(column, json.dumps(self.split_value(value))) + + class Filters: _filters = ( [ @@ -125,6 +139,7 @@ class Filters: TemplatedFilter("like", "like", '"{c}" like :{p}', '{c} like "{v}"'), TemplatedFilter("glob", "glob", '"{c}" glob :{p}', '{c} glob "{v}"'), InFilter(), + NotInFilter(), ] + ( [ diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 61523a31..3c6726b7 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -5,6 +5,11 @@ hookspec = HookspecMarker("datasette") hookimpl = HookimplMarker("datasette") +@hookspec +def asgi_wrapper(datasette): + "Returns an ASGI middleware callable to wrap our ASGI application with" + + @hookspec def prepare_connection(conn): "Modify SQLite connection in some way e.g. register custom SQL functions" @@ -30,6 +35,11 @@ def extra_body_script(template, database, table, view_name, datasette): "Extra JavaScript code to be included in diff --git a/datasette/templates/_codemirror_foot.html b/datasette/templates/_codemirror_foot.html index 4b55bf8d..4019d448 100644 --- a/datasette/templates/_codemirror_foot.html +++ b/datasette/templates/_codemirror_foot.html @@ -1,13 +1,37 @@ diff --git a/datasette/templates/_footer.html b/datasette/templates/_footer.html new file mode 100644 index 00000000..f930f445 --- /dev/null +++ b/datasette/templates/_footer.html @@ -0,0 +1,21 @@ +Powered by Datasette +{% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} +{% if metadata %} + {% if metadata.license or metadata.license_url %}· Data license: + {% if metadata.license_url %} + {{ metadata.license or metadata.license_url }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source or metadata.source_url %}· + Data source: {% if metadata.source_url %} + + {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} + {% endif %} + {% if metadata.about or metadata.about_url %}· + About: {% if metadata.about_url %} + + {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} + {% endif %} +{% endif %} diff --git a/datasette/templates/_rows_and_columns.html b/datasette/templates/_table.html similarity index 100% rename from datasette/templates/_rows_and_columns.html rename to datasette/templates/_table.html diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 0ea41d7e..d26043f8 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -14,33 +14,15 @@ + + +
{% block content %} {% endblock %} - -
- Powered by Datasette - {% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} - {% if metadata %} - {% if metadata.license or metadata.license_url %}· Data license: - {% if metadata.license_url %} - {{ metadata.license or metadata.license_url }} - {% else %} - {{ metadata.license }} - {% endif %} - {% endif %} - {% if metadata.source or metadata.source_url %}· - Data source: {% if metadata.source_url %} - - {% endif %}{{ metadata.source or metadata.source_url }}{% if metadata.source_url %}{% endif %} - {% endif %} - {% if metadata.about or metadata.about_url %}· - About: {% if metadata.about_url %} - - {% endif %}{{ metadata.about or metadata.about_url }}{% if metadata.about_url %}{% endif %} - {% endif %} - {% endif %}
+
{% block footer %}{% include "_footer.html" %}{% endblock %}
+ {% for body_script in body_scripts %} {% endfor %} diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 9fb4d6eb..a0d0fcf6 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -9,8 +9,14 @@ {% block body_class %}db db-{{ database|to_css_class }}{% endblock %} +{% block nav %} +

<p class="crumbs"> + <a href="/">home</a> +
</p>
+ {{ super() }} +{% endblock %} + {% block content %} -

{{ metadata.title or database }}

@@ -19,8 +25,11 @@ {% if config.allow_sql %}

Custom SQL query

-

-

+

+

+ + +

{% endif %} diff --git a/datasette/templates/index.html b/datasette/templates/index.html index c8ad4148..b394564a 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -21,7 +21,8 @@ {{ "{:,}".format(database.views_count) }} view{% if database.views_count != 1 %}s{% endif %} {% endif %}

-

{% for table in database.tables_and_views_truncated %}{{ table.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

+

{% for table in database.tables_and_views_truncated %}{{ table.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_and_views_more %}, ...{% endif %}

{% endfor %} {% endblock %} diff --git a/datasette/templates/query.html b/datasette/templates/query.html index b4b759a5..34fa78a5 100644 --- a/datasette/templates/query.html +++ b/datasette/templates/query.html @@ -18,9 +18,15 @@ {% block body_class %}query db-{{ database|to_css_class }}{% endblock %} -{% block content %} - +{% block nav %} +

+ home / + {{ database }} +

+ {{ super() }} +{% endblock %} +{% block content %}

{{ metadata.title or database }}

{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %} @@ -31,7 +37,7 @@ {% if editable and config.allow_sql %}

{% else %} -
{% if query %}{{ query.sql }}{% endif %}
+
{% if query %}{{ query.sql }}{% endif %}
{% endif %} {% else %} @@ -43,7 +49,10 @@

{% endfor %} {% endif %} -

+

+ + +

{% if display_rows %} diff --git a/datasette/templates/row.html b/datasette/templates/row.html index baffaf96..5703900d 100644 --- a/datasette/templates/row.html +++ b/datasette/templates/row.html @@ -15,16 +15,23 @@ {% block body_class %}row db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %} -{% block content %} - +{% block nav %} +

+ home / + {{ database }} / + {{ table }} +

+ {{ super() }} +{% endblock %} +{% block content %}

{{ table }}: {{ ', '.join(primary_key_values) }}

{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}

This data as {% for name, url in renderers.items() %}{{ name }}{{ ", " if not loop.last }}{% endfor %}

-{% include custom_rows_and_columns_templates %} +{% include custom_table_templates %} {% if foreign_key_tables %}

Links from other tables

diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 5ba3ff6d..1841300b 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -16,8 +16,15 @@ {% block body_class %}table db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %} +{% block nav %} +

+ home / + {{ database }} +

+ {{ super() }} +{% endblock %} + {% block content %} -

{{ metadata.title or table }}{% if is_view %} (view){% endif %}

@@ -145,7 +152,7 @@
{% endif %} -{% include custom_rows_and_columns_templates %} +{% include custom_table_templates %} {% if next_url %}

Next page

@@ -177,11 +184,11 @@ {% endif %} {% if table_definition %} -
{{ table_definition }}
+
{{ table_definition }}
{% endif %} {% if view_definition %} -
{{ view_definition }}
+
{{ view_definition }}
{% endif %} {% endblock %} diff --git a/datasette/tracer.py b/datasette/tracer.py index c6fe0a00..e46a6fda 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -1,6 +1,7 @@ import asyncio from contextlib import contextmanager import time +import json import traceback tracers = {} @@ -32,15 +33,15 @@ def trace(type, **kwargs): start = time.time() yield end = time.time() - trace = { + trace_info = { "type": type, "start": start, "end": end, "duration_ms": (end - start) * 1000, "traceback": traceback.format_list(traceback.extract_stack(limit=6)[:-3]), } - trace.update(kwargs) - tracer.append(trace) + trace_info.update(kwargs) + tracer.append(trace_info) @contextmanager @@ -53,3 +54,77 @@ def capture_traces(tracer): tracers[task_id] = tracer yield del tracers[task_id] + + +class AsgiTracer: + # If the body is larger than this we don't attempt to append the trace + max_body_bytes = 1024 * 256 # 256 KB + + def __init__(self, app): + self.app = app + + async def __call__(self, scope, receive, send): + if b"_trace=1" not in scope.get("query_string", b"").split(b"&"): + await self.app(scope, receive, send) + return + trace_start = time.time() + traces = [] + + accumulated_body = b"" + size_limit_exceeded = False + response_headers = [] + + async def wrapped_send(message): + nonlocal accumulated_body, size_limit_exceeded, response_headers + if message["type"] == "http.response.start": + response_headers = message["headers"] + await send(message) + return + + if message["type"] != "http.response.body" or size_limit_exceeded: + await send(message) + return + + # Accumulate body until the end or until size is exceeded + accumulated_body += message["body"] + if len(accumulated_body) > self.max_body_bytes: + await send( + { + "type": "http.response.body", + "body": accumulated_body, + "more_body": True, + } + ) + size_limit_exceeded = True + return + + if not message.get("more_body"): + # We have all the body - modify it and send the result + # TODO: What to do about Content-Type or other cases? + trace_info = { + "request_duration_ms": 1000 * (time.time() - trace_start), + "sum_trace_duration_ms": sum(t["duration_ms"] for t in traces), + "num_traces": len(traces), + "traces": traces, + } + try: + content_type = [ + v.decode("utf8") + for k, v in response_headers + if k.lower() == b"content-type" + ][0] + except IndexError: + content_type = "" + if "text/html" in content_type and b"" in accumulated_body: + extra = json.dumps(trace_info, indent=2) + extra_html = "
<pre>{}</pre></body>
".format(extra).encode("utf8") + accumulated_body = accumulated_body.replace(b"", extra_html) + elif "json" in content_type and accumulated_body.startswith(b"{"): + data = json.loads(accumulated_body.decode("utf8")) + if "_trace" not in data: + data["_trace"] = trace_info + accumulated_body = json.dumps(data).encode("utf8") + await send({"type": "http.response.body", "body": accumulated_body}) + + with capture_traces(traces): + await self.app(scope, receive, wrapped_send) diff --git a/datasette/utils.py b/datasette/utils/__init__.py similarity index 95% rename from datasette/utils.py rename to datasette/utils/__init__.py index 56fe2996..3d28a36b 100644 --- a/datasette/utils.py +++ b/datasette/utils/__init__.py @@ -3,7 +3,6 @@ from collections import OrderedDict import base64 import click import hashlib -import imp import json import os import pkg_resources @@ -11,6 +10,7 @@ import re import shlex import tempfile import time +import types import shutil import urllib import numbers @@ -167,6 +167,8 @@ allowed_sql_res = [ re.compile(r"^explain select\b"), re.compile(r"^explain query plan select\b"), re.compile(r"^with\b"), + re.compile(r"^explain with\b"), + re.compile(r"^explain query plan with\b"), ] disallawed_sql_res = [(re.compile("pragma"), "Statement may not contain PRAGMA")] @@ -261,27 +263,6 @@ def escape_sqlite(s): return "[{}]".format(s) -_decode_path_component_re = re.compile(r"U\+([\da-h]{4})", re.IGNORECASE) -_encode_path_component_re = re.compile( - "[{}]".format( - "".join( - re.escape(c) - for c in (";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "~") - ) - ) -) - - -def decode_path_component(table_name): - return _decode_path_component_re.sub(lambda m: chr(int(m.group(1), 16)), table_name) - - -def encode_path_component(table_name): - return _encode_path_component_re.sub( - lambda m: "U+{0:0{1}x}".format(ord(m.group(0)), 4).upper(), table_name - ) - - def make_dockerfile( files, metadata_file, @@ -293,6 +274,7 @@ def make_dockerfile( install, spatialite, version_note, + environment_variables=None, ): cmd = ["datasette", "serve", "--host", "0.0.0.0"] for filename in files: @@ -328,11 +310,18 @@ FROM python:3.6 COPY . 
/app WORKDIR /app {spatialite_extras} +{environment_variables} RUN pip install -U {install_from} RUN datasette inspect {files} --inspect-file inspect-data.json ENV PORT 8001 EXPOSE 8001 CMD {cmd}""".format( + environment_variables="\n".join( + [ + "ENV {} '{}'".format(key, value) + for key, value in (environment_variables or {}).items() + ] + ), files=" ".join(files), cmd=cmd, install_from=" ".join(install), @@ -354,6 +343,7 @@ def temporary_docker_directory( spatialite, version_note, extra_metadata=None, + environment_variables=None, ): extra_metadata = extra_metadata or {} tmp = tempfile.TemporaryDirectory() @@ -382,6 +372,7 @@ def temporary_docker_directory( install, spatialite, version_note, + environment_variables, ) os.chdir(datasette_dir) if metadata_content: @@ -480,6 +471,7 @@ def detect_fts_sql(table): where rootpage = 0 and ( sql like '%VIRTUAL TABLE%USING FTS%content="{table}"%' + or sql like '%VIRTUAL TABLE%USING FTS%content=[{table}]%' or ( tbl_name = "{table}" and sql like '%VIRTUAL TABLE%USING FTS%' @@ -609,7 +601,7 @@ def link_or_copy_directory(src, dst): def module_from_path(path, name): # Adapted from http://sayspy.blogspot.com/2011/07/how-to-import-module-from-just-file.html - mod = imp.new_module(name) + mod = types.ModuleType(name) mod.__file__ = path with open(path, "r") as file: code = compile(file.read(), path, "exec", dont_inherit=True) @@ -641,6 +633,7 @@ def get_plugins(pm): distinfo = plugin_to_distinfo.get(plugin) if distinfo: plugin_info["version"] = distinfo.version + plugin_info["name"] = distinfo.project_name plugins.append(plugin_info) return plugins @@ -718,13 +711,13 @@ class LimitedWriter: self.limit_bytes = limit_mb * 1024 * 1024 self.bytes_count = 0 - def write(self, bytes): + async def write(self, bytes): self.bytes_count += len(bytes) if self.limit_bytes and (self.bytes_count > self.limit_bytes): raise WriteLimitExceeded( "CSV contains more than {} bytes".format(self.limit_bytes) ) - self.writer.write(bytes) + await self.writer.write(bytes) _infinities = {float("inf"), float("-inf")} @@ -746,7 +739,8 @@ class StaticMount(click.ParamType): param, ctx, ) - path, dirpath = value.split(":") + path, dirpath = value.split(":", 1) + dirpath = os.path.abspath(dirpath) if not os.path.exists(dirpath) or not os.path.isdir(dirpath): self.fail("%s is not a valid directory path" % value, param, ctx) return path, dirpath @@ -762,3 +756,16 @@ def format_bytes(bytes): return "{} {}".format(int(current), unit) else: return "{:.1f} {}".format(current, unit) + + +class RequestParameters(dict): + def get(self, name, default=None): + "Return first value in the list, if available" + try: + return super().get(name)[0] + except (KeyError, TypeError): + return default + + def getlist(self, name, default=None): + "Return full list" + return super().get(name, default) diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py new file mode 100644 index 00000000..bafcfb4a --- /dev/null +++ b/datasette/utils/asgi.py @@ -0,0 +1,384 @@ +import json +from datasette.utils import RequestParameters +from mimetypes import guess_type +from urllib.parse import parse_qs, urlunparse +from pathlib import Path +from html import escape +import re +import aiofiles + + +class NotFound(Exception): + pass + + +class Request: + def __init__(self, scope): + self.scope = scope + + @property + def method(self): + return self.scope["method"] + + @property + def url(self): + return urlunparse( + (self.scheme, self.host, self.path, None, self.query_string, None) + ) + + @property + def 
scheme(self): + return self.scope.get("scheme") or "http" + + @property + def headers(self): + return dict( + [ + (k.decode("latin-1").lower(), v.decode("latin-1")) + for k, v in self.scope.get("headers") or [] + ] + ) + + @property + def host(self): + return self.headers.get("host") or "localhost" + + @property + def path(self): + if "raw_path" in self.scope: + return self.scope["raw_path"].decode("latin-1") + else: + return self.scope["path"].decode("utf-8") + + @property + def query_string(self): + return (self.scope.get("query_string") or b"").decode("latin-1") + + @property + def args(self): + return RequestParameters(parse_qs(qs=self.query_string)) + + @property + def raw_args(self): + return {key: value[0] for key, value in self.args.items()} + + @classmethod + def fake(cls, path_with_query_string, method="GET", scheme="http"): + "Useful for constructing Request objects for tests" + path, _, query_string = path_with_query_string.partition("?") + scope = { + "http_version": "1.1", + "method": method, + "path": path, + "raw_path": path.encode("latin-1"), + "query_string": query_string.encode("latin-1"), + "scheme": scheme, + "type": "http", + } + return cls(scope) + + +class AsgiRouter: + def __init__(self, routes=None): + routes = routes or [] + self.routes = [ + # Compile any strings to regular expressions + ((re.compile(pattern) if isinstance(pattern, str) else pattern), view) + for pattern, view in routes + ] + + async def __call__(self, scope, receive, send): + # Because we care about "foo/bar" v.s. "foo%2Fbar" we decode raw_path ourselves + path = scope["path"] + raw_path = scope.get("raw_path") + if raw_path: + path = raw_path.decode("ascii") + for regex, view in self.routes: + match = regex.match(path) + if match is not None: + new_scope = dict(scope, url_route={"kwargs": match.groupdict()}) + try: + return await view(new_scope, receive, send) + except Exception as exception: + return await self.handle_500(scope, receive, send, exception) + return await self.handle_404(scope, receive, send) + + async def handle_404(self, scope, receive, send): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + await send({"type": "http.response.body", "body": b"

<h1>404</h1>

"}) + + async def handle_500(self, scope, receive, send, exception): + await send( + { + "type": "http.response.start", + "status": 404, + "headers": [[b"content-type", b"text/html"]], + } + ) + html = "

<h1>500</h1><pre>{}</pre>

".format(escape(repr(exception))) + await send({"type": "http.response.body", "body": html.encode("latin-1")}) + + +class AsgiLifespan: + def __init__(self, app, on_startup=None, on_shutdown=None): + self.app = app + on_startup = on_startup or [] + on_shutdown = on_shutdown or [] + if not isinstance(on_startup or [], list): + on_startup = [on_startup] + if not isinstance(on_shutdown or [], list): + on_shutdown = [on_shutdown] + self.on_startup = on_startup + self.on_shutdown = on_shutdown + + async def __call__(self, scope, receive, send): + if scope["type"] == "lifespan": + while True: + message = await receive() + if message["type"] == "lifespan.startup": + for fn in self.on_startup: + await fn() + await send({"type": "lifespan.startup.complete"}) + elif message["type"] == "lifespan.shutdown": + for fn in self.on_shutdown: + await fn() + await send({"type": "lifespan.shutdown.complete"}) + return + else: + await self.app(scope, receive, send) + + +class AsgiView: + def dispatch_request(self, request, *args, **kwargs): + handler = getattr(self, request.method.lower(), None) + return handler(request, *args, **kwargs) + + @classmethod + def as_asgi(cls, *class_args, **class_kwargs): + async def view(scope, receive, send): + # Uses scope to create a request object, then dispatches that to + # self.get(...) or self.options(...) along with keyword arguments + # that were already tucked into scope["url_route"]["kwargs"] by + # the router, similar to how Django Channels works: + # https://channels.readthedocs.io/en/latest/topics/routing.html#urlrouter + request = Request(scope) + self = view.view_class(*class_args, **class_kwargs) + response = await self.dispatch_request( + request, **scope["url_route"]["kwargs"] + ) + await response.asgi_send(send) + + view.view_class = cls + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.__name__ = cls.__name__ + return view + + +class AsgiStream: + def __init__(self, stream_fn, status=200, headers=None, content_type="text/plain"): + self.stream_fn = stream_fn + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + # Remove any existing content-type header + headers = dict( + [(k, v) for k, v in self.headers.items() if k.lower() != "content-type"] + ) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + w = AsgiWriter(send) + await self.stream_fn(w) + await send({"type": "http.response.body", "body": b""}) + + +class AsgiWriter: + def __init__(self, send): + self.send = send + + async def write(self, chunk): + await self.send( + { + "type": "http.response.body", + "body": chunk.encode("utf-8"), + "more_body": True, + } + ) + + +async def asgi_send_json(send, info, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, + json.dumps(info), + status=status, + headers=headers, + content_type="application/json; charset=utf-8", + ) + + +async def asgi_send_html(send, html, status=200, headers=None): + headers = headers or {} + await asgi_send( + send, html, status=status, headers=headers, content_type="text/html" + ) + + +async def asgi_send_redirect(send, location, status=302): + await asgi_send( + send, + "", + status=status, + headers={"Location": location}, + content_type="text/html", + ) + + +async def asgi_send(send, content, status, headers=None, 
content_type="text/plain"): + await asgi_start(send, status, headers, content_type) + await send({"type": "http.response.body", "body": content.encode("latin-1")}) + + +async def asgi_start(send, status, headers=None, content_type="text/plain"): + headers = headers or {} + # Remove any existing content-type header + headers = dict([(k, v) for k, v in headers.items() if k.lower() != "content-type"]) + headers["content-type"] = content_type + await send( + { + "type": "http.response.start", + "status": status, + "headers": [ + [key.encode("latin1"), value.encode("latin1")] + for key, value in headers.items() + ], + } + ) + + +async def asgi_send_file( + send, filepath, filename=None, content_type=None, chunk_size=4096 +): + headers = {} + if filename: + headers["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) + first = True + async with aiofiles.open(str(filepath), mode="rb") as fp: + if first: + await asgi_start( + send, + 200, + headers, + content_type or guess_type(str(filepath))[0] or "text/plain", + ) + first = False + more_body = True + while more_body: + chunk = await fp.read(chunk_size) + more_body = len(chunk) == chunk_size + await send( + {"type": "http.response.body", "body": chunk, "more_body": more_body} + ) + + +def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None): + async def inner_static(scope, receive, send): + path = scope["url_route"]["kwargs"]["path"] + try: + full_path = (Path(root_path) / path).resolve().absolute() + except FileNotFoundError: + await asgi_send_html(send, "404", 404) + return + # Ensure full_path is within root_path to avoid weird "../" tricks + try: + full_path.relative_to(root_path) + except ValueError: + await asgi_send_html(send, "404", 404) + return + try: + await asgi_send_file(send, full_path, chunk_size=chunk_size) + except FileNotFoundError: + await asgi_send_html(send, "404", 404) + return + + return inner_static + + +class Response: + def __init__(self, body=None, status=200, headers=None, content_type="text/plain"): + self.body = body + self.status = status + self.headers = headers or {} + self.content_type = content_type + + async def asgi_send(self, send): + headers = {} + headers.update(self.headers) + headers["content-type"] = self.content_type + await send( + { + "type": "http.response.start", + "status": self.status, + "headers": [ + [key.encode("utf-8"), value.encode("utf-8")] + for key, value in headers.items() + ], + } + ) + body = self.body + if not isinstance(body, bytes): + body = body.encode("utf-8") + await send({"type": "http.response.body", "body": body}) + + @classmethod + def html(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/html; charset=utf-8", + ) + + @classmethod + def text(cls, body, status=200, headers=None): + return cls( + body, + status=status, + headers=headers, + content_type="text/plain; charset=utf-8", + ) + + @classmethod + def redirect(cls, path, status=302, headers=None): + headers = headers or {} + headers["Location"] = path + return cls("", status=status, headers=headers) + + +class AsgiFileDownload: + def __init__( + self, filepath, filename=None, content_type="application/octet-stream" + ): + self.filepath = filepath + self.filename = filename + self.content_type = content_type + + async def asgi_send(self, send): + return await asgi_send_file(send, self.filepath, content_type=self.content_type) diff --git a/datasette/views/base.py b/datasette/views/base.py index b8863ff3..062c6956 100644 
--- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -7,9 +7,8 @@ import urllib import jinja2 import pint -from sanic import response -from sanic.exceptions import NotFound -from sanic.views import HTTPMethodView + +from html import escape from datasette import __version__ from datasette.plugins import pm @@ -26,6 +25,14 @@ from datasette.utils import ( sqlite3, to_css_class, ) +from datasette.utils.asgi import ( + AsgiStream, + AsgiWriter, + AsgiRouter, + AsgiView, + NotFound, + Response, +) ureg = pint.UnitRegistry() @@ -49,7 +56,14 @@ class DatasetteError(Exception): self.messagge_is_html = messagge_is_html -class RenderMixin(HTTPMethodView): +class BaseView(AsgiView): + ds = None + + async def head(self, *args, **kwargs): + response = await self.get(*args, **kwargs) + response.body = b"" + return response + def _asset_urls(self, key, template, context): # Flatten list-of-lists from plugins: seen_urls = set() @@ -88,7 +102,7 @@ class RenderMixin(HTTPMethodView): def database_color(self, database): return "ff0000" - def render(self, templates, **context): + async def render(self, templates, request, context): template = self.ds.jinja_env.select_template(templates) select_templates = [ "{}{}".format("*" if template_name == template.name else "", template_name) @@ -104,7 +118,27 @@ class RenderMixin(HTTPMethodView): datasette=self.ds, ): body_scripts.append(jinja2.Markup(script)) - return response.html( + + extra_template_vars = {} + # pylint: disable=no-member + for extra_vars in pm.hook.extra_template_vars( + template=template.name, + database=context.get("database"), + table=context.get("table"), + view_name=self.name, + request=request, + datasette=self.ds, + ): + if callable(extra_vars): + extra_vars = extra_vars() + if asyncio.iscoroutine(extra_vars): + extra_vars = await extra_vars + assert isinstance(extra_vars, dict), "extra_vars is of type {}".format( + type(extra_vars) + ) + extra_template_vars.update(extra_vars) + + return Response.html( template.render( { **context, @@ -123,12 +157,13 @@ class RenderMixin(HTTPMethodView): "database_url": self.database_url, "database_color": self.database_color, }, + **extra_template_vars, } ) ) -class BaseView(RenderMixin): +class DataView(BaseView): name = "" re_named_parameter = re.compile(":([a-zA-Z0-9_]+)") @@ -136,7 +171,7 @@ class BaseView(RenderMixin): self.ds = datasette def options(self, request, *args, **kwargs): - r = response.text("ok") + r = Response.text("ok") if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" return r @@ -146,7 +181,7 @@ class BaseView(RenderMixin): path = "{}?{}".format(path, request.query_string) if remove_args: path = path_with_removed_args(request, remove_args, path=path) - r = response.redirect(path) + r = Response.redirect(path) r.headers["Link"] = "<{}>; rel=preload".format(path) if self.ds.cors: r.headers["Access-Control-Allow-Origin"] = "*" @@ -158,22 +193,23 @@ class BaseView(RenderMixin): async def resolve_db_name(self, request, db_name, **kwargs): hash = None name = None - if "-" in db_name: - # Might be name-and-hash, or might just be - # a name with a hyphen in it - name, hash = db_name.rsplit("-", 1) - if name not in self.ds.databases: - # Try the whole name - name = db_name - hash = None + if db_name not in self.ds.databases and "-" in db_name: + # No matching DB found, maybe it's a name-hash? 
+ name_bit, hash_bit = db_name.rsplit("-", 1) + if name_bit not in self.ds.databases: + raise NotFound("Database not found: {}".format(name)) + else: + name = name_bit + hash = hash_bit else: name = db_name - # Verify the hash + name = urllib.parse.unquote_plus(name) try: db = self.ds.databases[name] except KeyError: raise NotFound("Database not found: {}".format(name)) + # Verify the hash expected = "000" if db.hash is not None: expected = db.hash[:HASH_LENGTH] @@ -195,17 +231,17 @@ class BaseView(RenderMixin): kwargs["table"] = table if _format: kwargs["as_format"] = ".{}".format(_format) - elif "table" in kwargs: + elif kwargs.get("table"): kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"]) should_redirect = "/{}-{}".format(name, expected) - if "table" in kwargs: + if kwargs.get("table"): should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"]) - if "pk_path" in kwargs: + if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] - if "as_format" in kwargs: + if kwargs.get("as_format"): should_redirect += kwargs["as_format"] - if "as_db" in kwargs: + if kwargs.get("as_db"): should_redirect += kwargs["as_db"] if ( @@ -222,9 +258,12 @@ class BaseView(RenderMixin): assert NotImplemented async def get(self, request, db_name, **kwargs): - database, hash, correct_hash_provided, should_redirect = await self.resolve_db_name( - request, db_name, **kwargs - ) + ( + database, + hash, + correct_hash_provided, + should_redirect, + ) = await self.resolve_db_name(request, db_name, **kwargs) if should_redirect: return self.redirect(request, should_redirect, remove_args={"_hash"}) @@ -246,7 +285,7 @@ class BaseView(RenderMixin): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: data, _, _ = response_or_template_contexts @@ -282,26 +321,30 @@ class BaseView(RenderMixin): if not first: data, _, _ = await self.data(request, database, hash, **kwargs) if first: - writer.writerow(headings) + await writer.writerow(headings) first = False next = data.get("next") for row in data["rows"]: if not expanded_columns: # Simple path - writer.writerow(row) + await writer.writerow(row) else: # Look for {"value": "label": } dicts and expand new_row = [] - for cell in row: - if isinstance(cell, dict): - new_row.append(cell["value"]) - new_row.append(cell["label"]) + for heading, cell in zip(data["columns"], row): + if heading in expanded_columns: + if cell is None: + new_row.extend(("", "")) + else: + assert isinstance(cell, dict) + new_row.append(cell["value"]) + new_row.append(cell["label"]) else: new_row.append(cell) - writer.writerow(new_row) + await writer.writerow(new_row) except Exception as e: print("caught this", e) - r.write(str(e)) + await r.write(str(e)) return content_type = "text/plain; charset=utf-8" @@ -315,7 +358,7 @@ class BaseView(RenderMixin): ) headers["Content-Disposition"] = disposition - return response.stream(stream_fn, headers=headers, content_type=content_type) + return AsgiStream(stream_fn, headers=headers, content_type=content_type) async def get_format(self, request, database, args): """ Determine the format of the response from the request, from URL @@ -327,6 +370,8 @@ class BaseView(RenderMixin): _format = request.args.get("_format", None) if not _format: _format = (args.pop("as_format", None) or "").lstrip(".") + else: + args.pop("as_format", None) if 
"table_and_format" in args: db = self.ds.databases[database] @@ -352,7 +397,7 @@ class BaseView(RenderMixin): return await self.as_csv(request, database, hash, **kwargs) if _format is None: - # HTML views default to expanding all foriegn key labels + # HTML views default to expanding all foreign key labels kwargs["default_labels"] = True extra_template_data = {} @@ -363,7 +408,7 @@ class BaseView(RenderMixin): response_or_template_contexts = await self.data( request, database, hash, **kwargs ) - if isinstance(response_or_template_contexts, response.HTTPResponse): + if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: @@ -414,17 +459,11 @@ class BaseView(RenderMixin): if result is None: raise NotFound("No data") - response_args = { - "content_type": result.get("content_type", "text/plain"), - "status": result.get("status_code", 200), - } - - if type(result.get("body")) == bytes: - response_args["body_bytes"] = result.get("body") - else: - response_args["body"] = result.get("body") - - r = response.HTTPResponse(**response_args) + r = Response( + body=result.get("body"), + status=result.get("status_code", 200), + content_type=result.get("content_type", "text/plain"), + ) else: extras = {} if callable(extra_template_data): @@ -463,7 +502,7 @@ class BaseView(RenderMixin): } if "metadata" not in context: context["metadata"] = self.ds.metadata - r = self.render(templates, **context) + r = await self.render(templates, request=request, context=context) r.status = status_code ttl = request.args.get("_ttl", None) diff --git a/datasette/views/database.py b/datasette/views/database.py index 859a271f..31d6af59 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1,22 +1,26 @@ import os -from sanic import response - from datasette.utils import to_css_class, validate_sql_select +from datasette.utils.asgi import AsgiFileDownload -from .base import BaseView, DatasetteError +from .base import DatasetteError, DataView -class DatabaseView(BaseView): +class DatabaseView(DataView): name = "database" async def data(self, request, database, hash, default_labels=False, _size=None): + metadata = (self.ds.metadata("databases") or {}).get(database, {}) + self.ds.update_with_inherited_metadata(metadata) + if request.args.get("sql"): if not self.ds.config("allow_sql"): raise DatasetteError("sql= is not allowed", status=400) sql = request.raw_args.pop("sql") validate_sql_select(sql) - return await self.custom_sql(request, database, hash, sql, _size=_size) + return await self.custom_sql( + request, database, hash, sql, _size=_size, metadata=metadata + ) db = self.ds.databases[database] @@ -25,9 +29,6 @@ class DatabaseView(BaseView): hidden_table_names = set(await db.hidden_table_names()) all_foreign_keys = await db.get_all_foreign_keys() - metadata = (self.ds.metadata("databases") or {}).get(database, {}) - self.ds.update_with_inherited_metadata(metadata) - tables = [] for table in table_counts: table_columns = await db.table_columns(table) @@ -65,7 +66,7 @@ class DatabaseView(BaseView): ) -class DatabaseDownload(BaseView): +class DatabaseDownload(DataView): name = "database_download" async def view_get(self, request, database, hash, correct_hash_present, **kwargs): @@ -79,8 +80,8 @@ class DatabaseDownload(BaseView): if not db.path: raise DatasetteError("Cannot download database", status=404) filepath = db.path - return await response.file_stream( + return AsgiFileDownload( filepath, filename=os.path.basename(filepath), - 
mime_type="application/octet-stream", + content_type="application/octet-stream", ) diff --git a/datasette/views/index.py b/datasette/views/index.py index 30c77b41..f2e5f774 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -1,12 +1,11 @@ import hashlib import json -from sanic import response - from datasette.utils import CustomJSONEncoder +from datasette.utils.asgi import Response from datasette.version import __version__ -from .base import RenderMixin +from .base import BaseView # Truncate table list on homepage at: @@ -16,7 +15,7 @@ TRUNCATE_AT = 5 COUNT_TABLE_LIMIT = 30 -class IndexView(RenderMixin): +class IndexView(BaseView): name = "index" def __init__(self, datasette): @@ -98,21 +97,22 @@ class IndexView(RenderMixin): } ) - databases.sort(key=lambda database: database["name"]) - if as_format: headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( + return Response( json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder), - content_type="application/json", + content_type="application/json; charset=utf-8", headers=headers, ) else: - return self.render( + return await self.render( ["index.html"], - databases=databases, - metadata=self.ds.metadata(), - datasette_version=__version__, + request=request, + context={ + "databases": databases, + "metadata": self.ds.metadata(), + "datasette_version": __version__, + }, ) diff --git a/datasette/views/special.py b/datasette/views/special.py index b93a330b..45e948f6 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -1,9 +1,9 @@ import json -from sanic import response -from .base import RenderMixin +from datasette.utils.asgi import Response +from .base import BaseView -class JsonDataView(RenderMixin): +class JsonDataView(BaseView): name = "json_data" def __init__(self, datasette, filename, data_callback): @@ -17,9 +17,15 @@ class JsonDataView(RenderMixin): headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" - return response.HTTPResponse( - json.dumps(data), content_type="application/json", headers=headers + return Response( + json.dumps(data), + content_type="application/json; charset=utf-8", + headers=headers, ) else: - return self.render(["show_json.html"], filename=self.filename, data=data) + return await self.render( + ["show_json.html"], + request=request, + context={"filename": self.filename, "data": data}, + ) diff --git a/datasette/views/table.py b/datasette/views/table.py index 04100dc6..a60a3941 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -3,13 +3,12 @@ import itertools import json import jinja2 -from sanic.exceptions import NotFound -from sanic.request import RequestParameters from datasette.plugins import pm from datasette.utils import ( CustomRow, QueryInterrupted, + RequestParameters, append_querystring, compound_keys_after_sql, escape_sqlite, @@ -24,8 +23,9 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) +from datasette.utils.asgi import NotFound from datasette.filters import Filters -from .base import BaseView, DatasetteError, ureg +from .base import DataView, DatasetteError, ureg LINK_WITH_LABEL = ( '{label} {id}' @@ -33,7 +33,36 @@ LINK_WITH_LABEL = ( LINK_WITH_VALUE = '{id}' -class RowTableShared(BaseView): +class Row: + def __init__(self, cells): + self.cells = cells + + def __iter__(self): + return iter(self.cells) + + def __getitem__(self, key): + for cell in self.cells: + if cell["column"] == key: + return cell["raw"] + raise KeyError 
+ + def display(self, key): + for cell in self.cells: + if cell["column"] == key: + return cell["value"] + return None + + def __str__(self): + d = { + key: self[key] + for key in [ + c["column"] for c in self.cells if not c.get("is_special_link_column") + ] + } + return json.dumps(d, default=repr, indent=2) + + +class RowTableShared(DataView): async def sortable_columns_for_table(self, database, table, use_rowid): db = self.ds.databases[database] table_metadata = self.ds.table_metadata(database, table) @@ -76,18 +105,18 @@ class RowTableShared(BaseView): # Unless we are a view, the first column is a link - either to the rowid # or to the simple or compound primary key if link_column: + is_special_link_column = len(pks) != 1 + pk_path = path_from_row_pks(row, pks, not pks, False) cells.append( { "column": pks[0] if len(pks) == 1 else "Link", + "is_special_link_column": is_special_link_column, + "raw": pk_path, "value": jinja2.Markup( '{flat_pks}'.format( database=database, table=urllib.parse.quote_plus(table), - flat_pks=str( - jinja2.escape( - path_from_row_pks(row, pks, not pks, False) - ) - ), + flat_pks=str(jinja2.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) ), @@ -159,8 +188,8 @@ class RowTableShared(BaseView): if truncate_cells and len(display_value) > truncate_cells: display_value = display_value[:truncate_cells] + u"\u2026" - cells.append({"column": column, "value": display_value}) - cell_rows.append(cells) + cells.append({"column": column, "value": display_value, "raw": value}) + cell_rows.append(Row(cells)) if link_column: # Add the link column header. @@ -206,21 +235,24 @@ class TableView(RowTableShared): raise NotFound("Table not found: {}".format(table)) pks = await db.primary_keys(table) + table_columns = await db.table_columns(table) + + select_columns = ", ".join(escape_sqlite(t) for t in table_columns) + use_rowid = not pks and not is_view if use_rowid: - select = "rowid, *" + select = "rowid, {}".format(select_columns) order_by = "rowid" order_by_pks = "rowid" else: - select = "*" + select = select_columns order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks if is_view: order_by = "" - # We roll our own query_string decoder because by default Sanic - # drops anything with an empty value e.g. ?name__exact= + # Ensure we don't drop anything with an empty value e.g. 
?name__exact= args = RequestParameters( urllib.parse.parse_qs(request.query_string, keep_blank_values=True) ) @@ -229,12 +261,10 @@ class TableView(RowTableShared): # That's so if there is a column that starts with _ # it can still be queried using ?_col__exact=blah special_args = {} - special_args_lists = {} other_args = [] for key, value in args.items(): if key.startswith("_") and "__" not in key: special_args[key] = value[0] - special_args_lists[key] = value else: for v in value: other_args.append((key, v)) @@ -467,18 +497,6 @@ class TableView(RowTableShared): if order_by: order_by = "order by {} ".format(order_by) - # _group_count=col1&_group_count=col2 - group_count = special_args_lists.get("_group_count") or [] - if group_count: - sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format( - group_cols=", ".join( - '"{}"'.format(group_count_col) for group_count_col in group_count - ), - table_name=escape_sqlite(table), - where=where_clause, - ) - return await self.custom_sql(request, database, hash, sql, editable=True) - extra_args = {} # Handle ?_size=500 page_size = _size or request.raw_args.get("_size") @@ -558,9 +576,10 @@ class TableView(RowTableShared): ) for facet in facet_instances: - instance_facet_results, instance_facets_timed_out = ( - await facet.facet_results() - ) + ( + instance_facet_results, + instance_facets_timed_out, + ) = await facet.facet_results() facet_results.update(instance_facet_results) facets_timed_out.extend(instance_facets_timed_out) @@ -608,7 +627,7 @@ class TableView(RowTableShared): new_row = CustomRow(columns) for column in row.keys(): value = row[column] - if (column, value) in expanded_labels: + if (column, value) in expanded_labels and value is not None: new_row[column] = { "value": value, "label": expanded_labels[(column, value)], @@ -692,6 +711,9 @@ class TableView(RowTableShared): for arg in ("_fts_table", "_fts_pk"): if arg in special_args: form_hidden_args.append((arg, special_args[arg])) + if request.args.get("_where"): + for where_text in request.args["_where"]: + form_hidden_args.append(("_where", where_text)) return { "supports_search": bool(fts_table), "search": search or "", @@ -716,14 +738,14 @@ class TableView(RowTableShared): "sort": sort, "sort_desc": sort_desc, "disable_sort": is_view, - "custom_rows_and_columns_templates": [ - "_rows_and_columns-{}-{}.html".format( + "custom_table_templates": [ + "_table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns-table-{}-{}.html".format( + "_table-table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns.html", + "_table.html", ], "metadata": metadata, "view_definition": await db.get_view_definition(table), @@ -800,14 +822,14 @@ class RowView(RowTableShared): ), "display_columns": display_columns, "display_rows": display_rows, - "custom_rows_and_columns_templates": [ - "_rows_and_columns-{}-{}.html".format( + "custom_table_templates": [ + "_table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns-row-{}-{}.html".format( + "_table-row-{}-{}.html".format( to_css_class(database), to_css_class(table) ), - "_rows_and_columns.html", + "_table.html", ], "metadata": (self.ds.metadata("databases") or {}) .get(database, {}) diff --git a/docs/changelog.rst b/docs/changelog.rst index b4be3f2d..f4761efe 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,159 @@ Changelog ========= +.. 
_v0_30_2: + +0.30.2 (2019-11-02) +------------------- + +- ``/-/plugins`` page now uses distribution name e.g. ``datasette-cluster-map`` instead of the name of the underlying Python package (``datasette_cluster_map``) (`#606 `__) +- Array faceting is now only suggested for columns that contain arrays of strings (`#562 `__) +- Better documentation for the ``--host`` argument (`#574 `__) +- Don't show ``None`` with a broken link for the label on a nullable foreign key (`#406 `__) + +.. _v0_30_1: + +0.30.1 (2019-10-30) +------------------- + +- Fixed bug where ``?_where=`` parameter was not persisted in hidden form fields (`#604 `__) +- Fixed bug with .JSON representation of row pages - thanks, Chris Shaw (`#603 `__) + +.. _v0_30: + + +0.30 (2019-10-18) +----------------- + +- Added ``/-/threads`` debugging page +- Allow ``EXPLAIN WITH...`` (`#583 `__) +- Button to format SQL - thanks, Tobias Kunze (`#136 `__) +- Sort databases on homepage by argument order - thanks, Tobias Kunze (`#585 `__) +- Display metadata footer on custom SQL queries - thanks, Tobias Kunze (`#589 `__) +- Use ``--platform=managed`` for ``publish cloudrun`` (`#587 `__) +- Fixed bug returning non-ASCII characters in CSV (`#584 `__) +- Fix for ``/foo`` v.s. ``/foo-bar`` bug (`#601 `__) + +.. _v0_29_3: + +0.29.3 (2019-09-02) +------------------- + +- Fixed implementation of CodeMirror on database page (`#560 `__) +- Documentation typo fixes - thanks, Min ho Kim (`#561 `__) +- Mechanism for detecting if a table has FTS enabled now works if the table name used alternative escaping mechanisms (`#570 `__) - for compatibility with `a recent change to sqlite-utils `__. + +.. _v0_29_2: + +0.29.2 (2019-07-13) +------------------- + +- Bumped `Uvicorn `__ to 0.8.4, fixing a bug where the querystring was not included in the server logs. (`#559 `__) +- Fixed bug where the navigation breadcrumbs were not displayed correctly on the page for a custom query. (`#558 `__) +- Fixed bug where custom query names containing unicode characters caused errors. + +.. _v0_29_1: + +0.29.1 (2019-07-11) +------------------- + +- Fixed bug with static mounts using relative paths which could lead to traversal exploits (`#555 `__) - thanks Abdussamet Kocak! +- Datasette can now be run as a module: ``python -m datasette`` (`#556 `__) - thanks, Abdussamet Kocak! + +.. _v0_29: + +0.29 (2019-07-07) +----------------- + +ASGI, new plugin hooks, facet by date and much, much more... + +ASGI +~~~~ + +`ASGI `__ is the Asynchronous Server Gateway Interface standard. I've been wanting to convert Datasette into an ASGI application for over a year - `Port Datasette to ASGI #272 `__ tracks thirteen months of intermittent development - but with Datasette 0.29 the change is finally released. This also means Datasette now runs on top of `Uvicorn `__ and no longer depends on `Sanic `__. + +I wrote about the significance of this change in `Porting Datasette to ASGI, and Turtles all the way down `__. + +The most exciting consequence of this change is that Datasette plugins can now take advantage of the ASGI standard. + +New plugin hook: asgi_wrapper +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :ref:`plugin_asgi_wrapper` plugin hook allows plugins to entirely wrap the Datasette ASGI application in their own ASGI middleware. (`#520 `__) + +Two new plugins take advantage of this hook: + +* `datasette-auth-github `__ adds a authentication layer: users will have to sign in using their GitHub account before they can view data or interact with Datasette. 
You can also use it to restrict access to specific GitHub users, or to members of specified GitHub `organizations `__ or `teams `__. + +* `datasette-cors `__ allows you to configure `CORS headers `__ for your Datasette instance. You can use this to enable JavaScript running on a whitelisted set of domains to make ``fetch()`` calls to the JSON API provided by your Datasette instance. + +New plugin hook: extra_template_vars +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :ref:`plugin_hook_extra_template_vars` plugin hook allows plugins to inject their own additional variables into the Datasette template context. This can be used in conjunction with custom templates to customize the Datasette interface. `datasette-auth-github `__ uses this hook to add custom HTML to the new top navigation bar (which is designed to be modified by plugins, see `#540 `__). + +Secret plugin configuration options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Plugins like `datasette-auth-github `__ need a safe way to set secret configuration options. Since the default mechanism for configuring plugins exposes those settings in ``/-/metadata`` a new mechanism was needed. :ref:`plugins_configuration_secret` describes how plugins can now specify that their settings should be read from a file or an environment variable:: + + { + "plugins": { + "datasette-auth-github": { + "client_secret": { + "$env": "GITHUB_CLIENT_SECRET" + } + } + } + } + +These plugin secrets can be set directly using ``datasette publish``. See :ref:`publish_custom_metadata_and_plugins` for details. (`#538 `__ and `#543 `__) + +Facet by date +~~~~~~~~~~~~~ + +If a column contains datetime values, Datasette can now facet that column by date. (`#481 `__) + +.. _v0_29_medium_changes: + +Easier custom templates for table rows +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to customize the display of individual table rows, you can do so using a ``_table.html`` template include that looks something like this:: + + {% for row in display_rows %} +
        <div>
+            <h2>{{ row["title"] }}</h2>
+            <p>{{ row["description"] }}</p>
+            <p>Category: {{ row.display("category_id") }}</p>
+        </div>
+ {% endfor %} + +This is a **backwards incompatible change**. If you previously had a custom template called ``_rows_and_columns.html`` you need to rename it to ``_table.html``. + +See :ref:`customization_custom_templates` for full details. + +?_through= for joins through many-to-many tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The new ``?_through={json}`` argument to the Table view allows records to be filtered based on a many-to-many relationship. See :ref:`json_api_table_arguments` for full documentation - here's `an example `__. (`#355 `__) + +This feature was added to help support `facet by many-to-many `__, which isn't quite ready yet but will be coming in the next Datasette release. + +Small changes +~~~~~~~~~~~~~ + +* Databases published using ``datasette publish`` now open in :ref:`performance_immutable_mode`. (`#469 `__) +* ``?col__date=`` now works for columns containing spaces +* Automatic label detection (for deciding which column to show when linking to a foreign key) has been improved. (`#485 `__) +* Fixed bug where pagination broke when combined with an expanded foreign key. (`#489 `__) +* Contributors can now run ``pip install -e .[docs]`` to get all of the dependencies needed to build the documentation, including ``cd docs && make livehtml`` support. +* Datasette's dependencies are now all specified using the ``~=`` match operator. (`#532 `__) +* ``white-space: pre-wrap`` now used for table creation SQL. (`#505 `__) + + +`Full list of commits `__ between 0.28 and 0.29. + .. _v0_28: 0.28 (2019-05-19) @@ -31,7 +184,7 @@ Datasette can still run against immutable files and gains numerous performance b Faceting improvements, and faceting plugins ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Datasette :ref:`facets` provide an intuitive way to quickly summarize and interact with data. Previously the only supported faceting technique was column faceting, but 0.28 introduces two powerful new capibilities: facet-by-JSON-array and the ability to define further facet types using plugins. +Datasette :ref:`facets` provide an intuitive way to quickly summarize and interact with data. Previously the only supported faceting technique was column faceting, but 0.28 introduces two powerful new capabilities: facet-by-JSON-array and the ability to define further facet types using plugins. Facet by array (`#359 `__) is only available if your SQLite installation provides the ``json1`` extension. Datasette will automatically detect columns that contain JSON arrays of values and offer a faceting interface against those columns - useful for modelling things like tags without needing to break them out into a new table. See :ref:`facet_by_json_array` for more. @@ -42,7 +195,7 @@ The new :ref:`plugin_register_facet_classes` plugin hook (`#445 `__ is a brand new serverless hosting platform from Google, which allows you to build a Docker container which will run only when HTTP traffic is recieved and will shut down (and hence cost you nothing) the rest of the time. It's similar to Zeit's Now v1 Docker hosting platform which sadly is `no longer accepting signups `__ from new users. +`Google Cloud Run `__ is a brand new serverless hosting platform from Google, which allows you to build a Docker container which will run only when HTTP traffic is received and will shut down (and hence cost you nothing) the rest of the time. It's similar to Zeit's Now v1 Docker hosting platform which sadly is `no longer accepting signups `__ from new users. 
The new ``datasette publish cloudrun`` command was contributed by Romain Primet (`#434 `__) and publishes selected databases to a new Datasette instance running on Google Cloud Run. @@ -481,7 +634,7 @@ Mostly new work on the :ref:`plugins` mechanism: plugins can now bundle static a - Longer time limit for test_paginate_compound_keys It was failing intermittently in Travis - see `#209 `_ -- Use application/octet-stream for downloadable databses +- Use application/octet-stream for downloadable databases - Updated PyPI classifiers - Updated PyPI link to pypi.org diff --git a/docs/contributing.rst b/docs/contributing.rst index 27237a2f..43834edc 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -38,7 +38,7 @@ The next step is to create a virtual environment for your project and use it to # Now activate the virtual environment, so pip can install into it source venv/bin/activate # Install Datasette and its testing dependencies - python3 -m pip -e .[test] + python3 -m pip install -e .[test] That last line does most of the work: ``pip install -e`` means "install this package in a way that allows me to edit the source code in place". The ``.[test]`` option means "use the setup.py in this directory and install the optional testing dependencies as well". @@ -91,7 +91,7 @@ You can build it locally by installing ``sphinx`` and ``sphinx_rtd_theme`` in yo source venv/bin/activate # Install the dependencies needed to build the docs - pip install sphinx sphinx_rtd_theme + pip install -e .[docs] # Now build the docs cd docs/ @@ -103,16 +103,14 @@ This will create the HTML version of the documentation in ``docs/_build/html``. Any time you make changes to a ``.rst`` file you can re-run ``make html`` to update the built documents, then refresh them in your browser. -For added productivity, you can run Sphinx in auto-build mode. This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. +For added productivity, you can use use `sphinx-autobuild `__ to run Sphinx in auto-build mode. This will run a local webserver serving the docs that automatically rebuilds them and refreshes the page any time you hit save in your editor. -To enable auto-build mode, first install `sphinx-autobuild `__:: - - pip install sphinx-autobuild - -Now start the server by running:: +``sphinx-autobuild`` will have been installed when you ran ``pip install -e .[docs]``. In your ``docs/`` directory you can start the server by running the following:: make livehtml +Now browse to ``http://localhost:8000/`` to view the documentation. Any edits you make should be instantly reflected in your browser. + .. _contributing_release: Release process diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index b0863381..5cabe152 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -102,6 +102,8 @@ database column they are representing, for example::
+.. _customization_custom_templates: + Custom templates ---------------- @@ -144,15 +146,15 @@ The lookup rules Datasette uses are as follows:: row-mydatabase-mytable.html row.html - Rows and columns include on table page: - _rows_and_columns-table-mydatabase-mytable.html - _rows_and_columns-mydatabase-mytable.html - _rows_and_columns.html + Table of rows and columns include on table page: + _table-table-mydatabase-mytable.html + _table-mydatabase-mytable.html + _table.html - Rows and columns include on row page: - _rows_and_columns-row-mydatabase-mytable.html - _rows_and_columns-mydatabase-mytable.html - _rows_and_columns.html + Table of rows and columns include on row page: + _table-row-mydatabase-mytable.html + _table-mydatabase-mytable.html + _table.html If a table name has spaces or other unexpected characters in it, the template filename will follow the same rules as our custom ```` CSS classes - for @@ -189,38 +191,28 @@ content you can do so by creating a ``row.html`` template like this:: Note the ``default:row.html`` template name, which ensures Jinja will inherit from the default template. -The ``_rows_and_columns.html`` template is included on both the row and the table -page, and displays the content of the row. The default ``_rows_and_columns.html`` template -`can be seen here `_. +The ``_table.html`` template is included by both the row and the table pages, +and a list of rows. The default ``_table.html`` template renders them as an +HTML template and `can be seen here `_. You can provide a custom template that applies to all of your databases and tables, or you can provide custom templates for specific tables using the template naming scheme described above. -Say for example you want to output a certain column as unescaped HTML. You could -provide a custom ``_rows_and_columns.html`` template like this:: +If you want to present your data in a format other than an HTML table, you +can do so by looping through ``display_rows`` in your own ``_table.html`` +template. You can use ``{{ row["column_name"] }}`` to output the raw value +of a specific column. - - - - {% for column in display_columns %} - - {% endfor %} - - - - {% for row in display_rows %} - - {% for cell in row %} - - {% endfor %} - - {% endfor %} - -
                    <th>{{ column }}</th>
-                    <td>
-                        {% if cell.column == 'description' %}
-                            {{ cell.value|safe }}
-                        {% else %}
-                            {{ cell.value }}
-                        {% endif %}
-                    </td>
+If you want to output the rendered HTML version of a column, including any +links to foreign keys, you can use ``{{ row.display("column_name") }}``. + +Here is an example of a custom ``_table.html`` template:: + + {% for row in display_rows %} +
        <div>
+            <h2>{{ row["title"] }}</h2>
+            <p>{{ row["description"] }}</p>
+            <p>Category: {{ row.display("category_id") }}</p>
+        </div>
+ {% endfor %} diff --git a/docs/datasette-publish-cloudrun-help.txt b/docs/datasette-publish-cloudrun-help.txt index fc7d44d5..6cdc87eb 100644 --- a/docs/datasette-publish-cloudrun-help.txt +++ b/docs/datasette-publish-cloudrun-help.txt @@ -3,22 +3,26 @@ $ datasette publish cloudrun --help Usage: datasette publish cloudrun [OPTIONS] [FILES]... Options: - -m, --metadata FILENAME Path to JSON file containing metadata to publish - --extra-options TEXT Extra options to pass to datasette serve - --branch TEXT Install datasette from a GitHub branch e.g. master - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static files - --install TEXT Additional packages (e.g. plugins) to install - --version-note TEXT Additional note to show on /-/versions - --title TEXT Title for metadata - --license TEXT License label for metadata - --license_url TEXT License URL for metadata - --source TEXT Source label for metadata - --source_url TEXT Source URL for metadata - --about TEXT About label for metadata - --about_url TEXT About URL for metadata - -n, --name TEXT Application name to use when building - --service TEXT Cloud Run service to deploy (or over-write) - --spatialite Enable SpatialLite extension - --help Show this message and exit. + -m, --metadata FILENAME Path to JSON file containing metadata to publish + --extra-options TEXT Extra options to pass to datasette serve + --branch TEXT Install datasette from a GitHub branch e.g. master + --template-dir DIRECTORY Path to directory containing custom templates + --plugins-dir DIRECTORY Path to directory containing custom plugins + --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --install TEXT Additional packages (e.g. plugins) to install + --plugin-secret ... + Secrets to pass to plugins, e.g. --plugin-secret + datasette-auth-github client_id xxx + --version-note TEXT Additional note to show on /-/versions + --title TEXT Title for metadata + --license TEXT License label for metadata + --license_url TEXT License URL for metadata + --source TEXT Source label for metadata + --source_url TEXT Source URL for metadata + --about TEXT About label for metadata + --about_url TEXT About URL for metadata + -n, --name TEXT Application name to use when building + --service TEXT Cloud Run service to deploy (or over-write) + --spatialite Enable SpatialLite extension + --show-files Output the generated Dockerfile and metadata.json + --help Show this message and exit. diff --git a/docs/datasette-publish-heroku-help.txt b/docs/datasette-publish-heroku-help.txt index cd9af09b..88d387a6 100644 --- a/docs/datasette-publish-heroku-help.txt +++ b/docs/datasette-publish-heroku-help.txt @@ -3,20 +3,23 @@ $ datasette publish heroku --help Usage: datasette publish heroku [OPTIONS] [FILES]... Options: - -m, --metadata FILENAME Path to JSON file containing metadata to publish - --extra-options TEXT Extra options to pass to datasette serve - --branch TEXT Install datasette from a GitHub branch e.g. master - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static files - --install TEXT Additional packages (e.g. 
plugins) to install - --version-note TEXT Additional note to show on /-/versions - --title TEXT Title for metadata - --license TEXT License label for metadata - --license_url TEXT License URL for metadata - --source TEXT Source label for metadata - --source_url TEXT Source URL for metadata - --about TEXT About label for metadata - --about_url TEXT About URL for metadata - -n, --name TEXT Application name to use when deploying - --help Show this message and exit. + -m, --metadata FILENAME Path to JSON file containing metadata to publish + --extra-options TEXT Extra options to pass to datasette serve + --branch TEXT Install datasette from a GitHub branch e.g. master + --template-dir DIRECTORY Path to directory containing custom templates + --plugins-dir DIRECTORY Path to directory containing custom plugins + --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --install TEXT Additional packages (e.g. plugins) to install + --plugin-secret ... + Secrets to pass to plugins, e.g. --plugin-secret + datasette-auth-github client_id xxx + --version-note TEXT Additional note to show on /-/versions + --title TEXT Title for metadata + --license TEXT License label for metadata + --license_url TEXT License URL for metadata + --source TEXT Source label for metadata + --source_url TEXT Source URL for metadata + --about TEXT About label for metadata + --about_url TEXT About URL for metadata + -n, --name TEXT Application name to use when deploying + --help Show this message and exit. diff --git a/docs/datasette-publish-nowv1-help.txt b/docs/datasette-publish-nowv1-help.txt index a5417d71..c2bf23f1 100644 --- a/docs/datasette-publish-nowv1-help.txt +++ b/docs/datasette-publish-nowv1-help.txt @@ -3,24 +3,28 @@ $ datasette publish nowv1 --help Usage: datasette publish nowv1 [OPTIONS] [FILES]... Options: - -m, --metadata FILENAME Path to JSON file containing metadata to publish - --extra-options TEXT Extra options to pass to datasette serve - --branch TEXT Install datasette from a GitHub branch e.g. master - --template-dir DIRECTORY Path to directory containing custom templates - --plugins-dir DIRECTORY Path to directory containing custom plugins - --static STATIC MOUNT mountpoint:path-to-directory for serving static files - --install TEXT Additional packages (e.g. plugins) to install - --version-note TEXT Additional note to show on /-/versions - --title TEXT Title for metadata - --license TEXT License label for metadata - --license_url TEXT License URL for metadata - --source TEXT Source label for metadata - --source_url TEXT Source URL for metadata - --about TEXT About label for metadata - --about_url TEXT About URL for metadata - -n, --name TEXT Application name to use when deploying - --force Pass --force option to now - --token TEXT Auth token to use for deploy - --alias TEXT Desired alias e.g. yoursite.now.sh - --spatialite Enable SpatialLite extension - --help Show this message and exit. + -m, --metadata FILENAME Path to JSON file containing metadata to publish + --extra-options TEXT Extra options to pass to datasette serve + --branch TEXT Install datasette from a GitHub branch e.g. master + --template-dir DIRECTORY Path to directory containing custom templates + --plugins-dir DIRECTORY Path to directory containing custom plugins + --static STATIC MOUNT mountpoint:path-to-directory for serving static files + --install TEXT Additional packages (e.g. plugins) to install + --plugin-secret ... + Secrets to pass to plugins, e.g. 
--plugin-secret + datasette-auth-github client_id xxx + --version-note TEXT Additional note to show on /-/versions + --title TEXT Title for metadata + --license TEXT License label for metadata + --license_url TEXT License URL for metadata + --source TEXT Source label for metadata + --source_url TEXT Source URL for metadata + --about TEXT About label for metadata + --about_url TEXT About URL for metadata + -n, --name TEXT Application name to use when deploying + --force Pass --force option to now + --token TEXT Auth token to use for deploy + --alias TEXT Desired alias e.g. yoursite.now.sh + --spatialite Enable SpatialLite extension + --show-files Output the generated Dockerfile and metadata.json + --help Show this message and exit. diff --git a/docs/datasette-serve-help.txt b/docs/datasette-serve-help.txt index 7b7c3b09..1447e84d 100644 --- a/docs/datasette-serve-help.txt +++ b/docs/datasette-serve-help.txt @@ -6,8 +6,11 @@ Usage: datasette serve [OPTIONS] [FILES]... Options: -i, --immutable PATH Database files to open in immutable mode - -h, --host TEXT host for server, defaults to 127.0.0.1 - -p, --port INTEGER port for server, defaults to 8001 + -h, --host TEXT Host for server. Defaults to 127.0.0.1 which means only + connections from the local machine will be allowed. Use + 0.0.0.0 to listen to all IPs and allow access from other + machines. + -p, --port INTEGER Port for server, defaults to 8001 --debug Enable debug mode - useful for development --reload Automatically reload if database or code change detected - useful for development diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst index 6842aca5..cb6a3768 100644 --- a/docs/ecosystem.rst +++ b/docs/ecosystem.rst @@ -70,6 +70,11 @@ datasette-vega `datasette-vega `__ exposes the powerful `Vega `__ charting library, allowing you to construct line, bar and scatter charts against your data and share links to your visualizations. +datasette-auth-github +--------------------- + +`datasette-auth-github `__ adds an authentication layer to Datasette. Users will have to sign in using their GitHub account before they can view data or interact with Datasette. You can also use it to restrict access to specific GitHub users, or to members of specified GitHub `organizations `__ or `teams `__. + datasette-json-html ------------------- @@ -80,6 +85,11 @@ datasette-jellyfish `datasette-jellyfish `__ exposes custom SQL functions for a range of common fuzzy string matching functions, including soundex, porter stemming and levenshtein distance. It builds on top of the `Jellyfish Python library `__. +datasette-doublemetaphone +------------------------- + +`datasette-doublemetaphone `__ by Matthew Somerville adds custom SQL functions for applying the Double Metaphone fuzzy "sounds like" algorithm. + datasette-jq ------------ @@ -90,6 +100,11 @@ datasette-render-images `datasette-render-images `__ works with SQLite tables that contain binary image data in BLOB columns. It converts any images it finds into ``data-uri`` image elements, allowing you to view them directly in the Datasette interface. +datasette-render-binary +----------------------- + +`datasette-render-binary `__ renders binary data in a slightly more readable fashion: it shows ASCII characters as they are, and shows all other data as monospace octets. Useful as a tool for exploring new unfamiliar databases as it makes it easier to spot if a binary column may contain a decipherable binary format. 
+ datasette-pretty-json --------------------- @@ -99,3 +114,13 @@ datasette-sqlite-fts4 --------------------- `datasette-sqlite-fts4 `__ provides search relevance ranking algorithms that can be used with SQLite's FTS4 search module. You can read more about it in `Exploring search relevance algorithms with SQLite `__. + +datasette-bplist +---------------- + +`datasette-bplist `__ provides tools for working with Apple's binary plist format embedded in SQLite database tables. If you use OS X you already have dozens of SQLite databases hidden away in your ``~/Library`` folder that include data in this format - this plugin allows you to view the decoded data and run SQL queries against embedded values using a ``bplist_to_json(value)`` custom SQL function. + +datasette-cors +-------------- + +`datasette-cors `__ allows you to configure `CORS headers `__ for your Datasette instance. You can use this to enable JavaScript running on a whitelisted set of domains to make ``fetch()`` calls to the JSON API provided by your Datasette instance. \ No newline at end of file diff --git a/docs/facets.rst b/docs/facets.rst index ddf69cb4..13b18bd0 100644 --- a/docs/facets.rst +++ b/docs/facets.rst @@ -129,17 +129,6 @@ The performance of facets can be greatly improved by adding indexes on the colum Enter ".help" for usage hints. sqlite> CREATE INDEX Food_Trucks_state ON Food_Trucks("state"); -.. _facet_by_m2m: - -Facet by many-to-many ---------------------- - -Datasette can detect many-to-many SQL tables - defined as SQL tables which have foreign key relationships to two other tables. - -If a many-to-many table exists pointing at the table you are currently viewing, Datasette will suggest you facet the table based on that relationship. - -Example here: `latest.datasette.io/fixtures/roadside_attractions?_facet_m2m=attraction_characteristic `__ - .. _facet_by_json_array: Facet by JSON array diff --git a/docs/full_text_search.rst b/docs/full_text_search.rst index 97656bb8..138b8e0b 100644 --- a/docs/full_text_search.rst +++ b/docs/full_text_search.rst @@ -28,7 +28,28 @@ To set up full-text search for a table, you need to do two things: * Create a new FTS virtual table associated with your table * Populate that FTS table with the data that you would like to be able to run searches against -To enable full-text search for a table called ``items`` that works against the ``name`` and ``description`` columns, you would run the following SQL to create a new ``items_fts`` FTS virtual table: +Configuring FTS using sqlite-utils +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`sqlite-utils `__ is a CLI utility and Python library for manipulating SQLite databases. You can use `it from Python code `__ to configure FTS search, or you can achieve the same goal `using the accompanying command-line tool `__. + +Here's how to use ``sqlite-utils`` to enable full-text search for an ``items`` table across the ``name`` and ``description`` columns:: + + $ sqlite-utils enable-fts mydatabase.db items name description + +Configuring FTS using csvs-to-sqlite +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your data starts out in CSV files, you can use Datasette's companion tool `csvs-to-sqlite `__ to convert that file into a SQLite database and enable full-text search on specific columns. 
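The ``sqlite-utils`` route shown above can also be driven from Python rather than the command line; a rough sketch of the equivalent call (assuming ``sqlite-utils`` is installed) is::

    import sqlite_utils

    db = sqlite_utils.Database("mydatabase.db")
    # Build an FTS table for items, indexing the name and description columns
    db["items"].enable_fts(["name", "description"])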
For a file called ``items.csv`` where you want full-text search to operate against the ``name`` and ``description`` columns you would run the following:: + + $ csvs-to-sqlite items.csv items.db -f name -f description + +Configuring FTS by hand +~~~~~~~~~~~~~~~~~~~~~~~ + +We recommend using `sqlite-utils `__, but if you want to hand-roll a SQLite full-text search table you can do so using the following SQL. + +To enable full-text search for a table called ``items`` that works against the ``name`` and ``description`` columns, you would run this SQL to create a new ``items_fts`` FTS virtual table: .. code-block:: sql @@ -71,8 +92,6 @@ And then populate it like this: You can use this technique to populate the full-text search index from any combination of tables and joins that makes sense for your project. -The `sqlite-utils tool `__ provides a command-line mechanism that can be used to implement the above steps. - .. _full_text_search_table_or_view: Configuring full-text search for a table or view @@ -103,13 +122,6 @@ Here is an example which enables full-text search for a ``display_ads`` view whi } } -Setting up full-text search using csvs-to-sqlite ------------------------------------------------- - -If your data starts out in CSV files, you can use Datasette's companion tool `csvs-to-sqlite `_ to convert that file into a SQLite database and enable full-text search on specific columns. For a file called ``items.csv`` where you want full-text search to operate against the ``name`` and ``description`` columns you would run the following:: - - csvs-to-sqlite items.csv items.db -f name -f description - The table view API ------------------ diff --git a/docs/getting_started.rst b/docs/getting_started.rst index d0c22583..fdf7d23c 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -25,7 +25,7 @@ Glitch allows you to "remix" any project to create your own copy and start editi .. image:: https://cdn.glitch.com/2703baf2-b643-4da7-ab91-7ee2a2d00b5b%2Fremix-button.svg :target: https://glitch.com/edit/#!/remix/datasette-csvs -Find a CSV file and drag it onto the Glitch file explorer panel - ``datasette-csvs`` will automatically convert it to a SQLite database (using `csvs-to-sqlite `__) and allow you to start exploring it using Datasette. +Find a CSV file and drag it onto the Glitch file explorer panel - ``datasette-csvs`` will automatically convert it to a SQLite database (using `sqlite-utils `__) and allow you to start exploring it using Datasette. If your CSV file has a ``latitude`` and ``longitude`` column you can visualize it on a map by uncommenting the ``datasette-cluster-map`` line in the ``requirements.txt`` file using the Glitch file editor. diff --git a/docs/introspection.rst b/docs/introspection.rst index e514ddf5..02552bab 100644 --- a/docs/introspection.rst +++ b/docs/introspection.rst @@ -90,6 +90,8 @@ Shows the :ref:`config` options for this instance of Datasette. `Config example "sql_time_limit_ms": 1000 } +.. _JsonDataView_databases: + /-/databases ------------ @@ -105,3 +107,26 @@ Shows currently attached databases. 
`Databases example `_:: + + { + "num_threads": 2, + "threads": [ + { + "daemon": false, + "ident": 4759197120, + "name": "MainThread" + }, + { + "daemon": true, + "ident": 123145319682048, + "name": "Thread-1" + }, + ] + } diff --git a/docs/json_api.rst b/docs/json_api.rst index 4b365e14..e369bee7 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -228,6 +228,9 @@ You can filter the data returned by the table based on column values using a que ``?column__in=["value","value,with,commas"]`` +``?column__notin=value1,value2,value3`` + Rows where column does not match any of the provided values. The inverse of ``__in=``. Also supports JSON arrays. + ``?column__arraycontains=value`` Works against columns that contain JSON arrays - matches if any of the values in that array match. @@ -318,15 +321,6 @@ Special table arguments Here's `an example `__. - -``?_group_count=COLUMN`` - Executes a SQL query that returns a count of the number of rows matching - each unique value in that column, with the most common ordered first. - -``?_group_count=COLUMN1&_group_count=column2`` - You can pass multiple ``_group_count`` columns to return counts against - unique combinations of those columns. - ``?_next=TOKEN`` Pagination by continuation token - pass the token that was returned in the ``"next"`` property by the previous page. diff --git a/docs/metadata.rst b/docs/metadata.rst index 0a2aa219..5d9155ea 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -62,7 +62,7 @@ Each of the top-level metadata fields can be used at the database and table leve Source, license and about ------------------------- -The three visible metadata fields you can apply to everything, specific databases or specific tables are source, license and about. All three are optionaly. +The three visible metadata fields you can apply to everything, specific databases or specific tables are source, license and about. All three are optional. **source** and **source_url** should be used to indicate where the underlying data came from. diff --git a/docs/performance.rst b/docs/performance.rst index 741c9a92..d7f852d5 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -7,6 +7,8 @@ Datasette runs on top of SQLite, and SQLite has excellent performance. For smal That said, there are a number of tricks you can use to improve Datasette's performance. +.. _performance_immutable_mode: + Immutable mode -------------- @@ -37,7 +39,7 @@ Then later you can start Datasette against the ``counts.json`` file and use it t datasette -i data.db --inspect-file=counts.json -You need to use the ``-i`` immutable mode agaist the databse file here or the counts from the JSON file will be ignored. +You need to use the ``-i`` immutable mode against the databse file here or the counts from the JSON file will be ignored. You will rarely need to use this optimization in every-day use, but several of the ``datasette publish`` commands described in :ref:`publishing` use this optimization for better performance when deploying a database file to a hosting provider. diff --git a/docs/plugins.rst b/docs/plugins.rst index bd32b3a6..6df7ff6a 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -219,6 +219,48 @@ Here is an example of some plugin configuration for a specific table:: This tells the ``datasette-cluster-map`` column which latitude and longitude columns should be used for a table called ``Street_Tree_List`` inside a database file called ``sf-trees.db``. +.. 
_plugins_configuration_secret: + +Secret configuration values +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Any values embedded in ``metadata.json`` will be visible to anyone who views the ``/-/metadata`` page of your Datasette instance. Some plugins may need configuration that should stay secret - API keys for example. There are two ways in which you can store secret configuration values. + +**As environment variables**. If your secret lives in an environment variable that is available to the Datasette process, you can indicate that the configuration value should be read from that environment variable like so:: + + { + "plugins": { + "datasette-auth-github": { + "client_secret": { + "$env": "GITHUB_CLIENT_SECRET" + } + } + } + } + +**As values in separate files**. Your secrets can also live in files on disk. To specify a secret should be read from a file, provide the full file path like this:: + + { + "plugins": { + "datasette-auth-github": { + "client_secret": { + "$file": "/secrets/client-secret" + } + } + } + } + +If you are publishing your data using the :ref:`datasette publish ` family of commands, you can use the ``--plugin-secret`` option to set these secrets at publish time. For example, using Heroku you might run the following command:: + + $ datasette publish heroku my_database.db \ + --name my-heroku-app-demo \ + --install=datasette-auth-github \ + --plugin-secret datasette-auth-github client_id your_client_id \ + --plugin-secret datasette-auth-github client_secret your_client_secret + +Writing plugins that accept configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + When you are writing plugins, you can access plugin configuration like this using the ``datasette.plugin_config()`` method. If you know you need plugin configuration for a specific table, you can access it like this:: plugin_config = datasette.plugin_config( @@ -400,7 +442,7 @@ you have one: @hookimpl def extra_js_urls(): return [ - '/-/static-plugins/your_plugin/app.js' + '/-/static-plugins/your-plugin/app.js' ] .. _plugin_hook_publish_subcommand: @@ -529,6 +571,8 @@ If the value matches that pattern, the plugin returns an HTML link element: extra_body_script(template, database, table, view_name, datasette) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Extra JavaScript to be added to a ``
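A minimal sketch of a plugin using this hook (illustrative only, following the same ``@hookimpl`` pattern as the ``extra_js_urls`` example earlier) might look like::

    from datasette import hookimpl

    @hookimpl
    def extra_body_script(template, database, table, view_name, datasette):
        # Return a string of JavaScript to inject at the bottom of the page body
        return "console.log('Rendered with template: {}');".format(template)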