From 947645d84710677ea50762016081a9fbc6b014a8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 26 Jul 2019 13:18:19 +0300 Subject: [PATCH] First working -d based Datasette Library Refs #417 First proof-of-concept for Datasette Library. Run like this: datasette -d ~/Library Uses a new plugin hook - available_databases() BUT... I don't think this is quite the way I want to go. --- datasette/app.py | 12 +++++- datasette/database.py | 12 ++++-- datasette/hookspecs.py | 5 +++ datasette/plugins.py | 1 + datasette/serve_dir.py | 76 ++++++++++++++++++++++++++++++++++ datasette/templates/index.html | 1 + datasette/views/database.py | 1 + datasette/views/index.py | 1 + docs/datasette-serve-help.txt | 2 + docs/plugins.rst | 22 ++++++++++ tests/fixtures.py | 16 +++++++ tests/test_api.py | 50 +++++++++++++++++----- tests/test_cli.py | 3 +- tests/test_html.py | 1 + tests/test_plugins.py | 18 +++++++- 15 files changed, 202 insertions(+), 19 deletions(-) create mode 100644 datasette/serve_dir.py diff --git a/datasette/app.py b/datasette/app.py index d80b2094..2ee32dc8 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -161,7 +161,7 @@ class Datasette: elif memory: self.files = (MEMORY,) + self.files self.extra_serve_options = extra_serve_options or {} - self.databases = {} + self._databases = {} self.inspect_data = inspect_data for file in self.files: path = file @@ -173,7 +173,7 @@ class Datasette: db = Database(self, path, is_mutable=is_mutable, is_memory=is_memory) if db.name in self.databases: raise Exception("Multiple files with same stem: {}".format(db.name)) - self.databases[db.name] = db + self._databases[db.name] = db self.cache_headers = cache_headers self.cors = cors self._metadata = metadata or {} @@ -203,6 +203,14 @@ class Datasette: # Plugin already registered pass + @property + def databases(self): + databases = dict(self._databases) + # pylint: disable=no-member + for pairs in pm.hook.available_databases(datasette=self): + databases.update(pairs) + return databases + async def run_sanity_checks(self): # Only one check right now, for Spatialite for database_name, database in self.databases.items(): diff --git a/datasette/database.py b/datasette/database.py index 7e6f7245..06d1c2ad 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -14,15 +14,19 @@ from .inspect import inspect_hash class Database: - def __init__(self, ds, path=None, is_mutable=False, is_memory=False): + def __init__( + self, ds, path=None, name=None, is_mutable=False, is_memory=False, comment=None + ): self.ds = ds + self._name = name self.path = path self.is_mutable = is_mutable self.is_memory = is_memory self.hash = None self.cached_size = None self.cached_table_counts = None - if not self.is_mutable: + self.comment = comment + if not self.is_mutable and path is not None: p = Path(path) self.hash = inspect_hash(p) self.cached_size = p.stat().st_size @@ -47,7 +51,7 @@ class Database: @property def size(self): - if self.is_memory: + if self.is_memory or self.path is None: return 0 if self.cached_size is not None: return self.cached_size @@ -83,6 +87,8 @@ class Database: @property def name(self): + if self._name: + return self._name if self.is_memory: return ":memory:" else: diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index bca47990..780b7732 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -63,3 +63,8 @@ def register_facet_classes(): @hookspec def extra_serve_options(): "Return list of extra click.option decorators to be applied to 'datasette serve'" + + +@hookspec +def available_databases(datasette): + "Return list of (name, database) pairs to be added to the available databases" diff --git a/datasette/plugins.py b/datasette/plugins.py index bf3735dc..7e755659 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -8,6 +8,7 @@ DEFAULT_PLUGINS = ( "datasette.publish.now", "datasette.publish.cloudrun", "datasette.facets", + "datasette.serve_dir", ) pm = pluggy.PluginManager("datasette") diff --git a/datasette/serve_dir.py b/datasette/serve_dir.py new file mode 100644 index 00000000..addeb33a --- /dev/null +++ b/datasette/serve_dir.py @@ -0,0 +1,76 @@ +from datasette import hookimpl +from pathlib import Path +from .database import Database +from .utils import escape_sqlite +import click + + +@hookimpl +def extra_serve_options(): + return [ + click.option( + "-d", + "--dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Directories to scan for SQLite databases", + multiple=True, + ), + click.option( + "--scan", + is_flag=True, + help="Continually scan directories for new database files", + ), + ] + + +cached_results = None + + +@hookimpl +def available_databases(datasette): + global cached_results + if cached_results is not None: + return cached_results + i = 0 + counts = {name: 0 for name in datasette._databases} + results = [] + for directory in datasette.extra_serve_options.get("dir") or []: + for filepath in Path(directory).glob("**/*"): + if is_sqlite(filepath): + name = filepath.stem + if name in counts: + new_name = "{}_{}".format(name, counts[name] + 1) + counts[name] += 1 + name = new_name + try: + database = Database(datasette, str(filepath), comment=str(filepath)) + conn = database.connect() + result = conn.execute( + "select name from sqlite_master where type = 'table'" + ) + table_names = [r[0] for r in result] + for table_name in table_names: + conn.execute( + "PRAGMA table_info({});".format(escape_sqlite(table_name)) + ) + except Exception as e: + print("Could not open {}".format(filepath)) + print(" " + str(e)) + else: + results.append((name, database)) + + cached_results = results + return results + + +magic = b"SQLite format 3\x00" + + +def is_sqlite(path): + if not path.is_file(): + return False + try: + with open(path, "rb") as fp: + return fp.read(len(magic)) == magic + except PermissionError: + return False diff --git a/datasette/templates/index.html b/datasette/templates/index.html index b394564a..69a34808 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -11,6 +11,7 @@ {% for database in databases %}

{{ database.name }}

+ {% if database.comment %}

{{ database.comment }}

{% endif %}

{% if database.show_table_row_counts %}{{ "{:,}".format(database.table_rows_sum) }} rows in {% endif %}{{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif -%} {% if database.hidden_tables_count -%} diff --git a/datasette/views/database.py b/datasette/views/database.py index 78af19c5..ce9498c5 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -47,6 +47,7 @@ class DatabaseView(DataView): { "database": database, "size": db.size, + "comment": db.comment, "tables": tables, "hidden_count": len([t for t in tables if t["hidden"]]), "views": views, diff --git a/datasette/views/index.py b/datasette/views/index.py index fddb04d9..64877f2b 100644 --- a/datasette/views/index.py +++ b/datasette/views/index.py @@ -79,6 +79,7 @@ class IndexView(BaseView): { "name": name, "hash": db.hash, + "comment": db.comment, "color": db.hash[:6] if db.hash else hashlib.md5(name.encode("utf8")).hexdigest()[:6], diff --git a/docs/datasette-serve-help.txt b/docs/datasette-serve-help.txt index 7b7c3b09..d2b96f1f 100644 --- a/docs/datasette-serve-help.txt +++ b/docs/datasette-serve-help.txt @@ -23,4 +23,6 @@ Options: datasette.readthedocs.io/en/latest/config.html --version-note TEXT Additional note to show on /-/versions --help-config Show available config options + -d, --dir DIRECTORY Directories to scan for SQLite databases + --scan Continually scan directories for new database files --help Show this message and exit. diff --git a/docs/plugins.rst b/docs/plugins.rst index 72473a39..4937e32f 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -854,3 +854,25 @@ Your other plugin hooks can then access these settings like so: } Be careful not to define an option which clashes with a Datasette default option, or with options provided by another plugin. For this reason we recommend using a common prefix for your plugin, as shown above. + +.. _plugin_hook_available_databases: + +available_databases(datasette) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Return a list of ``(name, database)`` pairs to be added to the available databases. + +``name`` should be a string. ``database`` should be a ``datasette.database.Database`` instance. + +This allows plugins to make databases available from new sources. + +.. code-block:: python + + from datasette import hookimpl + from datasette.database import Database + + @hookimpl + def available_databases(datasette): + return [ + ("hardcoded_database", Database(datasette, "/mnt/hard_coded.db")) + ] diff --git a/tests/fixtures.py b/tests/fixtures.py index 801abc43..d9ea947a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -321,6 +321,8 @@ METADATA = { PLUGIN1 = """ from datasette import hookimpl +from datasette.database import Database +from datasette.utils import sqlite3 import base64 import pint import json @@ -397,6 +399,20 @@ def extra_template_vars(template, database, table, view_name, request, datasette "extra_serve_options": datasette.extra_serve_options, }, default=lambda b: b.decode("utf8")) } + + +class SpecialDatabase(Database): + def connect(self): + db = sqlite3.connect(":memory:") + db.executescript("CREATE TABLE foo (id integer primary key, bar text)") + db.executescript("INSERT INTO foo (id, bar) VALUES (1, 'hello')") + return db + +@hookimpl +def available_databases(datasette): + return [ + ("special", SpecialDatabase(datasette, name="special")), + ] """ PLUGIN2 = """ diff --git a/tests/test_api.py b/tests/test_api.py index 163e2ec7..661ca75a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -24,7 +24,7 @@ def test_homepage(app_client): response = app_client.get("/.json") assert response.status == 200 assert "application/json; charset=utf-8" == response.headers["content-type"] - assert response.json.keys() == {"fixtures": 0}.keys() + assert {"fixtures", "special"} == set(response.json.keys()) d = response.json["fixtures"] assert d["name"] == "fixtures" assert d["tables_count"] == 24 @@ -518,19 +518,45 @@ def test_no_files_uses_memory_database(app_client_no_files): assert response.status == 200 assert { ":memory:": { + "name": ":memory:", "hash": None, + "comment": None, "color": "f7935d", + "path": "/:memory:", + "tables_and_views_truncated": [], + "tables_and_views_more": False, + "tables_count": 0, + "table_rows_sum": 0, + "show_table_row_counts": False, "hidden_table_rows_sum": 0, "hidden_tables_count": 0, - "name": ":memory:", - "show_table_row_counts": False, - "path": "/:memory:", - "table_rows_sum": 0, - "tables_count": 0, - "tables_and_views_more": False, - "tables_and_views_truncated": [], "views_count": 0, - } + }, + "special": { + "name": "special", + "hash": None, + "comment": None, + "color": "0bd650", + "path": "/special", + "tables_and_views_truncated": [ + { + "name": "foo", + "columns": ["id", "bar"], + "primary_keys": ["id"], + "count": 1, + "hidden": False, + "fts_table": None, + "num_relationships_for_sorting": 0, + } + ], + "tables_and_views_more": False, + "tables_count": 1, + "table_rows_sum": 1, + "show_table_row_counts": True, + "hidden_table_rows_sum": 0, + "hidden_tables_count": 0, + "views_count": 0, + }, } == response.json # Try that SQL query response = app_client_no_files.get( @@ -1170,8 +1196,10 @@ def test_unit_filters(app_client): def test_databases_json(app_client_two_attached_databases_one_immutable): response = app_client_two_attached_databases_one_immutable.get("/-/databases.json") databases = response.json - assert 2 == len(databases) - extra_database, fixtures_database = databases + assert 3 == len(databases) + by_name = {database["name"]: database for database in databases} + extra_database = by_name["extra_database"] + fixtures_database = by_name["fixtures"] assert "extra_database" == extra_database["name"] assert None == extra_database["hash"] assert True == extra_database["is_mutable"] diff --git a/tests/test_cli.py b/tests/test_cli.py index d1ab6522..1dab4d1f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,6 @@ def test_inspect_cli(app_client): runner = CliRunner() result = runner.invoke(cli, ["inspect", "fixtures.db"]) data = json.loads(result.output) - assert ["fixtures"] == list(data.keys()) database = data["fixtures"] assert "fixtures.db" == database["file"] assert isinstance(database["hash"], str) @@ -28,7 +27,7 @@ def test_inspect_cli_writes_to_file(app_client): ) assert 0 == result.exit_code, result.output data = json.load(open("foo.json")) - assert ["fixtures"] == list(data.keys()) + assert {"fixtures", "special"} == set(data.keys()) def test_serve_with_inspect_file_prepopulates_table_counts_cache(): diff --git a/tests/test_html.py b/tests/test_html.py index 0a6df984..4a49551f 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -28,6 +28,7 @@ def test_homepage(app_client_two_attached_databases): assert [ {"href": "/extra_database", "text": "extra_database"}, {"href": "/fixtures", "text": "fixtures"}, + {"href": "/special", "text": "special"}, ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] # The first attached database should show count text and attached tables h2 = soup.select("h2")[0] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index b306963c..c89a466d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -188,7 +188,7 @@ def test_plugins_extra_body_script(app_client, path, expected_extra_body_script) def test_plugins_asgi_wrapper(app_client): response = app_client.get("/fixtures") - assert "fixtures" == response.headers["x-databases"] + assert "fixtures, special" == response.headers["x-databases"] def test_plugins_extra_template_vars(restore_working_directory): @@ -228,3 +228,19 @@ def test_extra_serve_options_available_on_datasette(restore_working_directory): Soup(response.body, "html.parser").select("pre.extra_template_vars")[0].text ) assert {"foo": "bar"} == extra_template_vars["extra_serve_options"] + + +def test_plugins_available_databases(app_client): + response = app_client.get("/-/databases.json") + assert 200 == response.status + assert { + "name": "special", + "path": None, + "size": 0, + "is_mutable": False, + "is_memory": False, + "hash": None, + } in response.json + assert [{"id": 1, "bar": "hello"}] == app_client.get( + "/special/foo.json?_shape=array" + ).json