From 6717c719dd36dc2adc0f9da38a8c8e08129e96b4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 2 Apr 2020 12:30:53 -0700 Subject: [PATCH] --metadata accepts YAML as well as JSON - closes #713 --- datasette/cli.py | 12 +++- datasette/publish/common.py | 2 +- datasette/publish/heroku.py | 4 +- datasette/utils/__init__.py | 18 +++++- docs/datasette-package-help.txt | 2 +- docs/datasette-publish-cloudrun-help.txt | 2 +- docs/datasette-publish-heroku-help.txt | 2 +- docs/datasette-serve-help.txt | 2 +- docs/metadata.rst | 80 +++++++++++++++++++++--- setup.py | 1 + tests/test_cli.py | 43 ++++++++++++- tests/test_publish_cloudrun.py | 6 +- 12 files changed, 149 insertions(+), 25 deletions(-) diff --git a/datasette/cli.py b/datasette/cli.py index 94da6ee4..0ed5e287 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -11,6 +11,7 @@ import sys from .app import Datasette, DEFAULT_CONFIG, CONFIG_OPTIONS, pm from .utils import ( check_connection, + parse_metadata, ConnectionProblem, SpatialiteConnectionProblem, temporary_docker_directory, @@ -147,7 +148,7 @@ def plugins(all, plugins_dir): "-m", "--metadata", type=click.File(mode="r"), - help="Path to JSON file containing metadata to publish", + help="Path to JSON/YAML file containing metadata to publish", ) @click.option("--extra-options", help="Extra options to pass to datasette serve") @click.option("--branch", help="Install datasette from a GitHub branch e.g. 
master") @@ -281,7 +282,7 @@ def package( "-m", "--metadata", type=click.File(mode="r"), - help="Path to JSON file containing license/source metadata", + help="Path to JSON/YAML file containing license/source metadata", ) @click.option( "--template-dir", @@ -326,6 +327,7 @@ def serve( config, version_note, help_config, + return_instance=False, ): """Serve up specified SQLite database files with a web UI""" if help_config: @@ -354,7 +356,7 @@ def serve( metadata_data = None if metadata: - metadata_data = json.loads(metadata.read()) + metadata_data = parse_metadata(metadata.read()) click.echo( "Serve! files={} (immutables={}) on port {}".format(files, immutable, port) @@ -374,6 +376,10 @@ def serve( memory=memory, version_note=version_note, ) + if return_instance: + # Private utility mechanism for writing unit tests + return ds + # Run async sanity checks - but only if we're not under pytest asyncio.get_event_loop().run_until_complete(check_databases(ds)) diff --git a/datasette/publish/common.py b/datasette/publish/common.py index ac4e2821..2911029d 100644 --- a/datasette/publish/common.py +++ b/datasette/publish/common.py @@ -12,7 +12,7 @@ def add_common_publish_arguments_and_options(subcommand): "-m", "--metadata", type=click.File(mode="r"), - help="Path to JSON file containing metadata to publish", + help="Path to JSON/YAML file containing metadata to publish", ), click.option( "--extra-options", help="Extra options to pass to datasette serve" diff --git a/datasette/publish/heroku.py b/datasette/publish/heroku.py index e75f76df..4db81d8e 100644 --- a/datasette/publish/heroku.py +++ b/datasette/publish/heroku.py @@ -11,7 +11,7 @@ from .common import ( add_common_publish_arguments_and_options, fail_if_publish_binary_not_installed, ) -from datasette.utils import link_or_copy, link_or_copy_directory +from datasette.utils import link_or_copy, link_or_copy_directory, parse_metadata @hookimpl @@ -154,7 +154,7 @@ def temporary_heroku_directory( file_names = 
[os.path.split(f)[-1] for f in files] if metadata: - metadata_content = json.load(metadata) + metadata_content = parse_metadata(metadata.read()) else: metadata_content = {} for key, value in extra_metadata.items(): diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index be99f890..226efe45 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -13,6 +13,7 @@ import types import shutil import urllib import numbers +import yaml try: import pysqlite3 as sqlite3 @@ -359,7 +360,7 @@ def temporary_docker_directory( file_paths = [os.path.join(saved_cwd, file_path) for file_path in files] file_names = [os.path.split(f)[-1] for f in files] if metadata: - metadata_content = json.load(metadata) + metadata_content = parse_metadata(metadata.read()) else: metadata_content = {} for key, value in extra_metadata.items(): @@ -785,3 +786,18 @@ def check_connection(conn): raise SpatialiteConnectionProblem(e) else: raise ConnectionProblem(e) + + +class BadMetadataError(Exception): + pass + + +def parse_metadata(content): + # content can be JSON or YAML + try: + return json.loads(content) + except json.JSONDecodeError: + try: + return yaml.safe_load(content) + except yaml.YAMLError: + raise BadMetadataError("Metadata is not valid JSON or YAML") diff --git a/docs/datasette-package-help.txt b/docs/datasette-package-help.txt index 51f66f71..326b66cb 100644 --- a/docs/datasette-package-help.txt +++ b/docs/datasette-package-help.txt @@ -8,7 +8,7 @@ Options: -t, --tag TEXT Name for the resulting Docker container, can optionally use name:tag format - -m, --metadata FILENAME Path to JSON file containing metadata to publish + -m, --metadata FILENAME Path to JSON/YAML file containing metadata to publish --extra-options TEXT Extra options to pass to datasette serve --branch TEXT Install datasette from a GitHub branch e.g. 
master --template-dir DIRECTORY Path to directory containing custom templates diff --git a/docs/datasette-publish-cloudrun-help.txt b/docs/datasette-publish-cloudrun-help.txt index 520e02a5..98fc9c71 100644 --- a/docs/datasette-publish-cloudrun-help.txt +++ b/docs/datasette-publish-cloudrun-help.txt @@ -3,7 +3,7 @@ $ datasette publish cloudrun --help Usage: datasette publish cloudrun [OPTIONS] [FILES]... Options: - -m, --metadata FILENAME Path to JSON file containing metadata to publish + -m, --metadata FILENAME Path to JSON/YAML file containing metadata to publish --extra-options TEXT Extra options to pass to datasette serve --branch TEXT Install datasette from a GitHub branch e.g. master --template-dir DIRECTORY Path to directory containing custom templates diff --git a/docs/datasette-publish-heroku-help.txt b/docs/datasette-publish-heroku-help.txt index a633ba2e..ec157753 100644 --- a/docs/datasette-publish-heroku-help.txt +++ b/docs/datasette-publish-heroku-help.txt @@ -3,7 +3,7 @@ $ datasette publish heroku --help Usage: datasette publish heroku [OPTIONS] [FILES]... Options: - -m, --metadata FILENAME Path to JSON file containing metadata to publish + -m, --metadata FILENAME Path to JSON/YAML file containing metadata to publish --extra-options TEXT Extra options to pass to datasette serve --branch TEXT Install datasette from a GitHub branch e.g. 
master --template-dir DIRECTORY Path to directory containing custom templates diff --git a/docs/datasette-serve-help.txt b/docs/datasette-serve-help.txt index c0b33c54..5265c294 100644 --- a/docs/datasette-serve-help.txt +++ b/docs/datasette-serve-help.txt @@ -21,7 +21,7 @@ Options: --cors Enable CORS by serving Access-Control-Allow-Origin: * --load-extension PATH Path to a SQLite extension to load --inspect-file TEXT Path to JSON file created using "datasette inspect" - -m, --metadata FILENAME Path to JSON file containing license/source metadata + -m, --metadata FILENAME Path to JSON/YAML file containing license/source metadata --template-dir DIRECTORY Path to directory containing custom templates --plugins-dir DIRECTORY Path to directory containing custom plugins --static MOUNT:DIRECTORY Serve static files from this directory at /MOUNT/... diff --git a/docs/metadata.rst b/docs/metadata.rst index 54008550..3cb1f739 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -11,7 +11,9 @@ Run Datasette like this:: datasette database1.db database2.db --metadata metadata.json -Your ``metadata.json`` file can look something like this:: +Your ``metadata.json`` file can look something like this: + +.. code-block:: json { "title": "Custom title for your index page", @@ -22,6 +24,8 @@ Your ``metadata.json`` file can look something like this:: "source_url": "http://example.com/" } +You can optionally use YAML instead of JSON, see :ref:`metadata_yaml`. + The above metadata will be displayed on the index page of your Datasette-powered site. The source and license information will also be included in the footer of every page served by Datasette. @@ -37,7 +41,9 @@ Metadata at the top level of the JSON will be shown on the index page and in the footer on every page of the site. The license and source is expected to apply to all of your data. 
-You can also provide metadata at the per-database or per-table level, like this:: +You can also provide metadata at the per-database or per-table level, like this: + +.. code-block:: json { "databases": { @@ -78,7 +84,9 @@ Specifying units for a column Datasette supports attaching units to a column, which will be used when displaying values from that column. SI prefixes will be used where appropriate. -Column units are configured in the metadata like so:: +Column units are configured in the metadata like so: + +.. code-block:: json { "databases": { @@ -97,7 +105,9 @@ Column units are configured in the metadata like so:: Units are interpreted using Pint_, and you can see the full list of available units in Pint's `unit registry`_. You can also add `custom units`_ to the metadata, which will be -registered with Pint:: +registered with Pint: + +.. code-block:: json { "custom_units": [ @@ -114,7 +124,9 @@ registered with Pint:: Setting a default sort order ---------------------------- -By default Datasette tables are sorted by primary key. You can over-ride this default for a specific table using the ``"sort"`` or ``"sort_desc"`` metadata properties:: +By default Datasette tables are sorted by primary key. You can over-ride this default for a specific table using the ``"sort"`` or ``"sort_desc"`` metadata properties: + +.. code-block:: json { "databases": { @@ -128,7 +140,9 @@ By default Datasette tables are sorted by primary key. You can over-ride this de } } -Or use ``"sort_desc"`` to sort in descending order:: +Or use ``"sort_desc"`` to sort in descending order: + +.. code-block:: json { "databases": { @@ -149,7 +163,9 @@ Setting which columns can be used for sorting Datasette allows any column to be used for sorting by default. If you need to control which columns are available for sorting you can do so using the optional -``sortable_columns`` key:: +``sortable_columns`` key: + +.. 
code-block:: json { "databases": { @@ -171,7 +187,9 @@ This will restrict sorting of ``example_table`` to just the ``height`` and You can also disable sorting entirely by setting ``"sortable_columns": []`` -By default, database views in Datasette do not support sorting. You can use ``sortable_columns`` to enable specific sort orders for a view called ``name_of_view`` in the database ``my_database`` like so:: +By default, database views in Datasette do not support sorting. You can use ``sortable_columns`` to enable specific sort orders for a view called ``name_of_view`` in the database ``my_database`` like so: + +.. code-block:: json { "databases": { @@ -199,7 +217,9 @@ two columns: a primary key column and one other. It assumes that the second column should be used as the link label. If your table has more than two columns you can specify which column should be -used for the link label with the ``label_column`` property:: +used for the link label with the ``label_column`` property: + +.. code-block:: json { "databases": { @@ -217,7 +237,9 @@ Hiding tables ------------- You can hide tables from the database listing view (in the same way that FTS and -Spatialite tables are automatically hidden) using ``"hidden": true``:: +Spatialite tables are automatically hidden) using ``"hidden": true``: + +.. code-block:: json { "databases": { @@ -230,3 +252,41 @@ Spatialite tables are automatically hidden) using ``"hidden": true``:: } } } + +.. _metadata_yaml: + +Using YAML for metadata +----------------------- + +Datasette accepts YAML as an alternative to JSON for your metadata configuration file. YAML is particularly useful for including multiline HTML and SQL strings. + +Here's an example of a ``metadata.yml`` file, re-using an example from :ref:`canned_queries`. + +.. code-block:: yaml + + title: Demonstrating Metadata from YAML + description_html: |- +

<p>This description includes a long HTML string</p>

+ + license: ODbL + license_url: https://opendatacommons.org/licenses/odbl/ + databases: + fixtures: + tables: + no_primary_key: + hidden: true + queries: + neighborhood_search: + sql: |- + select neighborhood, facet_cities.name, state + from facetable join facet_cities on facetable.city_id = facet_cities.id + where neighborhood like '%' || :text || '%' order by neighborhood; + title: Search neighborhoods + description_html: |- +

This demonstrates simple LIKE search + +The ``metadata.yml`` file is passed to Datasette using the same ``--metadata`` option:: + + datasette fixtures.db --metadata metadata.yml diff --git a/setup.py b/setup.py index 85c935a5..7b6422fe 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ setup( "uvicorn~=0.11", "aiofiles~=0.4.0", "janus~=0.4.0", + "PyYAML~=5.3", ], entry_points=""" [console_scripts] diff --git a/tests/test_cli.py b/tests/test_cli.py index d1ab6522..ac5746c6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,8 +1,10 @@ -from .fixtures import app_client, make_app_client -from datasette.cli import cli +from .fixtures import app_client, make_app_client, TestClient as _TestClient +from datasette.cli import cli, serve from click.testing import CliRunner -import pathlib +import io import json +import pathlib +import textwrap def test_inspect_cli(app_client): @@ -46,3 +48,38 @@ def test_spatialite_error_if_attempt_to_open_spatialite(): ) assert result.exit_code != 0 assert "trying to load a SpatiaLite database" in result.output + + +def test_metadata_yaml(): + yaml_file = io.StringIO( + textwrap.dedent( + """ + title: Hello from YAML + """ + ) + ) + # Annoyingly we have to provide all default arguments here: + ds = serve.callback( + [], + metadata=yaml_file, + immutable=[], + host="127.0.0.1", + port=8001, + debug=False, + reload=False, + cors=False, + sqlite_extensions=[], + inspect_file=None, + template_dir=None, + plugins_dir=None, + static=[], + memory=False, + config=[], + version_note=None, + help_config=False, + return_instance=True, + ) + client = _TestClient(ds.app()) + client.ds = ds + response = client.get("/-/metadata.json") + assert {"title": "Hello from YAML"} == response.json diff --git a/tests/test_publish_cloudrun.py b/tests/test_publish_cloudrun.py index efac7bbe..bb51b76b 100644 --- a/tests/test_publish_cloudrun.py +++ b/tests/test_publish_cloudrun.py @@ -146,12 +146,15 @@ def test_publish_cloudrun_plugin_secrets(mock_call, 
mock_output, mock_which): runner = CliRunner() with runner.isolated_filesystem(): open("test.db", "w").write("data") + open("metadata.yml", "w").write("title: Hello from metadata YAML") result = runner.invoke( cli.cli, [ "publish", "cloudrun", "test.db", + "--metadata", + "metadata.yml", "--service", "datasette", "--plugin-secret", @@ -183,9 +186,10 @@ CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data .strip() ) assert { + "title": "Hello from metadata YAML", "plugins": { "datasette-auth-github": { "client_id": {"$env": "DATASETTE_AUTH_GITHUB_CLIENT_ID"} } - } + }, } == json.loads(metadata)