Compare commits

...

1 commit

Author SHA1 Message Date
Claude
109cba4a30
Remove datasette publish and datasette package commands entirely
These features are being extracted into a plugin. This removes:

- The `datasette publish` command group (cloudrun and heroku subcommands)
- The `datasette package` command
- The `publish_subcommand` plugin hook from hookspecs
- The `datasette.publish` module (cloudrun, heroku, common)
- Docker-related utilities: `make_dockerfile`, `temporary_docker_directory`,
  `link_or_copy`, `link_or_copy_directory`, `APT_GET_DOCKERFILE_EXTRAS`
- All publish/package tests
- All publish/package documentation

https://claude.ai/code/session_01Ty3D3TwBAaic9vVXAr3VU5
2026-02-11 18:35:51 +00:00
22 changed files with 3 additions and 2139 deletions

View file

@ -9,8 +9,6 @@ import json
import os
import pathlib
from runpy import run_module
import shutil
from subprocess import call
import sys
import textwrap
import webbrowser
@ -32,7 +30,6 @@ from .utils import (
SpatialiteConnectionProblem,
initial_path_for_datasette,
pairs_to_nested_config,
temporary_docker_directory,
value_as_boolean,
SpatialiteNotFound,
StaticMount,
@ -171,16 +168,6 @@ async def inspect_(files, sqlite_extensions):
return data
@cli.group()
def publish():
"""Publish specified SQLite database files to the internet along with a Datasette-powered interface and API"""
pass
# Register publish plugins
pm.hook.publish_subcommand(publish=publish)
@cli.command()
@click.option("--all", help="Include built-in default plugins", is_flag=True)
@click.option(
@ -202,112 +189,6 @@ def plugins(all, requirements, plugins_dir):
click.echo(json.dumps(app._plugins(all=all), indent=4))
@cli.command()
@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True)
@click.option(
"-t",
"--tag",
help="Name for the resulting Docker container, can optionally use name:tag format",
)
@click.option(
"-m",
"--metadata",
type=click.File(mode="r"),
help="Path to JSON/YAML file containing metadata to publish",
)
@click.option("--extra-options", help="Extra options to pass to datasette serve")
@click.option("--branch", help="Install datasette from a GitHub branch e.g. main")
@click.option(
"--template-dir",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
help="Path to directory containing custom templates",
)
@click.option(
"--plugins-dir",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
help="Path to directory containing custom plugins",
)
@click.option(
"--static",
type=StaticMount(),
help="Serve static files from this directory at /MOUNT/...",
multiple=True,
)
@click.option(
"--install", help="Additional packages (e.g. plugins) to install", multiple=True
)
@click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension")
@click.option("--version-note", help="Additional note to show on /-/versions")
@click.option(
"--secret",
help="Secret used for signing secure values, such as signed cookies",
envvar="DATASETTE_PUBLISH_SECRET",
default=lambda: os.urandom(32).hex(),
)
@click.option(
"-p",
"--port",
default=8001,
type=click.IntRange(1, 65535),
help="Port to run the server on, defaults to 8001",
)
@click.option("--title", help="Title for metadata")
@click.option("--license", help="License label for metadata")
@click.option("--license_url", help="License URL for metadata")
@click.option("--source", help="Source label for metadata")
@click.option("--source_url", help="Source URL for metadata")
@click.option("--about", help="About label for metadata")
@click.option("--about_url", help="About URL for metadata")
def package(
files,
tag,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
spatialite,
version_note,
secret,
port,
**extra_metadata,
):
"""Package SQLite files into a Datasette Docker container"""
if not shutil.which("docker"):
click.secho(
' The package command requires "docker" to be installed and configured ',
bg="red",
fg="white",
bold=True,
err=True,
)
sys.exit(1)
with temporary_docker_directory(
files,
"datasette",
metadata=metadata,
extra_options=extra_options,
branch=branch,
template_dir=template_dir,
plugins_dir=plugins_dir,
static=static,
install=install,
spatialite=spatialite,
version_note=version_note,
secret=secret,
extra_metadata=extra_metadata,
port=port,
):
args = ["docker", "build"]
if tag:
args.append("-t")
args.append(tag)
args.append(".")
call(args)
@cli.command()
@click.argument("packages", nargs=-1)
@click.option(

View file

@ -49,11 +49,6 @@ def extra_template_vars(
"""Extra template variables to be made available to the template - can return dict or callable or awaitable"""
@hookspec
def publish_subcommand(publish):
"""Subcommands for 'datasette publish'"""
@hookspec
def render_cell(row, value, column, table, database, datasette, request):
"""Customize rendering of HTML table cell values"""

View file

@ -16,8 +16,6 @@ else:
DEFAULT_PLUGINS = (
"datasette.publish.heroku",
"datasette.publish.cloudrun",
"datasette.facets",
"datasette.filters",
"datasette.sql_functions",

View file

@ -1,313 +0,0 @@
from datasette import hookimpl
import click
import json
import os
import re
from subprocess import CalledProcessError, check_call, check_output
from .common import (
add_common_publish_arguments_and_options,
fail_if_publish_binary_not_installed,
)
from ..utils import temporary_docker_directory
@hookimpl
def publish_subcommand(publish):
@publish.command()
@add_common_publish_arguments_and_options
@click.option(
"-n",
"--name",
default="datasette",
help="Application name to use when building",
)
@click.option(
"--service",
default="",
help="Cloud Run service to deploy (or over-write)",
)
@click.option("--spatialite", is_flag=True, help="Enable SpatialLite extension")
@click.option(
"--show-files",
is_flag=True,
help="Output the generated Dockerfile and metadata.json",
)
@click.option(
"--memory",
callback=_validate_memory,
help="Memory to allocate in Cloud Run, e.g. 1Gi",
)
@click.option(
"--cpu",
type=click.Choice(["1", "2", "4"]),
help="Number of vCPUs to allocate in Cloud Run",
)
@click.option(
"--timeout",
type=int,
help="Build timeout in seconds",
)
@click.option(
"--apt-get-install",
"apt_get_extras",
multiple=True,
help="Additional packages to apt-get install",
)
@click.option(
"--max-instances",
type=int,
default=1,
show_default=True,
help="Maximum Cloud Run instances (use 0 to remove the limit)",
)
@click.option(
"--min-instances",
type=int,
help="Minimum Cloud Run instances",
)
@click.option(
"--artifact-repository",
default="datasette",
show_default=True,
help="Artifact Registry repository to store the image",
)
@click.option(
"--artifact-region",
default="us",
show_default=True,
help="Artifact Registry location (region or multi-region)",
)
@click.option(
"--artifact-project",
default=None,
help="Project ID for Artifact Registry (defaults to the active project)",
)
def cloudrun(
files,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
plugin_secret,
version_note,
secret,
title,
license,
license_url,
source,
source_url,
about,
about_url,
name,
service,
spatialite,
show_files,
memory,
cpu,
timeout,
apt_get_extras,
max_instances,
min_instances,
artifact_repository,
artifact_region,
artifact_project,
):
"Publish databases to Datasette running on Cloud Run"
fail_if_publish_binary_not_installed(
"gcloud", "Google Cloud", "https://cloud.google.com/sdk/"
)
project = check_output(
"gcloud config get-value project", shell=True, universal_newlines=True
).strip()
artifact_project = artifact_project or project
# Ensure Artifact Registry exists for the target image
_ensure_artifact_registry(
artifact_project=artifact_project,
artifact_region=artifact_region,
artifact_repository=artifact_repository,
)
artifact_host = (
artifact_region
if artifact_region.endswith("-docker.pkg.dev")
else f"{artifact_region}-docker.pkg.dev"
)
if not service:
# Show the user their current services, then prompt for one
click.echo("Please provide a service name for this deployment\n")
click.echo("Using an existing service name will over-write it")
click.echo("")
existing_services = get_existing_services()
if existing_services:
click.echo("Your existing services:\n")
for existing_service in existing_services:
click.echo(
" {name} - created {created} - {url}".format(
**existing_service
)
)
click.echo("")
service = click.prompt("Service name", type=str)
image_id = (
f"{artifact_host}/{artifact_project}/"
f"{artifact_repository}/datasette-{service}"
)
extra_metadata = {
"title": title,
"license": license,
"license_url": license_url,
"source": source,
"source_url": source_url,
"about": about,
"about_url": about_url,
}
if not extra_options:
extra_options = ""
if "force_https_urls" not in extra_options:
if extra_options:
extra_options += " "
extra_options += "--setting force_https_urls on"
environment_variables = {}
if plugin_secret:
extra_metadata["plugins"] = {}
for plugin_name, plugin_setting, setting_value in plugin_secret:
environment_variable = (
f"{plugin_name}_{plugin_setting}".upper().replace("-", "_")
)
environment_variables[environment_variable] = setting_value
extra_metadata["plugins"].setdefault(plugin_name, {})[
plugin_setting
] = {"$env": environment_variable}
with temporary_docker_directory(
files,
name,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
spatialite,
version_note,
secret,
extra_metadata,
environment_variables,
apt_get_extras=apt_get_extras,
):
if show_files:
if os.path.exists("metadata.json"):
print("=== metadata.json ===\n")
with open("metadata.json") as fp:
print(fp.read())
print("\n==== Dockerfile ====\n")
with open("Dockerfile") as fp:
print(fp.read())
print("\n====================\n")
check_call(
"gcloud builds submit --tag {}{}".format(
image_id, " --timeout {}".format(timeout) if timeout else ""
),
shell=True,
)
extra_deploy_options = []
for option, value in (
("--memory", memory),
("--cpu", cpu),
("--max-instances", max_instances),
("--min-instances", min_instances),
):
if value is not None:
extra_deploy_options.append("{} {}".format(option, value))
check_call(
"gcloud run deploy --allow-unauthenticated --platform=managed --image {} {}{}".format(
image_id,
service,
" " + " ".join(extra_deploy_options) if extra_deploy_options else "",
),
shell=True,
)
def _ensure_artifact_registry(artifact_project, artifact_region, artifact_repository):
"""Ensure Artifact Registry API is enabled and the repository exists."""
enable_cmd = (
"gcloud services enable artifactregistry.googleapis.com "
f"--project {artifact_project} --quiet"
)
try:
check_call(enable_cmd, shell=True)
except CalledProcessError as exc:
raise click.ClickException(
"Failed to enable artifactregistry.googleapis.com. "
"Please ensure you have permissions to manage services."
) from exc
describe_cmd = (
"gcloud artifacts repositories describe {repo} --project {project} "
"--location {location} --quiet"
).format(
repo=artifact_repository,
project=artifact_project,
location=artifact_region,
)
try:
check_call(describe_cmd, shell=True)
return
except CalledProcessError:
create_cmd = (
"gcloud artifacts repositories create {repo} --repository-format=docker "
'--location {location} --project {project} --description "Datasette Cloud Run images" --quiet'
).format(
repo=artifact_repository,
location=artifact_region,
project=artifact_project,
)
try:
check_call(create_cmd, shell=True)
click.echo(f"Created Artifact Registry repository '{artifact_repository}'")
except CalledProcessError as exc:
raise click.ClickException(
"Failed to create Artifact Registry repository. "
"Use --artifact-repository/--artifact-region to point to an existing repo "
"or create one manually."
) from exc
def get_existing_services():
services = json.loads(
check_output(
"gcloud run services list --platform=managed --format json",
shell=True,
universal_newlines=True,
)
)
return [
{
"name": service["metadata"]["name"],
"created": service["metadata"]["creationTimestamp"],
"url": service["status"]["address"]["url"],
}
for service in services
if "url" in service["status"]
]
def _validate_memory(ctx, param, value):
if value and re.match(r"^\d+(Gi|G|Mi|M)$", value) is None:
raise click.BadParameter("--memory should be a number then Gi/G/Mi/M e.g 1Gi")
return value

View file

@ -1,98 +0,0 @@
from ..utils import StaticMount
import click
import os
import shutil
import sys
def add_common_publish_arguments_and_options(subcommand):
for decorator in reversed(
(
click.argument("files", type=click.Path(exists=True), nargs=-1),
click.option(
"-m",
"--metadata",
type=click.File(mode="r"),
help="Path to JSON/YAML file containing metadata to publish",
),
click.option(
"--extra-options", help="Extra options to pass to datasette serve"
),
click.option(
"--branch", help="Install datasette from a GitHub branch e.g. main"
),
click.option(
"--template-dir",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
help="Path to directory containing custom templates",
),
click.option(
"--plugins-dir",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
help="Path to directory containing custom plugins",
),
click.option(
"--static",
type=StaticMount(),
help="Serve static files from this directory at /MOUNT/...",
multiple=True,
),
click.option(
"--install",
help="Additional packages (e.g. plugins) to install",
multiple=True,
),
click.option(
"--plugin-secret",
nargs=3,
type=(str, str, str),
callback=validate_plugin_secret,
multiple=True,
help="Secrets to pass to plugins, e.g. --plugin-secret datasette-auth-github client_id xxx",
),
click.option(
"--version-note", help="Additional note to show on /-/versions"
),
click.option(
"--secret",
help="Secret used for signing secure values, such as signed cookies",
envvar="DATASETTE_PUBLISH_SECRET",
default=lambda: os.urandom(32).hex(),
),
click.option("--title", help="Title for metadata"),
click.option("--license", help="License label for metadata"),
click.option("--license_url", help="License URL for metadata"),
click.option("--source", help="Source label for metadata"),
click.option("--source_url", help="Source URL for metadata"),
click.option("--about", help="About label for metadata"),
click.option("--about_url", help="About URL for metadata"),
)
):
subcommand = decorator(subcommand)
return subcommand
def fail_if_publish_binary_not_installed(binary, publish_target, install_link):
"""Exit (with error message) if ``binary` isn't installed"""
if not shutil.which(binary):
click.secho(
"Publishing to {publish_target} requires {binary} to be installed and configured".format(
publish_target=publish_target, binary=binary
),
bg="red",
fg="white",
bold=True,
err=True,
)
click.echo(
f"Follow the instructions at {install_link}",
err=True,
)
sys.exit(1)
def validate_plugin_secret(ctx, param, value):
for plugin_name, plugin_setting, setting_value in value:
if "'" in setting_value:
raise click.BadParameter("--plugin-secret cannot contain single quotes")
return value

View file

@ -1,252 +0,0 @@
from contextlib import contextmanager
from datasette import hookimpl
import click
import json
import os
import pathlib
import shlex
import shutil
from subprocess import call, check_output
import tempfile
from .common import (
add_common_publish_arguments_and_options,
fail_if_publish_binary_not_installed,
)
from datasette.utils import link_or_copy, link_or_copy_directory, parse_metadata
@hookimpl
def publish_subcommand(publish):
@publish.command()
@add_common_publish_arguments_and_options
@click.option(
"-n",
"--name",
default="datasette",
help="Application name to use when deploying",
)
@click.option(
"--tar",
help="--tar option to pass to Heroku, e.g. --tar=/usr/local/bin/gtar",
)
@click.option(
"--generate-dir",
type=click.Path(dir_okay=True, file_okay=False),
help="Output generated application files and stop without deploying",
)
def heroku(
files,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
plugin_secret,
version_note,
secret,
title,
license,
license_url,
source,
source_url,
about,
about_url,
name,
tar,
generate_dir,
):
"Publish databases to Datasette running on Heroku"
fail_if_publish_binary_not_installed(
"heroku", "Heroku", "https://cli.heroku.com"
)
# Check for heroku-builds plugin
plugins = [
line.split()[0] for line in check_output(["heroku", "plugins"]).splitlines()
]
if b"heroku-builds" not in plugins:
click.echo(
"Publishing to Heroku requires the heroku-builds plugin to be installed."
)
click.confirm(
"Install it? (this will run `heroku plugins:install heroku-builds`)",
abort=True,
)
call(["heroku", "plugins:install", "heroku-builds"])
extra_metadata = {
"title": title,
"license": license,
"license_url": license_url,
"source": source,
"source_url": source_url,
"about": about,
"about_url": about_url,
}
environment_variables = {}
if plugin_secret:
extra_metadata["plugins"] = {}
for plugin_name, plugin_setting, setting_value in plugin_secret:
environment_variable = (
f"{plugin_name}_{plugin_setting}".upper().replace("-", "_")
)
environment_variables[environment_variable] = setting_value
extra_metadata["plugins"].setdefault(plugin_name, {})[
plugin_setting
] = {"$env": environment_variable}
with temporary_heroku_directory(
files,
name,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
version_note,
secret,
extra_metadata,
):
if generate_dir:
# Recursively copy files from current working directory to it
if pathlib.Path(generate_dir).exists():
raise click.ClickException("Directory already exists")
shutil.copytree(".", generate_dir)
click.echo(
f"Generated files written to {generate_dir}, stopping without deploying",
err=True,
)
return
app_name = None
if name:
# Check to see if this app already exists
list_output = check_output(["heroku", "apps:list", "--json"]).decode(
"utf8"
)
apps = json.loads(list_output)
for app in apps:
if app["name"] == name:
app_name = name
break
if not app_name:
# Create a new app
cmd = ["heroku", "apps:create"]
if name:
cmd.append(name)
cmd.append("--json")
create_output = check_output(cmd).decode("utf8")
app_name = json.loads(create_output)["name"]
for key, value in environment_variables.items():
call(["heroku", "config:set", "-a", app_name, f"{key}={value}"])
tar_option = []
if tar:
tar_option = ["--tar", tar]
call(
["heroku", "builds:create", "-a", app_name, "--include-vcs-ignore"]
+ tar_option
)
@contextmanager
def temporary_heroku_directory(
files,
name,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
version_note,
secret,
extra_metadata=None,
):
extra_metadata = extra_metadata or {}
tmp = tempfile.TemporaryDirectory()
saved_cwd = os.getcwd()
file_paths = [os.path.join(saved_cwd, file_path) for file_path in files]
file_names = [os.path.split(f)[-1] for f in files]
if metadata:
metadata_content = parse_metadata(metadata.read())
else:
metadata_content = {}
for key, value in extra_metadata.items():
if value:
metadata_content[key] = value
try:
os.chdir(tmp.name)
if metadata_content:
with open("metadata.json", "w") as fp:
fp.write(json.dumps(metadata_content, indent=2))
with open("runtime.txt", "w") as fp:
fp.write("python-3.11.0")
if branch:
install = [
f"https://github.com/simonw/datasette/archive/{branch}.zip"
] + list(install)
else:
install = ["datasette"] + list(install)
with open("requirements.txt", "w") as fp:
fp.write("\n".join(install))
os.mkdir("bin")
with open("bin/post_compile", "w") as fp:
fp.write("datasette inspect --inspect-file inspect-data.json")
extras = []
if template_dir:
link_or_copy_directory(
os.path.join(saved_cwd, template_dir),
os.path.join(tmp.name, "templates"),
)
extras.extend(["--template-dir", "templates/"])
if plugins_dir:
link_or_copy_directory(
os.path.join(saved_cwd, plugins_dir), os.path.join(tmp.name, "plugins")
)
extras.extend(["--plugins-dir", "plugins/"])
if version_note:
extras.extend(["--version-note", version_note])
if metadata_content:
extras.extend(["--metadata", "metadata.json"])
if extra_options:
extras.extend(extra_options.split())
for mount_point, path in static:
link_or_copy_directory(
os.path.join(saved_cwd, path), os.path.join(tmp.name, mount_point)
)
extras.extend(["--static", f"{mount_point}:{mount_point}"])
quoted_files = " ".join(
["-i {}".format(shlex.quote(file_name)) for file_name in file_names]
)
procfile_cmd = "web: datasette serve --host 0.0.0.0 {quoted_files} --cors --port $PORT --inspect-file inspect-data.json {extras}".format(
quoted_files=quoted_files, extras=" ".join(extras)
)
with open("Procfile", "w") as fp:
fp.write(procfile_cmd)
for path, filename in zip(file_paths, file_names):
link_or_copy(path, os.path.join(tmp.name, filename))
yield
finally:
tmp.cleanup()
os.chdir(saved_cwd)

View file

@ -10,20 +10,15 @@ import hashlib
import inspect
import json
import markupsafe
import mergedeep
import os
import re
import shlex
import tempfile
import typing
import time
import types
import secrets
import shutil
from typing import Iterable, List, Tuple
import urllib
import yaml
from .shutil_backport import copytree
from .sqlite import sqlite3, supports_table_xinfo
if typing.TYPE_CHECKING:
@ -99,12 +94,6 @@ reserved_words = set(
).split()
)
APT_GET_DOCKERFILE_EXTRAS = r"""
RUN apt-get update && \
apt-get install -y {} && \
rm -rf /var/lib/apt/lists/*
"""
# Can replace with sqlite-utils when I add that dependency
SPATIALITE_PATHS = (
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
@ -408,172 +397,6 @@ def escape_sqlite(s):
return f"[{s}]"
def make_dockerfile(
files,
metadata_file,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
spatialite,
version_note,
secret,
environment_variables=None,
port=8001,
apt_get_extras=None,
):
cmd = ["datasette", "serve", "--host", "0.0.0.0"]
environment_variables = environment_variables or {}
environment_variables["DATASETTE_SECRET"] = secret
apt_get_extras = apt_get_extras or []
for filename in files:
cmd.extend(["-i", filename])
cmd.extend(["--cors", "--inspect-file", "inspect-data.json"])
if metadata_file:
cmd.extend(["--metadata", f"{metadata_file}"])
if template_dir:
cmd.extend(["--template-dir", "templates/"])
if plugins_dir:
cmd.extend(["--plugins-dir", "plugins/"])
if version_note:
cmd.extend(["--version-note", f"{version_note}"])
if static:
for mount_point, _ in static:
cmd.extend(["--static", f"{mount_point}:{mount_point}"])
if extra_options:
for opt in extra_options.split():
cmd.append(f"{opt}")
cmd = [shlex.quote(part) for part in cmd]
# port attribute is a (fixed) env variable and should not be quoted
cmd.extend(["--port", "$PORT"])
cmd = " ".join(cmd)
if branch:
install = [f"https://github.com/simonw/datasette/archive/{branch}.zip"] + list(
install
)
else:
install = ["datasette"] + list(install)
apt_get_extras_ = []
apt_get_extras_.extend(apt_get_extras)
apt_get_extras = apt_get_extras_
if spatialite:
apt_get_extras.extend(["python3-dev", "gcc", "libsqlite3-mod-spatialite"])
environment_variables["SQLITE_EXTENSIONS"] = (
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so"
)
return """
FROM python:3.11.0-slim-bullseye
COPY . /app
WORKDIR /app
{apt_get_extras}
{environment_variables}
RUN pip install -U {install_from}
RUN datasette inspect {files} --inspect-file inspect-data.json
ENV PORT {port}
EXPOSE {port}
CMD {cmd}""".format(
apt_get_extras=(
APT_GET_DOCKERFILE_EXTRAS.format(" ".join(apt_get_extras))
if apt_get_extras
else ""
),
environment_variables="\n".join(
[
"ENV {} '{}'".format(key, value)
for key, value in environment_variables.items()
]
),
install_from=" ".join(install),
files=" ".join(files),
port=port,
cmd=cmd,
).strip()
@contextmanager
def temporary_docker_directory(
files,
name,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
spatialite,
version_note,
secret,
extra_metadata=None,
environment_variables=None,
port=8001,
apt_get_extras=None,
):
extra_metadata = extra_metadata or {}
tmp = tempfile.TemporaryDirectory()
# We create a datasette folder in there to get a nicer now deploy name
datasette_dir = os.path.join(tmp.name, name)
os.mkdir(datasette_dir)
saved_cwd = os.getcwd()
file_paths = [os.path.join(saved_cwd, file_path) for file_path in files]
file_names = [os.path.split(f)[-1] for f in files]
if metadata:
metadata_content = parse_metadata(metadata.read())
else:
metadata_content = {}
# Merge in the non-null values in extra_metadata
mergedeep.merge(
metadata_content,
{key: value for key, value in extra_metadata.items() if value is not None},
)
try:
dockerfile = make_dockerfile(
file_names,
metadata_content and "metadata.json",
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
spatialite,
version_note,
secret,
environment_variables,
port=port,
apt_get_extras=apt_get_extras,
)
os.chdir(datasette_dir)
if metadata_content:
with open("metadata.json", "w") as fp:
fp.write(json.dumps(metadata_content, indent=2))
with open("Dockerfile", "w") as fp:
fp.write(dockerfile)
for path, filename in zip(file_paths, file_names):
link_or_copy(path, os.path.join(datasette_dir, filename))
if template_dir:
link_or_copy_directory(
os.path.join(saved_cwd, template_dir),
os.path.join(datasette_dir, "templates"),
)
if plugins_dir:
link_or_copy_directory(
os.path.join(saved_cwd, plugins_dir),
os.path.join(datasette_dir, "plugins"),
)
for mount_point, path in static:
link_or_copy_directory(
os.path.join(saved_cwd, path), os.path.join(datasette_dir, mount_point)
)
yield datasette_dir
finally:
tmp.cleanup()
os.chdir(saved_cwd)
def detect_primary_keys(conn, table):
"""Figure out primary keys for a table."""
columns = table_column_details(conn, table)
@ -797,23 +620,6 @@ def to_css_class(s):
return "-".join(bits)
def link_or_copy(src, dst):
# Intended for use in populating a temp directory. We link if possible,
# but fall back to copying if the temp directory is on a different device
# https://github.com/simonw/datasette/issues/141
try:
os.link(src, dst)
except OSError:
shutil.copyfile(src, dst)
def link_or_copy_directory(src, dst):
try:
copytree(src, dst, copy_function=os.link, dirs_exist_ok=True)
except OSError:
copytree(src, dst, dirs_exist_ok=True)
def module_from_path(path, name):
# Adapted from http://sayspy.blogspot.com/2011/07/how-to-import-module-from-just-file.html
mod = types.ModuleType(name)

View file

@ -51,9 +51,7 @@ Running ``datasette --help`` shows a list of all of the available commands.
create-token Create a signed API token for the specified actor ID
inspect Generate JSON summary of provided database files
install Install plugins and packages from PyPI into the same...
package Package SQLite files into a Datasette Docker container
plugins List currently installed plugins
publish Publish specified SQLite database files to the internet...
uninstall Uninstall plugins and Python packages from the Datasette...
@ -409,200 +407,6 @@ Uninstall one or more plugins.
.. [[[end]]]
.. _cli_help_publish___help:
datasette publish
=================
Shows a list of available deployment targets for :ref:`publishing data <publishing>` with Datasette.
Additional deployment targets can be added by plugins that use the :ref:`plugin_hook_publish_subcommand` hook.
.. [[[cog
help(["publish", "--help"])
.. ]]]
::
Usage: datasette publish [OPTIONS] COMMAND [ARGS]...
Publish specified SQLite database files to the internet along with a
Datasette-powered interface and API
Options:
--help Show this message and exit.
Commands:
cloudrun Publish databases to Datasette running on Cloud Run
heroku Publish databases to Datasette running on Heroku
.. [[[end]]]
.. _cli_help_publish_cloudrun___help:
datasette publish cloudrun
==========================
See :ref:`publish_cloud_run`.
.. [[[cog
help(["publish", "cloudrun", "--help"])
.. ]]]
::
Usage: datasette publish cloudrun [OPTIONS] [FILES]...
Publish databases to Datasette running on Cloud Run
Options:
-m, --metadata FILENAME Path to JSON/YAML file containing metadata to
publish
--extra-options TEXT Extra options to pass to datasette serve
--branch TEXT Install datasette from a GitHub branch e.g.
main
--template-dir DIRECTORY Path to directory containing custom templates
--plugins-dir DIRECTORY Path to directory containing custom plugins
--static MOUNT:DIRECTORY Serve static files from this directory at
/MOUNT/...
--install TEXT Additional packages (e.g. plugins) to install
--plugin-secret <TEXT TEXT TEXT>...
Secrets to pass to plugins, e.g. --plugin-
secret datasette-auth-github client_id xxx
--version-note TEXT Additional note to show on /-/versions
--secret TEXT Secret used for signing secure values, such as
signed cookies
--title TEXT Title for metadata
--license TEXT License label for metadata
--license_url TEXT License URL for metadata
--source TEXT Source label for metadata
--source_url TEXT Source URL for metadata
--about TEXT About label for metadata
--about_url TEXT About URL for metadata
-n, --name TEXT Application name to use when building
--service TEXT Cloud Run service to deploy (or over-write)
--spatialite Enable SpatialLite extension
--show-files Output the generated Dockerfile and
metadata.json
--memory TEXT Memory to allocate in Cloud Run, e.g. 1Gi
--cpu [1|2|4] Number of vCPUs to allocate in Cloud Run
--timeout INTEGER Build timeout in seconds
--apt-get-install TEXT Additional packages to apt-get install
--max-instances INTEGER Maximum Cloud Run instances (use 0 to remove
the limit) [default: 1]
--min-instances INTEGER Minimum Cloud Run instances
--artifact-repository TEXT Artifact Registry repository to store the
image [default: datasette]
--artifact-region TEXT Artifact Registry location (region or multi-
region) [default: us]
--artifact-project TEXT Project ID for Artifact Registry (defaults to
the active project)
--help Show this message and exit.
.. [[[end]]]
.. _cli_help_publish_heroku___help:
datasette publish heroku
========================
See :ref:`publish_heroku`.
.. [[[cog
help(["publish", "heroku", "--help"])
.. ]]]
::
Usage: datasette publish heroku [OPTIONS] [FILES]...
Publish databases to Datasette running on Heroku
Options:
-m, --metadata FILENAME Path to JSON/YAML file containing metadata to
publish
--extra-options TEXT Extra options to pass to datasette serve
--branch TEXT Install datasette from a GitHub branch e.g.
main
--template-dir DIRECTORY Path to directory containing custom templates
--plugins-dir DIRECTORY Path to directory containing custom plugins
--static MOUNT:DIRECTORY Serve static files from this directory at
/MOUNT/...
--install TEXT Additional packages (e.g. plugins) to install
--plugin-secret <TEXT TEXT TEXT>...
Secrets to pass to plugins, e.g. --plugin-
secret datasette-auth-github client_id xxx
--version-note TEXT Additional note to show on /-/versions
--secret TEXT Secret used for signing secure values, such as
signed cookies
--title TEXT Title for metadata
--license TEXT License label for metadata
--license_url TEXT License URL for metadata
--source TEXT Source label for metadata
--source_url TEXT Source URL for metadata
--about TEXT About label for metadata
--about_url TEXT About URL for metadata
-n, --name TEXT Application name to use when deploying
--tar TEXT --tar option to pass to Heroku, e.g.
--tar=/usr/local/bin/gtar
--generate-dir DIRECTORY Output generated application files and stop
without deploying
--help Show this message and exit.
.. [[[end]]]
.. _cli_help_package___help:
datasette package
=================
Package SQLite files into a Datasette Docker container, see :ref:`cli_package`.
.. [[[cog
help(["package", "--help"])
.. ]]]
::
Usage: datasette package [OPTIONS] FILES...
Package SQLite files into a Datasette Docker container
Options:
-t, --tag TEXT Name for the resulting Docker container, can
optionally use name:tag format
-m, --metadata FILENAME Path to JSON/YAML file containing metadata to
publish
--extra-options TEXT Extra options to pass to datasette serve
--branch TEXT Install datasette from a GitHub branch e.g. main
--template-dir DIRECTORY Path to directory containing custom templates
--plugins-dir DIRECTORY Path to directory containing custom plugins
--static MOUNT:DIRECTORY Serve static files from this directory at /MOUNT/...
--install TEXT Additional packages (e.g. plugins) to install
--spatialite Enable SpatialLite extension
--version-note TEXT Additional note to show on /-/versions
--secret TEXT Secret used for signing secure values, such as
signed cookies
-p, --port INTEGER RANGE Port to run the server on, defaults to 8001
[1<=x<=65535]
--title TEXT Title for metadata
--license TEXT License label for metadata
--license_url TEXT License URL for metadata
--source TEXT Source label for metadata
--source_url TEXT Source URL for metadata
--about TEXT About label for metadata
--about_url TEXT About URL for metadata
--help Show this message and exit.
.. [[[end]]]
.. _cli_help_inspect___help:
datasette inspect
@ -615,7 +419,7 @@ If you are opening an immutable database, you can pass this file to the ``--insp
datasette inspect mydatabase.db > inspect-data.json
datasette serve -i mydatabase.db --inspect-file inspect-data.json
This performance optimization is used automatically by some of the ``datasette publish`` commands. You are unlikely to need to apply this optimization manually.
You are unlikely to need to apply this optimization manually.
.. [[[cog
help(["inspect", "--help"])

View file

@ -151,17 +151,6 @@ You can reference those files from ``datasette.yaml`` like this, see :ref:`custo
}
.. [[[end]]]
Publishing static assets
~~~~~~~~~~~~~~~~~~~~~~~~
The :ref:`cli_publish` command can be used to publish your static assets,
using the same syntax as above::
datasette publish cloudrun mydb.db --static assets:static-files/
This will upload the contents of the ``static-files/`` directory as part of the
deployment, and configure Datasette to correctly serve the assets from ``/assets/``.
.. _customization_custom_templates:
Custom templates

View file

@ -4,9 +4,7 @@
Deploying Datasette
=====================
The quickest way to deploy a Datasette instance on the internet is to use the ``datasette publish`` command, described in :ref:`publishing`. This can be used to quickly deploy Datasette to a number of hosting providers including Heroku, Google Cloud Run and Vercel.
You can deploy Datasette to other hosting providers using the instructions on this page.
You can deploy Datasette to various hosting providers using the instructions on this page.
.. _deploying_fundamentals:

View file

@ -43,7 +43,6 @@ Contents
ecosystem
cli-reference
pages
publish
deploying
json_api
sql_queries

View file

@ -43,7 +43,7 @@ Then later you can start Datasette against the ``counts.json`` file and use it t
You need to use the ``-i`` immutable mode against the database file here or the counts from the JSON file will be ignored.
You will rarely need to use this optimization in every-day use, but several of the ``datasette publish`` commands described in :ref:`publishing` use this optimization for better performance when deploying a database file to a hosting provider.
You will rarely need to use this optimization in every-day use.
HTTP caching
------------

View file

@ -414,64 +414,6 @@ This will add the following to the end of your page:
Example: `datasette-cluster-map <https://datasette.io/plugins/datasette-cluster-map>`_
.. _plugin_hook_publish_subcommand:
publish_subcommand(publish)
---------------------------
``publish`` - Click publish command group
The Click command group for the ``datasette publish`` subcommand
This hook allows you to create new providers for the ``datasette publish``
command. Datasette uses this hook internally to implement the default ``cloudrun``
and ``heroku`` subcommands, so you can read
`their source <https://github.com/simonw/datasette/tree/main/datasette/publish>`_
to see examples of this hook in action.
Let's say you want to build a plugin that adds a ``datasette publish my_hosting_provider --api_key=xxx mydatabase.db`` publish command. Your implementation would start like this:
.. code-block:: python
from datasette import hookimpl
from datasette.publish.common import (
add_common_publish_arguments_and_options,
)
import click
@hookimpl
def publish_subcommand(publish):
@publish.command()
@add_common_publish_arguments_and_options
@click.option(
"-k",
"--api_key",
help="API key for talking to my hosting provider",
)
def my_hosting_provider(
files,
metadata,
extra_options,
branch,
template_dir,
plugins_dir,
static,
install,
plugin_secret,
version_note,
secret,
title,
license,
license_url,
source,
source_url,
about,
about_url,
api_key,
): ...
Examples: `datasette-publish-fly <https://datasette.io/plugins/datasette-publish-fly>`_, `datasette-publish-vercel <https://datasette.io/plugins/datasette-publish-vercel>`_
.. _plugin_hook_render_cell:
render_cell(row, value, column, table, database, datasette, request)

View file

@ -69,19 +69,6 @@ You can also define one-off per-project plugins by saving them as ``plugin_name.
datasette mydb.db --plugins-dir=plugins/
Deploying plugins using datasette publish
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``datasette publish`` and ``datasette package`` commands both take an optional ``--install`` argument. You can use this one or more times to tell Datasette to ``pip install`` specific plugins as part of the process::
datasette publish cloudrun mydb.db --install=datasette-vega
You can use the name of a package on PyPI or any of the other valid arguments to ``pip install`` such as a URL to a ``.zip`` file::
datasette publish cloudrun mydb.db \
--install=https://url-to-my-package.zip
.. _plugins_datasette_load_plugins:
Controlling which plugins are loaded
@ -484,14 +471,6 @@ Some plugins may need configuration that should stay secret - API keys for examp
}
.. [[[end]]]
If you are publishing your data using the :ref:`datasette publish <cli_publish>` family of commands, you can use the ``--plugin-secret`` option to set these secrets at publish time. For example, using Heroku you might run the following command::
datasette publish heroku my_database.db \
--name my-heroku-app-demo \
--install=datasette-auth-github \
--plugin-secret datasette-auth-github client_id your_client_id \
--plugin-secret datasette-auth-github client_secret your_client_secret
This will set the necessary environment variables and add the following to the deployed ``metadata.yaml``:
.. [[[cog

View file

@ -1,187 +0,0 @@
.. _publishing:
=================
Publishing data
=================
Datasette includes tools for publishing and deploying your data to the internet. The ``datasette publish`` command will deploy a new Datasette instance containing your databases directly to a Heroku or Google Cloud hosting account. You can also use ``datasette package`` to create a Docker image that bundles your databases together with the datasette application that is used to serve them.
.. _cli_publish:
datasette publish
=================
Once you have created a SQLite database (e.g. using `csvs-to-sqlite <https://github.com/simonw/csvs-to-sqlite/>`_) you can deploy it to a hosting account using a single command.
You will need a hosting account with `Heroku <https://www.heroku.com/>`__ or `Google Cloud <https://cloud.google.com/>`__. Once you have created your account you will need to install and configure the ``heroku`` or ``gcloud`` command-line tools.
.. _publish_cloud_run:
Publishing to Google Cloud Run
------------------------------
`Google Cloud Run <https://cloud.google.com/run/>`__ allows you to publish data in a scale-to-zero environment, so your application will start running when the first request is received and will shut down again when traffic ceases. This means you only pay for time spent serving traffic.
.. warning::
Cloud Run is a great option for inexpensively hosting small, low traffic projects - but costs can add up for projects that serve a lot of requests.
Be particularly careful if your project has tables with large numbers of rows. Search engine crawlers that index a page for every row could result in a high bill.
The `datasette-block-robots <https://datasette.io/plugins/datasette-block-robots>`__ plugin can be used to request search engine crawlers omit crawling your site, which can help avoid this issue.
You will first need to install and configure the Google Cloud CLI tools by following `these instructions <https://cloud.google.com/sdk/>`__.
You can then publish one or more SQLite database files to Google Cloud Run using the following command::
datasette publish cloudrun mydatabase.db --service=my-database
A Cloud Run **service** is a single hosted application. The service name you specify will be used as part of the Cloud Run URL. If you deploy to a service name that you have used in the past your new deployment will replace the previous one.
If you omit the ``--service`` option you will be asked to pick a service name interactively during the deploy.
You may need to interact with prompts from the tool. Many of the prompts ask for values that can be `set as properties for the Google Cloud SDK <https://cloud.google.com/sdk/docs/properties>`_ if you want to avoid the prompts.
For example, the default region for the deployed instance can be set using the command::
gcloud config set run/region us-central1
You should replace ``us-central1`` with your desired `region <https://cloud.google.com/about/locations>`_. Alternately, you can specify the region by setting the ``CLOUDSDK_RUN_REGION`` environment variable.
Once it has finished it will output a URL like this one::
Service [my-service] revision [my-service-00001] has been deployed
and is serving traffic at https://my-service-j7hipcg4aq-uc.a.run.app
Cloud Run provides a URL on the ``.run.app`` domain, but you can also point your own domain or subdomain at your Cloud Run service - see `mapping custom domains <https://cloud.google.com/run/docs/mapping-custom-domains>`__ in the Cloud Run documentation for details.
See :ref:`cli_help_publish_cloudrun___help` for the full list of options for this command.
.. _publish_heroku:
Publishing to Heroku
--------------------
To publish your data using `Heroku <https://www.heroku.com/>`__, first create an account there and install and configure the `Heroku CLI tool <https://devcenter.heroku.com/articles/heroku-cli>`_.
You can publish one or more databases to Heroku using the following command::
datasette publish heroku mydatabase.db
This will output some details about the new deployment, including a URL like this one::
https://limitless-reef-88278.herokuapp.com/ deployed to Heroku
You can specify a custom app name by passing ``-n my-app-name`` to the publish command. This will also allow you to overwrite an existing app.
Rather than deploying directly you can use the ``--generate-dir`` option to output the files that would be deployed to a directory::
datasette publish heroku mydatabase.db --generate-dir=/tmp/deploy-this-to-heroku
See :ref:`cli_help_publish_heroku___help` for the full list of options for this command.
.. _publish_vercel:
Publishing to Vercel
--------------------
`Vercel <https://vercel.com/>`__ - previously known as Zeit Now - provides a layer over AWS Lambda to allow for quick, scale-to-zero deployment. You can deploy Datasette instances to Vercel using the `datasette-publish-vercel <https://github.com/simonw/datasette-publish-vercel>`__ plugin.
::
pip install datasette-publish-vercel
datasette publish vercel mydatabase.db --project my-database-project
Not every feature is supported: consult the `datasette-publish-vercel README <https://github.com/simonw/datasette-publish-vercel/blob/main/README.md>`__ for more details.
.. _publish_fly:
Publishing to Fly
-----------------
`Fly <https://fly.io/>`__ is a `competitively priced <https://fly.io/docs/pricing/>`__ Docker-compatible hosting platform that supports running applications in globally distributed data centers close to your end users. You can deploy Datasette instances to Fly using the `datasette-publish-fly <https://github.com/simonw/datasette-publish-fly>`__ plugin.
::
pip install datasette-publish-fly
datasette publish fly mydatabase.db --app="my-app"
Consult the `datasette-publish-fly README <https://github.com/simonw/datasette-publish-fly/blob/main/README.md>`__ for more details.
.. _publish_custom_metadata_and_plugins:
Custom metadata and plugins
---------------------------
``datasette publish`` accepts a number of additional options which can be used to further customize your Datasette instance.
You can define your own :ref:`metadata` and deploy that with your instance like so::
datasette publish cloudrun --service=my-service mydatabase.db -m metadata.json
If you just want to set the title, license or source information you can do that directly using extra options to ``datasette publish``::
datasette publish cloudrun mydatabase.db --service=my-service \
--title="Title of my database" \
--source="Where the data originated" \
--source_url="http://www.example.com/"
You can also specify plugins you would like to install. For example, if you want to include the `datasette-vega <https://github.com/simonw/datasette-vega>`_ visualization plugin you can use the following::
datasette publish cloudrun mydatabase.db --service=my-service --install=datasette-vega
If a plugin has any :ref:`plugins_configuration_secret` you can use the ``--plugin-secret`` option to set those secrets at publish time. For example, using Heroku with `datasette-auth-github <https://github.com/simonw/datasette-auth-github>`__ you might run the following command::
datasette publish heroku my_database.db \
--name my-heroku-app-demo \
--install=datasette-auth-github \
--plugin-secret datasette-auth-github client_id your_client_id \
--plugin-secret datasette-auth-github client_secret your_client_secret
.. _cli_package:
datasette package
=================
If you have docker installed (e.g. using `Docker for Mac <https://www.docker.com/docker-mac>`_) you can use the ``datasette package`` command to create a new Docker image in your local repository containing the datasette app bundled together with one or more SQLite databases::
datasette package mydatabase.db
Here's example output for the package command::
datasette package parlgov.db --extra-options="--setting sql_time_limit_ms 2500"
Sending build context to Docker daemon 4.459MB
Step 1/7 : FROM python:3.11.0-slim-bullseye
---> 79e1dc9af1c1
Step 2/7 : COPY . /app
---> Using cache
---> cd4ec67de656
Step 3/7 : WORKDIR /app
---> Using cache
---> 139699e91621
Step 4/7 : RUN pip install datasette
---> Using cache
---> 340efa82bfd7
Step 5/7 : RUN datasette inspect parlgov.db --inspect-file inspect-data.json
---> Using cache
---> 5fddbe990314
Step 6/7 : EXPOSE 8001
---> Using cache
---> 8e83844b0fed
Step 7/7 : CMD datasette serve parlgov.db --port 8001 --inspect-file inspect-data.json --setting sql_time_limit_ms 2500
---> Using cache
---> 1bd380ea8af3
Successfully built 1bd380ea8af3
You can now run the resulting container like so::
docker run -p 8081:8001 1bd380ea8af3
This exposes port 8001 inside the container as port 8081 on your host machine, so you can access the application at ``http://localhost:8081/``
You can customize the port that is exposed by the container using the ``--port`` option::
datasette package mydatabase.db --port 8080
A full list of options can be seen by running ``datasette package --help``:
See :ref:`cli_help_package___help` for the full list of options for this command.

View file

@ -376,15 +376,3 @@ One way to generate a secure random secret is to use Python like this::
Plugin authors can make use of this signing mechanism in their plugins using the :ref:`datasette.sign() <datasette_sign>` and :ref:`datasette.unsign() <datasette_unsign>` methods.
.. _setting_publish_secrets:
Using secrets with datasette publish
------------------------------------
The :ref:`cli_publish` and :ref:`cli_package` commands both generate a secret for you automatically when Datasette is deployed.
This means that every time you deploy a new version of a Datasette project, a new secret will be generated. This will cause signed cookies to become invalid on every fresh deploy.
You can fix this by creating a secret that will be used for multiple deploys and passing it using the ``--secret`` option::
datasette publish cloudrun mydb.db --service=my-service --secret=cdb19e94283a20f9d42cca5

View file

@ -116,8 +116,6 @@ def pytest_collection_modifyitems(items):
move_to_front(items, "test_serve_with_get_exit_code_for_error")
move_to_front(items, "test_inspect_cli_writes_to_file")
move_to_front(items, "test_spatialite_error_if_attempt_to_open_spatialite")
move_to_front(items, "test_package")
move_to_front(items, "test_package_with_port")
def move_to_front(items, test_name):

View file

@ -1,59 +0,0 @@
from click.testing import CliRunner
from datasette import cli
from unittest import mock
import os
import pathlib
import pytest
class CaptureDockerfile:
def __call__(self, _):
self.captured = (pathlib.Path() / "Dockerfile").read_text()
EXPECTED_DOCKERFILE = """
FROM python:3.11.0-slim-bullseye
COPY . /app
WORKDIR /app
ENV DATASETTE_SECRET 'sekrit'
RUN pip install -U datasette
RUN datasette inspect test.db --inspect-file inspect-data.json
ENV PORT {port}
EXPOSE {port}
CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --port $PORT
""".strip()
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.cli.call")
def test_package(mock_call, mock_which, tmp_path_factory):
mock_which.return_value = True
runner = CliRunner()
capture = CaptureDockerfile()
mock_call.side_effect = capture
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(cli.cli, ["package", "test.db", "--secret", "sekrit"])
assert 0 == result.exit_code
mock_call.assert_has_calls([mock.call(["docker", "build", "."])])
assert EXPECTED_DOCKERFILE.format(port=8001) == capture.captured
@mock.patch("shutil.which")
@mock.patch("datasette.cli.call")
def test_package_with_port(mock_call, mock_which, tmp_path_factory):
mock_which.return_value = True
capture = CaptureDockerfile()
mock_call.side_effect = capture
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli, ["package", "test.db", "-p", "8080", "--secret", "sekrit"]
)
assert 0 == result.exit_code
assert EXPECTED_DOCKERFILE.format(port=8080) == capture.captured

View file

@ -591,15 +591,6 @@ async def test_hook_prepare_jinja2_environment(ds_client):
assert "Hello there, 3,412,341, HI, 15" == rendered
def test_hook_publish_subcommand():
# This is hard to test properly, because publish subcommand plugins
# cannot be loaded using the --plugins-dir mechanism - they need
# to be installed using "pip install". So I'm cheating and taking
# advantage of the fact that cloudrun/heroku use the plugin hook
# to register themselves as default plugins.
assert ["cloudrun", "heroku"] == cli.publish.list_commands({})
@pytest.mark.asyncio
async def test_hook_register_facet_classes(ds_client):
response = await ds_client.get(
@ -1110,9 +1101,7 @@ def test_hook_register_commands():
"serve",
"inspect",
"install",
"package",
"plugins",
"publish",
"uninstall",
"create-token",
}
@ -1139,9 +1128,7 @@ def test_hook_register_commands():
"serve",
"inspect",
"install",
"package",
"plugins",
"publish",
"uninstall",
"verify",
"unverify",

View file

@ -1,408 +0,0 @@
from click.testing import CliRunner
from datasette import cli
from unittest import mock
import json
import os
import pytest
import textwrap
@pytest.mark.serial
@mock.patch("shutil.which")
def test_publish_cloudrun_requires_gcloud(mock_which, tmp_path_factory):
mock_which.return_value = False
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(cli.cli, ["publish", "cloudrun", "test.db"])
assert result.exit_code == 1
assert "Publishing to Google Cloud requires gcloud" in result.output
@mock.patch("shutil.which")
def test_publish_cloudrun_invalid_database(mock_which):
mock_which.return_value = True
runner = CliRunner()
result = runner.invoke(cli.cli, ["publish", "cloudrun", "woop.db"])
assert result.exit_code == 2
assert "Path 'woop.db' does not exist" in result.output
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
@mock.patch("datasette.publish.cloudrun.get_existing_services")
def test_publish_cloudrun_prompts_for_service(
mock_get_existing_services, mock_call, mock_output, mock_which, tmp_path_factory
):
mock_get_existing_services.return_value = [
{"name": "existing", "created": "2019-01-01", "url": "http://www.example.com/"}
]
mock_output.return_value = "myproject"
mock_which.return_value = True
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli, ["publish", "cloudrun", "test.db"], input="input-service"
)
assert (
"Please provide a service name for this deployment\n\n"
"Using an existing service name will over-write it\n\n"
"Your existing services:\n\n"
" existing - created 2019-01-01 - http://www.example.com/\n\n"
"Service name: input-service"
) == result.output.strip()
assert 0 == result.exit_code
tag = "us-docker.pkg.dev/myproject/datasette/datasette-input-service"
mock_call.assert_has_calls(
[
mock.call(
"gcloud services enable artifactregistry.googleapis.com --project myproject --quiet",
shell=True,
),
mock.call(
"gcloud artifacts repositories describe datasette --project myproject --location us --quiet",
shell=True,
),
mock.call(f"gcloud builds submit --tag {tag}", shell=True),
mock.call(
"gcloud run deploy --allow-unauthenticated --platform=managed --image {} input-service --max-instances 1".format(
tag
),
shell=True,
),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
def test_publish_cloudrun(mock_call, mock_output, mock_which, tmp_path_factory):
mock_output.return_value = "myproject"
mock_which.return_value = True
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli, ["publish", "cloudrun", "test.db", "--service", "test"]
)
assert 0 == result.exit_code
tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test"
mock_call.assert_has_calls(
[
mock.call(
f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet",
shell=True,
),
mock.call(
f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet",
shell=True,
),
mock.call(f"gcloud builds submit --tag {tag}", shell=True),
mock.call(
"gcloud run deploy --allow-unauthenticated --platform=managed --image {} test --max-instances 1".format(
tag
),
shell=True,
),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
@pytest.mark.parametrize(
"memory,cpu,timeout,min_instances,max_instances,expected_gcloud_args",
[
["1Gi", None, None, None, None, "--memory 1Gi"],
["2G", None, None, None, None, "--memory 2G"],
["256Mi", None, None, None, None, "--memory 256Mi"],
[
"4",
None,
None,
None,
None,
None,
],
[
"GB",
None,
None,
None,
None,
None,
],
[None, 1, None, None, None, "--cpu 1"],
[None, 2, None, None, None, "--cpu 2"],
[None, 3, None, None, None, None],
[None, 4, None, None, None, "--cpu 4"],
["2G", 4, None, None, None, "--memory 2G --cpu 4"],
[None, None, 1800, None, None, "--timeout 1800"],
[None, None, None, 2, None, "--min-instances 2"],
[None, None, None, 2, 4, "--min-instances 2 --max-instances 4"],
[None, 2, None, None, 4, "--cpu 2 --max-instances 4"],
],
)
def test_publish_cloudrun_memory_cpu(
mock_call,
mock_output,
mock_which,
memory,
cpu,
timeout,
min_instances,
max_instances,
expected_gcloud_args,
tmp_path_factory,
):
mock_output.return_value = "myproject"
mock_which.return_value = True
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
args = ["publish", "cloudrun", "test.db", "--service", "test"]
if memory:
args.extend(["--memory", memory])
if cpu:
args.extend(["--cpu", str(cpu)])
if timeout:
args.extend(["--timeout", str(timeout)])
result = runner.invoke(cli.cli, args)
if expected_gcloud_args is None:
assert 2 == result.exit_code
return
assert 0 == result.exit_code
tag = f"us-docker.pkg.dev/{mock_output.return_value}/datasette/datasette-test"
expected_call = (
"gcloud run deploy --allow-unauthenticated --platform=managed"
" --image {} test".format(tag)
)
expected_build_call = f"gcloud builds submit --tag {tag}"
if memory:
expected_call += " --memory {}".format(memory)
if cpu:
expected_call += " --cpu {}".format(cpu)
if timeout:
expected_build_call += f" --timeout {timeout}"
# max_instances defaults to 1
expected_call += " --max-instances 1"
mock_call.assert_has_calls(
[
mock.call(
f"gcloud services enable artifactregistry.googleapis.com --project {mock_output.return_value} --quiet",
shell=True,
),
mock.call(
f"gcloud artifacts repositories describe datasette --project {mock_output.return_value} --location us --quiet",
shell=True,
),
mock.call(expected_build_call, shell=True),
mock.call(
expected_call,
shell=True,
),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
def test_publish_cloudrun_plugin_secrets(
mock_call, mock_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_output.return_value = "myproject"
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
with open("metadata.yml", "w") as fp:
fp.write(
textwrap.dedent(
"""
title: Hello from metadata YAML
plugins:
datasette-auth-github:
foo: bar
"""
).strip()
)
result = runner.invoke(
cli.cli,
[
"publish",
"cloudrun",
"test.db",
"--metadata",
"metadata.yml",
"--service",
"datasette",
"--plugin-secret",
"datasette-auth-github",
"client_id",
"x-client-id",
"--show-files",
"--secret",
"x-secret",
],
)
assert result.exit_code == 0
dockerfile = (
result.output.split("==== Dockerfile ====\n")[1]
.split("\n====================\n")[0]
.strip()
)
expected = textwrap.dedent(
r"""
FROM python:3.11.0-slim-bullseye
COPY . /app
WORKDIR /app
ENV DATASETTE_AUTH_GITHUB_CLIENT_ID 'x-client-id'
ENV DATASETTE_SECRET 'x-secret'
RUN pip install -U datasette
RUN datasette inspect test.db --inspect-file inspect-data.json
ENV PORT 8001
EXPOSE 8001
CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --metadata metadata.json --setting force_https_urls on --port $PORT"""
).strip()
assert expected == dockerfile
metadata = (
result.output.split("=== metadata.json ===\n")[1]
.split("\n==== Dockerfile ====\n")[0]
.strip()
)
assert {
"title": "Hello from metadata YAML",
"plugins": {
"datasette-auth-github": {
"client_id": {"$env": "DATASETTE_AUTH_GITHUB_CLIENT_ID"},
"foo": "bar",
},
},
} == json.loads(metadata)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
def test_publish_cloudrun_apt_get_install(
mock_call, mock_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_output.return_value = "myproject"
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli,
[
"publish",
"cloudrun",
"test.db",
"--service",
"datasette",
"--show-files",
"--secret",
"x-secret",
"--apt-get-install",
"ripgrep",
"--spatialite",
],
)
assert result.exit_code == 0
dockerfile = (
result.output.split("==== Dockerfile ====\n")[1]
.split("\n====================\n")[0]
.strip()
)
expected = textwrap.dedent(
r"""
FROM python:3.11.0-slim-bullseye
COPY . /app
WORKDIR /app
RUN apt-get update && \
apt-get install -y ripgrep python3-dev gcc libsqlite3-mod-spatialite && \
rm -rf /var/lib/apt/lists/*
ENV DATASETTE_SECRET 'x-secret'
ENV SQLITE_EXTENSIONS '/usr/lib/x86_64-linux-gnu/mod_spatialite.so'
RUN pip install -U datasette
RUN datasette inspect test.db --inspect-file inspect-data.json
ENV PORT 8001
EXPOSE 8001
CMD datasette serve --host 0.0.0.0 -i test.db --cors --inspect-file inspect-data.json --setting force_https_urls on --port $PORT
"""
).strip()
assert expected == dockerfile
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.cloudrun.check_output")
@mock.patch("datasette.publish.cloudrun.check_call")
@pytest.mark.parametrize(
"extra_options,expected",
[
("", "--setting force_https_urls on"),
(
"--setting base_url /foo",
"--setting base_url /foo --setting force_https_urls on",
),
("--setting force_https_urls off", "--setting force_https_urls off"),
],
)
def test_publish_cloudrun_extra_options(
mock_call, mock_output, mock_which, extra_options, expected, tmp_path_factory
):
mock_which.return_value = True
mock_output.return_value = "myproject"
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli,
[
"publish",
"cloudrun",
"test.db",
"--service",
"datasette",
"--show-files",
"--extra-options",
extra_options,
],
)
assert result.exit_code == 0
dockerfile = (
result.output.split("==== Dockerfile ====\n")[1]
.split("\n====================\n")[0]
.strip()
)
last_line = dockerfile.split("\n")[-1]
extra_options = (
last_line.split("--inspect-file inspect-data.json")[1]
.split("--port")[0]
.strip()
)
assert extra_options == expected

View file

@ -1,183 +0,0 @@
from click.testing import CliRunner
from datasette import cli
from unittest import mock
import os
import pathlib
import pytest
@pytest.mark.serial
@mock.patch("shutil.which")
def test_publish_heroku_requires_heroku(mock_which, tmp_path_factory):
mock_which.return_value = False
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(cli.cli, ["publish", "heroku", "test.db"])
assert result.exit_code == 1
assert "Publishing to Heroku requires heroku" in result.output
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.heroku.check_output")
@mock.patch("datasette.publish.heroku.call")
def test_publish_heroku_installs_plugin(
mock_call, mock_check_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_check_output.side_effect = lambda s: {"['heroku', 'plugins']": b""}[repr(s)]
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("t.db", "w") as fp:
fp.write("data")
result = runner.invoke(cli.cli, ["publish", "heroku", "t.db"], input="y\n")
assert 0 != result.exit_code
mock_check_output.assert_has_calls(
[mock.call(["heroku", "plugins"]), mock.call(["heroku", "apps:list", "--json"])]
)
mock_call.assert_has_calls(
[mock.call(["heroku", "plugins:install", "heroku-builds"])]
)
@mock.patch("shutil.which")
def test_publish_heroku_invalid_database(mock_which):
mock_which.return_value = True
runner = CliRunner()
result = runner.invoke(cli.cli, ["publish", "heroku", "woop.db"])
assert result.exit_code == 2
assert "Path 'woop.db' does not exist" in result.output
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.heroku.check_output")
@mock.patch("datasette.publish.heroku.call")
def test_publish_heroku(mock_call, mock_check_output, mock_which, tmp_path_factory):
mock_which.return_value = True
mock_check_output.side_effect = lambda s: {
"['heroku', 'plugins']": b"heroku-builds",
"['heroku', 'apps:list', '--json']": b"[]",
"['heroku', 'apps:create', 'datasette', '--json']": b'{"name": "f"}',
}[repr(s)]
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(cli.cli, ["publish", "heroku", "test.db", "--tar", "gtar"])
assert 0 == result.exit_code, result.output
mock_call.assert_has_calls(
[
mock.call(
[
"heroku",
"builds:create",
"-a",
"f",
"--include-vcs-ignore",
"--tar",
"gtar",
]
),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.heroku.check_output")
@mock.patch("datasette.publish.heroku.call")
def test_publish_heroku_plugin_secrets(
mock_call, mock_check_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_check_output.side_effect = lambda s: {
"['heroku', 'plugins']": b"heroku-builds",
"['heroku', 'apps:list', '--json']": b"[]",
"['heroku', 'apps:create', 'datasette', '--json']": b'{"name": "f"}',
}[repr(s)]
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
result = runner.invoke(
cli.cli,
[
"publish",
"heroku",
"test.db",
"--plugin-secret",
"datasette-auth-github",
"client_id",
"x-client-id",
],
)
assert 0 == result.exit_code, result.output
mock_call.assert_has_calls(
[
mock.call(
[
"heroku",
"config:set",
"-a",
"f",
"DATASETTE_AUTH_GITHUB_CLIENT_ID=x-client-id",
]
),
mock.call(["heroku", "builds:create", "-a", "f", "--include-vcs-ignore"]),
]
)
@pytest.mark.serial
@mock.patch("shutil.which")
@mock.patch("datasette.publish.heroku.check_output")
@mock.patch("datasette.publish.heroku.call")
def test_publish_heroku_generate_dir(
mock_call, mock_check_output, mock_which, tmp_path_factory
):
mock_which.return_value = True
mock_check_output.side_effect = lambda s: {
"['heroku', 'plugins']": b"heroku-builds",
}[repr(s)]
runner = CliRunner()
os.chdir(tmp_path_factory.mktemp("runner"))
with open("test.db", "w") as fp:
fp.write("data")
output = str(tmp_path_factory.mktemp("generate_dir") / "output")
result = runner.invoke(
cli.cli,
[
"publish",
"heroku",
"test.db",
"--generate-dir",
output,
],
)
assert result.exit_code == 0
path = pathlib.Path(output)
assert path.exists()
file_names = {str(r.relative_to(path)) for r in path.glob("*")}
assert file_names == {
"requirements.txt",
"bin",
"runtime.txt",
"Procfile",
"test.db",
}
for name, expected in (
("requirements.txt", "datasette"),
("runtime.txt", "python-3.11.0"),
(
"Procfile",
(
"web: datasette serve --host 0.0.0.0 -i test.db "
"--cors --port $PORT --inspect-file inspect-data.json"
),
),
):
with open(path / name) as fp:
assert fp.read().strip() == expected