Forbid same DB passed twice or via config_dir, closes #2561

This commit is contained in:
Simon Willison 2025-10-30 21:39:55 -07:00
commit ba654b5576
2 changed files with 92 additions and 18 deletions

View file

@ -590,13 +590,20 @@ def serve(
internal=internal,
)
# if files is a single directory, use that as config_dir=
if 1 == len(files) and os.path.isdir(files[0]):
kwargs["config_dir"] = pathlib.Path(files[0])
files = []
# Separate directories from files
directories = [f for f in files if os.path.isdir(f)]
file_paths = [f for f in files if not os.path.isdir(f)]
# Handle config_dir - only one directory allowed
if len(directories) > 1:
raise click.ClickException(
"Cannot pass multiple directories. Pass a single directory as config_dir."
)
elif len(directories) == 1:
kwargs["config_dir"] = pathlib.Path(directories[0])
# Verify list of files, create if needed (and --create)
for file in files:
for file in file_paths:
if not pathlib.Path(file).exists():
if create:
sqlite3.connect(file).execute("vacuum")
@ -607,8 +614,32 @@ def serve(
)
)
# De-duplicate files so 'datasette db.db db.db' only attaches one /db
files = list(dict.fromkeys(files))
# Check for duplicate files by resolving all paths to their absolute forms
# Collect all database files that will be loaded (explicit files + config_dir files)
all_db_files = []
# Add explicit files
for file in file_paths:
all_db_files.append((file, pathlib.Path(file).resolve()))
# Add config_dir databases if config_dir is set
if "config_dir" in kwargs:
config_dir = kwargs["config_dir"]
for ext in ("db", "sqlite", "sqlite3"):
for db_file in config_dir.glob(f"*.{ext}"):
all_db_files.append((str(db_file), db_file.resolve()))
# Check for duplicates
seen = {}
for original_path, resolved_path in all_db_files:
if resolved_path in seen:
raise click.ClickException(
f"Duplicate database file: '{original_path}' and '{seen[resolved_path]}' "
f"both refer to {resolved_path}"
)
seen[resolved_path] = original_path
files = file_paths
try:
ds = Datasette(files, **kwargs)

View file

@ -447,17 +447,6 @@ def test_serve_duplicate_database_names(tmpdir):
assert {db["name"] for db in databases} == {"db", "db_2"}
def test_serve_deduplicate_same_database_path(tmpdir):
"'datasette db.db db.db' should only attach one database, /db"
runner = CliRunner()
db_path = str(tmpdir / "db.db")
sqlite3.connect(db_path).execute("vacuum")
result = runner.invoke(cli, [db_path, db_path, "--get", "/-/databases.json"])
assert result.exit_code == 0, result.output
databases = json.loads(result.output)
assert {db["name"] for db in databases} == {"db"}
@pytest.mark.parametrize(
"filename", ["test-database (1).sqlite", "database (1).sqlite"]
)
@ -496,3 +485,57 @@ def test_internal_db(tmpdir):
)
assert result.exit_code == 0
assert internal_path.exists()
def test_duplicate_database_files_error(tmpdir):
"""Test that passing the same database file multiple times raises an error"""
runner = CliRunner()
db_path = str(tmpdir / "test.db")
sqlite3.connect(db_path).execute("vacuum")
# Test with exact duplicate
result = runner.invoke(cli, ["serve", db_path, db_path, "--get", "/"])
assert result.exit_code == 1
assert "Duplicate database file" in result.output
assert "both refer to" in result.output
# Test with different paths to same file (relative vs absolute)
result2 = runner.invoke(
cli, ["serve", db_path, str(pathlib.Path(db_path).resolve()), "--get", "/"]
)
assert result2.exit_code == 1
assert "Duplicate database file" in result2.output
# Test that a file in the config_dir can't also be passed explicitly
config_dir = tmpdir / "config"
config_dir.mkdir()
config_db_path = str(config_dir / "data.db")
sqlite3.connect(config_db_path).execute("vacuum")
result3 = runner.invoke(
cli, ["serve", config_db_path, str(config_dir), "--get", "/"]
)
assert result3.exit_code == 1
assert "Duplicate database file" in result3.output
assert "both refer to" in result3.output
# Test that mixing a file NOT in the directory with a directory works fine
other_db_path = str(tmpdir / "other.db")
sqlite3.connect(other_db_path).execute("vacuum")
result4 = runner.invoke(
cli, ["serve", other_db_path, str(config_dir), "--get", "/-/databases.json"]
)
assert result4.exit_code == 0
databases = json.loads(result4.output)
assert {db["name"] for db in databases} == {"other", "data"}
# Test that multiple directories raise an error
config_dir2 = tmpdir / "config2"
config_dir2.mkdir()
result5 = runner.invoke(
cli, ["serve", str(config_dir), str(config_dir2), "--get", "/"]
)
assert result5.exit_code == 1
assert "Cannot pass multiple directories" in result5.output