From d1cb73180b4b5a07538380db76298618a5fc46b6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 5 Mar 2022 11:31:49 -0800 Subject: [PATCH] Dash encoding functions, tests and docs, refs #1439 --- datasette/utils/__init__.py | 12 ++++++++++++ docs/internals.rst | 23 +++++++++++++++++++++++ tests/test_utils.py | 16 ++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index e17b4d7f..f7b341ca 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -1139,3 +1139,15 @@ def add_cors_headers(headers): headers["Access-Control-Allow-Origin"] = "*" headers["Access-Control-Allow-Headers"] = "Authorization" headers["Access-Control-Expose-Headers"] = "Link" + + +@documented +def dash_encode(s: str) -> str: + "Returns dash-encoded string - for example ``/foo/bar`` -> ``-/foo-/bar``" + return s.replace("-", "--").replace(".", "-.").replace("/", "-/") + + +@documented +def dash_decode(s: str) -> str: + "Decodes a dash-encoded string, so ``-/foo-/bar`` -> ``/foo/bar``" + return s.replace("-/", "/").replace("-.", ".").replace("--", "-") diff --git a/docs/internals.rst b/docs/internals.rst index 12ef5c54..52688e5e 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -876,6 +876,29 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth .. autofunction:: datasette.utils.await_me_maybe +.. _internals_dash_encoding: + +Dash encoding +------------- + +Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URL that references them. 
+ +Dash encoding applies the following rules, in order: + +- All single ``-`` characters are replaced by ``--`` +- ``.`` characters are replaced by ``-.`` +- ``/`` characters are replaced by ``-/`` + +These rules are applied in reverse order to decode a dash encoded string. + +.. _internals_utils_dash_encode: + +.. autofunction:: datasette.utils.dash_encode + +.. _internals_utils_dash_decode: + +.. autofunction:: datasette.utils.dash_decode + .. _internals_tracer: datasette.tracer diff --git a/tests/test_utils.py b/tests/test_utils.py index e7d67045..e3386324 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -646,3 +646,19 @@ async def test_derive_named_parameters(sql, expected): db = ds.get_database("_memory") params = await utils.derive_named_parameters(db, sql) assert params == expected + + +@pytest.mark.parametrize( + "original,expected", + ( + ("abc", "abc"), + ("/foo/bar", "-/foo-/bar"), + ("/-/bar", "-/---/bar"), + ("-/db-/table---.csv-.csv", "---/db---/table-------.csv---.csv"), + ), +) +def test_dash_encoding(original, expected): + actual = utils.dash_encode(original) + assert actual == expected + # And test round-trip + assert original == utils.dash_decode(actual)