From 8f3e177d2caff117e83d34fe1e8016a6bb479e0e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 11:56:59 -0800 Subject: [PATCH 1/5] Min pytest-asyncio of 0.17 So that the asyncio_mode in pytest.ini does not produce a warning on older versions of that library. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8e69c2f5..e70839d6 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ setup( "test": [ "pytest>=5.2.2,<7.1.0", "pytest-xdist>=2.2.1,<2.6", - "pytest-asyncio>=0.10,<0.19", + "pytest-asyncio>=0.17,<0.19", "beautifulsoup4>=4.8.1,<4.11.0", "black==22.1.0", "pytest-timeout>=1.4.2,<2.2", From fd5df04d30bd5452fbc67c4730c8fba6031ea6fd Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 7 Mar 2022 12:45:11 -0800 Subject: [PATCH 2/5] Typo in comment --- datasette/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasette/app.py b/datasette/app.py index 7abccc05..5761a853 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1168,7 +1168,7 @@ class DatasetteRouter: path = "/" + path[len(base_url) :] scope = dict(scope, route_path=path) request = Request(scope, receive) - # Populate request_messages if ds_messages cookie is present + # Populate request._messages if ds_messages cookie is present try: request._messages = self.ds.unsign( request.cookies.get("ds_messages", ""), "messages" From 47151c23b471feea493c5d856a449b93a037c3c0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 14 Mar 2022 17:02:08 -0700 Subject: [PATCH 3/5] Dash encoding is now tilde encoding, refs #1657 Still a ton of broken tests though --- datasette/app.py | 8 +++--- datasette/url_builder.py | 10 +++---- datasette/utils/__init__.py | 37 +++++++++++------------- datasette/views/base.py | 57 ++++++++++++------------------------- datasette/views/table.py | 14 +++++---- docs/csv_export.rst | 18 ------------ docs/internals.rst | 2 +- tests/test_api.py | 13 ++------- tests/test_cli.py | 6 ++-- tests/test_html.py | 22 +++++++------- tests/test_table_api.py | 7 +++-- tests/test_utils.py | 30 ++++++------------- 12 files changed, 84 insertions(+), 140 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5761a853..91cc2494 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -1211,11 +1211,11 @@ class DatasetteRouter: return await self.handle_404(request, send) async def handle_404(self, request, send, exception=None): - # If path contains % encoding, redirect to dash encoding + # If path contains % encoding, redirect to tilde encoding if "%" in request.path: - # Try the same path but with "%" replaced by "-" - # and "-" replaced with "-2D" - new_path = request.path.replace("-", "-2D").replace("%", "-") + # Try the same path but with "%" replaced by "~" + # and "~" replaced with "~7E" + new_path = request.path.replace("~", "~7E").replace("%", "~") if request.query_string: new_path += "?{}".format(request.query_string) await asgi_send_redirect(send, new_path) diff --git a/datasette/url_builder.py b/datasette/url_builder.py index eebfe31e..9f072462 100644 --- a/datasette/url_builder.py +++ b/datasette/url_builder.py @@ -1,4 +1,4 @@ -from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString +from .utils import tilde_encode, path_with_format, HASH_LENGTH, PrefixedUrlString import urllib @@ -31,20 +31,20 @@ class Urls: db = self.ds.databases[database] if self.ds.setting("hash_urls") and db.hash: path = self.path( - f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format + f"{tilde_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format ) else: - path = self.path(dash_encode(database), format=format) + path = self.path(tilde_encode(database), format=format) return path def table(self, database, table, format=None): - path = f"{self.database(database)}/{dash_encode(table)}" + path = f"{self.database(database)}/{tilde_encode(table)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) def query(self, database, query, format=None): - path = f"{self.database(database)}/{dash_encode(query)}" + path = f"{self.database(database)}/{tilde_encode(query)}" if format is not None: path = path_with_format(path=path, format=format) return PrefixedUrlString(path) diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py index e7c9fb1c..71e0d2ed 100644 --- a/datasette/utils/__init__.py +++ b/datasette/utils/__init__.py @@ -113,12 +113,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any: def urlsafe_components(token): - """Splits token on commas and dash-decodes each component""" - return [dash_decode(b) for b in token.split(",")] + """Splits token on commas and tilde-decodes each component""" + return [tilde_decode(b) for b in token.split(",")] def path_from_row_pks(row, pks, use_rowid, quote=True): - """Generate an optionally dash-quoted unique identifier + """Generate an optionally tilde-encoded unique identifier for a row from its primary keys.""" if use_rowid: bits = [row["rowid"]] @@ -127,7 +127,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True): row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks ] if quote: - bits = [dash_encode(str(bit)) for bit in bits] + bits = [tilde_encode(str(bit)) for bit in bits] else: bits = [str(bit) for bit in bits] @@ -758,10 +758,7 @@ def path_with_format( path = request.path if request else path if replace_format and path.endswith(f".{replace_format}"): path = path[: -(1 + len(replace_format))] - if "." in path: - qs["_format"] = format - else: - path = f"{path}.{format}" + path = f"{path}.{format}" if qs: extra = urllib.parse.urlencode(sorted(qs.items())) if request and request.query_string: @@ -1143,38 +1140,38 @@ def add_cors_headers(headers): headers["Access-Control-Expose-Headers"] = "Link" -_DASH_ENCODING_SAFE = frozenset( +_TILDE_ENCODING_SAFE = frozenset( b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" b"abcdefghijklmnopqrstuvwxyz" - b"0123456789_" + b"0123456789_-" # This is the same as Python percent-encoding but I removed - # '.' and '-' and '~' + # '.' and '~' ) -class DashEncoder(dict): +class TildeEncoder(dict): # Keeps a cache internally, via __missing__ def __missing__(self, b): # Handle a cache miss, store encoded string in cache and return. - res = chr(b) if b in _DASH_ENCODING_SAFE else "-{:02X}".format(b) + res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b) self[b] = res return res -_dash_encoder = DashEncoder().__getitem__ +_tilde_encoder = TildeEncoder().__getitem__ @documented -def dash_encode(s: str) -> str: - "Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``" - return "".join(_dash_encoder(char) for char in s.encode("utf-8")) +def tilde_encode(s: str) -> str: + "Returns tilde-encoded string - for example ``/foo/bar`` -> ``~2Ffoo~2Fbar``" + return "".join(_tilde_encoder(char) for char in s.encode("utf-8")) @documented -def dash_decode(s: str) -> str: - "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``" +def tilde_decode(s: str) -> str: + "Decodes a tilde-encoded string, so ``~2Ffoo~2Fbar`` -> ``/foo/bar``" # Avoid accidentally decoding a %2f style sequence temp = secrets.token_hex(16) s = s.replace("%", temp) - decoded = urllib.parse.unquote(s.replace("-", "%")) + decoded = urllib.parse.unquote(s.replace("~", "%")) return decoded.replace(temp, "%") diff --git a/datasette/views/base.py b/datasette/views/base.py index 7cd385b7..c11e6aef 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -17,8 +17,8 @@ from datasette.utils import ( InvalidSql, LimitedWriter, call_with_supported_arguments, - dash_decode, - dash_encode, + tilde_decode, + tilde_encode, path_from_row_pks, path_with_added_args, path_with_removed_args, @@ -205,14 +205,14 @@ class DataView(BaseView): async def resolve_db_name(self, request, db_name, **kwargs): hash = None name = None - decoded_name = dash_decode(db_name) + decoded_name = tilde_decode(db_name) if decoded_name not in self.ds.databases and "-" in db_name: # No matching DB found, maybe it's a name-hash? name_bit, hash_bit = db_name.rsplit("-", 1) - if dash_decode(name_bit) not in self.ds.databases: + if tilde_decode(name_bit) not in self.ds.databases: raise NotFound(f"Database not found: {name}") else: - name = dash_decode(name_bit) + name = tilde_decode(name_bit) hash = hash_bit else: name = decoded_name @@ -235,7 +235,7 @@ class DataView(BaseView): return await db.table_exists(t) table, _format = await resolve_table_and_format( - table_and_format=dash_decode(kwargs["table_and_format"]), + table_and_format=tilde_decode(kwargs["table_and_format"]), table_exists=async_table_exists, allowed_formats=self.ds.renderers.keys(), ) @@ -243,11 +243,11 @@ class DataView(BaseView): if _format: kwargs["as_format"] = f".{_format}" elif kwargs.get("table"): - kwargs["table"] = dash_decode(kwargs["table"]) + kwargs["table"] = tilde_decode(kwargs["table"]) should_redirect = self.ds.urls.path(f"{name}-{expected}") if kwargs.get("table"): - should_redirect += "/" + dash_encode(kwargs["table"]) + should_redirect += "/" + tilde_encode(kwargs["table"]) if kwargs.get("pk_path"): should_redirect += "/" + kwargs["pk_path"] if kwargs.get("as_format"): @@ -448,38 +448,17 @@ class DataView(BaseView): return AsgiStream(stream_fn, headers=headers, content_type=content_type) - async def get_format(self, request, database, args): - """Determine the format of the response from the request, from URL - parameters or from a file extension. - - `args` is a dict of the path components parsed from the URL by the router. - """ - # If ?_format= is provided, use that as the format - _format = request.args.get("_format", None) - if not _format: - _format = (args.pop("as_format", None) or "").lstrip(".") - else: - args.pop("as_format", None) - if "table_and_format" in args: - db = self.ds.databases[database] - - async def async_table_exists(t): - return await db.table_exists(t) - - table, _ext_format = await resolve_table_and_format( - table_and_format=dash_decode(args["table_and_format"]), - table_exists=async_table_exists, - allowed_formats=self.ds.renderers.keys(), - ) - _format = _format or _ext_format - args["table"] = table - del args["table_and_format"] - elif "table" in args: - args["table"] = dash_decode(args["table"]) - return _format, args - async def view_get(self, request, database, hash, correct_hash_provided, **kwargs): - _format, kwargs = await self.get_format(request, database, kwargs) + _format = None + # _format may be in captured by the URL router + as_format = kwargs.pop("as_format", None) + if as_format: + _format = as_format + else: + # If there's a '.' in the last portion of the path, use that as format: + last_path_component = request.path.split("/")[-1] + if "." in last_path_component: + _format = last_path_component.split(".")[-1] if _format == "csv": return await self.as_csv(request, database, hash, **kwargs) diff --git a/datasette/views/table.py b/datasette/views/table.py index 1d81755e..72b8e9a4 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -12,7 +12,8 @@ from datasette.utils import ( MultiParams, append_querystring, compound_keys_after_sql, - dash_encode, + tilde_decode, + tilde_encode, escape_sqlite, filters_should_redirect, is_url, @@ -143,7 +144,7 @@ class RowTableShared(DataView): '{flat_pks}'.format( base_url=base_url, database=database, - table=dash_encode(table), + table=tilde_encode(table), flat_pks=str(markupsafe.escape(pk_path)), flat_pks_quoted=path_from_row_pks(row, pks, not pks), ) @@ -200,8 +201,8 @@ class RowTableShared(DataView): link_template.format( database=database, base_url=base_url, - table=dash_encode(other_table), - link_id=dash_encode(str(value)), + table=tilde_encode(other_table), + link_id=tilde_encode(str(value)), id=str(markupsafe.escape(value)), label=str(markupsafe.escape(label)) or "-", ) @@ -346,6 +347,8 @@ class TableView(RowTableShared): write=bool(canned_query.get("write")), ) + table = tilde_decode(table) + db = self.ds.databases[database] is_view = bool(await db.get_view_definition(table)) table_exists = bool(await db.table_exists(table)) @@ -766,7 +769,7 @@ class TableView(RowTableShared): if prefix is None: prefix = "$null" else: - prefix = dash_encode(str(prefix)) + prefix = tilde_encode(str(prefix)) next_value = f"{prefix},{next_value}" added_args = {"_next": next_value} if sort: @@ -938,6 +941,7 @@ class RowView(RowTableShared): name = "row" async def data(self, request, database, hash, table, pk_path, default_labels=False): + table = tilde_decode(table) await self.check_permissions( request, [ diff --git a/docs/csv_export.rst b/docs/csv_export.rst index b1cc673c..023fa05e 100644 --- a/docs/csv_export.rst +++ b/docs/csv_export.rst @@ -59,21 +59,3 @@ truncation error message. You can increase or remove this limit using the :ref:`setting_max_csv_mb` config setting. You can also disable the CSV export feature entirely using :ref:`setting_allow_csv_stream`. - -A note on URLs --------------- - -The default URL for the CSV representation of a table is that table with -``.csv`` appended to it: - -* https://latest.datasette.io/fixtures/facetable - HTML interface -* https://latest.datasette.io/fixtures/facetable.csv - CSV export -* https://latest.datasette.io/fixtures/facetable.json - JSON API - -This pattern doesn't work for tables with names that already end in ``.csv`` or -``.json``. For those tables, you can instead use the ``_format=`` query string -parameter: - -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv - HTML interface -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=csv - CSV export -* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=json - JSON API diff --git a/docs/internals.rst b/docs/internals.rst index d035e1f1..59a3c01d 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -545,7 +545,7 @@ These functions can be accessed via the ``{{ urls }}`` object in Datasette templ facetable table pragma_cache_size query -Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is usually the path with ``.json`` added on the end, but it may use ``?_format=json`` in cases where the path already includes ``.json``, for example a URL to a table named ``table.json``. +Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is the path with ``.json`` added on the end. These methods each return a ``datasette.utils.PrefixedUrlString`` object, which is a subclass of the Python ``str`` type. This allows the logic that considers the ``base_url`` setting to detect if that prefix has already been applied to the path. diff --git a/tests/test_api.py b/tests/test_api.py index dd916cf0..ba7ec3c5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -679,18 +679,9 @@ def test_row(app_client): assert [{"id": "1", "content": "hello"}] == response.json["rows"] -def test_row_format_in_querystring(app_client): - # regression test for https://github.com/simonw/datasette/issues/563 - response = app_client.get( - "/fixtures/simple_primary_key/1?_format=json&_shape=objects" - ) - assert response.status == 200 - assert [{"id": "1", "content": "hello"}] == response.json["rows"] - - def test_row_strange_table_name(app_client): response = app_client.get( - "/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects" + "/fixtures/table-2Fwith-2Fslashes-2Ecsv/3.json?_shape=objects" ) assert response.status == 200 assert [{"pk": "3", "content": "hey"}] == response.json["rows"] @@ -996,7 +987,7 @@ async def test_hidden_sqlite_stat1_table(): @pytest.mark.asyncio @pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d")) -async def test_dash_encoded_database_names(db_name): +async def test_tilde_encoded_database_names(db_name): ds = Datasette() ds.add_memory_database(db_name) response = await ds.client.get("/.json") diff --git a/tests/test_cli.py b/tests/test_cli.py index e30c2ad3..5afe72c1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,7 @@ from datasette.app import SETTINGS from datasette.plugins import DEFAULT_PLUGINS from datasette.cli import cli, serve from datasette.version import __version__ -from datasette.utils import dash_encode +from datasette.utils import tilde_encode from datasette.utils.sqlite import sqlite3 from click.testing import CliRunner import io @@ -295,12 +295,12 @@ def test_weird_database_names(ensure_eventloop, tmpdir, filename): assert result1.exit_code == 0, result1.output filename_no_stem = filename.rsplit(".", 1)[0] expected_link = '{}'.format( - dash_encode(filename_no_stem), filename_no_stem + tilde_encode(filename_no_stem), filename_no_stem ) assert expected_link in result1.output # Now try hitting that database page result2 = runner.invoke( - cli, [db_path, "--get", "/{}".format(dash_encode(filename_no_stem))] + cli, [db_path, "--get", "/{}".format(tilde_encode(filename_no_stem))] ) assert result2.exit_code == 0, result2.output diff --git a/tests/test_html.py b/tests/test_html.py index 55d78c05..c58cbd10 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -229,7 +229,7 @@ def test_row_page_does_not_truncate(): ["query", "db-fixtures", "query-neighborhood_search"], ), ( - "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table-2Fwith-2Fslashes-2Ecsv", ["table", "db-fixtures", "table-tablewithslashescsv-fa7563"], ), ( @@ -255,7 +255,7 @@ def test_css_classes_on_body(app_client, path, expected_classes): "table-fixtures-simple_primary_key.html, *table.html", ), ( - "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table-2Fwith-2Fslashes-2Ecsv", "table-fixtures-tablewithslashescsv-fa7563.html, *table.html", ), ( @@ -816,7 +816,8 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix): ), ("/fixtures/pragma_cache_size", None), ( - "/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬", + # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬 + "/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC", "/fixtures?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B", ), ("/fixtures/magic_parameters", None), @@ -824,6 +825,7 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix): ) def test_edit_sql_link_on_canned_queries(app_client, path, expected): response = app_client.get(path) + assert response.status == 200 expected_link = f'Edit SQL' if expected: assert expected_link in response.text @@ -898,8 +900,8 @@ def test_trace_correctly_escaped(app_client): # Table page ("/fixtures/facetable", "http://localhost/fixtures/facetable.json"), ( - "/fixtures/table%2Fwith%2Fslashes.csv", - "http://localhost/fixtures/table%2Fwith%2Fslashes.csv?_format=json", + "/fixtures/table-2Fwith-2Fslashes-2Ecsv", + "http://localhost/fixtures/table-2Fwith-2Fslashes-2Ecsv.json", ), # Row page ( @@ -959,17 +961,17 @@ def test_no_alternate_url_json(app_client, path): ( ( "/fivethirtyeight/twitter-ratio%2Fsenators", - "/fivethirtyeight/twitter-2Dratio-2Fsenators", + "/fivethirtyeight/twitter-ratio~2Fsenators", ), ( - "/fixtures/table%2Fwith%2Fslashes", - "/fixtures/table-2Fwith-2Fslashes", + "/fixtures/table%2Fwith%2Fslashes.csv", + "/fixtures/table~2Fwith~2Fslashes~2Ecsv", ), # query string should be preserved - ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"), + ("/foo/bar%2Fbaz?id=5", "/foo/bar~2Fbaz?id=5"), ), ) -def test_redirect_percent_encoding_to_dash_encoding(app_client, path, expected): +def test_redirect_percent_encoding_to_tilde_encoding(app_client, path, expected): response = app_client.get(path) assert response.status == 302 assert response.headers["location"] == expected diff --git a/tests/test_table_api.py b/tests/test_table_api.py index cc38d392..b60d7417 100644 --- a/tests/test_table_api.py +++ b/tests/test_table_api.py @@ -144,7 +144,7 @@ def test_table_shape_object_compound_primary_key(app_client): def test_table_with_slashes_in_name(app_client): response = app_client.get( - "/fixtures/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json" + "/fixtures/table-2Fwith-2Fslashes-2Ecsv.json?_shape=objects" ) assert response.status == 200 data = response.json @@ -1032,7 +1032,10 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client): def test_custom_query_with_unicode_characters(app_client): - response = app_client.get("/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json?_shape=array") + # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json + response = app_client.get( + "/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC.json?_shape=array" + ) assert [{"id": 1, "name": "San Francisco"}] == response.json diff --git a/tests/test_utils.py b/tests/test_utils.py index ff4f649a..3fac5078 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -393,9 +393,7 @@ def test_table_columns(): ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"), ("/foo/bar", "json", {}, "/foo/bar.json"), ("/foo/bar", "csv", {}, "/foo/bar.csv"), - ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"), ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"), - ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"), ( "/sf-trees/Street_Tree_List?_search=cherry&_size=1000", "csv", @@ -410,18 +408,6 @@ def test_path_with_format(path, format, extra_qs, expected): assert expected == actual -def test_path_with_format_replace_format(): - request = Request.fake("/foo/bar.csv") - assert ( - utils.path_with_format(request=request, format="blob") - == "/foo/bar.csv?_format=blob" - ) - assert ( - utils.path_with_format(request=request, format="blob", replace_format="csv") - == "/foo/bar.blob" - ) - - @pytest.mark.parametrize( "bytes,expected", [ @@ -652,15 +638,15 @@ async def test_derive_named_parameters(sql, expected): "original,expected", ( ("abc", "abc"), - ("/foo/bar", "-2Ffoo-2Fbar"), - ("/-/bar", "-2F-2D-2Fbar"), - ("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"), - (r"%~-/", "-25-7E-2D-2F"), - ("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"), + ("/foo/bar", "~2Ffoo~2Fbar"), + ("/-/bar", "~2F-~2Fbar"), + ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"), + (r"%~-/", "~25~7E-~2F"), + ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"), ), ) -def test_dash_encoding(original, expected): - actual = utils.dash_encode(original) +def test_tilde_encoding(original, expected): + actual = utils.tilde_encode(original) assert actual == expected # And test round-trip - assert original == utils.dash_decode(actual) + assert original == utils.tilde_decode(actual) From 5c02664e4a1aa98e25a330a638b357e67691a9e4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 14 Mar 2022 17:04:04 -0700 Subject: [PATCH 4/5] Update docs for tilde rather than dash encoding, refs #1657 --- docs/internals.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 59a3c01d..b76289bd 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -876,31 +876,31 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth .. autofunction:: datasette.utils.await_me_maybe -.. _internals_dash_encoding: +.. _internals_tilde_encoding: -Dash encoding -------------- +Tilde encoding +-------------- -Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them. +Datasette uses a custom encoding scheme in some places, called **tilde encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them. -Dash encoding uses the same algorithm as `URL percent-encoding `__, but with the ``-`` hyphen character used in place of ``%``. +Tilde encoding uses the same algorithm as `URL percent-encoding `__, but with the ``~`` hyphen character used in place of ``%``. -Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_`` will be replaced by the numeric equivalent preceded by a hyphen. For example: +Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example: -- ``/`` becomes ``-2F`` -- ``.`` becomes ``-2E`` -- ``%`` becomes ``-25`` -- ``-`` becomes ``-2D`` -- Space character becomes ``-20`` -- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary`` +- ``/`` becomes ``~2F`` +- ``.`` becomes ``~2E`` +- ``%`` becomes ``~25`` +- ``~`` becomes ``~7E`` +- Space character becomes ``~20`` +- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary`` -.. _internals_utils_dash_encode: +.. _internals_utils_tilde_encode: -.. autofunction:: datasette.utils.dash_encode +.. autofunction:: datasette.utils.tilde_encode -.. _internals_utils_dash_decode: +.. _internals_utils_tilde_decode: -.. autofunction:: datasette.utils.dash_decode +.. autofunction:: datasette.utils.tilde_decode .. _internals_tracer: From f33c9001915086b9c1b5d61c8e5addf08a4cf724 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 15 Mar 2022 08:31:10 -0700 Subject: [PATCH 5/5] Got a couple more test to pass --- datasette/views/base.py | 2 +- tests/test_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datasette/views/base.py b/datasette/views/base.py index c11e6aef..0d34ba71 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -453,7 +453,7 @@ class DataView(BaseView): # _format may be in captured by the URL router as_format = kwargs.pop("as_format", None) if as_format: - _format = as_format + _format = as_format.lstrip(".") else: # If there's a '.' in the last portion of the path, use that as format: last_path_component = request.path.split("/")[-1] diff --git a/tests/test_api.py b/tests/test_api.py index ba7ec3c5..87363a84 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -681,7 +681,7 @@ def test_row(app_client): def test_row_strange_table_name(app_client): response = app_client.get( - "/fixtures/table-2Fwith-2Fslashes-2Ecsv/3.json?_shape=objects" + "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3.json?_shape=objects" ) assert response.status == 200 assert [{"pk": "3", "content": "hey"}] == response.json["rows"]