mirror of
https://github.com/simonw/datasette.git
synced 2025-12-10 16:51:24 +01:00
Compare commits
5 commits
main
...
issue-1657
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f33c900191 | ||
|
|
5c02664e4a | ||
|
|
47151c23b4 | ||
|
|
fd5df04d30 | ||
|
|
8f3e177d2c |
13 changed files with 102 additions and 158 deletions
|
|
@ -1168,7 +1168,7 @@ class DatasetteRouter:
|
||||||
path = "/" + path[len(base_url) :]
|
path = "/" + path[len(base_url) :]
|
||||||
scope = dict(scope, route_path=path)
|
scope = dict(scope, route_path=path)
|
||||||
request = Request(scope, receive)
|
request = Request(scope, receive)
|
||||||
# Populate request_messages if ds_messages cookie is present
|
# Populate request._messages if ds_messages cookie is present
|
||||||
try:
|
try:
|
||||||
request._messages = self.ds.unsign(
|
request._messages = self.ds.unsign(
|
||||||
request.cookies.get("ds_messages", ""), "messages"
|
request.cookies.get("ds_messages", ""), "messages"
|
||||||
|
|
@ -1211,11 +1211,11 @@ class DatasetteRouter:
|
||||||
return await self.handle_404(request, send)
|
return await self.handle_404(request, send)
|
||||||
|
|
||||||
async def handle_404(self, request, send, exception=None):
|
async def handle_404(self, request, send, exception=None):
|
||||||
# If path contains % encoding, redirect to dash encoding
|
# If path contains % encoding, redirect to tilde encoding
|
||||||
if "%" in request.path:
|
if "%" in request.path:
|
||||||
# Try the same path but with "%" replaced by "-"
|
# Try the same path but with "%" replaced by "~"
|
||||||
# and "-" replaced with "-2D"
|
# and "~" replaced with "~7E"
|
||||||
new_path = request.path.replace("-", "-2D").replace("%", "-")
|
new_path = request.path.replace("~", "~7E").replace("%", "~")
|
||||||
if request.query_string:
|
if request.query_string:
|
||||||
new_path += "?{}".format(request.query_string)
|
new_path += "?{}".format(request.query_string)
|
||||||
await asgi_send_redirect(send, new_path)
|
await asgi_send_redirect(send, new_path)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString
|
from .utils import tilde_encode, path_with_format, HASH_LENGTH, PrefixedUrlString
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -31,20 +31,20 @@ class Urls:
|
||||||
db = self.ds.databases[database]
|
db = self.ds.databases[database]
|
||||||
if self.ds.setting("hash_urls") and db.hash:
|
if self.ds.setting("hash_urls") and db.hash:
|
||||||
path = self.path(
|
path = self.path(
|
||||||
f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format
|
f"{tilde_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
path = self.path(dash_encode(database), format=format)
|
path = self.path(tilde_encode(database), format=format)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
def table(self, database, table, format=None):
|
def table(self, database, table, format=None):
|
||||||
path = f"{self.database(database)}/{dash_encode(table)}"
|
path = f"{self.database(database)}/{tilde_encode(table)}"
|
||||||
if format is not None:
|
if format is not None:
|
||||||
path = path_with_format(path=path, format=format)
|
path = path_with_format(path=path, format=format)
|
||||||
return PrefixedUrlString(path)
|
return PrefixedUrlString(path)
|
||||||
|
|
||||||
def query(self, database, query, format=None):
|
def query(self, database, query, format=None):
|
||||||
path = f"{self.database(database)}/{dash_encode(query)}"
|
path = f"{self.database(database)}/{tilde_encode(query)}"
|
||||||
if format is not None:
|
if format is not None:
|
||||||
path = path_with_format(path=path, format=format)
|
path = path_with_format(path=path, format=format)
|
||||||
return PrefixedUrlString(path)
|
return PrefixedUrlString(path)
|
||||||
|
|
|
||||||
|
|
@ -113,12 +113,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any:
|
||||||
|
|
||||||
|
|
||||||
def urlsafe_components(token):
|
def urlsafe_components(token):
|
||||||
"""Splits token on commas and dash-decodes each component"""
|
"""Splits token on commas and tilde-decodes each component"""
|
||||||
return [dash_decode(b) for b in token.split(",")]
|
return [tilde_decode(b) for b in token.split(",")]
|
||||||
|
|
||||||
|
|
||||||
def path_from_row_pks(row, pks, use_rowid, quote=True):
|
def path_from_row_pks(row, pks, use_rowid, quote=True):
|
||||||
"""Generate an optionally dash-quoted unique identifier
|
"""Generate an optionally tilde-encoded unique identifier
|
||||||
for a row from its primary keys."""
|
for a row from its primary keys."""
|
||||||
if use_rowid:
|
if use_rowid:
|
||||||
bits = [row["rowid"]]
|
bits = [row["rowid"]]
|
||||||
|
|
@ -127,7 +127,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True):
|
||||||
row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks
|
row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks
|
||||||
]
|
]
|
||||||
if quote:
|
if quote:
|
||||||
bits = [dash_encode(str(bit)) for bit in bits]
|
bits = [tilde_encode(str(bit)) for bit in bits]
|
||||||
else:
|
else:
|
||||||
bits = [str(bit) for bit in bits]
|
bits = [str(bit) for bit in bits]
|
||||||
|
|
||||||
|
|
@ -758,10 +758,7 @@ def path_with_format(
|
||||||
path = request.path if request else path
|
path = request.path if request else path
|
||||||
if replace_format and path.endswith(f".{replace_format}"):
|
if replace_format and path.endswith(f".{replace_format}"):
|
||||||
path = path[: -(1 + len(replace_format))]
|
path = path[: -(1 + len(replace_format))]
|
||||||
if "." in path:
|
path = f"{path}.{format}"
|
||||||
qs["_format"] = format
|
|
||||||
else:
|
|
||||||
path = f"{path}.{format}"
|
|
||||||
if qs:
|
if qs:
|
||||||
extra = urllib.parse.urlencode(sorted(qs.items()))
|
extra = urllib.parse.urlencode(sorted(qs.items()))
|
||||||
if request and request.query_string:
|
if request and request.query_string:
|
||||||
|
|
@ -1143,38 +1140,38 @@ def add_cors_headers(headers):
|
||||||
headers["Access-Control-Expose-Headers"] = "Link"
|
headers["Access-Control-Expose-Headers"] = "Link"
|
||||||
|
|
||||||
|
|
||||||
_DASH_ENCODING_SAFE = frozenset(
|
_TILDE_ENCODING_SAFE = frozenset(
|
||||||
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
b"abcdefghijklmnopqrstuvwxyz"
|
b"abcdefghijklmnopqrstuvwxyz"
|
||||||
b"0123456789_"
|
b"0123456789_-"
|
||||||
# This is the same as Python percent-encoding but I removed
|
# This is the same as Python percent-encoding but I removed
|
||||||
# '.' and '-' and '~'
|
# '.' and '~'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DashEncoder(dict):
|
class TildeEncoder(dict):
|
||||||
# Keeps a cache internally, via __missing__
|
# Keeps a cache internally, via __missing__
|
||||||
def __missing__(self, b):
|
def __missing__(self, b):
|
||||||
# Handle a cache miss, store encoded string in cache and return.
|
# Handle a cache miss, store encoded string in cache and return.
|
||||||
res = chr(b) if b in _DASH_ENCODING_SAFE else "-{:02X}".format(b)
|
res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b)
|
||||||
self[b] = res
|
self[b] = res
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
_dash_encoder = DashEncoder().__getitem__
|
_tilde_encoder = TildeEncoder().__getitem__
|
||||||
|
|
||||||
|
|
||||||
@documented
|
@documented
|
||||||
def dash_encode(s: str) -> str:
|
def tilde_encode(s: str) -> str:
|
||||||
"Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``"
|
"Returns tilde-encoded string - for example ``/foo/bar`` -> ``~2Ffoo~2Fbar``"
|
||||||
return "".join(_dash_encoder(char) for char in s.encode("utf-8"))
|
return "".join(_tilde_encoder(char) for char in s.encode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
@documented
|
@documented
|
||||||
def dash_decode(s: str) -> str:
|
def tilde_decode(s: str) -> str:
|
||||||
"Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``"
|
"Decodes a tilde-encoded string, so ``~2Ffoo~2Fbar`` -> ``/foo/bar``"
|
||||||
# Avoid accidentally decoding a %2f style sequence
|
# Avoid accidentally decoding a %2f style sequence
|
||||||
temp = secrets.token_hex(16)
|
temp = secrets.token_hex(16)
|
||||||
s = s.replace("%", temp)
|
s = s.replace("%", temp)
|
||||||
decoded = urllib.parse.unquote(s.replace("-", "%"))
|
decoded = urllib.parse.unquote(s.replace("~", "%"))
|
||||||
return decoded.replace(temp, "%")
|
return decoded.replace(temp, "%")
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,8 @@ from datasette.utils import (
|
||||||
InvalidSql,
|
InvalidSql,
|
||||||
LimitedWriter,
|
LimitedWriter,
|
||||||
call_with_supported_arguments,
|
call_with_supported_arguments,
|
||||||
dash_decode,
|
tilde_decode,
|
||||||
dash_encode,
|
tilde_encode,
|
||||||
path_from_row_pks,
|
path_from_row_pks,
|
||||||
path_with_added_args,
|
path_with_added_args,
|
||||||
path_with_removed_args,
|
path_with_removed_args,
|
||||||
|
|
@ -205,14 +205,14 @@ class DataView(BaseView):
|
||||||
async def resolve_db_name(self, request, db_name, **kwargs):
|
async def resolve_db_name(self, request, db_name, **kwargs):
|
||||||
hash = None
|
hash = None
|
||||||
name = None
|
name = None
|
||||||
decoded_name = dash_decode(db_name)
|
decoded_name = tilde_decode(db_name)
|
||||||
if decoded_name not in self.ds.databases and "-" in db_name:
|
if decoded_name not in self.ds.databases and "-" in db_name:
|
||||||
# No matching DB found, maybe it's a name-hash?
|
# No matching DB found, maybe it's a name-hash?
|
||||||
name_bit, hash_bit = db_name.rsplit("-", 1)
|
name_bit, hash_bit = db_name.rsplit("-", 1)
|
||||||
if dash_decode(name_bit) not in self.ds.databases:
|
if tilde_decode(name_bit) not in self.ds.databases:
|
||||||
raise NotFound(f"Database not found: {name}")
|
raise NotFound(f"Database not found: {name}")
|
||||||
else:
|
else:
|
||||||
name = dash_decode(name_bit)
|
name = tilde_decode(name_bit)
|
||||||
hash = hash_bit
|
hash = hash_bit
|
||||||
else:
|
else:
|
||||||
name = decoded_name
|
name = decoded_name
|
||||||
|
|
@ -235,7 +235,7 @@ class DataView(BaseView):
|
||||||
return await db.table_exists(t)
|
return await db.table_exists(t)
|
||||||
|
|
||||||
table, _format = await resolve_table_and_format(
|
table, _format = await resolve_table_and_format(
|
||||||
table_and_format=dash_decode(kwargs["table_and_format"]),
|
table_and_format=tilde_decode(kwargs["table_and_format"]),
|
||||||
table_exists=async_table_exists,
|
table_exists=async_table_exists,
|
||||||
allowed_formats=self.ds.renderers.keys(),
|
allowed_formats=self.ds.renderers.keys(),
|
||||||
)
|
)
|
||||||
|
|
@ -243,11 +243,11 @@ class DataView(BaseView):
|
||||||
if _format:
|
if _format:
|
||||||
kwargs["as_format"] = f".{_format}"
|
kwargs["as_format"] = f".{_format}"
|
||||||
elif kwargs.get("table"):
|
elif kwargs.get("table"):
|
||||||
kwargs["table"] = dash_decode(kwargs["table"])
|
kwargs["table"] = tilde_decode(kwargs["table"])
|
||||||
|
|
||||||
should_redirect = self.ds.urls.path(f"{name}-{expected}")
|
should_redirect = self.ds.urls.path(f"{name}-{expected}")
|
||||||
if kwargs.get("table"):
|
if kwargs.get("table"):
|
||||||
should_redirect += "/" + dash_encode(kwargs["table"])
|
should_redirect += "/" + tilde_encode(kwargs["table"])
|
||||||
if kwargs.get("pk_path"):
|
if kwargs.get("pk_path"):
|
||||||
should_redirect += "/" + kwargs["pk_path"]
|
should_redirect += "/" + kwargs["pk_path"]
|
||||||
if kwargs.get("as_format"):
|
if kwargs.get("as_format"):
|
||||||
|
|
@ -448,38 +448,17 @@ class DataView(BaseView):
|
||||||
|
|
||||||
return AsgiStream(stream_fn, headers=headers, content_type=content_type)
|
return AsgiStream(stream_fn, headers=headers, content_type=content_type)
|
||||||
|
|
||||||
async def get_format(self, request, database, args):
|
|
||||||
"""Determine the format of the response from the request, from URL
|
|
||||||
parameters or from a file extension.
|
|
||||||
|
|
||||||
`args` is a dict of the path components parsed from the URL by the router.
|
|
||||||
"""
|
|
||||||
# If ?_format= is provided, use that as the format
|
|
||||||
_format = request.args.get("_format", None)
|
|
||||||
if not _format:
|
|
||||||
_format = (args.pop("as_format", None) or "").lstrip(".")
|
|
||||||
else:
|
|
||||||
args.pop("as_format", None)
|
|
||||||
if "table_and_format" in args:
|
|
||||||
db = self.ds.databases[database]
|
|
||||||
|
|
||||||
async def async_table_exists(t):
|
|
||||||
return await db.table_exists(t)
|
|
||||||
|
|
||||||
table, _ext_format = await resolve_table_and_format(
|
|
||||||
table_and_format=dash_decode(args["table_and_format"]),
|
|
||||||
table_exists=async_table_exists,
|
|
||||||
allowed_formats=self.ds.renderers.keys(),
|
|
||||||
)
|
|
||||||
_format = _format or _ext_format
|
|
||||||
args["table"] = table
|
|
||||||
del args["table_and_format"]
|
|
||||||
elif "table" in args:
|
|
||||||
args["table"] = dash_decode(args["table"])
|
|
||||||
return _format, args
|
|
||||||
|
|
||||||
async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):
|
async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):
|
||||||
_format, kwargs = await self.get_format(request, database, kwargs)
|
_format = None
|
||||||
|
# _format may be in <as_format> captured by the URL router
|
||||||
|
as_format = kwargs.pop("as_format", None)
|
||||||
|
if as_format:
|
||||||
|
_format = as_format.lstrip(".")
|
||||||
|
else:
|
||||||
|
# If there's a '.' in the last portion of the path, use that as format:
|
||||||
|
last_path_component = request.path.split("/")[-1]
|
||||||
|
if "." in last_path_component:
|
||||||
|
_format = last_path_component.split(".")[-1]
|
||||||
|
|
||||||
if _format == "csv":
|
if _format == "csv":
|
||||||
return await self.as_csv(request, database, hash, **kwargs)
|
return await self.as_csv(request, database, hash, **kwargs)
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,8 @@ from datasette.utils import (
|
||||||
MultiParams,
|
MultiParams,
|
||||||
append_querystring,
|
append_querystring,
|
||||||
compound_keys_after_sql,
|
compound_keys_after_sql,
|
||||||
dash_encode,
|
tilde_decode,
|
||||||
|
tilde_encode,
|
||||||
escape_sqlite,
|
escape_sqlite,
|
||||||
filters_should_redirect,
|
filters_should_redirect,
|
||||||
is_url,
|
is_url,
|
||||||
|
|
@ -143,7 +144,7 @@ class RowTableShared(DataView):
|
||||||
'<a href="{base_url}{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
|
'<a href="{base_url}{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
|
||||||
base_url=base_url,
|
base_url=base_url,
|
||||||
database=database,
|
database=database,
|
||||||
table=dash_encode(table),
|
table=tilde_encode(table),
|
||||||
flat_pks=str(markupsafe.escape(pk_path)),
|
flat_pks=str(markupsafe.escape(pk_path)),
|
||||||
flat_pks_quoted=path_from_row_pks(row, pks, not pks),
|
flat_pks_quoted=path_from_row_pks(row, pks, not pks),
|
||||||
)
|
)
|
||||||
|
|
@ -200,8 +201,8 @@ class RowTableShared(DataView):
|
||||||
link_template.format(
|
link_template.format(
|
||||||
database=database,
|
database=database,
|
||||||
base_url=base_url,
|
base_url=base_url,
|
||||||
table=dash_encode(other_table),
|
table=tilde_encode(other_table),
|
||||||
link_id=dash_encode(str(value)),
|
link_id=tilde_encode(str(value)),
|
||||||
id=str(markupsafe.escape(value)),
|
id=str(markupsafe.escape(value)),
|
||||||
label=str(markupsafe.escape(label)) or "-",
|
label=str(markupsafe.escape(label)) or "-",
|
||||||
)
|
)
|
||||||
|
|
@ -346,6 +347,8 @@ class TableView(RowTableShared):
|
||||||
write=bool(canned_query.get("write")),
|
write=bool(canned_query.get("write")),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
table = tilde_decode(table)
|
||||||
|
|
||||||
db = self.ds.databases[database]
|
db = self.ds.databases[database]
|
||||||
is_view = bool(await db.get_view_definition(table))
|
is_view = bool(await db.get_view_definition(table))
|
||||||
table_exists = bool(await db.table_exists(table))
|
table_exists = bool(await db.table_exists(table))
|
||||||
|
|
@ -766,7 +769,7 @@ class TableView(RowTableShared):
|
||||||
if prefix is None:
|
if prefix is None:
|
||||||
prefix = "$null"
|
prefix = "$null"
|
||||||
else:
|
else:
|
||||||
prefix = dash_encode(str(prefix))
|
prefix = tilde_encode(str(prefix))
|
||||||
next_value = f"{prefix},{next_value}"
|
next_value = f"{prefix},{next_value}"
|
||||||
added_args = {"_next": next_value}
|
added_args = {"_next": next_value}
|
||||||
if sort:
|
if sort:
|
||||||
|
|
@ -938,6 +941,7 @@ class RowView(RowTableShared):
|
||||||
name = "row"
|
name = "row"
|
||||||
|
|
||||||
async def data(self, request, database, hash, table, pk_path, default_labels=False):
|
async def data(self, request, database, hash, table, pk_path, default_labels=False):
|
||||||
|
table = tilde_decode(table)
|
||||||
await self.check_permissions(
|
await self.check_permissions(
|
||||||
request,
|
request,
|
||||||
[
|
[
|
||||||
|
|
|
||||||
|
|
@ -59,21 +59,3 @@ truncation error message.
|
||||||
You can increase or remove this limit using the :ref:`setting_max_csv_mb` config
|
You can increase or remove this limit using the :ref:`setting_max_csv_mb` config
|
||||||
setting. You can also disable the CSV export feature entirely using
|
setting. You can also disable the CSV export feature entirely using
|
||||||
:ref:`setting_allow_csv_stream`.
|
:ref:`setting_allow_csv_stream`.
|
||||||
|
|
||||||
A note on URLs
|
|
||||||
--------------
|
|
||||||
|
|
||||||
The default URL for the CSV representation of a table is that table with
|
|
||||||
``.csv`` appended to it:
|
|
||||||
|
|
||||||
* https://latest.datasette.io/fixtures/facetable - HTML interface
|
|
||||||
* https://latest.datasette.io/fixtures/facetable.csv - CSV export
|
|
||||||
* https://latest.datasette.io/fixtures/facetable.json - JSON API
|
|
||||||
|
|
||||||
This pattern doesn't work for tables with names that already end in ``.csv`` or
|
|
||||||
``.json``. For those tables, you can instead use the ``_format=`` query string
|
|
||||||
parameter:
|
|
||||||
|
|
||||||
* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv - HTML interface
|
|
||||||
* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=csv - CSV export
|
|
||||||
* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=json - JSON API
|
|
||||||
|
|
|
||||||
|
|
@ -545,7 +545,7 @@ These functions can be accessed via the ``{{ urls }}`` object in Datasette templ
|
||||||
<a href="{{ urls.table("fixtures", "facetable") }}">facetable table</a>
|
<a href="{{ urls.table("fixtures", "facetable") }}">facetable table</a>
|
||||||
<a href="{{ urls.query("fixtures", "pragma_cache_size") }}">pragma_cache_size query</a>
|
<a href="{{ urls.query("fixtures", "pragma_cache_size") }}">pragma_cache_size query</a>
|
||||||
|
|
||||||
Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is usually the path with ``.json`` added on the end, but it may use ``?_format=json`` in cases where the path already includes ``.json``, for example a URL to a table named ``table.json``.
|
Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is the path with ``.json`` added on the end.
|
||||||
|
|
||||||
These methods each return a ``datasette.utils.PrefixedUrlString`` object, which is a subclass of the Python ``str`` type. This allows the logic that considers the ``base_url`` setting to detect if that prefix has already been applied to the path.
|
These methods each return a ``datasette.utils.PrefixedUrlString`` object, which is a subclass of the Python ``str`` type. This allows the logic that considers the ``base_url`` setting to detect if that prefix has already been applied to the path.
|
||||||
|
|
||||||
|
|
@ -876,31 +876,31 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth
|
||||||
|
|
||||||
.. autofunction:: datasette.utils.await_me_maybe
|
.. autofunction:: datasette.utils.await_me_maybe
|
||||||
|
|
||||||
.. _internals_dash_encoding:
|
.. _internals_tilde_encoding:
|
||||||
|
|
||||||
Dash encoding
|
Tilde encoding
|
||||||
-------------
|
--------------
|
||||||
|
|
||||||
Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them.
|
Datasette uses a custom encoding scheme in some places, called **tilde encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them.
|
||||||
|
|
||||||
Dash encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``-`` hyphen character used in place of ``%``.
|
Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``.
|
||||||
|
|
||||||
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_`` will be replaced by the numeric equivalent preceded by a hyphen. For example:
|
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
|
||||||
|
|
||||||
- ``/`` becomes ``-2F``
|
- ``/`` becomes ``~2F``
|
||||||
- ``.`` becomes ``-2E``
|
- ``.`` becomes ``~2E``
|
||||||
- ``%`` becomes ``-25``
|
- ``%`` becomes ``~25``
|
||||||
- ``-`` becomes ``-2D``
|
- ``~`` becomes ``~7E``
|
||||||
- Space character becomes ``-20``
|
- Space character becomes ``~20``
|
||||||
- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary``
|
- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``
|
||||||
|
|
||||||
.. _internals_utils_dash_encode:
|
.. _internals_utils_tilde_encode:
|
||||||
|
|
||||||
.. autofunction:: datasette.utils.dash_encode
|
.. autofunction:: datasette.utils.tilde_encode
|
||||||
|
|
||||||
.. _internals_utils_dash_decode:
|
.. _internals_utils_tilde_decode:
|
||||||
|
|
||||||
.. autofunction:: datasette.utils.dash_decode
|
.. autofunction:: datasette.utils.tilde_decode
|
||||||
|
|
||||||
.. _internals_tracer:
|
.. _internals_tracer:
|
||||||
|
|
||||||
|
|
|
||||||
2
setup.py
2
setup.py
|
|
@ -69,7 +69,7 @@ setup(
|
||||||
"test": [
|
"test": [
|
||||||
"pytest>=5.2.2,<7.1.0",
|
"pytest>=5.2.2,<7.1.0",
|
||||||
"pytest-xdist>=2.2.1,<2.6",
|
"pytest-xdist>=2.2.1,<2.6",
|
||||||
"pytest-asyncio>=0.10,<0.19",
|
"pytest-asyncio>=0.17,<0.19",
|
||||||
"beautifulsoup4>=4.8.1,<4.11.0",
|
"beautifulsoup4>=4.8.1,<4.11.0",
|
||||||
"black==22.1.0",
|
"black==22.1.0",
|
||||||
"pytest-timeout>=1.4.2,<2.2",
|
"pytest-timeout>=1.4.2,<2.2",
|
||||||
|
|
|
||||||
|
|
@ -679,18 +679,9 @@ def test_row(app_client):
|
||||||
assert [{"id": "1", "content": "hello"}] == response.json["rows"]
|
assert [{"id": "1", "content": "hello"}] == response.json["rows"]
|
||||||
|
|
||||||
|
|
||||||
def test_row_format_in_querystring(app_client):
|
|
||||||
# regression test for https://github.com/simonw/datasette/issues/563
|
|
||||||
response = app_client.get(
|
|
||||||
"/fixtures/simple_primary_key/1?_format=json&_shape=objects"
|
|
||||||
)
|
|
||||||
assert response.status == 200
|
|
||||||
assert [{"id": "1", "content": "hello"}] == response.json["rows"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_row_strange_table_name(app_client):
|
def test_row_strange_table_name(app_client):
|
||||||
response = app_client.get(
|
response = app_client.get(
|
||||||
"/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects"
|
"/fixtures/table~2Fwith~2Fslashes~2Ecsv/3.json?_shape=objects"
|
||||||
)
|
)
|
||||||
assert response.status == 200
|
assert response.status == 200
|
||||||
assert [{"pk": "3", "content": "hey"}] == response.json["rows"]
|
assert [{"pk": "3", "content": "hey"}] == response.json["rows"]
|
||||||
|
|
@ -996,7 +987,7 @@ async def test_hidden_sqlite_stat1_table():
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d"))
|
@pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d"))
|
||||||
async def test_dash_encoded_database_names(db_name):
|
async def test_tilde_encoded_database_names(db_name):
|
||||||
ds = Datasette()
|
ds = Datasette()
|
||||||
ds.add_memory_database(db_name)
|
ds.add_memory_database(db_name)
|
||||||
response = await ds.client.get("/.json")
|
response = await ds.client.get("/.json")
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from datasette.app import SETTINGS
|
||||||
from datasette.plugins import DEFAULT_PLUGINS
|
from datasette.plugins import DEFAULT_PLUGINS
|
||||||
from datasette.cli import cli, serve
|
from datasette.cli import cli, serve
|
||||||
from datasette.version import __version__
|
from datasette.version import __version__
|
||||||
from datasette.utils import dash_encode
|
from datasette.utils import tilde_encode
|
||||||
from datasette.utils.sqlite import sqlite3
|
from datasette.utils.sqlite import sqlite3
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
import io
|
import io
|
||||||
|
|
@ -295,12 +295,12 @@ def test_weird_database_names(ensure_eventloop, tmpdir, filename):
|
||||||
assert result1.exit_code == 0, result1.output
|
assert result1.exit_code == 0, result1.output
|
||||||
filename_no_stem = filename.rsplit(".", 1)[0]
|
filename_no_stem = filename.rsplit(".", 1)[0]
|
||||||
expected_link = '<a href="/{}">{}</a>'.format(
|
expected_link = '<a href="/{}">{}</a>'.format(
|
||||||
dash_encode(filename_no_stem), filename_no_stem
|
tilde_encode(filename_no_stem), filename_no_stem
|
||||||
)
|
)
|
||||||
assert expected_link in result1.output
|
assert expected_link in result1.output
|
||||||
# Now try hitting that database page
|
# Now try hitting that database page
|
||||||
result2 = runner.invoke(
|
result2 = runner.invoke(
|
||||||
cli, [db_path, "--get", "/{}".format(dash_encode(filename_no_stem))]
|
cli, [db_path, "--get", "/{}".format(tilde_encode(filename_no_stem))]
|
||||||
)
|
)
|
||||||
assert result2.exit_code == 0, result2.output
|
assert result2.exit_code == 0, result2.output
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -229,7 +229,7 @@ def test_row_page_does_not_truncate():
|
||||||
["query", "db-fixtures", "query-neighborhood_search"],
|
["query", "db-fixtures", "query-neighborhood_search"],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"/fixtures/table%2Fwith%2Fslashes.csv",
|
"/fixtures/table-2Fwith-2Fslashes-2Ecsv",
|
||||||
["table", "db-fixtures", "table-tablewithslashescsv-fa7563"],
|
["table", "db-fixtures", "table-tablewithslashescsv-fa7563"],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
|
@ -255,7 +255,7 @@ def test_css_classes_on_body(app_client, path, expected_classes):
|
||||||
"table-fixtures-simple_primary_key.html, *table.html",
|
"table-fixtures-simple_primary_key.html, *table.html",
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"/fixtures/table%2Fwith%2Fslashes.csv",
|
"/fixtures/table-2Fwith-2Fslashes-2Ecsv",
|
||||||
"table-fixtures-tablewithslashescsv-fa7563.html, *table.html",
|
"table-fixtures-tablewithslashescsv-fa7563.html, *table.html",
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
|
@ -816,7 +816,8 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix):
|
||||||
),
|
),
|
||||||
("/fixtures/pragma_cache_size", None),
|
("/fixtures/pragma_cache_size", None),
|
||||||
(
|
(
|
||||||
"/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬",
|
# /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬
|
||||||
|
"/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC",
|
||||||
"/fixtures?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B",
|
"/fixtures?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B",
|
||||||
),
|
),
|
||||||
("/fixtures/magic_parameters", None),
|
("/fixtures/magic_parameters", None),
|
||||||
|
|
@ -824,6 +825,7 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix):
|
||||||
)
|
)
|
||||||
def test_edit_sql_link_on_canned_queries(app_client, path, expected):
|
def test_edit_sql_link_on_canned_queries(app_client, path, expected):
|
||||||
response = app_client.get(path)
|
response = app_client.get(path)
|
||||||
|
assert response.status == 200
|
||||||
expected_link = f'<a href="{expected}" class="canned-query-edit-sql">Edit SQL</a>'
|
expected_link = f'<a href="{expected}" class="canned-query-edit-sql">Edit SQL</a>'
|
||||||
if expected:
|
if expected:
|
||||||
assert expected_link in response.text
|
assert expected_link in response.text
|
||||||
|
|
@ -898,8 +900,8 @@ def test_trace_correctly_escaped(app_client):
|
||||||
# Table page
|
# Table page
|
||||||
("/fixtures/facetable", "http://localhost/fixtures/facetable.json"),
|
("/fixtures/facetable", "http://localhost/fixtures/facetable.json"),
|
||||||
(
|
(
|
||||||
"/fixtures/table%2Fwith%2Fslashes.csv",
|
"/fixtures/table-2Fwith-2Fslashes-2Ecsv",
|
||||||
"http://localhost/fixtures/table%2Fwith%2Fslashes.csv?_format=json",
|
"http://localhost/fixtures/table-2Fwith-2Fslashes-2Ecsv.json",
|
||||||
),
|
),
|
||||||
# Row page
|
# Row page
|
||||||
(
|
(
|
||||||
|
|
@ -959,17 +961,17 @@ def test_no_alternate_url_json(app_client, path):
|
||||||
(
|
(
|
||||||
(
|
(
|
||||||
"/fivethirtyeight/twitter-ratio%2Fsenators",
|
"/fivethirtyeight/twitter-ratio%2Fsenators",
|
||||||
"/fivethirtyeight/twitter-2Dratio-2Fsenators",
|
"/fivethirtyeight/twitter-ratio~2Fsenators",
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"/fixtures/table%2Fwith%2Fslashes",
|
"/fixtures/table%2Fwith%2Fslashes.csv",
|
||||||
"/fixtures/table-2Fwith-2Fslashes",
|
"/fixtures/table~2Fwith~2Fslashes~2Ecsv",
|
||||||
),
|
),
|
||||||
# query string should be preserved
|
# query string should be preserved
|
||||||
("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"),
|
("/foo/bar%2Fbaz?id=5", "/foo/bar~2Fbaz?id=5"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_redirect_percent_encoding_to_dash_encoding(app_client, path, expected):
|
def test_redirect_percent_encoding_to_tilde_encoding(app_client, path, expected):
|
||||||
response = app_client.get(path)
|
response = app_client.get(path)
|
||||||
assert response.status == 302
|
assert response.status == 302
|
||||||
assert response.headers["location"] == expected
|
assert response.headers["location"] == expected
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ def test_table_shape_object_compound_primary_key(app_client):
|
||||||
|
|
||||||
def test_table_with_slashes_in_name(app_client):
|
def test_table_with_slashes_in_name(app_client):
|
||||||
response = app_client.get(
|
response = app_client.get(
|
||||||
"/fixtures/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json"
|
"/fixtures/table-2Fwith-2Fslashes-2Ecsv.json?_shape=objects"
|
||||||
)
|
)
|
||||||
assert response.status == 200
|
assert response.status == 200
|
||||||
data = response.json
|
data = response.json
|
||||||
|
|
@ -1032,7 +1032,10 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client):
|
||||||
|
|
||||||
|
|
||||||
def test_custom_query_with_unicode_characters(app_client):
|
def test_custom_query_with_unicode_characters(app_client):
|
||||||
response = app_client.get("/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json?_shape=array")
|
# /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json
|
||||||
|
response = app_client.get(
|
||||||
|
"/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC.json?_shape=array"
|
||||||
|
)
|
||||||
assert [{"id": 1, "name": "San Francisco"}] == response.json
|
assert [{"id": 1, "name": "San Francisco"}] == response.json
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -393,9 +393,7 @@ def test_table_columns():
|
||||||
("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"),
|
("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"),
|
||||||
("/foo/bar", "json", {}, "/foo/bar.json"),
|
("/foo/bar", "json", {}, "/foo/bar.json"),
|
||||||
("/foo/bar", "csv", {}, "/foo/bar.csv"),
|
("/foo/bar", "csv", {}, "/foo/bar.csv"),
|
||||||
("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"),
|
|
||||||
("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"),
|
("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"),
|
||||||
("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"),
|
|
||||||
(
|
(
|
||||||
"/sf-trees/Street_Tree_List?_search=cherry&_size=1000",
|
"/sf-trees/Street_Tree_List?_search=cherry&_size=1000",
|
||||||
"csv",
|
"csv",
|
||||||
|
|
@ -410,18 +408,6 @@ def test_path_with_format(path, format, extra_qs, expected):
|
||||||
assert expected == actual
|
assert expected == actual
|
||||||
|
|
||||||
|
|
||||||
def test_path_with_format_replace_format():
|
|
||||||
request = Request.fake("/foo/bar.csv")
|
|
||||||
assert (
|
|
||||||
utils.path_with_format(request=request, format="blob")
|
|
||||||
== "/foo/bar.csv?_format=blob"
|
|
||||||
)
|
|
||||||
assert (
|
|
||||||
utils.path_with_format(request=request, format="blob", replace_format="csv")
|
|
||||||
== "/foo/bar.blob"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"bytes,expected",
|
"bytes,expected",
|
||||||
[
|
[
|
||||||
|
|
@ -652,15 +638,15 @@ async def test_derive_named_parameters(sql, expected):
|
||||||
"original,expected",
|
"original,expected",
|
||||||
(
|
(
|
||||||
("abc", "abc"),
|
("abc", "abc"),
|
||||||
("/foo/bar", "-2Ffoo-2Fbar"),
|
("/foo/bar", "~2Ffoo~2Fbar"),
|
||||||
("/-/bar", "-2F-2D-2Fbar"),
|
("/-/bar", "~2F-~2Fbar"),
|
||||||
("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"),
|
("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
|
||||||
(r"%~-/", "-25-7E-2D-2F"),
|
(r"%~-/", "~25~7E-~2F"),
|
||||||
("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"),
|
("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_dash_encoding(original, expected):
|
def test_tilde_encoding(original, expected):
|
||||||
actual = utils.dash_encode(original)
|
actual = utils.tilde_encode(original)
|
||||||
assert actual == expected
|
assert actual == expected
|
||||||
# And test round-trip
|
# And test round-trip
|
||||||
assert original == utils.dash_decode(actual)
|
assert original == utils.tilde_decode(actual)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue