Switch to dash encoding for table/database/row-pk in paths

* Dash encoding functions, tests and docs, refs #1439
* dash encoding is now like percent encoding but with dashes
* Use dash-encoding for row PKs and ?_next=, refs #1439
* Use dash encoding for table names, refs #1439
* Use dash encoding for database names, too, refs #1439

See also https://simonwillison.net/2022/Mar/5/dash-encoding/
This commit is contained in:
Simon Willison 2022-03-07 07:38:29 -08:00 committed by GitHub
commit 1baa030eca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 173 additions and 53 deletions

View file

@ -1,4 +1,4 @@
from .utils import path_with_format, HASH_LENGTH, PrefixedUrlString
from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString
import urllib
@ -31,20 +31,20 @@ class Urls:
db = self.ds.databases[database]
if self.ds.setting("hash_urls") and db.hash:
path = self.path(
f"{urllib.parse.quote(database)}-{db.hash[:HASH_LENGTH]}", format=format
f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format
)
else:
path = self.path(urllib.parse.quote(database), format=format)
path = self.path(dash_encode(database), format=format)
return path
def table(self, database, table, format=None):
path = f"{self.database(database)}/{urllib.parse.quote_plus(table)}"
path = f"{self.database(database)}/{dash_encode(table)}"
if format is not None:
path = path_with_format(path=path, format=format)
return PrefixedUrlString(path)
def query(self, database, query, format=None):
path = f"{self.database(database)}/{urllib.parse.quote_plus(query)}"
path = f"{self.database(database)}/{dash_encode(query)}"
if format is not None:
path = path_with_format(path=path, format=format)
return PrefixedUrlString(path)

View file

@ -112,12 +112,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any:
def urlsafe_components(token):
"""Splits token on commas and URL decodes each component"""
return [urllib.parse.unquote_plus(b) for b in token.split(",")]
"""Splits token on commas and dash-decodes each component"""
return [dash_decode(b) for b in token.split(",")]
def path_from_row_pks(row, pks, use_rowid, quote=True):
"""Generate an optionally URL-quoted unique identifier
"""Generate an optionally dash-quoted unique identifier
for a row from its primary keys."""
if use_rowid:
bits = [row["rowid"]]
@ -126,7 +126,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True):
row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks
]
if quote:
bits = [urllib.parse.quote_plus(str(bit)) for bit in bits]
bits = [dash_encode(str(bit)) for bit in bits]
else:
bits = [str(bit) for bit in bits]
@ -1140,3 +1140,36 @@ def add_cors_headers(headers):
headers["Access-Control-Allow-Origin"] = "*"
headers["Access-Control-Allow-Headers"] = "Authorization"
headers["Access-Control-Expose-Headers"] = "Link"
# Byte values that dash-encoding leaves untouched. This is the same set
# Python's percent-encoding treats as safe, with '.', '-' and '~' removed
# (those three are escaped here).
_DASH_ENCODING_SAFE = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"
)


class DashEncoder(dict):
    """Mapping from a byte value (int) to its dash-encoded string.

    Entries are computed lazily: looking up a byte that is not stored yet
    goes through ``__missing__``, which saves the result so later lookups
    for that byte are plain dict hits.
    """

    def __missing__(self, b):
        # Safe bytes map to themselves; anything else becomes "-" followed
        # by two uppercase hex digits.
        if b in _DASH_ENCODING_SAFE:
            encoded = chr(b)
        else:
            encoded = f"-{b:02X}"
        self[b] = encoded
        return encoded


# Lookup function bound to one shared DashEncoder instance (and its cache).
_dash_encoder = DashEncoder().__getitem__
@documented
def dash_encode(s: str) -> str:
    """Dash-encode a string, e.g. ``/foo/bar`` becomes ``-2Ffoo-2Fbar``.

    The string is encoded as UTF-8 and every resulting byte is mapped
    through ``_dash_encoder``; the pieces are joined into one string.
    """
    utf8_bytes = s.encode("utf-8")
    return "".join(map(_dash_encoder, utf8_bytes))
@documented
def dash_decode(s: str) -> str:
    """Decode a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``.

    Only a ``-`` followed by two hex digits is treated as an escape.
    A literal ``%`` in the input is returned unchanged instead of being
    read as a percent-escape, and a ``-`` that does not start a valid
    escape stays a ``-``. Decoded bytes that are not valid UTF-8 become
    U+FFFD, which is ``urllib.parse.unquote``'s default behaviour.
    """
    import re  # local import: the module's import list is not visible here

    # Hide any real "%" behind its own percent-escape so unquote() hands it
    # back verbatim rather than interpreting the characters after it.
    protected = s.replace("%", "%25")
    # Turn only well-formed dash escapes into percent escapes; stray dashes
    # are left alone so they are not returned as "%".
    percent_form = re.sub(r"-(?=[0-9A-Fa-f]{2})", "%", protected)
    return urllib.parse.unquote(percent_form)

View file

@ -17,6 +17,8 @@ from datasette.utils import (
InvalidSql,
LimitedWriter,
call_with_supported_arguments,
dash_decode,
dash_encode,
path_from_row_pks,
path_with_added_args,
path_with_removed_args,
@ -203,17 +205,17 @@ class DataView(BaseView):
async def resolve_db_name(self, request, db_name, **kwargs):
hash = None
name = None
db_name = urllib.parse.unquote_plus(db_name)
if db_name not in self.ds.databases and "-" in db_name:
decoded_name = dash_decode(db_name)
if decoded_name not in self.ds.databases and "-" in db_name:
# No matching DB found, maybe it's a name-hash?
name_bit, hash_bit = db_name.rsplit("-", 1)
if name_bit not in self.ds.databases:
if dash_decode(name_bit) not in self.ds.databases:
raise NotFound(f"Database not found: {name}")
else:
name = name_bit
name = dash_decode(name_bit)
hash = hash_bit
else:
name = db_name
name = decoded_name
try:
db = self.ds.databases[name]
@ -233,9 +235,7 @@ class DataView(BaseView):
return await db.table_exists(t)
table, _format = await resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
kwargs["table_and_format"]
),
table_and_format=dash_decode(kwargs["table_and_format"]),
table_exists=async_table_exists,
allowed_formats=self.ds.renderers.keys(),
)
@ -243,11 +243,11 @@ class DataView(BaseView):
if _format:
kwargs["as_format"] = f".{_format}"
elif kwargs.get("table"):
kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"])
kwargs["table"] = dash_decode(kwargs["table"])
should_redirect = self.ds.urls.path(f"{name}-{expected}")
if kwargs.get("table"):
should_redirect += "/" + urllib.parse.quote_plus(kwargs["table"])
should_redirect += "/" + dash_encode(kwargs["table"])
if kwargs.get("pk_path"):
should_redirect += "/" + kwargs["pk_path"]
if kwargs.get("as_format"):
@ -467,7 +467,7 @@ class DataView(BaseView):
return await db.table_exists(t)
table, _ext_format = await resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(args["table_and_format"]),
table_and_format=dash_decode(args["table_and_format"]),
table_exists=async_table_exists,
allowed_formats=self.ds.renderers.keys(),
)
@ -475,7 +475,7 @@ class DataView(BaseView):
args["table"] = table
del args["table_and_format"]
elif "table" in args:
args["table"] = urllib.parse.unquote_plus(args["table"])
args["table"] = dash_decode(args["table"])
return _format, args
async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):

View file

@ -12,6 +12,7 @@ from datasette.utils import (
MultiParams,
append_querystring,
compound_keys_after_sql,
dash_encode,
escape_sqlite,
filters_should_redirect,
is_url,
@ -142,7 +143,7 @@ class RowTableShared(DataView):
'<a href="{base_url}{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
base_url=base_url,
database=database,
table=urllib.parse.quote_plus(table),
table=dash_encode(table),
flat_pks=str(markupsafe.escape(pk_path)),
flat_pks_quoted=path_from_row_pks(row, pks, not pks),
)
@ -199,8 +200,8 @@ class RowTableShared(DataView):
link_template.format(
database=database,
base_url=base_url,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
table=dash_encode(other_table),
link_id=dash_encode(str(value)),
id=str(markupsafe.escape(value)),
label=str(markupsafe.escape(label)) or "-",
)
@ -765,7 +766,7 @@ class TableView(RowTableShared):
if prefix is None:
prefix = "$null"
else:
prefix = urllib.parse.quote_plus(str(prefix))
prefix = dash_encode(str(prefix))
next_value = f"{prefix},{next_value}"
added_args = {"_next": next_value}
if sort: