URL hashing is now off by default - closes #418

Prior to this commit Datasette would calculate the content hash of every
database and redirect to a URL containing that hash, like so:

    https://v0-27.datasette.io/fixtures => https://v0-27.datasette.io/fixtures-dd88475

This assumed that all databases were opened in immutable mode and were not
expected to change.

This will be changing as a result of #419 - so this commit takes the first step
in implementing that change by changing this default behaviour. Datasette will
now only redirect hash-free URLs under two circumstances:

* The new `hash_urls` config option is set to true (it defaults to false).
* The user passes `?_hash=1` in the URL
This commit is contained in:
Simon Willison 2019-03-17 15:55:04 -07:00 committed by GitHub
commit 6f6d0ff2b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 149 additions and 70 deletions

View file

@ -69,6 +69,9 @@ CONFIG_OPTIONS = (
ConfigOption("facet_suggest_time_limit_ms", 50, """
Time limit for calculating a suggested facet
""".strip()),
ConfigOption("hash_urls", False, """
Include DB file contents hash in URLs, for far-future caching
""".strip()),
ConfigOption("allow_facet", True, """
Allow users to specify columns to facet using ?_facet= parameter
""".strip()),
@ -81,9 +84,12 @@ CONFIG_OPTIONS = (
ConfigOption("allow_sql", True, """
Allow arbitrary SQL queries via ?sql= parameter
""".strip()),
ConfigOption("default_cache_ttl", 365 * 24 * 60 * 60, """
ConfigOption("default_cache_ttl", 5, """
Default HTTP cache TTL (used in Cache-Control: max-age= header)
""".strip()),
ConfigOption("default_cache_ttl_hashed", 365 * 24 * 60 * 60, """
Default HTTP cache TTL for hashed URL pages
""".strip()),
ConfigOption("cache_size_kb", 0, """
SQLite cache size in KB (0 == use SQLite default)
""".strip()),

View file

@ -12,12 +12,12 @@
{% block content %}
<div class="hd"><a href="/">home</a></div>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash[:6] }}">{{ metadata.title or database }}</h1>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_color(database) }}">{{ metadata.title or database }}</h1>
{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
{% if config.allow_sql %}
<form class="sql" action="/{{ database }}-{{ database_hash }}" method="get">
<form class="sql" action="{{ database_url(database) }}" method="get">
<h3>Custom SQL query</h3>
<p><textarea name="sql">{% if tables %}select * from {{ tables[0].name|escape_sqlite }}{% else %}select sqlite_version(){% endif %}</textarea></p>
<p><input type="submit" value="Run SQL"></p>
@ -27,7 +27,7 @@
{% for table in tables %}
{% if show_hidden or not table.hidden %}
<div class="db-table">
<h2><a href="/{{ database }}-{{ database_hash }}/{{ table.name|quote_plus }}">{{ table.name }}</a>{% if table.hidden %}<em> (hidden)</em>{% endif %}</h2>
<h2><a href="{{ database_url(database) }}/{{ table.name|quote_plus }}">{{ table.name }}</a>{% if table.hidden %}<em> (hidden)</em>{% endif %}</h2>
<p><em>{% for column in table.columns[:9] %}{{ column }}{% if not loop.last %}, {% endif %}{% endfor %}{% if table.columns|length > 9 %}...{% endif %}</em></p>
<p>{{ "{:,}".format(table.count) }} row{% if table.count == 1 %}{% else %}s{% endif %}</p>
</div>
@ -35,14 +35,14 @@
{% endfor %}
{% if hidden_count and not show_hidden %}
<p>... and <a href="/{{ database }}-{{ database_hash }}?_show_hidden=1">{{ "{:,}".format(hidden_count) }} hidden table{% if hidden_count == 1 %}{% else %}s{% endif %}</a></p>
<p>... and <a href="{{ database_url(database) }}?_show_hidden=1">{{ "{:,}".format(hidden_count) }} hidden table{% if hidden_count == 1 %}{% else %}s{% endif %}</a></p>
{% endif %}
{% if views %}
<h2>Views</h2>
<ul>
{% for view in views %}
<li><a href="/{{ database }}-{{ database_hash }}/{{ view|urlencode }}">{{ view }}</a></li>
<li><a href="{{ database_url(database) }}/{{ view|urlencode }}">{{ view }}</a></li>
{% endfor %}
</ul>
{% endif %}
@ -51,13 +51,13 @@
<h2>Queries</h2>
<ul>
{% for query in queries %}
<li><a href="/{{ database }}-{{ database_hash }}/{{ query.name|urlencode }}" title="{{ query.description or query.sql }}">{{ query.title or query.name }}</a></li>
<li><a href="{{ database_url(database) }}/{{ query.name|urlencode }}" title="{{ query.description or query.sql }}">{{ query.title or query.name }}</a></li>
{% endfor %}
</ul>
{% endif %}
{% if config.allow_download and database != ":memory:" %}
<p class="download-sqlite">Download SQLite DB: <a href="/{{ database }}-{{ database_hash }}.db">{{ database }}.db</a> <em>{{ format_bytes(size) }}</em></p>
<p class="download-sqlite">Download SQLite DB: <a href="{{ database_url(database) }}.db">{{ database }}.db</a> <em>{{ format_bytes(size) }}</em></p>
{% endif %}
{% include "_codemirror_foot.html" %}

View file

@ -19,13 +19,13 @@
{% block body_class %}query db-{{ database|to_css_class }}{% endblock %}
{% block content %}
<div class="hd"><a href="/">home</a> / <a href="/{{ database }}-{{ database_hash }}">{{ database }}</a></div>
<div class="hd"><a href="/">home</a> / <a href="{{ database_url(database) }}">{{ database }}</a></div>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash[:6] }}">{{ metadata.title or database }}</h1>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_color(database) }}">{{ metadata.title or database }}</h1>
{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
<form class="sql" action="/{{ database }}-{{ database_hash }}{% if canned_query %}/{{ canned_query }}{% endif %}" method="get">
<form class="sql" action="{{ database_url(database) }}{% if canned_query %}/{{ canned_query }}{% endif %}" method="get">
<h3>Custom SQL query{% if display_rows %} returning {% if truncated %}more than {% endif %}{{ "{:,}".format(display_rows|length) }} row{% if display_rows|length == 1 %}{% else %}s{% endif %}{% endif %} <span class="show-hide-sql">{% if hide_sql %}(<a href="{{ path_with_removed_args(request, {'_hide_sql': '1'}) }}">show</a>){% else %}(<a href="{{ path_with_added_args(request, {'_hide_sql': '1'}) }}">hide</a>){% endif %}</span></h3>
{% if not hide_sql %}
{% if editable and config.allow_sql %}

View file

@ -16,9 +16,9 @@
{% block body_class %}row db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %}
{% block content %}
<div class="hd"><a href="/">home</a> / <a href="/{{ database }}-{{ database_hash }}">{{ database }}</a> / <a href="/{{ database }}-{{ database_hash }}/{{ table|quote_plus }}">{{ table }}</a></div>
<div class="hd"><a href="/">home</a> / <a href="{{ database_url(database) }}">{{ database }}</a> / <a href="{{ database_url(database) }}/{{ table|quote_plus }}">{{ table }}</a></div>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash[:6] }}">{{ table }}: {{ ', '.join(primary_key_values) }}</a></h1>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_color(database) }}">{{ table }}: {{ ', '.join(primary_key_values) }}</a></h1>
{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
@ -31,7 +31,7 @@
<ul>
{% for other in foreign_key_tables %}
<li>
<a href="/{{ database }}-{{ database_hash }}/{{ other.other_table|quote_plus }}?{{ other.other_column }}={{ ', '.join(primary_key_values) }}">
<a href="{{ database_url(database) }}/{{ other.other_table|quote_plus }}?{{ other.other_column }}={{ ', '.join(primary_key_values) }}">
{{ "{:,}".format(other.count) }} row{% if other.count == 1 %}{% else %}s{% endif %}</a>
from {{ other.other_column }} in {{ other.other_table }}
</li>

View file

@ -17,9 +17,9 @@
{% block body_class %}table db-{{ database|to_css_class }} table-{{ table|to_css_class }}{% endblock %}
{% block content %}
<div class="hd"><a href="/">home</a> / <a href="/{{ database }}-{{ database_hash }}">{{ database }}</a></div>
<div class="hd"><a href="/">home</a> / <a href="{{ database_url(database) }}">{{ database }}</a></div>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash[:6] }}">{{ metadata.title or table }}{% if is_view %} (view){% endif %}</h1>
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_color(database) }}">{{ metadata.title or table }}{% if is_view %} (view){% endif %}</h1>
{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
@ -29,7 +29,7 @@
</h3>
{% endif %}
<form class="filters" action="/{{ database }}-{{ database_hash }}/{{ table|quote_plus }}" method="get">
<form class="filters" action="{{ database_url(database) }}/{{ table|quote_plus }}" method="get">
{% if supports_search %}
<div class="search-row"><label for="_search">Search:</label><input id="_search" type="search" name="_search" value="{{ search }}"></div>
{% endif %}
@ -89,7 +89,7 @@
</form>
{% if query.sql and config.allow_sql %}
<p><a class="not-underlined" title="{{ query.sql }}" href="/{{ database }}-{{ database_hash }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
<p><a class="not-underlined" title="{{ query.sql }}" href="{{ database_url(database) }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
{% endif %}
<p class="export-links">This data as <a href="{{ url_json }}">JSON</a>{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>){% endif %}</p>

View file

@ -208,8 +208,14 @@ def path_with_added_args(request, args, path=None):
def path_with_removed_args(request, args, path=None):
query_string = request.query_string
if path is None:
path = request.path
else:
if "?" in path:
bits = path.split("?", 1)
path, query_string = bits
# args can be a dict or a set
path = path or request.path
current = []
if isinstance(args, set):
def should_remove(key, value):
@ -218,7 +224,7 @@ def path_with_removed_args(request, args, path=None):
# Must match key AND value
def should_remove(key, value):
return args.get(key) == value
for key, value in urllib.parse.parse_qsl(request.query_string):
for key, value in urllib.parse.parse_qsl(query_string):
if not should_remove(key, value):
current.append((key, value))
query_string = urllib.parse.urlencode(current)

View file

@ -75,6 +75,17 @@ class RenderMixin(HTTPMethodView):
else:
yield {"url": url}
def database_url(self, database):
if not self.ds.config("hash_urls"):
return "/{}".format(database)
else:
return "/{}-{}".format(
database, self.ds.inspect()[database]["hash"][:HASH_LENGTH]
)
def database_color(self, database):
return 'ff0000'
def render(self, templates, **context):
template = self.ds.jinja_env.select_template(templates)
select_templates = [
@ -105,6 +116,8 @@ class RenderMixin(HTTPMethodView):
"extra_js_urls", template, context
),
"format_bytes": format_bytes,
"database_url": self.database_url,
"database_color": self.database_color,
}
}
)
@ -131,16 +144,18 @@ class BaseView(RenderMixin):
r.headers["Access-Control-Allow-Origin"] = "*"
return r
def redirect(self, request, path, forward_querystring=True):
def redirect(self, request, path, forward_querystring=True, remove_args=None):
if request.query_string and "?" not in path and forward_querystring:
path = "{}?{}".format(path, request.query_string)
if remove_args:
path = path_with_removed_args(request, remove_args, path=path)
r = response.redirect(path)
r.headers["Link"] = "<{}>; rel=preload".format(path)
if self.ds.cors:
r.headers["Access-Control-Allow-Origin"] = "*"
return r
def resolve_db_name(self, db_name, **kwargs):
def resolve_db_name(self, request, db_name, **kwargs):
databases = self.ds.inspect()
hash = None
name = None
@ -161,7 +176,9 @@ class BaseView(RenderMixin):
raise NotFound("Database not found: {}".format(name))
expected = info["hash"][:HASH_LENGTH]
if expected != hash:
correct_hash_provided = (expected == hash)
if not correct_hash_provided:
if "table_and_format" in kwargs:
table, _format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
@ -188,9 +205,11 @@ class BaseView(RenderMixin):
should_redirect += kwargs["as_format"]
if "as_db" in kwargs:
should_redirect += kwargs["as_db"]
return name, expected, should_redirect
return name, expected, None
if self.ds.config("hash_urls") or "_hash" in request.args:
return name, expected, correct_hash_provided, should_redirect
return name, expected, correct_hash_provided, None
def absolute_url(self, request, path):
url = urllib.parse.urljoin(request.url, path)
@ -202,11 +221,13 @@ class BaseView(RenderMixin):
assert NotImplemented
async def get(self, request, db_name, **kwargs):
database, hash, should_redirect = self.resolve_db_name(db_name, **kwargs)
database, hash, correct_hash_provided, should_redirect = self.resolve_db_name(
request, db_name, **kwargs
)
if should_redirect:
return self.redirect(request, should_redirect)
return self.redirect(request, should_redirect, remove_args={"_hash"})
return await self.view_get(request, database, hash, **kwargs)
return await self.view_get(request, database, hash, correct_hash_provided, **kwargs)
async def as_csv(self, request, database, hash, **kwargs):
stream = request.args.get("_stream")
@ -301,7 +322,7 @@ class BaseView(RenderMixin):
content_type=content_type
)
async def view_get(self, request, database, hash, **kwargs):
async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):
# If ?_format= is provided, use that as the format
_format = request.args.get("_format", None)
if not _format:
@ -418,7 +439,6 @@ class BaseView(RenderMixin):
"ok": False,
"error": error,
"database": database,
"database_hash": hash,
}
elif shape == "array":
data = data["rows"]
@ -489,10 +509,13 @@ class BaseView(RenderMixin):
r = self.render(templates, **context)
r.status = status_code
# Set far-future cache expiry
if self.ds.cache_headers:
if self.ds.cache_headers and r.status == 200:
ttl = request.args.get("_ttl", None)
if ttl is None or not ttl.isdigit():
ttl = self.ds.config("default_cache_ttl")
if correct_hash_provided:
ttl = self.ds.config("default_cache_ttl_hashed")
else:
ttl = self.ds.config("default_cache_ttl")
else:
ttl = int(ttl)
if ttl == 0:
@ -572,7 +595,6 @@ class BaseView(RenderMixin):
display_rows.append(display_row)
return {
"display_rows": display_rows,
"database_hash": hash,
"custom_sql": True,
"named_parameter_values": named_parameter_values,
"editable": editable,

View file

@ -30,7 +30,6 @@ class DatabaseView(BaseView):
"views": info["views"],
"queries": self.ds.get_canned_queries(database),
}, {
"database_hash": hash,
"show_hidden": request.args.get("_show_hidden"),
"editable": True,
"metadata": metadata,
@ -41,7 +40,7 @@ class DatabaseView(BaseView):
class DatabaseDownload(BaseView):
async def view_get(self, request, database, hash, **kwargs):
async def view_get(self, request, database, hash, correct_hash_present, **kwargs):
if not self.ds.config("allow_download"):
raise DatasetteError("Database download is forbidden", status=403)
filepath = self.ds.inspect()[database]["file"]

View file

@ -21,7 +21,7 @@ class IndexView(RenderMixin):
database = {
"name": key,
"hash": info["hash"],
"path": "{}-{}".format(key, info["hash"][:HASH_LENGTH]),
"path": self.database_url(key),
"tables_truncated": sorted(
tables, key=lambda t: t["count"], reverse=True
)[

View file

@ -750,7 +750,6 @@ class TableView(RowTableShared):
)
self.ds.update_with_inherited_metadata(metadata)
return {
"database_hash": hash,
"supports_search": bool(fts_table),
"search": search or "",
"use_rowid": use_rowid,
@ -851,7 +850,6 @@ class RowView(RowTableShared):
for column in display_columns:
column["sortable"] = False
return {
"database_hash": hash,
"foreign_key_tables": await self.foreign_key_tables(
database, table, pk_values
),