URL hashing is now off by default - closes #418

Prior to this commit Datasette would calculate the content hash of every
database and redirect to a URL containing that hash, like so:

    https://v0-27.datasette.io/fixtures => https://v0-27.datasette.io/fixtures-dd88475

This assumed that all databases were opened in immutable mode and were not
expected to change.

This assumption will be changing as a result of #419, so this commit takes the
first step towards that change by turning off the old default behaviour. Datasette
will now only redirect hash-free URLs under two circumstances:

* The new `hash_urls` config option is set to true (it defaults to false).
* The user passes `?_hash=1` in the URL.
This commit is contained in:
Simon Willison 2019-03-17 15:55:04 -07:00 committed by GitHub
commit 6f6d0ff2b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 149 additions and 70 deletions

View file

@ -73,6 +73,13 @@ def app_client_no_files():
yield client
@pytest.fixture(scope="session")
def app_client_with_hash():
    """Session-scoped test client built with the ``hash_urls`` option enabled."""
    hashed_config = {'hash_urls': True}
    yield from make_app_client(config=hashed_config)
@pytest.fixture(scope='session')
def app_client_shorter_time_limit():
yield from make_app_client(20)

View file

@ -1,6 +1,7 @@
from .fixtures import ( # noqa
app_client,
app_client_no_files,
app_client_with_hash,
app_client_shorter_time_limit,
app_client_larger_cache_size,
app_client_returned_rows_matches_page_size,
@ -378,7 +379,7 @@ def test_no_files_uses_memory_database(app_client_no_files):
"hidden_table_rows_sum": 0,
"hidden_tables_count": 0,
"name": ":memory:",
"path": ":memory:-000",
"path": "/:memory:",
"table_rows_sum": 0,
"tables_count": 0,
"tables_more": False,
@ -388,7 +389,7 @@ def test_no_files_uses_memory_database(app_client_no_files):
} == response.json
# Try that SQL query
response = app_client_no_files.get(
"/:memory:-0.json?sql=select+sqlite_version()&_shape=array"
"/:memory:.json?sql=select+sqlite_version()&_shape=array"
)
assert 1 == len(response.json)
assert ["sqlite_version()"] == list(response.json[0].keys())
@ -501,12 +502,12 @@ def test_table_not_exists_json(app_client):
} == app_client.get('/fixtures/blah.json').json
def test_jsono_redirects_to_shape_objects(app_client):
response_1 = app_client.get(
def test_jsono_redirects_to_shape_objects(app_client_with_hash):
response_1 = app_client_with_hash.get(
'/fixtures/simple_primary_key.jsono',
allow_redirects=False
)
response = app_client.get(
response = app_client_with_hash.get(
response_1.headers['Location'],
allow_redirects=False
)
@ -1049,13 +1050,15 @@ def test_config_json(app_client):
"allow_facet": True,
"suggest_facets": True,
"allow_sql": True,
"default_cache_ttl": 365 * 24 * 60 * 60,
"default_cache_ttl": 5,
"default_cache_ttl_hashed": 365 * 24 * 60 * 60,
"num_sql_threads": 3,
"cache_size_kb": 0,
"allow_csv_stream": True,
"max_csv_mb": 100,
"truncate_cells_html": 2048,
"force_https_urls": False,
"hash_urls": False,
} == response.json
@ -1300,8 +1303,8 @@ def test_expand_label(app_client):
@pytest.mark.parametrize('path,expected_cache_control', [
("/fixtures/facetable.json", "max-age=31536000"),
("/fixtures/facetable.json?_ttl=invalid", "max-age=31536000"),
("/fixtures/facetable.json", "max-age=5"),
("/fixtures/facetable.json?_ttl=invalid", "max-age=5"),
("/fixtures/facetable.json?_ttl=10", "max-age=10"),
("/fixtures/facetable.json?_ttl=0", "no-cache"),
])
@ -1310,6 +1313,19 @@ def test_ttl_parameter(app_client, path, expected_cache_control):
assert expected_cache_control == response.headers['Cache-Control']
@pytest.mark.parametrize("path,expected_redirect", [
    ("/fixtures/facetable.json?_hash=1", "/fixtures-HASH/facetable.json"),
    ("/fixtures/facetable.json?city_id=1&_hash=1", "/fixtures-HASH/facetable.json?city_id=1"),
])
def test_hash_parameter(app_client, path, expected_redirect):
    """Check that ``?_hash=1`` yields a 302 to the hashed URL without ``_hash``.

    ``HASH`` in ``expected_redirect`` is a placeholder; it is replaced with the
    first seven characters of the hash reported for the fixtures database.
    """
    # Look up the hash currently reported for the fixtures database.
    inspect_data = app_client.get("/-/inspect.json").json
    hash_prefix = inspect_data["fixtures"]["hash"][:7]

    redirect_response = app_client.get(path, allow_redirects=False)
    assert redirect_response.status == 302

    redirected_to = redirect_response.headers["Location"]
    assert expected_redirect.replace("HASH", hash_prefix) == redirected_to
test_json_columns_default_expected = [{
"intval": 1,
"strval": "s",

View file

@ -2,6 +2,7 @@ from bs4 import BeautifulSoup as Soup
from .fixtures import ( # noqa
app_client,
app_client_shorter_time_limit,
app_client_with_hash,
make_app_client,
)
import pytest
@ -15,10 +16,10 @@ def test_homepage(app_client):
assert 'fixtures' in response.text
def test_database_page(app_client):
response = app_client.get('/fixtures', allow_redirects=False)
def test_database_page_redirects_with_url_hash(app_client_with_hash):
response = app_client_with_hash.get('/fixtures', allow_redirects=False)
assert response.status == 302
response = app_client.get('/fixtures')
response = app_client_with_hash.get('/fixtures')
assert 'fixtures' in response.text
@ -41,19 +42,19 @@ def test_sql_time_limit(app_client_shorter_time_limit):
assert expected_html_fragment in response.text
def test_row(app_client):
response = app_client.get(
def test_row_redirects_with_url_hash(app_client_with_hash):
response = app_client_with_hash.get(
'/fixtures/simple_primary_key/1',
allow_redirects=False
)
assert response.status == 302
assert response.headers['Location'].endswith('/1')
response = app_client.get('/fixtures/simple_primary_key/1')
response = app_client_with_hash.get('/fixtures/simple_primary_key/1')
assert response.status == 200
def test_row_strange_table_name(app_client):
response = app_client.get(
def test_row_strange_table_name_with_url_hash(app_client_with_hash):
response = app_client_with_hash.get(
'/fixtures/table%2Fwith%2Fslashes.csv/3',
allow_redirects=False
)
@ -61,7 +62,7 @@ def test_row_strange_table_name(app_client):
assert response.headers['Location'].endswith(
'/table%2Fwith%2Fslashes.csv/3'
)
response = app_client.get('/fixtures/table%2Fwith%2Fslashes.csv/3')
response = app_client_with_hash.get('/fixtures/table%2Fwith%2Fslashes.csv/3')
assert response.status == 200
@ -105,10 +106,7 @@ def test_add_filter_redirects(app_client):
'_filter_op': 'startswith',
'_filter_value': 'x'
})
# First we need to resolve the correct path before testing more redirects
path_base = app_client.get(
'/fixtures/simple_primary_key', allow_redirects=False
).headers['Location']
path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + filter_args
response = app_client.get(path, allow_redirects=False)
assert response.status == 302
@ -146,9 +144,7 @@ def test_existing_filter_redirects(app_client):
'_filter_op_4': 'contains',
'_filter_value_4': 'world',
}
path_base = app_client.get(
'/fixtures/simple_primary_key', allow_redirects=False
).headers['Location']
path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + urllib.parse.urlencode(filter_args)
response = app_client.get(path, allow_redirects=False)
assert response.status == 302
@ -174,9 +170,7 @@ def test_existing_filter_redirects(app_client):
def test_empty_search_parameter_gets_removed(app_client):
path_base = app_client.get(
'/fixtures/simple_primary_key', allow_redirects=False
).headers['Location']
path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + urllib.parse.urlencode({
'_search': '',
'_filter_column': 'name',
@ -191,9 +185,7 @@ def test_empty_search_parameter_gets_removed(app_client):
def test_sort_by_desc_redirects(app_client):
path_base = app_client.get(
'/fixtures/sortable', allow_redirects=False
).headers['Location']
path_base = '/fixtures/sortable'
path = path_base + '?' + urllib.parse.urlencode({
'_sort': 'sortable',
'_sort_by_desc': '1',

View file

@ -59,6 +59,13 @@ def test_path_with_removed_args(path, args, expected):
)
actual = utils.path_with_removed_args(request, args)
assert expected == actual
# Run the test again but this time use the path= argument
request = Request(
"/".encode('utf8'),
{}, '1.1', 'GET', None
)
actual = utils.path_with_removed_args(request, args, path=path)
assert expected == actual
@pytest.mark.parametrize('path,args,expected', [