Merge branch 'master' into publish-heroku

This commit is contained in:
Jacob Kaplan-Moss 2017-11-17 13:36:50 -08:00
commit 54d58ef690
23 changed files with 945 additions and 66 deletions

View file

@ -0,0 +1 @@
from datasette.version import __version_info__, __version__ # noqa

View file

@ -4,6 +4,7 @@ from sanic.exceptions import NotFound
from sanic.views import HTTPMethodView
from sanic_jinja2 import SanicJinja2
from jinja2 import FileSystemLoader
import re
import sqlite3
from pathlib import Path
from concurrent import futures
@ -11,21 +12,24 @@ import asyncio
import threading
import urllib.parse
import json
import jinja2
import hashlib
import time
from .utils import (
build_where_clauses,
compound_pks_from_path,
CustomJSONEncoder,
escape_css_string,
escape_sqlite_table_name,
get_all_foreign_keys,
InvalidSql,
path_from_row_pks,
path_with_added_args,
path_with_ext,
compound_pks_from_path,
sqlite_timelimit,
validate_sql_select,
)
from .version import __version__
app_root = Path(__file__).parent.parent
@ -113,6 +117,10 @@ class BaseView(HTTPMethodView):
conn.text_factory = lambda x: str(x, 'utf-8', 'replace')
for name, num_args, func in self.ds.sqlite_functions:
conn.create_function(name, num_args, func)
if self.ds.sqlite_extensions:
conn.enable_load_extension(True)
for extension in self.ds.sqlite_extensions:
conn.execute("SELECT load_extension('{}')".format(extension))
async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None):
"""Executes sql against db_name in a thread"""
@ -221,6 +229,7 @@ class BaseView(HTTPMethodView):
'url_json': path_with_ext(request, '.json'),
'url_jsono': path_with_ext(request, '.jsono'),
'metadata': self.ds.metadata,
'datasette_version': __version__,
}}
r = self.jinja.render(
template,
@ -252,12 +261,12 @@ class IndexView(HTTPMethodView):
'path': '{}-{}'.format(key, info['hash'][:7]),
'tables_truncated': sorted(
info['tables'].items(),
key=lambda p: p[1],
key=lambda p: p[1]['count'],
reverse=True
)[:5],
'tables_count': len(info['tables'].items()),
'tables_more': len(info['tables'].items()) > 5,
'table_rows': sum(info['tables'].values()),
'table_rows': sum([t['count'] for t in info['tables'].values()]),
}
databases.append(database)
if as_json:
@ -277,6 +286,7 @@ class IndexView(HTTPMethodView):
request,
databases=databases,
metadata=self.ds.metadata,
datasette_version=__version__,
)
@ -286,13 +296,14 @@ async def favicon(request):
class DatabaseView(BaseView):
template = 'database.html'
# Matches :named SQL parameters; the captured name may contain ASCII letters,
# digits and underscores.
re_named_parameter = re.compile(':([a-zA-Z0-9_]+)')
async def data(self, request, name, hash):
if request.args.get('sql'):
return await self.custom_sql(request, name, hash)
tables = []
table_inspect = self.ds.inspect()[name]['tables']
for table_name, table_rows in table_inspect.items():
for table_name, info in table_inspect.items():
rows = await self.execute(
name,
'PRAGMA table_info([{}]);'.format(table_name)
@ -300,7 +311,7 @@ class DatabaseView(BaseView):
tables.append({
'name': table_name,
'columns': [r[1] for r in rows],
'table_rows': table_rows,
'table_rows': info['count'],
})
tables.sort(key=lambda t: t['name'])
views = await self.execute(name, 'select name from sqlite_master where type = "view"')
@ -316,6 +327,19 @@ class DatabaseView(BaseView):
params = request.raw_args
sql = params.pop('sql')
validate_sql_select(sql)
# Extract any :named parameters
named_parameters = self.re_named_parameter.findall(sql)
named_parameter_values = {
named_parameter: params.get(named_parameter) or ''
for named_parameter in named_parameters
}
# Set to blank string if missing from params
for named_parameter in named_parameters:
if named_parameter not in params:
params[named_parameter] = ''
extra_args = {}
if params.get('_sql_time_limit_ms'):
extra_args['custom_time_limit'] = int(params['_sql_time_limit_ms'])
@ -335,6 +359,7 @@ class DatabaseView(BaseView):
}, {
'database_hash': hash,
'custom_sql': True,
'named_parameter_values': named_parameter_values,
}
@ -446,12 +471,16 @@ class TableView(BaseView):
)
columns = [r[0] for r in description]
display_columns = columns
if use_rowid:
display_columns = display_columns[1:]
rows = list(rows)
display_columns = columns
if not use_rowid and not is_view:
display_columns = ['Link'] + display_columns
info = self.ds.inspect()
table_rows = info[name]['tables'].get(table)
table_rows = None
if not is_view:
table_rows = info[name]['tables'][table]['count']
next_value = None
next_url = None
if len(rows) > self.page_size:
@ -462,6 +491,7 @@ class TableView(BaseView):
next_url = urllib.parse.urljoin(request.url, path_with_added_args(request, {
'_next': next_value,
}))
return {
'database': name,
'table': table,
@ -482,11 +512,47 @@ class TableView(BaseView):
}, lambda: {
'database_hash': hash,
'use_rowid': use_rowid,
'row_link': lambda row: path_from_row_pks(row, pks, use_rowid),
'display_columns': display_columns,
'display_rows': make_display_rows(name, hash, table, rows, display_columns, pks, is_view, use_rowid),
}
def make_display_rows(database, database_hash, table, rows, display_columns, pks, is_view, use_rowid):
    """Yield one list of ``{'column': ..., 'value': ...}`` cells per row.

    Unless ``is_view`` is true, each row starts with a link cell pointing at
    ``/<database>-<database_hash>/<table>/<pks>``; it is labelled ``rowid``
    when ``use_rowid`` is set and ``Link`` otherwise. The remaining cells pair
    the row's values with ``display_columns``: ``None`` becomes a
    non-breaking space and everything else is converted with ``str()``.
    """
    link_html = '<a href="/{database}-{database_hash}/{table}/{flat_pks}">{flat_pks}</a>'
    link_column = 'rowid' if use_rowid else 'Link'
    for row in rows:
        cells = []
        # Unless we are a view, the first column is a link - either to the
        # rowid or to the simple or compound primary key
        if not is_view:
            cells.append({
                'column': link_column,
                'value': jinja2.Markup(link_html.format(
                    database=database,
                    database_hash=database_hash,
                    table=urllib.parse.quote_plus(table),
                    flat_pks=path_from_row_pks(row, pks, use_rowid),
                )),
            })
        # NOTE(review): values are paired with display_columns positionally;
        # confirm the caller passes a list aligned with the row's columns.
        for value, column in zip(row, display_columns):
            if use_rowid and column == 'rowid':
                # Already rendered as the linked first cell
                continue
            if False:  # TODO: This is where we will do foreign key linking
                rendered = jinja2.Markup('<a href="#">{}</a>'.format('foreign key'))
            elif value is None:
                rendered = jinja2.Markup('&nbsp;')
            else:
                rendered = str(value)
            cells.append({
                'column': column,
                'value': rendered,
            })
        yield cells
class RowView(BaseView):
template = 'row.html'
@ -524,7 +590,6 @@ class RowView(BaseView):
'primary_key_values': pk_values,
}, {
'database_hash': hash,
'row_link': None,
}
@ -532,7 +597,7 @@ class Datasette:
def __init__(
self, files, num_threads=3, cache_headers=True, page_size=100,
max_returned_rows=1000, sql_time_limit_ms=1000, cors=False,
inspect_data=None, metadata=None):
inspect_data=None, metadata=None, sqlite_extensions=None):
self.files = files
self.num_threads = num_threads
self.executor = futures.ThreadPoolExecutor(
@ -546,6 +611,7 @@ class Datasette:
self._inspect = inspect_data
self.metadata = metadata or {}
self.sqlite_functions = []
self.sqlite_extensions = sqlite_extensions or []
def inspect(self):
if not self._inspect:
@ -572,7 +638,13 @@ class Datasette:
for r in conn.execute('select * from sqlite_master where type="table"')
]
for table in table_names:
tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
tables[table] = {
'count': conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0],
}
foreign_keys = get_all_foreign_keys(conn)
for table, info in foreign_keys.items():
tables[table]['foreign_keys'] = info
self._inspect[name] = {
'hash': m.hexdigest(),
@ -587,7 +659,8 @@ class Datasette:
app,
loader=FileSystemLoader([
str(app_root / 'datasette' / 'templates')
])
]),
autoescape=True,
)
self.jinja.add_env('escape_css_string', escape_css_string, 'filters')
self.jinja.add_env('quote_plus', lambda u: urllib.parse.quote_plus(u), 'filters')

View file

@ -124,9 +124,13 @@ def package(files, tag, metadata, extra_options, **extra_metadata):
@click.option('--page_size', default=100, help='Page size - default is 100')
@click.option('--max_returned_rows', default=1000, help='Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.')
@click.option('--sql_time_limit_ms', default=1000, help='Max time allowed for SQL queries in ms')
@click.option(
'sqlite_extensions', '--load-extension', envvar='SQLITE_EXTENSIONS', multiple=True,
type=click.Path(exists=True, resolve_path=True), help='Path to a SQLite extension to load'
)
@click.option('--inspect-file', help='Path to JSON file created using "datasette build"')
@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata')
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, inspect_file, metadata):
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, sqlite_extensions, inspect_file, metadata):
"""Serve up specified SQLite database files with a web UI"""
if reload:
import hupper
@ -150,6 +154,7 @@ def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows,
sql_time_limit_ms=sql_time_limit_ms,
inspect_data=inspect_data,
metadata=metadata_data,
sqlite_extensions=sqlite_extensions,
)
# Force initial hashing/table counting
ds.inspect()

View file

@ -83,14 +83,24 @@ form.sql textarea {
font-family: monospace;
font-size: 1.3em;
}
form.sql label {
font-weight: bold;
display: inline-block;
width: 15%;
}
form.sql input[type=text] {
border: 1px solid #ccc;
width: 60%;
padding: 4px;
font-family: monospace;
display: inline-block;
font-size: 1.1em;
}
@media only screen and (max-width: 576px) {
form.sql textarea {
width: 95%;
}
}
form.sql p {
margin: 0;
}
form.sql input[type=submit] {
color: #fff;
background-color: #007bff;

View file

@ -12,7 +12,7 @@
{% endblock %}
<div class="ft">
Powered by <a href="https://github.com/simonw/datasette">Datasette</a>
Powered by <a href="https://github.com/simonw/datasette" title="Datasette v{{ datasette_version }}">Datasette</a>
{% if query_ms %}&middot; Query took {{ query_ms|round(3) }}ms{% endif %}
{% if metadata.license %}&middot; Data license:
{% if metadata.license_url %}

View file

@ -16,7 +16,7 @@
<link rel="stylesheet" href="/-/static/codemirror-5.31.0-min.css" />
<script src="/-/static/codemirror-5.31.0-sql.min.js"></script>
<style>
.CodeMirror { height: 70px; width: 80%; border: 1px solid #ddd; }
.CodeMirror { height: auto; min-height: 70px; width: 80%; border: 1px solid #ddd; }
.CodeMirror-scroll { max-height: 200px; }
</style>
{% endblock %}
@ -33,7 +33,14 @@
{% endif %}
<form class="sql" action="/{{ database }}-{{ database_hash }}" method="get">
<h3>Custom SQL query</h3>
<p><textarea name="sql">{% if query and query.sql %}{{ query.sql }}{% else %}select * from {{ tables[0].name|escape_table_name }}{% endif %}</textarea></p>
{% if named_parameter_values %}
<h3>Query parameters</h3>
{# loop.index is the 1-based iteration counter; it gives each label/input pair a unique id #}
{% for name, value in named_parameter_values.items() %}
<p><label for="qp{{ loop.index }}">{{ name }}</label> <input type="text" id="qp{{ loop.index }}" name="{{ name }}" value="{{ value }}"></p>
{% endfor %}
{% endif %}
<p><input type="submit" value="Run SQL"></p>
</form>
@ -52,7 +59,7 @@
{% for row in rows %}
<tr>
{% for td in row %}
<td>{{ td or "&nbsp;" }}</td>
<td>{{ td or "&nbsp;"|safe }}</td>
{% endfor %}
</tr>
{% endfor %}
@ -83,6 +90,11 @@
mode: "text/x-sql",
lineWrapping: true,
});
editor.setOption("extraKeys", {
"Shift-Enter": function() {
document.getElementsByClassName("sql")[0].submit();
}
});
</script>
{% endblock %}

View file

@ -5,11 +5,8 @@
{% block extra_head %}
<style>
@media only screen and (max-width: 576px) {
{% if not is_view %}
td:nth-of-type(1):before { content: "{% if use_rowid %}rowid{% else %}Link{% endif %}"; }
{% endif %}
{% for column in display_columns %}
td:nth-of-type({% if is_view %}{{ loop.index }}{% else %}{{ loop.index + 1 }}{% endif %}):before { content: "{{ column|escape_css_string }}"; }
td:nth-of-type({{ loop.index }}):before { content: "{{ column|escape_css_string }}"; }
{% endfor %}
}
</style>
@ -34,18 +31,14 @@
<table>
<thead>
<tr>
{% if not is_view %}<th scope="col">{% if use_rowid %}rowid{% else %}Link{% endif %}</th>{% endif %}
{% for column in display_columns %}<th scope="col">{{ column }}</th>{% endfor %}
</tr>
</thead>
<tbody>
{% for row in rows %}
{% for row in display_rows %}
<tr>
{% if not is_view %}<td><a href="/{{ database }}-{{ database_hash }}/{{ table|quote_plus }}/{{ row_link(row) }}">{{ row_link(row) }}</a></td>{% endif %}
{% for td in row %}
{% if not use_rowid or (use_rowid and not loop.first) %}
<td>{{ td or "&nbsp;" }}</td>
{% endif %}
{% for cell in row %}
<td>{{ cell.value }}</td>
{% endfor %}
</tr>
{% endfor %}

View file

@ -245,3 +245,240 @@ def temporary_heroku_directory(files, name, metadata, extra_options, extra_metad
tmp.cleanup()
os.chdir(saved_cwd)
from contextlib import contextmanager
import base64
import json
import os
import re
import shlex
import shutil
import sqlite3
import tempfile
import time
import urllib
import urllib.parse
def compound_pks_from_path(path):
    """Split a comma-separated URL path component into decoded primary key values."""
    encoded_values = path.split(',')
    return list(map(urllib.parse.unquote_plus, encoded_values))
def path_from_row_pks(row, pks, use_rowid):
    """Return the URL path component identifying ``row``.

    Uses the ``rowid`` value when ``use_rowid`` is true, otherwise the values
    of the ``pks`` columns; each value is URL-quoted and the results are
    joined with commas.
    """
    key_columns = ['rowid'] if use_rowid else pks
    return ','.join(
        urllib.parse.quote_plus(str(row[key_column]))
        for key_column in key_columns
    )
def build_where_clauses(args):
    """Translate querystring-style filter arguments into SQL where clauses.

    Each key is either a column name (exact match) or ``column__lookup`` where
    lookup is one of exact, contains, endswith, startswith, gt, gte, lt, lte,
    glob, like or isnull. A ``__suffix`` that is not a known lookup is treated
    as part of the column name, so columns containing ``__`` can be filtered.

    Returns ``(sql_bits, params)``: a list of SQL fragments using ``:pN``
    named placeholders, and the dict of values for those placeholders. Keys
    are processed in sorted order.
    """
    templates = {
        'exact': '"{}" = :{}',
        'contains': '"{}" like :{}',
        'endswith': '"{}" like :{}',
        'startswith': '"{}" like :{}',
        'gt': '"{}" > :{}',
        'gte': '"{}" >= :{}',
        'lt': '"{}" < :{}',
        'lte': '"{}" <= :{}',
        'glob': '"{}" glob :{}',
        'like': '"{}" like :{}',
        'isnull': '"{}" is null',
    }
    numeric_operators = {'gt', 'gte', 'lt', 'lte'}
    value_converters = {
        'contains': lambda s: '%{}%'.format(s),
        'endswith': lambda s: '%{}'.format(s),
        'startswith': lambda s: '{}%'.format(s),
    }
    sql_bits = []
    params = {}
    for i, (key, value) in enumerate(sorted(args.items())):
        column, lookup = key, 'exact'
        if '__' in key:
            candidate_column, candidate_lookup = key.rsplit('__', 1)
            if candidate_lookup in templates:
                column, lookup = candidate_column, candidate_lookup
        template = templates[lookup]
        converted = value_converters.get(lookup, lambda s: s)(value)
        if lookup in numeric_operators and converted.isdigit():
            converted = int(converted)
        # The column name comes from the request, so double any embedded
        # quote to keep it inside the quoted identifier.
        quoted_column = column.replace('"', '""')
        if ':{}' in template:
            param_id = 'p{}'.format(i)
            params[param_id] = converted
            tokens = (quoted_column, param_id)
        else:
            tokens = (quoted_column,)
        sql_bits.append(template.format(*tokens))
    return sql_bits, params
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also handles sqlite3 rows, cursors and bytes."""

    def default(self, obj):
        """Convert ``obj`` to a JSON-serializable value.

        ``sqlite3.Row`` becomes a tuple and ``sqlite3.Cursor`` a list. Bytes
        are returned as text when they are valid UTF-8, otherwise as a
        ``{'$base64': True, 'encoded': ...}`` dict. Anything else is passed to
        the base encoder.
        """
        if isinstance(obj, sqlite3.Row):
            return tuple(obj)
        if isinstance(obj, sqlite3.Cursor):
            return list(obj)
        if not isinstance(obj, bytes):
            return json.JSONEncoder.default(self, obj)
        # Does it encode to utf8?
        try:
            text = obj.decode('utf8')
        except UnicodeDecodeError:
            text = None
        if text is not None:
            return text
        return {
            '$base64': True,
            'encoded': base64.b64encode(obj).decode('latin1'),
        }
@contextmanager
def sqlite_timelimit(conn, ms):
    """Context manager that interrupts queries on ``conn`` after ``ms`` milliseconds.

    Installs a SQLite progress handler that returns a non-zero value once the
    deadline has passed. The handler is always removed on exit, including when
    the body raises, so an interrupted query does not leave the connection
    aborting every later statement.
    """
    deadline = time.time() + (ms / 1000)
    # n is the number of SQLite virtual machine instructions that will be
    # executed between each check. It's hard to know what to pick here.
    # After some experimentation, I've decided to go with 1000 by default and
    # 1 for time limits that are less than 50ms
    n = 1 if ms < 50 else 1000

    def handler():
        if time.time() >= deadline:
            return 1

    conn.set_progress_handler(handler, n)
    try:
        yield
    finally:
        conn.set_progress_handler(None, n)
class InvalidSql(Exception):
    """Raised when a user-supplied SQL statement fails validation."""


def validate_sql_select(sql):
    """Raise InvalidSql unless ``sql`` is a SELECT statement without PRAGMA.

    The check is case-insensitive and ignores surrounding whitespace. The
    SELECT keyword may be followed by a space, tab or line break, so
    multi-line queries are accepted.
    """
    sql = sql.strip().lower()
    if not sql.startswith(('select ', 'select\t', 'select\n', 'select\r')):
        raise InvalidSql('Statement must begin with SELECT')
    if 'pragma' in sql:
        raise InvalidSql('Statement may not contain PRAGMA')
def path_with_added_args(request, args):
    """Return the request path plus its querystring with ``args`` merged in.

    Values in ``args`` override existing arguments of the same name; the
    request's own ``raw_args`` are not modified.
    """
    merged_args = request.raw_args.copy()
    merged_args.update(args)
    querystring = urllib.parse.urlencode(merged_args)
    return request.path + '?' + querystring
def path_with_ext(request, ext):
    """Return the request path with ``ext`` appended, keeping any querystring."""
    query_string = request.query_string
    if not query_string:
        return request.path + ext
    return request.path + ext + '?' + query_string
# Characters that cannot appear literally inside a quoted CSS string: both
# quote characters, the backslash, and the line-break characters.
_css_re = re.compile(r'''['"\n\r\f\\]''')
_boring_table_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')


def escape_css_string(s):
    """Escape ``s`` for use inside a quoted CSS string.

    Each special character is replaced by a backslash followed by exactly six
    hex digits. The fixed width matters: a shorter escape would absorb a
    following hex digit (``'a`` must not turn into the escape ``\\27a``).
    """
    return _css_re.sub(lambda m: '\\{:06X}'.format(ord(m.group())), s)


def escape_sqlite_table_name(s):
    """Return ``s`` quoted, if necessary, for use as a SQLite table name.

    Names made only of ASCII letters, digits and underscores (not starting
    with a digit) are returned unchanged. Other names are wrapped in square
    brackets, except names containing ``]``, which bracket quoting cannot
    represent; those are double-quoted with embedded quotes doubled.
    """
    # fullmatch so a trailing newline is not mistaken for a plain name
    if _boring_table_name_re.fullmatch(s):
        return s
    if ']' in s:
        return '"{}"'.format(s.replace('"', '""'))
    return '[{}]'.format(s)
def make_dockerfile(files, metadata_file, extra_options=''):
cmd = ['"datasette"', '"serve"', '"--host"', '"0.0.0.0"']
cmd.append('"' + '", "'.join(files) + '"')
cmd.extend(['"--cors"', '"--port"', '"8001"', '"--inspect-file"', '"inspect-data.json"'])
if metadata_file:
cmd.extend(['"--metadata"', '"{}"'.format(metadata_file)])
if extra_options:
for opt in extra_options.split():
cmd.append('"{}"'.format(opt))
return '''
FROM python:3
COPY . /app
WORKDIR /app
RUN pip install datasette
RUN datasette build {} --inspect-file inspect-data.json
EXPOSE 8001
CMD [{}]'''.format(
' '.join(files),
', '.join(cmd)
).strip()
@contextmanager
def temporary_docker_directory(files, name, metadata, extra_options, extra_metadata=None):
    """Context manager that prepares a temporary Docker build directory.

    Creates ``<tmp>/<name>``, writes a ``Dockerfile`` and, when there is any
    metadata, a ``metadata.json`` into it, links (or copies) each of
    ``files`` into it, and makes it the current working directory for the
    duration of the ``with`` block. On exit the original working directory is
    restored and the temporary directory is removed.

    ``metadata`` is an optional open file containing JSON; truthy values in
    ``extra_metadata`` are merged over it.
    """
    extra_metadata = extra_metadata or {}
    saved_cwd = os.getcwd()
    tmp = tempfile.TemporaryDirectory()
    try:
        # We create a datasette folder in there to get a nicer now deploy name
        datasette_dir = os.path.join(tmp.name, name)
        os.mkdir(datasette_dir)
        file_paths = [
            os.path.join(saved_cwd, file_path)
            for file_path in files
        ]
        file_names = [os.path.split(f)[-1] for f in files]
        if metadata:
            metadata_content = json.load(metadata)
        else:
            metadata_content = {}
        for key, value in extra_metadata.items():
            if value:
                metadata_content[key] = value
        dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options)
        os.chdir(datasette_dir)
        if metadata_content:
            with open('metadata.json', 'w') as fp:
                fp.write(json.dumps(metadata_content, indent=2))
        with open('Dockerfile', 'w') as fp:
            fp.write(dockerfile)
        for path, filename in zip(file_paths, file_names):
            destination = os.path.join(datasette_dir, filename)
            try:
                os.link(path, destination)
            except OSError:
                # Hard links cannot cross filesystems; fall back to a copy
                shutil.copyfile(path, destination)
        yield
    finally:
        # Leave the directory before deleting it
        os.chdir(saved_cwd)
        tmp.cleanup()
def get_all_foreign_keys(conn):
    """Return the foreign key relationships of every table reachable via ``conn``.

    The result maps each table name to ``{'incoming': [...], 'outgoing': [...]}``
    where every entry is a dict with ``other_table``, ``column`` and
    ``other_column`` keys. References to tables that do not exist are skipped.
    """
    table_names = [
        row[0]
        for row in conn.execute('select name from sqlite_master where type="table"')
    ]
    table_to_foreign_keys = {
        table_name: {'incoming': [], 'outgoing': []}
        for table_name in table_names
    }
    for source_table in table_names:
        fk_rows = conn.execute(
            'PRAGMA foreign_key_list([{}])'.format(source_table)
        ).fetchall()
        for fk_row in fk_rows:
            if fk_row is None:
                continue
            _id, _seq, target_table, from_column, to_column, _on_update, _on_delete, _match = fk_row
            if target_table not in table_to_foreign_keys:
                # Weird edge case where something refers to a table that does
                # not actually exist
                continue
            incoming = {
                'other_table': source_table,
                'column': to_column,
                'other_column': from_column
            }
            outgoing = {
                'other_table': target_table,
                'column': from_column,
                'other_column': to_column
            }
            table_to_foreign_keys[target_table]['incoming'].append(incoming)
            table_to_foreign_keys[source_table]['outgoing'].append(outgoing)
    return table_to_foreign_keys

2
datasette/version.py Normal file
View file

@ -0,0 +1,2 @@
# Version as a tuple of integers, for programmatic comparison
__version_info__ = (0, 12)
# Dotted string form of the same version
__version__ = '.'.join(str(part) for part in __version_info__)