mirror of
https://github.com/simonw/datasette.git
synced 2025-12-10 16:51:24 +01:00
Started work on cli, which also meant adding setup.py
I'm using click, and click recommends using a setup.py - so I've added one of those. I also refactored the code into a new datasite package. It's not quite deploying to Now properly at the moment though - I seem to have messed up the path handling a bit. Also snuck in a new template for the "Row" view. Refs #40
This commit is contained in:
parent
2a9799bae6
commit
1592fd0419
15 changed files with 602 additions and 523 deletions
0
datasite/__init__.py
Normal file
0
datasite/__init__.py
Normal file
420
datasite/app.py
Normal file
420
datasite/app.py
Normal file
|
|
@ -0,0 +1,420 @@
|
|||
from sanic import Sanic
|
||||
from sanic import response
|
||||
from sanic.exceptions import NotFound
|
||||
from sanic.views import HTTPMethodView
|
||||
from sanic_jinja2 import SanicJinja2
|
||||
from jinja2 import FileSystemLoader
|
||||
import click
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from functools import wraps
|
||||
import urllib.parse
|
||||
import json
|
||||
import base64
|
||||
import hashlib
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Directory two levels above this module, i.e. the parent of the
# ``datasite`` package directory.
app_root = Path(__file__).parent.parent

# Name of the JSON cache file that ensure_build_metadata() reads and writes
# inside app_root.
BUILD_METADATA = 'build-metadata.json'
# Filename patterns globbed (non-recursively) in app_root to find databases.
DB_GLOBS = ('*.db', '*.sqlite', '*.sqlite3')
# Number of bytes read per chunk while hashing a database file.
HASH_BLOCK_SIZE = 1024 * 1024
# Time budget, in milliseconds, that the views pass to sqlite_timelimit().
SQL_TIME_LIMIT_MS = 1000

# Cache of open connections keyed by database name; filled by get_conn().
conns = {}


app = Sanic(__name__)
# Template renderer bound to the app; templates are loaded from
# <app_root>/datasite/templates.
jinja = SanicJinja2(
    app,
    loader=FileSystemLoader([
        str(app_root / 'datasite' / 'templates')
    ])
)
|
||||
|
||||
|
||||
def get_conn(name):
    """Return the cached SQLite connection for database *name*.

    The connection is opened on first use with ``immutable=1`` and kept in
    the module-level ``conns`` dict. Rows come back as ``sqlite3.Row`` and
    text is decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        KeyError: if *name* is not present in the build metadata.
    """
    if name not in conns:
        info = ensure_build_metadata()[name]
        # The metadata records only the bare filename of a database that was
        # globbed from app_root, so resolve it against app_root instead of
        # relying on the process's current working directory.
        conn = sqlite3.connect(
            'file:{}?immutable=1'.format(app_root / info['file']),
            uri=True
        )
        conn.row_factory = sqlite3.Row
        conn.text_factory = lambda x: str(x, 'utf-8', 'replace')
        # Only cache the connection once it is fully configured.
        conns[name] = conn
    return conns[name]
|
||||
|
||||
|
||||
def ensure_build_metadata(regenerate=False):
    """Load, or build and cache, metadata about the databases in app_root.

    If the ``BUILD_METADATA`` file exists and *regenerate* is false, its
    parsed JSON content is returned. Otherwise every file in ``app_root``
    matching ``DB_GLOBS`` is hashed (SHA-256) and its tables are counted;
    the result is written to the ``BUILD_METADATA`` file and returned.

    Returns:
        dict mapping database name (file stem) to a dict with the keys
        ``hash`` (hex digest), ``file`` (bare filename) and ``tables``
        (table name -> row count).

    Raises:
        Exception: if two database files share the same stem.
    """
    build_metadata = app_root / BUILD_METADATA
    if build_metadata.exists() and not regenerate:
        return json.loads(build_metadata.read_text())
    print('Building metadata... path={}'.format(build_metadata))
    metadata = {}
    for glob in DB_GLOBS:
        for path in app_root.glob(glob):
            print(' globbing, path={}'.format(path))
            name = path.stem
            if name in metadata:
                raise Exception('Multiple files with same stem %s' % name)
            # Calculate hash, efficiently (one fixed-size chunk at a time)
            m = hashlib.sha256()
            with path.open('rb') as fp:
                for data in iter(lambda: fp.read(HASH_BLOCK_SIZE), b''):
                    m.update(data)
            # List tables and their row counts.
            # Connect using the globbed path itself (not just its name) so
            # this works regardless of the current working directory.
            tables = {}
            conn = sqlite3.connect(
                'file:{}?immutable=1'.format(path), uri=True
            )
            # "with connection" only manages transactions; close explicitly.
            try:
                conn.row_factory = sqlite3.Row
                table_names = [
                    r['name']
                    for r in conn.execute(
                        "select * from sqlite_master where type = 'table'"
                    )
                ]
                for table in table_names:
                    # Double any embedded quote so the identifier stays valid.
                    tables[table] = conn.execute(
                        'select count(*) from "{}"'.format(
                            table.replace('"', '""')
                        )
                    ).fetchone()[0]
            finally:
                conn.close()

            metadata[name] = {
                'hash': m.hexdigest(),
                'file': path.name,
                'tables': tables,
            }
    build_metadata.write_text(json.dumps(metadata, indent=4))
    return metadata
|
||||
|
||||
|
||||
class BaseView(HTTPMethodView):
    """Shared GET handling for the database, table and row views.

    Subclasses set ``template`` and provide ``data(request, name, hash,
    **kwargs)`` returning ``(data, extra_template_data)``, where the second
    item is a mapping or a zero-argument callable producing one.
    """
    template = None

    def redirect(self, request, path):
        """Redirect to *path*, carrying over the request's query string."""
        query = request.query_string
        target = '{}?{}'.format(path, query) if query else path
        r = response.redirect(target)
        r.headers['Link'] = '<{}>; rel=preload'.format(target)
        return r

    async def get(self, request, db_name, **kwargs):
        """Resolve *db_name*, redirecting when resolve_db_name asks for it."""
        name, hash, should_redirect = resolve_db_name(db_name, **kwargs)
        if not should_redirect:
            return await self.view_get(request, name, hash, **kwargs)
        return self.redirect(request, should_redirect)

    async def view_get(self, request, name, hash, **kwargs):
        """Run ``self.data`` and render the result as JSON or HTML.

        ``as_json`` is removed from *kwargs* before ``self.data`` is called;
        a truthy value selects JSON output, and the value ``'.jsono'``
        additionally turns each row into a column -> value dict.
        """
        as_json = kwargs.pop('as_json', False)
        extra_template_data = {}
        started = time.time()
        try:
            data, extra_template_data = self.data(
                request, name, hash, **kwargs
            )
        except sqlite3.OperationalError as e:
            # SQL errors become the payload instead of propagating.
            data = {'ok': False, 'error': str(e)}
        data['took_ms'] = (time.time() - started) * 1000

        if not as_json:
            extras = (
                extra_template_data()
                if callable(extra_template_data)
                else extra_template_data
            )
            context = dict(data)
            context.update(dict(extras))
            r = jinja.render(self.template, request, **context)
        else:
            # Special case for .jsono extension
            if as_json == '.jsono':
                columns = data.get('columns')
                rows = data.get('rows')
                if rows and columns:
                    data['rows'] = [dict(zip(columns, row)) for row in rows]
            body = json.dumps(data, cls=CustomJSONEncoder)
            r = response.HTTPResponse(
                body,
                content_type='application/json',
                headers={'Access-Control-Allow-Origin': '*'},
            )
        # Set far-future cache expiry
        one_year_seconds = 365 * 24 * 60 * 60
        r.headers['Cache-Control'] = 'max-age={}'.format(one_year_seconds)
        return r
|
||||
|
||||
|
||||
@app.route('/')
async def index(request, sql=None):
    """Render index.html with a summary entry for every known database."""
    databases = []
    for name, info in ensure_build_metadata().items():
        tables = info['tables']
        digest = info['hash']
        # Largest tables first; only the top five are listed on the page.
        by_row_count = sorted(
            tables.items(), key=lambda pair: pair[1], reverse=True
        )
        table_total = len(tables)
        databases.append({
            'name': name,
            'hash': digest,
            'path': '{}-{}'.format(name, digest[:7]),
            'tables_truncated': by_row_count[:5],
            'tables_count': table_total,
            'tables_more': table_total > 5,
            'total_rows': sum(tables.values()),
        })
    return jinja.render('index.html', request, databases=databases)
|
||||
|
||||
|
||||
@app.route('/favicon.ico')
async def favicon(request):
    """Answer favicon requests with an empty text response."""
    empty_body = ''
    return response.text(empty_body)
|
||||
|
||||
|
||||
class DatabaseView(BaseView):
    """Database page: runs ``?sql=`` or lists ``sqlite_master``."""
    template = 'database.html'

    def data(self, request, name, hash):
        """Execute the requested SQL against database *name*.

        Returns:
            ``(data, extra_template_data)`` where ``data`` holds the
            database name, the fetched rows and the column names.

        Raises:
            sqlite3.OperationalError: on SQL errors (handled by the caller,
            ``BaseView.view_get``).
        """
        conn = get_conn(name)
        sql = request.args.get('sql') or 'select * from sqlite_master'
        with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
            cursor = conn.execute(sql)
            # description is None for statements that produce no result set.
            columns = [r[0] for r in cursor.description or ()]
            # Fetch inside the block: a cursor is lazy, so consuming it later
            # would do most of the query's work outside the time limit.
            rows = cursor.fetchall()
        return {
            'database': name,
            'rows': rows,
            'columns': columns,
        }, {
            'database_hash': hash,
        }
|
||||
|
||||
|
||||
class DatabaseDownload(BaseView):
    """Streams the raw database file as an attachment."""

    async def view_get(self, request, name, hash, **kwargs):
        """Stream the file recorded for database *name* in the metadata."""
        filename = ensure_build_metadata()[name]['file']
        # The metadata stores a bare filename for a database found in
        # app_root; resolve it there rather than against the working
        # directory, while still advertising only the bare name to clients.
        return await response.file_stream(
            str(app_root / filename), headers={
                'Content-Disposition': 'attachment; filename="{}"'.format(filename)
            }
        )
|
||||
|
||||
|
||||
class TableView(BaseView):
    """Table page: the first 50 rows, optionally filtered by query args."""
    template = 'table.html'

    def data(self, request, name, hash, table):
        """Fetch up to 50 rows of *table*, filtered by the request args.

        Returns:
            ``(data, extra_template_data)``; the second item is a callable
            because it contains a ``row_link`` function, which is only
            needed for template rendering.

        Raises:
            sqlite3.OperationalError: on SQL errors (handled by the caller,
            ``BaseView.view_get``).
        """
        conn = get_conn(name)
        table = urllib.parse.unquote_plus(table)
        # Double embedded quotes so the table name stays a single identifier.
        quoted_table = '"{}"'.format(table.replace('"', '""'))
        if request.args:
            where_clause, params = build_where_clause(request.args)
            sql = 'select * from {} where {} limit 50'.format(
                quoted_table, where_clause
            )
        else:
            sql = 'select * from {} limit 50'.format(quoted_table)
            params = []

        with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
            cursor = conn.execute(sql, params)
            columns = [r[0] for r in cursor.description]
            # Fetch inside the block so scanning for matches is time-limited.
            rows = cursor.fetchall()

        pks = pks_for_table(conn, table)
        info = ensure_build_metadata()
        total_rows = info[name]['tables'].get(table)
        return {
            'database': name,
            'table': table,
            'rows': rows,
            'total_rows': total_rows,
            'columns': columns,
            'primary_keys': pks,
        }, lambda: {
            'database_hash': hash,
            'row_link': lambda row: path_from_row_pks(row, pks),
        }
|
||||
|
||||
|
||||
class RowView(BaseView):
    """Row page: the row(s) matching a primary-key path."""
    template = 'row.html'

    def data(self, request, name, hash, table, pk_path):
        """Look up the row of *table* identified by *pk_path*.

        *pk_path* is a comma-separated list of URL-quoted primary key
        values, in primary-key order.

        Raises:
            NotFound: if the number of values does not match the table's
                primary key, or no row matches.
            sqlite3.OperationalError: on SQL errors (handled by the caller,
            ``BaseView.view_get``).
        """
        conn = get_conn(name)
        # Decode the table name the same way TableView does.
        table = urllib.parse.unquote_plus(table)
        pk_values = compound_pks_from_path(pk_path)
        pks = pks_for_table(conn, table)
        # A mismatch (including a table without a primary key) can never
        # identify a row; report it as a 404 instead of a binding error.
        if len(pks) != len(pk_values):
            raise NotFound('Record not found: {}'.format(pk_values))
        wheres = [
            '"{}"=?'.format(pk.replace('"', '""'))
            for pk in pks
        ]
        sql = 'select * from "{}" where {}'.format(
            table.replace('"', '""'), ' AND '.join(wheres)
        )
        with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
            cursor = conn.execute(sql, pk_values)
            columns = [r[0] for r in cursor.description]
            rows = cursor.fetchall()
        if not rows:
            raise NotFound('Record not found: {}'.format(pk_values))
        return {
            'database': name,
            'table': table,
            'rows': rows,
            'columns': columns,
            'primary_keys': pks,
        }, {
            'database_hash': hash,
            'row_link': None,
        }
|
||||
|
||||
|
||||
# URL patterns are regular expressions, so they are written as raw strings
# and the dot of the ".json" / ".jsono" suffix is escaped. Unescaped, it
# matched any character, so a name such as "myjson" was parsed as "m" plus
# an "as_json" suffix.
app.add_route(DatabaseView.as_view(), r'/<db_name:[^/\.]+?><as_json:(\.jsono?)?$>')
app.add_route(DatabaseDownload.as_view(), r'/<db_name:[^/]+?><as_db:(\.db)$>')
app.add_route(TableView.as_view(), r'/<db_name:[^/]+>/<table:[^/]+?><as_json:(\.jsono?)?$>')
app.add_route(RowView.as_view(), r'/<db_name:[^/]+>/<table:[^/]+?>/<pk_path:[^/]+?><as_json:(\.jsono?)?$>')
|
||||
|
||||
|
||||
def resolve_db_name(db_name, **kwargs):
    """Split a URL database segment into name and hash, and validate it.

    *db_name* is either ``<name>`` or ``<name>-<hash>``, where the hash is
    the first 7 characters of the database's recorded digest.

    Args:
        db_name: the database segment taken from the URL.
        **kwargs: the remaining route parameters (``table``, ``pk_path``,
            ``as_json``, ``as_db``), used to rebuild the canonical URL.

    Returns:
        ``(name, expected_hash, should_redirect)``; ``should_redirect`` is
        the canonical path when the URL hash is missing or wrong, else
        ``None``.

    Raises:
        NotFound: if no database with that name exists.
    """
    databases = ensure_build_metadata()
    hash = None
    name = None
    if '-' in db_name:
        # Might be name-and-hash, or might just be
        # a name with a hyphen in it
        name, hash = db_name.rsplit('-', 1)
        if name not in databases:
            # Try the whole name
            name = db_name
            hash = None
    else:
        name = db_name
    # Verify the hash
    try:
        info = databases[name]
    except KeyError:
        raise NotFound('Database not found: {}'.format(name))
    expected = info['hash'][:7]
    if expected != hash:
        should_redirect = '/{}-{}'.format(
            name, expected,
        )
        if 'table' in kwargs:
            should_redirect += '/' + kwargs['table']
        # Keep the row part of the URL; without it a row URL carrying a
        # stale or missing hash would redirect to its table instead.
        if 'pk_path' in kwargs:
            should_redirect += '/' + kwargs['pk_path']
        if 'as_json' in kwargs:
            should_redirect += kwargs['as_json']
        if 'as_db' in kwargs:
            should_redirect += kwargs['as_db']
        return name, expected, should_redirect
    return name, expected, None
|
||||
|
||||
|
||||
def compound_pks_from_path(path):
    """Split a comma-separated pk path and URL-decode each component."""
    pieces = path.split(',')
    return list(map(urllib.parse.unquote_plus, pieces))
|
||||
|
||||
|
||||
def pks_for_table(conn, table):
    """Return the primary key column names of *table*, in key order.

    Args:
        conn: an open sqlite3 connection.
        table: the table name, unquoted.

    Returns:
        A list of column names ordered by their position within the
        primary key; empty when the table declares no primary key.
    """
    # Double embedded quotes so a name containing '"' is still a single
    # identifier rather than a syntax error.
    quoted = table.replace('"', '""')
    # The last field of each table_info row is the column's 1-based position
    # in the primary key, or 0 when it is not part of the key.
    pk_columns = [
        row for row in conn.execute(
            'PRAGMA table_info("{}")'.format(quoted)
        ).fetchall()
        if row[-1]
    ]
    pk_columns.sort(key=lambda row: row[-1])
    return [str(row[1]) for row in pk_columns]
|
||||
|
||||
|
||||
def path_from_row_pks(row, pks):
    """Build the URL path component identifying *row* by its pk values.

    Each primary key value is stringified and URL-quoted; the pieces are
    joined with commas. Returns '' when *pks* is empty.
    """
    if pks:
        return ','.join(
            urllib.parse.quote_plus(str(row[pk])) for pk in pks
        )
    return ''
|
||||
|
||||
|
||||
def build_where_clause(args):
    """Translate query-string arguments into a SQL where clause.

    Each key is ``<column>`` (exact match) or ``<column>__<lookup>`` where
    lookup is one of exact, contains, endswith, startswith, gt, gte, lt,
    lte, glob or like. A key whose suffix is not a known lookup is treated
    as a plain column name. Only the first value of each key is used.

    Args:
        args: mapping of key -> list of values.

    Returns:
        ``(where_clause, params)``: the conditions joined with ``and`` in
        sorted order, and the matching list of bound parameters.
    """
    templates = {
        'exact': '"{}" = ?',
        'contains': '"{}" like ?',
        'endswith': '"{}" like ?',
        'startswith': '"{}" like ?',
        'gt': '"{}" > ?',
        'gte': '"{}" >= ?',
        'lt': '"{}" < ?',
        'lte': '"{}" <= ?',
        'glob': '"{}" glob ?',
        'like': '"{}" like ?',
    }
    value_converters = {
        'contains': lambda s: '%{}%'.format(s),
        'endswith': lambda s: '%{}'.format(s),
        'startswith': lambda s: '{}%'.format(s),
    }
    sql_bits = []
    for key, values in args.items():
        column, lookup = key, 'exact'
        if '__' in key:
            candidate_column, candidate_lookup = key.rsplit('__', 1)
            # Only split when the suffix is a real lookup; otherwise the
            # key is a column whose name happens to contain "__".
            if candidate_lookup in templates:
                column, lookup = candidate_column, candidate_lookup
        value = values[0]
        convert = value_converters.get(lookup)
        converted = convert(value) if convert is not None else value
        # Column names come from the request: double any embedded quote so
        # the name cannot terminate the quoted identifier.
        condition = templates[lookup].format(column.replace('"', '""'))
        sql_bits.append((condition, converted))
    sql_bits.sort(key=lambda p: p[0])
    where_clause = ' and '.join(p[0] for p in sql_bits)
    params = [p[1] for p in sql_bits]
    return where_clause, params
|
||||
|
||||
|
||||
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also understands sqlite3 rows, cursors and bytes."""

    def default(self, obj):
        """Convert *obj* to something the stock encoder can serialise.

        Rows become tuples and cursors become lists. Bytes become text when
        they are valid UTF-8, otherwise a ``{'$base64': True, 'encoded':
        ...}`` dict. Anything else is passed to the base class.
        """
        if isinstance(obj, bytes):
            # Does it encode to utf8?
            try:
                text = obj.decode('utf8')
            except UnicodeDecodeError:
                encoded = base64.b64encode(obj).decode('latin1')
                return {'$base64': True, 'encoded': encoded}
            return text
        if isinstance(obj, sqlite3.Cursor):
            return list(obj)
        if isinstance(obj, sqlite3.Row):
            return tuple(obj)
        return super().default(obj)
|
||||
|
||||
|
||||
@contextmanager
def sqlite_timelimit(conn, ms):
    """Context manager that aborts queries on *conn* after *ms* milliseconds.

    A progress handler is installed for the duration of the block; once the
    deadline has passed it returns a non-zero value, which makes SQLite
    abort the running statement (surfacing as sqlite3.OperationalError).

    Args:
        conn: the sqlite3 connection to guard.
        ms: time budget in milliseconds, measured from entering the block.
    """
    deadline = time.time() + (ms / 1000)

    def handler():
        if time.time() >= deadline:
            return 1

    conn.set_progress_handler(handler, 10000)
    try:
        yield
    finally:
        # Always remove the handler, including when the body raises (for
        # example on the timeout itself); otherwise the expired deadline
        # would keep aborting later queries on this cached connection.
        conn.set_progress_handler(None, 10000)
|
||||
24
datasite/cli.py
Normal file
24
datasite/cli.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import click
|
||||
from .app import app, ensure_build_metadata
|
||||
|
||||
# Root click command group; the subcommands below attach to it through
# @cli.command().
@click.group()
def cli():
    """
    Datasite!
    """
|
||||
|
||||
|
||||
@cli.command()
def build():
    # Rebuild the metadata file even when a cached copy already exists.
    ensure_build_metadata(regenerate=True)
|
||||
|
||||
|
||||
@cli.command()
@click.argument('files', type=click.Path(exists=True), nargs=-1)
@click.option('-h', '--host', default='0.0.0.0')
@click.option('-p', '--port', default=8001)
@click.option('--debug', is_flag=True)
def serve(files, host, port, debug):
    '''Serve up specified database files with a web UI'''
    # NOTE(review): ``files`` is only echoed below; nothing here hands it to
    # the app, which finds its databases on its own.
    banner = 'Serve! files={} on port {}'.format(files, port)
    click.echo(banner)
    app.run(host=host, port=port, debug=debug)
|
||||
19
datasite/templates/base.html
Normal file
19
datasite/templates/base.html
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>{% block title %}{% endblock %}</title>
|
||||
<style>
|
||||
th {
|
||||
text-align: left;
|
||||
}
|
||||
</style>
|
||||
{% block extra_head %}{% endblock %}
|
||||
</head>
|
||||
<body>
|
||||
{% if error %}
|
||||
<div style="padding: 1em; margin: 1em; border: 3px solid red;">{{ error }}</div>
|
||||
{% endif %}
|
||||
{% block content %}
|
||||
{% endblock %}
|
||||
</body>
|
||||
</html>
|
||||
35
datasite/templates/database.html
Normal file
35
datasite/templates/database.html
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}{{ database }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h1>{{ database }}</h1>
|
||||
|
||||
<p><a href="/{{ database }}-{{ database_hash }}.db">download {{ database }}.db</a></p>
|
||||
|
||||
<style>
|
||||
td {
|
||||
vertical-align: top;
|
||||
border-top: 1px solid #666;
|
||||
padding: 2px 4px;
|
||||
}
|
||||
</style>
|
||||
<table>
|
||||
<tr>
|
||||
{% for column in columns %}<th scope="col">{{ column }}</th>{% endfor %}
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
{% for td in row %}
|
||||
<td>
|
||||
{% if loop.index == 2 and row.type in ("table", "view") %}
|
||||
<a href="/{{ database }}-{{ database_hash }}/{{ td }}">{{ td }}</a>
|
||||
{% else %}
|
||||
{{ td }}
|
||||
{% endif %}
|
||||
</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endblock %}
|
||||
13
datasite/templates/index.html
Normal file
13
datasite/templates/index.html
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Databases{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h1>Database{% if databases|length != 1 %}s{% endif %}</h1>
|
||||
{% for database in databases %}
|
||||
<h2><a href="{{ database.path }}">{{ database.name }}</a></h2>
|
||||
<p>{{ "{:,}".format(database.total_rows) }} rows in {{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}</p>
|
||||
<p>{% for table, count in database.tables_truncated %}<a href="{{ database.path }}/{{ table }}" title="{{ count }} rows">{{ table }}</a>{% if not loop.last %}, {% endif %}{% endfor %}{% if database.tables_more %}, <a href="{{ database.path }}">...</a>{% endif %}</p>
|
||||
{% endfor %}
|
||||
|
||||
{% endblock %}
|
||||
35
datasite/templates/row.html
Normal file
35
datasite/templates/row.html
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}{{ database }}: {{ table }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h1><a href="/{{ database }}-{{ database_hash }}">{{ database }}</a></h1>
|
||||
|
||||
<h2><a href="/{{ database }}-{{ database_hash }}/{{ table }}">{{ table }}</a></h2>
|
||||
|
||||
<style>
|
||||
td {
|
||||
white-space: pre;
|
||||
vertical-align: top;
|
||||
border-top: 1px solid #666;
|
||||
padding: 2px 4px;
|
||||
}
|
||||
</style>
|
||||
<table>
|
||||
<tr>
|
||||
{% if primary_keys and row_link %}<th scope="col">Link</th>{% endif %}
|
||||
{% for column in columns %}<th scope="col">{{ column }}</th>{% endfor %}
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
{% if primary_keys and row_link %}
|
||||
<td><a href="/{{ database }}-{{ database_hash }}/{{ table }}/{{ row_link(row) }}">{{ row_link(row) }}</a></td>
|
||||
{% endif %}
|
||||
{% for td in row %}
|
||||
<td>{{ td }}</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% if took_ms %}<small>Took {{ took_ms }}</small>{% endif %}
|
||||
{% endblock %}
|
||||
35
datasite/templates/table.html
Normal file
35
datasite/templates/table.html
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}{{ database }}: {{ table }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h1><a href="/{{ database }}-{{ database_hash }}">{{ database }}</a></h1>
|
||||
|
||||
<h2>{{ table }}{% if total_rows != None %} ({{ "{:,}".format(total_rows) }} total row{% if total_rows == 1 %}{% else %}s{% endif %} in this table){% endif %}</h2>
|
||||
|
||||
<style>
|
||||
td {
|
||||
white-space: pre;
|
||||
vertical-align: top;
|
||||
border-top: 1px solid #666;
|
||||
padding: 2px 4px;
|
||||
}
|
||||
</style>
|
||||
<table>
|
||||
<tr>
|
||||
{% if primary_keys and row_link %}<th scope="col">Link</th>{% endif %}
|
||||
{% for column in columns %}<th scope="col">{{ column }}</th>{% endfor %}
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
{% if primary_keys and row_link %}
|
||||
<td><a href="/{{ database }}-{{ database_hash }}/{{ table }}/{{ row_link(row) }}">{{ row_link(row) }}</a></td>
|
||||
{% endif %}
|
||||
{% for td in row %}
|
||||
<td>{{ td }}</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% if took_ms %}<small>Took {{ took_ms }}</small>{% endif %}
|
||||
{% endblock %}
|
||||
Loading…
Add table
Add a link
Reference in a new issue