diff --git a/datasette/app.py b/datasette/app.py
index 08e34719..18d70eaa 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -27,56 +27,17 @@ from .utils import (
 
 app_root = Path(__file__).parent.parent
 
-BUILD_METADATA = 'build-metadata.json'
 HASH_BLOCK_SIZE = 1024 * 1024
 SQL_TIME_LIMIT_MS = 1000
 
 connections = threading.local()
 
 
-def ensure_build_metadata(files, regenerate=True):
-    build_metadata = app_root / BUILD_METADATA
-    if build_metadata.exists() and not regenerate:
-        return json.loads(build_metadata.read_text())
-    print('Building metadata... path={}'.format(build_metadata))
-    metadata = {}
-    for filename in files:
-        path = Path(filename)
-        name = path.stem
-        if name in metadata:
-            raise Exception('Multiple files with same stem %s' % name)
-        # Calculate hash, efficiently
-        m = hashlib.sha256()
-        with path.open('rb') as fp:
-            while True:
-                data = fp.read(HASH_BLOCK_SIZE)
-                if not data:
-                    break
-                m.update(data)
-        # List tables and their row counts
-        tables = {}
-        with sqlite3.connect('file:{}?immutable=1'.format(path.name), uri=True) as conn:
-            conn.row_factory = sqlite3.Row
-            table_names = [
-                r['name']
-                for r in conn.execute('select * from sqlite_master where type="table"')
-            ]
-            for table in table_names:
-                tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
-
-        metadata[name] = {
-            'hash': m.hexdigest(),
-            'file': path.name,
-            'tables': tables,
-        }
-    build_metadata.write_text(json.dumps(metadata, indent=4))
-    return metadata
-
-
 class BaseView(HTTPMethodView):
     template = None
 
     def __init__(self, datasette):
+        self.ds = datasette
         self.files = datasette.files
         self.jinja = datasette.jinja
         self.executor = datasette.executor
@@ -103,12 +64,45 @@ class BaseView(HTTPMethodView):
         rows.sort(key=lambda row: row[-1])
         return [str(r[1]) for r in rows]
 
+    def resolve_db_name(self, db_name, **kwargs):
+        databases = self.ds.metadata()
+        hash = None
+        name = None
+        if '-' in db_name:
+            # Might be name-and-hash, or might just be
+            # a name with a hyphen in it
+            name, hash = db_name.rsplit('-', 1)
+            if name not in databases:
+                # Try the whole name
+                name = db_name
+                hash = None
+        else:
+            name = db_name
+        # Verify the hash
+        try:
+            info = databases[name]
+        except KeyError:
+            raise NotFound('Database not found: {}'.format(name))
+        expected = info['hash'][:7]
+        if expected != hash:
+            should_redirect = '/{}-{}'.format(
+                name, expected,
+            )
+            if 'table' in kwargs:
+                should_redirect += '/' + kwargs['table']
+            if 'as_json' in kwargs:
+                should_redirect += kwargs['as_json']
+            if 'as_db' in kwargs:
+                should_redirect += kwargs['as_db']
+            return name, expected, should_redirect
+        return name, expected, None
+
     async def execute(self, db_name, sql, params=None):
         """Executes sql against db_name in a thread"""
         def sql_operation_in_thread():
             conn = getattr(connections, db_name, None)
             if not conn:
-                info = ensure_build_metadata(self.files)[db_name]
+                info = self.ds.metadata()[db_name]
                 conn = sqlite3.connect(
                     'file:{}?immutable=1'.format(info['file']),
                     uri=True,
@@ -133,7 +127,7 @@ class BaseView(HTTPMethodView):
         )
 
     async def get(self, request, db_name, **kwargs):
-        name, hash, should_redirect = resolve_db_name(self.files, db_name, **kwargs)
+        name, hash, should_redirect = self.resolve_db_name(db_name, **kwargs)
         if should_redirect:
             return self.redirect(request, should_redirect)
         return await self.view_get(request, name, hash, **kwargs)
@@ -196,13 +190,14 @@ class BaseView(HTTPMethodView):
 
 class IndexView(HTTPMethodView):
     def __init__(self, datasette):
+        self.ds = datasette
         self.files = datasette.files
         self.jinja = datasette.jinja
         self.executor = datasette.executor
 
     async def get(self, request, as_json):
         databases = []
-        for key, info in sorted(ensure_build_metadata(self.files).items()):
+        for key, info in sorted(self.ds.metadata().items()):
             database = {
                 'name': key,
                 'hash': info['hash'],
@@ -263,7 +258,7 @@ class DatabaseView(BaseView):
 
 class DatabaseDownload(BaseView):
     async def view_get(self, request, name, hash, **kwargs):
-        filepath = ensure_build_metadata(self.files)[name]['file']
+        filepath = self.ds.metadata()[name]['file']
         return await response.file_stream(
             filepath, headers={
                 'Content-Disposition': 'attachment; filename="{}"'.format(filepath)
@@ -339,7 +334,7 @@ class TableView(BaseView):
         if use_rowid:
             display_columns = display_columns[1:]
         rows = list(rows)
-        info = ensure_build_metadata(self.files)
+        info = self.ds.metadata()
         total_rows = info[name]['tables'].get(table)
         after = None
         after_link = None
@@ -404,42 +399,8 @@ class RowView(BaseView):
         }
 
 
-def resolve_db_name(files, db_name, **kwargs):
-    databases = ensure_build_metadata(files)
-    hash = None
-    name = None
-    if '-' in db_name:
-        # Might be name-and-hash, or might just be
-        # a name with a hyphen in it
-        name, hash = db_name.rsplit('-', 1)
-        if name not in databases:
-            # Try the whole name
-            name = db_name
-            hash = None
-    else:
-        name = db_name
-    # Verify the hash
-    try:
-        info = databases[name]
-    except KeyError:
-        raise NotFound('Database not found: {}'.format(name))
-    expected = info['hash'][:7]
-    if expected != hash:
-        should_redirect = '/{}-{}'.format(
-            name, expected,
-        )
-        if 'table' in kwargs:
-            should_redirect += '/' + kwargs['table']
-        if 'as_json' in kwargs:
-            should_redirect += kwargs['as_json']
-        if 'as_db' in kwargs:
-            should_redirect += kwargs['as_db']
-        return name, expected, should_redirect
-    return name, expected, None
-
-
 class Datasette:
-    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50):
+    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, metadata=None):
         self.files = files
         self.num_threads = num_threads
         self.executor = futures.ThreadPoolExecutor(
@@ -447,6 +408,43 @@ class Datasette:
         )
         self.cache_headers = cache_headers
         self.page_size = page_size
+        self._metadata = metadata
+
+    def metadata(self):
+        if self._metadata:
+            return self._metadata
+        metadata = {}
+        for filename in self.files:
+            path = Path(filename)
+            name = path.stem
+            if name in metadata:
+                raise Exception('Multiple files with same stem %s' % name)
+            # Calculate hash, efficiently
+            m = hashlib.sha256()
+            with path.open('rb') as fp:
+                while True:
+                    data = fp.read(HASH_BLOCK_SIZE)
+                    if not data:
+                        break
+                    m.update(data)
+            # List tables and their row counts
+            tables = {}
+            with sqlite3.connect('file:{}?immutable=1'.format(path.name), uri=True) as conn:
+                conn.row_factory = sqlite3.Row
+                table_names = [
+                    r['name']
+                    for r in conn.execute('select * from sqlite_master where type="table"')
+                ]
+                for table in table_names:
+                    tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
+
+            metadata[name] = {
+                'hash': m.hexdigest(),
+                'file': path.name,
+                'tables': tables,
+            }
+        self._metadata = metadata
+        return metadata
 
     def app(self):
         app = Sanic(__name__)
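
The app.py changes move metadata generation from the module-level ensure_build_metadata() function (which wrote build-metadata.json to disk) onto the Datasette class: metadata() computes each file's hash, name and per-table row counts lazily on first call and caches the result on the instance, or simply returns metadata passed to the constructor. A minimal sketch of the new call pattern, assuming a hypothetical fixtures.db file in the working directory:

    from datasette.app import Datasette

    ds = Datasette(['fixtures.db'])
    info = ds.metadata()  # computed and cached on first call
    # Shape of the returned dict (values here are illustrative):
    # {'fixtures': {'hash': '<sha256 hex digest of the file>',
    #               'file': 'fixtures.db',
    #               'tables': {'some_table': 123}}}
    assert ds.metadata() is info  # later calls return the cached dict
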
diff --git a/datasette/cli.py b/datasette/cli.py
index a7ef01d8..966bf2e2 100644
--- a/datasette/cli.py
+++ b/datasette/cli.py
@@ -1,11 +1,12 @@
 import click
 from click_default_group import DefaultGroup
+import json
 import os
 import shutil
 from subprocess import call
 import sys
 import tempfile
-from .app import Datasette, ensure_build_metadata
+from .app import Datasette
 from .utils import make_dockerfile
 
 
@@ -18,8 +19,10 @@ def cli():
 
 @cli.command()
 @click.argument('files', type=click.Path(exists=True), nargs=-1)
-def build(files):
-    ensure_build_metadata(files, True)
+@click.option('-m', '--metadata', default='metadata.json')
+def build_metadata(files, metadata):
+    app = Datasette(files)
+    open(metadata, 'w').write(json.dumps(app.metadata(), indent=2))
 
 
 @cli.command()
@@ -62,12 +65,20 @@ def publish(files):
 @click.option('-p', '--port', default=8001)
 @click.option('--debug', is_flag=True)
 @click.option('--reload', is_flag=True)
-def serve(files, host, port, debug, reload):
+@click.option('-m', '--metadata')
+def serve(files, host, port, debug, reload, metadata):
    """Serve up specified database files with a web UI"""
     if reload:
         import hupper
         hupper.start_reloader('datasette.cli.serve')
 
+    if metadata:
+        metadata = json.load(open(metadata))
+
     click.echo('Serve! files={} on port {}'.format(files, port))
-    app = Datasette(files, cache_headers=not debug and not reload).app()
+    app = Datasette(
+        files,
+        cache_headers=not debug and not reload,
+        metadata=metadata,
+    ).app()
     app.run(host=host, port=port, debug=debug)
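
In cli.py, the old build command becomes build_metadata: it instantiates Datasette and writes metadata() out as JSON (defaulting to metadata.json), and serve gains a matching -m/--metadata option that loads that file up front so no hashing or row counting happens at startup. Typical usage, assuming a hypothetical mydb.db:

    datasette build_metadata mydb.db --metadata metadata.json
    datasette serve mydb.db --metadata metadata.json --port 8001
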
diff --git a/datasette/utils.py b/datasette/utils.py
index 3dba7588..e4e18d47 100644
--- a/datasette/utils.py
+++ b/datasette/utils.py
@@ -122,10 +122,10 @@ def make_dockerfile(files):
 FROM python:3
 COPY . /app
 WORKDIR /app
-RUN pip install https://static.simonwillison.net/static/2017/datasette-0.1-py3-none-any.whl
-RUN datasette build {}
+RUN pip install https://static.simonwillison.net/static/2017/datasette-0.2-py3-none-any.whl
+RUN datasette build_metadata {} --metadata metadata.json
 EXPOSE 8006
-CMD ["datasette", "serve", {}, "--port", "8006"]'''.format(
+CMD ["datasette", "serve", {}, "--port", "8006", "--metadata", "metadata.json"]'''.format(
     ' '.join(files),
     '"' + '", "'.join(files) + '"',
 ).strip()
diff --git a/setup.py b/setup.py
index 3d00eabd..a20425cd 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='datasette',
-    version='0.1',
+    version='0.2',
     packages=find_packages(),
     package_data={'datasette': ['templates/*.html']},
     include_package_data=True,
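
With the utils.py change, the generated Dockerfile bakes the metadata into the image at build time and points serve at it, so containers skip the hashing and counting step when they start. For a hypothetical files=['mydb.db'], the updated template renders along these lines:

    FROM python:3
    COPY . /app
    WORKDIR /app
    RUN pip install https://static.simonwillison.net/static/2017/datasette-0.2-py3-none-any.whl
    RUN datasette build_metadata mydb.db --metadata metadata.json
    EXPOSE 8006
    CMD ["datasette", "serve", "mydb.db", "--port", "8006", "--metadata", "metadata.json"]
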