diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..2c34db66 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.DS_Store +.cache +.eggs +.git +.gitignore +.ipynb_checkpoints +.travis.yml +build +*.spec +*.egg-info +dist +scratchpad +venv diff --git a/.gitignore b/.gitignore index 22f7a699..cf8578ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ build-metadata.json datasets.json +scratchpad + # SQLite databases *.db *.sqlite diff --git a/Dockerfile b/Dockerfile index 8bfc47d2..04c08d2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,21 @@ -FROM python:3.6 as build +FROM python:3.6-slim-stretch as build -ARG VERSION=0.11 -RUN pip install datasette==$VERSION +# Setup build dependencies +RUN apt update +RUN apt install -y python3-dev gcc libsqlite3-mod-spatialite +# Add local code to the image instead of fetching from pypi. +ADD . /datasette -FROM python:3.6-slim +RUN pip install /datasette +FROM python:3.6-slim-stretch + +# Copy python dependencies COPY --from=build /usr/local/lib/python3.6/site-packages /usr/local/lib/python3.6/site-packages +# Copy executables COPY --from=build /usr/local/bin /usr/local/bin +# Copy spatial extensions +COPY --from=build /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu EXPOSE 8001 CMD ["datasette"] diff --git a/README.md b/README.md index 13f03598..9cff2732 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.python.org/pypi/datasette) [![Travis CI](https://travis-ci.org/simonw/datasette.svg?branch=master)](https://travis-ci.org/simonw/datasette) +[![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](http://datasette.readthedocs.io/en/latest/?badge=latest) *An instant JSON API for your SQLite databases* @@ -95,6 +96,7 @@ http://localhost:8001/History/downloads.jsono will return that data as JSON in a --max_returned_rows INTEGER Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely. 
--sql_time_limit_ms INTEGER Max time allowed for SQL queries in ms + --load-extension TEXT Path to a SQLite extension to load --inspect-file TEXT Path to JSON file created using "datasette build" -m, --metadata FILENAME Path to JSON file containing license/source diff --git a/datasette/__init__.py b/datasette/__init__.py index e69de29b..668a8c82 100644 --- a/datasette/__init__.py +++ b/datasette/__init__.py @@ -0,0 +1 @@ +from datasette.version import __version_info__, __version__ # noqa diff --git a/datasette/app.py b/datasette/app.py index ff3de281..2a7a5e2b 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -4,6 +4,7 @@ from sanic.exceptions import NotFound from sanic.views import HTTPMethodView from sanic_jinja2 import SanicJinja2 from jinja2 import FileSystemLoader +import re import sqlite3 from pathlib import Path from concurrent import futures @@ -11,21 +12,24 @@ import asyncio import threading import urllib.parse import json +import jinja2 import hashlib import time from .utils import ( build_where_clauses, + compound_pks_from_path, CustomJSONEncoder, escape_css_string, escape_sqlite_table_name, + get_all_foreign_keys, InvalidSql, path_from_row_pks, path_with_added_args, path_with_ext, - compound_pks_from_path, sqlite_timelimit, validate_sql_select, ) +from .version import __version__ app_root = Path(__file__).parent.parent @@ -113,6 +117,10 @@ class BaseView(HTTPMethodView): conn.text_factory = lambda x: str(x, 'utf-8', 'replace') for name, num_args, func in self.ds.sqlite_functions: conn.create_function(name, num_args, func) + if self.ds.sqlite_extensions: + conn.enable_load_extension(True) + for extension in self.ds.sqlite_extensions: + conn.execute("SELECT load_extension('{}')".format(extension)) async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None): """Executes sql against db_name in a thread""" @@ -221,6 +229,7 @@ class BaseView(HTTPMethodView): 'url_json': path_with_ext(request, '.json'), 'url_jsono': path_with_ext(request, '.jsono'), 'metadata': self.ds.metadata, + 'datasette_version': __version__, }} r = self.jinja.render( template, @@ -252,12 +261,12 @@ class IndexView(HTTPMethodView): 'path': '{}-{}'.format(key, info['hash'][:7]), 'tables_truncated': sorted( info['tables'].items(), - key=lambda p: p[1], + key=lambda p: p[1]['count'], reverse=True )[:5], 'tables_count': len(info['tables'].items()), 'tables_more': len(info['tables'].items()) > 5, - 'table_rows': sum(info['tables'].values()), + 'table_rows': sum([t['count'] for t in info['tables'].values()]), } databases.append(database) if as_json: @@ -277,6 +286,7 @@ class IndexView(HTTPMethodView): request, databases=databases, metadata=self.ds.metadata, + datasette_version=__version__, ) @@ -286,13 +296,14 @@ async def favicon(request): class DatabaseView(BaseView): template = 'database.html' + re_named_parameter = re.compile(':([a-zA-Z0-0_]+)') async def data(self, request, name, hash): if request.args.get('sql'): return await self.custom_sql(request, name, hash) tables = [] table_inspect = self.ds.inspect()[name]['tables'] - for table_name, table_rows in table_inspect.items(): + for table_name, info in table_inspect.items(): rows = await self.execute( name, 'PRAGMA table_info([{}]);'.format(table_name) @@ -300,7 +311,7 @@ class DatabaseView(BaseView): tables.append({ 'name': table_name, 'columns': [r[1] for r in rows], - 'table_rows': table_rows, + 'table_rows': info['count'], }) tables.sort(key=lambda t: t['name']) views = await self.execute(name, 'select name from 
sqlite_master where type = "view"') @@ -316,6 +327,19 @@ class DatabaseView(BaseView): params = request.raw_args sql = params.pop('sql') validate_sql_select(sql) + + # Extract any :named parameters + named_parameters = self.re_named_parameter.findall(sql) + named_parameter_values = { + named_parameter: params.get(named_parameter) or '' + for named_parameter in named_parameters + } + + # Set to blank string if missing from params + for named_parameter in named_parameters: + if named_parameter not in params: + params[named_parameter] = '' + extra_args = {} if params.get('_sql_time_limit_ms'): extra_args['custom_time_limit'] = int(params['_sql_time_limit_ms']) @@ -335,6 +359,7 @@ class DatabaseView(BaseView): }, { 'database_hash': hash, 'custom_sql': True, + 'named_parameter_values': named_parameter_values, } @@ -446,12 +471,16 @@ class TableView(BaseView): ) columns = [r[0] for r in description] - display_columns = columns - if use_rowid: - display_columns = display_columns[1:] rows = list(rows) + + display_columns = columns + if not use_rowid and not is_view: + display_columns = ['Link'] + display_columns + info = self.ds.inspect() - table_rows = info[name]['tables'].get(table) + table_rows = None + if not is_view: + table_rows = info[name]['tables'][table]['count'] next_value = None next_url = None if len(rows) > self.page_size: @@ -462,6 +491,7 @@ class TableView(BaseView): next_url = urllib.parse.urljoin(request.url, path_with_added_args(request, { '_next': next_value, })) + return { 'database': name, 'table': table, @@ -482,11 +512,47 @@ class TableView(BaseView): }, lambda: { 'database_hash': hash, 'use_rowid': use_rowid, - 'row_link': lambda row: path_from_row_pks(row, pks, use_rowid), 'display_columns': display_columns, + 'display_rows': make_display_rows(name, hash, table, rows, display_columns, pks, is_view, use_rowid), } +def make_display_rows(database, database_hash, table, rows, display_columns, pks, is_view, use_rowid): + for row in rows: + cells = [] + # Unless we are a view, the first column is a link - either to the rowid + # or to the simple or compound primary key + if not is_view: + display_value = jinja2.Markup( + '{flat_pks}'.format( + database=database, + database_hash=database_hash, + table=urllib.parse.quote_plus(table), + flat_pks=path_from_row_pks(row, pks, use_rowid), + ) + ) + cells.append({ + 'column': 'rowid' if use_rowid else 'Link', + 'value': display_value, + }) + + for value, column in zip(row, display_columns): + if use_rowid and column == 'rowid': + # We already showed this in the linked first column + continue + if False: # TODO: This is where we will do foreign key linking + display_value = jinja2.Markup('{}'.format('foreign key')) + elif value is None: + display_value = jinja2.Markup(' ') + else: + display_value = str(value) + cells.append({ + 'column': column, + 'value': display_value, + }) + yield cells + + class RowView(BaseView): template = 'row.html' @@ -524,7 +590,6 @@ class RowView(BaseView): 'primary_key_values': pk_values, }, { 'database_hash': hash, - 'row_link': None, } @@ -532,7 +597,7 @@ class Datasette: def __init__( self, files, num_threads=3, cache_headers=True, page_size=100, max_returned_rows=1000, sql_time_limit_ms=1000, cors=False, - inspect_data=None, metadata=None): + inspect_data=None, metadata=None, sqlite_extensions=None): self.files = files self.num_threads = num_threads self.executor = futures.ThreadPoolExecutor( @@ -546,6 +611,7 @@ class Datasette: self._inspect = inspect_data self.metadata = metadata or {} 
self.sqlite_functions = [] + self.sqlite_extensions = sqlite_extensions or [] def inspect(self): if not self._inspect: @@ -572,7 +638,13 @@ class Datasette: for r in conn.execute('select * from sqlite_master where type="table"') ] for table in table_names: - tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0] + tables[table] = { + 'count': conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0], + } + + foreign_keys = get_all_foreign_keys(conn) + for table, info in foreign_keys.items(): + tables[table]['foreign_keys'] = info self._inspect[name] = { 'hash': m.hexdigest(), @@ -587,7 +659,8 @@ class Datasette: app, loader=FileSystemLoader([ str(app_root / 'datasette' / 'templates') - ]) + ]), + autoescape=True, ) self.jinja.add_env('escape_css_string', escape_css_string, 'filters') self.jinja.add_env('quote_plus', lambda u: urllib.parse.quote_plus(u), 'filters') diff --git a/datasette/cli.py b/datasette/cli.py index 3381dc88..42697c56 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -124,9 +124,13 @@ def package(files, tag, metadata, extra_options, **extra_metadata): @click.option('--page_size', default=100, help='Page size - default is 100') @click.option('--max_returned_rows', default=1000, help='Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.') @click.option('--sql_time_limit_ms', default=1000, help='Max time allowed for SQL queries in ms') +@click.option( + 'sqlite_extensions', '--load-extension', envvar='SQLITE_EXTENSIONS', multiple=True, + type=click.Path(exists=True, resolve_path=True), help='Path to a SQLite extension to load' +) @click.option('--inspect-file', help='Path to JSON file created using "datasette build"') @click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata') -def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, inspect_file, metadata): +def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, sqlite_extensions, inspect_file, metadata): """Serve up specified SQLite database files with a web UI""" if reload: import hupper @@ -150,6 +154,7 @@ def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms=sql_time_limit_ms, inspect_data=inspect_data, metadata=metadata_data, + sqlite_extensions=sqlite_extensions, ) # Force initial hashing/table counting ds.inspect() diff --git a/datasette/static/app.css b/datasette/static/app.css index 9f48582a..9a3f153d 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -83,14 +83,24 @@ form.sql textarea { font-family: monospace; font-size: 1.3em; } +form.sql label { + font-weight: bold; + display: inline-block; + width: 15%; +} +form.sql input[type=text] { + border: 1px solid #ccc; + width: 60%; + padding: 4px; + font-family: monospace; + display: inline-block; + font-size: 1.1em; +} @media only screen and (max-width: 576px) { form.sql textarea { width: 95%; } } -form.sql p { - margin: 0; -} form.sql input[type=submit] { color: #fff; background-color: #007bff; diff --git a/datasette/templates/base.html b/datasette/templates/base.html index d779f12c..1976b4ac 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -12,7 +12,7 @@ {% endblock %}
- Powered by Datasette + Powered by Datasette [anchor markup lost in extraction; per the changelog below, the new footer link shows the Datasette version as a title tooltip] {% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} {% if metadata.license %}· Data license: {% if metadata.license_url %} diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 4b405ea0..59115c66 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -16,7 +16,7 @@ {% endblock %} @@ -33,7 +33,14 @@ {% endif %}
+ [added form markup lost in extraction]
  Custom SQL query
  [textarea holding the SQL query - markup lost in extraction]
+ {% if named_parameter_values %}
+     Query parameters
+     {% for name, value in named_parameter_values.items() %}
+         [label and text input for {{ name }}, pre-filled with {{ value }} - markup lost in extraction]
+ {% endfor %} + {% endif %}
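Aside, not part of the diff: the query-parameters form above is fed by the named-parameter handling added to DatabaseView.custom_sql in app.py. A minimal standalone sketch of that logic, with the helper name invented for illustration; the character class is written here as a-zA-Z0-9_, whereas the regex in the app.py hunk reads a-zA-Z0-0_, which looks like a typo::

    import re

    # Finds :name style SQLite named parameters in a SQL string
    re_named_parameter = re.compile(r':([a-zA-Z0-9_]+)')

    def named_parameter_values(sql, request_args):
        # Missing parameters default to '' so the query can still execute and
        # the corresponding form field renders empty, as in the app.py change.
        names = re_named_parameter.findall(sql)
        return {name: request_args.get(name) or '' for name in names}

    # named_parameter_values('select * from dogs where breed like :breed', {})
    # -> {'breed': ''}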

@@ -52,7 +59,7 @@ {% for row in rows %} {% for td in row %} - {{ td or " " }} + {{ td or " "|safe }} {% endfor %} {% endfor %} @@ -83,6 +90,11 @@ mode: "text/x-sql", lineWrapping: true, }); + editor.setOption("extraKeys", { + "Shift-Enter": function() { + document.getElementsByClassName("sql")[0].submit(); + } + }); {% endblock %} diff --git a/datasette/templates/table.html b/datasette/templates/table.html index f1539ee3..082d2782 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -5,11 +5,8 @@ {% block extra_head %} @@ -34,18 +31,14 @@ - {% if not is_view %}{% endif %} {% for column in display_columns %}{% endfor %} - {% for row in rows %} + {% for row in display_rows %} - {% if not is_view %}{% endif %} - {% for td in row %} - {% if not use_rowid or (use_rowid and not loop.first) %} - - {% endif %} + {% for cell in row %} + {% endfor %} {% endfor %} diff --git a/datasette/utils.py b/datasette/utils.py index 825a794e..544fdad2 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -245,3 +245,240 @@ def temporary_heroku_directory(files, name, metadata, extra_options, extra_metad tmp.cleanup() os.chdir(saved_cwd) +from contextlib import contextmanager +import base64 +import json +import os +import re +import sqlite3 +import tempfile +import time +import urllib + + +def compound_pks_from_path(path): + return [ + urllib.parse.unquote_plus(b) for b in path.split(',') + ] + + +def path_from_row_pks(row, pks, use_rowid): + if use_rowid: + return urllib.parse.quote_plus(str(row['rowid'])) + bits = [] + for pk in pks: + bits.append( + urllib.parse.quote_plus(str(row[pk])) + ) + return ','.join(bits) + + +def build_where_clauses(args): + sql_bits = [] + params = {} + for i, (key, value) in enumerate(sorted(args.items())): + if '__' in key: + column, lookup = key.rsplit('__', 1) + else: + column = key + lookup = 'exact' + template = { + 'exact': '"{}" = :{}', + 'contains': '"{}" like :{}', + 'endswith': '"{}" like :{}', + 'startswith': '"{}" like :{}', + 'gt': '"{}" > :{}', + 'gte': '"{}" >= :{}', + 'lt': '"{}" < :{}', + 'lte': '"{}" <= :{}', + 'glob': '"{}" glob :{}', + 'like': '"{}" like :{}', + 'isnull': '"{}" is null', + }[lookup] + numeric_operators = {'gt', 'gte', 'lt', 'lte'} + value_convert = { + 'contains': lambda s: '%{}%'.format(s), + 'endswith': lambda s: '%{}'.format(s), + 'startswith': lambda s: '{}%'.format(s), + }.get(lookup, lambda s: s) + converted = value_convert(value) + if lookup in numeric_operators and converted.isdigit(): + converted = int(converted) + if ':{}' in template: + param_id = 'p{}'.format(i) + params[param_id] = converted + tokens = (column, param_id) + else: + tokens = (column,) + sql_bits.append( + template.format(*tokens) + ) + return sql_bits, params + + +class CustomJSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, sqlite3.Row): + return tuple(obj) + if isinstance(obj, sqlite3.Cursor): + return list(obj) + if isinstance(obj, bytes): + # Does it encode to utf8? + try: + return obj.decode('utf8') + except UnicodeDecodeError: + return { + '$base64': True, + 'encoded': base64.b64encode(obj).decode('latin1'), + } + return json.JSONEncoder.default(self, obj) + + +@contextmanager +def sqlite_timelimit(conn, ms): + deadline = time.time() + (ms / 1000) + # n is the number of SQLite virtual machine instructions that will be + # executed between each check. It's hard to know what to pick here. 
+ # After some experimentation, I've decided to go with 1000 by default and + # 1 for time limits that are less than 50ms + n = 1000 + if ms < 50: + n = 1 + + def handler(): + if time.time() >= deadline: + return 1 + + conn.set_progress_handler(handler, n) + yield + conn.set_progress_handler(None, n) + + +class InvalidSql(Exception): + pass + + +def validate_sql_select(sql): + sql = sql.strip().lower() + if not sql.startswith('select '): + raise InvalidSql('Statement must begin with SELECT') + if 'pragma' in sql: + raise InvalidSql('Statement may not contain PRAGMA') + + +def path_with_added_args(request, args): + current = request.raw_args.copy() + current.update(args) + return request.path + '?' + urllib.parse.urlencode(current) + + +def path_with_ext(request, ext): + path = request.path + path += ext + if request.query_string: + path += '?' + request.query_string + return path + + +_css_re = re.compile(r'''['"\n\\]''') +_boring_table_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') + + +def escape_css_string(s): + return _css_re.sub(lambda m: '\\{:X}'.format(ord(m.group())), s) + + +def escape_sqlite_table_name(s): + if _boring_table_name_re.match(s): + return s + else: + return '[{}]'.format(s) + + +def make_dockerfile(files, metadata_file, extra_options=''): + cmd = ['"datasette"', '"serve"', '"--host"', '"0.0.0.0"'] + cmd.append('"' + '", "'.join(files) + '"') + cmd.extend(['"--cors"', '"--port"', '"8001"', '"--inspect-file"', '"inspect-data.json"']) + if metadata_file: + cmd.extend(['"--metadata"', '"{}"'.format(metadata_file)]) + if extra_options: + for opt in extra_options.split(): + cmd.append('"{}"'.format(opt)) + return ''' +FROM python:3 +COPY . /app +WORKDIR /app +RUN pip install datasette +RUN datasette build {} --inspect-file inspect-data.json +EXPOSE 8001 +CMD [{}]'''.format( + ' '.join(files), + ', '.join(cmd) + ).strip() + + +@contextmanager +def temporary_docker_directory(files, name, metadata, extra_options, extra_metadata=None): + extra_metadata = extra_metadata or {} + tmp = tempfile.TemporaryDirectory() + # We create a datasette folder in there to get a nicer now deploy name + datasette_dir = os.path.join(tmp.name, name) + os.mkdir(datasette_dir) + saved_cwd = os.getcwd() + file_paths = [ + os.path.join(saved_cwd, name) + for name in files + ] + file_names = [os.path.split(f)[-1] for f in files] + if metadata: + metadata_content = json.load(metadata) + else: + metadata_content = {} + for key, value in extra_metadata.items(): + if value: + metadata_content[key] = value + try: + dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options) + os.chdir(datasette_dir) + if metadata_content: + open('metadata.json', 'w').write(json.dumps(metadata_content, indent=2)) + open('Dockerfile', 'w').write(dockerfile) + for path, filename in zip(file_paths, file_names): + os.link(path, os.path.join(datasette_dir, filename)) + yield + finally: + tmp.cleanup() + os.chdir(saved_cwd) + + +def get_all_foreign_keys(conn): + tables = [r[0] for r in conn.execute('select name from sqlite_master where type="table"')] + table_to_foreign_keys = {} + for table in tables: + table_to_foreign_keys[table] = { + 'incoming': [], + 'outgoing': [], + } + for table in tables: + infos = conn.execute( + 'PRAGMA foreign_key_list([{}])'.format(table) + ).fetchall() + for info in infos: + if info is not None: + id, seq, table_name, from_, to_, on_update, on_delete, match = info + if table_name not in table_to_foreign_keys: + # Weird edge case where something refers to a table 
that does + # not actually exist + continue + table_to_foreign_keys[table_name]['incoming'].append({ + 'other_table': table, + 'column': to_, + 'other_column': from_ + }) + table_to_foreign_keys[table]['outgoing'].append({ + 'other_table': table_name, + 'column': from_, + 'other_column': to_ + }) + + return table_to_foreign_keys diff --git a/datasette/version.py b/datasette/version.py new file mode 100644 index 00000000..81e1cef6 --- /dev/null +++ b/datasette/version.py @@ -0,0 +1,2 @@ +__version_info__ = (0, 12) +__version__ = '.'.join(map(str, __version_info__)) diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..e35d8850 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_build diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..dbb89483 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = Datasette +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/auto-build.sh b/docs/auto-build.sh new file mode 100644 index 00000000..a8ddf460 --- /dev/null +++ b/docs/auto-build.sh @@ -0,0 +1 @@ +sphinx-autobuild . _build/html diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 00000000..c661aa54 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,97 @@ +Changelog +========= + +0.12 (2017-11-16) +----------------- +- Added ``__version__``, now displayed as tooltip in page footer (`#108`_). +- Added initial docs, including a changelog (`#99`_). +- Turned on auto-escaping in Jinja. +- Added a UI for editing named parameters (`#96`_). + + You can now construct a custom SQL statement using SQLite named + parameters (e.g. ``:name``) and datasette will display form fields for + editing those parameters. `Here’s an example`_ which lets you see the + most popular names for dogs of different species registered through + various dog registration schemes in Australia. + +.. 
_Here’s an example: https://australian-dogs.now.sh/australian-dogs-3ba9628?sql=select+name%2C+count%28*%29+as+n+from+%28%0D%0A%0D%0Aselect+upper%28%22Animal+name%22%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2013%5D+where+Breed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28Animal_Name%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2014%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all+%0D%0A%0D%0Aselect+upper%28Animal_Name%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2015%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22AnimalName%22%29+as+name+from+%5BCity-of-Port-Adelaide-Enfield-Dog_Registrations_2016%5D+where+AnimalBreed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22Animal+Name%22%29+as+name+from+%5BMitcham-dog-registrations-2015%5D+where+Breed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22DOG_NAME%22%29+as+name+from+%5Bburnside-dog-registrations-2015%5D+where+DOG_BREED+like+%3Abreed%0D%0A%0D%0Aunion+all+%0D%0A%0D%0Aselect+upper%28%22Animal_Name%22%29+as+name+from+%5Bcity-of-playford-2015-dog-registration%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22Animal+Name%22%29+as+name+from+%5Bcity-of-prospect-dog-registration-details-2016%5D+where%22Breed+Description%22+like+%3Abreed%0D%0A%0D%0A%29+group+by+name+order+by+n+desc%3B&breed=pug + +- Pin to specific Jinja version. (`#100`_). +- Default to 127.0.0.1 not 0.0.0.0. (`#98`_). +- Added extra metadata options to publish and package commands. (`#92`_). + + You can now run these commands like so:: + + datasette now publish mydb.db \ + --title="My Title" \ + --source="Source" \ + --source_url="http://www.example.com/" \ + --license="CC0" \ + --license_url="https://creativecommons.org/publicdomain/zero/1.0/" + + This will write those values into the metadata.json that is packaged with the + app. If you also pass ``--metadata=metadata.json`` that file will be updated with the extra + values before being written into the Docker image. +- Added simple production-ready Dockerfile (`#94`_) [Andrew + Cutler] +- New ``?_sql_time_limit_ms=10`` argument to database and table page (`#95`_) +- SQL syntax highlighting with Codemirror (`#89`_) [Tom Dyson] + +.. _#89: https://github.com/simonw/datasette/issues/89 +.. _#92: https://github.com/simonw/datasette/issues/92 +.. _#94: https://github.com/simonw/datasette/issues/94 +.. _#95: https://github.com/simonw/datasette/issues/95 +.. _#96: https://github.com/simonw/datasette/issues/96 +.. _#98: https://github.com/simonw/datasette/issues/98 +.. _#99: https://github.com/simonw/datasette/issues/99 +.. _#100: https://github.com/simonw/datasette/issues/100 +.. _#108: https://github.com/simonw/datasette/issues/108 + +0.11 (2017-11-14) +----------------- +- Added ``datasette publish now --force`` option. + + This calls ``now`` with ``--force`` - useful as it means you get a fresh copy of datasette even if Now has already cached that docker layer. +- Enable ``--cors`` by default when running in a container. + +0.10 (2017-11-14) +----------------- +- Fixed `#83`_ - 500 error on individual row pages. +- Stop using sqlite WITH RECURSIVE in our tests. + + The version of Python 3 running in Travis CI doesn't support this. + +.. _#83: https://github.com/simonw/datasette/issues/83 + +0.9 (2017-11-13) +---------------- +- Added ``--sql_time_limit_ms`` and ``--extra-options``. 
+ + The serve command now accepts ``--sql_time_limit_ms`` for customizing the SQL time + limit. + + The publish and package commands now accept ``--extra-options`` which can be used + to specify additional options to be passed to the datasite serve command when + it executes inside the resulting Docker containers. + +0.8 (2017-11-13) +---------------- +- V0.8 - added PyPI metadata, ready to ship. +- Implemented offset/limit pagination for views (`#70`_). +- Improved pagination. (`#78`_) +- Limit on max rows returned, controlled by ``--max_returned_rows`` option. (`#69`_) + + If someone executes 'select * from table' against a table with a million rows + in it, we could run into problems: just serializing that much data as JSON is + likely to lock up the server. + + Solution: we now have a hard limit on the maximum number of rows that can be + returned by a query. If that limit is exceeded, the server will return a + ``"truncated": true`` field in the JSON. + + This limit can be optionally controlled by the new ``--max_returned_rows`` + option. Setting that option to 0 disables the limit entirely. + +.. _#70: https://github.com/simonw/datasette/issues/70 +.. _#78: https://github.com/simonw/datasette/issues/78 +.. _#69: https://github.com/simonw/datasette/issues/69 diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..60b72a11 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Datasette documentation build configuration file, created by +# sphinx-quickstart on Thu Nov 16 06:50:13 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Datasette' +copyright = '2017, Simon Willison' +author = 'Simon Willison' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '' +# The full version, including alpha/beta/rc tags. +release = '' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Datasettedoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'Datasette.tex', 'Datasette Documentation', + 'Simon Willison', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'datasette', 'Datasette Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'Datasette', 'Datasette Documentation', + author, 'Datasette', 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..c459a58f --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,134 @@ +Datasette +========= + +*An instant JSON API for your SQLite databases* + +Datasette provides an instant, read-only JSON API for any SQLite +database. It also provides tools for packaging the database up as a +Docker container and deploying that container to hosting providers such +as `Zeit Now`_. 
+ +Some examples: https://github.com/simonw/datasette/wiki/Datasettes + +Contents +-------- + +.. toctree:: + :maxdepth: 2 + + changelog + +Getting started +--------------- + +:: + + pip3 install datasette + +Datasette requires Python 3.5 or higher. + +Basic usage +----------- + +:: + + datasette serve path/to/database.db + +This will start a web server on port 8001 - visit http://localhost:8001/ +to access the web interface. + +``serve`` is the default subcommand, you can omit it if you like. + +Use Chrome on OS X? You can run datasette against your browser history +like so: + +:: + + datasette ~/Library/Application\ Support/Google/Chrome/Default/History + +Now visiting http://localhost:8001/History/downloads will show you a web +interface to browse your downloads data: + +.. figure:: https://static.simonwillison.net/static/2017/datasette-downloads.png + :alt: Downloads table rendered by datasette + +http://localhost:8001/History/downloads.json will return that data as +JSON: + +:: + + { + "database": "History", + "columns": [ + "id", + "current_path", + "target_path", + "start_time", + "received_bytes", + "total_bytes", + ... + ], + "table_rows": 576, + "rows": [ + [ + 1, + "/Users/simonw/Downloads/DropboxInstaller.dmg", + "/Users/simonw/Downloads/DropboxInstaller.dmg", + 13097290269022132, + 626688, + 0, + ... + ] + ] + } + +http://localhost:8001/History/downloads.jsono will return that data as +JSON in a more convenient but less efficient format: + +:: + + { + ... + "rows": [ + { + "start_time": 13097290269022132, + "interrupt_reason": 0, + "hash": "", + "id": 1, + "site_url": "", + "referrer": "https://www.dropbox.com/downloading?src=index", + ... + } + ] + } + +datasette serve options +----------------------- + +:: + + $ datasette serve --help + Usage: datasette serve [OPTIONS] [FILES]... + + Serve up specified SQLite database files with a web UI + + Options: + -h, --host TEXT host for server, defaults to 127.0.0.1 + -p, --port INTEGER port for server, defaults to 8001 + --debug Enable debug mode - useful for development + --reload Automatically reload if code change detected - + useful for development + --cors Enable CORS by serving Access-Control-Allow- + Origin: * + --page_size INTEGER Page size - default is 100 + --max_returned_rows INTEGER Max allowed rows to return at once - default is + 1000. Set to 0 to disable check entirely. + --sql_time_limit_ms INTEGER Max time allowed for SQL queries in ms + --load-extension TEXT Path to a SQLite extension to load + --inspect-file TEXT Path to JSON file created using "datasette + build" + -m, --metadata FILENAME Path to JSON file containing license/source + metadata + --help Show this message and exit. + +.. 
_Zeit Now: https://zeit.co/now diff --git a/setup.py b/setup.py index 1607c038..1fd3ab6a 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,11 @@ from setuptools import setup, find_packages +from datasette import __version__ setup( name='datasette', description='An instant JSON API for your SQLite databases', author='Simon Willison', - version='0.11', + version=__version__, license='Apache License, Version 2.0', url='https://github.com/simonw/datasette', packages=find_packages(), @@ -13,7 +14,8 @@ setup( install_requires=[ 'click==6.7', 'click-default-group==1.2', - 'sanic==0.6.0', + 'Sanic==0.6.0', + 'Jinja2==2.10', 'sanic-jinja2==0.5.5', 'hupper==1.0', ], diff --git a/tests/test_app.py b/tests/test_app.py index f25a9f0d..17d1fc30 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -26,7 +26,7 @@ def app_client(): def test_homepage(app_client): - _, response = app_client.get('/') + response = app_client.get('/', gather_request=False) assert response.status == 200 assert 'test_tables' in response.text @@ -40,12 +40,12 @@ def test_homepage(app_client): def test_database_page(app_client): - _, response = app_client.get('/test_tables', allow_redirects=False) + response = app_client.get('/test_tables', allow_redirects=False, gather_request=False) assert response.status == 302 - _, response = app_client.get('/test_tables') + response = app_client.get('/test_tables', gather_request=False) assert 'test_tables' in response.text # Test JSON list of tables - _, response = app_client.get('/test_tables.json') + response = app_client.get('/test_tables.json', gather_request=False) data = response.json assert 'test_tables' == data['database'] assert [{ @@ -76,8 +76,9 @@ def test_database_page(app_client): def test_custom_sql(app_client): - _, response = app_client.get( - '/test_tables.jsono?sql=select+content+from+simple_primary_key' + response = app_client.get( + '/test_tables.jsono?sql=select+content+from+simple_primary_key', + gather_request=False ) data = response.json assert { @@ -94,33 +95,38 @@ def test_custom_sql(app_client): def test_sql_time_limit(app_client): - _, response = app_client.get( - '/test_tables.jsono?sql=select+sleep(0.5)' + response = app_client.get( + '/test_tables.jsono?sql=select+sleep(0.5)', + gather_request=False ) assert 400 == response.status assert 'interrupted' == response.json['error'] def test_custom_sql_time_limit(app_client): - _, response = app_client.get( - '/test_tables.jsono?sql=select+sleep(0.01)' + response = app_client.get( + '/test_tables.jsono?sql=select+sleep(0.01)', + gather_request=False ) assert 200 == response.status - _, response = app_client.get( - '/test_tables.jsono?sql=select+sleep(0.01)&_sql_time_limit_ms=5' + response = app_client.get( + '/test_tables.jsono?sql=select+sleep(0.01)&_sql_time_limit_ms=5', + gather_request=False ) assert 400 == response.status assert 'interrupted' == response.json['error'] def test_invalid_custom_sql(app_client): - _, response = app_client.get( - '/test_tables?sql=.schema' + response = app_client.get( + '/test_tables?sql=.schema', + gather_request=False ) assert response.status == 400 assert 'Statement must begin with SELECT' in response.text - _, response = app_client.get( - '/test_tables.json?sql=.schema' + response = app_client.get( + '/test_tables.json?sql=.schema', + gather_request=False ) assert response.status == 400 assert response.json['ok'] is False @@ -128,9 +134,9 @@ def test_invalid_custom_sql(app_client): def test_table_page(app_client): - _, response = 
app_client.get('/test_tables/simple_primary_key') + response = app_client.get('/test_tables/simple_primary_key', gather_request=False) assert response.status == 200 - _, response = app_client.get('/test_tables/simple_primary_key.jsono') + response = app_client.get('/test_tables/simple_primary_key.jsono', gather_request=False) assert response.status == 200 data = response.json assert data['query']['sql'] == 'select * from simple_primary_key order by pk limit 51' @@ -145,9 +151,9 @@ def test_table_page(app_client): def test_table_with_slashes_in_name(app_client): - _, response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv') + response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv', gather_request=False) assert response.status == 200 - _, response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.jsono') + response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.jsono', gather_request=False) assert response.status == 200 data = response.json assert data['rows'] == [{ @@ -165,7 +171,7 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag fetched = [] count = 0 while path: - _, response = app_client.get(path) + response = app_client.get(path, gather_request=False) count += 1 fetched.extend(response.json['rows']) path = response.json['next_url'] @@ -178,8 +184,9 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag def test_max_returned_rows(app_client): - _, response = app_client.get( - '/test_tables.jsono?sql=select+content+from+no_primary_key' + response = app_client.get( + '/test_tables.jsono?sql=select+content+from+no_primary_key', + gather_request=False ) data = response.json assert { @@ -191,9 +198,9 @@ def test_max_returned_rows(app_client): def test_view(app_client): - _, response = app_client.get('/test_tables/simple_view') + response = app_client.get('/test_tables/simple_view', gather_request=False) assert response.status == 200 - _, response = app_client.get('/test_tables/simple_view.jsono') + response = app_client.get('/test_tables/simple_view.jsono', gather_request=False) assert response.status == 200 data = response.json assert data['rows'] == [{ @@ -206,12 +213,16 @@ def test_view(app_client): def test_row(app_client): - _, response = app_client.get('/test_tables/simple_primary_key/1', allow_redirects=False) + response = app_client.get( + '/test_tables/simple_primary_key/1', + allow_redirects=False, + gather_request=False + ) assert response.status == 302 assert response.headers['Location'].endswith('/1') - _, response = app_client.get('/test_tables/simple_primary_key/1') + response = app_client.get('/test_tables/simple_primary_key/1', gather_request=False) assert response.status == 200 - _, response = app_client.get('/test_tables/simple_primary_key/1.jsono') + response = app_client.get('/test_tables/simple_primary_key/1.jsono', gather_request=False) assert response.status == 200 assert [{'pk': '1', 'content': 'hello'}] == response.json['rows'] diff --git a/tests/test_inspect.py b/tests/test_inspect.py new file mode 100644 index 00000000..c138eaf9 --- /dev/null +++ b/tests/test_inspect.py @@ -0,0 +1,76 @@ +from datasette.app import Datasette +import os +import pytest +import sqlite3 +import tempfile + + +TABLES = ''' +CREATE TABLE "election_results" ( + "county" INTEGER, + "party" INTEGER, + "office" INTEGER, + "votes" INTEGER, + FOREIGN KEY (county) REFERENCES county(id), + FOREIGN KEY (party) REFERENCES party(id), + FOREIGN KEY (office) REFERENCES office(id) + ); + +CREATE 
TABLE "county" ( + "id" INTEGER PRIMARY KEY , + "name" TEXT +); + +CREATE TABLE "party" ( + "id" INTEGER PRIMARY KEY , + "name" TEXT +); + +CREATE TABLE "office" ( + "id" INTEGER PRIMARY KEY , + "name" TEXT +); +''' + + +@pytest.fixture(scope='module') +def ds_instance(): + with tempfile.TemporaryDirectory() as tmpdir: + filepath = os.path.join(tmpdir, 'test_tables.db') + conn = sqlite3.connect(filepath) + conn.executescript(TABLES) + yield Datasette([filepath]) + + +def test_inspect(ds_instance): + info = ds_instance.inspect() + tables = info['test_tables']['tables'] + for table_name in ('county', 'party', 'office'): + assert 0 == tables[table_name]['count'] + foreign_keys = tables[table_name]['foreign_keys'] + assert [] == foreign_keys['outgoing'] + assert [{ + 'column': 'id', + 'other_column': table_name, + 'other_table': 'election_results' + }] == foreign_keys['incoming'] + + election_results = tables['election_results'] + assert 0 == election_results['count'] + assert sorted([{ + 'column': 'county', + 'other_column': 'id', + 'other_table': 'county' + }, { + 'column': 'party', + 'other_column': 'id', + 'other_table': 'party' + }, { + 'column': 'office', + 'other_column': 'id', + 'other_table': 'office' + }], key=lambda d: d['column']) == sorted( + election_results['foreign_keys']['outgoing'], + key=lambda d: d['column'] + ) + assert [] == election_results['foreign_keys']['incoming'] diff --git a/tests/test_utils.py b/tests/test_utils.py index a8b0e379..168b9253 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -88,6 +88,15 @@ def test_custom_json_encoder(obj, expected): ['"foo" like :p0', '"zax" glob :p1'], ['2%2', '3*'] ), + ( + { + 'foo__isnull': '1', + 'baz__isnull': '1', + 'bar__gt': '10' + }, + ['"bar" > :p0', '"baz" is null', '"foo" is null'], + [10] + ), ]) def test_build_where(args, expected_where, expected_params): sql_bits, actual_params = utils.build_where_clauses(args)
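Aside, not part of the diff: the per-connection extension loading added to BaseView is what makes the new --load-extension option (and the SpatiaLite support in the Dockerfile) work. A minimal standalone sketch of the same idea; the SpatiaLite path is only an example, nothing in this diff hard-codes it::

    import sqlite3

    def prepare_connection(conn, sqlite_extensions):
        # Mirror the BaseView change: allow extension loading on this
        # connection, then load each path passed via --load-extension.
        if sqlite_extensions:
            conn.enable_load_extension(True)
            for extension in sqlite_extensions:
                conn.execute("SELECT load_extension(?)", [extension])

    conn = sqlite3.connect(':memory:')
    # e.g. prepare_connection(conn, ['/usr/lib/x86_64-linux-gnu/mod_spatialite.so'])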