diff --git a/datasette/app.py b/datasette/app.py
index 06543761..e36e2e62 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -154,8 +154,10 @@ class BaseView(RenderMixin):
             return name, expected, should_redirect
         return name, expected, None
 
-    async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None):
+    async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None, page_size=None):
         """Executes sql against db_name in a thread"""
+        page_size = page_size or self.page_size
+
         def sql_operation_in_thread():
             conn = getattr(connections, db_name, None)
             if not conn:
@@ -177,7 +179,7 @@ class BaseView(RenderMixin):
                     cursor = conn.cursor()
                     cursor.execute(sql, params or {})
                     max_returned_rows = self.max_returned_rows
-                    if max_returned_rows == self.page_size:
+                    if max_returned_rows == page_size:
                         max_returned_rows += 1
                     if max_returned_rows and truncate:
                         rows = cursor.fetchmany(max_returned_rows + 1)
@@ -768,18 +770,6 @@ class TableView(RowTableShared):
                 ) if where_clauses else '',
             )
 
-        # _group_count=col1&_group_count=col2
-        group_count = special_args_lists.get('_group_count') or []
-        if group_count:
-            sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
-                group_cols=', '.join('"{}"'.format(group_count_col) for group_count_col in group_count),
-                table_name=escape_sqlite(table),
-                where=(
-                    'where {} '.format(' and '.join(where_clauses))
-                ) if where_clauses else '',
-            )
-            return await self.custom_sql(request, name, hash, sql, editable=True)
-
         _next = special_args.get('_next')
         offset = ''
         if _next:
@@ -867,16 +857,37 @@ class TableView(RowTableShared):
             )
             return await self.custom_sql(request, name, hash, sql, editable=True)
 
+        extra_args = {}
+        # Handle ?_size=500
+        page_size = request.raw_args.get('_size')
+        if page_size:
+            try:
+                page_size = int(page_size)
+                if page_size < 0:
+                    raise ValueError
+            except ValueError:
+                raise DatasetteError(
+                    '_size must be a positive integer',
+                    status=400
+                )
+            if page_size > self.max_returned_rows:
+                raise DatasetteError(
+                    '_size must be <= {}'.format(self.max_returned_rows),
+                    status=400
+                )
+            extra_args['page_size'] = page_size
+        else:
+            page_size = self.page_size
+
         sql = 'select {select} from {table_name} {where}{order_by}limit {limit}{offset}'.format(
             select=select,
             table_name=escape_sqlite(table),
             where=where_clause,
             order_by=order_by,
-            limit=self.page_size + 1,
+            limit=page_size + 1,
             offset=offset,
         )
 
-        extra_args = {}
         if request.raw_args.get('_sql_time_limit_ms'):
             extra_args['custom_time_limit'] = int(request.raw_args['_sql_time_limit_ms'])
 
@@ -894,9 +905,9 @@ class TableView(RowTableShared):
         # Pagination next link
         next_value = None
         next_url = None
-        if len(rows) > self.page_size:
+        if len(rows) > page_size and page_size > 0:
             if is_view:
-                next_value = int(_next or 0) + self.page_size
+                next_value = int(_next or 0) + page_size
             else:
                 next_value = path_from_row_pks(rows[-2], pks, use_rowid)
             # If there's a sort or sort_desc, add that value as a prefix
@@ -921,7 +932,7 @@ class TableView(RowTableShared):
                 next_url = urllib.parse.urljoin(request.url, path_with_added_args(
                     request, added_args
                 ))
-            rows = rows[:self.page_size]
 
         # Number of filtered rows in whole set:
         filtered_table_rows_count = None
@@ -983,7 +994,7 @@ class TableView(RowTableShared):
            'view_definition': view_definition,
            'table_definition': table_definition,
            'human_description_en': human_description_en,
-           'rows': rows[:self.page_size],
+           'rows': rows[:page_size],
            'truncated': truncated,
            'table_rows_count': table_rows_count,
            'filtered_table_rows_count': filtered_table_rows_count,
diff --git a/docs/json_api.rst b/docs/json_api.rst
index 6e18f01a..e750dbbf 100644
--- a/docs/json_api.rst
+++ b/docs/json_api.rst
@@ -103,3 +103,41 @@ this format.
 
 The ``object`` keys are always strings. If your table has a compound primary
 key, the ``object`` keys will be a comma-separated string.
+
+Special table arguments
+-----------------------
+
+The Datasette table view takes a number of special querystring arguments:
+
+``?_size=1000``
+    Sets a custom page size. This cannot exceed the ``max_returned_rows`` option
+    passed to ``datasette serve``.
+
+``?_sort=COLUMN``
+    Sorts the results by the specified column.
+
+``?_sort_desc=COLUMN``
+    Sorts the results by the specified column in descending order.
+
+``?_search=keywords``
+    For SQLite tables that have been configured for
+    `full-text search <https://www.sqlite.org/fts3.html>`_, executes a search
+    with the provided keywords.
+
+``?_group_count=COLUMN``
+    Executes a SQL query that returns a count of the number of rows matching
+    each unique value in that column, with the most common ordered first.
+
+``?_group_count=COLUMN1&_group_count=COLUMN2``
+    You can pass multiple ``_group_count`` columns to return counts against
+    unique combinations of those columns.
+
+``?_sql_time_limit_ms=MS``
+    Sets a custom time limit for the query in ms. You can use this for optimistic
+    queries where you would like Datasette to give up if the query takes too
+    long, for example if you want to implement autocomplete search but only if
+    it can be executed in less than 10ms.
+
+``?_next=TOKEN``
+    Pagination by continuation token - pass the token that was returned in the
+    ``"next"`` property by the previous page.
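
As a quick illustration of the arguments documented above, here is a minimal client sketch that pages through a table using ``_size`` and the ``next_url`` continuation link. The base URL and table path are illustrative assumptions, not part of this patch::

    import json
    import urllib.request

    # Assumed base URL - wherever `datasette serve` is running.
    BASE_URL = 'http://localhost:8001'

    def fetch_all_rows(table_path, page_size=25):
        """Follow Datasette's continuation-token pagination to the last page.

        Each response carries 'next' (the raw continuation token) and
        'next_url' (an absolute URL with ?_next=TOKEN already applied).
        """
        rows = []
        url = '{}{}?_size={}'.format(BASE_URL, table_path, page_size)
        while url:
            with urllib.request.urlopen(url) as response:
                data = json.load(response)
            rows.extend(data['rows'])
            url = data['next_url']  # None once the final page is reached
        return rows

    print(len(fetch_all_rows('/test_tables/no_primary_key.json')))
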
diff --git a/tests/test_api.py b/tests/test_api.py
index 0a804645..5bd8c15b 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -404,6 +404,8 @@ def test_table_with_reserved_word_name(app_client):
 @pytest.mark.parametrize('path,expected_rows,expected_pages', [
     ('/test_tables/no_primary_key.json', 201, 5),
     ('/test_tables/paginated_view.json', 201, 5),
+    ('/test_tables/no_primary_key.json?_size=25', 201, 9),
+    ('/test_tables/paginated_view.json?_size=25', 201, 9),
     ('/test_tables/123_starts_with_digits.json', 0, 1),
 ])
 def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pages):
@@ -415,13 +417,36 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag
         fetched.extend(response.json['rows'])
         path = response.json['next_url']
         if path:
-            assert response.json['next'] and path.endswith(response.json['next'])
+            assert response.json['next']
+            assert '_next={}'.format(response.json['next']) in path
         assert count < 10, 'Possible infinite loop detected'
 
     assert expected_rows == len(fetched)
     assert expected_pages == count
 
 
+@pytest.mark.parametrize('path,expected_error', [
+    ('/test_tables/no_primary_key.json?_size=-4', '_size must be a positive integer'),
+    ('/test_tables/no_primary_key.json?_size=dog', '_size must be a positive integer'),
+    ('/test_tables/no_primary_key.json?_size=1001', '_size must be <= 100'),
+])
+def test_validate_page_size(app_client, path, expected_error):
+    response = app_client.get(path, gather_request=False)
+    assert expected_error == response.json['error']
+    assert 400 == response.status
+
+
+def test_page_size_zero(app_client):
+    "For _size=0 we return the counts, empty rows and no continuation token"
+    response = app_client.get('/test_tables/no_primary_key.json?_size=0', gather_request=False)
+    assert 200 == response.status
+    assert [] == response.json['rows']
+    assert 201 == response.json['table_rows_count']
+    assert 201 == response.json['filtered_table_rows_count']
+    assert None is response.json['next']
+    assert None is response.json['next_url']
+
+
 def test_paginate_compound_keys(app_client_longer_time_limit):
     fetched = []
     path = '/test_tables/compound_three_primary_keys.json?_shape=objects'
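
The tests above exercise the limit-plus-one trick the ``app.py`` changes rely on: the view asks SQLite for ``page_size + 1`` rows, and the presence of the extra row is what signals that a ``next`` token should be emitted. Here is a standalone sketch of that pattern; the rowid ordering and demo table are assumptions for illustration, not Datasette's actual keyset logic, which derives tokens from primary keys via ``path_from_row_pks``::

    import sqlite3

    def fetch_page(conn, table, page_size, after_rowid=0):
        """Limit-plus-one pagination: over-fetch one row to detect a next page."""
        cursor = conn.execute(
            'select rowid, * from [{}] where rowid > ? '
            'order by rowid limit ?'.format(table),
            (after_rowid, page_size + 1),
        )
        rows = cursor.fetchall()
        has_next = len(rows) > page_size
        rows = rows[:page_size]  # trim the sentinel row before returning
        # Plain rowid stands in for Datasette's primary-key-based token here.
        next_token = rows[-1][0] if has_next and rows else None
        return rows, next_token

    # Usage: walk a 7-row table in pages of 3.
    conn = sqlite3.connect(':memory:')
    conn.execute('create table demo (value text)')
    conn.executemany('insert into demo (value) values (?)', [('x',)] * 7)
    token, pages = 0, 0
    while token is not None:
        page, token = fetch_page(conn, 'demo', page_size=3, after_rowid=token)
        pages += 1
    print(pages)  # 3 pages: 3 + 3 + 1 rows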