From 272982e8a6f45700ff93c3917b4688a86de0e672 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 7 Dec 2022 17:12:15 -0800 Subject: [PATCH] /db/table/-/upsert API Close #1878 Also made a few tweaks to how _r works in tokens and actors, refs #1855 - I needed that mechanism for the tests. --- datasette/app.py | 6 +- datasette/default_permissions.py | 4 +- datasette/views/special.py | 44 ++++++--- datasette/views/table.py | 117 ++++++++++++++++++---- docs/json_api.rst | 112 ++++++++++++++++++++- tests/test_api_write.py | 162 +++++++++++++++++++++++++++++-- 6 files changed, 401 insertions(+), 44 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 125b4969..282c0984 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -40,7 +40,7 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import TableView, TableInsertView, TableDropView +from .views.table import TableView, TableInsertView, TableUpsertView, TableDropView from .views.row import RowView, RowDeleteView, RowUpdateView from .renderer import json_renderer from .url_builder import Urls @@ -1292,6 +1292,10 @@ class Datasette: TableInsertView.as_view(self), r"/(?P[^\/\.]+)/(?P[^\/\.]+)/-/insert$", ) + add_route( + TableUpsertView.as_view(self), + r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/upsert$", + ) add_route( TableDropView.as_view(self), r"/(?P[^\/\.]+)/(?P
[^\/\.]+)/-/drop$", diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 2aaf72d6..ab2f6312 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -85,7 +85,7 @@ def permission_allowed_actor_restrictions(actor, action, resource): if action_initials in database_allowed: return None # Or the current table? That's any time the resource is (database, table) - if not isinstance(resource, str) and len(resource) == 2: + if resource is not None and not isinstance(resource, str) and len(resource) == 2: database, table = resource table_allowed = _r.get("t", {}).get(database, {}).get(table) # TODO: What should this do for canned queries? @@ -138,6 +138,8 @@ def actor_from_request(datasette, request): # Expired return None actor = {"id": decoded["a"], "token": "dstok"} + if "_r" in decoded: + actor["_r"] = decoded["_r"] if duration: actor["token_expires"] = created + duration return actor diff --git a/datasette/views/special.py b/datasette/views/special.py index 1f84b094..1b4a9d3c 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -316,21 +316,37 @@ class ApiExplorerView(BaseView): request.actor, "insert-row", (name, table) ): pks = await db.primary_keys(table) - table_links.append( - { - "path": self.ds.urls.table(name, table) + "/-/insert", - "method": "POST", - "label": "Insert rows into {}".format(table), - "json": { - "rows": [ - { - column: None - for column in await db.table_columns(table) - if column not in pks - } - ] + table_links.extend( + [ + { + "path": self.ds.urls.table(name, table) + "/-/insert", + "method": "POST", + "label": "Insert rows into {}".format(table), + "json": { + "rows": [ + { + column: None + for column in await db.table_columns(table) + if column not in pks + } + ] + }, }, - } + { + "path": self.ds.urls.table(name, table) + "/-/upsert", + "method": "POST", + "label": "Upsert rows into {}".format(table), + "json": { + "rows": [ + { + column: None + for column in await db.table_columns(table) + if column not in pks + } + ] + }, + }, + ] ) if await self.ds.permission_allowed( request.actor, "drop-table", (name, table) diff --git a/datasette/views/table.py b/datasette/views/table.py index 7ba78c11..9e8b5254 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -1074,9 +1074,18 @@ class TableInsertView(BaseView): def __init__(self, datasette): self.ds = datasette - async def _validate_data(self, request, db, table_name): + async def _validate_data(self, request, db, table_name, pks, upsert): errors = [] + pks_list = [] + if isinstance(pks, str): + pks_list = [pks] + else: + pks_list = list(pks) + + if not pks_list: + pks_list = ["rowid"] + def _errors(errors): return None, errors, {} @@ -1134,7 +1143,18 @@ class TableInsertView(BaseView): # Validate columns of each row columns = set(await db.table_columns(table_name)) + columns.update(pks_list) + for i, row in enumerate(rows): + if upsert: + # It MUST have the primary key + missing_pks = [pk for pk in pks_list if pk not in row] + if missing_pks: + errors.append( + 'Row {} is missing primary key column(s): "{}"'.format( + i, '", "'.join(missing_pks) + ) + ) invalid_columns = set(row.keys()) - columns if invalid_columns: errors.append( @@ -1146,7 +1166,7 @@ class TableInsertView(BaseView): return _errors(errors) return rows, errors, extras - async def post(self, request): + async def post(self, request, upsert=False): try: resolved = await self.ds.resolve_table(request) except NotFound as e: @@ -1159,28 +1179,66 @@ class TableInsertView(BaseView): db = self.ds.get_database(database_name) if not await db.table_exists(table_name): return _error(["Table not found: {}".format(table_name)], 404) - # Must have insert-row permission - if not await self.ds.permission_allowed( - request.actor, "insert-row", resource=(database_name, table_name) - ): - return _error(["Permission denied"], 403) - rows, errors, extras = await self._validate_data(request, db, table_name) + + if upsert: + # Must have insert-row AND upsert-row permissions + if not ( + await self.ds.permission_allowed( + request.actor, "insert-row", database_name, table_name + ) + and await self.ds.permission_allowed( + request.actor, "update-row", database_name, table_name + ) + ): + return _error( + ["Permission denied: need both insert-row and update-row"], 403 + ) + else: + # Must have insert-row permission + if not await self.ds.permission_allowed( + request.actor, "insert-row", resource=(database_name, table_name) + ): + return _error(["Permission denied"], 403) + + if not db.is_mutable: + return _error(["Database is immutable"], 403) + + pks = await db.primary_keys(table_name) + + rows, errors, extras = await self._validate_data( + request, db, table_name, pks, upsert + ) if errors: return _error(errors, 400) + # No that we've passed pks to _validate_data it's safe to + # fix the rowids case: + if not pks: + pks = ["rowid"] + ignore = extras.get("ignore") replace = extras.get("replace") + if upsert and (ignore or replace): + return _error(["Upsert does not support ignore or replace"], 400) + should_return = bool(extras.get("return", False)) - # Insert rows - def insert_rows(conn): + row_pk_values_for_later = [] + if should_return and upsert: + row_pk_values_for_later = [tuple(row[pk] for pk in pks) for row in rows] + + def insert_or_upsert_rows(conn): table = sqlite_utils.Database(conn)[table_name] - if should_return: + kwargs = {} + if upsert: + kwargs["pk"] = pks[0] if len(pks) == 1 else pks + else: + kwargs = {"ignore": ignore, "replace": replace} + if should_return and not upsert: rowids = [] + method = table.upsert if upsert else table.insert for row in rows: - rowids.append( - table.insert(row, ignore=ignore, replace=replace).last_rowid - ) + rowids.append(method(row, **kwargs).last_rowid) return list( table.rows_where( "rowid in ({})".format(",".join("?" for _ in rowids)), @@ -1188,16 +1246,39 @@ class TableInsertView(BaseView): ) ) else: - table.insert_all(rows, ignore=ignore, replace=replace) + method_all = table.upsert_all if upsert else table.insert_all + method_all(rows, **kwargs) try: - rows = await db.execute_write_fn(insert_rows) + rows = await db.execute_write_fn(insert_or_upsert_rows) except Exception as e: return _error([str(e)]) result = {"ok": True} if should_return: - result["rows"] = rows - return Response.json(result, status=201) + if upsert: + # Fetch based on initial input IDs + where_clause = " OR ".join( + ["({})".format(" AND ".join("{} = ?".format(pk) for pk in pks))] + * len(row_pk_values_for_later) + ) + args = list(itertools.chain.from_iterable(row_pk_values_for_later)) + fetched_rows = await db.execute( + "select {}* from [{}] where {}".format( + "rowid, " if pks == ["rowid"] else "", table_name, where_clause + ), + args, + ) + result["rows"] = [dict(r) for r in fetched_rows.rows] + else: + result["rows"] = rows + return Response.json(result, status=200 if upsert else 201) + + +class TableUpsertView(TableInsertView): + name = "table-upsert" + + async def post(self, request): + return await super().post(request, upsert=True) class TableDropView(BaseView): diff --git a/docs/json_api.rst b/docs/json_api.rst index 46973616..682d2a39 100644 --- a/docs/json_api.rst +++ b/docs/json_api.rst @@ -527,7 +527,7 @@ To insert multiple rows at a time, use the same API method but send a list of di ] } -If successful, this will return a ``201`` status code and an empty ``{}`` response body. +If successful, this will return a ``201`` status code and a ``{"ok": true}`` response body. To return the newly inserted rows, add the ``"return": true`` key to the request body: @@ -582,6 +582,116 @@ Pass ``"ignore": true`` to ignore these errors and insert the other rows: Or you can pass ``"replace": true`` to replace any rows with conflicting primary keys with the new values. +.. _TableUpsertView: + +Upserting rows +~~~~~~~~~~~~~~ + +An upsert is an insert or update operation. If a row with a matching primary key already exists it will be updated - otherwise a new row will be inserted. + +The upsert API is mostly the same shape as the :ref:`insert API `. It requires both the :ref:`permissions_insert_row` and :ref:`permissions_update_row` permissions. + +:: + + POST //
/-/upsert + Content-Type: application/json + Authorization: Bearer dstok_ + +.. code-block:: json + + { + "rows": [ + { + "id": 1, + "title": "Updated title for 1", + "description": "Updated description for 1" + }, + { + "id": 2, + "description": "Updated description for 2", + }, + { + "id": 3, + "title": "Item 3", + "description": "Description for 3" + } + ] + } + +Imagine a table with a primary key of ``id`` and which already has rows with ``id`` values of ``1`` and ``2``. + +The above example will: + +- Update the row with ``id`` of ``1`` to set both ``title`` and ``description`` to the new values +- Update the row with ``id`` of ``2`` to set ``title`` to the new value - ``description`` will be left unchanged +- Insert a new row with ``id`` of ``3`` and both ``title`` and ``description`` set to the new values + +Similar to ``/-/insert``, a ``row`` key with an object can be used instead of a ``rows`` array to upsert a single row. + +If successful, this will return a ``200`` status code and a ``{"ok": true}`` response body. + +Add ``"return": true`` to the request body to return full copies of the affected rows after they have been inserted or updated: + +.. code-block:: json + + { + "rows": [ + { + "id": 1, + "title": "Updated title for 1", + "description": "Updated description for 1" + }, + { + "id": 2, + "description": "Updated description for 2", + }, + { + "id": 3, + "title": "Item 3", + "description": "Description for 3" + } + ], + "return": true + } + +This will return the following: + +.. code-block:: json + + { + "ok": true, + "rows": [ + { + "id": 1, + "title": "Updated title for 1", + "description": "Updated description for 1" + }, + { + "id": 2, + "title": "Item 2", + "description": "Updated description for 2" + }, + { + "id": 3, + "title": "Item 3", + "description": "Description for 3" + } + ] + } + +When using upsert you must provide the primary key column (or columns if the table has a compound primary key) for every row, or you will get a ``400`` error: + +.. code-block:: json + + { + "ok": false, + "errors": [ + "Row 0 is missing primary key column(s): \"id\"" + ] + } + +If your table does not have an explicit primary key you should pass the SQLite ``rowid`` key instead. + .. _RowUpdateView: Updating a row diff --git a/tests/test_api_write.py b/tests/test_api_write.py index cfcf9db0..813097b2 100644 --- a/tests/test_api_write.py +++ b/tests/test_api_write.py @@ -21,16 +21,15 @@ def ds_write(tmp_path_factory): db.close() -def write_token(ds, actor_id="root"): - return "dstok_{}".format( - ds.sign( - {"a": actor_id, "token": "dstok", "t": int(time.time())}, namespace="token" - ) - ) +def write_token(ds, actor_id="root", permissions=None): + to_sign = {"a": actor_id, "token": "dstok", "t": int(time.time())} + if permissions: + to_sign["_r"] = {"a": permissions} + return "dstok_{}".format(ds.sign(to_sign, namespace="token")) @pytest.mark.asyncio -async def test_write_row(ds_write): +async def test_insert_row(ds_write): token = write_token(ds_write) response = await ds_write.client.post( "/data/docs/-/insert", @@ -42,6 +41,7 @@ async def test_write_row(ds_write): ) expected_row = {"id": 1, "title": "Test", "score": 1.2, "age": 5} assert response.status_code == 201 + assert response.json()["ok"] is True assert response.json()["rows"] == [expected_row] rows = (await ds_write.get_database("data").execute("select * from docs")).rows assert dict(rows[0]) == expected_row @@ -49,7 +49,7 @@ async def test_write_row(ds_write): @pytest.mark.asyncio @pytest.mark.parametrize("return_rows", (True, False)) -async def test_write_rows(ds_write, return_rows): +async def test_insert_rows(ds_write, return_rows): token = write_token(ds_write) data = { "rows": [ @@ -194,6 +194,13 @@ async def test_write_rows(ds_write, return_rows): 400, ['Invalid parameter: "one", "two"'], ), + ( + "/immutable/docs/-/insert", + {"rows": [{"title": "Test"}]}, + None, + 403, + ["Database is immutable"], + ), # Validate columns of each row ( "/data/docs/-/insert", @@ -205,12 +212,62 @@ async def test_write_rows(ds_write, return_rows): "Row 1 has invalid columns: bad, worse", ], ), + ## UPSERT ERRORS: + ( + "/immutable/docs/-/upsert", + {"rows": [{"title": "Test"}]}, + None, + 403, + ["Database is immutable"], + ), + ( + "/data/badtable/-/upsert", + {"rows": [{"title": "Test"}]}, + None, + 404, + ["Table not found: badtable"], + ), + # missing primary key + ( + "/data/docs/-/upsert", + {"rows": [{"title": "Missing PK"}]}, + None, + 400, + ['Row 0 is missing primary key column(s): "id"'], + ), + # Upsert does not support ignore or replace + ( + "/data/docs/-/upsert", + {"rows": [{"id": 1, "title": "Bad"}], "ignore": True}, + None, + 400, + ["Upsert does not support ignore or replace"], + ), + # Upsert permissions + ( + "/data/docs/-/upsert", + {"rows": [{"id": 1, "title": "Disallowed"}]}, + "insert-but-not-update", + 403, + ["Permission denied: need both insert-row and update-row"], + ), + ( + "/data/docs/-/upsert", + {"rows": [{"id": 1, "title": "Disallowed"}]}, + "update-but-not-insert", + 403, + ["Permission denied: need both insert-row and update-row"], + ), ), ) -async def test_write_row_errors( +async def test_insert_or_upsert_row_errors( ds_write, path, input, special_case, expected_status, expected_errors ): token = write_token(ds_write) + if special_case == "insert-but-not-update": + token = write_token(ds_write, permissions=["ir", "vi"]) + if special_case == "update-but-not-insert": + token = write_token(ds_write, permissions=["ur", "vi"]) if special_case == "duplicate_id": await ds_write.get_database("data").execute_write( "insert into docs (id) values (1)" @@ -226,6 +283,12 @@ async def test_write_row_errors( else "application/json", }, ) + + actor_response = ( + await ds_write.client.get("/-/actor.json", headers=kwargs["headers"]) + ).json() + print(actor_response) + if special_case == "invalid_json": del kwargs["json"] kwargs["content"] = "{bad json" @@ -302,6 +365,87 @@ async def test_insert_ignore_replace( assert response.json()["rows"] == expected_rows +@pytest.mark.asyncio +@pytest.mark.parametrize( + "initial,input,expected_rows", + ( + ( + # Simple primary key update + {"rows": [{"id": 1, "title": "One"}], "pk": "id"}, + {"rows": [{"id": 1, "title": "Two"}]}, + [ + {"id": 1, "title": "Two"}, + ], + ), + ( + # Multiple rows update one of them + { + "rows": [{"id": 1, "title": "One"}, {"id": 2, "title": "Two"}], + "pk": "id", + }, + {"rows": [{"id": 1, "title": "Three"}]}, + [ + {"id": 1, "title": "Three"}, + {"id": 2, "title": "Two"}, + ], + ), + ( + # rowid update + {"rows": [{"title": "One"}]}, + {"rows": [{"rowid": 1, "title": "Two"}]}, + [ + {"rowid": 1, "title": "Two"}, + ], + ), + ( + # Compound primary key update + {"rows": [{"id": 1, "title": "One", "score": 1}], "pks": ["id", "score"]}, + {"rows": [{"id": 1, "title": "Two", "score": 1}]}, + [ + {"id": 1, "title": "Two", "score": 1}, + ], + ), + ), +) +@pytest.mark.parametrize("should_return", (False, True)) +async def test_upsert(ds_write, initial, input, expected_rows, should_return): + token = write_token(ds_write) + # Insert initial data + initial["table"] = "upsert_test" + create_response = await ds_write.client.post( + "/data/-/create", + json=initial, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert create_response.status_code == 201 + if should_return: + input["return"] = True + response = await ds_write.client.post( + "/data/upsert_test/-/upsert", + json=input, + headers={ + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + }, + ) + assert response.status_code == 200 + assert response.json()["ok"] is True + if should_return: + # We only expect it to return rows corresponding to those we sent + expected_returned_rows = expected_rows[: len(input["rows"])] + assert response.json()["rows"] == expected_returned_rows + # Check the database too + actual_rows = ( + await ds_write.client.get("/data/upsert_test.json?_shape=array") + ).json() + assert actual_rows == expected_rows + # Drop the upsert_test table + await ds_write.get_database("data").execute_write("drop table upsert_test") + + async def _insert_row(ds): insert_response = await ds.client.post( "/data/docs/-/insert",