/db/table/-/upsert API

Close #1878

Also made a few tweaks to how _r works in tokens and actors,
refs #1855 - I needed that mechanism for the tests.
This commit is contained in:
Simon Willison 2022-12-07 17:12:15 -08:00 committed by GitHub
commit 272982e8a6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 401 additions and 44 deletions

View file

@ -40,7 +40,7 @@ from .views.special import (
PermissionsDebugView,
MessagesDebugView,
)
from .views.table import TableView, TableInsertView, TableDropView
from .views.table import TableView, TableInsertView, TableUpsertView, TableDropView
from .views.row import RowView, RowDeleteView, RowUpdateView
from .renderer import json_renderer
from .url_builder import Urls
@ -1292,6 +1292,10 @@ class Datasette:
TableInsertView.as_view(self),
r"/(?P<database>[^\/\.]+)/(?P<table>[^\/\.]+)/-/insert$",
)
add_route(
TableUpsertView.as_view(self),
r"/(?P<database>[^\/\.]+)/(?P<table>[^\/\.]+)/-/upsert$",
)
add_route(
TableDropView.as_view(self),
r"/(?P<database>[^\/\.]+)/(?P<table>[^\/\.]+)/-/drop$",

View file

@ -85,7 +85,7 @@ def permission_allowed_actor_restrictions(actor, action, resource):
if action_initials in database_allowed:
return None
# Or the current table? That's any time the resource is (database, table)
if not isinstance(resource, str) and len(resource) == 2:
if resource is not None and not isinstance(resource, str) and len(resource) == 2:
database, table = resource
table_allowed = _r.get("t", {}).get(database, {}).get(table)
# TODO: What should this do for canned queries?
@ -138,6 +138,8 @@ def actor_from_request(datasette, request):
# Expired
return None
actor = {"id": decoded["a"], "token": "dstok"}
if "_r" in decoded:
actor["_r"] = decoded["_r"]
if duration:
actor["token_expires"] = created + duration
return actor

View file

@ -316,21 +316,37 @@ class ApiExplorerView(BaseView):
request.actor, "insert-row", (name, table)
):
pks = await db.primary_keys(table)
table_links.append(
{
"path": self.ds.urls.table(name, table) + "/-/insert",
"method": "POST",
"label": "Insert rows into {}".format(table),
"json": {
"rows": [
{
column: None
for column in await db.table_columns(table)
if column not in pks
}
]
table_links.extend(
[
{
"path": self.ds.urls.table(name, table) + "/-/insert",
"method": "POST",
"label": "Insert rows into {}".format(table),
"json": {
"rows": [
{
column: None
for column in await db.table_columns(table)
if column not in pks
}
]
},
},
}
{
"path": self.ds.urls.table(name, table) + "/-/upsert",
"method": "POST",
"label": "Upsert rows into {}".format(table),
"json": {
"rows": [
{
column: None
for column in await db.table_columns(table)
if column not in pks
}
]
},
},
]
)
if await self.ds.permission_allowed(
request.actor, "drop-table", (name, table)

View file

@ -1074,9 +1074,18 @@ class TableInsertView(BaseView):
def __init__(self, datasette):
self.ds = datasette
async def _validate_data(self, request, db, table_name):
async def _validate_data(self, request, db, table_name, pks, upsert):
errors = []
pks_list = []
if isinstance(pks, str):
pks_list = [pks]
else:
pks_list = list(pks)
if not pks_list:
pks_list = ["rowid"]
def _errors(errors):
return None, errors, {}
@ -1134,7 +1143,18 @@ class TableInsertView(BaseView):
# Validate columns of each row
columns = set(await db.table_columns(table_name))
columns.update(pks_list)
for i, row in enumerate(rows):
if upsert:
# It MUST have the primary key
missing_pks = [pk for pk in pks_list if pk not in row]
if missing_pks:
errors.append(
'Row {} is missing primary key column(s): "{}"'.format(
i, '", "'.join(missing_pks)
)
)
invalid_columns = set(row.keys()) - columns
if invalid_columns:
errors.append(
@ -1146,7 +1166,7 @@ class TableInsertView(BaseView):
return _errors(errors)
return rows, errors, extras
async def post(self, request):
async def post(self, request, upsert=False):
try:
resolved = await self.ds.resolve_table(request)
except NotFound as e:
@ -1159,28 +1179,66 @@ class TableInsertView(BaseView):
db = self.ds.get_database(database_name)
if not await db.table_exists(table_name):
return _error(["Table not found: {}".format(table_name)], 404)
# Must have insert-row permission
if not await self.ds.permission_allowed(
request.actor, "insert-row", resource=(database_name, table_name)
):
return _error(["Permission denied"], 403)
rows, errors, extras = await self._validate_data(request, db, table_name)
if upsert:
# Must have insert-row AND upsert-row permissions
if not (
await self.ds.permission_allowed(
request.actor, "insert-row", database_name, table_name
)
and await self.ds.permission_allowed(
request.actor, "update-row", database_name, table_name
)
):
return _error(
["Permission denied: need both insert-row and update-row"], 403
)
else:
# Must have insert-row permission
if not await self.ds.permission_allowed(
request.actor, "insert-row", resource=(database_name, table_name)
):
return _error(["Permission denied"], 403)
if not db.is_mutable:
return _error(["Database is immutable"], 403)
pks = await db.primary_keys(table_name)
rows, errors, extras = await self._validate_data(
request, db, table_name, pks, upsert
)
if errors:
return _error(errors, 400)
# No that we've passed pks to _validate_data it's safe to
# fix the rowids case:
if not pks:
pks = ["rowid"]
ignore = extras.get("ignore")
replace = extras.get("replace")
if upsert and (ignore or replace):
return _error(["Upsert does not support ignore or replace"], 400)
should_return = bool(extras.get("return", False))
# Insert rows
def insert_rows(conn):
row_pk_values_for_later = []
if should_return and upsert:
row_pk_values_for_later = [tuple(row[pk] for pk in pks) for row in rows]
def insert_or_upsert_rows(conn):
table = sqlite_utils.Database(conn)[table_name]
if should_return:
kwargs = {}
if upsert:
kwargs["pk"] = pks[0] if len(pks) == 1 else pks
else:
kwargs = {"ignore": ignore, "replace": replace}
if should_return and not upsert:
rowids = []
method = table.upsert if upsert else table.insert
for row in rows:
rowids.append(
table.insert(row, ignore=ignore, replace=replace).last_rowid
)
rowids.append(method(row, **kwargs).last_rowid)
return list(
table.rows_where(
"rowid in ({})".format(",".join("?" for _ in rowids)),
@ -1188,16 +1246,39 @@ class TableInsertView(BaseView):
)
)
else:
table.insert_all(rows, ignore=ignore, replace=replace)
method_all = table.upsert_all if upsert else table.insert_all
method_all(rows, **kwargs)
try:
rows = await db.execute_write_fn(insert_rows)
rows = await db.execute_write_fn(insert_or_upsert_rows)
except Exception as e:
return _error([str(e)])
result = {"ok": True}
if should_return:
result["rows"] = rows
return Response.json(result, status=201)
if upsert:
# Fetch based on initial input IDs
where_clause = " OR ".join(
["({})".format(" AND ".join("{} = ?".format(pk) for pk in pks))]
* len(row_pk_values_for_later)
)
args = list(itertools.chain.from_iterable(row_pk_values_for_later))
fetched_rows = await db.execute(
"select {}* from [{}] where {}".format(
"rowid, " if pks == ["rowid"] else "", table_name, where_clause
),
args,
)
result["rows"] = [dict(r) for r in fetched_rows.rows]
else:
result["rows"] = rows
return Response.json(result, status=200 if upsert else 201)
class TableUpsertView(TableInsertView):
name = "table-upsert"
async def post(self, request):
return await super().post(request, upsert=True)
class TableDropView(BaseView):

View file

@ -527,7 +527,7 @@ To insert multiple rows at a time, use the same API method but send a list of di
]
}
If successful, this will return a ``201`` status code and an empty ``{}`` response body.
If successful, this will return a ``201`` status code and a ``{"ok": true}`` response body.
To return the newly inserted rows, add the ``"return": true`` key to the request body:
@ -582,6 +582,116 @@ Pass ``"ignore": true`` to ignore these errors and insert the other rows:
Or you can pass ``"replace": true`` to replace any rows with conflicting primary keys with the new values.
.. _TableUpsertView:
Upserting rows
~~~~~~~~~~~~~~
An upsert is an insert or update operation. If a row with a matching primary key already exists it will be updated - otherwise a new row will be inserted.
The upsert API is mostly the same shape as the :ref:`insert API <TableInsertView>`. It requires both the :ref:`permissions_insert_row` and :ref:`permissions_update_row` permissions.
::
POST /<database>/<table>/-/upsert
Content-Type: application/json
Authorization: Bearer dstok_<rest-of-token>
.. code-block:: json
{
"rows": [
{
"id": 1,
"title": "Updated title for 1",
"description": "Updated description for 1"
},
{
"id": 2,
"description": "Updated description for 2",
},
{
"id": 3,
"title": "Item 3",
"description": "Description for 3"
}
]
}
Imagine a table with a primary key of ``id`` and which already has rows with ``id`` values of ``1`` and ``2``.
The above example will:
- Update the row with ``id`` of ``1`` to set both ``title`` and ``description`` to the new values
- Update the row with ``id`` of ``2`` to set ``title`` to the new value - ``description`` will be left unchanged
- Insert a new row with ``id`` of ``3`` and both ``title`` and ``description`` set to the new values
Similar to ``/-/insert``, a ``row`` key with an object can be used instead of a ``rows`` array to upsert a single row.
If successful, this will return a ``200`` status code and a ``{"ok": true}`` response body.
Add ``"return": true`` to the request body to return full copies of the affected rows after they have been inserted or updated:
.. code-block:: json
{
"rows": [
{
"id": 1,
"title": "Updated title for 1",
"description": "Updated description for 1"
},
{
"id": 2,
"description": "Updated description for 2",
},
{
"id": 3,
"title": "Item 3",
"description": "Description for 3"
}
],
"return": true
}
This will return the following:
.. code-block:: json
{
"ok": true,
"rows": [
{
"id": 1,
"title": "Updated title for 1",
"description": "Updated description for 1"
},
{
"id": 2,
"title": "Item 2",
"description": "Updated description for 2"
},
{
"id": 3,
"title": "Item 3",
"description": "Description for 3"
}
]
}
When using upsert you must provide the primary key column (or columns if the table has a compound primary key) for every row, or you will get a ``400`` error:
.. code-block:: json
{
"ok": false,
"errors": [
"Row 0 is missing primary key column(s): \"id\""
]
}
If your table does not have an explicit primary key you should pass the SQLite ``rowid`` key instead.
.. _RowUpdateView:
Updating a row

View file

@ -21,16 +21,15 @@ def ds_write(tmp_path_factory):
db.close()
def write_token(ds, actor_id="root"):
return "dstok_{}".format(
ds.sign(
{"a": actor_id, "token": "dstok", "t": int(time.time())}, namespace="token"
)
)
def write_token(ds, actor_id="root", permissions=None):
to_sign = {"a": actor_id, "token": "dstok", "t": int(time.time())}
if permissions:
to_sign["_r"] = {"a": permissions}
return "dstok_{}".format(ds.sign(to_sign, namespace="token"))
@pytest.mark.asyncio
async def test_write_row(ds_write):
async def test_insert_row(ds_write):
token = write_token(ds_write)
response = await ds_write.client.post(
"/data/docs/-/insert",
@ -42,6 +41,7 @@ async def test_write_row(ds_write):
)
expected_row = {"id": 1, "title": "Test", "score": 1.2, "age": 5}
assert response.status_code == 201
assert response.json()["ok"] is True
assert response.json()["rows"] == [expected_row]
rows = (await ds_write.get_database("data").execute("select * from docs")).rows
assert dict(rows[0]) == expected_row
@ -49,7 +49,7 @@ async def test_write_row(ds_write):
@pytest.mark.asyncio
@pytest.mark.parametrize("return_rows", (True, False))
async def test_write_rows(ds_write, return_rows):
async def test_insert_rows(ds_write, return_rows):
token = write_token(ds_write)
data = {
"rows": [
@ -194,6 +194,13 @@ async def test_write_rows(ds_write, return_rows):
400,
['Invalid parameter: "one", "two"'],
),
(
"/immutable/docs/-/insert",
{"rows": [{"title": "Test"}]},
None,
403,
["Database is immutable"],
),
# Validate columns of each row
(
"/data/docs/-/insert",
@ -205,12 +212,62 @@ async def test_write_rows(ds_write, return_rows):
"Row 1 has invalid columns: bad, worse",
],
),
## UPSERT ERRORS:
(
"/immutable/docs/-/upsert",
{"rows": [{"title": "Test"}]},
None,
403,
["Database is immutable"],
),
(
"/data/badtable/-/upsert",
{"rows": [{"title": "Test"}]},
None,
404,
["Table not found: badtable"],
),
# missing primary key
(
"/data/docs/-/upsert",
{"rows": [{"title": "Missing PK"}]},
None,
400,
['Row 0 is missing primary key column(s): "id"'],
),
# Upsert does not support ignore or replace
(
"/data/docs/-/upsert",
{"rows": [{"id": 1, "title": "Bad"}], "ignore": True},
None,
400,
["Upsert does not support ignore or replace"],
),
# Upsert permissions
(
"/data/docs/-/upsert",
{"rows": [{"id": 1, "title": "Disallowed"}]},
"insert-but-not-update",
403,
["Permission denied: need both insert-row and update-row"],
),
(
"/data/docs/-/upsert",
{"rows": [{"id": 1, "title": "Disallowed"}]},
"update-but-not-insert",
403,
["Permission denied: need both insert-row and update-row"],
),
),
)
async def test_write_row_errors(
async def test_insert_or_upsert_row_errors(
ds_write, path, input, special_case, expected_status, expected_errors
):
token = write_token(ds_write)
if special_case == "insert-but-not-update":
token = write_token(ds_write, permissions=["ir", "vi"])
if special_case == "update-but-not-insert":
token = write_token(ds_write, permissions=["ur", "vi"])
if special_case == "duplicate_id":
await ds_write.get_database("data").execute_write(
"insert into docs (id) values (1)"
@ -226,6 +283,12 @@ async def test_write_row_errors(
else "application/json",
},
)
actor_response = (
await ds_write.client.get("/-/actor.json", headers=kwargs["headers"])
).json()
print(actor_response)
if special_case == "invalid_json":
del kwargs["json"]
kwargs["content"] = "{bad json"
@ -302,6 +365,87 @@ async def test_insert_ignore_replace(
assert response.json()["rows"] == expected_rows
@pytest.mark.asyncio
@pytest.mark.parametrize(
"initial,input,expected_rows",
(
(
# Simple primary key update
{"rows": [{"id": 1, "title": "One"}], "pk": "id"},
{"rows": [{"id": 1, "title": "Two"}]},
[
{"id": 1, "title": "Two"},
],
),
(
# Multiple rows update one of them
{
"rows": [{"id": 1, "title": "One"}, {"id": 2, "title": "Two"}],
"pk": "id",
},
{"rows": [{"id": 1, "title": "Three"}]},
[
{"id": 1, "title": "Three"},
{"id": 2, "title": "Two"},
],
),
(
# rowid update
{"rows": [{"title": "One"}]},
{"rows": [{"rowid": 1, "title": "Two"}]},
[
{"rowid": 1, "title": "Two"},
],
),
(
# Compound primary key update
{"rows": [{"id": 1, "title": "One", "score": 1}], "pks": ["id", "score"]},
{"rows": [{"id": 1, "title": "Two", "score": 1}]},
[
{"id": 1, "title": "Two", "score": 1},
],
),
),
)
@pytest.mark.parametrize("should_return", (False, True))
async def test_upsert(ds_write, initial, input, expected_rows, should_return):
token = write_token(ds_write)
# Insert initial data
initial["table"] = "upsert_test"
create_response = await ds_write.client.post(
"/data/-/create",
json=initial,
headers={
"Authorization": "Bearer {}".format(token),
"Content-Type": "application/json",
},
)
assert create_response.status_code == 201
if should_return:
input["return"] = True
response = await ds_write.client.post(
"/data/upsert_test/-/upsert",
json=input,
headers={
"Authorization": "Bearer {}".format(token),
"Content-Type": "application/json",
},
)
assert response.status_code == 200
assert response.json()["ok"] is True
if should_return:
# We only expect it to return rows corresponding to those we sent
expected_returned_rows = expected_rows[: len(input["rows"])]
assert response.json()["rows"] == expected_returned_rows
# Check the database too
actual_rows = (
await ds_write.client.get("/data/upsert_test.json?_shape=array")
).json()
assert actual_rows == expected_rows
# Drop the upsert_test table
await ds_write.get_database("data").execute_write("drop table upsert_test")
async def _insert_row(ds):
insert_response = await ds.client.post(
"/data/docs/-/insert",