Move Metadata to --internal database

Refs:
- https://github.com/simonw/datasette/pull/2343
- https://github.com/simonw/datasette/issues/2341
This commit is contained in:
Alex Garcia 2024-06-11 09:33:23 -07:00 committed by GitHub
commit e1bfab3fca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 286 additions and 214 deletions

View file

@ -443,6 +443,37 @@ class Datasette:
self._root_token = secrets.token_hex(32)
self.client = DatasetteClient(self)
async def apply_metadata_json(self):
    """Copy the entries of the locally loaded metadata file into the internal tables.

    Walks ``self._metadata_local`` level by level and stores every entry
    through the matching setter:

    1. top-level keys (except ``"databases"``) -> ``set_instance_metadata``
    2. per-database keys (except ``"tables"``) -> ``set_database_metadata``
    3. per-table keys (except ``"columns"``) -> ``set_resource_metadata``
    4. per-column descriptions -> ``set_column_metadata`` under the
       ``"description"`` key

    A missing (``None``) metadata dictionary, or ``None`` in place of any
    nested ``databases`` / ``tables`` / ``columns`` mapping, is treated as
    empty. Values that cannot be bound as a SQLite parameter (dicts, lists,
    tuples) are stored as JSON text; every other value is passed through
    unchanged.
    """
    import json  # function-scope import keeps this block self-contained

    def as_text(value):
        # sqlite3 refuses to bind containers, so serialise them to JSON text.
        if isinstance(value, (dict, list, tuple)):
            return json.dumps(value, default=str)
        return value

    local_metadata = self._metadata_local or {}

    # step 1: top-level metadata
    for key, value in local_metadata.items():
        if key == "databases":
            continue
        await self.set_instance_metadata(key, as_text(value))

    # step 2: database-level metadata
    for dbname, db in (local_metadata.get("databases") or {}).items():
        db = db or {}
        for key, value in db.items():
            if key == "tables":
                continue
            await self.set_database_metadata(dbname, key, as_text(value))

        # step 3: table-level metadata
        for tablename, table in (db.get("tables") or {}).items():
            table = table or {}
            for key, value in table.items():
                if key == "columns":
                    continue
                await self.set_resource_metadata(
                    dbname, tablename, key, as_text(value)
                )

            # step 4: column-level metadata (only descriptions in metadata.json)
            for columnname, column_description in (
                table.get("columns") or {}
            ).items():
                await self.set_column_metadata(
                    dbname,
                    tablename,
                    columnname,
                    "description",
                    as_text(column_description),
                )

    # TODO(alex) if metadata.json was loaded in, and --internal is not memory, then log
    # a warning to user that they should delete their metadata.json file
def get_jinja_environment(self, request: Request = None) -> Environment:
environment = self._jinja_env
if request:
@ -476,6 +507,7 @@ class Datasette:
internal_db = self.get_internal_database()
if not self.internal_db_created:
await init_internal_db(internal_db)
await self.apply_metadata_json()
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
@ -646,57 +678,113 @@ class Datasette:
orig[key] = upd_value
return orig
def metadata(self, key=None, database=None, table=None, fallback=True):
"""
Looks up metadata, cascading backwards from specified level.
Returns None if metadata value is not found.
"""
assert not (
database is None and table is not None
), "Cannot call metadata() with table= specified but not database="
metadata = {}
async def get_instance_metadata(self):
    """Return all instance-level metadata entries as a ``{key: value}`` dict.

    Reads every row of ``datasette_metadata_instance_entries`` from the
    internal database.
    """
    internal_db = self.get_internal_database()
    sql = """
SELECT
key,
value
FROM datasette_metadata_instance_entries
"""
    entries = await internal_db.execute(sql)
    return dict(entries)
for hook_dbs in pm.hook.get_metadata(
datasette=self, key=key, database=database, table=table
):
metadata = self._metadata_recursive_update(metadata, hook_dbs)
async def get_database_metadata(self, database_name: str):
    """Return the metadata entries of one database as a ``{key: value}`` dict.

    Only rows of ``datasette_metadata_database_entries`` whose
    ``database_name`` equals the argument are included.
    """
    internal_db = self.get_internal_database()
    sql = """
SELECT
key,
value
FROM datasette_metadata_database_entries
WHERE database_name = ?
"""
    params = [database_name]
    entries = await internal_db.execute(sql, params)
    return dict(entries)
# Security precaution: don't allow anything in the local config
# to be overwritten. This is a temporary measure; it is unclear whether
# it is a good idea long term, or whether it should just be a concern
# of the plugin's implementation.
metadata = self._metadata_recursive_update(metadata, self._metadata_local)
async def get_resource_metadata(self, database_name: str, resource_name: str):
    """Return the metadata entries of one resource as a ``{key: value}`` dict.

    Only rows of ``datasette_metadata_resource_entries`` matching both
    ``database_name`` and ``resource_name`` are included.
    """
    internal_db = self.get_internal_database()
    sql = """
SELECT
key,
value
FROM datasette_metadata_resource_entries
WHERE database_name = ?
AND resource_name = ?
"""
    params = [database_name, resource_name]
    entries = await internal_db.execute(sql, params)
    return dict(entries)
databases = metadata.get("databases") or {}
async def get_column_metadata(
    self, database_name: str, resource_name: str, column_name: str
):
    """Return the metadata entries of one column as a ``{key: value}`` dict.

    Only rows of ``datasette_metadata_column_entries`` matching
    ``database_name``, ``resource_name`` and ``column_name`` are included.
    """
    internal_db = self.get_internal_database()
    sql = """
SELECT
key,
value
FROM datasette_metadata_column_entries
WHERE database_name = ?
AND resource_name = ?
AND column_name = ?
"""
    params = [database_name, resource_name, column_name]
    entries = await internal_db.execute(sql, params)
    return dict(entries)
search_list = []
if database is not None:
search_list.append(databases.get(database) or {})
if table is not None:
table_metadata = ((databases.get(database) or {}).get("tables") or {}).get(
table
) or {}
search_list.insert(0, table_metadata)
async def set_instance_metadata(self, key: str, value: str):
    """Insert or overwrite one instance-level metadata entry.

    Writes to ``datasette_metadata_instance_entries`` in the internal
    database; an existing row with the same ``key`` has its value replaced.
    """
    # TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
    upsert_sql = """
INSERT INTO datasette_metadata_instance_entries(key, value)
VALUES(?, ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value;
"""
    params = [key, value]
    internal_db = self.get_internal_database()
    await internal_db.execute_write(upsert_sql, params)
search_list.append(metadata)
if not fallback:
# No fallback allowed, so just use the first one in the list
search_list = search_list[:1]
if key is not None:
for item in search_list:
if key in item:
return item[key]
return None
else:
# Return the merged list
m = {}
for item in search_list:
m.update(item)
return m
async def set_database_metadata(self, database_name: str, key: str, value: str):
    """Insert or overwrite one metadata entry of a database.

    Writes to ``datasette_metadata_database_entries`` in the internal
    database; an existing row with the same ``(database_name, key)`` has its
    value replaced.
    """
    # TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
    upsert_sql = """
INSERT INTO datasette_metadata_database_entries(database_name, key, value)
VALUES(?, ?, ?)
ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value;
"""
    params = [database_name, key, value]
    internal_db = self.get_internal_database()
    await internal_db.execute_write(upsert_sql, params)
@property
def _metadata(self):
# Property shortcut for self.metadata() called with no key, database or
# table arguments.
return self.metadata()
async def set_resource_metadata(
    self, database_name: str, resource_name: str, key: str, value: str
):
    """Insert or overwrite one metadata entry of a resource.

    Writes to ``datasette_metadata_resource_entries`` in the internal
    database; an existing row with the same
    ``(database_name, resource_name, key)`` has its value replaced.
    """
    # TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
    upsert_sql = """
INSERT INTO datasette_metadata_resource_entries(database_name, resource_name, key, value)
VALUES(?, ?, ?, ?)
ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value;
"""
    params = [database_name, resource_name, key, value]
    internal_db = self.get_internal_database()
    await internal_db.execute_write(upsert_sql, params)
async def set_column_metadata(
    self,
    database_name: str,
    resource_name: str,
    column_name: str,
    key: str,
    value: str,
):
    """Insert or overwrite one metadata entry of a column.

    Writes to ``datasette_metadata_column_entries`` in the internal
    database; an existing row with the same
    ``(database_name, resource_name, column_name, key)`` has its value
    replaced.
    """
    # TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
    upsert_sql = """
INSERT INTO datasette_metadata_column_entries(database_name, resource_name, column_name, key, value)
VALUES(?, ?, ?, ?, ?)
ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value;
"""
    params = [database_name, resource_name, column_name, key, value]
    internal_db = self.get_internal_database()
    await internal_db.execute_write(upsert_sql, params)
def get_internal_database(self):
# Return the internal database object held in self._internal_database.
return self._internal_database
@ -774,20 +862,6 @@ class Datasette:
if query:
return query
def update_with_inherited_metadata(self, metadata):
    """Fill in source/license/about fields of ``metadata`` with defaults.

    For each of the six inherited keys, a truthy value already present in
    ``metadata`` is kept; otherwise the value of ``self.metadata(<key>)`` is
    used (which may be ``None``). ``metadata`` is modified in place and
    nothing is returned.
    """
    inherited_keys = (
        "source",
        "source_url",
        "license",
        "license_url",
        "about",
        "about_url",
    )
    # All six values are computed before the dict is touched.
    metadata.update(
        {name: metadata.get(name) or self.metadata(name) for name in inherited_keys}
    )
def _prepare_connection(self, conn, database):
conn.row_factory = sqlite3.Row
conn.text_factory = lambda x: str(x, "utf-8", "replace")
@ -1079,11 +1153,6 @@ class Datasette:
url = "https://" + url[len("http://") :]
return url
def _register_custom_units(self):
    """Register any custom units defined in the metadata.json with Pint"""
    custom_units = self.metadata("custom_units")
    if not custom_units:
        # Nothing configured (missing key or empty list): nothing to define.
        return
    for definition in custom_units:
        ureg.define(definition)
def _connected_databases(self):
return [
{
@ -1436,10 +1505,6 @@ class Datasette:
),
r"/:memory:(?P<rest>.*)$",
)
add_route(
JsonDataView.as_view(self, "metadata.json", lambda: self.metadata()),
r"/-/metadata(\.(?P<format>json))?$",
)
add_route(
JsonDataView.as_view(self, "versions.json", self._versions),
r"/-/versions(\.(?P<format>json))?$",
@ -1585,7 +1650,6 @@ class Datasette:
def app(self):
"""Returns an ASGI app function that serves the whole of Datasette"""
routes = self._routes()
self._register_custom_units()
async def setup_db():
# First time server starts up, calculate table counts for immutable databases

View file

@ -17,10 +17,6 @@ def menu_links(datasette, actor):
"href": datasette.urls.path("/-/versions"),
"label": "Version info",
},
{
"href": datasette.urls.path("/-/metadata"),
"label": "Metadata",
},
{
"href": datasette.urls.path("/-/settings"),
"label": "Settings",

View file

@ -103,10 +103,15 @@ class Facet:
max_returned_rows = self.ds.setting("max_returned_rows")
table_facet_size = None
if self.table:
tables_metadata = self.ds.metadata("tables", database=self.database) or {}
table_metadata = tables_metadata.get(self.table) or {}
if table_metadata:
table_facet_size = table_metadata.get("facet_size")
config_facet_size = (
self.ds.config.get("databases", {})
.get(self.database, {})
.get("tables", {})
.get(self.table, {})
.get("facet_size")
)
if config_facet_size:
table_facet_size = config_facet_size
custom_facet_size = self.request.args.get("_facet_size")
if custom_facet_size:
if custom_facet_size == "max":

View file

@ -10,11 +10,6 @@ def startup(datasette):
"""Fires directly after Datasette first starts running"""
@hookspec
def get_metadata(datasette, key, database, table):
"""Return metadata to be merged into Datasette's metadata dictionary.

Receives the datasette instance together with the key, database and table
that are being looked up.
"""
@hookspec
def asgi_wrapper(datasette):
"""Return an ASGI middleware callable to wrap our ASGI application with."""

View file

@ -56,7 +56,6 @@ def json_renderer(request, args, data, error, truncated=None):
if truncated is not None:
data["truncated"] = truncated
if shape == "arrayfirst":
if not data["rows"]:
data = []

View file

@ -63,6 +63,43 @@ async def init_internal_db(db):
"""
).strip()
await db.execute_write_script(create_tables_sql)
await initialize_metadata_tables(db)
async def initialize_metadata_tables(db):
    """Create the four metadata tables on ``db`` if they do not exist yet.

    One table each for instance-, database-, resource- and column-level
    entries. Every table stores ``key`` / ``value`` text pairs and carries a
    unique constraint over its identifying columns plus ``key``. The whole
    script is sent through ``db.execute_write_script``.
    """
    schema_sql = """
CREATE TABLE IF NOT EXISTS datasette_metadata_instance_entries(
key text,
value text,
unique(key)
);
CREATE TABLE IF NOT EXISTS datasette_metadata_database_entries(
database_name text,
key text,
value text,
unique(database_name, key)
);
CREATE TABLE IF NOT EXISTS datasette_metadata_resource_entries(
database_name text,
resource_name text,
key text,
value text,
unique(database_name, resource_name, key)
);
CREATE TABLE IF NOT EXISTS datasette_metadata_column_entries(
database_name text,
resource_name text,
column_name text,
key text,
value text,
unique(database_name, resource_name, column_name, key)
);
"""
    await db.execute_write_script(schema_sql)
async def populate_schema_tables(internal_db, db):

View file

@ -274,10 +274,6 @@ class DataView(BaseView):
end = time.perf_counter()
data["query_ms"] = (end - start) * 1000
for key in ("source", "source_url", "license", "license_url"):
value = self.ds.metadata(key)
if value:
data[key] = value
# Special case for .jsono extension - redirect to _shape=objects
if _format == "jsono":
@ -385,7 +381,7 @@ class DataView(BaseView):
},
}
if "metadata" not in context:
context["metadata"] = self.ds.metadata()
context["metadata"] = await self.ds.get_instance_metadata()
r = await self.render(templates, request=request, context=context)
if status_code is not None:
r.status = status_code

View file

@ -63,8 +63,7 @@ class DatabaseView(View):
if format_ not in ("html", "json"):
raise NotFound("Invalid format: {}".format(format_))
metadata = (datasette.metadata("databases") or {}).get(database, {})
datasette.update_with_inherited_metadata(metadata)
metadata = await datasette.get_database_metadata(database)
sql_views = []
for view_name in await db.view_names():
@ -131,6 +130,7 @@ class DatabaseView(View):
"table_columns": (
await _table_columns(datasette, database) if allow_execute_sql else {}
),
"metadata": await datasette.get_database_metadata(database),
}
if format_ == "json":
@ -625,8 +625,7 @@ class QueryView(View):
)
}
)
metadata = (datasette.metadata("databases") or {}).get(database, {})
datasette.update_with_inherited_metadata(metadata)
metadata = await datasette.get_database_metadata(database)
renderers = {}
for key, (_, can_render) in datasette.renderers.items():

View file

@ -132,7 +132,13 @@ class IndexView(BaseView):
if self.ds.cors:
add_cors_headers(headers)
return Response(
json.dumps({db["name"]: db for db in databases}, cls=CustomJSONEncoder),
json.dumps(
{
"databases": {db["name"]: db for db in databases},
"metadata": await self.ds.get_instance_metadata(),
},
cls=CustomJSONEncoder,
),
content_type="application/json; charset=utf-8",
headers=headers,
)
@ -151,7 +157,7 @@ class IndexView(BaseView):
request=request,
context={
"databases": databases,
"metadata": self.ds.metadata(),
"metadata": await self.ds.get_instance_metadata(),
"datasette_version": __version__,
"private": not await self.ds.permission_allowed(
None, "view-instance"

View file

@ -85,10 +85,6 @@ class RowView(DataView):
"_table.html",
],
"row_actions": row_actions,
"metadata": (self.ds.metadata("databases") or {})
.get(database, {})
.get("tables", {})
.get(table, {}),
"top_row": make_slot_function(
"top_row",
self.ds,
@ -97,6 +93,7 @@ class RowView(DataView):
table=resolved.table,
row=rows[0],
),
"metadata": {},
}
data = {

View file

@ -147,7 +147,21 @@ async def display_columns_and_rows(
"""Returns columns, rows for specified table - including fancy foreign key treatment"""
sortable_columns = sortable_columns or set()
db = datasette.databases[database_name]
column_descriptions = datasette.metadata("columns", database_name, table_name) or {}
column_descriptions = dict(
await datasette.get_internal_database().execute(
"""
SELECT
column_name,
value
FROM datasette_metadata_column_entries
WHERE database_name = ?
AND resource_name = ?
AND key = 'description'
""",
[database_name, table_name],
)
)
column_details = {
col.name: col for col in await db.table_column_details(table_name)
}
@ -1478,14 +1492,22 @@ async def table_view_data(
async def extra_metadata():
"Metadata about the table and database"
metadata = (
(datasette.metadata("databases") or {})
.get(database_name, {})
.get("tables", {})
.get(table_name, {})
tablemetadata = await datasette.get_resource_metadata(database_name, table_name)
rows = await datasette.get_internal_database().execute(
"""
SELECT
column_name,
value
FROM datasette_metadata_column_entries
WHERE database_name = ?
AND resource_name = ?
AND key = 'description'
""",
[database_name, table_name],
)
datasette.update_with_inherited_metadata(metadata)
return metadata
tablemetadata["columns"] = dict(rows)
return tablemetadata
async def extra_database():
return database_name