diff --git a/datasette/tracer.py b/datasette/tracer.py index 9e66613b..7531d87f 100644 --- a/datasette/tracer.py +++ b/datasette/tracer.py @@ -14,6 +14,14 @@ trace_task_id = ContextVar("trace_task_id", default=None) def get_task_id(): + """Return a stable identifier for the current asyncio task. + + Checks the ``trace_task_id`` context variable first (set by + :func:`trace_child_tasks`) so that child tasks spawned inside a traced + block share the parent's ID and are captured by the same tracer. + Falls back to the ``id()`` of the running asyncio task, or ``None`` + when called outside an event loop. + """ current = trace_task_id.get(None) if current is not None: return current @@ -26,6 +34,14 @@ def get_task_id(): @contextmanager def trace_child_tasks(): + """Context manager that propagates the current task's trace ID to child tasks. + + Normally each asyncio task gets its own task ID, so traces recorded inside + ``asyncio.create_task()`` calls would be invisible to the parent tracer. + Wrapping the ``create_task`` call in this context manager pins the parent's + ID onto the ``trace_task_id`` context variable so child tasks are attributed + to the same tracer bucket. + """ token = trace_task_id.set(get_task_id()) yield trace_task_id.reset(token) @@ -33,6 +49,23 @@ def trace_child_tasks(): @contextmanager def trace(trace_type, **kwargs): + """Context manager that records a single timed trace entry. + + If no tracer is active for the current task (i.e. the code is not running + inside a :func:`capture_traces` block) the context manager is a no-op and + simply yields ``kwargs`` unchanged. + + When a tracer *is* active, the yielded dict is populated with any extra + context added inside the ``with`` block, and on exit a trace entry is + appended containing ``type``, ``start``, ``end``, ``duration_ms``, + ``traceback``, ``error``, plus all keyword arguments. + + Args: + trace_type: A string label for this trace entry (e.g. ``"sql"``). 
+ **kwargs: Arbitrary extra key/value pairs included in the trace entry. + Must not use any of the reserved keys: ``type``, ``start``, + ``end``, ``duration_ms``, ``traceback``. + """ assert not TRACE_RESERVED_KEYS.intersection( kwargs.keys() ), f".trace() keyword parameters cannot include {TRACE_RESERVED_KEYS}" @@ -67,6 +100,15 @@ def trace(trace_type, **kwargs): @contextmanager def capture_traces(tracer): + """Context manager that activates trace collection for the current task. + + While the block is active, any :func:`trace` calls made from the same + asyncio task will append entries to ``tracer``. The list is de-registered + when the block exits so that subsequent work is not attributed to it. + + Args: + tracer: A list to which trace entry dicts will be appended. + """ # tracer is a list task_id = get_task_id() if task_id is None: @@ -78,6 +120,16 @@ def capture_traces(tracer): class AsgiTracer: + """ASGI middleware that appends timing traces to responses when ``?_trace=1`` is set. + + When the ``_trace=1`` query parameter is present the middleware collects + all :func:`trace` entries produced while handling the request and injects + a ``_trace`` object into JSON responses, or a ``
<pre>`` block before
+    ``</body>`` in HTML responses.  Responses larger than
+    :attr:`max_body_bytes` are passed through unchanged to avoid excessive
+    memory use.
+    """
+
     # If the body is larger than this we don't attempt to append the trace
     max_body_bytes = 1024 * 256  # 256 KB
 
diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py
index 2dff9667..58c9d085 100644
--- a/datasette/utils/__init__.py
+++ b/datasette/utils/__init__.py
@@ -589,6 +589,13 @@ def detect_primary_keys(conn, table):
 
 
 def get_outbound_foreign_keys(conn, table):
+    """Return a list of outbound foreign keys for ``table``.
+
+    Each entry is a dict with ``column``, ``other_table``, and ``other_column``
+    keys.  Compound foreign keys (where a single constraint spans multiple
+    columns) are excluded because they cannot be represented in this flat
+    format.
+    """
     infos = conn.execute(f"PRAGMA foreign_key_list([{table}])").fetchall()
     fks = []
     for info in infos:
@@ -617,6 +624,21 @@ def get_outbound_foreign_keys(conn, table):
 
 
 def get_all_foreign_keys(conn):
+    """Return a mapping of every table to its incoming and outgoing foreign keys.
+
+    Returns a dict of the form::
+
+        {
+            "table_name": {
+                "incoming": [{"other_table": ..., "column": ..., "other_column": ...}, ...],
+                "outgoing": [{"other_table": ..., "column": ..., "other_column": ...}, ...],
+            },
+            ...
+        }
+
+    Both lists are sorted for deterministic ordering.  Compound foreign keys
+    and references to non-existent tables are silently skipped.
+    """
     tables = [
         r[0]
         for r in conn.execute(
@@ -656,6 +678,11 @@ def get_all_foreign_keys(conn):
 
 
 def detect_spatialite(conn):
+    """Return True if the database on ``conn`` looks like a SpatiaLite database.
+
+    Detection is based on the presence of the ``geometry_columns`` table,
+    which SpatiaLite creates when it initialises a spatial database.
+    """
     rows = conn.execute(
         'select 1 from sqlite_master where tbl_name = "geometry_columns"'
     ).fetchall()
@@ -731,6 +758,14 @@ filter_column_re = re.compile(r"^_filter_column_\d+$")
 
 
 def filters_should_redirect(special_args):
+    """Convert legacy ``_filter_column`` / ``_filter_op`` / ``_filter_value``
+    query parameters into the compact ``column__op=value`` form used by the
+    table view, and signal that a redirect is needed.
+
+    Returns a list of ``(key, value)`` pairs to add to the redirect URL.
+    A ``value`` of ``None`` means the key should be removed from the URL.
+    Returns an empty list when no legacy filter parameters are present.
+    """
     redirect_params = []
     # Handle _filter_column=foo&_filter_op=exact&_filter_value=...
     filter_column = special_args.get("_filter_column")
@@ -966,6 +1001,12 @@ class LoadExtension(click.ParamType):
 
 
 def format_bytes(bytes):
+    """Format a byte count as a human-readable string.
+
+    Steps through ``bytes``, ``KB``, ``MB``, ``GB``, and ``TB``, dividing by
+    1024 at each step, and returns the value formatted to one decimal place
+    (or as an integer for plain bytes).
+    """
     current = float(bytes)
     for unit in ("bytes", "KB", "MB", "GB", "TB"):
         if current < 1024:
@@ -981,6 +1022,13 @@ _escape_fts_re = re.compile(r'\s+|(".*?")')
 
 
 def escape_fts(query):
+    """Escape a full-text search query for safe use with SQLite FTS.
+
+    Splits the query on whitespace, wrapping each bare token in double quotes
+    so that special FTS operators (``AND``, ``OR``, ``*``, etc.) are treated
+    as literals.  Tokens that are already quoted are left unchanged.
+    If the query has an odd number of double quotes, one is appended first.
+    """
     # If query has unbalanced ", add one at end
     if query.count('"') % 2:
         query += '"'