feat(导出): 添加Markdown格式导出功能

为数据库查询和表格视图添加Markdown格式导出支持,实现类似CSV导出的流式处理机制。新增stream_markdown函数处理Markdown表格生成,包括表头、分隔线和数据行的格式化,同时支持扩展列和二进制数据的处理。
This commit is contained in:
muyusajiangtian 2026-04-29 11:03:42 +08:00
commit c854ad4bf5
3 changed files with 217 additions and 0 deletions

View file

@ -568,3 +568,184 @@ async def stream_csv(datasette, fetch_data, request, database):
headers["content-disposition"] = disposition
return AsgiStream(stream_fn, headers=headers, content_type=content_type)
async def stream_markdown(datasette, fetch_data, request, database):
    """Stream query or table results to the client as a Markdown table.

    The first page is fetched eagerly so that SQL errors can still be
    reported as a regular error response; the remaining output is produced
    lazily by the ``stream_fn`` coroutine handed to ``AsgiStream``.

    Args:
        datasette: Application object. This function uses its ``setting()``,
            ``cors``, ``absolute_url()`` and ``urls.row_blob()`` members.
        fetch_data: Async callable invoked as ``fetch_data(request)`` for the
            first page and ``fetch_data(request, _next=token)`` afterwards.
            It returns either a ``Response`` (passed straight back to the
            caller) or a tuple whose first item is a dict with ``"rows"`` and
            ``"columns"`` and, optionally, ``"expanded_columns"``, ``"next"``,
            ``"table"`` and ``"primary_keys"``.
        request: Incoming request. ``_stream``, ``_next``, ``_trace``,
            ``_header``, ``_nofacet`` and ``_nocount`` are read from its
            query string.
        database: Database name, used in blob URLs and as the fallback
            download filename.

    Returns:
        The ``Response`` produced by ``fetch_data`` if there is one, otherwise
        an ``AsgiStream`` that emits the Markdown table.

    Raises:
        BadRequest: ``_stream`` was requested while the ``allow_csv_stream``
            setting is off, or together with ``_next``.
        DatasetteError: The first fetch raised ``sqlite3.OperationalError``
            or ``InvalidSql`` (reported with status 400).
    """
    # Function-scope import keeps this block self-contained.
    import html

    stream = request.args.get("_stream")
    # Do not calculate facets or counts:
    extra_parameters = [
        "{}=1".format(key)
        for key in ("_nofacet", "_nocount")
        if not request.args.get(key)
    ]
    if extra_parameters:
        # Replace request object with a new one with modified scope
        if not request.query_string:
            new_query_string = "&".join(extra_parameters)
        else:
            new_query_string = request.query_string + "&" + "&".join(extra_parameters)
        new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
        request = Request(new_scope, request.receive)
    if stream:
        # Some quick soundness checks. Markdown streaming is gated by the
        # same setting as CSV streaming.
        if not datasette.setting("allow_csv_stream"):
            raise BadRequest("Markdown streaming is disabled")
        if request.args.get("_next"):
            raise BadRequest("_next not allowed for Markdown streaming")

    # Fetch the first page. Only the fetch itself sits in the try block so
    # that unrelated errors are not mislabelled as "Invalid SQL".
    try:
        response_or_template_contexts = await fetch_data(request)
    except (sqlite3.OperationalError, InvalidSql) as e:
        raise DatasetteError(str(e), title="Invalid SQL", status=400) from e
    if isinstance(response_or_template_contexts, Response):
        return response_or_template_contexts
    # fetch_data returns a 3- or 4-tuple; only the leading data dict is used.
    data, *_ = response_or_template_contexts

    # Each expanded column is followed by an extra "<column>_label" heading.
    expanded_columns = set(data.get("expanded_columns") or [])
    headings = []
    for column in data["columns"]:
        headings.append(column)
        if column in expanded_columns:
            headings.append(f"{column}_label")

    content_type = "text/markdown; charset=utf-8"
    preamble = ""
    postamble = ""
    trace = request.args.get("_trace")
    if trace:
        # Debug mode: wrap the Markdown in an HTML page with a <textarea>.
        content_type = "text/html; charset=utf-8"
        preamble = (
            "<html><head><title>Markdown debug</title></head>"
            '<body><textarea style="width: 90%; height: 70vh">'
        )
        postamble = "</textarea></body></html>"

    # One-pass translation: escape the cell delimiter and flatten line breaks,
    # either of which would otherwise break the table row.
    cell_escapes = str.maketrans({"|": "\\|", "\n": " ", "\r": " "})

    def escape_markdown(value):
        """Return ``value`` as text that is safe inside a Markdown table cell."""
        if value is None:
            return ""
        return str(value).translate(cell_escapes)

    def format_row(cells):
        """Render an iterable of cell values as one Markdown table line."""
        return "| " + " | ".join(escape_markdown(cell) for cell in cells) + " |\n"

    def blob_url(page, row, column, cell):
        """Return an absolute URL from which the binary ``cell`` can be downloaded."""
        if page.get("table"):
            # If this is a table page, use .urls.row_blob()
            pks = page.get("primary_keys") or []
            return datasette.absolute_url(
                request,
                datasette.urls.row_blob(
                    database,
                    page["table"],
                    path_from_row_pks(row, pks, not pks),
                    column,
                ),
            )
        # Otherwise generate URL for this query
        url = datasette.absolute_url(
            request,
            path_with_format(
                request=request,
                format="blob",
                extra_qs={
                    "_blob_column": column,
                    "_blob_hash": hashlib.sha256(cell).hexdigest(),
                },
                replace_format="markdown",
            ),
        )
        # Drop the parameters that were appended to the request above.
        return url.replace("&_nocount=1", "").replace("&_nofacet=1", "")

    async def stream_fn(r):
        nonlocal data
        # Output is capped using the same setting as CSV export
        # (presumably a size limit in megabytes -- see LimitedWriter).
        limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))

        async def write_text(text):
            # In trace mode the payload sits inside an HTML <textarea>, so it
            # must be HTML-escaped or cell content could break out of the
            # wrapper and inject markup.
            await limited_writer.write(html.escape(text) if trace else text)

        if trace:
            await limited_writer.write(preamble)
        first = True
        next_token = None
        while first or (next_token and stream):
            try:
                if not first:
                    data, _, _ = await fetch_data(request, _next=next_token)
                if first:
                    if request.args.get("_header") != "off":
                        # Header row followed by the separator row
                        await write_text(format_row(headings))
                        await write_text(format_row(["---"] * len(headings)))
                    first = False
                next_token = data.get("next")
                for row in data["rows"]:
                    if any(isinstance(cell, bytes) for cell in row):
                        # Binary cells are replaced by a download URL. Pair
                        # cells with data["columns"], not headings: headings
                        # carries extra "_label" entries and would misalign
                        # column names once a column is expanded.
                        source_row = row
                        row = [
                            blob_url(data, source_row, column, cell)
                            if isinstance(cell, bytes)
                            else cell
                            for column, cell in zip(data["columns"], source_row)
                        ]
                    if expanded_columns:
                        # Look for {"value": ..., "label": ...} dicts and expand
                        expanded_row = []
                        for column, cell in zip(data["columns"], row):
                            if column not in expanded_columns:
                                expanded_row.append(cell)
                            elif cell is None:
                                expanded_row.extend(("", ""))
                            elif isinstance(cell, dict):
                                expanded_row.extend((cell["value"], cell["label"]))
                            else:
                                expanded_row.extend((cell, ""))
                        row = expanded_row
                    await write_text(format_row(row))
            except Exception as ex:
                # The response has already started, so the error can only be
                # reported inside the stream itself (deliberate best effort).
                sys.stderr.write("Caught this error: {}\n".format(ex))
                sys.stderr.flush()
                message = str(ex)
                await r.write(html.escape(message) if trace else message)
                return
        await limited_writer.write(postamble)

    headers = {}
    if datasette.cors:
        add_cors_headers(headers)
    # Always set Content-Disposition for Markdown export
    # NOTE(review): this also applies in _trace mode, where the body is an
    # HTML debug page -- confirm that downloading it as ".md" is intended.
    disposition = 'attachment; filename="{}.md"'.format(
        request.url_vars.get("table", database)
    )
    headers["content-disposition"] = disposition
    return AsgiStream(stream_fn, headers=headers, content_type=content_type)

View file

@ -651,6 +651,15 @@ class QueryView(View):
return data, None, None
return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
elif format_ == "markdown":
async def fetch_data_for_markdown(request, _next=None):
    """Run the enclosing view's query and shape it for ``stream_markdown``.

    ``request`` and ``_next`` are accepted to match the ``fetch_data``
    calling convention but are not used: the query is executed as-is with
    ``truncate=True`` and no ``"next"`` token is reported.

    Returns:
        A ``(data, None, None)`` tuple where ``data`` holds the result
        ``"rows"`` and ``"columns"``.
    """
    query_results = await db.execute(sql, params, truncate=True)
    return (
        {"rows": query_results.rows, "columns": query_results.columns},
        None,
        None,
    )
from datasette.views.base import stream_markdown
return await stream_markdown(datasette, fetch_data_for_markdown, request, db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)

View file

@ -1024,6 +1024,33 @@ async def table_view_traced(datasette, request):
return data, None, None
return await stream_csv(datasette, fetch_data, request, resolved.db.name)
elif format_ == "markdown":
async def fetch_data(request, _next=None):
    """Fetch one page of table data in the shape ``stream_markdown`` expects.

    Delegates to ``table_view_data`` (forwarding ``_next`` as the pagination
    token) and copies the row, column and expanded-column information, plus
    the table name, onto the returned data dict.

    Returns:
        A ``(data, None, None)`` tuple.
    """
    page = await table_view_data(
        datasette,
        request,
        resolved,
        extra_extras=extra_extras,
        context_for_html_hack=context_for_html_hack,
        default_labels=default_labels,
        _next=_next,
    )
    # The trailing two items (SQL text and next URL) are not needed here.
    payload, page_rows, page_columns, page_expanded, _sql, _next_url = page
    payload["rows"] = page_rows
    payload["table"] = resolved.table
    payload["columns"] = page_columns
    payload["expanded_columns"] = page_expanded
    return payload, None, None
from datasette.views.base import stream_markdown
return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)