mirror of
https://github.com/simonw/datasette.git
synced 2026-06-14 04:56:59 +02:00
Merge 3bc4560a75 into 911954347e
This commit is contained in:
commit
ac98b7a262
5 changed files with 1867 additions and 5 deletions
|
|
@ -124,7 +124,7 @@
|
|||
<p><a class="not-underlined" title="{{ query.sql }}" href="{{ urls.database(database) }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&{{ query.params|urlencode|safe }}{% endif %}">✎ <span class="underlined">View and edit SQL</span></a></p>
|
||||
{% endif %}
|
||||
|
||||
<p class="export-links">This data as {% for name, url in renderers.items() %}<a href="{{ url }}">{{ name }}</a>{{ ", " if not loop.last }}{% endfor %}{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>){% endif %}</p>
|
||||
<p class="export-links">This data as {% for name, url in renderers.items() %}<a href="{{ url }}">{{ name }}</a>{{ ", " if not loop.last }}{% endfor %}{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>), <a href="{{ url_markdown }}">Markdown</a> (<a href="#export-markdown">advanced</a>){% endif %}</p>
|
||||
|
||||
{% if suggested_facets %}
|
||||
{% include "_suggested_facets.html" %}
|
||||
|
|
@ -186,6 +186,20 @@ window._setColumnTypeData = {{ set_column_type_ui|tojson }};
|
|||
{% endfor %}
|
||||
</p>
|
||||
</form>
|
||||
<div id="export-markdown" class="advanced-export">
|
||||
<form class="core" action="{{ url_markdown_path }}" method="get">
|
||||
<p>
|
||||
Markdown options:
|
||||
{% if expandable_columns %}<label><input type="checkbox" name="_labels" checked> expand labels</label>{% endif %}
|
||||
{% if next_url and settings.allow_csv_stream %}<label><input type="checkbox" name="_stream"> export all rows (unlimited)</label>{% endif %}
|
||||
<label>Max rows: <input type="number" name="_max_rows" value="500" min="0" max="10000" style="width: 80px;"></label>
|
||||
<input type="submit" value="Export Markdown">
|
||||
{% for key, value in url_markdown_hidden_args %}
|
||||
<input type="hidden" name="{{ key }}" value="{{ value }}">
|
||||
{% endfor %}
|
||||
</p>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
|
|
|||
|
|
@ -212,16 +212,21 @@ class DataView(BaseView):
|
|||
async def as_csv(self, request, database):
    """Return the CSV response for this view.

    Delegates to ``stream_csv``, passing ``self.ds`` as its ``datasette``
    argument and ``self.data`` as the ``fetch_data`` callable.
    """
    csv_response = await stream_csv(self.ds, self.data, request, database)
    return csv_response
|
||||
|
||||
async def as_markdown(self, request, database):
    """Return the Markdown response for this view.

    Delegates to ``stream_markdown``, passing ``self.ds`` as its
    ``datasette`` argument and ``self.data`` as the ``fetch_data`` callable.
    """
    markdown_response = await stream_markdown(
        self.ds, self.data, request, database
    )
    return markdown_response
|
||||
|
||||
async def get(self, request):
|
||||
db = await self.ds.resolve_database(request)
|
||||
database = db.name
|
||||
database_route = db.route
|
||||
|
||||
_format = request.url_vars["format"]
|
||||
_format = request.url_vars["format"] or request.args.get("_format")
|
||||
data_kwargs = {}
|
||||
|
||||
if _format == "csv":
|
||||
return await self.as_csv(request, database_route)
|
||||
elif _format == "markdown":
|
||||
return await self.as_markdown(request, database_route)
|
||||
|
||||
if _format is None:
|
||||
# HTML views default to expanding all foreign key labels
|
||||
|
|
@ -582,3 +587,233 @@ async def stream_csv(datasette, fetch_data, request, database):
|
|||
headers["content-disposition"] = disposition
|
||||
|
||||
return AsgiStream(stream_fn, headers=headers, content_type=content_type)
|
||||
|
||||
|
||||
async def stream_markdown(datasette, fetch_data, request, database):
    """Stream the rows returned by ``fetch_data`` as a Markdown table.

    Query string options read from ``request.args``:

    * ``_max_rows`` - maximum number of data rows to write. Defaults to 500
      and is clamped to the range 0..10000. Values that are not integers
      fall back to the default.
    * ``_stream`` - when set, the row limit is lifted and every page
      reachable through the ``next`` token is written. Requires the
      ``allow_csv_stream`` setting and is incompatible with ``_next``.
    * ``_header=off`` - skip the header and separator rows.
    * ``_trace`` - wrap the output in an HTML ``<textarea>`` for debugging.

    Args:
        datasette: object providing ``setting()``, ``cors``,
            ``absolute_url()`` and ``urls.row_blob()``.
        fetch_data: async callable invoked as ``fetch_data(request)`` for the
            first page and ``fetch_data(request, _next=token)`` afterwards.
            It returns either a ``Response`` (passed straight through) or a
            3- or 4-tuple whose first item is a dict with ``rows`` and
            ``columns`` keys and optionally ``expanded_columns``, ``next``,
            ``table`` and ``primary_keys``.
        request: the incoming request.
        database: database name or route, used to build BLOB URLs and as the
            fallback download filename.

    Returns:
        An ``AsgiStream`` that writes the table, or the ``Response`` returned
        by ``fetch_data``.

    Raises:
        BadRequest: if streaming is requested but disabled, or combined with
            ``_next``.
        DatasetteError: with status 400 if fetching the first page raises
            ``sqlite3.OperationalError`` or ``InvalidSql``.
    """
    # Local import: only needed to escape output in ``_trace`` debug mode.
    from html import escape as escape_html

    # Row limit configuration
    DEFAULT_MAX_ROWS = 500
    MAX_ALLOWED_ROWS = 10000

    # Parse ?_max_rows=, clamping the value to 0..MAX_ALLOWED_ROWS
    max_rows = DEFAULT_MAX_ROWS
    max_rows_param = request.args.get("_max_rows")
    if max_rows_param:
        try:
            max_rows = max(0, min(int(max_rows_param), MAX_ALLOWED_ROWS))
        except ValueError:
            # Invalid value: keep the default
            max_rows = DEFAULT_MAX_ROWS

    # Whether to follow pagination and export every row
    use_stream = request.args.get("_stream")

    # Do not calculate facets or counts:
    extra_parameters = [
        "{}=1".format(key)
        for key in ("_nofacet", "_nocount")
        if not request.args.get(key)
    ]
    if extra_parameters:
        # Replace request object with a new one with modified scope
        if not request.query_string:
            new_query_string = "&".join(extra_parameters)
        else:
            new_query_string = request.query_string + "&" + "&".join(extra_parameters)
        new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
        receive = request.receive
        request = Request(new_scope, receive)

    if use_stream:
        # Some quick soundness checks
        if not datasette.setting("allow_csv_stream"):
            raise BadRequest("Markdown streaming is disabled")
        if request.args.get("_next"):
            raise BadRequest("_next not allowed for Markdown streaming")
        # _stream lifts the row limit entirely (None means unlimited)
        max_rows = None

    # Fetch the first page. DatasetteError raised by fetch_data propagates
    # unchanged because it is not caught here.
    try:
        response_or_template_contexts = await fetch_data(request)
    except (sqlite3.OperationalError, InvalidSql) as e:
        raise DatasetteError(str(e), title="Invalid SQL", status=400) from e

    if isinstance(response_or_template_contexts, Response):
        return response_or_template_contexts
    if len(response_or_template_contexts) == 4:
        data, _, _, _ = response_or_template_contexts
    else:
        data, _, _ = response_or_template_contexts

    # Build the headings; every expanded column gets an extra "<name>_label"
    # heading directly after it.
    expanded_columns = set(data.get("expanded_columns") or [])
    headings = []
    for column in data["columns"]:
        headings.append(column)
        if column in expanded_columns:
            headings.append(f"{column}_label")

    content_type = "text/markdown; charset=utf-8"
    preamble = ""
    postamble = ""

    trace = request.args.get("_trace")
    if trace:
        content_type = "text/html; charset=utf-8"
        preamble = (
            "<html><head><title>Markdown debug</title></head>"
            '<body><textarea style="width: 90%; height: 70vh">'
        )
        postamble = "</textarea></body></html>"

    # One-pass replacement of the characters that would break a table row.
    cell_escapes = str.maketrans({"|": "\\|", "\n": " ", "\r": " "})

    def escape_markdown(value):
        """Return ``value`` as text that is safe inside a Markdown table cell."""
        if value is None:
            return ""
        return str(value).translate(cell_escapes)

    def render_row(cells):
        """Format ``cells`` as one Markdown table row, newline-terminated."""
        line = "| " + " | ".join(escape_markdown(cell) for cell in cells) + " |\n"
        # In trace mode the row lands inside an HTML <textarea>, so database
        # content must be HTML-escaped to stop it closing the element.
        return escape_html(line) if trace else line

    def blob_url(page, row, column, cell):
        """Return an absolute URL from which the BLOB ``cell`` can be fetched."""
        if page.get("table"):
            # Table page: link to the row's blob endpoint
            pks = page.get("primary_keys") or []
            return datasette.absolute_url(
                request,
                datasette.urls.row_blob(
                    database,
                    page["table"],
                    path_from_row_pks(row, pks, not pks),
                    column,
                ),
            )
        # Otherwise generate URL for this query
        url = datasette.absolute_url(
            request,
            path_with_format(
                request=request,
                format="blob",
                extra_qs={
                    "_blob_column": column,
                    "_blob_hash": hashlib.sha256(cell).hexdigest(),
                },
                replace_format="markdown",
            ),
        )
        # Drop the parameters that were appended to the request above
        return url.replace("&_nocount=1", "").replace("&_nofacet=1", "")

    separator_row = "| " + " | ".join(["---"] * len(headings)) + " |\n"

    async def stream_fn(r):
        nonlocal data
        limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
        if trace:
            await limited_writer.write(preamble)

        first = True
        next_token = None
        row_count = 0
        is_truncated = False

        # NOTE(review): without _stream only the first page returned by
        # fetch_data is written, so fewer than max_rows rows can be emitted
        # and no truncation note is added in that case.
        while first or (next_token and use_stream):
            try:
                if not first:
                    data, _, _ = await fetch_data(request, _next=next_token)
                if first:
                    if request.args.get("_header") != "off":
                        await limited_writer.write(render_row(headings))
                        await limited_writer.write(separator_row)
                    first = False
                next_token = data.get("next")
                columns = data["columns"]
                for row in data["rows"]:
                    # Stop once the row limit has been reached
                    if max_rows is not None and row_count >= max_rows:
                        is_truncated = True
                        # Clearing the token also ends the outer loop
                        next_token = None
                        break

                    if any(isinstance(cell, bytes) for cell in row):
                        # Replace binary cells with a URL to the blob. Pair
                        # cells with the page's own columns: ``headings``
                        # also holds the extra "_label" entries and would
                        # misalign names and cells.
                        row = [
                            blob_url(data, row, column, cell)
                            if isinstance(cell, bytes)
                            else cell
                            for column, cell in zip(columns, row)
                        ]

                    if not expanded_columns:
                        # Simple path
                        cells = row
                    else:
                        # Look for {"value": ..., "label": ...} dicts and
                        # expand them into two cells
                        cells = []
                        for heading, cell in zip(columns, row):
                            if heading not in expanded_columns:
                                cells.append(cell)
                            elif cell is None:
                                cells.extend(("", ""))
                            elif isinstance(cell, dict):
                                cells.append(cell["value"])
                                cells.append(cell["label"])
                            else:
                                cells.extend((cell, ""))
                    await limited_writer.write(render_row(cells))
                    row_count += 1
            except Exception as ex:
                # The response has already started, so the error can only be
                # reported inside the stream itself.
                sys.stderr.write("Caught this error: {}\n".format(ex))
                sys.stderr.flush()
                message = str(ex)
                await r.write(escape_html(message) if trace else message)
                return

        # Tell the reader when rows were cut off by the limit
        if is_truncated:
            truncation_note = f"\n*已截断,仅显示前{row_count}行*\n"
            await limited_writer.write(truncation_note)

        await limited_writer.write(postamble)

    headers = {}
    if datasette.cors:
        add_cors_headers(headers)

    # Always set Content-Disposition for Markdown export
    disposition = 'attachment; filename="{}.md"'.format(
        request.url_vars.get("table", database)
    )
    headers["content-disposition"] = disposition

    return AsgiStream(stream_fn, headers=headers, content_type=content_type)
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ from . import Context
|
|||
|
||||
class DatabaseView(View):
|
||||
async def get(self, request, datasette):
|
||||
format_ = request.url_vars.get("format") or "html"
|
||||
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"
|
||||
|
||||
await datasette.refresh_schemas()
|
||||
|
||||
|
|
@ -641,7 +641,7 @@ class QueryView(View):
|
|||
if params.get("_timelimit"):
|
||||
extra_args["custom_time_limit"] = int(params["_timelimit"])
|
||||
|
||||
format_ = request.url_vars.get("format") or "html"
|
||||
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"
|
||||
|
||||
query_error = None
|
||||
results = None
|
||||
|
|
@ -703,6 +703,15 @@ class QueryView(View):
|
|||
return data, None, None
|
||||
|
||||
return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
|
||||
elif format_ == "markdown":
|
||||
|
||||
async def fetch_data_for_markdown(request, _next=None):
    """Run the enclosing view's SQL and shape the result for stream_markdown.

    ``request`` and ``_next`` are accepted to match the calling convention
    but are not used: the query is executed once with ``truncate=True``.
    Returns a ``(data, None, None)`` tuple where ``data`` has ``rows`` and
    ``columns`` keys.
    """
    query_results = await db.execute(sql, params, truncate=True)
    payload = {
        "rows": query_results.rows,
        "columns": query_results.columns,
    }
    return payload, None, None
|
||||
|
||||
from datasette.views.base import stream_markdown
|
||||
return await stream_markdown(datasette, fetch_data_for_markdown, request, db.name)
|
||||
elif format_ in datasette.renderers.keys():
|
||||
# Dispatch request to the correct output format renderer
|
||||
# (CSV is not handled here due to streaming)
|
||||
|
|
|
|||
|
|
@ -976,7 +976,7 @@ async def table_view_traced(datasette, request):
|
|||
if request.method == "POST":
|
||||
return Response.text("Method not allowed", status=405)
|
||||
|
||||
format_ = request.url_vars.get("format") or "html"
|
||||
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"
|
||||
extra_extras = None
|
||||
context_for_html_hack = False
|
||||
default_labels = False
|
||||
|
|
@ -1024,6 +1024,33 @@ async def table_view_traced(datasette, request):
|
|||
return data, None, None
|
||||
|
||||
return await stream_csv(datasette, fetch_data, request, resolved.db.name)
|
||||
elif format_ == "markdown":
|
||||
|
||||
async def fetch_data(request, _next=None):
    """Fetch one page of table data and shape it for stream_markdown.

    Calls ``table_view_data`` with the enclosing view's settings and the
    given ``_next`` token, then stores the rows, table name, columns and
    expanded columns on the returned ``data`` mapping. Returns
    ``(data, None, None)``.
    """
    page = await table_view_data(
        datasette,
        request,
        resolved,
        extra_extras=extra_extras,
        context_for_html_hack=context_for_html_hack,
        default_labels=default_labels,
        _next=_next,
    )
    # The SQL text and next URL are part of the result but are not needed here
    data, rows, columns, expanded_columns, _sql, _next_url = page
    data["rows"] = rows
    data["table"] = resolved.table
    data["columns"] = columns
    data["expanded_columns"] = expanded_columns
    return data, None, None
|
||||
|
||||
from datasette.views.base import stream_markdown
|
||||
return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
|
||||
elif format_ in datasette.renderers.keys():
|
||||
# Dispatch request to the correct output format renderer
|
||||
# (CSV is not handled here due to streaming)
|
||||
|
|
@ -2059,6 +2086,12 @@ async def table_view_data(
|
|||
)
|
||||
)
|
||||
url_csv_path = url_csv.split("?")[0]
|
||||
# Markdown export URL
|
||||
url_markdown_args = {**url_labels_extra}
|
||||
url_markdown = datasette.urls.path(
|
||||
path_with_format(request=request, format="markdown", extra_qs=url_markdown_args)
|
||||
)
|
||||
url_markdown_path = url_markdown.split("?")[0]
|
||||
data.update(
|
||||
{
|
||||
"url_csv": url_csv,
|
||||
|
|
@ -2069,6 +2102,13 @@ async def table_view_data(
|
|||
if key not in ("_labels", "_facet", "_size")
|
||||
]
|
||||
+ [("_size", "max")],
|
||||
"url_markdown": url_markdown,
|
||||
"url_markdown_path": url_markdown_path,
|
||||
"url_markdown_hidden_args": [
|
||||
(key, value)
|
||||
for key, value in urllib.parse.parse_qsl(request.query_string)
|
||||
if key not in ("_labels", "_facet", "_size", "_max_rows")
|
||||
],
|
||||
}
|
||||
)
|
||||
# if no sort specified AND table has a single primary key,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue