mirror of
https://github.com/simonw/datasette.git
synced 2026-06-05 08:37:00 +02:00
* Add request.form() for multipart form data and file uploads New Request.form() method that handles both application/x-www-form-urlencoded and multipart/form-data content types with streaming parsing. Features: - Streaming multipart parser that doesn't buffer entire body in memory - Files spill to disk above 1MB threshold via SpooledTemporaryFile - files=False (default) discards file content, files=True stores them - Security limits: max_request_size, max_file_size, max_fields, max_files - FormData container with dict-like access and getlist() for multiple values - UploadedFile class with async read(), seek(), filename, content_type, size - Support for RFC 5987 filename* encoding for international filenames Uses multipart-form-data-conformance test suite for validation. * Update views to use request.form() and document new API - Migrate PermissionsDebugView, MessagesDebugView, and CreateTokenView from post_vars() to form() - Add documentation for request.form(), FormData, and UploadedFile classes Centralize multipart defaults and expose stricter limits via Request.form(). Enforce header, part, file, and disk space limits even when files are discarded; detect truncated bodies and client disconnects; and move blocking work off the event loop. Add FormData close/aclose context managers, update internals docs, and expand multipart tests (including len semantics and stricter conformance expectations).
757 lines
25 KiB
Python
757 lines
25 KiB
Python
"""
|
|
Streaming multipart/form-data parser for ASGI applications.
|
|
|
|
Supports:
|
|
- Streaming parsing without buffering entire body in memory
|
|
- Files spill to disk above configurable threshold
|
|
- Security limits on request size, file size, field count
|
|
- Both multipart/form-data and application/x-www-form-urlencoded
|
|
"""
|
|
|
|
import asyncio
|
|
import shutil
|
|
import tempfile
|
|
from dataclasses import dataclass, field
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
List,
|
|
Optional,
|
|
Tuple,
|
|
Union,
|
|
)
|
|
from urllib.parse import parse_qsl
|
|
|
|
# Centralized defaults for multipart/form-data parsing
|
|
DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB
|
|
DEFAULT_MAX_REQUEST_SIZE = 100 * 1024 * 1024 # 100MB
|
|
DEFAULT_MAX_FIELDS = 1000
|
|
DEFAULT_MAX_FILES = 100
|
|
# If max_parts is not specified, it defaults to max_fields + max_files
|
|
DEFAULT_MAX_PARTS: Optional[int] = None
|
|
DEFAULT_MAX_FIELD_SIZE = 100 * 1024 # 100KB
|
|
DEFAULT_MAX_MEMORY_FILE_SIZE = 1024 * 1024 # 1MB
|
|
DEFAULT_MAX_PART_HEADER_BYTES = 16 * 1024 # 16KB
|
|
DEFAULT_MAX_PART_HEADER_LINES = 100
|
|
DEFAULT_MIN_FREE_DISK_BYTES = 50 * 1024 * 1024 # 50MB
|
|
|
|
|
|
class MultipartParseError(Exception):
|
|
"""Raised when multipart parsing fails."""
|
|
|
|
pass
|
|
|
|
|
|
@dataclass
|
|
class UploadedFile:
|
|
"""
|
|
Represents an uploaded file from a multipart form.
|
|
|
|
Attributes:
|
|
name: The form field name
|
|
filename: The original filename from the upload
|
|
content_type: The MIME type of the file
|
|
size: Size in bytes
|
|
"""
|
|
|
|
name: str
|
|
filename: str
|
|
content_type: Optional[str]
|
|
size: int
|
|
_file: tempfile.SpooledTemporaryFile = field(repr=False)
|
|
|
|
async def read(self, size: int = -1) -> bytes:
|
|
"""Read file contents."""
|
|
return await asyncio.to_thread(self._file.read, size)
|
|
|
|
async def seek(self, offset: int, whence: int = 0) -> int:
|
|
"""Seek to position in file."""
|
|
return await asyncio.to_thread(self._file.seek, offset, whence)
|
|
|
|
async def close(self) -> None:
|
|
"""Close the underlying file."""
|
|
await asyncio.to_thread(self._file.close)
|
|
|
|
def close_sync(self) -> None:
|
|
"""Close the underlying file synchronously."""
|
|
self._file.close()
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
await self.close()
|
|
|
|
def __del__(self):
|
|
try:
|
|
self._file.close()
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
class FormData:
|
|
"""
|
|
Container for parsed form data, supporting both fields and files.
|
|
|
|
Provides dict-like access with support for multiple values per key.
|
|
"""
|
|
|
|
def __init__(self):
|
|
self._data: List[Tuple[str, Union[str, UploadedFile]]] = []
|
|
|
|
def append(self, key: str, value: Union[str, UploadedFile]) -> None:
|
|
"""Add a key-value pair."""
|
|
self._data.append((key, value))
|
|
|
|
def __getitem__(self, key: str) -> Union[str, UploadedFile]:
|
|
"""Get the first value for a key."""
|
|
for k, v in self._data:
|
|
if k == key:
|
|
return v
|
|
raise KeyError(key)
|
|
|
|
def get(self, key: str, default: Any = None) -> Optional[Union[str, UploadedFile]]:
|
|
"""Get the first value for a key, or default if not found."""
|
|
try:
|
|
return self[key]
|
|
except KeyError:
|
|
return default
|
|
|
|
def getlist(self, key: str) -> List[Union[str, UploadedFile]]:
|
|
"""Get all values for a key."""
|
|
return [v for k, v in self._data if k == key]
|
|
|
|
def __contains__(self, key: str) -> bool:
|
|
"""Check if key exists."""
|
|
return any(k == key for k, _ in self._data)
|
|
|
|
def __len__(self) -> int:
|
|
"""Return number of items."""
|
|
return len(self._data)
|
|
|
|
def __iter__(self):
|
|
"""Iterate over unique keys."""
|
|
seen = set()
|
|
for k, _ in self._data:
|
|
if k not in seen:
|
|
seen.add(k)
|
|
yield k
|
|
|
|
def keys(self):
|
|
"""Return unique keys."""
|
|
return list(self)
|
|
|
|
def items(self) -> List[Tuple[str, Union[str, UploadedFile]]]:
|
|
"""Return all key-value pairs."""
|
|
return list(self._data)
|
|
|
|
def values(self) -> List[Union[str, UploadedFile]]:
|
|
"""Return all values."""
|
|
return [v for _, v in self._data]
|
|
|
|
def _uploaded_files(self) -> List[UploadedFile]:
|
|
"""Return UploadedFile instances contained in this form."""
|
|
return [v for _, v in self._data if isinstance(v, UploadedFile)]
|
|
|
|
def close(self) -> None:
|
|
"""
|
|
Close any uploaded files.
|
|
|
|
This provides deterministic cleanup for spooled temp files.
|
|
"""
|
|
for uploaded in self._uploaded_files():
|
|
try:
|
|
uploaded.close_sync()
|
|
except Exception:
|
|
# Best-effort cleanup; ignore close errors
|
|
pass
|
|
|
|
async def aclose(self) -> None:
|
|
"""Asynchronously close any uploaded files."""
|
|
for uploaded in self._uploaded_files():
|
|
try:
|
|
await uploaded.close()
|
|
except Exception:
|
|
# Best-effort cleanup; ignore close errors
|
|
pass
|
|
|
|
def __enter__(self):
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc, tb):
|
|
self.close()
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
await self.aclose()
|
|
|
|
|
|
def parse_content_disposition(header: str) -> Dict[str, Optional[str]]:
|
|
"""
|
|
Parse Content-Disposition header value.
|
|
|
|
Returns dict with 'name', 'filename' keys (filename may be None).
|
|
"""
|
|
result: Dict[str, Optional[str]] = {"name": None, "filename": None}
|
|
|
|
# Split on semicolons, handling quoted strings
|
|
parts = []
|
|
current = ""
|
|
in_quotes = False
|
|
i = 0
|
|
while i < len(header):
|
|
char = header[i]
|
|
if char == '"' and (i == 0 or header[i - 1] != "\\"):
|
|
in_quotes = not in_quotes
|
|
current += char
|
|
elif char == ";" and not in_quotes:
|
|
parts.append(current.strip())
|
|
current = ""
|
|
else:
|
|
current += char
|
|
i += 1
|
|
if current.strip():
|
|
parts.append(current.strip())
|
|
|
|
for part in parts[1:]: # Skip the "form-data" part
|
|
if "=" not in part:
|
|
continue
|
|
|
|
key, _, value = part.partition("=")
|
|
key = key.strip().lower()
|
|
value = value.strip()
|
|
|
|
# Handle filename* (RFC 5987 encoding)
|
|
if key == "filename*":
|
|
# Format: utf-8''encoded_filename or charset'language'encoded_filename
|
|
if "'" in value:
|
|
parts_star = value.split("'", 2)
|
|
if len(parts_star) >= 3:
|
|
# charset = parts_star[0]
|
|
# language = parts_star[1]
|
|
encoded = parts_star[2]
|
|
# URL decode
|
|
try:
|
|
from urllib.parse import unquote
|
|
|
|
result["filename"] = unquote(encoded, encoding="utf-8")
|
|
except Exception:
|
|
pass
|
|
continue
|
|
|
|
# Remove quotes if present
|
|
if value.startswith('"') and value.endswith('"'):
|
|
value = value[1:-1]
|
|
# Unescape backslash sequences
|
|
value = value.replace('\\"', '"').replace("\\\\", "\\")
|
|
|
|
if key == "name":
|
|
result["name"] = value
|
|
elif key == "filename":
|
|
# Only set if filename* hasn't already set it
|
|
if result["filename"] is None:
|
|
# Strip path components (security)
|
|
# Handle both Unix and Windows paths
|
|
value = value.replace("\\", "/")
|
|
if "/" in value:
|
|
value = value.rsplit("/", 1)[-1]
|
|
result["filename"] = value
|
|
|
|
return result
|
|
|
|
|
|
def parse_content_type(header: str) -> Tuple[str, Dict[str, str]]:
|
|
"""
|
|
Parse Content-Type header value.
|
|
|
|
Returns (media_type, parameters_dict).
|
|
"""
|
|
parts = header.split(";")
|
|
media_type = parts[0].strip().lower()
|
|
params = {}
|
|
|
|
for part in parts[1:]:
|
|
part = part.strip()
|
|
if "=" in part:
|
|
key, _, value = part.partition("=")
|
|
key = key.strip().lower()
|
|
value = value.strip()
|
|
# Remove quotes if present
|
|
if value.startswith('"') and value.endswith('"'):
|
|
value = value[1:-1]
|
|
params[key] = value
|
|
|
|
return media_type, params
|
|
|
|
|
|
class MultipartParser:
|
|
"""
|
|
Streaming multipart/form-data parser.
|
|
|
|
Processes the body chunk by chunk without loading everything into memory.
|
|
"""
|
|
|
|
# Parser states
|
|
STATE_PREAMBLE = 0
|
|
STATE_HEADER = 1
|
|
STATE_BODY = 2
|
|
STATE_DONE = 3
|
|
|
|
def __init__(
|
|
self,
|
|
boundary: bytes,
|
|
max_file_size: int = DEFAULT_MAX_FILE_SIZE,
|
|
max_request_size: int = DEFAULT_MAX_REQUEST_SIZE,
|
|
max_fields: int = DEFAULT_MAX_FIELDS,
|
|
max_files: int = DEFAULT_MAX_FILES,
|
|
max_parts: Optional[int] = DEFAULT_MAX_PARTS,
|
|
max_field_size: int = DEFAULT_MAX_FIELD_SIZE,
|
|
max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE,
|
|
max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES,
|
|
max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES,
|
|
min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES,
|
|
handle_files: bool = False,
|
|
):
|
|
self.boundary = b"--" + boundary
|
|
self.end_boundary = self.boundary + b"--"
|
|
self.max_file_size = max_file_size
|
|
self.max_request_size = max_request_size
|
|
self.max_fields = max_fields
|
|
self.max_files = max_files
|
|
# If not specified, tie max_parts to the other cardinality limits
|
|
if max_parts is None:
|
|
max_parts = max_fields + max_files
|
|
self.max_parts = max_parts
|
|
self.max_field_size = max_field_size
|
|
self.max_memory_file_size = max_memory_file_size
|
|
self.max_part_header_bytes = max_part_header_bytes
|
|
self.max_part_header_lines = max_part_header_lines
|
|
self.min_free_disk_bytes = min_free_disk_bytes
|
|
self.handle_files = handle_files
|
|
|
|
self.state = self.STATE_PREAMBLE
|
|
self.buffer = bytearray()
|
|
self.total_bytes = 0
|
|
self.field_count = 0
|
|
self.file_count = 0
|
|
self.part_count = 0
|
|
self.current_part_size = 0
|
|
self.current_header_bytes = 0
|
|
self.current_header_lines = 0
|
|
|
|
self.form_data = FormData()
|
|
self._disk_check_interval_bytes = 1024 * 1024 # 1MB between disk checks
|
|
self._bytes_since_disk_check = 0
|
|
self._tempdir = tempfile.gettempdir()
|
|
|
|
# Current part state
|
|
self.current_headers: Dict[str, str] = {}
|
|
self.current_file: Optional[tempfile.SpooledTemporaryFile] = None
|
|
self.current_body = bytearray()
|
|
self.current_name: Optional[str] = None
|
|
self.current_filename: Optional[str] = None
|
|
self.current_content_type: Optional[str] = None
|
|
|
|
def feed(self, chunk: bytes) -> None:
|
|
"""Feed a chunk of data to the parser."""
|
|
self.total_bytes += len(chunk)
|
|
if self.total_bytes > self.max_request_size:
|
|
raise MultipartParseError("Request body too large")
|
|
|
|
self.buffer.extend(chunk)
|
|
self._process()
|
|
|
|
def _process(self) -> None:
|
|
"""Process buffered data."""
|
|
while True:
|
|
if self.state == self.STATE_PREAMBLE:
|
|
if not self._process_preamble():
|
|
break
|
|
elif self.state == self.STATE_HEADER:
|
|
if not self._process_header():
|
|
break
|
|
elif self.state == self.STATE_BODY:
|
|
if not self._process_body():
|
|
break
|
|
elif self.state == self.STATE_DONE:
|
|
break
|
|
|
|
def _process_preamble(self) -> bool:
|
|
"""Skip preamble and find first boundary."""
|
|
# Look for boundary (could be at start or after preamble)
|
|
# Try both \r\n prefixed and bare boundary at start
|
|
idx = self.buffer.find(self.boundary)
|
|
if idx == -1:
|
|
# Keep potential partial boundary at end
|
|
keep = len(self.boundary) - 1
|
|
if len(self.buffer) > keep:
|
|
self.buffer = self.buffer[-keep:]
|
|
return False
|
|
|
|
# Found boundary, skip to after it
|
|
after_boundary = idx + len(self.boundary)
|
|
|
|
# Check for end boundary
|
|
if self.buffer[idx : idx + len(self.end_boundary)] == self.end_boundary:
|
|
self.state = self.STATE_DONE
|
|
return False
|
|
|
|
# Skip CRLF or LF after boundary
|
|
if after_boundary < len(self.buffer):
|
|
if self.buffer[after_boundary : after_boundary + 2] == b"\r\n":
|
|
after_boundary += 2
|
|
elif self.buffer[after_boundary : after_boundary + 1] == b"\n":
|
|
after_boundary += 1
|
|
|
|
self.buffer = self.buffer[after_boundary:]
|
|
self.state = self.STATE_HEADER
|
|
self.current_headers = {}
|
|
self.current_header_bytes = 0
|
|
self.current_header_lines = 0
|
|
return True
|
|
|
|
def _process_header(self) -> bool:
|
|
"""Parse part headers."""
|
|
while True:
|
|
# Look for end of header line
|
|
crlf_idx = self.buffer.find(b"\r\n")
|
|
lf_idx = self.buffer.find(b"\n")
|
|
|
|
if crlf_idx == -1 and lf_idx == -1:
|
|
# Guard against unbounded header buffering if no newline is ever sent
|
|
if len(self.buffer) > self.max_part_header_bytes:
|
|
raise MultipartParseError("Part headers too large")
|
|
return False # Need more data
|
|
|
|
# Use whichever comes first
|
|
if crlf_idx != -1 and (lf_idx == -1 or crlf_idx < lf_idx):
|
|
idx = crlf_idx
|
|
line_end_len = 2
|
|
else:
|
|
idx = lf_idx
|
|
line_end_len = 1
|
|
|
|
line = self.buffer[:idx]
|
|
self.buffer = self.buffer[idx + line_end_len :]
|
|
|
|
self.current_header_lines += 1
|
|
self.current_header_bytes += idx + line_end_len
|
|
if (
|
|
self.current_header_lines > self.max_part_header_lines
|
|
or self.current_header_bytes > self.max_part_header_bytes
|
|
):
|
|
raise MultipartParseError("Part headers too large")
|
|
|
|
if not line:
|
|
# Empty line = end of headers
|
|
self._start_body()
|
|
self.state = self.STATE_BODY
|
|
return True
|
|
|
|
# Parse header
|
|
try:
|
|
line_str = line.decode("utf-8", errors="replace")
|
|
except Exception:
|
|
line_str = line.decode("latin-1")
|
|
|
|
if ":" in line_str:
|
|
name, _, value = line_str.partition(":")
|
|
self.current_headers[name.strip().lower()] = value.strip()
|
|
|
|
def _start_body(self) -> None:
|
|
"""Initialize body parsing for current part."""
|
|
self.part_count += 1
|
|
if self.part_count > self.max_parts:
|
|
raise MultipartParseError("Too many parts")
|
|
|
|
# Parse Content-Disposition
|
|
cd = self.current_headers.get("content-disposition", "")
|
|
parsed = parse_content_disposition(cd)
|
|
self.current_name = parsed.get("name")
|
|
self.current_filename = parsed.get("filename")
|
|
self.current_content_type = self.current_headers.get("content-type")
|
|
self.current_part_size = 0
|
|
|
|
if self.current_filename is not None:
|
|
# It's a file
|
|
self.file_count += 1
|
|
if self.file_count > self.max_files:
|
|
raise MultipartParseError("Too many files")
|
|
if self.handle_files:
|
|
self.current_file = tempfile.SpooledTemporaryFile(
|
|
max_size=self.max_memory_file_size
|
|
)
|
|
else:
|
|
# Will discard file content
|
|
self.current_file = None
|
|
else:
|
|
# It's a text field
|
|
self.field_count += 1
|
|
if self.field_count > self.max_fields:
|
|
raise MultipartParseError("Too many fields")
|
|
self.current_body = bytearray()
|
|
self.current_file = None
|
|
|
|
# Check disk space before allocating a spooled temp file
|
|
if self.current_filename is not None and self.handle_files:
|
|
self._ensure_disk_space()
|
|
|
|
def _process_body(self) -> bool:
|
|
"""Process body data for current part."""
|
|
# Look for boundary in buffer
|
|
# Need to handle boundary potentially split across chunks
|
|
|
|
# The boundary is preceded by \r\n (or \n for lenient parsing)
|
|
search_boundary = b"\r\n" + self.boundary
|
|
|
|
idx = self.buffer.find(search_boundary)
|
|
if idx == -1:
|
|
# Try LF-only boundary (lenient)
|
|
search_boundary_lf = b"\n" + self.boundary
|
|
idx = self.buffer.find(search_boundary_lf)
|
|
if idx != -1:
|
|
search_boundary = search_boundary_lf
|
|
|
|
if idx == -1:
|
|
# No boundary found yet
|
|
# Keep potential partial boundary at end of buffer
|
|
safe_len = len(self.buffer) - len(search_boundary) - 1
|
|
if safe_len > 0:
|
|
safe_data = self.buffer[:safe_len]
|
|
self._write_body_data(bytes(safe_data))
|
|
self.buffer = self.buffer[safe_len:]
|
|
return False
|
|
|
|
# Found boundary - write remaining body data
|
|
body_data = self.buffer[:idx]
|
|
self._write_body_data(bytes(body_data))
|
|
|
|
# Move past the boundary
|
|
after_boundary = idx + len(search_boundary)
|
|
|
|
# Check for end boundary
|
|
remaining = self.buffer[after_boundary:]
|
|
if remaining.startswith(b"--"):
|
|
# End boundary
|
|
self._finish_part()
|
|
self.state = self.STATE_DONE
|
|
return False
|
|
|
|
# Skip CRLF or LF after boundary
|
|
if remaining.startswith(b"\r\n"):
|
|
after_boundary += 2
|
|
elif remaining.startswith(b"\n"):
|
|
after_boundary += 1
|
|
|
|
self.buffer = self.buffer[after_boundary:]
|
|
self._finish_part()
|
|
self.state = self.STATE_HEADER
|
|
self.current_headers = {}
|
|
self.current_header_bytes = 0
|
|
self.current_header_lines = 0
|
|
return True
|
|
|
|
def _write_body_data(self, data: bytes) -> None:
|
|
"""Write data to current part body."""
|
|
if not data:
|
|
return
|
|
|
|
self.current_part_size += len(data)
|
|
|
|
if self.current_filename is not None:
|
|
# File data
|
|
if self.current_part_size > self.max_file_size:
|
|
raise MultipartParseError("File too large")
|
|
if self.handle_files and self.current_file:
|
|
self._bytes_since_disk_check += len(data)
|
|
if self._bytes_since_disk_check >= self._disk_check_interval_bytes:
|
|
self._ensure_disk_space()
|
|
self._bytes_since_disk_check = 0
|
|
self.current_file.write(data)
|
|
# else: discard file data
|
|
else:
|
|
# Field data
|
|
if self.current_part_size > self.max_field_size:
|
|
raise MultipartParseError("Field value too large")
|
|
self.current_body.extend(data)
|
|
|
|
def _finish_part(self) -> None:
|
|
"""Finalize current part and add to form data."""
|
|
if self.current_name is None:
|
|
return
|
|
|
|
if self.current_filename is not None:
|
|
# File
|
|
if self.handle_files and self.current_file:
|
|
self.current_file.seek(0)
|
|
uploaded = UploadedFile(
|
|
name=self.current_name,
|
|
filename=self.current_filename,
|
|
content_type=self.current_content_type,
|
|
size=self.current_part_size,
|
|
_file=self.current_file,
|
|
)
|
|
self.form_data.append(self.current_name, uploaded)
|
|
# else: file was discarded
|
|
else:
|
|
# Text field
|
|
try:
|
|
value = bytes(self.current_body).decode("utf-8")
|
|
except UnicodeDecodeError:
|
|
value = bytes(self.current_body).decode("latin-1")
|
|
self.form_data.append(self.current_name, value)
|
|
|
|
# Reset part state
|
|
self.current_file = None
|
|
self.current_body = bytearray()
|
|
self.current_name = None
|
|
self.current_filename = None
|
|
self.current_content_type = None
|
|
|
|
def finalize(self) -> FormData:
|
|
"""Finalize parsing and return form data."""
|
|
# Process any remaining data
|
|
self._process()
|
|
if self.state != self.STATE_DONE:
|
|
raise MultipartParseError(
|
|
"Truncated multipart body (missing closing boundary)"
|
|
)
|
|
return self.form_data
|
|
|
|
def _ensure_disk_space(self) -> None:
|
|
"""
|
|
Ensure there is enough free space on the temp filesystem.
|
|
|
|
This is a best-effort guard against filling the disk with uploads.
|
|
"""
|
|
if not self.handle_files:
|
|
return
|
|
if self.min_free_disk_bytes <= 0:
|
|
return
|
|
free_bytes = shutil.disk_usage(self._tempdir).free
|
|
if free_bytes < self.min_free_disk_bytes:
|
|
raise MultipartParseError("Insufficient disk space for uploads")
|
|
|
|
|
|
async def parse_form_data(
|
|
receive: Callable,
|
|
content_type: str,
|
|
files: bool = False,
|
|
max_file_size: int = DEFAULT_MAX_FILE_SIZE,
|
|
max_request_size: int = DEFAULT_MAX_REQUEST_SIZE,
|
|
max_fields: int = DEFAULT_MAX_FIELDS,
|
|
max_files: int = DEFAULT_MAX_FILES,
|
|
max_parts: Optional[int] = DEFAULT_MAX_PARTS,
|
|
max_field_size: int = DEFAULT_MAX_FIELD_SIZE,
|
|
max_memory_file_size: int = DEFAULT_MAX_MEMORY_FILE_SIZE,
|
|
max_part_header_bytes: int = DEFAULT_MAX_PART_HEADER_BYTES,
|
|
max_part_header_lines: int = DEFAULT_MAX_PART_HEADER_LINES,
|
|
min_free_disk_bytes: int = DEFAULT_MIN_FREE_DISK_BYTES,
|
|
) -> FormData:
|
|
"""
|
|
Parse form data from an ASGI receive callable.
|
|
|
|
Supports both application/x-www-form-urlencoded and multipart/form-data.
|
|
|
|
Args:
|
|
receive: ASGI receive callable
|
|
content_type: Content-Type header value
|
|
files: If True, store file uploads; if False, discard them
|
|
max_file_size: Maximum size per file in bytes
|
|
max_request_size: Maximum total request size in bytes
|
|
max_fields: Maximum number of form fields
|
|
max_files: Maximum number of file uploads
|
|
max_field_size: Maximum size of a text field value
|
|
max_memory_file_size: File size threshold before spilling to disk
|
|
|
|
Returns:
|
|
FormData object containing parsed fields and files
|
|
"""
|
|
media_type, params = parse_content_type(content_type)
|
|
|
|
if media_type == "application/x-www-form-urlencoded":
|
|
# Read entire body for URL-encoded forms (they're typically small)
|
|
body = bytearray()
|
|
total = 0
|
|
while True:
|
|
message = await receive()
|
|
message_type = message.get("type")
|
|
if message_type == "http.disconnect":
|
|
raise MultipartParseError("Client disconnected during request body")
|
|
if message_type is not None and message_type != "http.request":
|
|
continue
|
|
chunk = message.get("body", b"")
|
|
total += len(chunk)
|
|
if total > max_request_size:
|
|
raise MultipartParseError("Request body too large")
|
|
body.extend(chunk)
|
|
if not message.get("more_body", False):
|
|
break
|
|
|
|
form_data = FormData()
|
|
try:
|
|
pairs = parse_qsl(bytes(body).decode("utf-8"), keep_blank_values=True)
|
|
except UnicodeDecodeError:
|
|
pairs = parse_qsl(bytes(body).decode("latin-1"), keep_blank_values=True)
|
|
|
|
for key, value in pairs:
|
|
form_data.append(key, value)
|
|
|
|
return form_data
|
|
|
|
elif media_type == "multipart/form-data":
|
|
boundary = params.get("boundary")
|
|
if not boundary:
|
|
raise MultipartParseError("Missing boundary in Content-Type")
|
|
|
|
parser = MultipartParser(
|
|
boundary=boundary.encode("utf-8"),
|
|
max_file_size=max_file_size,
|
|
max_request_size=max_request_size,
|
|
max_fields=max_fields,
|
|
max_files=max_files,
|
|
max_parts=max_parts,
|
|
max_field_size=max_field_size,
|
|
max_memory_file_size=max_memory_file_size,
|
|
max_part_header_bytes=max_part_header_bytes,
|
|
max_part_header_lines=max_part_header_lines,
|
|
min_free_disk_bytes=min_free_disk_bytes,
|
|
handle_files=files,
|
|
)
|
|
|
|
# Stream body through parser
|
|
batch_target = 64 * 1024
|
|
batch = bytearray()
|
|
|
|
async def flush_batch() -> None:
|
|
if batch:
|
|
data = bytes(batch)
|
|
batch.clear()
|
|
await asyncio.to_thread(parser.feed, data)
|
|
|
|
while True:
|
|
message = await receive()
|
|
message_type = message.get("type")
|
|
if message_type == "http.disconnect":
|
|
raise MultipartParseError("Client disconnected during request body")
|
|
if message_type is not None and message_type != "http.request":
|
|
continue
|
|
chunk = message.get("body", b"")
|
|
if chunk:
|
|
batch.extend(chunk)
|
|
if len(batch) >= batch_target:
|
|
await flush_batch()
|
|
if not message.get("more_body", False):
|
|
break
|
|
|
|
await flush_batch()
|
|
return await asyncio.to_thread(parser.finalize)
|
|
|
|
else:
|
|
raise MultipartParseError(
|
|
f"Unsupported Content-Type: {media_type}. "
|
|
"Expected application/x-www-form-urlencoded or multipart/form-data"
|
|
)
|