Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

import base64
import dataclasses
import json
import logging
import warnings
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Sequence, Union, cast
from urllib.parse import parse_qs

Expand All @@ -25,7 +27,7 @@
RequestValidationError,
ResponseValidationError,
)
from aws_lambda_powertools.event_handler.openapi.params import Param
from aws_lambda_powertools.event_handler.openapi.params import Param, UploadFile
from aws_lambda_powertools.event_handler.openapi.types import UnionType

if TYPE_CHECKING:
Expand All @@ -44,6 +46,7 @@
CONTENT_DISPOSITION_NAME_PARAM = "name="
APPLICATION_JSON_CONTENT_TYPE = "application/json"
APPLICATION_FORM_CONTENT_TYPE = "application/x-www-form-urlencoded"
MULTIPART_FORM_DATA_CONTENT_TYPE = "multipart/form-data"


class OpenAPIRequestValidationMiddleware(BaseMiddlewareHandler):
Expand Down Expand Up @@ -134,14 +137,18 @@
elif content_type.startswith(APPLICATION_FORM_CONTENT_TYPE):
return self._parse_form_data(app)

# Handle multipart/form-data (file uploads)
elif content_type.startswith(MULTIPART_FORM_DATA_CONTENT_TYPE):
return self._parse_multipart_data(app, content_type)

else:
raise RequestUnsupportedContentType(
"Only JSON body or Form() are supported",
"Unsupported content type",
errors=[
{
"type": "unsupported_content_type",
"loc": ("body",),
"msg": "Only JSON body or Form() are supported",
"msg": f"Unsupported content type: {content_type}",
"input": {},
"ctx": {},
},
Expand Down Expand Up @@ -188,6 +195,49 @@
],
) from e

def _parse_multipart_data(self, app: EventHandlerInstance, content_type: str) -> dict[str, Any]:
    """
    Decode a multipart/form-data request body into a mapping of field values.

    The boundary is read from ``content_type`` and the payload from
    ``app.current_event``; the resulting bytes are handed to
    ``_parse_multipart_body``.

    Raises
    ------
    ValueError
        When the content-type header carries no boundary. Any other
        ``ValueError`` raised while decoding or parsing is re-raised unchanged.
    RequestValidationError
        For every other exception, with the original error text under ``ctx``.
    """
    try:
        multipart_boundary = _extract_multipart_boundary(content_type)
        if not multipart_boundary:
            raise ValueError("Missing boundary in multipart/form-data content-type header")

        payload = app.current_event.body or ""

        if not app.current_event.is_base64_encoded:
            # Warn the caller: a text-mode payload may already have lost binary data
            warnings.warn(
                "Received multipart/form-data without base64 encoding. "
                "Binary file uploads may be corrupted. "
                "If using API Gateway REST API (v1), configure Binary Media Types "
                "to include 'multipart/form-data'. "
                "See: https://docs.aws.amazon.com/apigateway/latest/developerguide/"
                "api-gateway-payload-encodings.html",
                stacklevel=2,
            )
            # latin-1 maps code points 0-255 one-to-one onto byte values, so a body
            # holding raw binary data that is not valid UTF-8 survives the round trip
            payload_bytes = payload.encode("latin-1")
        else:
            payload_bytes = base64.b64decode(payload)

        return _parse_multipart_body(payload_bytes, multipart_boundary)

    except ValueError:
        # NOTE(review): binascii.Error (bad base64) and UnicodeError are ValueError
        # subclasses, so they also leave through this branch instead of becoming a
        # RequestValidationError — confirm that is intended.
        raise
    except Exception as exc:
        error_detail = {
            "type": "multipart_invalid",
            "loc": ("body",),
            "msg": "Multipart form data parsing error",
            "input": {},
            "ctx": {"error": str(exc)},
        }
        raise RequestValidationError([error_detail]) from exc


class OpenAPIResponseValidationMiddleware(BaseMiddlewareHandler):
"""
Expand Down Expand Up @@ -391,7 +441,12 @@
continue

value = _normalize_field_value(value=value, field_info=field.field_info)
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)

# UploadFile objects bypass Pydantic validation — they're already constructed
if isinstance(value, UploadFile):
values[field.name] = value
else:
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)

return values, errors

Expand Down Expand Up @@ -467,6 +522,10 @@

def _normalize_field_value(value: Any, field_info: FieldInfo) -> Any:
"""Normalize field value, converting lists to single values for non-sequence fields."""
# When annotation is bytes but value is UploadFile, extract raw content
if isinstance(value, UploadFile) and field_info.annotation is bytes:
return value.content

if _is_or_contains_sequence(field_info.annotation):
return value
elif isinstance(value, list) and value:
Expand Down Expand Up @@ -580,3 +639,106 @@
value = input_dict.get(field_name)

return value


def _extract_multipart_boundary(content_type: str) -> str | None:
"""Extract the boundary string from a multipart/form-data content-type header."""
for segment in content_type.split(";"):
stripped = segment.strip()
if stripped.startswith("boundary="):
boundary = stripped[len("boundary=") :]
# Remove optional quotes around boundary
if boundary.startswith('"') and boundary.endswith('"'):
boundary = boundary[1:-1]
return boundary
return None


def _parse_multipart_body(body: bytes, boundary: str) -> dict[str, Any]:

Check failure on line 657 in aws_lambda_powertools/event_handler/middlewares/openapi_validation.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 30 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=aws-powertools_powertools-lambda-python&issues=AZ1TEg-NG3MuLJLzyLsR&open=AZ1TEg-NG3MuLJLzyLsR&pullRequest=8093
"""
Parse a multipart/form-data body into a dict of field names to values.

File fields get bytes values; regular form fields get string values.
Multiple values for the same field name are collected into lists.
"""
delimiter = f"--{boundary}".encode()
end_delimiter = f"--{boundary}--".encode()

result: dict[str, Any] = {}

# Split body by the boundary delimiter
raw_parts = body.split(delimiter)

for raw_part in raw_parts:
# Skip the preamble (before first boundary) and epilogue (after closing boundary)
if not raw_part or raw_part.strip() == b"" or raw_part.strip() == b"--":
continue

# Remove the end delimiter marker if present
chunk = raw_part
if chunk.endswith(end_delimiter):
chunk = chunk[: -len(end_delimiter)]

# Strip leading \r\n
if chunk.startswith(b"\r\n"):
chunk = chunk[2:]

# Strip trailing \r\n
if chunk.endswith(b"\r\n"):
chunk = chunk[:-2]

# Split headers from body at the double CRLF
header_end = chunk.find(b"\r\n\r\n")
if header_end == -1:
continue

header_section = chunk[:header_end].decode("utf-8")
body_section = chunk[header_end + 4 :]

# Parse Content-Disposition to get the field name and optional filename
field_name = None
filename = None
content_type_header = None

for header_line in header_section.split("\r\n"):
header_lower = header_line.lower()
if header_lower.startswith("content-disposition:"):
field_name = _extract_header_param(header_line, "name")
filename = _extract_header_param(header_line, "filename")
elif header_lower.startswith("content-type:"):
content_type_header = header_line.split(":", 1)[1].strip()

if field_name is None:
continue

# If it has a filename, it's a file upload — wrap as UploadFile
# Otherwise it's a regular form field — decode to string
if filename is not None:
value: Any = UploadFile(content=body_section, filename=filename, content_type=content_type_header)
else:
value = body_section.decode("utf-8")

# Collect multiple values for same field name into a list
if field_name in result:
existing = result[field_name]
if isinstance(existing, list):
existing.append(value)
else:
result[field_name] = [existing, value]
else:
result[field_name] = value

return result


def _extract_header_param(header_line: str, param_name: str) -> str | None:
"""Extract a parameter value from a header line (e.g., name="file" from Content-Disposition)."""
search = f'{param_name}="'
idx = header_line.find(search)
if idx == -1:
return None
start = idx + len(search)
end = header_line.find('"', start)
if end == -1:
return None
return header_line[start:end]
8 changes: 4 additions & 4 deletions aws_lambda_powertools/event_handler/openapi/dependant.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from aws_lambda_powertools.event_handler.openapi.params import (
Body,
Dependant,
File,
Form,
Param,
ParamTypes,
_File,
analyze_param,
create_response_field,
get_flat_dependant,
Expand Down Expand Up @@ -370,9 +370,9 @@ def get_body_field_info(
if not required:
body_field_info_kwargs["default"] = None

if any(isinstance(f.field_info, _File) for f in flat_dependant.body_params):
# MAINTENANCE: body_field_info: type[Body] = _File
raise NotImplementedError("_File fields are not supported in request bodies")
if any(isinstance(f.field_info, File) for f in flat_dependant.body_params):
body_field_info = Body
body_field_info_kwargs["media_type"] = "multipart/form-data"
elif any(isinstance(f.field_info, Form) for f in flat_dependant.body_params):
body_field_info = Body
body_field_info_kwargs["media_type"] = "application/x-www-form-urlencoded"
Expand Down
52 changes: 51 additions & 1 deletion aws_lambda_powertools/event_handler/openapi/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,57 @@ def __init__(
)


class _File(Form): # type: ignore[misc]
class UploadFile:
    """
    An uploaded file together with the metadata sent alongside it.

    Annotate a parameter as ``Annotated[UploadFile, File()]`` to get the content
    plus filename and content type. When only the raw bytes are needed, use
    ``Annotated[bytes, File()]`` instead.

    Attributes
    ----------
    filename : str | None
        The original filename from the upload.
    content_type : str | None
        The MIME type declared by the client (e.g. ``image/jpeg``).
    content : bytes
        The raw file content.
    """

    __slots__ = ("content", "content_type", "filename")

    def __init__(self, *, content: bytes, filename: str | None = None, content_type: str | None = None):
        self.filename = filename
        self.content_type = content_type
        self.content = content

    def __len__(self) -> int:
        # Size of the upload in bytes
        return len(self.content)

    def __repr__(self) -> str:
        # Show the size instead of the content to keep the repr short
        size = len(self.content)
        return f"UploadFile(filename={self.filename!r}, content_type={self.content_type!r}, size={size})"

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: Any) -> Any:
        # Imported lazily so pydantic_core is only required when a schema is requested
        from pydantic_core import core_schema

        # Serialization hands the instance back unchanged
        passthrough = core_schema.plain_serializer_function_ser_schema(lambda v: v, info_arg=False)
        return core_schema.no_info_plain_validator_function(cls._validate, serialization=passthrough)

    @classmethod
    def _validate(cls, v: Any) -> UploadFile:
        # Only ready-made instances are accepted; nothing is coerced
        if not isinstance(v, cls):
            raise ValueError(f"Expected UploadFile, got {type(v).__name__}")
        return v

    @classmethod
    def __get_pydantic_json_schema__(cls, _schema: Any, handler: Any) -> dict[str, Any]:
        # JSON schema for a binary file: a string with format "binary"
        return dict(type="string", format="binary")


class File(Form): # type: ignore[misc]
"""
A class used to represent a file parameter in a path operation.
"""
Expand Down
51 changes: 51 additions & 0 deletions docs/core/event_handler/api_gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,57 @@ You can use the `Form` type to tell the Event Handler that a parameter expects f
--8<-- "examples/event_handler_rest/src/working_with_form_data.py"
```

#### Handling file uploads

!!! info "You must set `enable_validation=True` to handle file uploads via type annotation."

You can use the `File` type to accept `multipart/form-data` file uploads. This automatically sets the correct OpenAPI schema, and Swagger UI will render a file picker for each `File()` parameter.

There are two ways to receive uploaded files:

* **`bytes`** — receive raw file content only
* **`UploadFile`** — receive file content along with metadata (filename, content type)

=== "working_with_file_uploads.py"

```python hl_lines="4 11"
--8<-- "examples/event_handler_rest/src/working_with_file_uploads.py"
```

1. `File` is a special OpenAPI type for `multipart/form-data` file uploads. When annotated as `bytes`, you receive the raw file content.

=== "working_with_file_uploads_metadata.py"

```python hl_lines="4 11 15-16"
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_metadata.py"
```

1. Using `UploadFile` instead of `bytes` gives you access to file metadata.
2. `filename` and `content_type` come from the multipart headers sent by the client.

=== "working_with_file_uploads_mixed.py"

You can combine `File()` and `Form()` parameters in the same route to accept file uploads with additional form fields.

```python hl_lines="6 14-15"
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_mixed.py"
```

1. File upload parameter — receives the uploaded file with metadata.
2. Regular form field — receives a string value from the same multipart request.

!!! warning "API Gateway REST API (v1) requires Binary Media Types configuration"
When using API Gateway REST API (v1), you must configure Binary Media Types to include `multipart/form-data`, otherwise binary file content will be corrupted.

```yaml title="SAM template.yaml"
Globals:
Api:
BinaryMediaTypes:
- "multipart~1form-data"
```

API Gateway HTTP API (v2), Lambda Function URL, and ALB handle binary encoding automatically — no extra configuration needed.

#### Supported types for response serialization

With data validation enabled, we natively support serializing the following data types to JSON:
Expand Down
17 changes: 17 additions & 0 deletions examples/event_handler_rest/src/working_with_file_uploads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Annotated

from aws_lambda_powertools.event_handler import APIGatewayRestResolver
from aws_lambda_powertools.event_handler.openapi.params import File

app = APIGatewayRestResolver(enable_validation=True)  # validation must be enabled for File() parameters


@app.post("/upload")
def upload_file(
    file_data: Annotated[bytes, File(description="File to upload")],  # (1)!
):
    """Report the size, in bytes, of the uploaded file."""
    size = len(file_data)
    return {"file_size": size}


def lambda_handler(event, context):
    """Lambda entry point: hand the event to the resolver and return its response."""
    response = app.resolve(event, context)
    return response
Loading
Loading