From fe899d0cdf767a7289a8bf746b7f72c2907a1b4b Mon Sep 17 00:00:00 2001 From: pgjones Date: Sun, 29 Jan 2023 15:17:37 +0000 Subject: [PATCH] limit the maximum number of multipart form parts Add a limit to the number of multipart form data parts the parser will attempt to parse. If the limit is exceeded, it raises `RequestEntityTooLargeError`. A default of 1000 seems large enough to allow legitimate use cases while preventing the previous unlimited parsing. This differs from similar settings that are unset by default, as I think safe defaults are better practice. The limit can be adjusted per request by changing it on the request object before parsing. For example, it can be set based on what you expect a given endpoint to handle. ```python req.max_form_parts = 20 form = req.form ``` --- CHANGES.rst | 4 ++++ src/werkzeug/formparser.py | 13 ++++++++++++- src/werkzeug/sansio/multipart.py | 7 +++++++ src/werkzeug/wrappers/request.py | 11 +++++++++++ tests/test_formparser.py | 9 +++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 470f70cf7..0d89c6d01 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,6 +21,10 @@ Unreleased the requested size in one ``read`` call. :issue:`2558` - A cookie header that starts with ``=`` is treated as an empty key and discarded, rather than stripping the leading ``==``. +- Specify a maximum number of multipart parts, default 100, after + which a RequestEntityTooLarge exception is raised on parsing. The + mitigates a DOS attack whereby a larger number file/form parts are + sent resulting in a heavy parsing cost. Version 2.2.2 diff --git a/src/werkzeug/formparser.py b/src/werkzeug/formparser.py index 10d58ca3f..820190be2 100644 --- a/src/werkzeug/formparser.py +++ b/src/werkzeug/formparser.py @@ -179,6 +179,10 @@ class FormDataParser: :param cls: an optional dict class to use. If this is not specified or `None` the default :class:`MultiDict` is used. :param silent: If set to False parsing errors will not be caught. + :param max_form_parts: the maximum number of parts to be accepted for the + multipart data sent. If this is exceeded an + :exc:`~exceptions.RequestEntityTooLarge` exception + is raised. """ def __init__( @@ -190,6 +194,7 @@ def __init__( max_content_length: t.Optional[int] = None, cls: t.Optional[t.Type[MultiDict]] = None, silent: bool = True, + max_form_parts: t.Optional[int] = None, ) -> None: if stream_factory is None: stream_factory = default_stream_factory @@ -205,6 +210,7 @@ def __init__( self.cls = cls self.silent = silent + self.max_form_parts = max_form_parts def get_parse_func( self, mimetype: str, options: t.Dict[str, str] @@ -281,6 +287,7 @@ def _parse_multipart( self.errors, max_form_memory_size=self.max_form_memory_size, cls=self.cls, + max_form_parts=self.max_form_parts, ) boundary = options.get("boundary", "").encode("ascii") @@ -346,10 +353,12 @@ def __init__( max_form_memory_size: t.Optional[int] = None, cls: t.Optional[t.Type[MultiDict]] = None, buffer_size: int = 64 * 1024, + max_form_parts: t.Optional[int] = None, ) -> None: self.charset = charset self.errors = errors self.max_form_memory_size = max_form_memory_size + self.max_form_parts = max_form_parts if stream_factory is None: stream_factory = default_stream_factory @@ -409,7 +418,9 @@ def parse( [None], ) - parser = MultipartDecoder(boundary, self.max_form_memory_size) + parser = MultipartDecoder( + boundary, self.max_form_memory_size, self.max_form_parts + ) fields = [] files = [] diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py index d8abeb354..d90b445b9 100644 --- a/src/werkzeug/sansio/multipart.py +++ b/src/werkzeug/sansio/multipart.py @@ -87,10 +87,12 @@ def __init__( self, boundary: bytes, max_form_memory_size: Optional[int] = None, + max_parts: Optional[int] = None, ) -> None: self.buffer = bytearray() self.complete = False self.max_form_memory_size = max_form_memory_size + self.max_parts = max_parts self.state = State.PREAMBLE self.boundary = boundary @@ -118,6 +120,7 @@ def __init__( re.MULTILINE, ) self._search_position = 0 + self._parts_decoded = 0 def last_newline(self) -> int: try: @@ -191,6 +194,10 @@ def next_event(self) -> Event: ) self.state = State.DATA self._search_position = 0 + self._parts_decoded += 1 + + if self.max_parts is not None and self._parts_decoded > self.max_parts: + raise RequestEntityTooLarge() else: # Update the search start position to be equal to the # current buffer length (already searched) minus a diff --git a/src/werkzeug/wrappers/request.py b/src/werkzeug/wrappers/request.py index 283e59229..fa5b18586 100644 --- a/src/werkzeug/wrappers/request.py +++ b/src/werkzeug/wrappers/request.py @@ -83,6 +83,16 @@ class Request(_SansIORequest): #: .. versionadded:: 0.5 max_form_memory_size: t.Optional[int] = None + #: the maximum number of multipart parts. This is forwarded to teh + #: form data parsing function (:func:`parse_form_data`). When the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: parsing fails because more parts than the specified value is + #: transmitted a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` + #: exception is raised. + #: + #: .. versionadded:: 2.2.3 + max_form_parts = 1000 + #: The form data parser that should be used. Can be replaced to customize #: the form date parsing. form_data_parser_class: t.Type[FormDataParser] = FormDataParser @@ -246,6 +256,7 @@ def make_form_data_parser(self) -> FormDataParser: self.max_form_memory_size, self.max_content_length, self.parameter_storage_class, + max_form_parts=self.max_form_parts, ) def _load_form_data(self) -> None: diff --git a/tests/test_formparser.py b/tests/test_formparser.py index 49010b46c..4c518b14b 100644 --- a/tests/test_formparser.py +++ b/tests/test_formparser.py @@ -127,6 +127,15 @@ def test_limiting(self): req.max_form_memory_size = 400 assert req.form["foo"] == "Hello World" + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_form_parts = 1 + pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + def test_missing_multipart_boundary(self): data = ( b"--foo\r\nContent-Disposition: form-field; name=foo\r\n\r\n"