From c4fe4d3cebc08c660e57dd709af1ffa7059b3177 Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Sun, 1 Dec 2024 07:59:34 +0100 Subject: [PATCH] Don't warn when CRLF is found after last boundary (#193) --- CHANGELOG.md | 4 ++++ python_multipart/__init__.py | 2 +- python_multipart/multipart.py | 4 ++++ scripts/check | 2 +- tests/test_multipart.py | 26 ++++++++++++++++++++++++++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7faf0..50074c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.0.19 (2024-11-30) + +* Don't warn when CRLF is found after last boundary on `MultipartParser` [#193](https://github.com/Kludex/python-multipart/pull/193). + ## 0.0.18 (2024-11-28) * Hard break if found data after last boundary on `MultipartParser` [#189](https://github.com/Kludex/python-multipart/pull/189). diff --git a/python_multipart/__init__.py b/python_multipart/__init__.py index 69a3ed4..d555f80 100644 --- a/python_multipart/__init__.py +++ b/python_multipart/__init__.py @@ -2,7 +2,7 @@ __author__ = "Andrew Dunham" __license__ = "Apache" __copyright__ = "Copyright (c) 2012-2013, Andrew Dunham" -__version__ = "0.0.18" +__version__ = "0.0.19" from .multipart import ( BaseParser, diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index be76d24..a996379 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1397,6 +1397,10 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No i -= 1 elif state == MultipartState.END: + # Don't do anything if chunk ends with CRLF. + if c == CR and i + 1 < length and data[i + 1] == LF: + i += 2 + continue # Skip data after the last boundary. 
self.logger.warning("Skipping data after last boundary") i = length diff --git a/scripts/check b/scripts/check index 13ce9ed..bc37333 100755 --- a/scripts/check +++ b/scripts/check @@ -6,5 +6,5 @@ SOURCE_FILES="python_multipart multipart tests" uvx ruff format --check --diff $SOURCE_FILES uvx ruff check $SOURCE_FILES -uvx --with types-PyYAML mypy $SOURCE_FILES +uv run mypy $SOURCE_FILES uvx check-sdist diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 7fbeff7..ce92ff4 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import random import sys @@ -9,6 +10,7 @@ from typing import TYPE_CHECKING, cast from unittest.mock import Mock +import pytest import yaml from python_multipart.decoders import Base64Decoder, QuotedPrintableDecoder @@ -1248,6 +1250,30 @@ def on_file(f: FileProtocol) -> None: f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") f.write(data.encode("latin-1")) + @pytest.fixture(autouse=True) + def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None: + self._caplog = caplog + + def test_multipart_parser_data_end_with_crlf_without_warnings(self) -> None: + """This test makes sure that the parser does not warn when the data ends with a CRLF.""" + data = ( + "--boundary\r\n" + 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' + "Content-Type: text/plain\r\n\r\n" + "hello\r\n" + "--boundary--\r\n" + ) + + files: list[File] = [] + + def on_file(f: FileProtocol) -> None: + files.append(cast(File, f)) + + f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") + with self._caplog.at_level(logging.WARNING): + f.write(data.encode("latin-1")) + assert len(self._caplog.records) == 0 + def test_max_size_multipart(self) -> None: # Load test data. test_file = "single_field_single_file.http"