From cacb452215dc0e87bbe9c585edf21878debf0eea Mon Sep 17 00:00:00 2001 From: jianghang Date: Wed, 24 Feb 2021 17:06:14 +0800 Subject: [PATCH 1/7] Fix unicode length --- httpx/_multipart.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index f690afc9ae..945c9ed07b 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -74,6 +74,8 @@ def __init__(self, name: str, value: FileTypes) -> None: except ValueError: filename, fileobj = value # type: ignore content_type = guess_content_type(filename) + if isinstance(fileobj, str): + fileobj = to_bytes(fileobj) else: filename = Path(str(getattr(value, "name", "upload"))).name fileobj = value @@ -87,7 +89,7 @@ def __init__(self, name: str, value: FileTypes) -> None: def get_length(self) -> int: headers = self.render_headers() - if isinstance(self.file, (str, bytes)): + if isinstance(self.file, bytes): return len(headers) + len(self.file) # Let's do our best not to read `file` into memory. @@ -119,8 +121,8 @@ def render_headers(self) -> bytes: return self._headers def render_data(self) -> typing.Iterator[bytes]: - if isinstance(self.file, (str, bytes)): - yield to_bytes(self.file) + if isinstance(self.file, bytes): + yield self.file return if hasattr(self, "_data"): From a60a89e2b52ebb2aedfac2d96dc1f2a1052dff0e Mon Sep 17 00:00:00 2001 From: jianghang Date: Wed, 24 Feb 2021 17:06:24 +0800 Subject: [PATCH 2/7] Add test case --- tests/test_multipart.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 94813932a8..672217f1ed 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -105,7 +105,10 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: "c": ["11", "22", "33"], "d": "", } - files = {"file": ("name.txt", open(path, "rb"))} + files = { + "file": ("name.txt", open(path, "rb")), + "file2": ("file2.txt", "<únicode string>") + } with mock.patch("os.urandom", return_value=os.urandom(16)): boundary = os.urandom(16).hex() @@ -123,8 +126,11 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: '--{0}\r\nContent-Disposition: form-data; name="file";' ' filename="name.txt"\r\n' "Content-Type: text/plain\r\n\r\n\r\n" + '--{0}\r\nContent-Disposition: form-data; name="file2";' + ' filename="file2.txt"\r\n' + "Content-Type: text/plain\r\n\r\n<únicode string>\r\n" "--{0}--\r\n" - "".format(boundary).encode("ascii") + "".format(boundary).encode("utf-8") ) assert headers == { "Content-Type": f"multipart/form-data; boundary={boundary}", From b874f49b8811f236bb94f17439ba2ce83a78b1f2 Mon Sep 17 00:00:00 2001 From: jianghang Date: Wed, 24 Feb 2021 17:13:54 +0800 Subject: [PATCH 3/7] Black --- tests/test_multipart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 672217f1ed..b17b28e342 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -107,7 +107,7 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: } files = { "file": ("name.txt", open(path, "rb")), - "file2": ("file2.txt", "<únicode string>") + "file2": ("file2.txt", "<únicode string>"), } with mock.patch("os.urandom", return_value=os.urandom(16)): From 70f688ce6100659a9f834f0fedaacb78ac59e41b Mon Sep 17 00:00:00 2001 From: jianghang Date: Wed, 24 Feb 2021 17:31:33 +0800 Subject: [PATCH 4/7] Fix test type error --- tests/test_multipart.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_multipart.py b/tests/test_multipart.py index b17b28e342..b9edabf2bc 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -8,6 +8,7 @@ import httpx from httpx._content import encode_request +from httpx._types import RequestFiles from httpx._utils import format_form_param @@ -105,7 +106,7 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: "c": ["11", "22", "33"], "d": "", } - files = { + files: RequestFiles = { "file": ("name.txt", open(path, "rb")), "file2": ("file2.txt", "<únicode string>"), } From 9b27b7b6396cc879262846ee383a7592e0d39654 Mon Sep 17 00:00:00 2001 From: jianghang Date: Wed, 24 Feb 2021 18:33:58 +0800 Subject: [PATCH 5/7] Fix mypy --- httpx/_multipart.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 945c9ed07b..b28e552f87 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -2,6 +2,7 @@ import os import typing from pathlib import Path +from typing import IO, Union from ._types import FileContent, FileTypes, RequestFiles from ._utils import ( @@ -78,11 +79,11 @@ def __init__(self, name: str, value: FileTypes) -> None: fileobj = to_bytes(fileobj) else: filename = Path(str(getattr(value, "name", "upload"))).name - fileobj = value + fileobj = typing.cast(Union[IO[bytes], IO[str]], value) content_type = guess_content_type(filename) self.filename = filename - self.file = fileobj + self.file: Union[IO[bytes], IO[str], bytes] = fileobj self.content_type = content_type self._consumed = False From cfe9a22a399f7b1ab7d3097e837c21621c8cf225 Mon Sep 17 00:00:00 2001 From: jianghang Date: Thu, 25 Feb 2021 00:00:08 +0800 Subject: [PATCH 6/7] Use to_bytes --- httpx/_multipart.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index b28e552f87..81c5a8059f 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -41,11 +41,7 @@ def render_headers(self) -> bytes: def render_data(self) -> bytes: if not hasattr(self, "_data"): - self._data = ( - self.value - if isinstance(self.value, bytes) - else self.value.encode("utf-8") - ) + self._data = to_bytes(self.value) return self._data From d091fe5fcc27fc72a7bd979f1631ade75cecc581 Mon Sep 17 00:00:00 2001 From: jianghang Date: Mon, 1 Mar 2021 00:03:32 +0800 Subject: [PATCH 7/7] Reconsider types --- httpx/_multipart.py | 17 +++++++++-------- httpx/_types.py | 10 ++++++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 81c5a8059f..2b22a4d31d 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -2,9 +2,8 @@ import os import typing from pathlib import Path -from typing import IO, Union -from ._types import FileContent, FileTypes, RequestFiles +from ._types import FileTypes, RequestFiles from ._utils import ( format_form_param, guess_content_type, @@ -63,23 +62,25 @@ class FileField: def __init__(self, name: str, value: FileTypes) -> None: self.name = name - fileobj: FileContent - if isinstance(value, tuple): try: filename, fileobj, content_type = value # type: ignore except ValueError: filename, fileobj = value # type: ignore content_type = guess_content_type(filename) - if isinstance(fileobj, str): - fileobj = to_bytes(fileobj) else: filename = Path(str(getattr(value, "name", "upload"))).name - fileobj = typing.cast(Union[IO[bytes], IO[str]], value) + fileobj = value content_type = guess_content_type(filename) + if isinstance(fileobj, str): + # Ensure we only deal with bytes to prevent any content-length + # mismatch due to str -> bytes encoding. + # See: https://github.com/encode/httpx/issues/1482 + fileobj = to_bytes(fileobj) + self.filename = filename - self.file: Union[IO[bytes], IO[str], bytes] = fileobj + self.file = fileobj self.content_type = content_type self._consumed = False diff --git a/httpx/_types.py b/httpx/_types.py index 7768bac11b..17dfdcbfef 100644 --- a/httpx/_types.py +++ b/httpx/_types.py @@ -73,13 +73,15 @@ RequestData = dict -FileContent = Union[IO[str], IO[bytes], str, bytes] +FileContent = Union[IO[str], IO[bytes], bytes] FileTypes = Union[ - # file (or text) + # text + str, + # file FileContent, # (filename, file (or text)) - Tuple[Optional[str], FileContent], + Tuple[Optional[str], Union[str, FileContent]], # (filename, file (or text), content_type) - Tuple[Optional[str], FileContent, Optional[str]], + Tuple[Optional[str], Union[str, FileContent], Optional[str]], ] RequestFiles = Union[Mapping[str, FileTypes], Sequence[Tuple[str, FileTypes]]]