diff --git a/httpx/_multipart.py b/httpx/_multipart.py index f690afc9ae..bf75a5663b 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -40,11 +40,7 @@ def render_headers(self) -> bytes: def render_data(self) -> bytes: if not hasattr(self, "_data"): - self._data = ( - self.value - if isinstance(self.value, bytes) - else self.value.encode("utf-8") - ) + self._data = to_bytes(self.value) return self._data @@ -88,7 +84,7 @@ def get_length(self) -> int: headers = self.render_headers() if isinstance(self.file, (str, bytes)): - return len(headers) + len(self.file) + return len(headers) + len(to_bytes(self.file)) # Let's do our best not to read `file` into memory. try: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 94813932a8..199af4b0a5 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -133,6 +133,29 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: assert content == b"".join(stream) +def test_multipart_encode_unicode_file_contents() -> None: + files = {"file": ("name.txt", "<únicode string>")} + + with mock.patch("os.urandom", return_value=os.urandom(16)): + boundary = os.urandom(16).hex() + + headers, stream = encode_request(files=files) + assert isinstance(stream, typing.Iterable) + + content = ( + '--{0}\r\nContent-Disposition: form-data; name="file";' + ' filename="name.txt"\r\n' + "Content-Type: text/plain\r\n\r\n<únicode string>\r\n" + "--{0}--\r\n" + "".format(boundary).encode("utf-8") + ) + assert headers == { + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(content)), + } + assert content == b"".join(stream) + + def test_multipart_encode_files_allows_filenames_as_none() -> None: files = {"file": (None, io.BytesIO(b""))} with mock.patch("os.urandom", return_value=os.urandom(16)):