Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Content-Lenght calculation of unicode string #1484

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions httpx/_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import typing
from pathlib import Path
from typing import IO, Union

from ._types import FileContent, FileTypes, RequestFiles
from ._utils import (
Expand Down Expand Up @@ -40,11 +41,7 @@ def render_headers(self) -> bytes:

def render_data(self) -> bytes:
if not hasattr(self, "_data"):
self._data = (
self.value
if isinstance(self.value, bytes)
else self.value.encode("utf-8")
)
j178 marked this conversation as resolved.
Show resolved Hide resolved
self._data = to_bytes(self.value)

return self._data

Expand Down Expand Up @@ -74,20 +71,22 @@ def __init__(self, name: str, value: FileTypes) -> None:
except ValueError:
filename, fileobj = value # type: ignore
content_type = guess_content_type(filename)
if isinstance(fileobj, str):
fileobj = to_bytes(fileobj)
j178 marked this conversation as resolved.
Show resolved Hide resolved
else:
filename = Path(str(getattr(value, "name", "upload"))).name
fileobj = value
fileobj = typing.cast(Union[IO[bytes], IO[str]], value)
content_type = guess_content_type(filename)

self.filename = filename
self.file = fileobj
self.file: Union[IO[bytes], IO[str], bytes] = fileobj
j178 marked this conversation as resolved.
Show resolved Hide resolved
self.content_type = content_type
self._consumed = False

def get_length(self) -> int:
headers = self.render_headers()

if isinstance(self.file, (str, bytes)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The low-footprint change here is presumably to keep this check the same, but use the following...

return len(headers) + len(to_bytes(self.file))

Right?

if isinstance(self.file, bytes):
return len(headers) + len(self.file)
florimondmanca marked this conversation as resolved.
Show resolved Hide resolved

# Let's do our best not to read `file` into memory.
Expand Down Expand Up @@ -119,8 +118,8 @@ def render_headers(self) -> bytes:
return self._headers

def render_data(self) -> typing.Iterator[bytes]:
if isinstance(self.file, (str, bytes)):
yield to_bytes(self.file)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't need to change this.

if isinstance(self.file, bytes):
yield self.file
return

if hasattr(self, "_data"):
Expand Down
11 changes: 9 additions & 2 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import httpx
from httpx._content import encode_request
from httpx._types import RequestFiles
from httpx._utils import format_form_param


Expand Down Expand Up @@ -105,7 +106,10 @@ def test_multipart_encode(tmp_path: typing.Any) -> None:
"c": ["11", "22", "33"],
"d": "",
}
files = {"file": ("name.txt", open(path, "rb"))}
files: RequestFiles = {
"file": ("name.txt", open(path, "rb")),
"file2": ("file2.txt", "<únicode string>"),
}

with mock.patch("os.urandom", return_value=os.urandom(16)):
boundary = os.urandom(16).hex()
Expand All @@ -123,8 +127,11 @@ def test_multipart_encode(tmp_path: typing.Any) -> None:
'--{0}\r\nContent-Disposition: form-data; name="file";'
' filename="name.txt"\r\n'
"Content-Type: text/plain\r\n\r\n<file content>\r\n"
'--{0}\r\nContent-Disposition: form-data; name="file2";'
' filename="file2.txt"\r\n'
"Content-Type: text/plain\r\n\r\n<únicode string>\r\n"
"--{0}--\r\n"
"".format(boundary).encode("ascii")
"".format(boundary).encode("utf-8")
)
assert headers == {
"Content-Type": f"multipart/form-data; boundary={boundary}",
Expand Down