Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix round-trip of IPv6 addresses when converting to a string #1158

Merged
merged 29 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions tests/test_url_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,3 +604,28 @@ def test_schemes_that_require_host(scheme: str) -> None:
)
with pytest.raises(ValueError, match=expect):
URL(f"{scheme}://:1")


@pytest.mark.parametrize(
("url", "hostname", "hostname_without_brackets"),
[
("http://[::1]", "[::1]", "::1"),
("http://[::1]:8080", "[::1]", "::1"),
("http://127.0.0.1:8080", "127.0.0.1", "127.0.0.1"),
(
"http://xn--jxagkqfkduily1i.eu",
"xn--jxagkqfkduily1i.eu",
"xn--jxagkqfkduily1i.eu",
),
],
)
def test_ipv6_url_round_trips(
bdraco marked this conversation as resolved.
Show resolved Hide resolved
url: str, hostname: str, hostname_without_brackets: str
) -> None:
"""Verify that URLs round-trip correctly through ``str()``.

The cases cover bracketed IPv6 literals (with and without a port), an
IPv4 address, and an IDNA-encoded host name. ``hostname`` is the expected
``host_subcomponent`` and ``hostname_without_brackets`` is the expected
``raw_host``.
"""
bdraco marked this conversation as resolved.
Show resolved Hide resolved
parsed = URL(url)
# The split result and raw_host expose the host without brackets.
assert parsed._val.hostname == hostname_without_brackets
assert parsed.raw_host == hostname_without_brackets
# host_subcomponent keeps the brackets for IPv6 literals.
assert parsed.host_subcomponent == hostname
# Converting to a string, and re-parsing that string, must give the input back.
assert str(parsed) == url
assert str(URL(str(parsed))) == url
40 changes: 31 additions & 9 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def __str__(self) -> str:
netloc=self._make_netloc(
self.raw_user,
self.raw_password,
self.raw_host,
self.host_subcomponent,
port,
encode_host=False,
)
Expand Down Expand Up @@ -647,6 +647,8 @@ def raw_host(self) -> Union[str, None]:

None for relative URLs.

For literal IPv6 addresses, use the host_subcomponent property instead
bdraco marked this conversation as resolved.
Show resolved Hide resolved
as it will return the host part with brackets.
bdraco marked this conversation as resolved.
Show resolved Hide resolved
"""
# Use host instead of hostname for the sake of brevity
# May add a .hostname property later
Expand All @@ -660,16 +662,35 @@ def host(self) -> Union[str, None]:
None for relative URLs.

"""
raw = self.raw_host
if raw is None:
if (raw := self.raw_host) is None:
return None
if "%" in raw:
# Hack for scoped IPv6 addresses like
# fe80::2%Перевірка
# presence of '%' sign means only IPv6 address, so idna is useless.
if raw and (":" in raw or raw[-1].isdigit()):
bdraco marked this conversation as resolved.
Show resolved Hide resolved
# IP addresses are never IDNA encoded
return raw
return _idna_decode(raw)

@cached_property
def host_subcomponent(self) -> Union[str, None]:
    """Return the host subcomponent of the URL.

    None for relative URLs.

    The value is ``raw_host``, except that a host containing a colon
    (an IPv6 literal) is wrapped in square brackets, following the
    grammar in
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2

    `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`

    Examples:
    - `http://example.com:8080` -> `example.com`
    - `http://example.com:80` -> `example.com`
    - `https://127.0.0.1:8443` -> `127.0.0.1`
    - `https://[::1]:8443` -> `[::1]`
    - `http://[::1]` -> `[::1]`

    """
    raw_host = self.raw_host
    if raw_host is None:
        return None
    if ":" in raw_host:
        # A colon marks an IPv6 literal; restore the surrounding brackets.
        return f"[{raw_host}]"
    return raw_host

@cached_property
def port(self) -> Union[int, None]:
"""Port part of URL, with scheme-based fallback.
Expand Down Expand Up @@ -945,15 +966,16 @@ def _encode_host(cls, host: str, human: bool = False) -> str:
raw_ip = host
sep = zone = ""

if raw_ip and raw_ip[-1].isdigit() or ":" in raw_ip:
if raw_ip and (":" in raw_ip or raw_ip[-1].isdigit()):
bdraco marked this conversation as resolved.
Show resolved Hide resolved
# Might be an IP address, check it
#
# IP Addresses can look like:
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
# - 127.0.0.1 (last character is a digit)
# - 2001:db8::ff00:42:8329 (contains a colon)
# - 2001:db8::ff00:42:8329%eth0 (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
# have been removed before it gets here)
# Rare IP Address formats are not supported per:
# https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
#
Expand Down
Loading