From 7c0cda153d301bde9a011e1dd7157d7e2b20889d Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 26 Jul 2024 09:36:03 +0100 Subject: [PATCH] Improve InvalidURL error message. (#3250) --- CHANGELOG.md | 1 + httpx/_urlparse.py | 17 ++++++++++++++--- tests/models/test_url.py | 10 ++++++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ded9d27f..d8c2ec7ccb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed +* Improved error messaging for `InvalidURL` exceptions. (#3250) * Fix `app` type signature in `ASGITransport`. (#3109) ## 0.27.0 (21st February, 2024) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 215c5368f6..479c2ef8a1 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -160,7 +160,12 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # If a URL includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in url): - raise InvalidURL("Invalid non-printable ASCII character in URL") + char = next(char for char in url if char.isascii() and not char.isprintable()) + idx = url.find(char) + error = ( + f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}." + ) + raise InvalidURL(error) # Some keyword arguments require special handling. # ------------------------------------------------ @@ -205,9 +210,15 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in value): - raise InvalidURL( - f"Invalid non-printable ASCII character in URL component '{key}'" + char = next( + char for char in value if char.isascii() and not char.isprintable() + ) + idx = value.find(char) + error = ( + f"Invalid non-printable ASCII character in URL {key} component, " + f"{char!r} at position {idx}." ) + raise InvalidURL(error) # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value): diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 48872be89f..523a89bf65 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -367,15 +367,17 @@ def test_url_excessively_long_component(): def test_url_non_printing_character_in_url(): with pytest.raises(httpx.InvalidURL) as exc: httpx.URL("https://www.example.com/\n") - assert str(exc.value) == "Invalid non-printable ASCII character in URL" + assert str(exc.value) == ( + "Invalid non-printable ASCII character in URL, '\\n' at position 24." + ) def test_url_non_printing_character_in_component(): with pytest.raises(httpx.InvalidURL) as exc: httpx.URL("https://www.example.com", path="/\n") - assert ( - str(exc.value) - == "Invalid non-printable ASCII character in URL component 'path'" + assert str(exc.value) == ( + "Invalid non-printable ASCII character in URL path component, " + "'\\n' at position 1." )