Skip to content

Commit

Permalink
remove _invalid_iri_to_uri workaround
Browse files Browse the repository at this point in the history
tell Python to handle itms-services scheme correctly
  • Loading branch information
davidism committed May 5, 2024
1 parent 249527f commit 97fb2f7
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 21 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ Unreleased

- Make reloader more robust when ``""`` is in ``sys.path``. :pr:`2823`
- Better TLS cert format with ``adhoc`` dev certs. :pr:`2891`
- Inform Python < 3.12 how to handle ``itms-services`` URIs correctly, rather
than using an overly-broad workaround in Werkzeug that caused some redirect
URIs to be passed on without encoding. :issue:`2828`


Version 3.0.2
Expand Down
25 changes: 6 additions & 19 deletions src/werkzeug/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import codecs
import re
import typing as t
import urllib.parse
from urllib.parse import quote
from urllib.parse import unquote
from urllib.parse import urlencode
Expand Down Expand Up @@ -164,25 +165,11 @@ def iri_to_uri(iri: str) -> str:
return urlunsplit((parts.scheme, netloc, path, query, fragment))


def _invalid_iri_to_uri(iri: str) -> str:
"""The URL scheme ``itms-services://`` must contain the ``//`` even though it does
not have a host component. There may be other invalid schemes as well. Currently,
responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
removes the ``//``. For now, if the IRI only contains ASCII and does not contain
spaces, pass it on as-is. In Werkzeug 3.0, this should become a
``response.process_location`` flag.
:meta private:
"""
try:
iri.encode("ascii")
except UnicodeError:
pass
else:
if len(iri.split(None, 1)) == 1:
return iri

return iri_to_uri(iri)
# Python < 3.12
# itms-services was worked around in previous iri_to_uri implementations, but
# we can tell Python directly that it needs to preserve the //.
# The membership check avoids adding a duplicate entry when the scheme is
# already registered.
if "itms-services" not in urllib.parse.uses_netloc:
    urllib.parse.uses_netloc.append("itms-services")


def _decode_idna(domain: str) -> str:
Expand Down
3 changes: 1 addition & 2 deletions src/werkzeug/wrappers/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from ..http import parse_range_header
from ..http import remove_entity_headers
from ..sansio.response import Response as _SansIOResponse
from ..urls import _invalid_iri_to_uri
from ..urls import iri_to_uri
from ..utils import cached_property
from ..wsgi import _RangeWrapper
Expand Down Expand Up @@ -479,7 +478,7 @@ def get_wsgi_headers(self, environ: WSGIEnvironment) -> Headers:
content_length = value

if location is not None:
location = _invalid_iri_to_uri(location)
location = iri_to_uri(location)

if self.autocorrect_location_header:
# Make the location header an absolute URL.
Expand Down
6 changes: 6 additions & 0 deletions tests/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,9 @@ def test_iri_to_uri_dont_quote_valid_code_points():
# [] are not valid URL code points according to WhatWG URL Standard
# https://url.spec.whatwg.org/#url-code-points
assert urls.iri_to_uri("/path[bracket]?(paren)") == "/path%5Bbracket%5D?(paren)"


# Python < 3.12
def test_itms_services() -> None:
    """``iri_to_uri`` must keep the ``//`` of an ``itms-services`` URI."""
    # The URI has an empty host, so the expected output is the input unchanged.
    url = "itms-services://?action=download-manifest&url=https://test.example/path"
    assert urls.iri_to_uri(url) == url
1 change: 1 addition & 0 deletions tests/test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,7 @@ class MyResponse(wrappers.Response):
("auto", "location", "expect"),
(
(False, "/test", "/test"),
(False, "/\\\\test.example?q", "/%5C%5Ctest.example?q"),
(True, "/test", "http://localhost/test"),
(True, "test", "http://localhost/a/b/test"),
(True, "./test", "http://localhost/a/b/test"),
Expand Down

0 comments on commit 97fb2f7

Please sign in to comment.