From dfcf523c5d71eb6f2284c04087142147c13d8d12 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 20 Aug 2024 18:09:12 +0300 Subject: [PATCH] gh-85110: Preserve relative path in URL without netloc in urllib.parse.urlunsplit() --- Lib/test/test_urlparse.py | 37 +++++++++++++++---- Lib/urllib/parse.py | 8 +++- ...4-08-20-18-02-27.gh-issue-85110.8_iDQy.rst | 2 + 3 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d6c83a75c1c03a..3dbbd9cf1d3364 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -207,6 +207,9 @@ def test_roundtrips(self): ('scheme://///path/to/file', ('scheme', '', '///path/to/file', '', '', ''), ('scheme', '', '///path/to/file', '', '')), + ('file:tmp/junk.txt', + ('file', '', 'tmp/junk.txt', '', '', ''), + ('file', '', 'tmp/junk.txt', '', '')), ('file:///tmp/junk.txt', ('file', '', '/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), @@ -216,6 +219,18 @@ def test_roundtrips(self): ('file://///tmp/junk.txt', ('file', '', '///tmp/junk.txt', '', '', ''), ('file', '', '///tmp/junk.txt', '', '')), + ('http:tmp/junk.txt', + ('http', '', 'tmp/junk.txt', '', '', ''), + ('http', '', 'tmp/junk.txt', '', '')), + ('http://example.com/tmp/junk.txt', + ('http', 'example.com', '/tmp/junk.txt', '', '', ''), + ('http', 'example.com', '/tmp/junk.txt', '', '')), + ('http:///example.com/tmp/junk.txt', + ('http', '', '/example.com/tmp/junk.txt', '', '', ''), + ('http', '', '/example.com/tmp/junk.txt', '', '')), + ('http:////example.com/tmp/junk.txt', + ('http', '', '//example.com/tmp/junk.txt', '', '', ''), + ('http', '', '//example.com/tmp/junk.txt', '', '')), ('imap://mail.python.org/mbox1', ('imap', 'mail.python.org', '/mbox1', '', '', ''), ('imap', 'mail.python.org', '/mbox1', '', '')), @@ -260,7 +275,8 @@ def _encode(t): ('', '', 'schème:path/to/file', '', '')), ] for url, parsed, split in str_cases + bytes_cases: - self.checkRoundtrips(url, parsed, split) + with self.subTest(url): + self.checkRoundtrips(url, parsed, split) def test_roundtrips_normalization(self): str_cases = [ @@ -292,7 +308,8 @@ def _encode(t): tuple(x.encode('ascii') for x in t[3])) bytes_cases = [_encode(x) for x in str_cases] for url, url2, parsed, split in str_cases + bytes_cases: - self.checkRoundtrips(url, parsed, split, url2) + with self.subTest(url): + self.checkRoundtrips(url, parsed, split, url2) def test_http_roundtrips(self): # urllib.parse.urlsplit treats 'http:' as an optimized special case, @@ -333,11 +350,17 @@ def _encode(t): self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected): - str_components = (base, relurl, expected) - self.assertEqual(urllib.parse.urljoin(base, relurl), expected) - bytes_components = baseb, relurlb, expectedb = [ - x.encode('ascii') for x in str_components] - self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) + with self.subTest(base=base, relurl=relurl): + self.assertEqual(urllib.parse.urljoin(base, relurl), expected) + baseb = base.encode('ascii') + relurlb = relurl.encode('ascii') + expectedb = expected.encode('ascii') + self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) + + relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl)) + self.assertEqual(urllib.parse.urljoin(base, relurl), expected) + relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb)) + self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) def test_unparse_parse(self): str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 8f724f907d4217..33165309daf0c4 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -525,9 +525,13 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//': + if netloc: if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url + url = '//' + netloc + url + elif url[:2] == '//': + url = '//' + url + elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'): + url = '//' + url if scheme: url = scheme + ':' + url if query: diff --git a/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst b/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst new file mode 100644 index 00000000000000..f22fac16b79c0b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst @@ -0,0 +1,2 @@ +Preserve relative path in URL without netloc in +:func:`urllib.parse.urlunsplit` and :func:`urllib.parse.urlunparse`.