Skip to content

Commit

Permalink
pythongh-67693: Fix urlunparse() and urlunsplit() for URIs with path …
Browse files Browse the repository at this point in the history
…starting with multiple slashes and no authority (pythonGH-113563)
  • Loading branch information
serhiy-storchaka authored and estyxx committed Jul 17, 2024
1 parent 2743976 commit 73e7fcf
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
70 changes: 67 additions & 3 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,17 @@

class UrlParseTestCase(unittest.TestCase):

def checkRoundtrips(self, url, parsed, split):
def checkRoundtrips(self, url, parsed, split, url2=None):
if url2 is None:
url2 = url
result = urllib.parse.urlparse(url)
self.assertSequenceEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment)
self.assertSequenceEqual(t, parsed)
# put it back together and it should be the same
result2 = urllib.parse.urlunparse(result)
self.assertSequenceEqual(result2, url)
self.assertSequenceEqual(result2, url2)
self.assertSequenceEqual(result2, result.geturl())

# the result of geturl() is a fixpoint; we can always parse it
Expand All @@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
result.query, result.fragment)
self.assertSequenceEqual(t, split)
result2 = urllib.parse.urlunsplit(result)
self.assertSequenceEqual(result2, url)
self.assertSequenceEqual(result2, url2)
self.assertSequenceEqual(result2, result.geturl())

# check the fixpoint property of re-parsing the result of geturl()
Expand Down Expand Up @@ -175,9 +177,39 @@ def test_qs(self):

def test_roundtrips(self):
str_cases = [
('path/to/file',
('', '', 'path/to/file', '', '', ''),
('', '', 'path/to/file', '', '')),
('/path/to/file',
('', '', '/path/to/file', '', '', ''),
('', '', '/path/to/file', '', '')),
('//path/to/file',
('', 'path', '/to/file', '', '', ''),
('', 'path', '/to/file', '', '')),
('////path/to/file',
('', '', '//path/to/file', '', '', ''),
('', '', '//path/to/file', '', '')),
('scheme:path/to/file',
('scheme', '', 'path/to/file', '', '', ''),
('scheme', '', 'path/to/file', '', '')),
('scheme:/path/to/file',
('scheme', '', '/path/to/file', '', '', ''),
('scheme', '', '/path/to/file', '', '')),
('scheme://path/to/file',
('scheme', 'path', '/to/file', '', '', ''),
('scheme', 'path', '/to/file', '', '')),
('scheme:////path/to/file',
('scheme', '', '//path/to/file', '', '', ''),
('scheme', '', '//path/to/file', '', '')),
('file:///tmp/junk.txt',
('file', '', '/tmp/junk.txt', '', '', ''),
('file', '', '/tmp/junk.txt', '', '')),
('file:////tmp/junk.txt',
('file', '', '//tmp/junk.txt', '', '', ''),
('file', '', '//tmp/junk.txt', '', '')),
('file://///tmp/junk.txt',
('file', '', '///tmp/junk.txt', '', '', ''),
('file', '', '///tmp/junk.txt', '', '')),
('imap://mail.python.org/mbox1',
('imap', 'mail.python.org', '/mbox1', '', '', ''),
('imap', 'mail.python.org', '/mbox1', '', '')),
Expand Down Expand Up @@ -213,6 +245,38 @@ def _encode(t):
for url, parsed, split in str_cases + bytes_cases:
self.checkRoundtrips(url, parsed, split)

def test_roundtrips_normalization(self):
str_cases = [
('///path/to/file',
'/path/to/file',
('', '', '/path/to/file', '', '', ''),
('', '', '/path/to/file', '', '')),
('scheme:///path/to/file',
'scheme:/path/to/file',
('scheme', '', '/path/to/file', '', '', ''),
('scheme', '', '/path/to/file', '', '')),
('file:/tmp/junk.txt',
'file:///tmp/junk.txt',
('file', '', '/tmp/junk.txt', '', '', ''),
('file', '', '/tmp/junk.txt', '', '')),
('http:/tmp/junk.txt',
'http:///tmp/junk.txt',
('http', '', '/tmp/junk.txt', '', '', ''),
('http', '', '/tmp/junk.txt', '', '')),
('https:/tmp/junk.txt',
'https:///tmp/junk.txt',
('https', '', '/tmp/junk.txt', '', '', ''),
('https', '', '/tmp/junk.txt', '', '')),
]
def _encode(t):
return (t[0].encode('ascii'),
t[1].encode('ascii'),
tuple(x.encode('ascii') for x in t[2]),
tuple(x.encode('ascii') for x in t[3]))
bytes_cases = [_encode(x) for x in str_cases]
for url, url2, parsed, split in str_cases + bytes_cases:
self.checkRoundtrips(url, parsed, split, url2)

def test_http_roundtrips(self):
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
Expand Down
2 changes: 1 addition & 1 deletion Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def urlunsplit(components):
empty query; the RFC states that these are equivalent)."""
scheme, netloc, url, query, fragment, _coerce_result = (
_coerce_args(*components))
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
if url and url[:1] != '/': url = '/' + url
url = '//' + (netloc or '') + url
if scheme:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs whose path starts with multiple slashes and which have no authority.
Based on patch by Ashwin Ramaswami.

0 comments on commit 73e7fcf

Please sign in to comment.