diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 51b99701c9d727..b33051f5331514 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -120,7 +120,13 @@ def guess_type(self, url, strict=True): but non-standard types. """ url = os.fspath(url) - scheme, url = urllib.parse._splittype(url) + p = urllib.parse.urlparse(url) + if p.scheme and len(p.scheme) > 1: + scheme = p.scheme + url = p.path + else: + scheme = None + url = os.path.splitdrive(url)[1] if scheme == 'data': # syntax of data URLs: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 01bba0ac2eed5a..cc9bae893bb55a 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -1,5 +1,6 @@ import io import mimetypes +import os import pathlib import sys import unittest.mock @@ -109,15 +110,40 @@ def test_filename_with_url_delimiters(self): # compared to when interpreted as filename because of the semicolon. eq = self.assertEqual gzip_expected = ('application/x-tar', 'gzip') - eq(self.db.guess_type(";1.tar.gz"), gzip_expected) - eq(self.db.guess_type("?1.tar.gz"), gzip_expected) - eq(self.db.guess_type("#1.tar.gz"), gzip_expected) - eq(self.db.guess_type("#1#.tar.gz"), gzip_expected) - eq(self.db.guess_type(";1#.tar.gz"), gzip_expected) - eq(self.db.guess_type(";&1=123;?.tar.gz"), gzip_expected) - eq(self.db.guess_type("?k1=v1&k2=v2.tar.gz"), gzip_expected) + for name in ( + ';1.tar.gz', + '?1.tar.gz', + '#1.tar.gz', + '#1#.tar.gz', + ';1#.tar.gz', + ';&1=123;?.tar.gz', + '?k1=v1&k2=v2.tar.gz', + ): + for prefix in ('', '/', '\\', + 'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\', + '//share/server/', '\\\\share\\server\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_type(path), gzip_expected) + expected = (None, None) if os.name == 'nt' else gzip_expected + for prefix in ('//', '\\\\', '//share/', '\\\\share\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_type(path), expected) eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) + def test_url(self): + result = self.db.guess_type('http://host.html') + msg = 'URL only has a host name, not a file' + self.assertSequenceEqual(result, (None, None), msg) + result = self.db.guess_type('http://example.com/host.html') + msg = 'Should be text/html' + self.assertSequenceEqual(result, ('text/html', None), msg) + result = self.db.guess_type('http://example.com/host.html#x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + result = self.db.guess_type('http://example.com/host.html?q=x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + def test_guess_all_types(self): # First try strict. Use a set here for testing the results because if # test_urllib2 is run before test_mimetypes, global state is modified diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 739c15df13de21..6febb491788b42 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -777,7 +777,7 @@ def connect_ftp(self, user, passwd, host, port, dirs, ["foo", "bar"], "", None), ("ftp://localhost/baz.gif;type=a", "localhost", ftplib.FTP_PORT, "", "", "A", - [], "baz.gif", None), # XXX really this should guess image/gif + [], "baz.gif", "image/gif"), ]: req = Request(url) req.timeout = None diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.gh-issue-66543._TRpYr.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.gh-issue-66543._TRpYr.rst new file mode 100644 index 00000000000000..62f7aa2490bb73 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.gh-issue-66543._TRpYr.rst @@ -0,0 +1,4 @@ +Make :func:`mimetypes.guess_type` properly parsing of URLs with only a host +name, URLs containing fragment or query, and filenames with only a UNC +sharepoint on Windows. +Based on patch by Dong-hee Na.