From f5047f43c1764d08a91a293d59eff345d71607c4 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Thu, 5 Sep 2019 09:34:35 +0900 Subject: [PATCH] bpo-22347: Update mimetypes.guess_type to allow proper parsing of URLs (GH-15522) https://bugs.python.org/issue22347 --- Lib/mimetypes.py | 3 ++- Lib/test/test_mimetypes.py | 8 ++++++++ Lib/test/test_urllib2.py | 2 +- .../next/Library/2019-08-27-01-03-26.bpo-22347._TRpYr.rst | 2 ++ 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-08-27-01-03-26.bpo-22347._TRpYr.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 01a16fdf9aa1b3..f38005c9d29598 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -114,7 +114,8 @@ def guess_type(self, url, strict=True): but non-standard types. """ url = os.fspath(url) - scheme, url = urllib.parse._splittype(url) + p = urllib.parse.urlparse(url) + scheme, url = p.scheme, p.path if scheme == 'data': # syntax of data URLs: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index bfd5eeedaa77b4..7761c3fe867a7e 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -51,6 +51,14 @@ def test_non_standard_types(self): eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None)) eq(self.db.guess_extension('image/jpg', strict=False), '.jpg') + def test_url(self): + result = self.db.guess_type('http://host.html') + msg = 'URL only has a host name, not a file' + self.assertSequenceEqual(result, (None, None), msg) + result = self.db.guess_type('http://example.com/host.html') + msg = 'Should be text/html' + self.assertSequenceEqual(result, ('text/html', None), msg) + def test_guess_all_types(self): eq = self.assertEqual unless = self.assertTrue diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 7b576db4e3aaf4..c228fa7d1dc12d 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -742,7 +742,7 @@ def connect_ftp(self, user, passwd, host, port, dirs, ["foo", "bar"], "", None), ("ftp://localhost/baz.gif;type=a", "localhost", ftplib.FTP_PORT, "", "", "A", - [], "baz.gif", None), # XXX really this should guess image/gif + [], "baz.gif", "image/gif"), ]: req = Request(url) req.timeout = None diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.bpo-22347._TRpYr.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.bpo-22347._TRpYr.rst new file mode 100644 index 00000000000000..1a3c19938217c4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-27-01-03-26.bpo-22347._TRpYr.rst @@ -0,0 +1,2 @@ +Update mimetypes.guess_type to allow proper parsing of URLs with only a host name. +Patch by Dong-hee Na.