From 31d020e94996606f6cd716bd585f9993372d2990 Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Mon, 8 Jul 2024 05:00:06 +0000 Subject: [PATCH 1/7] Fix response headers for compressed file requests --- CHANGES/4462.bugfix.rst | 1 + aiohttp/web_fileresponse.py | 29 ++++++++++++++++++++------- tests/test_web_sendfile_functional.py | 19 +++++++++++++----- 3 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 CHANGES/4462.bugfix.rst diff --git a/CHANGES/4462.bugfix.rst b/CHANGES/4462.bugfix.rst new file mode 100644 index 00000000000..b7fe5fc1664 --- /dev/null +++ b/CHANGES/4462.bugfix.rst @@ -0,0 +1 @@ +Fixed server response headers for ``Content-Type`` and ``Content-Encoding`` for static compressed files -- by :user:`steverep`. diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 94db64dd601..6b61978bb6c 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -48,6 +48,19 @@ {ext: mimetypes.encodings_map[ext] for ext in (".br", ".gz")} ) +FALLBACK_CONTENT_TYPE = "application/octet-stream" + +# https://en.wikipedia.org/wiki/List_of_archive_formats#Compression_only +ENCODING_CONTENT_TYPES = MappingProxyType( + { + "gzip": "application/gzip", + "br": "application/x-brotli", + "bzip2": "application/x-bzip2", + "compress": "application/x-compress", + "xz": "application/x-xz", + } +) + class FileResponse(StreamResponse): """A response object can be used to send files.""" @@ -192,13 +205,16 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter ): return await self._not_modified(request, etag_value, last_modified) + # If the Content-Type header is not already set, guess it based on the + # extension of the request path. If the request is for a compressed + # file, map the encoding back to the correct content type. ct = None if hdrs.CONTENT_TYPE not in self.headers: - ct, encoding = mimetypes.guess_type(str(file_path)) - if not ct: - ct = "application/octet-stream" - else: - encoding = file_encoding + ct, encoding = mimetypes.guess_type(str(self._path)) + if encoding: + ct = ENCODING_CONTENT_TYPES.get(encoding, FALLBACK_CONTENT_TYPE) + elif not ct: + ct = FALLBACK_CONTENT_TYPE status = self._status file_size = st.st_size @@ -276,9 +292,8 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter if ct: self.content_type = ct - if encoding: - self.headers[hdrs.CONTENT_ENCODING] = encoding if file_encoding: + self.headers[hdrs.CONTENT_ENCODING] = file_encoding self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING # Disable compression if we are already sending # a compressed file since we don't want to double diff --git a/tests/test_web_sendfile_functional.py b/tests/test_web_sendfile_functional.py index 7605f3771b0..6b5ed5b8da3 100644 --- a/tests/test_web_sendfile_functional.py +++ b/tests/test_web_sendfile_functional.py @@ -1,5 +1,6 @@ # type: ignore import asyncio +import bz2 import gzip import pathlib import socket @@ -37,10 +38,12 @@ def hello_txt(request, tmp_path_factory) -> pathlib.Path: None: txt, "gzip": txt.with_suffix(f"{txt.suffix}.gz"), "br": txt.with_suffix(f"{txt.suffix}.br"), + "bzip2": txt.with_suffix(f"{txt.suffix}.bz2"), } hello[None].write_bytes(HELLO_AIOHTTP) hello["gzip"].write_bytes(gzip.compress(HELLO_AIOHTTP)) hello["br"].write_bytes(brotli.compress(HELLO_AIOHTTP)) + hello["bzip2"].write_bytes(bz2.compress(HELLO_AIOHTTP)) encoding = getattr(request, "param", None) return hello[encoding] @@ -318,10 +321,16 @@ async def handler(request): @pytest.mark.parametrize( - ("hello_txt", "expect_encoding"), [["gzip"] * 2, ["br"] * 2], indirect=["hello_txt"] + ("hello_txt", "expect_type"), + [ + ("gzip", "application/gzip"), + ("br", "application/x-brotli"), + ("bzip2", "application/x-bzip2"), + ], + indirect=["hello_txt"], ) async def test_static_file_with_content_encoding( - hello_txt: pathlib.Path, aiohttp_client: Any, sender: Any, expect_encoding: str + hello_txt: pathlib.Path, aiohttp_client: Any, sender: Any, expect_type: str ) -> None: """Test requesting static compressed files returns the correct content type and encoding.""" @@ -334,9 +343,9 @@ async def handler(request): resp = await client.get("/") assert resp.status == 200 - assert resp.headers.get("Content-Encoding") == expect_encoding - assert resp.headers["Content-Type"] == "text/plain" - assert await resp.read() == HELLO_AIOHTTP + assert resp.headers.get("Content-Encoding") is None + assert resp.headers["Content-Type"] == expect_type + assert await resp.read() == hello_txt.read_bytes() resp.close() await resp.release() From 8fc851f640f62e99ebcc2c974e27b760dffaa928 Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Tue, 9 Jul 2024 16:08:01 +0000 Subject: [PATCH 2/7] Add more detail for users to change fragment --- CHANGES/4462.bugfix.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES/4462.bugfix.rst b/CHANGES/4462.bugfix.rst index b7fe5fc1664..fe897a08b39 100644 --- a/CHANGES/4462.bugfix.rst +++ b/CHANGES/4462.bugfix.rst @@ -1 +1,7 @@ -Fixed server response headers for ``Content-Type`` and ``Content-Encoding`` for static compressed files -- by :user:`steverep`. +Fixed server response headers for ``Content-Type`` and ``Content-Encoding`` for +static compressed files -- by :user:`steverep`. + +Server will now respond with a ``Content-Type`` appropriate for the compressed +file (e.g. ``"application/gzip"``), and omit the ``Content-Encoding`` header. +Users should expect that most clients will no longer decompress such responses +by default. From 839aedd71d116baf23fda499e857cd90dcf2c695 Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Fri, 12 Jul 2024 21:06:39 +0000 Subject: [PATCH 3/7] Clear mimetypes encodings and add custom type/extension pairs --- aiohttp/web_fileresponse.py | 39 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 6b61978bb6c..1a43444a4e5 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -50,17 +50,23 @@ FALLBACK_CONTENT_TYPE = "application/octet-stream" +# Provide additional MIME type/extension pairs to be recognized. +# IANA-registered types can be skipped (e.g. application/gzip). # https://en.wikipedia.org/wiki/List_of_archive_formats#Compression_only -ENCODING_CONTENT_TYPES = MappingProxyType( +ADDITIONAL_CONTENT_TYPES = MappingProxyType( { - "gzip": "application/gzip", - "br": "application/x-brotli", - "bzip2": "application/x-bzip2", - "compress": "application/x-compress", - "xz": "application/x-xz", + "application/x-brotli": ".br", + "application/x-bzip2": ".bz2", + "application/x-compress": ".Z", + "application/x-xz": ".xz", } ) +# Add custom pairs and clear the encodings map so guess_type ignores them. +mimetypes.encodings_map.clear() +for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): + mimetypes.add_type(content_type, extension) + class FileResponse(StreamResponse): """A response object can be used to send files.""" @@ -205,17 +211,6 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter ): return await self._not_modified(request, etag_value, last_modified) - # If the Content-Type header is not already set, guess it based on the - # extension of the request path. If the request is for a compressed - # file, map the encoding back to the correct content type. - ct = None - if hdrs.CONTENT_TYPE not in self.headers: - ct, encoding = mimetypes.guess_type(str(self._path)) - if encoding: - ct = ENCODING_CONTENT_TYPES.get(encoding, FALLBACK_CONTENT_TYPE) - elif not ct: - ct = FALLBACK_CONTENT_TYPE - status = self._status file_size = st.st_size count = file_size @@ -290,8 +285,14 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # return a HTTP 206 for a Range request. self.set_status(status) - if ct: - self.content_type = ct + # If the Content-Type header is not already set, guess it based on the + # extension of the request path. The encoding returned by guess_type + # can be ignored since the map was cleared above. + if hdrs.CONTENT_TYPE not in self.headers: + self.content_type = ( + mimetypes.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE + ) + if file_encoding: self.headers[hdrs.CONTENT_ENCODING] = file_encoding self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING From 2b9ce111b05ea82153f5e3212902c8f20202c5c5 Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Fri, 12 Jul 2024 21:54:08 +0000 Subject: [PATCH 4/7] Add back application/gzip for MacOS --- aiohttp/web_fileresponse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 1a43444a4e5..fce6685d81a 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -51,10 +51,10 @@ FALLBACK_CONTENT_TYPE = "application/octet-stream" # Provide additional MIME type/extension pairs to be recognized. -# IANA-registered types can be skipped (e.g. application/gzip). # https://en.wikipedia.org/wiki/List_of_archive_formats#Compression_only ADDITIONAL_CONTENT_TYPES = MappingProxyType( { + "application/gzip": ".gz", "application/x-brotli": ".br", "application/x-bzip2": ".bz2", "application/x-compress": ".Z", From 89f03bb9ea8f63013aa716aacc5ad54d6176c70f Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Fri, 12 Jul 2024 23:59:40 +0000 Subject: [PATCH 5/7] Use custom MIMETypes object --- aiohttp/web_fileresponse.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index fce6685d81a..fc494ef626c 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -1,9 +1,9 @@ import asyncio -import mimetypes import os import pathlib import sys from contextlib import suppress +from mimetypes import MimeTypes from types import MappingProxyType from typing import ( IO, @@ -40,12 +40,14 @@ NOSENDFILE: Final[bool] = bool(os.environ.get("AIOHTTP_NOSENDFILE")) +content_types = MimeTypes() + if sys.version_info < (3, 9): - mimetypes.encodings_map[".br"] = "br" + content_types.encodings_map[".br"] = "br" # File extension to IANA encodings map that will be checked in the order defined. ENCODING_EXTENSIONS = MappingProxyType( - {ext: mimetypes.encodings_map[ext] for ext in (".br", ".gz")} + {ext: content_types.encodings_map[ext] for ext in (".br", ".gz")} ) FALLBACK_CONTENT_TYPE = "application/octet-stream" @@ -63,9 +65,9 @@ ) # Add custom pairs and clear the encodings map so guess_type ignores them. -mimetypes.encodings_map.clear() +content_types.encodings_map.clear() for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): - mimetypes.add_type(content_type, extension) + content_types.add_type(content_type, extension) class FileResponse(StreamResponse): @@ -290,7 +292,7 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # can be ignored since the map was cleared above. if hdrs.CONTENT_TYPE not in self.headers: self.content_type = ( - mimetypes.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE + content_types.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE ) if file_encoding: From 606b62af9266326d16c9644a518bfdd5d5a78376 Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Sat, 13 Jul 2024 00:23:41 +0000 Subject: [PATCH 6/7] Ignore mypy error --- aiohttp/web_fileresponse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index fc494ef626c..cec99131721 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -67,7 +67,7 @@ # Add custom pairs and clear the encodings map so guess_type ignores them. content_types.encodings_map.clear() for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): - content_types.add_type(content_type, extension) + content_types.add_type(content_type, extension) # type: ignore[attr-defined] class FileResponse(StreamResponse): From dd78dbb4cfd1390fbcdd7c43297bf39d7762deaa Mon Sep 17 00:00:00 2001 From: Steve Repsher Date: Sat, 13 Jul 2024 02:06:05 +0000 Subject: [PATCH 7/7] Rename constant object --- aiohttp/web_fileresponse.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index cec99131721..75cd12ef363 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -40,14 +40,14 @@ NOSENDFILE: Final[bool] = bool(os.environ.get("AIOHTTP_NOSENDFILE")) -content_types = MimeTypes() +CONTENT_TYPES: Final[MimeTypes] = MimeTypes() if sys.version_info < (3, 9): - content_types.encodings_map[".br"] = "br" + CONTENT_TYPES.encodings_map[".br"] = "br" # File extension to IANA encodings map that will be checked in the order defined. ENCODING_EXTENSIONS = MappingProxyType( - {ext: content_types.encodings_map[ext] for ext in (".br", ".gz")} + {ext: CONTENT_TYPES.encodings_map[ext] for ext in (".br", ".gz")} ) FALLBACK_CONTENT_TYPE = "application/octet-stream" @@ -65,9 +65,9 @@ ) # Add custom pairs and clear the encodings map so guess_type ignores them. -content_types.encodings_map.clear() +CONTENT_TYPES.encodings_map.clear() for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): - content_types.add_type(content_type, extension) # type: ignore[attr-defined] + CONTENT_TYPES.add_type(content_type, extension) # type: ignore[attr-defined] class FileResponse(StreamResponse): @@ -292,7 +292,7 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # can be ignored since the map was cleared above. if hdrs.CONTENT_TYPE not in self.headers: self.content_type = ( - content_types.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE + CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE ) if file_encoding: