From 127303c138f547deb509c908e6a8e7ff04dcfaf3 Mon Sep 17 00:00:00 2001 From: Ivan Ergunov Date: Tue, 17 Sep 2019 00:23:57 +0300 Subject: [PATCH] Handle the HTTP request body as a spooled temp file. Co-authored-by: Carlton Gibson --- CHANGELOG.txt | 7 ++++ channels/http.py | 58 +++++++++++++++--------------- tests/test_http.py | 89 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 112 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index c4f72f264..d06386df9 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Full release notes, with more details and upgrade information, are available at: https://channels.readthedocs.io/en/latest/releases +UNRELEASED +---------- + +* Adjusted HTTP body handling to use a spooled file, rather than reading + request body into memory. ``AsgiRequest.__init__()`` is adjusted to expected + an IO stream, rather than bytes. + 2.2.0 (2019-04-14) ------------------ diff --git a/channels/http.py b/channels/http.py index f2f50f4ff..5ea1ead4c 100644 --- a/channels/http.py +++ b/channels/http.py @@ -2,8 +2,8 @@ import codecs import logging import sys +import tempfile import traceback -from io import BytesIO from django import http from django.conf import settings @@ -30,7 +30,7 @@ class AsgiRequest(http.HttpRequest): # body and aborts. body_receive_timeout = 60 - def __init__(self, scope, body): + def __init__(self, scope, stream): self.scope = scope self._content_length = 0 self._post_parse_error = False @@ -117,21 +117,7 @@ def __init__(self, scope, body): except (ValueError, TypeError): pass # Body handling - # TODO: chunked bodies - - # Limit the maximum request data size that will be handled in-memory. - if ( - settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None - and self._content_length > settings.DATA_UPLOAD_MAX_MEMORY_SIZE - ): - raise RequestDataTooBig( - "Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE." - ) - - self._body = body - assert isinstance(self._body, bytes), "Body is not bytes" - # Add a stream-a-like for the body - self._stream = BytesIO(self._body) + self._stream = stream # Other bits self.resolver_match = None @@ -144,7 +130,6 @@ def _get_scheme(self): def _get_post(self): if not hasattr(self, "_post"): - self._read_started = False self._load_post_and_files() return self._post @@ -153,7 +138,6 @@ def _set_post(self, post): def _get_files(self): if not hasattr(self, "_files"): - self._read_started = False self._load_post_and_files() return self._files @@ -198,20 +182,34 @@ async def __call__(self, receive, send): threadpool. """ self.send = async_to_sync(send) - body = b"" + + # Receive the HTTP request body as a stream object. + try: + body_stream = await self.read_body(receive) + except RequestAborted: + return + # Launch into body handling (and a synchronous subthread). + await self.handle(body_stream) + + async def read_body(self, receive): + """Reads a HTTP body from an ASGI connection.""" + # Use the tempfile that auto rolls-over to a disk file as it fills up. + body_file = tempfile.SpooledTemporaryFile( + max_size=settings.FILE_UPLOAD_MAX_MEMORY_SIZE, mode="w+b" + ) while True: message = await receive() if message["type"] == "http.disconnect": - # Once they disconnect, that's it. GOOD DAY SIR. I SAID GOOD. DAY. - return - else: - # See if the message has body, and if it's the end, launch into - # handling (and a synchronous subthread) - if "body" in message: - body += message["body"] - if not message.get("more_body", False): - await self.handle(body) - return + # Early client disconnect. + raise RequestAborted() + # Add a body chunk from the message, if provided. + if "body" in message: + body_file.write(message["body"]) + # Quit out if that's the end. + if not message.get("more_body", False): + break + body_file.seek(0) + return body_file @sync_to_async def handle(self, body): diff --git a/tests/test_http.py b/tests/test_http.py index c93e1f1db..aff160a05 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -1,10 +1,11 @@ import re import unittest +from io import BytesIO from unittest.mock import MagicMock, patch import pytest from django.core.exceptions import RequestDataTooBig -from django.http import HttpResponse +from django.http import HttpResponse, RawPostDataException from django.test import override_settings from asgiref.testing import ApplicationCommunicator @@ -26,7 +27,7 @@ def test_basic(self): with all optional fields omitted. """ request = AsgiRequest( - {"http_version": "1.1", "method": "GET", "path": "/test/"}, b"" + {"http_version": "1.1", "method": "GET", "path": "/test/"}, BytesIO(b"") ) self.assertEqual(request.path, "/test/") self.assertEqual(request.method, "GET") @@ -58,7 +59,7 @@ def test_extended(self): "client": ["10.0.0.1", 1234], "server": ["10.0.0.2", 80], }, - b"", + BytesIO(b""), ) self.assertEqual(request.path, "/test2/") self.assertEqual(request.method, "GET") @@ -91,7 +92,7 @@ def test_post(self): "content-length": b"18", }, }, - b"djangoponies=are+awesome", + BytesIO(b"djangoponies=are+awesome"), ) self.assertEqual(request.path, "/test2/") self.assertEqual(request.method, "POST") @@ -130,7 +131,7 @@ def test_post_files(self): "content-length": str(len(body)).encode("ascii"), }, }, - body, + BytesIO(body), ) self.assertEqual(request.method, "POST") self.assertEqual(len(request.body), len(body)) @@ -150,7 +151,7 @@ def test_stream(self): "path": "/", "headers": {"host": b"example.com", "content-length": b"11"}, }, - b"onetwothree", + BytesIO(b"onetwothree"), ) self.assertEqual(request.method, "PUT") self.assertEqual(request.read(3), b"one") @@ -169,6 +170,25 @@ def test_script_name(self): self.assertEqual(request.path, "/path/to/test/") + def test_reading_body_after_stream_raises(self): + request = AsgiRequest( + { + "http_version": "1.1", + "method": "POST", + "path": "/test2/", + "query_string": "django=great", + "headers": { + "host": b"example.com", + "content-type": b"application/x-www-form-urlencoded", + "content-length": b"18", + }, + }, + BytesIO(b"djangoponies=are+awesome"), + ) + self.assertEqual(request.read(3), b"dja") + with pytest.raises(RawPostDataException): + request.body + def test_size_exceeded(self): with override_settings(DATA_UPLOAD_MAX_MEMORY_SIZE=1): with pytest.raises(RequestDataTooBig): @@ -179,8 +199,45 @@ def test_size_exceeded(self): "path": "/", "headers": {"host": b"example.com", "content-length": b"1000"}, }, - b"", - ) + BytesIO(b""), + ).body + + def test_size_check_ignores_files(self): + file_data = ( + b"FAKEPDFBYTESGOHERETHISISREALLYLONGBUTNOTUSEDTOCOMPUTETHESIZEOFTHEREQUEST" + ) + body = ( + b"--BOUNDARY\r\n" + + b'Content-Disposition: form-data; name="' + + b"Title" + + b'"\r\n\r\n' + + b"My first book" + + b"\r\n" + + b"--BOUNDARY\r\n" + + b'Content-Disposition: form-data; name="pdf"; filename="book.pdf"\r\n\r\n' + + file_data + + b"--BOUNDARY--" + ) + + scope = { + "http_version": "1.1", + "method": "POST", + "path": "/test/", + "headers": { + "content-type": b"multipart/form-data; boundary=BOUNDARY", + "content-length": str(len(body)).encode("ascii"), + }, + } + + with override_settings(DATA_UPLOAD_MAX_MEMORY_SIZE=1): + with pytest.raises(RequestDataTooBig): + AsgiRequest(scope, BytesIO(body)).POST + + smaller_than_file_data_size = len(file_data) - 20 + with override_settings(DATA_UPLOAD_MAX_MEMORY_SIZE=smaller_than_file_data_size): + # There is no exception, since the setting should not take into + # account the size of the file upload data. + AsgiRequest(scope, BytesIO(body)).POST ### Handler tests @@ -208,7 +265,9 @@ async def test_handler_basic(): await handler.send_input({"type": "http.request"}) await handler.receive_output(1) # response start await handler.receive_output(1) # response body - MockHandler.request_class.assert_called_with(scope, b"") + scope, body_stream = MockHandler.request_class.call_args[0] + body_stream.seek(0) + assert body_stream.read() == b"" @pytest.mark.asyncio @@ -223,7 +282,9 @@ async def test_handler_body_single(): ) await handler.receive_output(1) # response start await handler.receive_output(1) # response body - MockHandler.request_class.assert_called_with(scope, b"chunk one \x01 chunk two") + scope, body_stream = MockHandler.request_class.call_args[0] + body_stream.seek(0) + assert body_stream.read() == b"chunk one \x01 chunk two" @pytest.mark.asyncio @@ -242,7 +303,9 @@ async def test_handler_body_multiple(): await handler.send_input({"type": "http.request", "body": b"chunk two"}) await handler.receive_output(1) # response start await handler.receive_output(1) # response body - MockHandler.request_class.assert_called_with(scope, b"chunk one \x01 chunk two") + scope, body_stream = MockHandler.request_class.call_args[0] + body_stream.seek(0) + assert body_stream.read() == b"chunk one \x01 chunk two" @pytest.mark.asyncio @@ -258,7 +321,9 @@ async def test_handler_body_ignore_extra(): await handler.send_input({"type": "http.request", "body": b" \x01 "}) await handler.receive_output(1) # response start await handler.receive_output(1) # response body - MockHandler.request_class.assert_called_with(scope, b"chunk one") + scope, body_stream = MockHandler.request_class.call_args[0] + body_stream.seek(0) + assert body_stream.read() == b"chunk one" @pytest.mark.django_db(transaction=True)