From ddf90fb6f0823a6f5d0770998494d8b205b056d9 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Fri, 12 Nov 2021 02:46:43 +0530 Subject: [PATCH] Allow `--plugins` flag to be used multiple times (#725) * deprecate server_file_or_404 * Optionally compress static content. Currently only if content length higher than 300 * trailing comma * Allow `--plugins` flag to be used multiple times Following are valid invocation: 1) `--plugins A` 2) `--plugins A,B` 3) `--plugins A --plugins B` 4) `--plugins A,B --plugins C` * mypy * Flake8 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * correct type * Add `HttpParser.is_https_tunnel()` utility method * mypy * lint checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- README.md | 109 +++++++++++----------- examples/https_connect_tunnel.py | 3 +- proxy/common/constants.py | 5 +- proxy/common/flag.py | 29 ++++-- proxy/common/plugins.py | 21 ++++- proxy/core/connection/connection.py | 1 + proxy/dashboard/dashboard.py | 1 + proxy/http/parser.py | 9 +- proxy/http/proxy/server.py | 12 +-- proxy/http/server/web.py | 81 ++++++++-------- proxy/plugin/proxy_pool.py | 10 +- proxy/plugin/redirect_to_custom_server.py | 3 +- proxy/proxy.py | 10 +- tests/http/test_http_parser.py | 1 + 14 files changed, 174 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index 4825291cf0..5fb7eb2046 100644 --- a/README.md +++ b/README.md @@ -1941,24 +1941,24 @@ for list of tests. ```console ❯ proxy -h -usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--threaded] - [--num-workers NUM_WORKERS] [--pid-file PID_FILE] [--backlog BACKLOG] - [--hostname HOSTNAME] [--port PORT] [--num-acceptors NUM_ACCEPTORS] - [--unix-socket-path UNIX_SOCKET_PATH] - [--client-recvbuf-size CLIENT_RECVBUF_SIZE] [--key-file KEY_FILE] - [--timeout TIMEOUT] [--version] [--log-level LOG_LEVEL] +usage: proxy [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--threaded] + [--num-workers NUM_WORKERS] [--backlog BACKLOG] [--hostname HOSTNAME] + [--port PORT] [--unix-socket-path UNIX_SOCKET_PATH] + [--num-acceptors NUM_ACCEPTORS] [--version] [--log-level LOG_LEVEL] [--log-file LOG_FILE] [--log-format LOG_FORMAT] - [--open-file-limit OPEN_FILE_LIMIT] [--plugins PLUGINS] - [--enable-dashboard] [--work-klass WORK_KLASS] [--disable-http-proxy] - [--ca-key-file CA_KEY_FILE] [--ca-cert-dir CA_CERT_DIR] - [--ca-cert-file CA_CERT_FILE] [--ca-file CA_FILE] - [--ca-signing-key-file CA_SIGNING_KEY_FILE] [--cert-file CERT_FILE] - [--disable-headers DISABLE_HEADERS] + [--open-file-limit OPEN_FILE_LIMIT] [--plugins PLUGINS [PLUGINS ...]] + [--enable-dashboard] [--work-klass WORK_KLASS] [--pid-file PID_FILE] + [--client-recvbuf-size CLIENT_RECVBUF_SIZE] [--key-file KEY_FILE] + [--timeout TIMEOUT] [--disable-http-proxy] [--ca-key-file CA_KEY_FILE] + [--ca-cert-dir CA_CERT_DIR] [--ca-cert-file CA_CERT_FILE] + [--ca-file CA_FILE] [--ca-signing-key-file CA_SIGNING_KEY_FILE] + [--cert-file CERT_FILE] [--disable-headers DISABLE_HEADERS] [--server-recvbuf-size SERVER_RECVBUF_SIZE] [--basic-auth BASIC_AUTH] [--cache-dir CACHE_DIR] [--filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS] [--enable-web-server] [--enable-static-server] [--static-server-dir STATIC_SERVER_DIR] - [--pac-file PAC_FILE] [--pac-file-url-path PAC_FILE_URL_PATH] + [--min-compression-length MIN_COMPRESSION_LENGTH] [--pac-file PAC_FILE] + [--pac-file-url-path PAC_FILE_URL_PATH] [--filtered-client-ips FILTERED_CLIENT_IPS] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] @@ -1971,50 +1971,51 @@ options: Plugins can be used to subscribe for core events. --enable-conn-pool Default: False. (WIP) Enable upstream connection pooling. --threadless Default: True. Enabled by default on Python 3.8+ (mac, - linux). When disabled a new thread is spawned to handle - each client connection. + linux). When disabled a new thread is spawned to handle each + client connection. --threaded Default: False. Disabled by default on Python < 3.8 and - windows. When enabled a new thread is spawned to handle - each client connection. + windows. When enabled a new thread is spawned to handle each + client connection. --num-workers NUM_WORKERS Defaults to number of CPU cores. - --pid-file PID_FILE Default: None. Save parent process ID to a file. - --backlog BACKLOG Default: 100. Maximum number of pending connections to - proxy server + --backlog BACKLOG Default: 100. Maximum number of pending connections to proxy + server --hostname HOSTNAME Default: ::1. Server IP address. --port PORT Default: 8899. Server port. + --unix-socket-path UNIX_SOCKET_PATH + Default: None. Unix socket path to use. When provided --host + and --port flags are ignored --num-acceptors NUM_ACCEPTORS Defaults to number of CPU cores. - --unix-socket-path UNIX_SOCKET_PATH - Default: None. Unix socket path to use. When provided - --host and --port flags are ignored - --client-recvbuf-size CLIENT_RECVBUF_SIZE - Default: 1 MB. Maximum amount of data received from the - client in a single recv() operation. Bump this value for - faster uploads at the expense of increased RAM. - --key-file KEY_FILE Default: None. Server key file to enable end-to-end TLS - encryption with clients. If used, must also pass --cert- - file. - --timeout TIMEOUT Default: 10.0. Number of seconds after which an inactive - connection must be dropped. Inactivity is defined by no - data sent or received by the client. --version, -v Prints proxy.py version. --log-level LOG_LEVEL Valid options: DEBUG, INFO (default), WARNING, ERROR, CRITICAL. Both upper and lowercase values are allowed. You - may also simply use the leading character e.g. --log-level - d + may also simply use the leading character e.g. --log-level d --log-file LOG_FILE Default: sys.stdout. Log file destination. --log-format LOG_FORMAT Log format for Python logger. --open-file-limit OPEN_FILE_LIMIT Default: 1024. Maximum number of files (TCP connections) that proxy.py can open concurrently. - --plugins PLUGINS Comma separated plugins + --plugins PLUGINS [PLUGINS ...] + Comma separated plugins. You may use --plugins flag multiple + times. --enable-dashboard Default: False. Enables proxy.py dashboard. --work-klass WORK_KLASS - Default: proxy.http.handler.HttpProtocolHandler. Work klass - to use for work execution. + Default: proxy.http.HttpProtocolHandler. Work klass to use + for work execution. + --pid-file PID_FILE Default: None. Save "parent" process ID to a file. + --client-recvbuf-size CLIENT_RECVBUF_SIZE + Default: 1 MB. Maximum amount of data received from the + client in a single recv() operation. Bump this value for + faster uploads at the expense of increased RAM. + --key-file KEY_FILE Default: None. Server key file to enable end-to-end TLS + encryption with clients. If used, must also pass --cert- + file. + --timeout TIMEOUT Default: 10.0. Number of seconds after which an inactive + connection must be dropped. Inactivity is defined by no data + sent or received by the client. --disable-http-proxy Default: False. Whether to disable proxy.HttpProxyPlugin. --ca-key-file CA_KEY_FILE Default: None. CA key to use for signing dynamically @@ -2026,19 +2027,18 @@ options: file and --ca-signing-key-file --ca-cert-file CA_CERT_FILE Default: None. Signing certificate to use for signing - dynamically generated HTTPS certificates. If used, must - also pass --ca-key-file and --ca-signing-key-file - --ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv310/lib/pytho - n3.10/site-packages/certifi/cacert.pem. Provide path to + dynamically generated HTTPS certificates. If used, must also + pass --ca-key-file and --ca-signing-key-file + --ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv310/lib/python + 3.10/site-packages/certifi/cacert.pem. Provide path to custom CA bundle for peer certificate verification --ca-signing-key-file CA_SIGNING_KEY_FILE Default: None. CA signing key to use for dynamic generation - of HTTPS certificates. If used, must also pass --ca-key- - file and --ca-cert-file + of HTTPS certificates. If used, must also pass --ca-key-file + and --ca-cert-file --cert-file CERT_FILE Default: None. Server certificate to enable end-to-end TLS - encryption with clients. If used, must also pass --key- - file. + encryption with clients. If used, must also pass --key-file. --disable-headers DISABLE_HEADERS Default: None. Comma separated list of headers to remove before dispatching client request to upstream server. @@ -2055,8 +2055,7 @@ options: --filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS Default: Blocks Facebook. Comma separated list of IPv4 and IPv6 addresses. - --enable-web-server Default: False. Whether to enable - proxy.HttpWebServerPlugin. + --enable-web-server Default: False. Whether to enable proxy.HttpWebServerPlugin. --enable-static-server Default: False. Enable inbuilt static file server. Optionally, also use --static-server-dir to serve static @@ -2065,11 +2064,14 @@ options: folder. --static-server-dir STATIC_SERVER_DIR Default: "public" folder in directory where proxy.py is - placed. This option is only applicable when static server - is also enabled. See --enable-static-server. + placed. This option is only applicable when static server is + also enabled. See --enable-static-server. + --min-compression-length MIN_COMPRESSION_LENGTH + Default: 20 bytes. Sets the minimum length of a response + that will be compressed (gzipped). --pac-file PAC_FILE A file (Proxy Auto Configuration) or string to serve when - the server receives a direct file request. Using this - option enables proxy.HttpWebServerPlugin. + the server receives a direct file request. Using this option + enables proxy.HttpWebServerPlugin. --pac-file-url-path PAC_FILE_URL_PATH Default: /. Web server path to serve the PAC file. --filtered-client-ips FILTERED_CLIENT_IPS @@ -2083,8 +2085,7 @@ options: protection) or "family" (for malware and adult content protection) -Proxy.py not working? Report at: -https://github.com/abhinavsingh/proxy.py/issues/new +Proxy.py not working? Report at: https://github.com/abhinavsingh/proxy.py/issues/new ``` # Changelog diff --git a/examples/https_connect_tunnel.py b/examples/https_connect_tunnel.py index 5b246a5ccb..5e8dbd2b71 100644 --- a/examples/https_connect_tunnel.py +++ b/examples/https_connect_tunnel.py @@ -16,7 +16,6 @@ from proxy.common.utils import build_http_response from proxy.http.codes import httpStatusCodes from proxy.http.parser import httpParserStates -from proxy.http.methods import httpMethods from proxy.core.base import BaseTcpTunnelHandler @@ -51,7 +50,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]: self.request.parse(data) # Drop the request if not a CONNECT request - if self.request.method != httpMethods.CONNECT: + if not self.request.is_https_tunnel(): self.work.queue( HttpsConnectTunnelHandler.PROXY_TUNNEL_UNSUPPORTED_SCHEME, ) diff --git a/proxy/common/constants.py b/proxy/common/constants.py index b5b88186a2..bff6c84426 100644 --- a/proxy/common/constants.py +++ b/proxy/common/constants.py @@ -16,7 +16,7 @@ import sysconfig import ipaddress -from typing import List +from typing import Any, List from .version import __version__ @@ -93,10 +93,11 @@ def _env_threadless_compliant() -> bool: DEFAULT_PAC_FILE = None DEFAULT_PAC_FILE_URL_PATH = b'/' DEFAULT_PID_FILE = None -DEFAULT_PLUGINS = '' +DEFAULT_PLUGINS: List[Any] = [] DEFAULT_PORT = 8899 DEFAULT_SERVER_RECVBUF_SIZE = DEFAULT_BUFFER_SIZE DEFAULT_STATIC_SERVER_DIR = os.path.join(PROXY_PY_DIR, "public") +DEFAULT_MIN_COMPRESSION_LIMIT = 20 # In bytes DEFAULT_THREADLESS = _env_threadless_compliant() DEFAULT_TIMEOUT = 10.0 DEFAULT_VERSION = False diff --git a/proxy/common/flag.py b/proxy/common/flag.py index ac6f2513de..7d8224e44f 100644 --- a/proxy/common/flag.py +++ b/proxy/common/flag.py @@ -21,10 +21,10 @@ from .plugins import Plugins from .types import IpAddress -from .utils import text_, bytes_, is_py2, set_open_file_limit +from .utils import bytes_, is_py2, set_open_file_limit from .constants import COMMA, DEFAULT_DATA_DIRECTORY_PATH, DEFAULT_NUM_ACCEPTORS, DEFAULT_NUM_WORKERS from .constants import DEFAULT_DEVTOOLS_WS_PATH, DEFAULT_DISABLE_HEADERS, PY2_DEPRECATION_MESSAGE -from .constants import PLUGIN_DASHBOARD, PLUGIN_DEVTOOLS_PROTOCOL +from .constants import PLUGIN_DASHBOARD, PLUGIN_DEVTOOLS_PROTOCOL, DEFAULT_MIN_COMPRESSION_LIMIT from .constants import PLUGIN_HTTP_PROXY, PLUGIN_INSPECT_TRAFFIC, PLUGIN_PAC_FILE from .constants import PLUGIN_WEB_SERVER, PLUGIN_PROXY_AUTH from .logger import Logger @@ -110,6 +110,9 @@ def initialize( # proxy.py currently cannot serve over HTTPS and also perform TLS interception # at the same time. Check if user is trying to enable both feature # at the same time. + # + # TODO: Use parser.add_mutually_exclusive_group() + # and remove this logic from here. if (args.cert_file and args.key_file) and \ (args.ca_key_file and args.ca_cert_file and args.ca_signing_key_file): print( @@ -140,18 +143,16 @@ def initialize( bytes_(p) for p in FlagParser.get_default_plugins(args) ] - extra_plugins = [ - p if isinstance(p, type) else bytes_(p) - for p in opts.get('plugins', args.plugins.split(text_(COMMA))) - if not (isinstance(p, str) and len(p) == 0) - ] - plugins = Plugins.load(default_plugins + extra_plugins) + plugins = Plugins.load( + default_plugins + Plugins.resolve_plugin_flag( + args.plugins, opts.get('plugins', None), + ), + ) # https://github.com/python/mypy/issues/5865 # # def option(t: object, key: str, default: Any) -> Any: # return cast(t, opts.get(key, default)) - args.work_klass = work_klass args.plugins = plugins args.auth_code = cast( @@ -284,6 +285,16 @@ def initialize( args.enable_static_server, ), ) + args.min_compression_limit = cast( + bool, + opts.get( + 'min_compression_limit', + getattr( + args, 'min_compression_limit', + DEFAULT_MIN_COMPRESSION_LIMIT, + ), + ), + ) args.devtools_ws_path = cast( bytes, opts.get( diff --git a/proxy/common/plugins.py b/proxy/common/plugins.py index 2c04530b20..52d6a28344 100644 --- a/proxy/common/plugins.py +++ b/proxy/common/plugins.py @@ -12,12 +12,13 @@ import abc import logging import inspect +import itertools import importlib from typing import Any, List, Dict, Optional, Union from .utils import bytes_, text_ -from .constants import DOT, DEFAULT_ABC_PLUGINS +from .constants import DOT, DEFAULT_ABC_PLUGINS, COMMA logger = logging.getLogger(__name__) @@ -25,6 +26,24 @@ class Plugins: """Common utilities for plugin discovery.""" + @staticmethod + def resolve_plugin_flag(flag_plugins: Any, opt_plugins: Optional[Any] = None) -> List[Union[bytes, type]]: + if isinstance(flag_plugins, list): + requested_plugins = list( + itertools.chain.from_iterable([ + p.split(text_(COMMA)) for p in list( + itertools.chain.from_iterable(flag_plugins), + ) + ]), + ) + else: + requested_plugins = flag_plugins.split(text_(COMMA)) + return [ + p if isinstance(p, type) else bytes_(p) + for p in (opt_plugins if opt_plugins is not None else requested_plugins) + if not (isinstance(p, str) and len(p) == 0) + ] + @staticmethod def discover(input_args: List[str]) -> None: """Search for plugin and plugins flag in command line arguments, diff --git a/proxy/core/connection/connection.py b/proxy/core/connection/connection.py index 2827185b56..69651b8e9c 100644 --- a/proxy/core/connection/connection.py +++ b/proxy/core/connection/connection.py @@ -56,6 +56,7 @@ def connection(self) -> Union[ssl.SSLSocket, socket.socket]: def send(self, data: bytes) -> int: """Users must handle BrokenPipeError exceptions""" + # logger.info(data) return self.connection.send(data) def recv( diff --git a/proxy/dashboard/dashboard.py b/proxy/dashboard/dashboard.py index 57fb5aa81c..d9ca1a0c99 100644 --- a/proxy/dashboard/dashboard.py +++ b/proxy/dashboard/dashboard.py @@ -67,6 +67,7 @@ def handle_request(self, request: HttpParser) -> None: self.flags.static_server_dir, 'dashboard', 'proxy.html', ), + self.flags.min_compression_limit, ), ) elif request.path in ( diff --git a/proxy/http/parser.py b/proxy/http/parser.py index f51ec8e45b..209ecee84e 100644 --- a/proxy/http/parser.py +++ b/proxy/http/parser.py @@ -138,11 +138,14 @@ def set_url(self, url: bytes) -> None: # For CONNECT requests, request line contains # upstream_host:upstream_port which is not complaint # with urlsplit, which expects a fully qualified url. - if self.method == httpMethods.CONNECT: + if self.is_https_tunnel(): url = b'https://' + url self._url = urlparse.urlsplit(url) self._set_line_attributes() + def is_https_tunnel(self) -> bool: + return self.method == httpMethods.CONNECT + def is_chunked_encoded(self) -> bool: return b'transfer-encoding' in self.headers and \ self.headers[b'transfer-encoding'][1].lower() == b'chunked' @@ -184,7 +187,7 @@ def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool = COLON + str(self.port).encode() + self.path - ) if self.method != httpMethods.CONNECT else (self.host + COLON + str(self.port).encode()) + ) if not self.is_https_tunnel() else (self.host + COLON + str(self.port).encode()) return build_http_request( self.method, path, self.version, headers={} if not self.headers else { @@ -305,7 +308,7 @@ def _get_body_or_chunks(self) -> Optional[bytes]: def _set_line_attributes(self) -> None: if self.type == httpParserTypes.REQUEST_PARSER: - if self.method == httpMethods.CONNECT and self._url: + if self.is_https_tunnel() and self._url: self.host = self._url.hostname self.port = 443 if self._url.port is None else self._url.port elif self._url: diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index b313542c9f..158b8908e9 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -298,7 +298,7 @@ def read_from_descriptors(self, r: Readables) -> bool: # parse incoming response packet # only for non-https requests and when # tls interception is enabled - if self.request.method != httpMethods.CONNECT: + if not self.request.is_https_tunnel(): # See https://github.com/abhinavsingh/proxy.py/issues/127 for why # currently response parsing is disabled when TLS interception is enabled. # @@ -385,7 +385,7 @@ def on_client_connection_close(self) -> None: def access_log(self, log_attrs: Dict[str, Any]) -> None: access_log_format = DEFAULT_HTTPS_ACCESS_LOG_FORMAT - if self.request.method != httpMethods.CONNECT: + if not self.request.is_https_tunnel(): access_log_format = DEFAULT_HTTP_ACCESS_LOG_FORMAT logger.info(access_log_format.format_map(log_attrs)) @@ -395,7 +395,7 @@ def on_response_chunk(self, chunk: List[memoryview]) -> List[memoryview]: # However, this must also be accompanied by resetting both request # and response objects. # - # if not self.request.method == httpMethods.CONNECT and \ + # if not self.request.is_https_tunnel() and \ # self.response.state == httpParserStates.COMPLETE: # self.access_log() return chunk @@ -426,7 +426,7 @@ def on_client_data(self, raw: memoryview) -> Optional[memoryview]: # We also handle pipeline scenario for https proxy # requests is TLS interception is enabled. if self.request.state == httpParserStates.COMPLETE and ( - self.request.method != httpMethods.CONNECT or + not self.request.is_https_tunnel() or self.tls_interception_enabled() ): if self.pipeline_request is not None and \ @@ -506,7 +506,7 @@ def on_request_complete(self) -> Union[socket.socket, bool]: # For https requests, respond back with tunnel established response. # Optionally, setup interceptor if TLS interception is enabled. if self.upstream: - if self.request.method == httpMethods.CONNECT: + if self.request.is_https_tunnel(): self.client.queue( HttpProxyPlugin.PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT, ) @@ -868,7 +868,7 @@ def emit_request_complete(self) -> None: event_name=eventNames.REQUEST_COMPLETE, event_payload={ 'url': text_(self.request.path) - if self.request.method == httpMethods.CONNECT + if self.request.is_https_tunnel() else 'http://%s:%d%s' % (text_(self.request.host), self.request.port, text_(self.request.path)), 'method': text_(self.request.method), 'headers': {text_(k): text_(v[1]) for k, v in self.request.headers.items()}, diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py index 09c50f199a..588d593685 100644 --- a/proxy/http/server/web.py +++ b/proxy/http/server/web.py @@ -8,28 +8,30 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -import gzip import re +import gzip import time +import socket import logging -import os import mimetypes -import socket + from typing import List, Tuple, Optional, Dict, Union, Any, Pattern -from .plugin import HttpWebServerBasePlugin -from .protocols import httpProtocolTypes +from ...common.constants import DEFAULT_STATIC_SERVER_DIR, PROXY_AGENT_HEADER_VALUE +from ...common.constants import DEFAULT_ENABLE_STATIC_SERVER, DEFAULT_ENABLE_WEB_SERVER +from ...common.constants import DEFAULT_MIN_COMPRESSION_LIMIT +from ...common.utils import bytes_, text_, build_http_response, build_websocket_handshake_response +from ...common.types import Readables, Writables +from ...common.flag import flags + from ..exception import HttpProtocolException from ..websocket import WebsocketFrame, websocketOpcodes from ..codes import httpStatusCodes from ..parser import HttpParser, httpParserStates, httpParserTypes from ..plugin import HttpProtocolHandlerPlugin -from ...common.utils import bytes_, text_, build_http_response, build_websocket_handshake_response -from ...common.constants import DEFAULT_STATIC_SERVER_DIR, PROXY_AGENT_HEADER_VALUE -from ...common.constants import DEFAULT_ENABLE_STATIC_SERVER, DEFAULT_ENABLE_WEB_SERVER -from ...common.types import Readables, Writables -from ...common.flag import flags +from .plugin import HttpWebServerBasePlugin +from .protocols import httpProtocolTypes logger = logging.getLogger(__name__) @@ -60,6 +62,14 @@ 'See --enable-static-server.', ) +flags.add_argument( + '--min-compression-length', + type=int, + default=DEFAULT_MIN_COMPRESSION_LIMIT, + help='Default: ' + str(DEFAULT_MIN_COMPRESSION_LIMIT) + ' bytes. ' + + 'Sets the minimum length of a response that will be compressed (gzipped).', +) + class HttpWebServerPlugin(HttpProtocolHandlerPlugin): """HttpProtocolHandler plugin which handles incoming requests to local web server.""" @@ -119,40 +129,31 @@ def encryption_enabled(self) -> bool: self.flags.certfile is not None @staticmethod - def read_and_build_static_file_response(path: str) -> memoryview: + def read_and_build_static_file_response(path: str, min_compression_limit: int) -> memoryview: with open(path, 'rb') as f: content = f.read() content_type = mimetypes.guess_type(path)[0] if content_type is None: content_type = 'text/plain' + headers = { + b'Content-Type': bytes_(content_type), + b'Cache-Control': b'max-age=86400', + b'Connection': b'close', + } + do_compress = len(content) > min_compression_limit + if do_compress: + headers.update({ + b'Content-Encoding': b'gzip', + }) return memoryview( build_http_response( httpStatusCodes.OK, reason=b'OK', - headers={ - b'Content-Type': bytes_(content_type), - b'Cache-Control': b'max-age=86400', - b'Content-Encoding': b'gzip', - b'Connection': b'close', - }, - body=gzip.compress(content), + headers=headers, + body=gzip.compress(content) if do_compress else content, ), ) - def serve_file_or_404(self, path: str) -> bool: - """Read and serves a file from disk. - - Queues 404 Not Found for IOError. - Shouldn't this be server error? - """ - try: - self.client.queue( - self.read_and_build_static_file_response(path), - ) - except IOError: - self.client.queue(self.DEFAULT_404_RESPONSE) - return True - def try_upgrade(self) -> bool: if self.request.has_header(b'connection') and \ self.request.header(b'connection').lower() == b'upgrade': @@ -207,18 +208,24 @@ def on_request_complete(self) -> Union[socket.socket, bool]: if match: self.route = self.routes[protocol][route] self.route.handle_request(self.request) - if self.request.has_header(b'connection') \ - and self.request.header(b'connection').lower() == b'close': + if self.request.has_header(b'connection') and \ + self.request.header(b'connection').lower() == b'close': return True return False # No-route found, try static serving if enabled if self.flags.enable_static_server: path = text_(self.request.path).split('?')[0] - if os.path.isfile(self.flags.static_server_dir + path): - return self.serve_file_or_404( - self.flags.static_server_dir + path, + try: + self.client.queue( + self.read_and_build_static_file_response( + self.flags.static_server_dir + path, + self.flags.min_compression_limit, + ), ) + return True + except FileNotFoundError: + pass # Catch all unhandled web server requests, return 404 self.client.queue(self.DEFAULT_404_RESPONSE) diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index 17877487a5..6117046f51 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -115,11 +115,15 @@ def before_upstream_connection( # using a datastructure without having to spawn separate thread/process for health # check. logger.info( - 'Connection refused by upstream proxy {0}:{1}'.format(*endpoint), + 'Connection refused by upstream proxy {0}:{1}'.format( + *endpoint, + ), ) raise HttpProtocolException() logger.debug( - 'Established connection to upstream proxy {0}:{1}'.format(*endpoint), + 'Established connection to upstream proxy {0}:{1}'.format( + *endpoint, + ), ) return None @@ -140,7 +144,7 @@ def handle_client_request( else: assert len(parts) == 1 host = parts[0] - port = '443' if request.method == httpMethods.CONNECT else '80' + port = '443' if request.is_https_tunnel() else '80' path = None if not request.path else request.path.decode() self.request_host_port_path_method = [ host, port, path, request.method, diff --git a/proxy/plugin/redirect_to_custom_server.py b/proxy/plugin/redirect_to_custom_server.py index 75d38359eb..bb500d32ee 100644 --- a/proxy/plugin/redirect_to_custom_server.py +++ b/proxy/plugin/redirect_to_custom_server.py @@ -13,7 +13,6 @@ from ..http.proxy import HttpProxyBasePlugin from ..http.parser import HttpParser -from ..http.methods import httpMethods class RedirectToCustomServerPlugin(HttpProxyBasePlugin): @@ -25,7 +24,7 @@ def before_upstream_connection( self, request: HttpParser, ) -> Optional[HttpParser]: # Redirect all non-https requests to inbuilt WebServer. - if request.method != httpMethods.CONNECT: + if not request.is_https_tunnel(): request.set_url(self.UPSTREAM_SERVER) # Update Host header too, otherwise upstream can reject our request if request.has_header(b'Host'): diff --git a/proxy/proxy.py b/proxy/proxy.py index 9e4c116dca..706f864441 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -35,6 +35,10 @@ help='Prints proxy.py version.', ) +# TODO: Convert me into 1-letter choices +# TODO: Add --verbose option which also +# starts to log traffic flowing between +# clients and upstream servers. flags.add_argument( '--log-level', type=str, @@ -68,9 +72,11 @@ flags.add_argument( '--plugins', - type=str, + action='append', + nargs='+', default=DEFAULT_PLUGINS, - help='Comma separated plugins', + help='Comma separated plugins. ' + + 'You may use --plugins flag multiple times.', ) # TODO: Ideally all `--enable-*` flags must be at the top-level. diff --git a/tests/http/test_http_parser.py b/tests/http/test_http_parser.py index b5ede79ecd..3d3ba8f977 100644 --- a/tests/http/test_http_parser.py +++ b/tests/http/test_http_parser.py @@ -24,6 +24,7 @@ def setUp(self) -> None: def test_urlparse(self) -> None: self.parser.parse(b'CONNECT httpbin.org:443 HTTP/1.1\r\n') + self.assertTrue(self.parser.is_https_tunnel()) self.assertEqual(self.parser.host, b'httpbin.org') self.assertEqual(self.parser.port, 443)