diff --git a/changelog.d/9372.feature b/changelog.d/9372.feature new file mode 100644 index 000000000000..3cb01004c97b --- /dev/null +++ b/changelog.d/9372.feature @@ -0,0 +1 @@ +The `no_proxy` and `NO_PROXY` environment variables are now respected in proxied HTTP clients with the lowercase form taking precedence if both are present. Additionally, the lowercase `https_proxy` environment variable is now respected in proxied HTTP clients on top of existing support for the uppercase `HTTPS_PROXY` form and takes precedence if both are present. Contributed by Timothy Leung. diff --git a/synapse/http/client.py b/synapse/http/client.py index e54d9bd213c9..a910548f1edb 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -289,8 +289,7 @@ def __init__( treq_args: Dict[str, Any] = {}, ip_whitelist: Optional[IPSet] = None, ip_blacklist: Optional[IPSet] = None, - http_proxy: Optional[bytes] = None, - https_proxy: Optional[bytes] = None, + use_proxy: bool = False, ): """ Args: @@ -300,8 +299,8 @@ def __init__( we may not request. ip_whitelist: The whitelisted IP addresses, that we can request if it were otherwise caught in a blacklist. - http_proxy: proxy server to use for http connections. host[:port] - https_proxy: proxy server to use for https connections. host[:port] + use_proxy: Whether proxy settings should be discovered and used + from conventional environment variables. """ self.hs = hs @@ -345,8 +344,7 @@ def __init__( connectTimeout=15, contextFactory=self.hs.get_http_client_context_factory(), pool=pool, - http_proxy=http_proxy, - https_proxy=https_proxy, + use_proxy=use_proxy, ) if self._ip_blacklist: diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index b730d2c63452..3d553ae236cf 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -14,6 +14,7 @@ # limitations under the License. import logging import re +from urllib.request import getproxies_environment, proxy_bypass_environment from zope.interface import implementer @@ -58,6 +59,9 @@ class ProxyAgent(_AgentBase): pool (HTTPConnectionPool|None): connection pool to be used. If None, a non-persistent pool instance will be created. + + use_proxy (bool): Whether proxy settings should be discovered and used + from conventional environment variables. """ def __init__( @@ -68,8 +72,7 @@ def __init__( connectTimeout=None, bindAddress=None, pool=None, - http_proxy=None, - https_proxy=None, + use_proxy=False, ): _AgentBase.__init__(self, reactor, pool) @@ -84,6 +87,15 @@ def __init__( if bindAddress is not None: self._endpoint_kwargs["bindAddress"] = bindAddress + http_proxy = None + https_proxy = None + no_proxy = None + if use_proxy: + proxies = getproxies_environment() + http_proxy = proxies["http"].encode() if "http" in proxies else None + https_proxy = proxies["https"].encode() if "https" in proxies else None + no_proxy = proxies["no"] if "no" in proxies else None + self.http_proxy_endpoint = _http_proxy_endpoint( http_proxy, self.proxy_reactor, **self._endpoint_kwargs ) @@ -92,6 +104,8 @@ def __init__( https_proxy, self.proxy_reactor, **self._endpoint_kwargs ) + self.no_proxy = no_proxy + self._policy_for_https = contextFactory self._reactor = reactor @@ -139,13 +153,28 @@ def request(self, method, uri, headers=None, bodyProducer=None): pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port) request_path = parsed_uri.originForm - if parsed_uri.scheme == b"http" and self.http_proxy_endpoint: + should_skip_proxy = False + if self.no_proxy is not None: + should_skip_proxy = proxy_bypass_environment( + parsed_uri.host.decode(), + proxies={"no": self.no_proxy}, + ) + + if ( + parsed_uri.scheme == b"http" + and self.http_proxy_endpoint + and not should_skip_proxy + ): # Cache *all* connections under the same key, since we are only # connecting to a single destination, the proxy: pool_key = ("http-proxy", self.http_proxy_endpoint) endpoint = self.http_proxy_endpoint request_path = uri - elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: + elif ( + parsed_uri.scheme == b"https" + and self.https_proxy_endpoint + and not should_skip_proxy + ): endpoint = HTTPConnectProxyEndpoint( self.proxy_reactor, self.https_proxy_endpoint, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6104ef4e46f6..89dc6b1c9871 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -149,8 +149,7 @@ def __init__( treq_args={"browser_like_redirects": True}, ip_whitelist=hs.config.url_preview_ip_range_whitelist, ip_blacklist=hs.config.url_preview_ip_range_blacklist, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), + use_proxy=True, ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path diff --git a/synapse/server.py b/synapse/server.py index 4b9ec7f0ae98..1d4370e0bac2 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -24,7 +24,6 @@ import abc import functools import logging -import os from typing import ( TYPE_CHECKING, Any, @@ -370,11 +369,7 @@ def get_proxied_http_client(self) -> SimpleHttpClient: """ An HTTP client that uses configured HTTP(S) proxies. """ - return SimpleHttpClient( - self, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), - ) + return SimpleHttpClient(self, use_proxy=True) @cache_in_self def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: @@ -386,8 +381,7 @@ def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: self, ip_whitelist=self.config.ip_range_whitelist, ip_blacklist=self.config.ip_range_blacklist, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), + use_proxy=True, ) @cache_in_self diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index 9a56e1c14afc..505ffcd300fc 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import os +from unittest.mock import patch import treq from netaddr import IPSet @@ -100,22 +102,36 @@ def _make_connection( return http_protocol - def test_http_request(self): - agent = ProxyAgent(self.reactor) + def _test_request_direct_connection(self, agent, scheme, hostname, path): + """Runs a test case for a direct connection not going through a proxy. - self.reactor.lookups["test.com"] = "1.2.3.4" - d = agent.request(b"GET", b"http://test.com") + Args: + agent (ProxyAgent): the proxy agent being tested + + scheme (bytes): expected to be either "http" or "https" + + hostname (bytes): the hostname to connect to in the test + + path (bytes): the path to connect to in the test + """ + is_https = scheme == b"https" + + self.reactor.lookups[hostname.decode()] = "1.2.3.4" + d = agent.request(b"GET", scheme + b"://" + hostname + b"/" + path) # there should be a pending TCP connection clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients[0] self.assertEqual(host, "1.2.3.4") - self.assertEqual(port, 80) + self.assertEqual(port, 443 if is_https else 80) # make a test server, and wire up the client http_server = self._make_connection( - client_factory, _get_test_protocol_factory() + client_factory, + _get_test_protocol_factory(), + ssl=is_https, + expected_sni=hostname if is_https else None, ) # the FakeTransport is async, so we need to pump the reactor @@ -126,8 +142,8 @@ def test_http_request(self): request = http_server.requests[0] self.assertEqual(request.method, b"GET") - self.assertEqual(request.path, b"/") - self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + self.assertEqual(request.path, b"/" + path) + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [hostname]) request.write(b"result") request.finish() @@ -137,48 +153,58 @@ def test_http_request(self): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + def test_http_request(self): + agent = ProxyAgent(self.reactor) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + def test_https_request(self): agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy()) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") - self.reactor.lookups["test.com"] = "1.2.3.4" - d = agent.request(b"GET", b"https://test.com/abc") + def test_http_request_use_proxy_empty_environment(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") - # there should be a pending TCP connection - clients = self.reactor.tcpClients - self.assertEqual(len(clients), 1) - (host, port, client_factory, _timeout, _bindAddress) = clients[0] - self.assertEqual(host, "1.2.3.4") - self.assertEqual(port, 443) + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"}) + def test_http_request_via_uppercase_no_proxy(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") - # make a test server, and wire up the client - http_server = self._make_connection( - client_factory, - _get_test_protocol_factory(), - ssl=True, - expected_sni=b"test.com", - ) - - # the FakeTransport is async, so we need to pump the reactor - self.reactor.advance(0) - - # now there should be a pending request - self.assertEqual(len(http_server.requests), 1) + @patch.dict( + os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"} + ) + def test_http_request_via_no_proxy(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") - request = http_server.requests[0] - self.assertEqual(request.method, b"GET") - self.assertEqual(request.path, b"/abc") - self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) - request.write(b"result") - request.finish() + @patch.dict( + os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"} + ) + def test_https_request_via_no_proxy(self): + agent = ProxyAgent( + self.reactor, + contextFactory=get_test_https_policy(), + use_proxy=True, + ) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") - self.reactor.advance(0) + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"}) + def test_http_request_via_no_proxy_star(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") - resp = self.successResultOf(d) - body = self.successResultOf(treq.content(resp)) - self.assertEqual(body, b"result") + @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"}) + def test_https_request_via_no_proxy_star(self): + agent = ProxyAgent( + self.reactor, + contextFactory=get_test_https_policy(), + use_proxy=True, + ) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"}) def test_http_request_via_proxy(self): - agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888") + agent = ProxyAgent(self.reactor, use_proxy=True) self.reactor.lookups["proxy.com"] = "1.2.3.5" d = agent.request(b"GET", b"http://test.com") @@ -214,11 +240,12 @@ def test_http_request_via_proxy(self): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"}) def test_https_request_via_proxy(self): agent = ProxyAgent( self.reactor, contextFactory=get_test_https_policy(), - https_proxy=b"proxy.com", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5" @@ -294,6 +321,7 @@ def test_https_request_via_proxy(self): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) def test_http_request_via_proxy_with_blacklist(self): # The blacklist includes the configured proxy IP. agent = ProxyAgent( @@ -301,7 +329,7 @@ def test_http_request_via_proxy_with_blacklist(self): self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) ), self.reactor, - http_proxy=b"proxy.com:8888", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5" @@ -338,7 +366,8 @@ def test_http_request_via_proxy_with_blacklist(self): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") - def test_https_request_via_proxy_with_blacklist(self): + @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"}) + def test_https_request_via_uppercase_proxy_with_blacklist(self): # The blacklist includes the configured proxy IP. agent = ProxyAgent( BlacklistingReactorWrapper( @@ -346,7 +375,7 @@ def test_https_request_via_proxy_with_blacklist(self): ), self.reactor, contextFactory=get_test_https_policy(), - https_proxy=b"proxy.com", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5"