From 42118e3c2b68243df00a6d9fd03cd4495f496210 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 15 Apr 2022 13:24:52 +0200 Subject: [PATCH 01/13] improve performance of get_proxies_environment when there are many environment variables --- Lib/urllib/request.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 84997f268c9304..ebddc903733a03 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2492,33 +2492,34 @@ def real_close(self): # Proxy handling def getproxies_environment(): """Return a dictionary of scheme -> proxy server URL mappings. - Scan the environment for variables named <scheme>_proxy; this seems to be the standard convention. If you need a different way, you can pass a proxies dictionary to the [Fancy]URLopener constructor. - """ - proxies = {} # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value + + # select only environment variables which end in (after making lowercase) _proxy + candidate_names = [name for name in os.environ.keys() if name[-6:]=='_'] # fast selection of candidates + environment = [(name, os.environ[name], name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] + + proxies = {} + for name, value, name_lower in environment: + if value and name_lower[-6:] == '_proxy': + proxies[name_lower[:-6]] = value # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY # (non-all-lowercase) as it may be set from the web server by a "Proxy:" # header from the client # If "proxy" is lowercase, it will still be used thanks to the next block if 'REQUEST_METHOD' in os.environ: proxies.pop('http', None) - for name, value in os.environ.items(): + for name, value, name_lower in environment: if name[-6:] == 
'_proxy': - name = name.lower() if value: - proxies[name[:-6]] = value + proxies[name_lower[:-6]] = value else: - proxies.pop(name[:-6], None) + proxies.pop(name_lower[:-6], None) return proxies def proxy_bypass_environment(host, proxies=None): From 37ff8d9fb0ed143842351dd23ccdb9f443289299 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 15 Apr 2022 11:29:39 +0000 Subject: [PATCH 02/13] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst diff --git a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst new file mode 100644 index 00000000000000..8337543b2f1265 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst @@ -0,0 +1 @@ +Improve performance of `urllib.request.get_proxies_environment` when there are many environment variables From b8de96294e5c1b97f871266e4686318563bcf7cf Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 15 Apr 2022 13:34:20 +0200 Subject: [PATCH 03/13] fix case of short env name --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index ebddc903733a03..d7d5794605ceb0 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2501,7 +2501,7 @@ def getproxies_environment(): # two passes: first matches any, second pass matches lowercase only # select only environment variables which end in (after making lowercase) _proxy - candidate_names = [name for name in os.environ.keys() if name[-6:]=='_'] # fast selection of candidates + candidate_names = [name for name 
in os.environ.keys() if len(name)>5 and name[-6]=='_'] # fast selection of candidates environment = [(name, os.environ[name], name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] proxies = {} From 336da07d16351a16f34d1458c9eeec3bddcc8034 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 15 Apr 2022 13:39:59 +0200 Subject: [PATCH 04/13] fix formatting --- .../next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst index 8337543b2f1265..037955c809d2da 100644 --- a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst +++ b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst @@ -1 +1 @@ -Improve performance of `urllib.request.get_proxies_environment` when there are many environment variables +Improve performance of ``urllib.request.get_proxies_environment`` when there are many environment variables From cabeb47cc0c556b0278704add666d1d52a751a5f Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 15 Apr 2022 14:17:57 +0200 Subject: [PATCH 05/13] fix whitespace --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index d7d5794605ceb0..7efa290532911e 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2503,7 +2503,7 @@ def getproxies_environment(): # select only environment variables which end in (after making lowercase) _proxy candidate_names = [name for name in os.environ.keys() if len(name)>5 and name[-6]=='_'] # fast selection of candidates environment = [(name, os.environ[name], name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] - + proxies = {} for name, value, name_lower in environment: if value and name_lower[-6:] == '_proxy': From 
c58174891dc82c7f6a6bbe8a0a03d5b5087b9072 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 15 Apr 2022 14:41:29 +0200 Subject: [PATCH 06/13] whitespace --- Lib/urllib/request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 7efa290532911e..26b207cfab95ba 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2500,9 +2500,9 @@ def getproxies_environment(): # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only - # select only environment variables which end in (after making lowercase) _proxy + # select only environment variables which end in (after making lowercase) _proxy candidate_names = [name for name in os.environ.keys() if len(name)>5 and name[-6]=='_'] # fast selection of candidates - environment = [(name, os.environ[name], name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] + environment = [(name, os.environ[name], name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] proxies = {} for name, value, name_lower in environment: From dba04145c2e9dfb35f5794eeeeed882e460ceab7 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 10:03:47 +0200 Subject: [PATCH 07/13] Update Lib/urllib/request.py Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 7d7cf2b1815ad1..0260a9914a7b41 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2501,13 +2501,16 @@ def getproxies_environment(): # two passes: first matches any, second pass matches lowercase only # select only environment variables which end in (after making lowercase) _proxy - candidate_names = [name for name in os.environ.keys() if len(name)>5 and name[-6]=='_'] # fast selection of candidates - environment = [(name, os.environ[name], 
name.lower()) for name in candidate_names if name[-6:].lower()=='_proxy'] - proxies = {} - for name, value, name_lower in environment: - if value and name_lower[-6:] == '_proxy': - proxies[name_lower[:-6]] = value + environment = [] + for name in os.environ.keys(): + # fast screen underscore position before more expensive case-folding + if len(name) > 5 and name[-6] == "_" and name[-5:].lower() == "proxy": + value = os.environ[name] + proxy_name = name[:-6].lower() + environment.append((name, value, proxy_name)) + if value: + proxies[proxy_name] = value # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY # (non-all-lowercase) as it may be set from the web server by a "Proxy:" # header from the client From 76f16ca13b4d7d793386c4fb3e0e66fcaf9ea9ec Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 10:03:56 +0200 Subject: [PATCH 08/13] Update Lib/urllib/request.py Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 0260a9914a7b41..259e52f4b7144e 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2517,7 +2517,7 @@ def getproxies_environment(): # If "proxy" is lowercase, it will still be used thanks to the next block if 'REQUEST_METHOD' in os.environ: proxies.pop('http', None) - for name, value, name_lower in environment: + for name, value, proxy_name in environment: if name[-6:] == '_proxy': if value: proxies[name_lower[:-6]] = value From d3feb07e1d69255087f92b82fdafe7bc37855975 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 10:04:03 +0200 Subject: [PATCH 09/13] Update Lib/urllib/request.py Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 259e52f4b7144e..e10c4228c3ccf7 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2520,7 +2520,7 @@ def 
getproxies_environment(): for name, value, proxy_name in environment: if name[-6:] == '_proxy': if value: - proxies[name_lower[:-6]] = value + proxies[proxy_name] = value else: proxies.pop(name_lower[:-6], None) return proxies From b9ab037c9ab6a046f5ef2c5db243841e11ffd134 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 10:04:09 +0200 Subject: [PATCH 10/13] Update Lib/urllib/request.py Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index e10c4228c3ccf7..b62d5d8dc92806 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2522,7 +2522,7 @@ def getproxies_environment(): if value: proxies[proxy_name] = value else: - proxies.pop(name_lower[:-6], None) + proxies.pop(proxy_name, None) return proxies def proxy_bypass_environment(host, proxies=None): From f96150587daa573c819101aa361b6f7a677bb890 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 10:52:24 +0200 Subject: [PATCH 11/13] whitespace --- Lib/urllib/request.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index b62d5d8dc92806..0f205e2feb6a60 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2492,6 +2492,7 @@ def real_close(self): # Proxy handling def getproxies_environment(): """Return a dictionary of scheme -> proxy server URL mappings. + Scan the environment for variables named <scheme>_proxy; this seems to be the standard convention. 
If you need a different way, you can pass a proxies dictionary to the From 36e5497a9f6d1dcda4d2a7b1ed851e8be0bd31af Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 18:48:03 +0200 Subject: [PATCH 12/13] Update Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst Co-authored-by: Carl Meyer --- .../next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst index 037955c809d2da..16d61f1b91102d 100644 --- a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst +++ b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst @@ -1 +1 @@ -Improve performance of ``urllib.request.get_proxies_environment`` when there are many environment variables +Improve performance of ``urllib.request.getproxies_environment`` when there are many environment variables From ca4fbd40b31d01f657ebfda3979d050afb7505d6 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 18 May 2022 18:50:08 +0200 Subject: [PATCH 13/13] Update Lib/urllib/request.py Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 0f205e2feb6a60..01a4d7fc30ed95 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2519,6 +2519,7 @@ def getproxies_environment(): if 'REQUEST_METHOD' in os.environ: proxies.pop('http', None) for name, value, proxy_name in environment: + # not case-folded, checking here for lower-case env vars only if name[-6:] == '_proxy': if value: proxies[proxy_name] = value