From aae7b43ca3d2bb2028370b8252ccb51006827429 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 5 Dec 2022 14:47:57 -0800 Subject: [PATCH] gh-100001: Also escape \s in http.server log messages. (GH-100038) Also \ escape \s in the http.server BaseHTTPRequestHandler.log_message so that it is technically possible to parse the line and reconstruct what the original data was. Without this a \xHH is ambiguous as to if it is a hex replacement we put in or the characters r"\x" came through in the original request line. (cherry picked from commit 7e29398407dbd53b714702abb89aa2fd7baca48a) Co-authored-by: Gregory P. Smith --- Lib/http/server.py | 1 + Lib/test/test_httpservers.py | 2 ++ .../Library/2022-12-05-13-40-15.gh-issue-100001.78ReYp.rst | 5 +++++ 3 files changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-12-05-13-40-15.gh-issue-100001.78ReYp.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index ca429428fdfd26..03dbaa51b798b7 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -567,6 +567,7 @@ def log_error(self, format, *args): # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes _control_char_table = str.maketrans( {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) + _control_char_table[ord('\\')] = r'\\' def log_message(self, format, *args): """Log an arbitrary message. 
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 34e0e3548359b0..ac8da494e9bb83 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -994,6 +994,7 @@ def test_unprintable_not_logged(self): log_message(self.handler, '/\033bar\000\033') log_message(self.handler, '/spam %s.', 'a') log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans') + log_message(self.handler, '"GET /foo\\b"ar\007 HTTP/1.0"') stderr = fake_stderr.getvalue() self.assertNotIn('\033', stderr) # non-printable chars are caught. self.assertNotIn('\000', stderr) # non-printable chars are caught. @@ -1002,6 +1003,7 @@ def test_unprintable_not_logged(self): self.assertIn(r'/\x1bbar\x00\x1b', lines[1]) self.assertIn('/spam a.', lines[2]) self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3]) + self.assertIn(r'"GET /foo\\b"ar\x07 HTTP/1.0"', lines[4]) def test_http_1_1(self): result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n') diff --git a/Misc/NEWS.d/next/Library/2022-12-05-13-40-15.gh-issue-100001.78ReYp.rst b/Misc/NEWS.d/next/Library/2022-12-05-13-40-15.gh-issue-100001.78ReYp.rst new file mode 100644 index 00000000000000..e305352c7a5532 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-12-05-13-40-15.gh-issue-100001.78ReYp.rst @@ -0,0 +1,5 @@ +Also \ escape \s in the http.server BaseHTTPRequestHandler.log_message so +that it is technically possible to parse the line and reconstruct what the +original data was. Without this a \xHH is ambiguous as to if it is a hex +replacement we put in or the characters r"\x" came through in the original +request line.