From 7bd706cfeb4d0d6cd7cdda39ed30d7828aabe4ca Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Thu, 13 Jan 2022 13:34:30 +0100 Subject: [PATCH] [fix] Quote command line segment using shlex Simply joining command segments is not enough for acquiring a valid command. One should escape the necessary characters prior to that. Escaping can be done manually, but using shlex is the preferred way of doing so. This patch might not fix all of the issues. There were multiple places where this was not done adequately: * at calling `intercept-build` * at parsing the `compile_commands.json`'s `arguments` field * at extending the command with the referred response file's content * at extending the command without response files * at the `libtest/codechecker.py/all_command()` helper function * at a bunch of other places where a command is being logged - but for brevity, I've just fixed the two most important places of this - `analyzer_base.py/analyze()` - `analysis_manager.py/handle_reproducer()` --- .../codechecker_analyzer/analysis_manager.py | 5 +- .../analyzers/analyzer_base.py | 4 +- .../buildlog/build_manager.py | 3 +- .../buildlog/log_parser.py | 7 +-- analyzer/tests/libtest/codechecker.py | 5 +- .../ldlogger response.rsp | 1 + .../logparser_test_files/ldlogger-new-at.json | 7 +++ .../ldlogger-new-response.json | 7 +++ .../ldlogger-new-space.json | 2 +- .../logparser_test_files/ldlogger-new.json | 2 +- analyzer/tests/unit/test_log_parser.py | 50 +++++++++++++++---- 11 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 analyzer/tests/unit/logparser_test_files/ldlogger response.rsp create mode 100644 analyzer/tests/unit/logparser_test_files/ldlogger-new-at.json create mode 100644 analyzer/tests/unit/logparser_test_files/ldlogger-new-response.json diff --git a/analyzer/codechecker_analyzer/analysis_manager.py b/analyzer/codechecker_analyzer/analysis_manager.py index 127cc4dc81..da38a9a9ea 100644 --- a/analyzer/codechecker_analyzer/analysis_manager.py +++ b/analyzer/codechecker_analyzer/analysis_manager.py @@ -300,7 +300,10 @@ def handle_reproducer(source_analyzer, rh, zip_file, actions_map): LOG.debug("[ZIP] Writing extra information...") archive.writestr("build-action", action.original_command) - archive.writestr("analyzer-command", ' '.join(rh.analyzer_cmd)) + archive.writestr( + "analyzer-command", + ' '.join([shlex.quote(x) for x in rh.analyzer_cmd]), + ) archive.writestr("return-code", str(rh.analyzer_returncode)) toolchain = gcc_toolchain.toolchain_in_args( diff --git a/analyzer/codechecker_analyzer/analyzers/analyzer_base.py b/analyzer/codechecker_analyzer/analyzers/analyzer_base.py index 8b38f10931..8ff54cfc1a 100644 --- a/analyzer/codechecker_analyzer/analyzers/analyzer_base.py +++ b/analyzer/codechecker_analyzer/analyzers/analyzer_base.py @@ -15,6 +15,7 @@ import signal import subprocess import sys +import shlex from codechecker_common.logger import get_logger @@ -82,7 +83,8 @@ def analyze(self, analyzer_cmd, res_handler, env=None, proc_callback=None): """ LOG.debug('Running analyzer ...') - LOG.debug_analyzer('\n%s', ' '.join(analyzer_cmd)) + LOG.debug_analyzer('\n%s', + ' '.join([shlex.quote(x) for x in analyzer_cmd])) res_handler.analyzer_cmd = analyzer_cmd try: diff --git a/analyzer/codechecker_analyzer/buildlog/build_manager.py b/analyzer/codechecker_analyzer/buildlog/build_manager.py index 42d6c6ecb8..9f932da6f1 100644 --- a/analyzer/codechecker_analyzer/buildlog/build_manager.py +++ b/analyzer/codechecker_analyzer/buildlog/build_manager.py @@ -13,6 +13,7 @@ import os import pickle import platform +import shlex import subprocess import sys from uuid import uuid4 @@ -81,7 +82,7 @@ def perform_build_command(logfile, command, context, keep_link, silent=False, final_command = command command = ' '.join(["intercept-build", "--cdb", logfile, - "sh -c \"" + final_command + "\""]) + "sh -c", shlex.quote(final_command)]) log_env = original_env LOG.debug_analyzer(command) diff --git a/analyzer/codechecker_analyzer/buildlog/log_parser.py b/analyzer/codechecker_analyzer/buildlog/log_parser.py index 4beed3557e..f05ce5149d 100644 --- a/analyzer/codechecker_analyzer/buildlog/log_parser.py +++ b/analyzer/codechecker_analyzer/buildlog/log_parser.py @@ -968,7 +968,8 @@ def parse_options(compilation_db_entry, if 'arguments' in compilation_db_entry: gcc_command = compilation_db_entry['arguments'] - details['original_command'] = ' '.join(gcc_command) + details['original_command'] = \ + ' '.join([shlex.quote(x) for x in gcc_command]) elif 'command' in compilation_db_entry: details['original_command'] = compilation_db_entry['command'] gcc_command = shlex.split(compilation_db_entry['command']) @@ -1172,10 +1173,10 @@ def extend_compilation_database_entries(compilation_database): continue opts, sources = process_response_file(response_file) - cmd.extend(opts) + cmd.extend([shlex.quote(x) for x in opts]) source_files.extend(sources) else: - cmd.append(opt) + cmd.append(shlex.quote(opt)) entry['command'] = ' '.join(cmd) diff --git a/analyzer/tests/libtest/codechecker.py b/analyzer/tests/libtest/codechecker.py index 579ebec84d..8d637bbeba 100644 --- a/analyzer/tests/libtest/codechecker.py +++ b/analyzer/tests/libtest/codechecker.py @@ -29,6 +29,7 @@ def show(out, err): print("\nTEST execute stderr:\n") print(err) + cmd_log = ' '.join([shlex.quote(x) for x in cmd]) try: proc = subprocess.Popen( cmd, @@ -41,13 +42,13 @@ def show(out, err): out, err = proc.communicate() if proc.returncode != 0: show(out, err) - print('Unsuccessful run: "' + ' '.join(cmd) + '"') + print(f'Unsuccessful run: {cmd_log}') print(proc.returncode) return out, err, proc.returncode except OSError as oerr: print(oerr) show(out, err) - print('Failed to run: "' + ' '.join(cmd) + '"') + print(f'Failed to run: {cmd_log}') raise diff --git a/analyzer/tests/unit/logparser_test_files/ldlogger response.rsp b/analyzer/tests/unit/logparser_test_files/ldlogger response.rsp new file mode 100644 index 0000000000..8e654ed944 --- /dev/null +++ b/analyzer/tests/unit/logparser_test_files/ldlogger response.rsp @@ -0,0 +1 @@ +'-DVARIABLE="some value"' '-DVARIABLE2="me@domain.com"' diff --git a/analyzer/tests/unit/logparser_test_files/ldlogger-new-at.json b/analyzer/tests/unit/logparser_test_files/ldlogger-new-at.json new file mode 100644 index 0000000000..5bdcd4f7c7 --- /dev/null +++ b/analyzer/tests/unit/logparser_test_files/ldlogger-new-at.json @@ -0,0 +1,7 @@ +[ + { + "directory": "/tmp", + "command": "g++ -DVARIABLE=\\\"me@domain.com\\\" /tmp/a.cpp -o /tmp/a.out", + "file": "/tmp/a.cpp" + } +] \ No newline at end of file diff --git a/analyzer/tests/unit/logparser_test_files/ldlogger-new-response.json b/analyzer/tests/unit/logparser_test_files/ldlogger-new-response.json new file mode 100644 index 0000000000..3a8b38096e --- /dev/null +++ b/analyzer/tests/unit/logparser_test_files/ldlogger-new-response.json @@ -0,0 +1,7 @@ +[ + { + "directory": "PLACEHOLDER", + "command": "g++ @ldlogger\\ response.rsp /tmp/a.cpp -o /tmp/a.out", + "file": "/tmp/a.cpp" + } +] \ No newline at end of file diff --git a/analyzer/tests/unit/logparser_test_files/ldlogger-new-space.json b/analyzer/tests/unit/logparser_test_files/ldlogger-new-space.json index 9b2df85908..e7ea452898 100644 --- a/analyzer/tests/unit/logparser_test_files/ldlogger-new-space.json +++ b/analyzer/tests/unit/logparser_test_files/ldlogger-new-space.json @@ -1,7 +1,7 @@ [ { "directory": "/tmp", - "command": "g++ /tmp/a b.cpp", + "command": "g++ /tmp/a\\ b.cpp", "file": "/tmp/a b.cpp" } ] \ No newline at end of file diff --git a/analyzer/tests/unit/logparser_test_files/ldlogger-new.json b/analyzer/tests/unit/logparser_test_files/ldlogger-new.json index 2ee4ca4ba6..c1bd94ee88 100644 --- a/analyzer/tests/unit/logparser_test_files/ldlogger-new.json +++ b/analyzer/tests/unit/logparser_test_files/ldlogger-new.json @@ -1,7 +1,7 @@ [ { "directory": "/tmp", - "command": "g++ -DVARIABLE=\"some value\" /tmp/a.cpp -o /tmp/a.out", + "command": "g++ -DVARIABLE=\\\"some\\ value\\\" /tmp/a.cpp -o /tmp/a.out", "file": "/tmp/a.cpp" } ] \ No newline at end of file diff --git a/analyzer/tests/unit/test_log_parser.py b/analyzer/tests/unit/test_log_parser.py index ee7185ff22..980c1bec86 100644 --- a/analyzer/tests/unit/test_log_parser.py +++ b/analyzer/tests/unit/test_log_parser.py @@ -103,7 +103,7 @@ def test_new_ldlogger(self): self.assertEqual(len(build_action.analyzer_options), 1) self.assertTrue(len(build_action.target) > 0) self.assertEqual(build_action.analyzer_options[0], - r'-DVARIABLE=some value') + r'-DVARIABLE="some value"') # Test source file with spaces. logfile = os.path.join(self.__test_files, "ldlogger-new-space.json") @@ -115,16 +115,42 @@ def test_new_ldlogger(self): self.assertEqual(build_action.source, r'/tmp/a b.cpp') self.assertEqual(build_action.lang, 'c++') + # Test @ sign in variable definition. + logfile = os.path.join(self.__test_files, "ldlogger-new-at.json") + + build_actions, _ = log_parser.\ + parse_unique_log(load_json_or_empty(logfile), self.__this_dir) + build_action = build_actions[0] + + self.assertEqual(len(build_action.analyzer_options), 1) + self.assertEqual(build_action.analyzer_options[0], + r'-DVARIABLE="me@domain.com"') + + # Test the same stuff with response files. + logfile = os.path.join(self.__test_files, "ldlogger-new-response.json") + logjson = load_json_or_empty(logfile) + # Make it relative to the response file. + logjson[0]['directory'] = self.__test_files + + build_actions, _ = log_parser.\ + parse_unique_log(logjson, self.__this_dir) + build_action = build_actions[0] + + self.assertEqual(len(build_action.analyzer_options), 2) + self.assertEqual(build_action.analyzer_options[0], + r'-DVARIABLE="some value"') + self.assertEqual(build_action.analyzer_options[1], + r'-DVARIABLE2="me@domain.com"') + def test_old_intercept_build(self): """ Test log file parsing escape behaviour with clang-5.0 intercept-build. """ - logfile = os.path.join(self.__test_files, "intercept-old.json") - + # FIXME: Yes, the json is actually bad! The space should have been + # escaped by intercept-build along with the backslash. # Scan-build-py shipping with clang-5.0 makes a logfile that contains: # -DVARIABLE=\"some value\" and --target=x86_64-linux-gnu - # - # The define is passed to the analyzer properly. + logfile = os.path.join(self.__test_files, "intercept-old.json") build_actions, _ = log_parser.\ parse_unique_log(load_json_or_empty(logfile), self.__this_dir) @@ -133,6 +159,8 @@ def test_old_intercept_build(self): self.assertEqual(build_action.source, r'/tmp/a.cpp') self.assertEqual(len(build_action.analyzer_options), 1) self.assertTrue(len(build_action.target) > 0) + # FIXME: We should expect r'-DVARIABLE="some value"' with a fixed + # intercept-build. self.assertEqual(build_action.analyzer_options[0], r'-DVARIABLE="some') @@ -454,7 +482,8 @@ def test_response_file_simple(self): parse_unique_log(load_json_or_empty(logfile), self.__this_dir) build_action = build_actions[0] self.assertEqual(len(build_action.analyzer_options), 1) - self.assertEqual(build_action.analyzer_options[0], '-DVARIABLE=some') + self.assertEqual(build_action.analyzer_options[0], + '-DVARIABLE=some value') def test_response_file_contains_source_file(self): """ @@ -481,7 +510,8 @@ def test_response_file_contains_source_file(self): self.assertEqual(len(build_action.analyzer_options), 1) self.assertEqual(build_action.source, self.src_file_path) - self.assertEqual(build_action.analyzer_options[0], '-DVARIABLE=some') + self.assertEqual(build_action.analyzer_options[0], + '-DVARIABLE=some value') def test_response_file_contains_multiple_source_files(self): """ @@ -521,12 +551,14 @@ def test_response_file_contains_multiple_source_files(self): a_build_action = [b for b in build_actions if b.source == a_file_path][0] self.assertEqual(len(a_build_action.analyzer_options), 1) - self.assertEqual(a_build_action.analyzer_options[0], '-DVARIABLE=some') + self.assertEqual(a_build_action.analyzer_options[0], + '-DVARIABLE=some value') b_build_action = [b for b in build_actions if b.source == b_file_path][0] self.assertEqual(len(b_build_action.analyzer_options), 1) - self.assertEqual(b_build_action.analyzer_options[0], '-DVARIABLE=some') + self.assertEqual(b_build_action.analyzer_options[0], + '-DVARIABLE=some value') def test_source_file_contains_at_sign(self): """