From b21935a2464579df2b9b2fd1c1eb693e37322987 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20Csord=C3=A1s?= <csordasmarton92@gmail.com>
Date: Tue, 25 May 2021 14:32:42 +0200
Subject: [PATCH] [analyzer] Fix yaml dumper

If the key attribute (path) in the invocation-list.yml file is longer than
128 characters, the yaml module will mark it as a complex key, which is not
supported by the LLVM YAML parser. For this reason we create an
LLVM-compatible YAML dumper class which marks these keys as simple
(non-complex) keys.
---
 .../analyzers/clangsa/ctu_manager.py            | 17 ++++++++++++++++-
 analyzer/tests/functional/ctu/test_ctu.py       | 15 +++++++++++++--
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py b/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py
index 8ce9387cb1..27efb8d8e9 100644
--- a/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py
+++ b/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py
@@ -17,6 +17,7 @@
 import tempfile
 from pathlib import Path
 from sys import maxsize
+from yaml import Dumper
 
 from codechecker_common.logger import get_logger
 
@@ -29,6 +30,18 @@
 LOG = get_logger('analyzer')
 
 
+class LLVMComatibleYamlDumper(Dumper):
+    def check_simple_key(self):
+        """ Mark every key as a simple key.
+
+        PyYAML limits simple keys to '128' characters and this value can't be
+        changed (https://github.com/yaml/pyyaml/issues/157). To be compatible
+        with the YAML parser of LLVM we override this function and mark every
+        key as a simple key.
+        """
+        return True
+
+
 def merge_clang_extdef_mappings(ctu_dir, ctu_func_map_file,
                                 ctu_temp_fnmap_folder):
     """ Merge individual function maps into a global one."""
@@ -87,7 +100,9 @@ def generate_invocation_list(triple_arch, action, source, config, env):
     # Line width is set to max int size because of compatibility with the YAML
     # parser of LLVM. We try to ensure that no lines break in the textual
     # representation of the list items.
-    invocation_line = yaml.dump({str(source_path): cmd}, width=maxsize)
+    invocation_line = yaml.dump(
+        {str(source_path): cmd},
+        width=maxsize, Dumper=LLVMComatibleYamlDumper)
 
     LOG.debug_analyzer("Appending invocation list item '%s'", invocation_line)
 
diff --git a/analyzer/tests/functional/ctu/test_ctu.py b/analyzer/tests/functional/ctu/test_ctu.py
index 962c817625..f0e1267325 100644
--- a/analyzer/tests/functional/ctu/test_ctu.py
+++ b/analyzer/tests/functional/ctu/test_ctu.py
@@ -267,6 +267,17 @@ def test_ctu_ondemand_yaml_format(self):
         """ Test the generated YAML used in CTU on-demand mode.
         The YAML file should not contain newlines in individual entries in the
         generated textual format. """
+        # Copy the test files to a directory whose path is longer than 128
+        # characters to test the YAML handling of long keys.
+        test_dir = os.path.join(
+            self.test_workspace, os.path.join(*[
+                ''.join('0' for _ in range(43)) for _ in range(0, 3)]))
+
+        shutil.copytree(self.test_dir, test_dir)
+
+        complex_buildlog = os.path.join(test_dir, 'complex_buildlog.json')
+        shutil.copy(self.complex_buildlog, complex_buildlog)
+        env.adjust_buildlog('complex_buildlog.json', test_dir, test_dir)
 
         cmd = [self._codechecker_cmd, 'analyze',
                '-o', self.report_dir,
@@ -275,8 +286,8 @@ def test_ctu_ondemand_yaml_format(self):
                                  # intact only if a single ctu-phase is
                                  # specified
                '--ctu-ast-mode', 'parse-on-demand',
-               self.complex_buildlog]
-        _, _, result = call_command(cmd, cwd=self.test_dir, env=self.env)
+               complex_buildlog]
+        _, _, result = call_command(cmd, cwd=test_dir, env=self.env)
         self.assertEqual(result, 0, "Analyzing failed.")
 
         ctu_dir = os.path.join(self.report_dir, 'ctu-dir')