llvm · MaskRay · May 9, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 18, 2024
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
@@ -433,6 +433,87 @@ actually participate in the test besides holding the ``RUN:`` lines.
   putting the extra files in an ``Inputs/`` directory. This pattern is
   deprecated.
 
+Elaborated tests
+----------------
+
+Generally, IR and assembly test files benefit from being cleaned to remove
+unnecessary details. However, for tests requiring elaborate IR or assembly
+files where cleanup is less practical (e.g., a large amount of debug
+information output from Clang), you can include generation instructions within
+a ``split-file`` part called ``gen``. Then, run
+``llvm/utils/update_test_body.py`` on the test file to generate the needed
+content.
+
+.. code-block:: none
+
+    ; RUN: rm -rf %t && split-file %s %t && cd %t
+    ; RUN: opt -S a.ll ... | FileCheck %s
+
+    ; CHECK: hello
+
+    ;--- a.cc
+    int va;
+    ;--- gen
+    clang --target=x86_64-linux -S -emit-llvm -g a.cc -o -
+
+    ;--- a.ll
+    # content generated by the script 'gen'
+
+.. code-block:: bash
+
+   PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll
+
+The script will prepare extra files with ``split-file``, invoke ``gen``, and
+then rewrite the part after ``gen`` with its stdout.
+
+For convenience, if the test needs only a single assembly file, you can also
+wrap ``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you
+can skip ``split-file`` in RUN lines.
+
+.. code-block:: none
+
+    # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+    # RUN: ... | FileCheck %s
+
+    # CHECK: hello
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- gen
+    clang --target=x86_64-linux -S -g a.cc -o -
+    .endif
+    # content generated by the script 'gen'
+
+.. note::
+
+  Consider specifying an explicit target triple to avoid differences when
+  regeneration is needed on another machine.
+
+  ``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands
+  don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``.
+
+  Check prefixes should be placed before ``.endif`` since the part after
+  ``.endif`` is replaced.
+
+If the test body contains multiple files, you can print ``---`` separators and
+utilize ``split-file`` in ``RUN`` lines.
+
+.. code-block:: none
+
+    # RUN: rm -rf %t && split-file %s %t && cd %t
+    ...
+
+    #--- a.cc
+    int va;
+    #--- b.cc
+    int vb;
+    #--- gen
+    clang --target=x86_64-linux -S -O1 -g a.cc -o -
+    echo '#--- b.s'
+    clang --target=x86_64-linux -S -O1 -g b.cc -o -
+    #--- a.s
+
 Fragile tests
 -------------
 

diff --git a/llvm/test/tools/UpdateTestChecks/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
@@ -19,7 +19,8 @@ def add_update_script_substition(
     # Specify an explicit default version in UTC tests, so that the --version
     # embedded in UTC_ARGS does not change in all test expectations every time
     # the default is bumped.
-    extra_args += " --version=1"
+    if name != "%update_test_body":
+        extra_args += " --version=1"
     config.substitutions.append(
         (name, "'%s' %s %s" % (python_exe, script_path, extra_args))
     )
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
     config.available_features.add("llvm-mca-binary")
     mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
     add_update_script_substition("%update_test_checks", extra_args=mca_arg)
+
+split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
+if os.path.isfile(split_file_path):
+    add_update_script_substition("%update_test_body")
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
+.long 0
+.long 1
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
@@ -0,0 +1,16 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
+@a = global i32 0
+;--- b.ll
+@b = global i32 0
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
@@ -0,0 +1,13 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+# RUN: diff -u %t %s
+
+# CHECK: stdout is empty; forgot -o - ?
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
@@ -0,0 +1,7 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' does not exist
+
+.ifdef GEN
+#--- a.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK:      log
+# CHECK-NEXT: 'gen' failed
+
+.ifdef GEN
+#--- gen
+echo log >&2
+false  # gen fails due to sh -e
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
@@ -0,0 +1,8 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' should be followed by another part (---) or .endif
+
+#--- a.txt
+.long 0
+#--- gen
+cat a.txt
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
@@ -0,0 +1,4 @@
+import platform
+
+if platform.system() == "Windows":
+    config.unsupported = True
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
@@ -1,22 +1,24 @@
-# Source:
-#   struct e {
-#     enum {} f[16384];
-#     short g;
-#   };
-#   e foo() {
-#     auto E = new e;
-#     return *E;
-#   }
-# Compile with:
-#   clang -O2 -gdwarf-4 -S a.cpp -o a4.s
-
 # RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
 # RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
 
 # CHECK: DW_TAG_member
 # CHECK: DW_AT_name ("g")
 # CHECK: DW_AT_data_member_location    (0x4000)
 
+.ifdef GEN
+#--- a.cpp
+struct e {
+  enum {} f[16384];
+  short g;
+};
+e foo() {
+  auto E = new e;
+  return *E;
+}
+#--- gen
+clang --target=x86_64-apple-macosx -O2 -gdwarf-4 -S a.cpp -o -
+.endif
+
 	.section	__TEXT,__text,regular,pure_instructions
 	.macosx_version_min 10, 14
 	.globl	__Z3foov                ## -- Begin function _Z3foov

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
@@ -1,16 +1,16 @@
 # RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
 # RUN:   | llvm-dwarfdump - | FileCheck %s
 
-# Generated from:
-#
-#   struct t1 { };
-#   t1 v1;
-#
-# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
-
 # CHECK: DW_TAG_variable
 # CHECK:   DW_AT_type ({{.*}} "t1")
 
+.ifdef GEN
+#--- test.cpp
+struct t1 { };
+t1 v1;
+#--- gen
+clang++ --target=x86_64-linux -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
+.endif
 	.text
 	.file	"test.cpp"
 	.section	.debug_types.dwo,"e",@progbits

diff --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+"""Generate test body using split-file and a custom script.
+
+The script will prepare extra files with `split-file`, invoke `gen`, and then
+rewrite the part after `gen` with its stdout.
+
+https://llvm.org/docs/TestingGuide.html#elaborated-tests
+
+Example:
+PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+"""
+import argparse
+import contextlib
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+
+@contextlib.contextmanager
+def cd(directory):
+    """Temporarily change the working directory to `directory`.
+
+    The previous working directory is restored when the `with` body exits,
+    even if it raises.
+    """
+    cwd = os.getcwd()
+    os.chdir(directory)
+    try:
+        yield
+    finally:
+        os.chdir(cwd)
+
+
+def process(args, path):
+    """Regenerate the body of the test file `path` in place.
+
+    The prolog is every line up to and including the line that terminates
+    the `gen` part: the next `---` separator or `.endif`. It is split with
+    `split-file` into a temporary directory, `gen` is run there with
+    `sh -eu`, and `path` is rewritten as the prolog followed by the stdout
+    of `gen`.
+
+    If `args.shell` is set, a shell is started in the temporary directory
+    instead of running `gen`, and `path` is left unchanged.
+
+    Returns 0 on success; on failure prints a diagnostic to stderr and
+    returns a non-zero exit status.
+    """
+    prolog = []
+    seen_gen = False
+    with open(path) as f:
+        for line in f:
+            line = line.rstrip()
+            prolog.append(line)
+            if (seen_gen and re.match(r"(.|//)---", line)) or line.startswith(".endif"):
+                break
+            if re.match(r"(.|//)--- gen", line):
+                seen_gen = True
+        else:
+            print(
+                "'gen' should be followed by another part (---) or .endif",
+                file=sys.stderr,
+            )
+            return 1
+
+    if not seen_gen:
+        print("'gen' does not exist", file=sys.stderr)
+        return 1
+    with tempfile.TemporaryDirectory(prefix="update_test_body_") as tmpdir:
+        try:
+            # If the last line starts with ".endif", remove it.
+            subprocess.run(
+                ["split-file", "-", tmpdir],
+                input="\n".join(
+                    prolog[:-1] if prolog[-1].startswith(".endif") else prolog
+                ).encode(),
+                capture_output=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as ex:
+            sys.stderr.write(ex.stderr.decode())
+            return 1
+        with cd(tmpdir):
+            if args.shell:
+                print(f"invoke shell in the temporary directory '{tmpdir}'")
+                subprocess.run([os.environ.get("SHELL", "sh")])
+                return 0
+
+            sub = subprocess.run(
+                ["sh", "-eu", "gen"],
+                capture_output=True,
+                # Don't encode the directory information to the Clang output.
+                # Remove unneeded details (.ident) as well.
+                env=dict(
+                    os.environ,
+                    CCC_OVERRIDE_OPTIONS="#^-fno-ident",
+                    PWD="/proc/self/cwd",
+                ),
+            )
+            sys.stderr.write(sub.stderr.decode())
+            if sub.returncode != 0:
+                print("'gen' failed", file=sys.stderr)
+                return sub.returncode
+            if not sub.stdout:
+                print("stdout is empty; forgot -o - ?", file=sys.stderr)
+                return 1
+            content = sub.stdout.decode()
+
+    with open(path, "w") as f:
+        # Print lines up to '.endif'.
+        print("\n".join(prolog), file=f)
+        # Then print the stdout of 'gen'.
+        f.write(content)
+    # Report success explicitly: the caller compares the result against 0,
+    # and an implicit None would make it exit after the first file.
+    return 0
+
+
+parser = argparse.ArgumentParser(
+    description="Generate test body using split-file and a custom script"
+)
+parser.add_argument("files", nargs="+")
+parser.add_argument(
+    "--shell", action="store_true", help="invoke shell instead of 'gen'"
+)
+args = parser.parse_args()
+for path in args.files:
+    retcode = process(args, path)
+    if retcode != 0:
+        sys.exit(retcode)