[CMake][PGO] Build Sema.cpp to generate profdata for PGO builds #77347

Open · wants to merge 7 commits into main · changes shown from 6 commits
22 changes: 20 additions & 2 deletions clang/utils/perf-training/CMakeLists.txt
@@ -1,6 +1,10 @@
+include(LLVMExternalProjectUtils)
+
 set(CLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH
   "The path to a lit testsuite containing samples for PGO and order file generation"
   )
+set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating pgo data")
+set(CLANG_PERF_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.")
 
 if(LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
@@ -15,7 +19,7 @@ if(LLVM_BUILD_INSTRUMENTED)
   )
 
   add_custom_target(clear-profraw
-    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw
+    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ profraw
     COMMENT "Clearing old profraw data")
 
   if(NOT LLVM_PROFDATA)
@@ -26,9 +30,23 @@ if(LLVM_BUILD_INSTRUMENTED)
     message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata")
   else()
     add_custom_target(generate-profdata
-      COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR}
+      COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/
       COMMENT "Merging profdata"
       DEPENDS generate-profraw)
+    if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
+      llvm_ExternalProject_Add(generate-profraw-external ${CLANG_PGO_TRAINING_DATA_SOURCE_DIR}
+        USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw)
+      add_dependencies(generate-profdata generate-profraw-external)
+    else()
+      # Default to compiling a file from clang. This also builds all the
+      # dependencies needed to build this file, like TableGen.
+      set(generate_profraw_clang_sema tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.o)
+      llvm_ExternalProject_Add(generate-profraw-clang ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvm
+        USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw
+        EXTRA_TARGETS generate_profraw_clang_sema
Collaborator:

This is going to be really fragile. The paths of object-file outputs are an implementation detail of CMake that has changed in the past and could change in the future; also, I think only the Makefile and Ninja generators actually export those as buildable targets.

Beyond that, I also have a philosophical problem with the approach of using living clang sources as training data. I think this can result in unexplainable or difficult-to-diagnose differences in the efficacy of PGO, because as you change the compiler you're also innately changing the training data. A potentially better solution would be to have a preprocessed Sema.cpp from a recent LLVM release that we check in as a stable copy. This would be similar to how the GCC compile-time benchmark works.
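
For illustration, compiling a checked-in preprocessed source could look roughly like the sketch below; the `training/Sema-release.ii` file and `generate-profraw-stable` target names are hypothetical, not part of this patch:

```cmake
# Rough sketch (hypothetical names): compile a stable, checked-in
# preprocessed source with the just-built instrumented clang instead of
# the live Sema.cpp.
add_custom_target(generate-profraw-stable
  COMMAND $<TARGET_FILE:clang> -c -std=c++17
          ${CMAKE_CURRENT_SOURCE_DIR}/training/Sema-release.ii
          -o ${CMAKE_CURRENT_BINARY_DIR}/training.o
  COMMENT "Compiling stable PGO training source")
add_dependencies(generate-profraw-stable clang)
add_dependencies(generate-profdata generate-profraw-stable)
```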

Collaborator (Author):

> This is going to be really fragile. The paths of object-file outputs are an implementation detail of CMake that has changed in the past and could change in the future; also, I think only the Makefile and Ninja generators actually export those as buildable targets.

What about using the library target clangSema instead of the individual file?

> Beyond that, I also have a philosophical problem with the approach of using living clang sources as training data. I think this can result in unexplainable or difficult-to-diagnose differences in the efficacy of PGO, because as you change the compiler you're also innately changing the training data. A potentially better solution would be to have a preprocessed Sema.cpp from a recent LLVM release that we check in as a stable copy. This would be similar to how the GCC compile-time benchmark works.

The first thing I tried to do was add a preprocessed version of CommentSema.cpp, but the file was 10MB, and I thought that might be too big. Sema.cpp preprocessed is 5MB, which is smaller but still pretty big. I would be OK with this approach if we think it's OK to add a big file like this to the repo.

I'm also not concerned about efficacy of PGO, because building Sema.cpp is just the default. I really want there to be a good default experience for users, so that they can use the PGO cache files and things will 'just work'. This patch also adds a configuration option for advanced users that need something more consistent.
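
A minimal sketch of that alternative, reusing the patch's variable-indirection pattern for EXTRA_TARGETS (untested, and it assumes the external build exposes the clangSema library target):

```cmake
# Hypothetical variant: point the local helper target at the clangSema
# library target rather than a generator-specific object-file path.
set(generate_profraw_clang_sema clangSema)
llvm_ExternalProject_Add(generate-profraw-clang ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvm
  USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw
  EXTRA_TARGETS generate_profraw_clang_sema
  CMAKE_ARGS -DLLVM_ENABLE_PROJECTS=clang)
add_dependencies(generate-profdata generate_profraw_clang_sema)
```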

Collaborator:

IMO, the PGO use case is already an advanced-user feature. It's even documented in the "Advanced Builds" documentation.

If we want some more complex C++ sources, there are some samples we could look at, like Kaleidoscope. The Kaleidoscope chapter 2 source builds without dependencies, and the other ones are also pretty simple to just compile.

If you're not concerned about the efficacy of the PGO build, I'm not sure why the existing single hello_world is a bad default. It isn't great but it is stable and something. If you want something that gives better performance then you must care about the efficacy, and I think you need to be somewhat concerned about the stability of those results, otherwise what's the point?
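
For the Kaleidoscope idea above, one possible wiring, again following the patch's external-project mechanism (a sketch only; the `Kaleidoscope-Ch2` target name assumes the examples are enabled):

```cmake
# Hypothetical sketch: train on the dependency-free Kaleidoscope chapter 2
# example instead of living clang sources.
set(generate_profraw_kaleidoscope Kaleidoscope-Ch2)
llvm_ExternalProject_Add(generate-profraw-clang ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvm
  USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw
  EXTRA_TARGETS generate_profraw_kaleidoscope
  CMAKE_ARGS -DLLVM_ENABLE_PROJECTS=clang -DLLVM_BUILD_EXAMPLES=ON)
add_dependencies(generate-profdata generate_profraw_kaleidoscope)
```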

Collaborator (Author):

> If you're not concerned about the efficacy of the PGO build, I'm not sure why the existing single hello_world is a bad default. It isn't great but it is stable and something. If you want something that gives better performance then you must care about the efficacy, and I think you need to be somewhat concerned about the stability of those results, otherwise what's the point?

I'm concerned about the efficacy if it is 0%, but not really concerned if it bounces around between, say, 10% and 25% based on updates to the file we are using to train.

I tested with the existing default (hello_world) and I didn't see any performance improvements from that. We also did some manual builds (not using the 2-stage PGO cache) and could not get any performance gains from using a hello world source. How certain are we that hello_world is enough to get performance improvements from PGO? Has anyone tested this recently? I will re-run my tests just to double check.

Collaborator:

I saw small gains when I added it, but that was years ago. My confidence in it having any meaningful gain today is close to 0. I still think using training sources that have no stability is a bad idea. I'd feel much more comfortable using the LLVM examples or some other coding samples as testing sources.

Member:

The idea I'd like to explore is using the libc++ (and perhaps also libc) test suite for training. It consists of self-contained test programs which should sufficiently exercise various aspects of the C/C++ frontend.

Collaborator (Author):

> The idea I'd like to explore is using the libc++ (and perhaps also libc) test suite for training. It consists of self-contained test programs which should sufficiently exercise various aspects of the C/C++ frontend.

I've updated the patch to use the check-cxx target to generate the profile data. The gains (~20%) are less than with Sema.cpp (~34%) but still pretty good. The only issue is that the profile data comes from building libcxx as well, so it's not just from the test suite.

It may be possible to get test data from just running the test suite, but that might be a lot more complicated to set up.
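
Roughly how the check-cxx wiring might look, following the same pattern (a sketch under assumptions: the actual updated patch may differ, and the runtimes arguments below are illustrative):

```cmake
# Hypothetical sketch: build and run the libc++ test suite (check-cxx)
# with the just-built instrumented clang to generate profile data.
set(generate_profraw_check_cxx check-cxx)
llvm_ExternalProject_Add(generate-profraw-runtimes ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtimes
  USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw
  EXTRA_TARGETS generate_profraw_check_cxx
  CMAKE_ARGS "-DLLVM_ENABLE_RUNTIMES=libcxx\;libcxxabi")
add_dependencies(generate-profdata generate_profraw_check_cxx)
```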

Contributor:

> The idea I'd like to explore is using the libc++ (and perhaps also libc) test suite for training. It consists of self-contained test programs which should sufficiently exercise various aspects of the C/C++ frontend.

@petrhosek, are you suggesting we use libc++ from in-tree, or from a fixed, already-released version as @llvm-beanz suggested here? (He didn't suggest libc++ itself, but sources from a recent LLVM release.)

Member:

@tstellar For the numbers you reported (~34% and ~20%), what did you use to collect those numbers?

Collaborator (Author):

I was compiling just a single file (SemaChecking.cpp).

+        CMAKE_ARGS -DLLVM_ENABLE_PROJECTS=clang)
+      add_dependencies(generate-profdata generate_profraw_clang_sema)
+    endif()
   endif()
 endif()
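
Putting the new options together, a user-side cache fragment (passed with `cmake -C`) might look like this; the path is illustrative, not from the patch:

```cmake
# Hypothetical cache fragment: train PGO on an external CMake project and
# build the runtimes first (the path below is a placeholder).
set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR "/path/to/llvm-test-suite" CACHE STRING "")
set(CLANG_PERF_TRAINING_DEPS "runtimes" CACHE STRING "")
```

In a two-stage PGO build these would be passed with the BOOTSTRAP_ prefix, as the documentation change below explains.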

16 changes: 9 additions & 7 deletions clang/utils/perf-training/perf-helper.py
@@ -30,26 +30,28 @@ def findFilesWithExtension(path, extension):


 def clean(args):
-    if len(args) != 2:
+    if len(args) < 2:
         print(
-            "Usage: %s clean <path> <extension>\n" % __file__
+            "Usage: %s clean <paths> <extension>\n" % __file__
             + "\tRemoves all files with extension from <path>."
         )
         return 1
-    for filename in findFilesWithExtension(args[0], args[1]):
-        os.remove(filename)
+    for path in args[:-1]:
+        for filename in findFilesWithExtension(path, args[-1]):
+            os.remove(filename)
     return 0
 
 
 def merge(args):
-    if len(args) != 3:
+    if len(args) < 3:
         print(
-            "Usage: %s merge <llvm-profdata> <output> <path>\n" % __file__
+            "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
             + "\tMerges all profraw files from path into output."
         )
         return 1
     cmd = [args[0], "merge", "-o", args[1]]
-    cmd.extend(findFilesWithExtension(args[2], "profraw"))
+    for i in range(2, len(args)):
+        cmd.extend(findFilesWithExtension(args[i], "profraw"))
     subprocess.check_call(cmd)
     return 0

23 changes: 23 additions & 0 deletions llvm/docs/AdvancedBuilds.rst
@@ -145,6 +145,29 @@ that also enables ThinLTO, use the following command:
     -DPGO_INSTRUMENT_LTO=Thin \
     <path to source>/llvm
 
+By default, clang will generate profile data by compiling the Sema.cpp
+file (and all its dependencies, e.g. TableGen). However, you can also
+tell clang to use an external project for generating profile data that may
+be a better fit for your use case. The project you specify must either be
+a lit test suite (use the CLANG_PGO_TRAINING_DATA option) or a CMake
+project (use the CLANG_PGO_TRAINING_DATA_SOURCE_DIR option).
+
+For example, if you wanted to use the
+`LLVM Test Suite <https://github.com/llvm/llvm-test-suite/>`_ to generate
+profile data, you would use the following command:
+
+.. code-block:: console
+
+  $ cmake -G Ninja -C <path to source>/clang/cmake/caches/PGO.cmake \
+      -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=/home/fedora/llvm-test-suite/ \
+      -DBOOTSTRAP_CLANG_PERF_TRAINING_DEPS=runtimes <path to source>/llvm
+
+The BOOTSTRAP\_ prefix tells CMake to pass the variables on to the instrumented
+stage two build, and the CLANG_PERF_TRAINING_DEPS option lets you specify
+additional build targets to build before building the external project. The
+LLVM Test Suite requires compiler-rt to build, so we need to add the
+`runtimes` target as a dependency.
+
 After configuration, building the stage2-instrumented-generate-profdata target
 will automatically build the stage1 compiler, build the instrumented compiler
 with the stage1 compiler, and then run the instrumented compiler against the