-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options #69133
Conversation
@llvm/pr-subscribers-clang Author: Amir Ayupov (aaupov) ChangesSplit up and refactor CLANG_BOLT_INSTRUMENT into support for Differential Revision: https://reviews.llvm.org/D143617 Full diff: https://github.com/llvm/llvm-project/pull/69133.diff 6 Files Affected:
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 9b52c58be41e7f7..8f64d95cc394ffe 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -850,23 +850,38 @@ if (CLANG_ENABLE_BOOTSTRAP)
endforeach()
endif()
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \
+ May be specified as Instrument or Perf or LBR to use a particular profiling \
+ mechanism.")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
- set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+ set(CLANG_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_BOLT_INSTRUMENTED})
set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
- # Instrument clang with BOLT
- add_custom_target(clang-instrumented
- DEPENDS ${CLANG_INSTRUMENTED}
- )
- add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
- DEPENDS clang llvm-bolt
- COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
- -instrument --instrumentation-file-append-pid
- --instrumentation-file=${BOLT_FDATA}
- COMMENT "Instrumenting clang binary with BOLT"
- VERBATIM
- )
+ # Pass extra flag in no-LBR mode
+ if (uppercase_CLANG_BOLT STREQUAL "PERF")
+ set(BOLT_NO_LBR "-nl")
+ endif()
+
+ if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+ # Instrument clang with BOLT
+ add_custom_target(clang-instrumented
+ DEPENDS ${CLANG_INSTRUMENTED}
+ )
+ add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+ DEPENDS clang llvm-bolt
+ COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+ -instrument --instrumentation-file-append-pid
+ --instrumentation-file=${BOLT_FDATA}
+ COMMENT "Instrumenting clang binary with BOLT"
+ VERBATIM
+ )
+ add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
+ else() # perf or LBR
+ add_custom_target(clang-bolt-training-deps DEPENDS clang)
+ endif()
# Optimize original (pre-bolt) Clang using the collected profile
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
@@ -880,6 +895,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
-data ${BOLT_FDATA}
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+ ${BOLT_NO_LBR}
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
COMMENT "Optimizing Clang with BOLT"
VERBATIM
diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake
index 0442f73e5426ac7..eba2346b2f4ca12 100644
--- a/clang/cmake/caches/BOLT.cmake
+++ b/clang/cmake/caches/BOLT.cmake
@@ -1,5 +1,5 @@
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index c6d51863fb1b5c2..48fbee62a8636d1 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -62,7 +62,9 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
DEPENDS generate-dtrace-logs)
endif()
-if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
+ set(CLANG_BOLT_INSTRUMENTED "clang-bolt.inst" CACHE STRING
+ "Name of BOLT-instrumented Clang binary")
configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -71,16 +73,37 @@ if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
EXCLUDE_FROM_CHECK_ALL
- DEPENDS clang-instrumented clear-bolt-fdata
+ DEPENDS clang-bolt-training-deps clear-bolt-fdata clear-perf-data
)
add_custom_target(clear-bolt-fdata
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
COMMENT "Clearing old BOLT fdata")
+ add_custom_target(clear-perf-data
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
+ COMMENT "Clearing old perf data")
+
+ string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+ if (uppercase_CLANG_BOLT STREQUAL "LBR")
+ set(BOLT_LBR "--lbr")
+ endif()
+
+ add_custom_target(merge-fdata-deps)
+ if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+ add_dependencies(merge-fdata-deps generate-bolt-fdata)
+ else()
+ # Convert perf profiles into fdata
+ add_custom_target(convert-perf-fdata
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2bolt $<TARGET_FILE:llvm-bolt> ${CMAKE_CURRENT_BINARY_DIR} $<TARGET_FILE:clang> ${BOLT_LBR}
+ COMMENT "Converting perf files to BOLT fdata"
+ DEPENDS llvm-bolt generate-bolt-fdata)
+ add_dependencies(merge-fdata-deps convert-perf-fdata)
+ endif()
+
# Merge profiles into one using merge-fdata
add_custom_target(clang-bolt-profile
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Merging BOLT fdata"
- DEPENDS merge-fdata generate-bolt-fdata)
+ DEPENDS merge-fdata merge-fdata-deps)
endif()
diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
index 234ac855bd67c65..d2b6042a1627e19 100644
--- a/clang/utils/perf-training/bolt.lit.cfg
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,52 @@ import lit.util
import os
import subprocess
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+clang_binary = "clang"
+perf_wrapper = ""
+if config.clang_bolt_mode.lower() == "instrument":
+ clang_binary = config.clang_bolt_name
+else: # perf or LBR
+ perf_wrapper = "%s %s/perf-helper.py perf" % (
+ config.python_exe,
+ config.perf_helper_dir,
+ )
+ if config.clang_bolt_mode.lower() == "lbr":
+ perf_wrapper += " --lbr"
+ perf_wrapper += " -- "
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+config.clang = os.path.realpath(
+ lit.util.which(clang_binary, config.clang_tools_dir)
+).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = [
+ ".c",
+ ".cc",
+ ".cpp",
+ ".m",
+ ".mm",
+ ".cu",
+ ".ll",
+ ".cl",
+ ".s",
+ ".S",
+ ".modulemap",
+ ".test",
+]
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
-config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
-config.substitutions.append( ('%test_root', config.test_exec_root ) )
+config.substitutions.append(
+ (
+ "%clang_cpp_skip_driver",
+ " %s %s --driver-mode=g++ " % (perf_wrapper, config.clang),
+ )
+)
+config.substitutions.append(
+ ("%clang_cpp", " %s %s --driver-mode=g++ " % (perf_wrapper, config.clang))
+)
+config.substitutions.append(
+ ("%clang_skip_driver", " %s %s " % (perf_wrapper, config.clang))
+)
+config.substitutions.append(("%clang", " %s %s " % (perf_wrapper, config.clang)))
+config.substitutions.append(("%test_root", config.test_exec_root))
diff --git a/clang/utils/perf-training/bolt.lit.site.cfg.in b/clang/utils/perf-training/bolt.lit.site.cfg.in
index 3029319673fc26c..54de12701c1ae91 100644
--- a/clang/utils/perf-training/bolt.lit.site.cfg.in
+++ b/clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,8 @@ config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
config.python_exe = "@Python3_EXECUTABLE@"
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
+config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
# Let the main config do the real work.
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 99d6a3333b6ef08..c1ec68a9f6e2d4e 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,69 @@ def merge_fdata(args):
return 0
+def perf(args):
+ parser = argparse.ArgumentParser(
+ prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+ )
+ parser.add_argument(
+ "--lbr", action="store_true", help="Use perf with branch stacks"
+ )
+ parser.add_argument("cmd", nargs="*", help="")
+
+ # Use python's arg parser to handle all leading option arguments, but pass
+ # everything else through to perf
+ first_cmd = next(arg for arg in args if not arg.startswith("--"))
+ last_arg_idx = args.index(first_cmd)
+
+ opts = parser.parse_args(args[:last_arg_idx])
+ cmd = args[last_arg_idx:]
+
+ perf_args = [
+ "perf",
+ "record",
+ "--event=cycles:u",
+ "--freq=max",
+ "--output=%d.perf.data" % os.getpid(),
+ ]
+ if opts.lbr:
+ perf_args += ["--branch-filter=any,u"]
+ perf_args.extend(cmd)
+
+ start_time = time.time()
+ subprocess.check_call(perf_args)
+
+ elapsed = time.time() - start_time
+ print("... data collection took %.4fs" % elapsed)
+ return 0
+
+
+def perf2bolt(args):
+ parser = argparse.ArgumentParser(
+ prog="perf-helper perf2bolt",
+ description="perf2bolt conversion wrapper for perf.data files",
+ )
+ parser.add_argument("bolt", help="Path to llvm-bolt")
+ parser.add_argument("path", help="Path containing perf.data files")
+ parser.add_argument("binary", help="Input binary")
+ parser.add_argument(
+ "--lbr", action="store_true", help="Use LBR perf2bolt mode"
+ )
+ opts = parser.parse_args(args)
+
+ p2b_args = [
+ opts.bolt,
+ opts.binary,
+ "--aggregate-only",
+ "--profile-format=yaml",
+ ]
+ if not opts.lbr:
+ p2b_args += ["-nl"]
+ p2b_args += ["-p"]
+ for filename in findFilesWithExtension(opts.path, "perf.data"):
+ subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
+ return 0
+
+
def dtrace(args):
parser = argparse.ArgumentParser(
prog="perf-helper dtrace",
@@ -507,6 +570,8 @@ def genOrderFile(args):
"cc1": cc1,
"gen-order-file": genOrderFile,
"merge-fdata": merge_fdata,
+ "perf": perf,
+ "perf2bolt": perf2bolt,
}
|
✅ With the latest revision this PR passed the Python code formatter. |
3987088
to
96e6442
Compare
@petrhosek – thank you for a review! |
Split up and refactor CLANG_BOLT_INSTRUMENT into support for perf no-LBR and perf with LBR profiling modes. Differential Revision: https://reviews.llvm.org/D143617
4830982
to
4f2d0c2
Compare
@aaupov I believe this change is somehow causing cmake failures on several bots I oversee, although I cannot explain why it passed when building your commit. For example: https://lab.llvm.org/buildbot/#/builders/139/builds/57628
Any idea what might be going wrong here? |
I think because bots don't always clean their cmake files, revert incoming. |
…ptions (#69133)" This reverts commit 745883b. This is failing to configure on many of our bots: https://lab.llvm.org/buildbot/#/builders/245/builds/19468 This did not get caught right away because generally bots only clean the build every so often.
https://lab.llvm.org/buildbot/#/builders/245/builds/19468
This particular bot is clang+llvm and only the ARM target, but we've seen it on our AArch64 bots too. |
@@ -850,23 +850,38 @@ if (CLANG_ENABLE_BOOTSTRAP) | |||
endforeach() | |||
endif() | |||
|
|||
if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED) | |||
set(CLANG_BOLT "INSTRUMENT" CACHE STRING "Apply BOLT optimization to Clang. \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wild guess, this defaults to INSTRUMENT
, but it should default to some OFF
value?
I think the libc buildbots are now failing as a result of this change. @aaupov can you PTAL? |
…_BOLT options (#69133)" This reverts commit b83b8d3. Breaks buildbots e.g. https://lab.llvm.org/buildbot/#/builders/225/builds/29950
Reverted. Strangely I couldn't repro the failure from https://lab.llvm.org/buildbot/#/builders/225/builds/29950 locally. But will get to the bottom of it. |
…tions (#69133) This reverts commit 6953b36. Couldn't reproduce cmake issues from any of the affected buildbots: https://lab.llvm.org/buildbot/#/builders/139/builds/57628 https://lab.llvm.org/buildbot/#/builders/163/builds/50270 https://lab.llvm.org/buildbot/#/builders/223/builds/34823 https://lab.llvm.org/buildbot/#/builders/225/builds/29950 https://lab.llvm.org/buildbot/#/builders/245/builds/19468 A common trait is that these builders don't clean build directory. Push again and monitor these builders.
These are all annotated builders. I tried triggering rebuilds with clean build dir through buildbot UI but it didn't have an effect. I don't think it's a reasonable expectation that every change must configure cleanly from an existing build directory. |
Hi @aaupov I think this did break the AMD Hip build bot (another annotated builder). |
I'm sorry but I also don't think it is reasonable to re-landing a change that is known for breaking builders during the weekends, when not many people are around to react to the breakage. |
This issue with incremental builds is explicitly mentioned in HowToAddABuilder:
I've sent an email to builders owners. Still, sorry about landing it on a weekend. I expected that builders are configured according to the guidelines so the issues will clear automatically. Apologies for those depending on these broken builders. |
Final update: |
Split up and refactor CLANG_BOLT_INSTRUMENT into support for
perf no-LBR and perf with LBR profiling modes.
Differential Revision: https://reviews.llvm.org/D143617