Skip to content

Commit

Permalink
[Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED
Browse files Browse the repository at this point in the history
Build on Clang-BOLT infrastructure to collect sample profiles for CSSPGO.
Add clang/cmake/caches/CSSPGO.cmake to automate CSSPGO Clang build.

Differential Revision: https://reviews.llvm.org/D155419
  • Loading branch information
aaupov committed Sep 18, 2024
1 parent 1be4c97 commit efac760
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 13 deletions.
12 changes: 11 additions & 1 deletion clang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -744,11 +744,21 @@ if (CLANG_ENABLE_BOOTSTRAP)
# When the bootstrap stage is configured as instrumented, make llvm-profdata a
# prerequisite of clang-bootstrap-deps and record its path in PGO_OPT as
# -DLLVM_PROFDATA. (PGO_OPT is consumed outside this excerpt; presumably it is
# forwarded to the next-stage configure -- TODO confirm.)
if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
add_dependencies(clang-bootstrap-deps llvm-profdata)
set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
# Upper-case the mode in place so the "CSSPGO" comparison below is
# case-insensitive with respect to what the user passed.
string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
# CSSPGO mode also depends on llvm-profgen; its path is appended to PGO_OPT
# as -DLLVM_PROFGEN alongside the llvm-profdata entry set above.
add_dependencies(clang-bootstrap-deps llvm-profgen)
list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen)
endif()
endif()

if(LLVM_BUILD_INSTRUMENTED)
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
add_dependencies(clang-bootstrap-deps generate-profdata)
set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
else()
set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
endif()
# Use the current tools for LTO instead of the instrumented ones
list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
CMAKE_CXX_COMPILER
Expand Down
3 changes: 3 additions & 0 deletions clang/cmake/caches/CSSPGO.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CMake cache script for a CSSPGO Clang build.
# Selects a RelWithDebInfo build type, sets BOOTSTRAP_LLVM_BUILD_INSTRUMENTED
# to "CSSPGO", and then pulls in the PGO.cmake cache that lives next to this
# file for the remaining settings.
# NOTE(review): RelWithDebInfo is presumably chosen so that the profiled binary
# carries debug info for profile conversion -- confirm against llvm-profgen docs.
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "")
include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
26 changes: 25 additions & 1 deletion clang/utils/perf-training/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ if(LLVM_BUILD_INSTRUMENTED)

add_custom_target(clear-profraw
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ profraw
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
COMMENT "Clearing old profraw data")

if(NOT LLVM_PROFDATA)
Expand All @@ -29,6 +30,10 @@ if(LLVM_BUILD_INSTRUMENTED)
if(NOT LLVM_PROFDATA)
message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata")
else()
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
set(PROFDATA_SAMPLE "--sample")
endif()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata
# generate-profraw is a custom_target, and custom targets are always considered stale.
Expand All @@ -39,7 +44,7 @@ if(LLVM_BUILD_INSTRUMENTED)
# Therefore we call the generate-profraw target manually as part of this custom
# command, which will only run if clang or ${CLANG_PGO_TRAINING_DEPS} are updated.
COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target generate-profraw
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ ${PROFDATA_SAMPLE}
COMMENT "Merging profdata"
DEPENDS clang ${CLANG_PGO_TRAINING_DEPS}
)
Expand All @@ -49,6 +54,25 @@ if(LLVM_BUILD_INSTRUMENTED)
USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw)
add_dependencies(generate-profdata generate-profraw-external)
endif()
# CSSPGO only: turn the perf.data samples collected during training into
# profraw files with llvm-profgen, and make generate-profdata wait for that
# conversion.
if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
  # Honor a user-supplied LLVM_PROFGEN; otherwise look for the tool on PATH.
  if(NOT LLVM_PROFGEN)
    find_program(LLVM_PROFGEN llvm-profgen)
  endif()

  if(NOT LLVM_PROFGEN)
    message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen")
  else()
    # Only define the conversion target when llvm-profgen is actually
    # available. Otherwise the command line would carry the
    # "LLVM_PROFGEN-NOTFOUND" placeholder in place of the tool path.
    # This mirrors how the missing-LLVM_PROFDATA case is handled in this file.

    # Convert perf profiles into profraw
    add_custom_target(convert-perf-profraw
      COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2prof ${LLVM_PROFGEN} $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/
      COMMENT "Converting perf profiles into profraw"
      DEPENDS generate-profraw)
    # When external training data is configured, its profile run must finish
    # before conversion starts.
    if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
      add_dependencies(convert-perf-profraw generate-profraw-external)
    endif()
    add_dependencies(generate-profdata convert-perf-profraw)
  endif()
endif()
endif()
endif()

Expand Down
6 changes: 6 additions & 0 deletions clang/utils/perf-training/lit.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ config.name = 'Clang Perf Training'
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']

cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helper_dir)
# In CSSPGO mode (compared case-insensitively), replace the default cc1 wrapper
# with perf-helper.py's "perf" subcommand, requesting branch stacks (--lbr),
# call graphs (--call-graph) and the br_inst_retired.near_taken:uppp event.
# The trailing "-- " separates the wrapper's options from the wrapped command.
if config.llvm_build_instrumented.upper() == "CSSPGO":
perf_wrapper = "%s %s/perf-helper.py perf --lbr --call-graph --event=br_inst_retired.near_taken:uppp -- " % (
config.python_exe,
config.perf_helper_dir,
)
cc1_wrapper = perf_wrapper

use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
Expand Down
1 change: 1 addition & 0 deletions clang/utils/perf-training/lit.site.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
config.python_exe = "@Python3_EXECUTABLE@"
config.llvm_build_instrumented = "@LLVM_BUILD_INSTRUMENTED@"

# Let the main config do the real work.
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/lit.cfg")
56 changes: 46 additions & 10 deletions clang/utils/perf-training/perf-helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,22 @@ def clean(args):


def merge(args):
if len(args) < 3:
print(
"Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
+ "\tMerges all profraw files from path into output."
)
return 1
cmd = [args[0], "merge", "-o", args[1]]
for path in args[2:]:
parser = argparse.ArgumentParser(
prog="perf-helper merge",
description="Merges all profraw files from path(s) into output",
)
parser.add_argument("profdata", help="Path to llvm-profdata tool")
parser.add_argument("output", help="Output filename")
parser.add_argument(
"paths", nargs="+", help="Folder(s) containing input profraw files"
)
parser.add_argument("--sample", action="store_true", help="Sample profile")
opts = parser.parse_args(args)

cmd = [opts.profdata, "merge", "-o", opts.output]
if opts.sample:
cmd += ["--sample"]
for path in opts.paths:
cmd.extend(findFilesWithExtension(path, "profraw"))
subprocess.check_call(cmd)
return 0
Expand All @@ -71,11 +79,16 @@ def merge_fdata(args):

def perf(args):
parser = argparse.ArgumentParser(
prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
prog="perf-helper perf",
description="perf wrapper for BOLT/CSSPGO profile collection",
)
parser.add_argument(
"--lbr", action="store_true", help="Use perf with branch stacks"
)
parser.add_argument("--call-graph", action="store_true", help="Collect call graph")
parser.add_argument(
"--event", help="PMU event name, defaults to cycles:u", default="cycles:u"
)
parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")

opts = parser.parse_args(args)
Expand All @@ -84,12 +97,14 @@ def perf(args):
perf_args = [
"perf",
"record",
"--event=cycles:u",
f"--event={opts.event}",
"--freq=max",
"--output=%d.perf.data" % os.getpid(),
]
if opts.lbr:
perf_args += ["--branch-filter=any,u"]
if opts.call_graph:
perf_args += ["-g", "--call-graph=fp"]
perf_args.extend(cmd)

start_time = time.time()
Expand Down Expand Up @@ -125,6 +140,26 @@ def perf2bolt(args):
return 0


def perf2prof(args):
    """Run llvm-profgen over every perf.data file found under the given paths.

    ``args`` is a command-line style list: ``<profgen> <binary> <path>...``.
    For each file returned by ``findFilesWithExtension(path, "perf.data")``
    llvm-profgen is invoked with ``--binary``, ``--perfdata`` and an
    ``--output`` named after the input with ``.profraw`` appended.

    Returns 0 on success. A failing llvm-profgen invocation propagates the
    exception raised by ``subprocess.check_call``.
    """
    cli = argparse.ArgumentParser(
        prog="perf-helper perf2prof",
        description="perf to CSSPGO prof conversion wrapper",
    )
    cli.add_argument("profgen", help="Path to llvm-profgen binary")
    cli.add_argument("binary", help="Input binary")
    cli.add_argument("paths", nargs="+", help="Path containing perf.data files")
    parsed = cli.parse_args(args)

    # Arguments shared by every llvm-profgen invocation.
    base_cmd = [parsed.profgen, "--binary=%s" % parsed.binary]
    for search_dir in parsed.paths:
        for perf_file in findFilesWithExtension(search_dir, "perf.data"):
            per_file_args = [
                "--perfdata=%s" % perf_file,
                "--output=%s.profraw" % perf_file,
            ]
            subprocess.check_call(base_cmd + per_file_args)
    return 0


def dtrace(args):
parser = argparse.ArgumentParser(
prog="perf-helper dtrace",
Expand Down Expand Up @@ -567,6 +602,7 @@ def genOrderFile(args):
"merge-fdata": merge_fdata,
"perf": perf,
"perf2bolt": perf2bolt,
"perf2prof": perf2prof,
}


Expand Down
3 changes: 3 additions & 0 deletions llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa
set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
"Profiling data file to use when compiling in order to improve runtime performance.")

set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
"Sampling profiling data file to use when compiling in order to improve runtime performance.")

if(LLVM_INCLUDE_TESTS)
# All LLVM Python files should be compatible down to this minimum version.
set(LLVM_MINIMUM_PYTHON_VERSION 3.8)
Expand Down
26 changes: 25 additions & 1 deletion llvm/cmake/modules/HandleLLVMOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ endif()
option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
mark_as_advanced(LLVM_ENABLE_IR_PGO)

set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
Expand Down Expand Up @@ -1167,6 +1167,15 @@ if (LLVM_BUILD_INSTRUMENTED)
CMAKE_EXE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS)
endif()
elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
CMAKE_CXX_FLAGS
CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
CMAKE_EXE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS)
endif()
else()
append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
CMAKE_CXX_FLAGS
Expand Down Expand Up @@ -1217,6 +1226,21 @@ if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE})
endif()
endif()

# Sample-profile use: when LLVM_SPROFDATA_FILE names an existing file, add
# -fpseudo-probe-for-profiling and -fprofile-sample-use=<file> to the C/C++
# compile flags and, unless linking with lld-link, to the executable and
# shared-library link flags as well.
if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
  # The flags below are Clang-specific, so reject any other compiler up front.
  if(NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
  endif()

  append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
    CMAKE_CXX_FLAGS
    CMAKE_C_FLAGS)

  if(NOT LINKER_IS_LLD_LINK)
    append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
      CMAKE_EXE_LINKER_FLAGS
      CMAKE_SHARED_LINKER_FLAGS)
  endif()
endif()

option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
Expand Down

0 comments on commit efac760

Please sign in to comment.