Skip to content

Commit

Permalink
OMPT Target Offload Support (#17)
Browse files Browse the repository at this point in the history
- Porting from ROCm/omnitrace#411
- Improve OMPT support
- Add OpenMP target example to testing
- Update Timemory submodule to use ROCm/Timemory rather than NERSC/Timemory
- Update `actions/upload-artifact` to v4
- Standardize the `cmake_minimum_required` to 3.18.4 across workflows, project, and examples
- Update Ubuntu 20.04 workflows
  • Loading branch information
dgaliffiAMD committed Nov 18, 2024
1 parent 6d28b07 commit 9da7365
Show file tree
Hide file tree
Showing 34 changed files with 801 additions and 125 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/opensuse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ jobs:
chmod +x /opt/trace_processor/bin/trace_processor_shell
fi
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done
- name: Configure Env
run:
Expand Down Expand Up @@ -136,7 +137,7 @@ jobs:
- name: CTest Artifacts
if: failure()
continue-on-error: True
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ctest-${{ github.job }}-${{ strategy.job-index }}-log
path: |
Expand All @@ -145,7 +146,7 @@ jobs:
- name: Data Artifacts
if: failure()
continue-on-error: True
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: data-${{ github.job }}-${{ strategy.job-index }}-files
path: |
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/redhat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ jobs:
chmod +x /opt/trace_processor/bin/trace_processor_shell
fi
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done
- name: Install ROCm Packages
if: ${{ matrix.rocm-version > 0 }}
Expand Down
30 changes: 12 additions & 18 deletions .github/workflows/ubuntu-focal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
compiler: ['g++-7', 'g++-8']
lto: ['OFF']
strip: ['OFF']
python: ['ON']
python: ['OFF']
build-type: ['Release']
mpi-headers: ['OFF']
static-libgcc: ['OFF']
Expand Down Expand Up @@ -97,8 +97,9 @@ jobs:
wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin &&
chmod +x /opt/trace_processor/bin/trace_processor_shell &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done &&
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done &&
apt-get -y --purge autoremove &&
apt-get -y clean &&
/opt/conda/bin/conda clean -y -a
Expand Down Expand Up @@ -278,8 +279,9 @@ jobs:
wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin &&
chmod +x /opt/trace_processor/bin/trace_processor_shell &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done &&
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done &&
apt-get -y --purge autoremove &&
apt-get -y clean &&
/opt/conda/bin/conda clean -y -a
Expand Down Expand Up @@ -408,16 +410,6 @@ jobs:
ompt: ['ON']
papi: ['ON']
deps: ['libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev']
include:
- compiler: 'g++'
mpi: 'nompi'
boost: 'ON'
tbb: 'ON'
build-type: 'Release'
python: 'ON'
ompt: 'OFF'
papi: 'OFF'
deps: ''

env:
ELFUTILS_DOWNLOAD_VERSION: 0.186
Expand All @@ -440,7 +432,8 @@ jobs:
wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin &&
chmod +x /opt/trace_processor/bin/trace_processor_shell &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
sudo apt-get -y --purge autoremove &&
sudo apt-get -y clean
Expand Down Expand Up @@ -592,8 +585,9 @@ jobs:
wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin &&
chmod +x /opt/trace_processor/bin/trace_processor_shell &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done &&
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done &&
apt-get -y --purge autoremove &&
apt-get -y clean &&
/opt/conda/bin/conda clean -y -a
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/ubuntu-jammy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ jobs:
apt-get upgrade -y &&
apt-get install -y autoconf bison build-essential clang environment-modules gettext libfabric-dev libiberty-dev libomp-dev libopenmpi-dev libtool m4 openmpi-bin python3-pip texinfo ${{ matrix.compiler }} &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy perfetto dataclasses &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done
python3 -m pip install --upgrade numpy perfetto dataclasses &&
python3 -m pip install 'cmake==3.18.4' &&
for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done
- name: Install ROCm Packages
timeout-minutes: 25
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "external/timemory"]
path = external/timemory
url = https://github.com/NERSC/timemory.git
url = https://github.com/ROCm/timemory.git
[submodule "external/perfetto"]
path = external/perfetto
url = https://github.com/google/perfetto.git
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL
CMAKE_SOURCE_DIR)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export LD_LIBRARY_PATH=/opt/rocprofiler-systems/lib:${LD_LIBRARY_PATH}

Generate a rocprofiler-systems configuration file using `rocprof-sys-avail -G rocprof-sys.cfg`. Optionally, use `rocprof-sys-avail -G rocprof-sys.cfg --all` for
a verbose configuration file with descriptions, categories, etc. Modify the configuration file as desired, e.g. enable
[perfetto](https://perfetto.dev/), [timemory](https://github.com/NERSC/timemory), sampling, and process-level sampling by default
[perfetto](https://perfetto.dev/), [timemory](https://github.com/ROCm/timemory), sampling, and process-level sampling by default
and tweak some sampling default values:

```console
Expand Down
2 changes: 1 addition & 1 deletion cmake/Packages.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ mark_as_advanced(TIMEMORY_PROJECT_NAME)
rocprofiler_systems_checkout_git_submodule(
RELATIVE_PATH external/timemory
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/NERSC/timemory.git
REPO_URL https://github.com/ROCm/timemory.git
REPO_BRANCH omnitrace)

rocprofiler_systems_save_variables(
Expand Down
2 changes: 1 addition & 1 deletion docs/conceptual/rocprof-sys-feature-set.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The ROCm Systems Profiler feature set and use cases
***************************************

`ROCm Systems Profiler <https://github.com/ROCm/rocprofiler-systems>`_ is designed to be highly extensible.
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/NERSC/timemory>`_
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/ROCm/timemory>`_
to manage extensions, resources, data, and other items. It supports the following features,
modes, metrics, and APIs.

Expand Down
4 changes: 2 additions & 2 deletions docs/how-to/configuring-runtime-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use the ``rocprof-sys-avail -G ~/.rocprof-sys.cfg --all`` option
for a verbose configuration file with descriptions, categories, and additional information.

Modify ``${HOME}/.rocprof-sys.cfg`` as required. For example, enable `Perfetto <https://perfetto.dev/>`_,
`Timemory <https://github.com/NERSC/timemory>`_, sampling, and process-level sampling by default
`Timemory <https://github.com/ROCm/timemory>`_, sampling, and process-level sampling by default
and tweak the default sampling values.

.. code-block:: shell
Expand Down Expand Up @@ -64,7 +64,7 @@ accepts a case insensitive match for nearly all common Boolean logic expressions
Exploring components
-----------------------------------

ROCm Systems Profiler uses `Timemory <https://github.com/NERSC/timemory>`_ extensively to provide
ROCm Systems Profiler uses `Timemory <https://github.com/ROCm/timemory>`_ extensively to provide
various capabilities and manage
data and resources. By default, with ``ROCPROFSYS_PROFILE=ON``, ROCm Systems Profiler only collects wall-clock
timing values. However, by modifying the ``ROCPROFSYS_TIMEMORY_COMPONENTS`` setting,
Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-examples LANGUAGES C CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/causal/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-causal-example LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/code-coverage/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-code-coverage-example LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/fork/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-fork LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/lulesh/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-lulesh-example LANGUAGES C CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/mpi/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-mpi-examples LANGUAGES C CXX)

Expand Down
4 changes: 3 additions & 1 deletion examples/openmp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-openmp LANGUAGES CXX)

Expand Down Expand Up @@ -56,3 +56,5 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
DESTINATION bin
COMPONENT rocprofiler-systems-examples)
endif()

add_subdirectory(target)
110 changes: 110 additions & 0 deletions examples/openmp/target/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#
# OpenMP target-offload example.
#
# This directory is configured only when a HIP package of version 5.6.0 or
# newer is found; otherwise a warning is printed and the directory is skipped.
#
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

# When ROCmVersion_MAJOR_VERSION is set and is 6 or newer, request that major
# version from find_package explicitly; in every other case accept whatever
# HIP package is available.
if(NOT ROCmVersion_MAJOR_VERSION OR NOT (ROCmVersion_MAJOR_VERSION GREATER_EQUAL 6))
    find_package(hip)
else()
    find_package(hip ${ROCmVersion_MAJOR_VERSION}.0.0)
endif()

# Guard: no HIP package at all.
if(NOT hip_FOUND)
    message(WARNING "ROCm >= 5.6 not found. Skipping OpenMP target example.")
    return()
endif()

# Guard: a HIP package was found, but it is older than 5.6.0.
if(hip_VERSION VERSION_LESS 5.6.0)
    message(
        WARNING
            "ROCm >= 5.6 not found (found ${hip_VERSION}). Skipping OpenMP target example."
        )
    return()
endif()

# Locate the compiler used to build the offload example, unless the caller has
# already provided OMP_TARGET_COMPILER.
if(NOT OMP_TARGET_COMPILER)
    # Search for amdclang++ under ROCM_PATH (CMake variable, then environment
    # variable) and /opt/rocm, looking in the bin and llvm/bin subdirectories.
    find_program(
        amdclangpp_EXECUTABLE
        NAMES amdclang++
        HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATH_SUFFIXES bin llvm/bin)
    mark_as_advanced(amdclangpp_EXECUTABLE)

    # Without a compiler the example cannot be built: warn and leave this
    # directory.
    if(NOT amdclangpp_EXECUTABLE)
        message(WARNING "OpenMP target compiler not found. Skipping this example.")
        return()
    endif()

    set(OMP_TARGET_COMPILER
        "${amdclangpp_EXECUTABLE}"
        CACHE FILEPATH "OpenMP target compiler")
endif()

# The example is declared as its own C++ project.
project(rocprofiler-systems-example-openmp-target-lib LANGUAGES CXX)

# The build type is fixed for everything configured in this directory.
set(CMAKE_BUILD_TYPE "RelWithDebInfo")

# Architectures used as the initial value of the GPU_TARGETS cache entry.
set(DEFAULT_GPU_TARGETS
    gfx900 gfx906 gfx908 gfx90a
    gfx940 gfx941 gfx942
    gfx1030 gfx1010
    gfx1100 gfx1101 gfx1102)

# User-overridable list of architectures the example is compiled for.
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "GPU targets to compile for")

find_package(Threads REQUIRED)

# Shared library holding the OpenMP code of the example. OpenMP is enabled for
# both compiling and linking; the link flag is PUBLIC so that targets linking
# against the library receive it as well.
add_library(openmp-target-lib SHARED library.cpp)
target_link_libraries(openmp-target-lib PUBLIC Threads::Threads)
target_compile_options(openmp-target-lib PRIVATE -fopenmp -ggdb)
target_link_options(openmp-target-lib PUBLIC -fopenmp)

# Add one --offload-arch flag per requested GPU architecture, for both the
# compile step and the link step.
foreach(_gpu_arch ${GPU_TARGETS})
    target_compile_options(openmp-target-lib PRIVATE --offload-arch=${_gpu_arch})
    target_link_options(openmp-target-lib PUBLIC --offload-arch=${_gpu_arch})
endforeach()

message(STATUS "Using OpenMP target compiler: ${OMP_TARGET_COMPILER}")

# Derive the installation prefix from the compiler location by stripping the
# executable name and then its containing directory:
#   <prefix>/bin/amdclang++  ->  <prefix>
get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER}" DIRECTORY)
get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER_DIR}" DIRECTORY)

# The compiler may also live in <prefix>/llvm/bin, in which case the two steps
# above yield <prefix>/llvm rather than <prefix>. Detect that layout (the
# derived directory is named "llvm" and has no nested llvm/lib) and step up one
# more level so that the llvm/lib and lib paths below resolve correctly.
get_filename_component(_omp_target_prefix_name "${OMP_TARGET_COMPILER_DIR}" NAME)
if(NOT EXISTS "${OMP_TARGET_COMPILER_DIR}/llvm/lib"
   AND "${_omp_target_prefix_name}" STREQUAL "llvm")
    get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER_DIR}"
                           DIRECTORY)
endif()
unset(_omp_target_prefix_name)

message(STATUS "Using OpenMP target compiler directory: ${OMP_TARGET_COMPILER_DIR}")

# The targets are already defined at this point, so a missing runtime library
# directory is a hard configuration error rather than a reason to skip.
if(NOT EXISTS "${OMP_TARGET_COMPILER_DIR}/llvm/lib")
    message(FATAL_ERROR "${OMP_TARGET_COMPILER_DIR}/llvm/lib does not exist")
endif()

# Runtime search path shared by the library and the executable. BUILD_RPATH is
# a semicolon-separated list; CMake joins the entries with the platform's
# separator when generating the link line.
set(_omp_target_build_rpath "${OMP_TARGET_COMPILER_DIR}/llvm/lib"
                            "${OMP_TARGET_COMPILER_DIR}/lib")

set_target_properties(
    openmp-target-lib
    PROPERTIES BUILD_RPATH "${_omp_target_build_rpath}"
               OUTPUT_NAME "openmp-target"
               POSITION_INDEPENDENT_CODE ON)

# Build the library with the OpenMP target compiler instead of the project's
# default C++ compiler.
rocprofiler_systems_custom_compilation(TARGET openmp-target-lib COMPILER
                                       ${OMP_TARGET_COMPILER})

# Driver executable linking against the offload library.
add_executable(openmp-target)
target_sources(openmp-target PRIVATE main.cpp)
target_link_libraries(openmp-target PRIVATE openmp-target-lib)
target_compile_options(openmp-target PRIVATE -ggdb)

set_target_properties(
    openmp-target
    PROPERTIES BUILD_RPATH "${_omp_target_build_rpath}"
               POSITION_INDEPENDENT_CODE ON)

rocprofiler_systems_custom_compilation(TARGET openmp-target COMPILER
                                       ${OMP_TARGET_COMPILER})
Loading

0 comments on commit 9da7365

Please sign in to comment.