From 531da1fcf702551513aef52263437e820d14cad1 Mon Sep 17 00:00:00 2001 From: Joana Niermann Date: Sun, 23 Oct 2022 08:39:19 +0200 Subject: [PATCH] Update current benchmarks and add new CPU and CUDA benchmarks --- .../detray/definitions/pdg_particle.hpp | 23 +- .../include/detray/propagator/actor_chain.hpp | 20 +- core/include/detray/propagator/propagator.hpp | 6 + tests/benchmarks/CMakeLists.txt | 35 +++ tests/benchmarks/cpu/CMakeLists.txt | 19 +- tests/benchmarks/cpu/benchmark_propagator.cpp | 183 -------------- tests/benchmarks/cpu/propagation.cpp | 157 ++++++++++++ tests/benchmarks/cuda/CMakeLists.txt | 14 +- .../cuda/benchmark_propagator_cuda.cpp | 128 ---------- .../cuda/benchmark_propagator_cuda_kernel.cu | 70 ------ .../cuda/benchmark_propagator_cuda_kernel.hpp | 60 ----- tests/benchmarks/cuda/propagation.cpp | 161 +++++++++++++ .../detray/benchmarks/benchmark_base.hpp | 76 ++++++ .../detray/benchmarks/cpu/CMakeLists.txt | 15 ++ .../benchmarks/cpu/propagation_benchmark.hpp | 131 ++++++++++ .../detray/benchmarks/device/CMakeLists.txt | 9 + .../benchmarks/device/cuda/CMakeLists.txt | 29 +++ .../device/cuda/propagation_benchmark.cu | 151 ++++++++++++ .../device/cuda/propagation_benchmark.hpp | 179 ++++++++++++++ .../propagation_benchmark_config.hpp | 57 +++++ .../propagation_benchmark_utils.hpp | 223 ++++++++++++++++++ .../test/common/detector_scan_config.hpp | 4 +- .../detray/test/common/fixture_base.hpp | 12 +- .../common/material_validation_config.hpp | 4 +- .../common/navigation_validation_config.hpp | 4 +- .../cpu/propagator/propagator.cpp | 6 +- 26 files changed, 1299 insertions(+), 477 deletions(-) delete mode 100644 tests/benchmarks/cpu/benchmark_propagator.cpp create mode 100644 tests/benchmarks/cpu/propagation.cpp delete mode 100644 tests/benchmarks/cuda/benchmark_propagator_cuda.cpp delete mode 100644 tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu delete mode 100644 tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp create mode 
100644 tests/benchmarks/cuda/propagation.cpp create mode 100644 tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp create mode 100644 tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt create mode 100644 tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp create mode 100644 tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt create mode 100644 tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt create mode 100644 tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu create mode 100644 tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp create mode 100644 tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp create mode 100644 tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp diff --git a/core/include/detray/definitions/pdg_particle.hpp b/core/include/detray/definitions/pdg_particle.hpp index 36aa99759..95951b372 100644 --- a/core/include/detray/definitions/pdg_particle.hpp +++ b/core/include/detray/definitions/pdg_particle.hpp @@ -28,13 +28,13 @@ struct pdg_particle { m_charge(static_cast(charge)) {} DETRAY_HOST_DEVICE - std::int32_t pdg_num() const { return m_pdg_num; } + constexpr std::int32_t pdg_num() const { return m_pdg_num; } DETRAY_HOST_DEVICE - scalar_type mass() const { return m_mass; } + constexpr scalar_type mass() const { return m_mass; } DETRAY_HOST_DEVICE - scalar_type charge() const { return m_charge; } + constexpr scalar_type charge() const { return m_charge; } private: std::int32_t m_pdg_num; @@ -42,6 +42,23 @@ struct pdg_particle { scalar_type m_charge; }; +/// Apply the charge conjugation operator to a particle hypothesis @param ptc +template +DETRAY_HOST_DEVICE constexpr pdg_particle charge_conjugation( + const pdg_particle& ptc) { + return (ptc.charge() != 0) + ? 
detray::pdg_particle{-ptc.pdg_num(), ptc.mass(), + -ptc.charge()} + : ptc; +} + +/// @returns an updated particle hypothesis according to the track qop +template +DETRAY_HOST_DEVICE constexpr pdg_particle update_particle_hypothesis( + const pdg_particle& ptc, const track_t& params) { + return (ptc.charge() * params.qop() > 0.f) ? ptc : charge_conjugation(ptc); +} + // Macro for declaring the particle #define DETRAY_DECLARE_PARTICLE(PARTICLE_NAME, PDG_NUM, MASS, CHARGE) \ template \ diff --git a/core/include/detray/propagator/actor_chain.hpp b/core/include/detray/propagator/actor_chain.hpp index 572880fc9..2b077ea66 100644 --- a/core/include/detray/propagator/actor_chain.hpp +++ b/core/include/detray/propagator/actor_chain.hpp @@ -34,6 +34,8 @@ class actor_chain { public: /// Types of the actors that are registered in the chain using actor_list_type = tuple_t; + // Tuple of actor states + using state_tuple = tuple_t; // Type of states tuple that is used in the propagator using state = tuple_t; @@ -53,8 +55,7 @@ class actor_chain { return m_actors; } - /// @returns a tuple of default constructible actor states and a - /// corresponding tuple of references + /// @returns a tuple of default constructible actor states DETRAY_HOST_DEVICE static constexpr auto make_actor_states() { // Only possible if each state is default initializable @@ -67,10 +68,10 @@ class actor_chain { } /// @returns a tuple of reference for every state in the tuple @param t - DETRAY_HOST_DEVICE static constexpr state make_ref_tuple( + DETRAY_HOST_DEVICE static constexpr state setup_actor_states( tuple_t &t) { - return make_ref_tuple(t, - std::make_index_sequence{}); + return setup_actor_states( + t, std::make_index_sequence{}); } private: @@ -111,7 +112,7 @@ class actor_chain { /// @returns a tuple of reference for every state in the tuple @param t template - DETRAY_HOST_DEVICE static constexpr state make_ref_tuple( + DETRAY_HOST_DEVICE static constexpr state setup_actor_states( tuple_t &t, 
std::index_sequence /*ids*/) { return detray::tie(detail::get(t)...); @@ -126,6 +127,7 @@ template <> class actor_chain<> { public: + using state_tuple = dtuple<>; /// Empty states replaces a real actor states container struct state {}; @@ -138,6 +140,12 @@ class actor_chain<> { propagator_state_t & /*p_state*/) const { /*Do nothing*/ } + + /// @returns an empty state + DETRAY_HOST_DEVICE static constexpr state setup_actor_states( + const state_tuple &) { + return {}; + } }; } // namespace detray diff --git a/core/include/detray/propagator/propagator.hpp b/core/include/detray/propagator/propagator.hpp index 58c0ed813..45b374079 100644 --- a/core/include/detray/propagator/propagator.hpp +++ b/core/include/detray/propagator/propagator.hpp @@ -55,6 +55,12 @@ struct propagator { explicit constexpr propagator(const propagation::config &cfg) : m_cfg{cfg} {} + /// @returns the actor chain + DETRAY_HOST_DEVICE + constexpr const actor_chain_t &get_actor_chain() const { + return run_actors; + } + /// Propagation that state aggregates a stepping and a navigation state. It /// also keeps references to the actor states. struct state { diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt index dd4c3fd9c..12940e393 100644 --- a/tests/benchmarks/CMakeLists.txt +++ b/tests/benchmarks/CMakeLists.txt @@ -4,12 +4,47 @@ # # Mozilla Public License Version 2.0 +# Set the common C++ flags. +include(detray-compiler-options-cpp) +include_directories( + SYSTEM + $ +) +include_directories( + SYSTEM + $ +) + +# Set up a common benchmark library. 
+file( + GLOB _detray_benchmarks_headers + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "include/detray/benchmarks/*.hpp" +) + +add_library(detray_benchmarks INTERFACE "${_detray_benchmarks_headers}") +add_library(detray::benchmarks ALIAS detray_benchmarks) + +target_include_directories( + detray_benchmarks + INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + +target_link_libraries( + detray_benchmarks + INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_utils +) + +unset(_detray_benchmarks_headers) + # Set up the host/cpu benchmarks. if(DETRAY_BUILD_HOST) add_subdirectory(cpu) + add_subdirectory(include/detray/benchmarks/cpu) endif() # Set up all of the "device" benchmarks. if(DETRAY_BUILD_CUDA) add_subdirectory(cuda) + add_subdirectory(include/detray/benchmarks/device) endif() diff --git a/tests/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/cpu/CMakeLists.txt index 30b36c7af..03140c4af 100644 --- a/tests/benchmarks/cpu/CMakeLists.txt +++ b/tests/benchmarks/cpu/CMakeLists.txt @@ -10,21 +10,20 @@ message(STATUS "Building detray host benchmarks") option(DETRAY_BENCHMARK_MULTITHREAD "Enable multithreaded benchmarks" OFF) option(DETRAY_BENCHMARK_PRINTOUTS "Enable printouts in the benchmarks" OFF) -# Look for openMP, which is used for the CPU benchmark +# Look for openMP, which is used for the CPU propagation benchmark find_package(OpenMP) # Macro setting up the CPU benchmarks for a specific algebra plugin. macro(detray_add_cpu_benchmark algebra) # Build the benchmark executable. 
detray_add_executable(benchmark_cpu_${algebra} - "benchmark_propagator.cpp" "find_volume.cpp" "grid.cpp" "grid2.cpp" "intersect_all.cpp" "intersect_surfaces.cpp" "masks.cpp" - LINK_LIBRARIES benchmark::benchmark benchmark::benchmark_main vecmem::core + LINK_LIBRARIES benchmark::benchmark benchmark::benchmark_main vecmem::core detray::benchmarks detray::core_${algebra} detray::test_utils ) @@ -48,9 +47,21 @@ macro(detray_add_cpu_benchmark algebra) ) endif() + # Build the benchmark executable for the propagation + detray_add_executable( benchmark_cpu_propagation_${algebra} + "propagation.cpp" + LINK_LIBRARIES detray::benchmark_cpu benchmark::benchmark_main + vecmem::core detray::core_${algebra} detray::test_utils + ) + + target_compile_options( + detray_benchmark_cpu_propagation_${algebra} + PRIVATE "-march=native" "-ftree-vectorize" + ) + if(OpenMP_CXX_FOUND) target_link_libraries( - detray_benchmark_cpu_${algebra} + detray_benchmark_cpu_propagation_${algebra} PRIVATE OpenMP::OpenMP_CXX ) endif() diff --git a/tests/benchmarks/cpu/benchmark_propagator.cpp b/tests/benchmarks/cpu/benchmark_propagator.cpp deleted file mode 100644 index e588e9850..000000000 --- a/tests/benchmarks/cpu/benchmark_propagator.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2020-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -// Project include(s). 
-#include "detray/definitions/detail/algebra.hpp" -#include "detray/definitions/detail/containers.hpp" -#include "detray/definitions/detail/indexing.hpp" -#include "detray/definitions/units.hpp" -#include "detray/detectors/bfield.hpp" -#include "detray/detectors/toy_metadata.hpp" -#include "detray/geometry/shapes/rectangle2D.hpp" -#include "detray/navigation/navigator.hpp" -#include "detray/propagator/actor_chain.hpp" -#include "detray/propagator/actors/aborters.hpp" -#include "detray/propagator/actors/parameter_resetter.hpp" -#include "detray/propagator/actors/parameter_transporter.hpp" -#include "detray/propagator/actors/pointwise_material_interactor.hpp" -#include "detray/propagator/base_actor.hpp" -#include "detray/propagator/propagator.hpp" -#include "detray/propagator/rk_stepper.hpp" -#include "detray/tracks/tracks.hpp" -#include "detray/utils/grid/grid.hpp" - -// Detray test include(s). -#include "detray/test/utils/detectors/build_toy_detector.hpp" -#include "detray/test/utils/simulation/event_generator/track_generators.hpp" -#include "detray/test/utils/types.hpp" - -// VecMem include(s). -#include - -// Google benchmark include(s). -#include - -// System include(s) -#include -#include - -// Use the detray:: namespace implicitly. 
-using namespace detray; - -using algebra_t = ALGEBRA_PLUGIN; - -using detector_host_type = detector; -using detector_device_type = detector; - -using intersection_t = - intersection2D; - -using navigator_host_type = navigator; -using navigator_device_type = navigator; -using field_type = bfield::const_field_t; -using rk_stepper_type = rk_stepper; -using actor_chain_t = actor_chain, - pointwise_material_interactor, - parameter_resetter>; -using propagator_host_type = - propagator; -using propagator_device_type = - propagator; - -enum class propagate_option { - e_unsync = 0, - e_sync = 1, -}; - -// VecMem memory resource(s) -vecmem::host_memory_resource host_mr; - -// detector configuration -auto toy_cfg = - toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false); - -void fill_tracks(vecmem::vector> &tracks, - const std::size_t n_tracks, bool do_sort = true) { - using scalar_t = dscalar; - using uniform_gen_t = - detail::random_numbers>; - using trk_generator_t = - random_track_generator, uniform_gen_t>; - - trk_generator_t::configuration trk_gen_cfg{}; - trk_gen_cfg.seed(42u); - trk_gen_cfg.n_tracks(n_tracks); - trk_gen_cfg.randomize_charge(true); - trk_gen_cfg.phi_range(-constant::pi, constant::pi); - trk_gen_cfg.eta_range(-3.f, 3.f); - trk_gen_cfg.mom_range(1.f * unit::GeV, - 100.f * unit::GeV); - trk_gen_cfg.origin({0.f, 0.f, 0.f}); - trk_gen_cfg.origin_stddev({0.f * unit::mm, - 0.f * unit::mm, - 0.f * unit::mm}); - - // Iterate through uniformly distributed momentum directions - for (auto traj : trk_generator_t{trk_gen_cfg}) { - tracks.push_back(traj); - } - - if (do_sort) { - // Sort by theta angle - const auto traj_comp = [](const auto &lhs, const auto &rhs) { - constexpr auto pi_2{constant::pi_2}; - return math::fabs(pi_2 - vector::theta(lhs.dir())) < - math::fabs(pi_2 - vector::theta(rhs.dir())); - }; - - std::ranges::sort(tracks, traj_comp); - } -} - -template -static void BM_PROPAGATOR_CPU(benchmark::State &state) { - - std::size_t 
n_tracks{static_cast(state.range(0)) * - static_cast(state.range(0))}; - - // Create the toy geometry and bfield - auto [det, names] = build_toy_detector(host_mr, toy_cfg); - test::vector3 B{0.f, 0.f, 2.f * unit::T}; - auto bfield = bfield::create_const_field(B); - - // Create propagator - propagation::config cfg{}; - cfg.navigation.search_window = {3u, 3u}; - propagator_host_type p{cfg}; - - std::size_t total_tracks = 0; - - for (auto _ : state) { - - // TODO: use fixture to build tracks - state.PauseTiming(); - - // Get tracks - vecmem::vector> tracks(&host_mr); - fill_tracks(tracks, n_tracks); - - total_tracks += tracks.size(); - - state.ResumeTiming(); - -#pragma omp parallel for - for (auto &track : tracks) { - - parameter_transporter::state transporter_state{}; - pointwise_material_interactor::state interactor_state{}; - parameter_resetter::state resetter_state{}; - - auto actor_states = - tie(transporter_state, interactor_state, resetter_state); - - // Create the propagator state - propagator_host_type::state p_state(track, bfield, det); - - // Run propagation - if constexpr (opt == propagate_option::e_unsync) { - p.propagate(p_state, actor_states); - } else if constexpr (opt == propagate_option::e_sync) { - p.propagate_sync(p_state, actor_states); - } - } - } - - state.counters["TracksPropagated"] = benchmark::Counter( - static_cast(total_tracks), benchmark::Counter::kIsRate); -} - -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CPU, propagate_option::e_unsync) - ->Name("CPU unsync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CPU, propagate_option::e_sync) - ->Name("CPU sync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); - -BENCHMARK_MAIN(); diff --git a/tests/benchmarks/cpu/propagation.cpp b/tests/benchmarks/cpu/propagation.cpp new file mode 100644 index 000000000..83bde2b8c --- /dev/null +++ b/tests/benchmarks/cpu/propagation.cpp @@ -0,0 +1,157 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 
2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/cpu/propagation_benchmark.hpp" + +// Detray test include(s). +#include "detray/test/utils/detectors/build_toy_detector.hpp" +#include "detray/test/utils/detectors/build_wire_chamber.hpp" +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Vecmem include(s) +#include + +// System include(s) +#include +#include + +using namespace detray; + +int main(int argc, char** argv) { + + using toy_detector_t = detector; + using algebra_t = typename toy_detector_t::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + + using field_t = bfield::const_field_t; + using stepper_t = rk_stepper; + using empty_chain_t = actor_chain<>; + using default_chain = actor_chain, + pointwise_material_interactor, + parameter_resetter>; + + vecmem::host_memory_resource host_mr; + + // + // Configuration + // + + // Constant magnetic field + vector3_t B{0.f, 0.f, 2.f * unit::T}; + + // Configure toy detector + toy_det_config toy_cfg{}; + toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u); + + std::cout << toy_cfg << std::endl; + + // Configure wire chamber + wire_chamber_config wire_chamber_cfg{}; + 
wire_chamber_cfg.half_z(500.f * unit::mm); + + std::cout << wire_chamber_cfg << std::endl; + + // Configure propagation + propagation::config prop_cfg{}; + prop_cfg.navigation.search_window = {3u, 3u}; + + std::cout << prop_cfg << std::endl; + + // Benchmark config + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + auto trk_cfg = + detray::benchmarks::get_default_trk_gen_config( + n_tracks); + + // Specific configuration for the random track generation + trk_cfg.seed(42u); + + // Add additional tracks for warmup + bench_cfg.n_warmup(static_cast( + std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + + // + // Prepare data + // + auto track_samples = + detray::benchmarks::generate_track_samples( + &host_mr, n_tracks, trk_cfg); + + const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg); + const auto [wire_chamber, _] = + build_wire_chamber(host_mr, wire_chamber_cfg); + + auto bfield = bfield::create_const_field(B); + + dtuple<> empty_state{}; + + parameter_transporter::state transporter_state{}; + pointwise_material_interactor::state interactor_state{}; + parameter_resetter::state resetter_state{}; + + auto actor_states = detail::make_tuple( + transporter_state, interactor_state, resetter_state); + + // + // Register benchmarks + // + std::cout << "Propagation Benchmarks\n" + << "----------------------\n\n"; + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, default_chain>( + "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield, + &actor_states, track_samples, n_tracks); + + prop_cfg.stepping.do_covariance_transport = false; + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>( + "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, &empty_state, + track_samples, 
n_tracks); + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, default_chain>( + "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber, + bfield, &actor_states, track_samples, n_tracks); + + prop_cfg.stepping.do_covariance_transport = false; + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>( + "WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, &empty_state, + track_samples, n_tracks); + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt index 096c92ad0..a101f0854 100644 --- a/tests/benchmarks/cuda/CMakeLists.txt +++ b/tests/benchmarks/cuda/CMakeLists.txt @@ -26,26 +26,24 @@ if(DETRAY_EIGEN_PLUGIN) endif() foreach(algebra ${algebras}) - detray_add_executable(benchmark_cuda_${algebra} - "benchmark_propagator_cuda_kernel.hpp" - "benchmark_propagator_cuda.cpp" - "benchmark_propagator_cuda_kernel.cu" - LINK_LIBRARIES benchmark::benchmark detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils + detray_add_executable(benchmark_cuda_propagation_${algebra} + "propagation.cpp" + LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils ) target_compile_definitions( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE ${algebra}=${algebra} ) target_compile_options( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE "-march=native" "-ftree-vectorize" ) if(OpenMP_CXX_FOUND) target_link_libraries( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE OpenMP::OpenMP_CXX ) endif() diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp 
b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp deleted file mode 100644 index 08db5b8f9..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -// Project include(s) -#include "benchmark_propagator_cuda_kernel.hpp" - -// Detray test include(s). -#include "detray/test/utils/detectors/build_toy_detector.hpp" -#include "detray/test/utils/simulation/event_generator/track_generators.hpp" -#include "detray/test/utils/types.hpp" - -// Vecmem include(s) -#include -#include -#include -#include - -// Google include(s). -#include - -using namespace detray; - -// VecMem memory resource(s) -vecmem::host_memory_resource host_mr; -vecmem::cuda::managed_memory_resource mng_mr; -vecmem::cuda::device_memory_resource dev_mr; -vecmem::binary_page_memory_resource bp_mng_mr(mng_mr); - -// detector configuration -auto toy_cfg = - toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false); - -void fill_tracks(vecmem::vector> &tracks, - const std::size_t n_tracks, bool do_sort = true) { - using scalar_t = dscalar; - using uniform_gen_t = - detail::random_numbers>; - using trk_generator_t = - random_track_generator, uniform_gen_t>; - - trk_generator_t::configuration trk_gen_cfg{}; - trk_gen_cfg.seed(42u); - trk_gen_cfg.n_tracks(n_tracks); - trk_gen_cfg.randomize_charge(true); - trk_gen_cfg.phi_range(-constant::pi, constant::pi); - trk_gen_cfg.eta_range(-3.f, 3.f); - trk_gen_cfg.mom_range(1.f * unit::GeV, - 100.f * unit::GeV); - trk_gen_cfg.origin({0.f, 0.f, 0.f}); - trk_gen_cfg.origin_stddev({0.f * unit::mm, - 0.f * unit::mm, - 0.f * unit::mm}); - - // Iterate through uniformly distributed momentum directions - for (auto traj : trk_generator_t{trk_gen_cfg}) { - tracks.push_back(traj); - } - - if (do_sort) { - // Sort by theta angle - const auto traj_comp = 
[](const auto &lhs, const auto &rhs) { - constexpr auto pi_2{constant::pi_2}; - return math::fabs(pi_2 - vector::theta(lhs.dir())) < - math::fabs(pi_2 - vector::theta(rhs.dir())); - }; - - std::ranges::sort(tracks, traj_comp); - } -} - -template -static void BM_PROPAGATOR_CUDA(benchmark::State &state) { - - std::size_t n_tracks{static_cast(state.range(0)) * - static_cast(state.range(0))}; - - // Create the toy geometry - auto [det, names] = build_toy_detector(host_mr, toy_cfg); - test::vector3 B{0.f, 0.f, 2.f * unit::T}; - auto bfield = bfield::create_const_field(B); - - // vecmem copy helper object - vecmem::cuda::copy cuda_cpy; - - // Copy detector to device - auto det_buff = detray::get_buffer(det, dev_mr, cuda_cpy); - auto det_view = detray::get_data(det_buff); - - std::size_t total_tracks = 0; - - for (auto _ : state) { - - state.PauseTiming(); - - // Get tracks - vecmem::vector> tracks(&bp_mng_mr); - fill_tracks(tracks, n_tracks); - - total_tracks += tracks.size(); - - state.ResumeTiming(); - - // Get tracks data - auto tracks_data = vecmem::get_data(tracks); - - // Run the propagator test for GPU device - propagator_benchmark(det_view, bfield, tracks_data, opt); - } - - state.counters["TracksPropagated"] = benchmark::Counter( - static_cast(total_tracks), benchmark::Counter::kIsRate); -} - -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_unsync) - ->Name("CUDA unsync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_sync) - ->Name("CUDA sync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); - -BENCHMARK_MAIN(); diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu deleted file mode 100644 index 6bba06fd1..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu +++ /dev/null @@ -1,70 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022 CERN for the benefit 
of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -#include "benchmark_propagator_cuda_kernel.hpp" -#include "detray/definitions/detail/cuda_definitions.hpp" - -namespace detray { - -__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( - typename detector_host_type::view_type det_data, - covfie::field_view field_data, - vecmem::data::vector_view> tracks_data, - const propagate_option opt) { - - int gid = threadIdx.x + blockIdx.x * blockDim.x; - - detector_device_type det(det_data); - vecmem::device_vector> tracks(tracks_data); - - if (gid >= tracks.size()) { - return; - } - - // Create propagator - propagation::config cfg{}; - cfg.navigation.search_window = {3u, 3u}; - propagator_device_type p{cfg}; - - parameter_transporter::state transporter_state{}; - pointwise_material_interactor::state interactor_state{}; - parameter_resetter::state resetter_state{}; - - // Create the actor states - auto actor_states = - detray::tie(transporter_state, interactor_state, resetter_state); - // Create the propagator state - propagator_device_type::state p_state(tracks.at(gid), field_data, det); - - // Run propagation - if (opt == propagate_option::e_unsync) { - p.propagate(p_state, actor_states); - } else if (opt == propagate_option::e_sync) { - p.propagate_sync(p_state, actor_states); - } -} - -void propagator_benchmark( - typename detector_host_type::view_type det_data, - covfie::field_view field_data, - vecmem::data::vector_view>& tracks_data, - const propagate_option opt) { - - constexpr int thread_dim = 256; - int block_dim = - static_cast(tracks_data.size() + thread_dim - 1) / thread_dim; - - // run the test kernel - propagator_benchmark_kernel<<>>(det_data, field_data, - tracks_data, opt); - - // cuda error check - DETRAY_CUDA_ERROR_CHECK(cudaGetLastError()); - DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); -} - -} // namespace detray diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp 
b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp deleted file mode 100644 index ee2f505cb..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -#pragma once - -// Project include(s) -#include "detray/definitions/detail/algebra.hpp" -#include "detray/definitions/units.hpp" -#include "detray/detectors/bfield.hpp" -#include "detray/detectors/toy_metadata.hpp" -#include "detray/navigation/navigator.hpp" -#include "detray/propagator/actor_chain.hpp" -#include "detray/propagator/actors/aborters.hpp" -#include "detray/propagator/actors/parameter_resetter.hpp" -#include "detray/propagator/actors/parameter_transporter.hpp" -#include "detray/propagator/actors/pointwise_material_interactor.hpp" -#include "detray/propagator/base_actor.hpp" -#include "detray/propagator/propagator.hpp" -#include "detray/propagator/rk_stepper.hpp" -#include "detray/tracks/tracks.hpp" - -using algebra_t = ALGEBRA_PLUGIN; - -using detector_host_type = - detray::detector; -using detector_device_type = - detray::detector; - -using navigator_host_type = detray::navigator; -using navigator_device_type = detray::navigator; -using field_type = detray::bfield::const_field_t; -using rk_stepper_type = detray::rk_stepper; -using actor_chain_t = - detray::actor_chain, - detray::pointwise_material_interactor, - detray::parameter_resetter>; -using propagator_host_type = - detray::propagator; -using propagator_device_type = - detray::propagator; - -enum class propagate_option { - e_unsync = 0, - e_sync = 1, -}; - -namespace detray { - -/// test function for propagator with single state -void propagator_benchmark( - typename detector_host_type::view_type det_data, - typename field_type::view_t field_data, - vecmem::data::vector_view>& tracks_data, - const propagate_option opt); - 
-} // namespace detray diff --git a/tests/benchmarks/cuda/propagation.cpp b/tests/benchmarks/cuda/propagation.cpp new file mode 100644 index 000000000..5be604394 --- /dev/null +++ b/tests/benchmarks/cuda/propagation.cpp @@ -0,0 +1,161 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" + +// Detray test include(s). 
+#include "detray/test/utils/detectors/build_toy_detector.hpp" +#include "detray/test/utils/detectors/build_wire_chamber.hpp" +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Vecmem include(s) +#include +#include + +// System include(s) +#include +#include + +using namespace detray; + +int main(int argc, char** argv) { + + using toy_detector_t = detector; + using algebra_t = typename toy_detector_t::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + using field_bknd_t = bfield::const_bknd_t; + + vecmem::host_memory_resource host_mr; + vecmem::cuda::device_memory_resource dev_mr; + + // + // Configuration + // + + // Constant magnetic field + vector3_t B{0.f, 0.f, 2.f * unit::T}; + + // Configure toy detector + toy_det_config toy_cfg{}; + toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u); + + std::cout << toy_cfg << std::endl; + + // Configure wire chamber + wire_chamber_config wire_chamber_cfg{}; + wire_chamber_cfg.half_z(500.f * unit::mm); + + std::cout << wire_chamber_cfg << std::endl; + + // Configure propagation + propagation::config prop_cfg{}; + prop_cfg.navigation.search_window = {3u, 3u}; + + std::cout << prop_cfg << std::endl; + + // Benchmark config + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + auto trk_cfg = + detray::benchmarks::get_default_trk_gen_config( + n_tracks); + + // Specific configuration for the random track generation + trk_cfg.seed(42u); + + // Add additional tracks for warmup + bench_cfg.n_warmup(static_cast( + std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + + // + // Prepare data + // + auto track_samples = + 
detray::benchmarks::generate_track_samples( + &host_mr, n_tracks, trk_cfg, true); + + const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg); + const auto [wire_chamber, _] = + build_wire_chamber(host_mr, wire_chamber_cfg); + + auto bfield = bfield::create_const_field(B); + + dtuple<> empty_state{}; + + parameter_transporter::state transporter_state{}; + pointwise_material_interactor::state interactor_state{}; + parameter_resetter::state resetter_state{}; + + auto actor_states = detail::make_tuple( + transporter_state, interactor_state, resetter_state); + + // + // Register benchmarks + // + std::cout << "Propagation Benchmarks\n" + << "----------------------\n\n"; + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + toy_metadata, field_bknd_t, detray::benchmarks::default_chain>>( + "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield, + &actor_states, track_samples, n_tracks, &dev_mr); + + prop_cfg.stepping.do_covariance_transport = false; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + toy_metadata, field_bknd_t, detray::benchmarks::empty_chain>>( + "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, &empty_state, + track_samples, n_tracks, &dev_mr); + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + default_metadata, field_bknd_t, detray::benchmarks::default_chain>>( + "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber, + bfield, &actor_states, track_samples, n_tracks, &dev_mr); + + prop_cfg.stepping.do_covariance_transport = false; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + 
default_metadata, field_bknd_t, detray::benchmarks::empty_chain>>( + "WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, &empty_state, + track_samples, n_tracks, &dev_mr); + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp new file mode 100644 index 000000000..c867b6f19 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp @@ -0,0 +1,76 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2023-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Benchmark include +#include + +// System include(s) +#include +#include + +namespace detray::benchmarks { + +/// Base type for detray benchmarks with google benchmark +struct benchmark_base { + /// Local configuration type + struct configuration { + /// Size of data sample to be used in benchmark + int m_samples{100}; + /// Run a number of operations before the benchmark + bool m_warmup = true; + // Size of data in warm-up round + int m_n_warmup{static_cast(0.1 * static_cast(m_samples))}; + + /// Setters + /// @{ + configuration& n_samples(int n) { + m_samples = n; + return *this; + } + configuration& do_warmup(bool b) { + m_warmup = b; + return *this; + } + configuration& n_warmup(int n) { + m_n_warmup = n; + m_warmup = true; + return *this; + } + /// @} + + /// Getters + /// @{ + constexpr int n_samples() const { return m_samples; } + constexpr bool do_warmup() const { return m_warmup; } + constexpr int n_warmup() const { return m_n_warmup; } + /// @} + + private: + /// Print the benchmark setup + friend std::ostream& operator<<(std::ostream& os, + const configuration& cfg) { + os << " -> running:\t " << cfg.n_samples() << " samples" + << std::endl; + if (cfg.do_warmup()) { + os << " -> 
warmup: \t " << cfg.n_warmup() << " samples" + << std::endl; + } + os << std::endl; + return os; + } + }; + + /// Default construction + benchmark_base() = default; + + /// Default destructor + virtual ~benchmark_base() = default; +}; + +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt new file mode 100644 index 000000000..c2bc1a0d8 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt @@ -0,0 +1,15 @@ +# Detray library, part of the ACTS project (R&D line) +# +# (c) 2024 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +# Set the CPU build flags. +include(detray-compiler-options-cpp) + +# Set up a test library, which the "new style" benchmarks and tests could use. +add_library(detray_benchmark_cpu INTERFACE "propagation_benchmark.hpp") + +add_library(detray::benchmark_cpu ALIAS detray_benchmark_cpu) + +target_link_libraries(detray_benchmark_cpu INTERFACE detray::benchmarks) diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp new file mode 100644 index 000000000..94c78c950 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp @@ -0,0 +1,131 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2023-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/definitions/detail/algebra.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/benchmark_base.hpp" +#include "detray/benchmarks/propagation_benchmark_config.hpp" +#include "detray/benchmarks/propagation_benchmark_utils.hpp" + +// Benchmark include +#include + +// System include(s) +#include +#include +#include +#include + 
+namespace detray::benchmarks { + +template +struct host_propagation_bm : public benchmark_base { + /// Detector dependent types + using algebra_t = typename propagator_t::detector_type::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + + /// Local configuration type + using configuration = propagation_benchmark_config; + + /// The benchmark configuration + configuration m_cfg{}; + + /// Default construction + host_propagation_bm() = default; + + /// Construct from an externally provided configuration @param cfg + explicit host_propagation_bm(const configuration &cfg) : m_cfg{cfg} {} + + /// @return the benchmark configuration + configuration &config() { return m_cfg; } + + /// Prepare data and run benchmark loop + inline void operator()(::benchmark::State &state, + dvector> *tracks, + const typename propagator_t::detector_type *det, + const bfield_t *bfield, + typename propagator_t::actor_chain_type::state_tuple + *input_actor_states) const { + using actor_chain_t = typename propagator_t::actor_chain_type; + using actor_states_t = typename actor_chain_t::state_tuple; + + assert(tracks != nullptr); + assert(det != nullptr); + assert(bfield != nullptr); + assert(input_actor_states != nullptr); + + const int n_samples{m_cfg.benchmark().n_samples()}; + const int n_warmup{m_cfg.benchmark().n_warmup()}; + + assert(static_cast(n_samples) <= tracks->size()); + + // Create propagator + propagator_t p{m_cfg.propagation()}; + + // Call the host propagation + auto run_propagation = [&p, det, bfield, input_actor_states]( + free_track_parameters &track) { + // Fresh copy of actor states + actor_states_t actor_states(*input_actor_states); + // Tuple of references to pass to the propagator + typename actor_chain_t::state actor_state_refs = + actor_chain_t::setup_actor_states(actor_states); + + typename propagator_t::state p_state(track, *bfield, *det); + // Particle hypothesis + auto &ptc = p_state._stepping.particle_hypothesis(); + 
p_state.set_particle(update_particle_hypothesis(ptc, track)); + + // Run propagation + if constexpr (kOPT == + detray::benchmarks::propagation_opt::e_unsync) { + ::benchmark::DoNotOptimize( + p.propagate(p_state, actor_state_refs)); + } else if constexpr (kOPT == + detray::benchmarks::propagation_opt::e_sync) { + ::benchmark::DoNotOptimize( + p.propagate_sync(p_state, actor_state_refs)); + } + }; + + // Warm-up + if (m_cfg.benchmark().do_warmup()) { + assert(n_warmup > 0); + auto stride{n_samples / n_warmup}; + stride = (stride == 0) ? 10 : stride; + assert(stride > 0); + +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < n_samples; i += stride) { + run_propagation((*tracks)[static_cast(i)]); + } + } + + // Run the benchmark + std::size_t total_tracks = 0u; + for (auto _ : state) { +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < n_samples; ++i) { + run_propagation((*tracks)[static_cast(i)]); + } + total_tracks += static_cast(n_samples); + } + // Report throughput + state.counters["TracksPropagated"] = benchmark::Counter( + static_cast(total_tracks), benchmark::Counter::kIsRate); + } +}; + +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt new file mode 100644 index 000000000..71bce8dfa --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt @@ -0,0 +1,9 @@ +# Detray library, part of the ACTS project (R&D line) +# +# (c) 2024 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +if(DETRAY_BUILD_CUDA) + add_subdirectory(cuda) +endif() diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt new file mode 100644 index 000000000..9548b0dd7 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt @@ -0,0 +1,29 @@ +# Detray 
library, part of the ACTS project (R&D line) +# +# (c) 2024 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +# C++17 support for CUDA requires CMake 3.18. +cmake_minimum_required(VERSION 3.18) + +# Enable CUDA as a language. +enable_language(CUDA) + +# Set the CUDA build flags. +include(detray-compiler-options-cuda) + +# Set up a benchmark library for CUDA +add_library( + detray_benchmark_cuda + STATIC + "propagation_benchmark.hpp" + "propagation_benchmark.cu" +) + +add_library(detray::benchmark_cuda ALIAS detray_benchmark_cuda) + +target_link_libraries( + detray_benchmark_cuda + PUBLIC detray::benchmarks detray::core_array vecmem::cuda +) diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu new file mode 100644 index 000000000..fd993bbdb --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu @@ -0,0 +1,151 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" +#include "detray/core/detector_metadata.hpp" +#include "detray/definitions/detail/cuda_definitions.hpp" +#include "detray/detectors/toy_metadata.hpp" + +namespace detray::benchmarks { + +template +__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( + propagation::config cfg, + typename propagator_t::detector_type::view_type det_view, + typename propagator_t::stepper_type::magnetic_field_type field_view, + typename propagator_t::actor_chain_type::state_tuple + *device_actor_state_ptr, + vecmem::data::vector_view< + free_track_parameters> + tracks_view) { + + using detector_device_t = + detector; + using algebra_t = typename detector_device_t::algebra_type; + using actor_chain_t = typename
propagator_t::actor_chain_type; + using propagator_device_t = + propagator, actor_chain_t>; + + detector_device_t det(det_view); + vecmem::device_vector> tracks(tracks_view); + + int gid = threadIdx.x + blockIdx.x * blockDim.x; + if (gid >= tracks.size()) { + return; + } + + // Create propagator + propagator_device_t p{cfg}; + + // Create the actor states on a fresh copy + typename actor_chain_t::state_tuple actor_states = *device_actor_state_ptr; + auto actor_state_refs = actor_chain_t::setup_actor_states(actor_states); + + // Create the propagator state + typename propagator_device_t::state p_state(tracks.at(gid), field_view, + det); + + // Particle hypothesis + auto &ptc = p_state._stepping.particle_hypothesis(); + p_state.set_particle(update_particle_hypothesis(ptc, tracks.at(gid))); + + // Run propagation + if constexpr (kOPT == detray::benchmarks::propagation_opt::e_unsync) { + p.propagate(p_state, actor_state_refs); + } else if constexpr (kOPT == detray::benchmarks::propagation_opt::e_sync) { + p.propagate_sync(p_state, actor_state_refs); + } +} + +template +typename propagator_t::actor_chain_type::state_tuple *setup_actor_states( + typename propagator_t::actor_chain_type::state_tuple *input_actor_states) { + + // Copy the actor state blueprint to the device + using actor_state_t = typename propagator_t::actor_chain_type::state_tuple; + actor_state_t *device_actor_state_ptr{nullptr}; + + cudaError_t success = + cudaMalloc((void **)&device_actor_state_ptr, sizeof(actor_state_t)); + assert(success == cudaSuccess); + + success = cudaMemcpy(device_actor_state_ptr, input_actor_states, + sizeof(actor_state_t), cudaMemcpyHostToDevice); + assert(success == cudaSuccess); + + return device_actor_state_ptr; +} + +template +void release_actor_states(typename propagator_t::actor_chain_type::state_tuple + *device_actor_state_ptr) { + [[maybe_unused]] cudaError_t success = cudaFree(device_actor_state_ptr); + assert(success == cudaSuccess); +} + +template +void 
run_propagation_kernel( + const propagation::config &cfg, + typename propagator_t::detector_type::view_type det_view, + typename propagator_t::stepper_type::magnetic_field_type field_view, + typename propagator_t::actor_chain_type::state_tuple + *device_actor_state_ptr, + vecmem::data::vector_view< + free_track_parameters> + tracks_view, + const int n_samples) { + + constexpr int thread_dim = 256; + int block_dim = (n_samples + thread_dim - 1) / thread_dim; + + // run the test kernel + propagator_benchmark_kernel<<>>( + cfg, det_view, field_view, device_actor_state_ptr, tracks_view); + + // cuda error check + DETRAY_CUDA_ERROR_CHECK(cudaGetLastError()); + DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); +} + +/// Macro declaring the template instantiations for the different detector types +#define DECLARE_PROPAGATION_BENCHMARK(METADATA, CHAIN, FIELD, OPT) \ + \ + template void \ + run_propagation_kernel, OPT>( \ + const propagation::config &, detector::view_type, \ + covfie::field_view, \ + cuda_propagator_type::actor_chain_type::state_tuple *, \ + vecmem::data::vector_view< \ + free_track_parameters::algebra_type>>, \ + const int); \ + \ + template cuda_propagator_type::actor_chain_type::state_tuple * \ + setup_actor_states>( \ + cuda_propagator_type::actor_chain_type::state_tuple *); \ + \ + template void \ + release_actor_states>( \ + cuda_propagator_type::actor_chain_type::state_tuple *); + +DECLARE_PROPAGATION_BENCHMARK(default_metadata, empty_chain, + bfield::const_bknd_t, + detray::benchmarks::propagation_opt::e_unsync) +DECLARE_PROPAGATION_BENCHMARK(default_metadata, default_chain, + bfield::const_bknd_t, + detray::benchmarks::propagation_opt::e_unsync) +DECLARE_PROPAGATION_BENCHMARK(toy_metadata, empty_chain, bfield::const_bknd_t, + detray::benchmarks::propagation_opt::e_unsync) +DECLARE_PROPAGATION_BENCHMARK(toy_metadata, default_chain, bfield::const_bknd_t, + detray::benchmarks::propagation_opt::e_unsync) + +} // namespace detray::benchmarks diff --git 
a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp new file mode 100644 index 000000000..9442ea810 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp @@ -0,0 +1,179 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/definitions/detail/algebra.hpp" +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/propagator.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/benchmark_base.hpp" +#include "detray/benchmarks/propagation_benchmark_config.hpp" +#include "detray/benchmarks/propagation_benchmark_utils.hpp" + +// Vecmem include(s) +#include +#include +#include +#include + +// Benchmark include +#include + +// System include(s) +#include +#include +#include +#include + +namespace detray::benchmarks { + +// Define propagator type +template +using empty_chain = actor_chain<>; + +template +using default_chain = actor_chain, + pointwise_material_interactor, + parameter_resetter>; + +template class actor_chain_t> +using cuda_propagator_type = + propagator, + typename detector::algebra_type>, + navigator>, + actor_chain_t::algebra_type>>; + +/// Launch the propagation kernel for benchmarking +/// +/// @param cfg the propagation configuration +/// @param det_view the detector vecmem view +/// @param
field_data the magnetic field view (maybe an empty field) +/// @param tracks_data the track collection view +/// @param navigation_cache_view the navigation cache vecmem view +/// @param opt which propagation to run (sync vs. unsync) +template +void run_propagation_kernel( + const propagation::config &, + typename propagator_t::detector_type::view_type, + typename propagator_t::stepper_type::magnetic_field_type, + typename propagator_t::actor_chain_type::state_tuple *, + vecmem::data::vector_view< + free_track_parameters>, + const int); + +/// Allocate actor state blueprint on device +template +typename propagator_t::actor_chain_type::state_tuple *setup_actor_states( + typename propagator_t::actor_chain_type::state_tuple *); + +/// Release actor state blueprint +template +void release_actor_states( + typename propagator_t::actor_chain_type::state_tuple *); + +/// Device Propagation benchmark +template +struct cuda_propagation_bm : public benchmark_base { + /// Detector dependent types + using algebra_t = typename propagator_t::detector_type::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + + /// Local configuration type + using configuration = propagation_benchmark_config; + + /// The benchmark configuration + configuration m_cfg{}; + + /// Default construction + cuda_propagation_bm() = default; + + /// Construct from an externally provided configuration @param cfg + explicit cuda_propagation_bm(const configuration &cfg) : m_cfg{cfg} {} + + /// @return the benchmark configuration + configuration &config() { return m_cfg; } + + /// Prepare data and run benchmark loop + inline void operator()(::benchmark::State &state, + vecmem::memory_resource *dev_mr, + dvector> *tracks, + const typename propagator_t::detector_type *det, + const bfield_bknd_t *bfield, + typename propagator_t::actor_chain_type::state_tuple + *input_actor_states) const { + + assert(dev_mr != nullptr); + assert(tracks != nullptr); + assert(det != nullptr); + assert(bfield !=
nullptr); + assert(input_actor_states != nullptr); + + // Helper object for performing memory copies (to CUDA devices) + vecmem::cuda::copy cuda_cpy; + + const int n_samples{m_cfg.benchmark().n_samples()}; + const int n_warmup{m_cfg.benchmark().n_warmup()}; + + assert(static_cast(n_samples) <= tracks->size()); + + // Copy the track collection to device + auto track_buffer = + detray::get_buffer(vecmem::get_data(*tracks), *dev_mr, cuda_cpy); + + // Copy the detector to device and get its view + auto det_buffer = detray::get_buffer(*det, *dev_mr, cuda_cpy); + auto det_view = detray::get_data(det_buffer); + + // Copy blueprint actor states to device + auto *device_actor_state_ptr = + setup_actor_states(input_actor_states); + + // Do a small warm up run + { + auto warmup_track_buffer = detray::get_buffer( + vecmem::get_data(*tracks), *dev_mr, cuda_cpy); + run_propagation_kernel( + m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr, + warmup_track_buffer, math::min(n_warmup, n_samples)); + } + + std::size_t total_tracks = 0u; + for (auto _ : state) { + // Launch the propagator test for GPU device + run_propagation_kernel( + m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr, + track_buffer, n_samples); + + total_tracks += static_cast(n_samples); + } + + // Report throughput + state.counters["TracksPropagated"] = benchmark::Counter( + static_cast(total_tracks), benchmark::Counter::kIsRate); + + release_actor_states(device_actor_state_ptr); + } +}; + +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp new file mode 100644 index 000000000..32dbcc617 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp @@ -0,0 +1,57 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public 
License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/benchmarks/benchmark_base.hpp" +#include "detray/propagator/propagation_config.hpp" + +// System include(s) +#include +#include + +namespace detray::benchmarks { + +/// Configuration for propagation benchmarks +struct propagation_benchmark_config { + /// Prefix for the benchmark name + std::string m_name{"BM_PROPAGATION"}; + /// Benchmark configuration + benchmark_base::configuration m_benchmark{}; + /// Propagation configuration + propagation::config m_propagation{}; + + /// Default construction + propagation_benchmark_config() = default; + + /// Construct from a base configuration + explicit propagation_benchmark_config( + const benchmark_base::configuration& bench_cfg) + : m_benchmark(bench_cfg) {} + + /// Getters + /// @{ + const std::string& name() const { return m_name; } + const propagation::config& propagation() const { return m_propagation; } + propagation::config& propagation() { return m_propagation; } + const benchmark_base::configuration& benchmark() const { + return m_benchmark; + } + benchmark_base::configuration& benchmark() { return m_benchmark; } + /// @} + + /// Setters + /// @{ + propagation_benchmark_config& name(const std::string_view n) { + m_name = n; + return *this; + } + /// @} +}; + +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp new file mode 100644 index 000000000..89bbb6bb2 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp @@ -0,0 +1,223 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/definitions/detail/algebra.hpp" +#include "detray/navigation/navigator.hpp" +#include
"detray/propagator/actor_chain.hpp" +#include "detray/propagator/propagator.hpp" +#include "detray/tracks/tracks.hpp" +#include "detray/utils/tuple.hpp" + +// Vecmem include(s) +#include + +// Benchmark include +#include + +// System include(s) +#include +#include +#include + +namespace detray::benchmarks { + +/// Which propagate function to run +enum class propagation_opt { + e_unsync = 0, + e_sync = 1, +}; + +/// @returns the default track generation configuration for detray benchmarks +template +inline typename track_generator_t::configuration get_default_trk_gen_config( + const std::vector &n_tracks) { + + using track_t = typename track_generator_t::track_type; + using scalar_t = dscalar; + + int n_trks{*std::ranges::max_element(n_tracks)}; + + // Generate tracks + typename track_generator_t::configuration trk_cfg{}; + trk_cfg.n_tracks(static_cast(n_trks)); + trk_cfg.randomize_charge(true); + trk_cfg.phi_range(-constant::pi, constant::pi); + trk_cfg.eta_range(-3.f, 3.f); + trk_cfg.mom_range(1.f * unit::GeV, 100.f * unit::GeV); + trk_cfg.origin({0.f, 0.f, 0.f}); + trk_cfg.origin_stddev({0.f * unit::mm, 0.f * unit::mm, + 0.f * unit::mm}); + + return trk_cfg; +} + +/// Precompute the tracks +/// +/// @param mr memory resource to allocate the track vector +/// @param cfg the configuration of the track generator +/// @param do_sort sort the tracks by theta angle +template +inline auto generate_tracks( + vecmem::memory_resource *mr, + const typename track_generator_t::configuration &cfg = {}, + bool do_sort = true) { + + using track_t = typename track_generator_t::track_type; + using scalar_t = dscalar; + + // Track collection + dvector tracks(mr); + + // Iterate through uniformly distributed momentum directions + for (auto track : track_generator_t{cfg}) { + // Put it into vector of trajectories + tracks.push_back(track); + } + + if (do_sort) { + // Sort by theta angle + const auto traj_comp = [](const auto &lhs, const auto &rhs) { + constexpr auto 
pi_2{constant::pi_2}; + return math::fabs(pi_2 - vector::theta(lhs.dir())) < + math::fabs(pi_2 - vector::theta(rhs.dir())); + }; + + std::ranges::sort(tracks, traj_comp); + } + + return tracks; +} + +/// Generate as many samples of track states as there are entries in the +/// @param n_tracks vector. +template +inline auto generate_track_samples( + vecmem::memory_resource *mr, const std::vector &n_tracks, + typename track_generator_t::configuration &cfg = {}, bool do_sort = true) { + + using track_t = typename track_generator_t::track_type; + + std::vector> track_samples{}; + track_samples.reserve(n_tracks.size()); + + auto tmp_cfg{cfg}; + for (const int n : n_tracks) { + tmp_cfg.n_tracks(static_cast(n)); + track_samples.push_back( + generate_tracks(mr, tmp_cfg, do_sort)); + } + + return track_samples; +} + +/// Register a propagation benchmark of type @tparam benchmark_t +/// +/// @tparam benchmark_t the propagation benchmark functor +/// @tparam propagator_t full propagator type +/// @tparam detector_t host detector type +/// @tparam bfield_t covfie magnetic field type +/// +/// @param name name for the benchmark +/// @param bench_cfg basic benchmark configuration +/// @param prop_cfg propagation configuration +/// @param det the detector +/// @param bfield the covfie field +/// @param actor_states tuple that contains all actor states (same order as in +/// actor_chain_t) +/// @param tracks the pre-computed test tracks +/// @param n_samples the number of tracks to run +template