diff --git a/tests/benchmarks/cpu/propagation.cpp b/tests/benchmarks/cpu/propagation.cpp index 2c638e746..c63dd978f 100644 --- a/tests/benchmarks/cpu/propagation.cpp +++ b/tests/benchmarks/cpu/propagation.cpp @@ -40,6 +40,7 @@ int main(int argc, char** argv) { using test_algebra = typename toy_detector_t::algebra_type; using scalar = dscalar; using vector3 = dvector3D; + using free_track_parameters_t = free_track_parameters; using uniform_gen_t = detail::random_numbers>; @@ -92,11 +93,12 @@ int main(int argc, char** argv) { n_tracks); // Specific configuration for the random track generation - trk_cfg.seed(42u); + trk_cfg.seed(detail::random_numbers::default_seed()); // Add additional tracks for warmup bench_cfg.n_warmup(static_cast( std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + bench_cfg.do_warmup(true); // // Prepare data diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt index a101f0854..b8f19d9b5 100644 --- a/tests/benchmarks/cuda/CMakeLists.txt +++ b/tests/benchmarks/cuda/CMakeLists.txt @@ -15,20 +15,17 @@ enable_language(CUDA) # Set the CUDA build flags. include(detray-compiler-options-cuda) -# Look for openMP, which is used for the CPU benchmark -find_package(OpenMP) - -# make unit tests for multiple algebras -# Currently vc and smatrix is not supported -set(algebras "array") +# Build benchmarks for multiple algebra plugins +# Currently vc and smatrix is not supported on device +set(algebra_plugins "array") if(DETRAY_EIGEN_PLUGIN) - list(APPEND algebras "eigen") + list(APPEND algebra_plugins "eigen") endif() -foreach(algebra ${algebras}) +foreach(algebra ${algebra_plugins}) detray_add_executable(benchmark_cuda_propagation_${algebra} "propagation.cpp" - LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils + LINK_LIBRARIES detray::benchmark_cuda_${algebra} detray::core_${algebra} vecmem::cuda detray::test_utils ) target_compile_definitions( @@ -40,11 +37,4 @@ foreach(algebra ${algebras}) detray_benchmark_cuda_propagation_${algebra} PRIVATE "-march=native" "-ftree-vectorize" ) - - if(OpenMP_CXX_FOUND) - target_link_libraries( - detray_benchmark_cuda_propagation_${algebra} - PRIVATE OpenMP::OpenMP_CXX - ) - endif() endforeach() diff --git a/tests/benchmarks/cuda/propagation.cpp b/tests/benchmarks/cuda/propagation.cpp index ef5721395..9ec9e004b 100644 --- a/tests/benchmarks/cuda/propagation.cpp +++ b/tests/benchmarks/cuda/propagation.cpp @@ -41,6 +41,7 @@ int main(int argc, char** argv) { using test_algebra = typename toy_detector_t::algebra_type; using scalar = dscalar; using vector3 = dvector3D; + using free_track_parameters_t = free_track_parameters; using uniform_gen_t = detail::random_numbers>; @@ -87,11 +88,12 @@ int main(int argc, char** argv) { n_tracks); // Specific configuration for the random track generation - trk_cfg.seed(42u); + trk_cfg.seed(detail::random_numbers::default_seed()); // Add additional tracks for warmup bench_cfg.n_warmup(static_cast( std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + bench_cfg.do_warmup(true); // // Prepare data diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp index 94c78c950..54a876cb0 100644 --- a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp +++ b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2023-2024 CERN for the benefit of the ACTS project + * (c) 2023-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -52,12 +52,12 @@ struct host_propagation_bm : public benchmark_base { configuration &config() { return m_cfg; } /// Prepare data and run benchmark loop - inline void operator()(::benchmark::State &state, - dvector> *tracks, - const typename propagator_t::detector_type *det, - const bfield_t *bfield, - typename propagator_t::actor_chain_type::state_tuple - *input_actor_states) const { + inline void operator()( + ::benchmark::State &state, + const dvector> *tracks, + const typename propagator_t::detector_type *det, const bfield_t *bfield, + const typename propagator_t::actor_chain_type::state_tuple + *input_actor_states) const { using actor_chain_t = typename propagator_t::actor_chain_type; using actor_states_t = typename actor_chain_t::state_tuple; @@ -76,7 +76,8 @@ struct host_propagation_bm : public benchmark_base { // Call the host propagation auto run_propagation = [&p, det, bfield, input_actor_states]( - free_track_parameters &track) { + const free_track_parameters + &track) { // Fresh copy of actor states actor_states_t actor_states(*input_actor_states); // Tuple of references to pass to the propagator @@ -103,17 +104,26 @@ struct host_propagation_bm : public benchmark_base { // Warm-up if (m_cfg.benchmark().do_warmup()) { assert(n_warmup > 0); - auto stride{n_samples / n_warmup}; + int stride{n_samples / n_warmup}; stride = (stride == 0) ? 10 : stride; assert(stride > 0); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < n_samples; i += stride) { + // The track gets copied into the stepper state, so that the + // original track sample vector remains unchanged run_propagation((*tracks)[static_cast(i)]); } + } else { + std::cout << "WARNING: Running host benchmarks without warmup" + << std::endl; } // Run the benchmark + + // Calculate the propagation rate + // @see + // https://github.com/google/benchmark/blob/main/docs/user_guide.md#custom-counters std::size_t total_tracks = 0u; for (auto _ : state) { #pragma omp parallel for schedule(dynamic) diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt index b43cd29c6..e7fe1adbc 100644 --- a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt @@ -13,17 +13,34 @@ enable_language(CUDA) # Set the CUDA build flags. include(detray-compiler-options-cuda) +# Build benchmark library for multiple algebra plugins to create correct +# template instantiations +# Currently vc and smatrix is not supported on device +set(algebra_plugins "array") +if(DETRAY_EIGEN_PLUGIN) + list(APPEND algebra_plugins "eigen") +endif() + # Set up a benchamrk library for CUDA -add_library( - detray_benchmark_cuda - STATIC - "propagation_benchmark.hpp" - "propagation_benchmark.cu" -) +foreach(algebra ${algebra_plugins}) + add_library( + detray_benchmark_cuda_${algebra} + STATIC + "propagation_benchmark.hpp" + "propagation_benchmark.cu" + ) -add_library(detray::benchmark_cuda ALIAS detray_benchmark_cuda) + add_library( + detray::benchmark_cuda_${algebra} + ALIAS detray_benchmark_cuda_${algebra} + ) -target_link_libraries( - detray_benchmark_cuda - PUBLIC vecmem::cuda detray::benchmarks detray::test_utils detray::core_array -) + target_link_libraries( + detray_benchmark_cuda_${algebra} + PUBLIC + vecmem::cuda + detray::benchmarks + detray::test_utils + detray::core_${algebra} + ) +endforeach() diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu index 200d9c295..ba45520ff 100644 --- a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -15,7 +15,7 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( propagation::config cfg, typename propagator_t::detector_type::view_type det_view, typename propagator_t::stepper_type::magnetic_field_type field_view, - typename propagator_t::actor_chain_type::state_tuple + const typename propagator_t::actor_chain_type::state_tuple *device_actor_state_ptr, vecmem::data::vector_view< free_track_parameters> @@ -30,8 +30,9 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( propagator, actor_chain_t>; - detector_device_t det(det_view); - vecmem::device_vector> tracks(tracks_view); + const detector_device_t det(det_view); + const vecmem::device_vector> tracks( + tracks_view); int gid = threadIdx.x + blockIdx.x * blockDim.x; if (gid >= tracks.size()) { @@ -46,6 +47,9 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( auto actor_state_refs = actor_chain_t::setup_actor_states(actor_states); // Create the propagator state + + // The track gets copied into the stepper state, so that the + // original track sample vector remains unchanged typename propagator_device_t::state p_state(tracks.at(gid), field_view, det); diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp index f5ef5e199..e7590cf7a 100644 --- a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2024 CERN for the benefit of the ACTS project + * (c) 2024-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -40,6 +40,7 @@ // System include(s) #include #include +#include #include #include @@ -85,6 +86,7 @@ void run_propagation_kernel( const int); /// Allocate actor state blueprint on device +/// @note This only works if each actor state in the tuple is essentially POD template typename propagator_t::actor_chain_type::state_tuple *setup_actor_states( typename propagator_t::actor_chain_type::state_tuple *); @@ -155,14 +157,22 @@ struct cuda_propagation_bm : public benchmark_base { setup_actor_states(input_actor_states); // Do a small warm up run - { + if (m_cfg.benchmark().do_warmup()) { auto warmup_track_buffer = detray::get_buffer( vecmem::get_data(*tracks), *dev_mr, cuda_cpy); + run_propagation_kernel( m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr, warmup_track_buffer, math::min(n_warmup, n_samples)); + } else { + std::cout << "WARNING: Running CUDA benchmarks without warmup is " + "not recommended" + << std::endl; } + // Calculate the propagation rate + // @see + // https://github.com/google/benchmark/blob/main/docs/user_guide.md#custom-counters std::size_t total_tracks = 0u; for (auto _ : state) { // Launch the propagator test for GPU device diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp index e3c4bffd8..1cb559205 100644 --- a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp @@ -171,19 +171,19 @@ inline void register_benchmark( std::cout << bench_name << "\n" << bench_cfg; - // Cpu benchmark if constexpr (std::is_invocable_v< decltype(prop_benchmark), ::benchmark::State &, dvector> *, const detector_t *, const bfield_bknd_t *, typename propagator_t::actor_chain_type::state_tuple *>) { + // Cpu benchmark ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark, &tracks, &det, &bfield, actor_states); //->MeasureProcessCPUTime(); } else { - + // Device benchmark ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark, dev_mr, &tracks, &det, &bfield, actor_states); diff --git a/tests/tools/CMakeLists.txt b/tests/tools/CMakeLists.txt index 3e4fb870c..544f49168 100644 --- a/tests/tools/CMakeLists.txt +++ b/tests/tools/CMakeLists.txt @@ -21,7 +21,6 @@ target_link_libraries( INTERFACE Boost::program_options vecmem::core - detray::core_array detray::test_common detray::io detray::csv_io diff --git a/tests/tools/include/detray/options/propagation_options.hpp b/tests/tools/include/detray/options/propagation_options.hpp index 7e634a323..ffa193332 100644 --- a/tests/tools/include/detray/options/propagation_options.hpp +++ b/tests/tools/include/detray/options/propagation_options.hpp @@ -42,7 +42,7 @@ void add_options( "mask_tolerance_scalor", boost::program_options::value()->default_value( cfg.mask_tolerance_scalor), - "Mask tolerance scaling")( + "Mask tolerance scale factor")( "overstep_tolerance", boost::program_options::value()->default_value( cfg.overstep_tolerance / unit::um), @@ -172,6 +172,7 @@ void configure_options( cfg.path_limit = path_limit * unit::m; } + cfg.do_covariance_transport = false; if (vm.count("covariance_transport")) { cfg.do_covariance_transport = true; } diff --git a/tests/tools/include/detray/options/track_generator_options.hpp b/tests/tools/include/detray/options/track_generator_options.hpp index 346803ef0..f085f23ed 100644 --- a/tests/tools/include/detray/options/track_generator_options.hpp +++ b/tests/tools/include/detray/options/track_generator_options.hpp @@ -41,6 +41,9 @@ void add_uniform_track_gen_options( boost::program_options::value()->default_value( cfg.eta_steps()), "No. eta steps for particle gun")( + "random_seed", + boost::program_options::value()->default_value(cfg.seed()), + "Seed for the random number generator")( "eta_range", boost::program_options::value>()->multitoken(), "Min, Max range of eta values for particle gun")( @@ -48,14 +51,12 @@ void add_uniform_track_gen_options( "origin", boost::program_options::value>()->multitoken(), "Coordintates for particle gun origin position [mm]")( - "p_tot", - boost::program_options::value()->default_value( - static_cast(cfg.m_p_mag) / unit::GeV), - "Total momentum of the test particle [GeV]")( - "p_T", - boost::program_options::value()->default_value( - static_cast(cfg.m_p_mag) / unit::GeV), - "Transverse momentum of the test particle [GeV]"); + "p_range", + boost::program_options::value>()->multitoken(), + "Total momentum [range] of the test particles [GeV]")( + "pT_range", + boost::program_options::value>()->multitoken(), + "Transverse momentum [range] of the test particles [GeV]"); } /// Add options for detray event generation @@ -66,6 +67,7 @@ void configure_uniform_track_gen_options( cfg.phi_steps(vm["phi_steps"].as()); cfg.eta_steps(vm["eta_steps"].as()); + cfg.seed(vm["random_seed"].as()); cfg.randomize_charge(vm.count("randomize_charge")); if (vm.count("eta_range")) { @@ -87,15 +89,47 @@ void configure_uniform_track_gen_options( "Particle gun origin needs three arguments"); } } - if (!vm["p_T"].defaulted() && !vm["p_tot"].defaulted()) { + if (vm.count("pT_range") && vm.count("p_range")) { throw std::invalid_argument( "Transverse and total momentum cannot be specified at the same " "time"); } - if (!vm["p_T"].defaulted()) { - cfg.p_T(vm["p_T"].as() * unit::GeV); + if (vm.count("pT_range")) { + const auto pt_range = vm["pT_range"].as>(); + + // Default + if (pt_range.empty()) { + cfg.p_T(static_cast(cfg.m_p_mag) * unit::GeV); + } else if (pt_range.size() == 1u) { + cfg.p_T(pt_range[0] * unit::GeV); + } else if (pt_range.size() == 2u) { + std::cout << "WARNING: Momentum range not possible with uniform " + "track generator: Using first value." + << std::endl; + cfg.p_T(pt_range[0] * unit::GeV); + } else { + throw std::invalid_argument( + "Wrong number of arguments for pT range: Need one argument or " + "range"); + } } else { - cfg.p_tot(vm["p_tot"].as() * unit::GeV); + const auto p_range = vm["p_range"].as>(); + + // Default + if (p_range.empty()) { + cfg.p_tot(static_cast(cfg.m_p_mag) * unit::GeV); + } else if (p_range.size() == 1u) { + cfg.p_tot(p_range[0] * unit::GeV); + } else if (p_range.size() == 2u) { + std::cout << "WARNING: Momentum range not possible with uniform " + "track generator: Using first value." + << std::endl; + cfg.p_tot(p_range[0] * unit::GeV); + } else { + throw std::invalid_argument( + "Wrong number of arguments for p_tot range: Need one argument " + "or range"); + } } } @@ -110,24 +144,25 @@ void add_rnd_track_gen_options( boost::program_options::value()->default_value( cfg.n_tracks()), "No. of tracks for particle gun")( + "random_seed", + boost::program_options::value()->default_value(cfg.seed()), + "Seed for the random number generator")( "theta_range", boost::program_options::value>()->multitoken(), - "Min, Max range of theta values for particle gun")( + "Min, Max range of theta values for particle gun. Interval in [0, pi)")( "eta_range", boost::program_options::value>()->multitoken(), "Min, Max range of eta values for particle gun")( "randomize_charge", "Randomly flip charge sign per track")( "origin", boost::program_options::value>()->multitoken(), - "Coordintates for particle gun origin position")( - "p_tot", - boost::program_options::value()->default_value( - static_cast(cfg.mom_range()[0]) / unit::GeV), - "Total momentum of the test particle [GeV]")( - "p_T", - boost::program_options::value()->default_value( - static_cast(cfg.mom_range()[0]) / unit::GeV), - "Transverse momentum of the test particle [GeV]"); + "Coordintates for particle gun origin position [mm]")( + "p_range", + boost::program_options::value>()->multitoken(), + "Total momentum [range] of the test particles [GeV]")( + "pT_range", + boost::program_options::value>()->multitoken(), + "Transverse momentum [range] of the test particles [GeV]"); } /// Add options for detray event generation @@ -137,6 +172,7 @@ void configure_rnd_track_gen_options( random_track_generator_config &cfg) { cfg.n_tracks(vm["n_tracks"].as()); + cfg.seed(vm["random_seed"].as()); cfg.randomize_charge(vm.count("randomize_charge")); if (vm.count("eta_range") && vm.count("theta_range")) { @@ -178,15 +214,43 @@ void configure_rnd_track_gen_options( "Particle gun origin needs three coordinates"); } } - if (!vm["p_T"].defaulted() && !vm["p_tot"].defaulted()) { + if (vm.count("pT_range") && vm.count("p_range")) { throw std::invalid_argument( "Transverse and total momentum cannot be specified at the same " "time"); } - if (!vm["p_T"].defaulted()) { - cfg.p_T(vm["p_T"].as() * unit::GeV); + if (vm.count("pT_range")) { + const auto pt_range = vm["pT_range"].as>(); + + // Default + if (pt_range.empty()) { + cfg.p_T(cfg.mom_range()[0] * unit::GeV); + } else if (pt_range.size() == 1u) { + cfg.p_T(pt_range[0] * unit::GeV); + } else if (pt_range.size() == 2u) { + cfg.pT_range(pt_range[0] * unit::GeV, + pt_range[1] * unit::GeV); + } else { + throw std::invalid_argument( + "Wrong number of arguments for pT range: Need one argument or " + "range"); + } } else { - cfg.p_tot(vm["p_tot"].as() * unit::GeV); + const auto p_range = vm["p_range"].as>(); + + // Default + if (p_range.empty()) { + cfg.p_tot(cfg.mom_range()[0] * unit::GeV); + } else if (p_range.size() == 1u) { + cfg.p_tot(p_range[0] * unit::GeV); + } else if (p_range.size() == 2u) { + cfg.mom_range(p_range[0] * unit::GeV, + p_range[1] * unit::GeV); + } else { + throw std::invalid_argument( + "Wrong number of arguments for p_tot range: Need one argument " + "or range"); + } } } diff --git a/tests/tools/src/cpu/CMakeLists.txt b/tests/tools/src/cpu/CMakeLists.txt index a6e3f2d5a..8c63a8416 100644 --- a/tests/tools/src/cpu/CMakeLists.txt +++ b/tests/tools/src/cpu/CMakeLists.txt @@ -32,7 +32,7 @@ detray_add_executable(generate_telescope_detector # Build the visualization executable. detray_add_executable(detector_display "detector_display.cpp" - LINK_LIBRARIES Boost::program_options detray::tools + LINK_LIBRARIES Boost::program_options detray::core_array detray::tools detray::svgtools ) @@ -40,7 +40,7 @@ detray_add_executable(detector_display detray_add_executable(detector_validation "detector_validation.cpp" LINK_LIBRARIES GTest::gtest GTest::gtest_main - Boost::program_options detray::tools detray::test_utils + Boost::program_options detray::core_array detray::tools detray::test_utils detray::svgtools ) @@ -48,6 +48,49 @@ detray_add_executable(detector_validation detray_add_executable(material_validation "material_validation.cpp" LINK_LIBRARIES GTest::gtest GTest::gtest_main - Boost::program_options detray::tools detray::test_utils + Boost::program_options detray::core_array detray::tools detray::test_utils detray::svgtools ) + +if(DETRAY_BUILD_BENCHMARKS) + # Look for openMP, which is used for the CPU propagation benchmark + find_package(OpenMP) + + # Build the propagation benchmark executable. + macro(detray_add_propagation_benchmark algebra) + detray_add_executable(propagation_benchmark_cpu_${algebra} + "propagation_benchmark.cpp" + LINK_LIBRARIES Boost::program_options benchmark::benchmark benchmark::benchmark_main vecmem::core detray::benchmark_cpu detray::core_${algebra} detray::tools detray::detectors + ) + + target_compile_options( + detray_propagation_benchmark_cpu_${algebra} + PRIVATE "-march=native" "-ftree-vectorize" + ) + + if(OpenMP_CXX_FOUND) + target_link_libraries( + detray_propagation_benchmark_cpu_${algebra} + PRIVATE OpenMP::OpenMP_CXX + ) + endif() + endmacro() + + # Build the array benchmark. + detray_add_propagation_benchmark( array ) + + # Build the Eigen benchmark executable. + if(DETRAY_EIGEN_PLUGIN) + detray_add_propagation_benchmark( eigen ) + endif() + + # Build the SMatrix benchmark executable. + if(DETRAY_SMATRIX_PLUGIN) + detray_add_propagation_benchmark( smatrix ) + endif() + + # Build the Vc benchmark executable. + if(DETRAY_VC_AOS_PLUGIN) + detray_add_propagation_benchmark( vc_aos ) + endif() +endif() diff --git a/tests/tools/src/cpu/propagation_benchmark.cpp b/tests/tools/src/cpu/propagation_benchmark.cpp new file mode 100644 index 000000000..3040d328b --- /dev/null +++ b/tests/tools/src/cpu/propagation_benchmark.cpp @@ -0,0 +1,162 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray IO include(s) +#include "detray/io/frontend/detector_reader.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/cpu/propagation_benchmark.hpp" + +// Detray test include(s). +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Detray tools include(s) +#include "detray/options/detector_io_options.hpp" +#include "detray/options/parse_options.hpp" +#include "detray/options/propagation_options.hpp" +#include "detray/options/track_generator_options.hpp" + +// Vecmem include(s) +#include + +// System include(s) +#include +#include + +namespace po = boost::program_options; + +using namespace detray; + +int main(int argc, char** argv) { + + // Use the most general type to be able to read in all detector files + using detector_t = detray::detector; + using test_algebra = typename detector_t::algebra_type; + using scalar = dscalar; + using vector3 = dvector3D; + + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + + using field_t = bfield::const_field_t; + using stepper_t = rk_stepper; + using empty_chain_t = actor_chain<>; + using default_chain = + actor_chain, + pointwise_material_interactor, + parameter_resetter>; + + // Host memory resource + vecmem::host_memory_resource host_mr; + + // Constant magnetic field + vector3 B{0.f, 0.f, 2.f * unit::T}; + + // Number of tracks in the different benchmark cases + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + // + // Configuration + // + + // Specific options for this test + po::options_description desc("\ndetray propagation benchmark options"); + + desc.add_options()("context", po::value(), + "Index of the geometry context")( + "sort_tracks", "Sort track samples by theta angle"); + + // Configs to be filled + detray::io::detector_reader_config reader_cfg{}; + track_generator_t::configuration trk_cfg{}; + propagation::config prop_cfg{}; + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + // Read options from commandline + po::variables_map vm = detray::options::parse_options( + desc, argc, argv, reader_cfg, trk_cfg, prop_cfg); + + // Custom options + bool do_sort{(vm.count("sort_tracks") != 0)}; + + // The geometry context to be used + detector_t::geometry_context gctx; + if (vm.count("context")) { + gctx = detector_t::geometry_context{vm["context"].as()}; + } + + // + // Prepare data + // + + // Read the detector geometry + reader_cfg.do_check(true); + + const auto [det, names] = + detray::io::read_detector(host_mr, reader_cfg); + const std::string& det_name = det.name(names); + + // Generate the track samples + auto track_samples = + detray::benchmarks::generate_track_samples( + &host_mr, n_tracks, trk_cfg, do_sort); + + // Create a constant b-field + auto bfield = bfield::create_const_field(B); + + // Build actor states + dtuple<> empty_state{}; + + parameter_transporter::state transporter_state{}; + pointwise_material_interactor::state interactor_state{}; + parameter_resetter::state resetter_state{}; + + auto actor_states = detail::make_tuple( + transporter_state, interactor_state, resetter_state); + + // + // Register benchmarks + // + + // Number of warmup tracks + const int n_max_tracks{*std::ranges::max_element(n_tracks)}; + bench_cfg.n_warmup( + static_cast(std::ceil(0.1f * static_cast(n_max_tracks)))); + + if (prop_cfg.stepping.do_covariance_transport) { + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, default_chain>( + det_name + "_W_COV_TRANSPORT", bench_cfg, prop_cfg, det, bfield, + &actor_states, track_samples, n_tracks); + } else { + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>( + det_name, bench_cfg, prop_cfg, det, bfield, &empty_state, + track_samples, n_tracks); + } + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/tests/tools/src/cuda/CMakeLists.txt b/tests/tools/src/cuda/CMakeLists.txt index 271577341..d3159d940 100644 --- a/tests/tools/src/cuda/CMakeLists.txt +++ b/tests/tools/src/cuda/CMakeLists.txt @@ -26,3 +26,22 @@ detray_add_executable(material_validation_cuda LINK_LIBRARIES GTest::gtest GTest::gtest_main Boost::program_options detray::test_cuda detray::tools ) + +# Build benchmarks for multiple algebra plugins +# Currently vc and smatrix is not supported on device +set(algebra_plugins "array") +if(DETRAY_EIGEN_PLUGIN) + list(APPEND algebra_plugins "eigen") +endif() + +foreach(algebra ${algebra_plugins}) + detray_add_executable(propagation_benchmark_cuda_${algebra} + "propagation_benchmark_cuda.cpp" + LINK_LIBRARIES detray::benchmark_cuda_${algebra} vecmem::cuda detray::tools detray::test_utils + ) + + target_compile_options( + detray_propagation_benchmark_cuda_${algebra} + PRIVATE "-march=native" "-ftree-vectorize" + ) +endforeach() diff --git a/tests/tools/src/cuda/propagation_benchmark_cuda.cpp b/tests/tools/src/cuda/propagation_benchmark_cuda.cpp new file mode 100644 index 000000000..c24550fb7 --- /dev/null +++ b/tests/tools/src/cuda/propagation_benchmark_cuda.cpp @@ -0,0 +1,163 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray IO include(s) +#include "detray/io/frontend/detector_reader.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" + +// Detray test include(s). +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Detray tools include(s) +#include "detray/options/detector_io_options.hpp" +#include "detray/options/parse_options.hpp" +#include "detray/options/propagation_options.hpp" +#include "detray/options/track_generator_options.hpp" + +// Vecmem include(s) +#include + +// System include(s) +#include +#include + +namespace po = boost::program_options; + +using namespace detray; + +int main(int argc, char** argv) { + + // Use the most general type to be able to read in all detector files + using detector_t = detray::detector; + using test_algebra = typename detector_t::algebra_type; + using scalar = dscalar; + using vector3 = dvector3D; + + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + + using field_bknd_t = bfield::const_bknd_t; + + // Host and device memory resources + vecmem::host_memory_resource host_mr; + vecmem::cuda::device_memory_resource dev_mr; + + // Constant magnetic field + vector3 B{0.f, 0.f, 2.f * unit::T}; + + // Number of tracks in the different benchmark cases + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + // + // Configuration + // + + // Specific options for this test + po::options_description desc("\ndetray propagation benchmark options"); + + desc.add_options()("context", po::value(), + "Index of the geometry context")( + "sort_tracks", "Sort track samples by theta angle"); + + // Configs to be filled + detray::io::detector_reader_config reader_cfg{}; + track_generator_t::configuration trk_cfg{}; + propagation::config prop_cfg{}; + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + // Read options from commandline + po::variables_map vm = detray::options::parse_options( + desc, argc, argv, reader_cfg, trk_cfg, prop_cfg); + + // Custom options + bool do_sort{(vm.count("sort_tracks") != 0)}; + + // The geometry context to be used + detector_t::geometry_context gctx; + if (vm.count("context")) { + gctx = detector_t::geometry_context{vm["context"].as()}; + } + + // + // Prepare data + // + + // Read the detector geometry + reader_cfg.do_check(true); + + const auto [det, names] = + detray::io::read_detector(host_mr, reader_cfg); + const std::string& det_name = det.name(names); + + // Generate the track samples + auto track_samples = + detray::benchmarks::generate_track_samples( + &host_mr, n_tracks, trk_cfg, do_sort); + + // Create a constant b-field + auto bfield = bfield::create_const_field(B); + + // Build actor states + dtuple<> empty_state{}; + + parameter_transporter::state transporter_state{}; + pointwise_material_interactor::state interactor_state{}; + parameter_resetter::state resetter_state{}; + + auto actor_states = detail::make_tuple( + transporter_state, interactor_state, resetter_state); + + // + // Register benchmarks + // + + // Number of warmup tracks + const int n_max_tracks{*std::ranges::max_element(n_tracks)}; + bench_cfg.n_warmup( + static_cast(std::ceil(0.1f * static_cast(n_max_tracks)))); + + if (prop_cfg.stepping.do_covariance_transport) { + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + test::default_metadata, field_bknd_t, + detray::benchmarks::default_chain>>( + det_name + "_W_COV_TRANSPORT", bench_cfg, prop_cfg, det, bfield, + &actor_states, track_samples, n_tracks, &dev_mr); + } else { + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + test::default_metadata, field_bknd_t, + detray::benchmarks::empty_chain>>( + det_name, bench_cfg, prop_cfg, det, bfield, &empty_state, + track_samples, n_tracks, &dev_mr); + } + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +}