diff --git a/core/include/detray/definitions/pdg_particle.hpp b/core/include/detray/definitions/pdg_particle.hpp
index 36aa99759..95951b372 100644
--- a/core/include/detray/definitions/pdg_particle.hpp
+++ b/core/include/detray/definitions/pdg_particle.hpp
@@ -28,13 +28,13 @@ struct pdg_particle {
           m_charge(static_cast<scalar_t>(charge)) {}
 
     DETRAY_HOST_DEVICE
-    std::int32_t pdg_num() const { return m_pdg_num; }
+    constexpr std::int32_t pdg_num() const { return m_pdg_num; }
 
     DETRAY_HOST_DEVICE
-    scalar_type mass() const { return m_mass; }
+    constexpr scalar_type mass() const { return m_mass; }
 
     DETRAY_HOST_DEVICE
-    scalar_type charge() const { return m_charge; }
+    constexpr scalar_type charge() const { return m_charge; }
 
     private:
     std::int32_t m_pdg_num;
@@ -42,6 +42,23 @@ struct pdg_particle {
     scalar_type m_charge;
 };
 
+/// @returns @param ptc with negated PDG number and charge (if it is charged)
+template <typename scalar_t>
+DETRAY_HOST_DEVICE constexpr pdg_particle<scalar_t> charge_conjugation(
+    const pdg_particle<scalar_t>& ptc) {
+    if (ptc.charge() == 0) {
+        return ptc;  // Neutral hypothesis: nothing to flip
+    }
+    return pdg_particle<scalar_t>{-ptc.pdg_num(), ptc.mass(), -ptc.charge()};
+}
+
+/// @returns @param ptc if charge * @param params qop > 0, else its conjugate
+template <typename scalar_t, typename track_t>
+DETRAY_HOST_DEVICE constexpr pdg_particle<scalar_t> update_particle_hypothesis(
+    const pdg_particle<scalar_t>& ptc, const track_t& params) {
+    return (ptc.charge() * params.qop() > 0.f) ? ptc : charge_conjugation(ptc);
+}
+
 // Macro for declaring the particle
 #define DETRAY_DECLARE_PARTICLE(PARTICLE_NAME, PDG_NUM, MASS, CHARGE)   \
     template <typename scalar_t>                                        \
diff --git a/core/include/detray/propagator/actor_chain.hpp b/core/include/detray/propagator/actor_chain.hpp
index 572880fc9..2b077ea66 100644
--- a/core/include/detray/propagator/actor_chain.hpp
+++ b/core/include/detray/propagator/actor_chain.hpp
@@ -34,6 +34,8 @@ class actor_chain {
     public:
     /// Types of the actors that are registered in the chain
     using actor_list_type = tuple_t<actors_t...>;
+    /// Tuple that holds the actor states by value
+    using state_tuple = tuple_t<typename actors_t::state...>;
     // Type of states tuple that is used in the propagator
     using state = tuple_t<typename actors_t::state &...>;
 
@@ -53,8 +55,7 @@ class actor_chain {
         return m_actors;
     }
 
-    /// @returns a tuple of default constructible actor states and a
-    /// corresponding tuple of references
+    /// @returns a tuple of default constructible actor states
     DETRAY_HOST_DEVICE
     static constexpr auto make_actor_states() {
         // Only possible if each state is default initializable
@@ -67,10 +68,10 @@ class actor_chain {
     }
 
     /// @returns a tuple of reference for every state in the tuple @param t
-    DETRAY_HOST_DEVICE static constexpr state make_ref_tuple(
+    DETRAY_HOST_DEVICE static constexpr state setup_actor_states(
         tuple_t<typename actors_t::state...> &t) {
-        return make_ref_tuple(t,
-                              std::make_index_sequence<sizeof...(actors_t)>{});
+        return setup_actor_states(
+            t, std::make_index_sequence<sizeof...(actors_t)>{});
     }
 
     private:
@@ -111,7 +112,7 @@ class actor_chain {
 
     /// @returns a tuple of reference for every state in the tuple @param t
     template <std::size_t... indices>
-    DETRAY_HOST_DEVICE static constexpr state make_ref_tuple(
+    DETRAY_HOST_DEVICE static constexpr state setup_actor_states(
         tuple_t<typename actors_t::state...> &t,
         std::index_sequence<indices...> /*ids*/) {
         return detray::tie(detail::get<indices>(t)...);
@@ -126,6 +127,7 @@ template <>
 class actor_chain<> {
 
     public:
+    using state_tuple = dtuple<>;
     /// Empty states replaces a real actor states container
     struct state {};
 
@@ -138,6 +140,12 @@ class actor_chain<> {
                                        propagator_state_t & /*p_state*/) const {
         /*Do nothing*/
     }
+
+    /// Nothing to set up without actors. @returns an empty state
+    DETRAY_HOST_DEVICE static constexpr state setup_actor_states(
+        const state_tuple & /*states*/) {
+        return state{};
+    }
 };
 
 }  // namespace detray
diff --git a/core/include/detray/propagator/propagator.hpp b/core/include/detray/propagator/propagator.hpp
index 58c0ed813..45b374079 100644
--- a/core/include/detray/propagator/propagator.hpp
+++ b/core/include/detray/propagator/propagator.hpp
@@ -55,6 +55,12 @@ struct propagator {
     explicit constexpr propagator(const propagation::config &cfg)
         : m_cfg{cfg} {}
 
+    /// @returns a const reference to the actor chain
+    DETRAY_HOST_DEVICE
+    constexpr auto get_actor_chain() const -> const actor_chain_t & {
+        return run_actors;
+    }
+
     /// Propagation that state aggregates a stepping and a navigation state. It
     /// also keeps references to the actor states.
     struct state {
diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt
index dd4c3fd9c..12940e393 100644
--- a/tests/benchmarks/CMakeLists.txt
+++ b/tests/benchmarks/CMakeLists.txt
@@ -4,12 +4,47 @@
 #
 # Mozilla Public License Version 2.0
 
+# Set the common C++ flags; covfie/dfelibs headers become SYSTEM includes.
+include(detray-compiler-options-cpp)
+include_directories(
+    SYSTEM
+    $<TARGET_PROPERTY:covfie::core,INTERFACE_INCLUDE_DIRECTORIES>
+)
+include_directories(
+    SYSTEM
+    $<TARGET_PROPERTY:dfelibs::dfelibs,INTERFACE_INCLUDE_DIRECTORIES>
+)
+
+# Set up a common benchmark library.
+file(
+    GLOB _detray_benchmarks_headers
+    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
+    "include/detray/benchmarks/*.hpp"
+)
+
+add_library(detray_benchmarks INTERFACE "${_detray_benchmarks_headers}")
+add_library(detray::benchmarks ALIAS detray_benchmarks)
+
+target_include_directories(
+    detray_benchmarks
+    INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include"
+)
+
+target_link_libraries(
+    detray_benchmarks
+    INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_utils
+)
+
+unset(_detray_benchmarks_headers)
+
 # Set up the host/cpu benchmarks.
 if(DETRAY_BUILD_HOST)
     add_subdirectory(cpu)
+    add_subdirectory(include/detray/benchmarks/cpu)
 endif()
 
 # Set up all of the "device" benchmarks.
 if(DETRAY_BUILD_CUDA)
     add_subdirectory(cuda)
+    add_subdirectory(include/detray/benchmarks/device)
 endif()
diff --git a/tests/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/cpu/CMakeLists.txt
index 30b36c7af..03140c4af 100644
--- a/tests/benchmarks/cpu/CMakeLists.txt
+++ b/tests/benchmarks/cpu/CMakeLists.txt
@@ -10,21 +10,20 @@ message(STATUS "Building detray host benchmarks")
 option(DETRAY_BENCHMARK_MULTITHREAD "Enable multithreaded benchmarks" OFF)
 option(DETRAY_BENCHMARK_PRINTOUTS "Enable printouts in the benchmarks" OFF)
 
-# Look for openMP, which is used for the CPU benchmark
+# Look for OpenMP, which is used for the CPU propagation benchmark
 find_package(OpenMP)
 
 # Macro setting up the CPU benchmarks for a specific algebra plugin.
 macro(detray_add_cpu_benchmark algebra)
     # Build the benchmark executable.
     detray_add_executable(benchmark_cpu_${algebra}
-      "benchmark_propagator.cpp"
        "find_volume.cpp"
        "grid.cpp"
        "grid2.cpp"
        "intersect_all.cpp"
        "intersect_surfaces.cpp"
        "masks.cpp"
-       LINK_LIBRARIES benchmark::benchmark benchmark::benchmark_main vecmem::core
+       LINK_LIBRARIES benchmark::benchmark benchmark::benchmark_main vecmem::core detray::benchmarks
                       detray::core_${algebra} detray::test_utils
     )
 
@@ -48,9 +47,21 @@ macro(detray_add_cpu_benchmark algebra)
         )
     endif()
 
+    # Build the propagation benchmark (propagation.cpp provides its own main)
+    detray_add_executable(benchmark_cpu_propagation_${algebra}
+       "propagation.cpp"
+       LINK_LIBRARIES detray::benchmark_cpu benchmark::benchmark
+                      vecmem::core detray::core_${algebra} detray::test_utils
+    )
+
+    target_compile_options(
+        detray_benchmark_cpu_propagation_${algebra}
+        PRIVATE "-march=native" "-ftree-vectorize"
+    )
+
     if(OpenMP_CXX_FOUND)
         target_link_libraries(
-            detray_benchmark_cpu_${algebra}
+            detray_benchmark_cpu_propagation_${algebra}
             PRIVATE OpenMP::OpenMP_CXX
         )
     endif()
diff --git a/tests/benchmarks/cpu/benchmark_propagator.cpp b/tests/benchmarks/cpu/benchmark_propagator.cpp
deleted file mode 100644
index e588e9850..000000000
--- a/tests/benchmarks/cpu/benchmark_propagator.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/** Detray library, part of the ACTS project (R&D line)
- *
- * (c) 2020-2024 CERN for the benefit of the ACTS project
- *
- * Mozilla Public License Version 2.0
- */
-
-// Project include(s).
-#include "detray/definitions/detail/algebra.hpp"
-#include "detray/definitions/detail/containers.hpp"
-#include "detray/definitions/detail/indexing.hpp"
-#include "detray/definitions/units.hpp"
-#include "detray/detectors/bfield.hpp"
-#include "detray/detectors/toy_metadata.hpp"
-#include "detray/geometry/shapes/rectangle2D.hpp"
-#include "detray/navigation/navigator.hpp"
-#include "detray/propagator/actor_chain.hpp"
-#include "detray/propagator/actors/aborters.hpp"
-#include "detray/propagator/actors/parameter_resetter.hpp"
-#include "detray/propagator/actors/parameter_transporter.hpp"
-#include "detray/propagator/actors/pointwise_material_interactor.hpp"
-#include "detray/propagator/base_actor.hpp"
-#include "detray/propagator/propagator.hpp"
-#include "detray/propagator/rk_stepper.hpp"
-#include "detray/tracks/tracks.hpp"
-#include "detray/utils/grid/grid.hpp"
-
-// Detray test include(s).
-#include "detray/test/utils/detectors/build_toy_detector.hpp"
-#include "detray/test/utils/simulation/event_generator/track_generators.hpp"
-#include "detray/test/utils/types.hpp"
-
-// VecMem include(s).
-#include <vecmem/memory/host_memory_resource.hpp>
-
-// Google benchmark include(s).
-#include <benchmark/benchmark.h>
-
-// System include(s)
-#include <cstdlib>
-#include <vector>
-
-// Use the detray:: namespace implicitly.
-using namespace detray;
-
-using algebra_t = ALGEBRA_PLUGIN<detray::scalar>;
-
-using detector_host_type = detector<toy_metadata, host_container_types>;
-using detector_device_type = detector<toy_metadata, device_container_types>;
-
-using intersection_t =
-    intersection2D<typename detector_device_type::surface_type, algebra_t>;
-
-using navigator_host_type = navigator<detector_host_type>;
-using navigator_device_type = navigator<detector_device_type>;
-using field_type = bfield::const_field_t;
-using rk_stepper_type = rk_stepper<field_type::view_t, algebra_t>;
-using actor_chain_t = actor_chain<tuple, parameter_transporter<algebra_t>,
-                                  pointwise_material_interactor<algebra_t>,
-                                  parameter_resetter<algebra_t>>;
-using propagator_host_type =
-    propagator<rk_stepper_type, navigator_host_type, actor_chain_t>;
-using propagator_device_type =
-    propagator<rk_stepper_type, navigator_device_type, actor_chain_t>;
-
-enum class propagate_option {
-    e_unsync = 0,
-    e_sync = 1,
-};
-
-// VecMem memory resource(s)
-vecmem::host_memory_resource host_mr;
-
-// detector configuration
-auto toy_cfg =
-    toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false);
-
-void fill_tracks(vecmem::vector<free_track_parameters<algebra_t>> &tracks,
-                 const std::size_t n_tracks, bool do_sort = true) {
-    using scalar_t = dscalar<algebra_t>;
-    using uniform_gen_t =
-        detail::random_numbers<scalar_t,
-                               std::uniform_real_distribution<scalar_t>>;
-    using trk_generator_t =
-        random_track_generator<free_track_parameters<algebra_t>, uniform_gen_t>;
-
-    trk_generator_t::configuration trk_gen_cfg{};
-    trk_gen_cfg.seed(42u);
-    trk_gen_cfg.n_tracks(n_tracks);
-    trk_gen_cfg.randomize_charge(true);
-    trk_gen_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
-    trk_gen_cfg.eta_range(-3.f, 3.f);
-    trk_gen_cfg.mom_range(1.f * unit<scalar_t>::GeV,
-                          100.f * unit<scalar_t>::GeV);
-    trk_gen_cfg.origin({0.f, 0.f, 0.f});
-    trk_gen_cfg.origin_stddev({0.f * unit<scalar_t>::mm,
-                               0.f * unit<scalar_t>::mm,
-                               0.f * unit<scalar_t>::mm});
-
-    // Iterate through uniformly distributed momentum directions
-    for (auto traj : trk_generator_t{trk_gen_cfg}) {
-        tracks.push_back(traj);
-    }
-
-    if (do_sort) {
-        // Sort by theta angle
-        const auto traj_comp = [](const auto &lhs, const auto &rhs) {
-            constexpr auto pi_2{constant<scalar_t>::pi_2};
-            return math::fabs(pi_2 - vector::theta(lhs.dir())) <
-                   math::fabs(pi_2 - vector::theta(rhs.dir()));
-        };
-
-        std::ranges::sort(tracks, traj_comp);
-    }
-}
-
-template <propagate_option opt>
-static void BM_PROPAGATOR_CPU(benchmark::State &state) {
-
-    std::size_t n_tracks{static_cast<std::size_t>(state.range(0)) *
-                         static_cast<std::size_t>(state.range(0))};
-
-    // Create the toy geometry and bfield
-    auto [det, names] = build_toy_detector(host_mr, toy_cfg);
-    test::vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
-    auto bfield = bfield::create_const_field(B);
-
-    // Create propagator
-    propagation::config cfg{};
-    cfg.navigation.search_window = {3u, 3u};
-    propagator_host_type p{cfg};
-
-    std::size_t total_tracks = 0;
-
-    for (auto _ : state) {
-
-        // TODO: use fixture to build tracks
-        state.PauseTiming();
-
-        // Get tracks
-        vecmem::vector<free_track_parameters<algebra_t>> tracks(&host_mr);
-        fill_tracks(tracks, n_tracks);
-
-        total_tracks += tracks.size();
-
-        state.ResumeTiming();
-
-#pragma omp parallel for
-        for (auto &track : tracks) {
-
-            parameter_transporter<algebra_t>::state transporter_state{};
-            pointwise_material_interactor<algebra_t>::state interactor_state{};
-            parameter_resetter<algebra_t>::state resetter_state{};
-
-            auto actor_states =
-                tie(transporter_state, interactor_state, resetter_state);
-
-            // Create the propagator state
-            propagator_host_type::state p_state(track, bfield, det);
-
-            // Run propagation
-            if constexpr (opt == propagate_option::e_unsync) {
-                p.propagate(p_state, actor_states);
-            } else if constexpr (opt == propagate_option::e_sync) {
-                p.propagate_sync(p_state, actor_states);
-            }
-        }
-    }
-
-    state.counters["TracksPropagated"] = benchmark::Counter(
-        static_cast<double>(total_tracks), benchmark::Counter::kIsRate);
-}
-
-BENCHMARK_TEMPLATE(BM_PROPAGATOR_CPU, propagate_option::e_unsync)
-    ->Name("CPU unsync propagation")
-    ->RangeMultiplier(2)
-    ->Range(8, 256);
-BENCHMARK_TEMPLATE(BM_PROPAGATOR_CPU, propagate_option::e_sync)
-    ->Name("CPU sync propagation")
-    ->RangeMultiplier(2)
-    ->Range(8, 256);
-
-BENCHMARK_MAIN();
diff --git a/tests/benchmarks/cpu/propagation.cpp b/tests/benchmarks/cpu/propagation.cpp
new file mode 100644
index 000000000..83bde2b8c
--- /dev/null
+++ b/tests/benchmarks/cpu/propagation.cpp
@@ -0,0 +1,172 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// Project include(s)
+#include "detray/detectors/bfield.hpp"
+#include "detray/navigation/navigator.hpp"
+#include "detray/propagator/actor_chain.hpp"
+#include "detray/propagator/actors/aborters.hpp"
+#include "detray/propagator/actors/parameter_resetter.hpp"
+#include "detray/propagator/actors/parameter_transporter.hpp"
+#include "detray/propagator/actors/pointwise_material_interactor.hpp"
+#include "detray/propagator/rk_stepper.hpp"
+#include "detray/tracks/tracks.hpp"
+
+// Detray benchmark include(s)
+#include "detray/benchmarks/cpu/propagation_benchmark.hpp"
+
+// Detray test include(s).
+#include "detray/test/utils/detectors/build_toy_detector.hpp"
+#include "detray/test/utils/detectors/build_wire_chamber.hpp"
+#include "detray/test/utils/simulation/event_generator/track_generators.hpp"
+#include "detray/test/utils/types.hpp"
+
+// Vecmem include(s)
+#include <vecmem/memory/host_memory_resource.hpp>
+
+// Google benchmark include(s)
+#include <benchmark/benchmark.h>
+
+// System include(s)
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+
+using namespace detray;
+
+/// Registers the host propagation benchmarks for the toy detector and the wire
+/// chamber (with and without covariance transport) and runs them.
+int main(int argc, char** argv) {
+
+    using toy_detector_t = detector<toy_metadata>;
+    using algebra_t = typename toy_detector_t::algebra_type;
+    using scalar_t = dscalar<algebra_t>;
+    using vector3_t = dvector3D<algebra_t>;
+    using free_track_parameters_t = free_track_parameters<algebra_t>;
+    using uniform_gen_t =
+        detail::random_numbers<scalar_t,
+                               std::uniform_real_distribution<scalar_t>>;
+    using track_generator_t =
+        random_track_generator<free_track_parameters_t, uniform_gen_t>;
+
+    using field_t = bfield::const_field_t;
+    using stepper_t = rk_stepper<typename field_t::view_t, algebra_t>;
+    using empty_chain_t = actor_chain<>;
+    using default_chain = actor_chain<dtuple, parameter_transporter<algebra_t>,
+                                      pointwise_material_interactor<algebra_t>,
+                                      parameter_resetter<algebra_t>>;
+
+    vecmem::host_memory_resource host_mr;
+
+    //
+    // Configuration
+    //
+
+    // Constant magnetic field
+    vector3_t B{0.f, 0.f, 2.f * unit<scalar_t>::T};
+
+    // Configure toy detector
+    toy_det_config toy_cfg{};
+    toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);
+
+    std::cout << toy_cfg << std::endl;
+
+    // Configure wire chamber
+    wire_chamber_config wire_chamber_cfg{};
+    wire_chamber_cfg.half_z(500.f * unit<scalar>::mm);
+
+    std::cout << wire_chamber_cfg << std::endl;
+
+    // Configure propagation
+    propagation::config prop_cfg{};
+    prop_cfg.navigation.search_window = {3u, 3u};
+
+    std::cout << prop_cfg << std::endl;
+
+    // Benchmark config
+    detray::benchmarks::benchmark_base::configuration bench_cfg{};
+
+    std::vector<int> n_tracks{8 * 8,     16 * 16,   32 * 32,  64 * 64,
+                              128 * 128, 256 * 256, 512 * 512};
+
+    auto trk_cfg =
+        detray::benchmarks::get_default_trk_gen_config<track_generator_t>(
+            n_tracks);
+
+    // Specific configuration for the random track generation
+    trk_cfg.seed(42u);
+
+    // Number of warmup tracks: 10% of the configured number of tracks
+    bench_cfg.n_warmup(static_cast<int>(
+        std::ceil(0.1f * static_cast<float>(trk_cfg.n_tracks()))));
+
+    //
+    // Prepare data
+    //
+    auto track_samples =
+        detray::benchmarks::generate_track_samples<track_generator_t>(
+            &host_mr, n_tracks, trk_cfg);
+
+    const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg);
+    const auto [wire_chamber, _] =
+        build_wire_chamber(host_mr, wire_chamber_cfg);
+
+    auto bfield = bfield::create_const_field(B);
+
+    dtuple<> empty_state{};
+
+    parameter_transporter<algebra_t>::state transporter_state{};
+    pointwise_material_interactor<algebra_t>::state interactor_state{};
+    parameter_resetter<algebra_t>::state resetter_state{};
+
+    auto actor_states = detail::make_tuple<dtuple>(
+        transporter_state, interactor_state, resetter_state);
+
+    //
+    // Register benchmarks
+    //
+    std::cout << "Propagation Benchmarks\n"
+              << "----------------------\n\n";
+
+    prop_cfg.stepping.do_covariance_transport = true;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::host_propagation_bm, stepper_t, default_chain>(
+        "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield,
+        &actor_states, track_samples, n_tracks);
+
+    prop_cfg.stepping.do_covariance_transport = false;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>(
+        "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, &empty_state,
+        track_samples, n_tracks);
+
+    prop_cfg.stepping.do_covariance_transport = true;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::host_propagation_bm, stepper_t, default_chain>(
+        "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber,
+        bfield, &actor_states, track_samples, n_tracks);
+
+    prop_cfg.stepping.do_covariance_transport = false;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>(
+        "WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, &empty_state,
+        track_samples, n_tracks);
+
+    // Run benchmarks
+    ::benchmark::Initialize(&argc, argv);
+    // Reject mistyped command line flags instead of silently ignoring them
+    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
+        return EXIT_FAILURE;
+    }
+    ::benchmark::RunSpecifiedBenchmarks();
+    ::benchmark::Shutdown();
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt
index 096c92ad0..a101f0854 100644
--- a/tests/benchmarks/cuda/CMakeLists.txt
+++ b/tests/benchmarks/cuda/CMakeLists.txt
@@ -26,26 +26,24 @@ if(DETRAY_EIGEN_PLUGIN)
 endif()
 
 foreach(algebra ${algebras})
-    detray_add_executable(benchmark_cuda_${algebra}
-   "benchmark_propagator_cuda_kernel.hpp"
-       "benchmark_propagator_cuda.cpp"
-       "benchmark_propagator_cuda_kernel.cu"
-       LINK_LIBRARIES benchmark::benchmark detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils
+    detray_add_executable(benchmark_cuda_propagation_${algebra}
+      "propagation.cpp"
+       LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils
     )
 
     target_compile_definitions(
-        detray_benchmark_cuda_${algebra}
+        detray_benchmark_cuda_propagation_${algebra}
         PRIVATE ${algebra}=${algebra}
     )
 
     target_compile_options(
-        detray_benchmark_cuda_${algebra}
+        detray_benchmark_cuda_propagation_${algebra}
         PRIVATE "-march=native" "-ftree-vectorize"
     )
 
     if(OpenMP_CXX_FOUND)
         target_link_libraries(
-            detray_benchmark_cuda_${algebra}
+            detray_benchmark_cuda_propagation_${algebra}
             PRIVATE OpenMP::OpenMP_CXX
         )
     endif()
diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp
deleted file mode 100644
index 08db5b8f9..000000000
--- a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/** Detray library, part of the ACTS project (R&D line)
- *
- * (c) 2022-2024 CERN for the benefit of the ACTS project
- *
- * Mozilla Public License Version 2.0
- */
-
-// Project include(s)
-#include "benchmark_propagator_cuda_kernel.hpp"
-
-// Detray test include(s).
-#include "detray/test/utils/detectors/build_toy_detector.hpp"
-#include "detray/test/utils/simulation/event_generator/track_generators.hpp"
-#include "detray/test/utils/types.hpp"
-
-// Vecmem include(s)
-#include <vecmem/memory/binary_page_memory_resource.hpp>
-#include <vecmem/memory/cuda/device_memory_resource.hpp>
-#include <vecmem/memory/cuda/managed_memory_resource.hpp>
-#include <vecmem/utils/cuda/copy.hpp>
-
-// Google include(s).
-#include <benchmark/benchmark.h>
-
-using namespace detray;
-
-// VecMem memory resource(s)
-vecmem::host_memory_resource host_mr;
-vecmem::cuda::managed_memory_resource mng_mr;
-vecmem::cuda::device_memory_resource dev_mr;
-vecmem::binary_page_memory_resource bp_mng_mr(mng_mr);
-
-// detector configuration
-auto toy_cfg =
-    toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false);
-
-void fill_tracks(vecmem::vector<free_track_parameters<algebra_t>> &tracks,
-                 const std::size_t n_tracks, bool do_sort = true) {
-    using scalar_t = dscalar<algebra_t>;
-    using uniform_gen_t =
-        detail::random_numbers<scalar_t,
-                               std::uniform_real_distribution<scalar_t>>;
-    using trk_generator_t =
-        random_track_generator<free_track_parameters<algebra_t>, uniform_gen_t>;
-
-    trk_generator_t::configuration trk_gen_cfg{};
-    trk_gen_cfg.seed(42u);
-    trk_gen_cfg.n_tracks(n_tracks);
-    trk_gen_cfg.randomize_charge(true);
-    trk_gen_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
-    trk_gen_cfg.eta_range(-3.f, 3.f);
-    trk_gen_cfg.mom_range(1.f * unit<scalar_t>::GeV,
-                          100.f * unit<scalar_t>::GeV);
-    trk_gen_cfg.origin({0.f, 0.f, 0.f});
-    trk_gen_cfg.origin_stddev({0.f * unit<scalar_t>::mm,
-                               0.f * unit<scalar_t>::mm,
-                               0.f * unit<scalar_t>::mm});
-
-    // Iterate through uniformly distributed momentum directions
-    for (auto traj : trk_generator_t{trk_gen_cfg}) {
-        tracks.push_back(traj);
-    }
-
-    if (do_sort) {
-        // Sort by theta angle
-        const auto traj_comp = [](const auto &lhs, const auto &rhs) {
-            constexpr auto pi_2{constant<scalar_t>::pi_2};
-            return math::fabs(pi_2 - vector::theta(lhs.dir())) <
-                   math::fabs(pi_2 - vector::theta(rhs.dir()));
-        };
-
-        std::ranges::sort(tracks, traj_comp);
-    }
-}
-
-template <propagate_option opt>
-static void BM_PROPAGATOR_CUDA(benchmark::State &state) {
-
-    std::size_t n_tracks{static_cast<std::size_t>(state.range(0)) *
-                         static_cast<std::size_t>(state.range(0))};
-
-    // Create the toy geometry
-    auto [det, names] = build_toy_detector(host_mr, toy_cfg);
-    test::vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
-    auto bfield = bfield::create_const_field(B);
-
-    // vecmem copy helper object
-    vecmem::cuda::copy cuda_cpy;
-
-    // Copy detector to device
-    auto det_buff = detray::get_buffer(det, dev_mr, cuda_cpy);
-    auto det_view = detray::get_data(det_buff);
-
-    std::size_t total_tracks = 0;
-
-    for (auto _ : state) {
-
-        state.PauseTiming();
-
-        // Get tracks
-        vecmem::vector<free_track_parameters<algebra_t>> tracks(&bp_mng_mr);
-        fill_tracks(tracks, n_tracks);
-
-        total_tracks += tracks.size();
-
-        state.ResumeTiming();
-
-        // Get tracks data
-        auto tracks_data = vecmem::get_data(tracks);
-
-        // Run the propagator test for GPU device
-        propagator_benchmark(det_view, bfield, tracks_data, opt);
-    }
-
-    state.counters["TracksPropagated"] = benchmark::Counter(
-        static_cast<double>(total_tracks), benchmark::Counter::kIsRate);
-}
-
-BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_unsync)
-    ->Name("CUDA unsync propagation")
-    ->RangeMultiplier(2)
-    ->Range(8, 256);
-BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_sync)
-    ->Name("CUDA sync propagation")
-    ->RangeMultiplier(2)
-    ->Range(8, 256);
-
-BENCHMARK_MAIN();
diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu
deleted file mode 100644
index 6bba06fd1..000000000
--- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu
+++ /dev/null
@@ -1,70 +0,0 @@
-/** Detray library, part of the ACTS project (R&D line)
- *
- * (c) 2022 CERN for the benefit of the ACTS project
- *
- * Mozilla Public License Version 2.0
- */
-
-#include "benchmark_propagator_cuda_kernel.hpp"
-#include "detray/definitions/detail/cuda_definitions.hpp"
-
-namespace detray {
-
-__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel(
-    typename detector_host_type::view_type det_data,
-    covfie::field_view<bfield::const_bknd_t> field_data,
-    vecmem::data::vector_view<free_track_parameters<algebra_t>> tracks_data,
-    const propagate_option opt) {
-
-    int gid = threadIdx.x + blockIdx.x * blockDim.x;
-
-    detector_device_type det(det_data);
-    vecmem::device_vector<free_track_parameters<algebra_t>> tracks(tracks_data);
-
-    if (gid >= tracks.size()) {
-        return;
-    }
-
-    // Create propagator
-    propagation::config cfg{};
-    cfg.navigation.search_window = {3u, 3u};
-    propagator_device_type p{cfg};
-
-    parameter_transporter<algebra_t>::state transporter_state{};
-    pointwise_material_interactor<algebra_t>::state interactor_state{};
-    parameter_resetter<algebra_t>::state resetter_state{};
-
-    // Create the actor states
-    auto actor_states =
-        detray::tie(transporter_state, interactor_state, resetter_state);
-    // Create the propagator state
-    propagator_device_type::state p_state(tracks.at(gid), field_data, det);
-
-    // Run propagation
-    if (opt == propagate_option::e_unsync) {
-        p.propagate(p_state, actor_states);
-    } else if (opt == propagate_option::e_sync) {
-        p.propagate_sync(p_state, actor_states);
-    }
-}
-
-void propagator_benchmark(
-    typename detector_host_type::view_type det_data,
-    covfie::field_view<bfield::const_bknd_t> field_data,
-    vecmem::data::vector_view<free_track_parameters<algebra_t>>& tracks_data,
-    const propagate_option opt) {
-
-    constexpr int thread_dim = 256;
-    int block_dim =
-        static_cast<int>(tracks_data.size() + thread_dim - 1) / thread_dim;
-
-    // run the test kernel
-    propagator_benchmark_kernel<<<block_dim, thread_dim>>>(det_data, field_data,
-                                                           tracks_data, opt);
-
-    // cuda error check
-    DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
-    DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
-}
-
-}  // namespace detray
diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp
deleted file mode 100644
index ee2f505cb..000000000
--- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/** Detray library, part of the ACTS project (R&D line)
- *
- * (c) 2022-2024 CERN for the benefit of the ACTS project
- *
- * Mozilla Public License Version 2.0
- */
-
-#pragma once
-
-// Project include(s)
-#include "detray/definitions/detail/algebra.hpp"
-#include "detray/definitions/units.hpp"
-#include "detray/detectors/bfield.hpp"
-#include "detray/detectors/toy_metadata.hpp"
-#include "detray/navigation/navigator.hpp"
-#include "detray/propagator/actor_chain.hpp"
-#include "detray/propagator/actors/aborters.hpp"
-#include "detray/propagator/actors/parameter_resetter.hpp"
-#include "detray/propagator/actors/parameter_transporter.hpp"
-#include "detray/propagator/actors/pointwise_material_interactor.hpp"
-#include "detray/propagator/base_actor.hpp"
-#include "detray/propagator/propagator.hpp"
-#include "detray/propagator/rk_stepper.hpp"
-#include "detray/tracks/tracks.hpp"
-
-using algebra_t = ALGEBRA_PLUGIN<detray::scalar>;
-
-using detector_host_type =
-    detray::detector<detray::toy_metadata, detray::host_container_types>;
-using detector_device_type =
-    detray::detector<detray::toy_metadata, detray::device_container_types>;
-
-using navigator_host_type = detray::navigator<detector_host_type>;
-using navigator_device_type = detray::navigator<detector_device_type>;
-using field_type = detray::bfield::const_field_t;
-using rk_stepper_type = detray::rk_stepper<field_type::view_t, algebra_t>;
-using actor_chain_t =
-    detray::actor_chain<detray::tuple, detray::parameter_transporter<algebra_t>,
-                        detray::pointwise_material_interactor<algebra_t>,
-                        detray::parameter_resetter<algebra_t>>;
-using propagator_host_type =
-    detray::propagator<rk_stepper_type, navigator_host_type, actor_chain_t>;
-using propagator_device_type =
-    detray::propagator<rk_stepper_type, navigator_device_type, actor_chain_t>;
-
-enum class propagate_option {
-    e_unsync = 0,
-    e_sync = 1,
-};
-
-namespace detray {
-
-/// test function for propagator with single state
-void propagator_benchmark(
-    typename detector_host_type::view_type det_data,
-    typename field_type::view_t field_data,
-    vecmem::data::vector_view<free_track_parameters<algebra_t>>& tracks_data,
-    const propagate_option opt);
-
-}  // namespace detray
diff --git a/tests/benchmarks/cuda/propagation.cpp b/tests/benchmarks/cuda/propagation.cpp
new file mode 100644
index 000000000..5be604394
--- /dev/null
+++ b/tests/benchmarks/cuda/propagation.cpp
@@ -0,0 +1,161 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// Project include(s)
+#include "detray/detectors/bfield.hpp"
+#include "detray/navigation/navigator.hpp"
+#include "detray/propagator/actor_chain.hpp"
+#include "detray/propagator/actors/aborters.hpp"
+#include "detray/propagator/actors/parameter_resetter.hpp"
+#include "detray/propagator/actors/parameter_transporter.hpp"
+#include "detray/propagator/actors/pointwise_material_interactor.hpp"
+#include "detray/propagator/rk_stepper.hpp"
+#include "detray/tracks/tracks.hpp"
+
+// Detray benchmark include(s)
+#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp"
+
+// Detray test include(s).
+#include "detray/test/utils/detectors/build_toy_detector.hpp"
+#include "detray/test/utils/detectors/build_wire_chamber.hpp"
+#include "detray/test/utils/simulation/event_generator/track_generators.hpp"
+#include "detray/test/utils/types.hpp"
+
+// Vecmem include(s)
+#include <vecmem/memory/cuda/device_memory_resource.hpp>
+#include <vecmem/memory/host_memory_resource.hpp>
+
+// System include(s)
+#include <iostream>
+#include <string>
+
+using namespace detray;
+
+int main(int argc, char** argv) {
+
+    using toy_detector_t = detector<toy_metadata>;
+    using algebra_t = typename toy_detector_t::algebra_type;
+    using scalar_t = dscalar<algebra_t>;
+    using vector3_t = dvector3D<algebra_t>;
+    using free_track_parameters_t = free_track_parameters<algebra_t>;
+    using uniform_gen_t =
+        detail::random_numbers<scalar_t,
+                               std::uniform_real_distribution<scalar_t>>;
+    using track_generator_t =
+        random_track_generator<free_track_parameters_t, uniform_gen_t>;
+    using field_bknd_t = bfield::const_bknd_t;
+
+    vecmem::host_memory_resource host_mr;
+    vecmem::cuda::device_memory_resource dev_mr;
+
+    //
+    // Configuration
+    //
+
+    // Constant magnetic field
+    vector3_t B{0.f, 0.f, 2.f * unit<scalar_t>::T};
+
+    // Configure toy detector
+    toy_det_config toy_cfg{};
+    toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);
+
+    std::cout << toy_cfg << std::endl;
+
+    // Configure wire chamber
+    wire_chamber_config wire_chamber_cfg{};
+    wire_chamber_cfg.half_z(500.f * unit<scalar_t>::mm);
+
+    std::cout << wire_chamber_cfg << std::endl;
+
+    // Configure propagation
+    propagation::config prop_cfg{};
+    prop_cfg.navigation.search_window = {3u, 3u};
+
+    std::cout << prop_cfg << std::endl;
+
+    // Benchmark config
+    detray::benchmarks::benchmark_base::configuration bench_cfg{};
+
+    std::vector<int> n_tracks{8 * 8,     16 * 16,   32 * 32,  64 * 64,
+                              128 * 128, 256 * 256, 512 * 512};
+
+    auto trk_cfg =
+        detray::benchmarks::get_default_trk_gen_config<track_generator_t>(
+            n_tracks);
+
+    // Specific configuration for the random track generation
+    trk_cfg.seed(42u);
+
+    // Add additional tracks for warmup: 10% of the tracks, rounded up
+    bench_cfg.n_warmup(
+        static_cast<int>((trk_cfg.n_tracks() + 9u) / 10u));
+
+    //
+    // Prepare data
+    //
+    auto track_samples =
+        detray::benchmarks::generate_track_samples<track_generator_t>(
+            &host_mr, n_tracks, trk_cfg, true);
+
+    const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg);
+    const auto [wire_chamber, _] =
+        build_wire_chamber(host_mr, wire_chamber_cfg);
+
+    auto bfield = bfield::create_const_field(B);
+
+    dtuple<> empty_state{};
+
+    parameter_transporter<algebra_t>::state transporter_state{};
+    pointwise_material_interactor<algebra_t>::state interactor_state{};
+    parameter_resetter<algebra_t>::state resetter_state{};
+
+    auto actor_states = detail::make_tuple<dtuple>(
+        transporter_state, interactor_state, resetter_state);
+
+    //
+    // Register benchmarks
+    //
+    std::cout << "Propagation Benchmarks\n"
+              << "----------------------\n\n";
+
+    prop_cfg.stepping.do_covariance_transport = true;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::cuda_propagation_bm,
+        detray::benchmarks::cuda_propagator_type<
+            toy_metadata, field_bknd_t, detray::benchmarks::default_chain>>(
+        "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield,
+        &actor_states, track_samples, n_tracks, &dev_mr);
+
+    prop_cfg.stepping.do_covariance_transport = false;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::cuda_propagation_bm,
+        detray::benchmarks::cuda_propagator_type<
+            toy_metadata, field_bknd_t, detray::benchmarks::empty_chain>>(
+        "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, &empty_state,
+        track_samples, n_tracks, &dev_mr);
+
+    prop_cfg.stepping.do_covariance_transport = true;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::cuda_propagation_bm,
+        detray::benchmarks::cuda_propagator_type<
+            default_metadata, field_bknd_t, detray::benchmarks::default_chain>>(
+        "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber,
+        bfield, &actor_states, track_samples, n_tracks, &dev_mr);
+
+    prop_cfg.stepping.do_covariance_transport = false;
+    detray::benchmarks::register_benchmark<
+        detray::benchmarks::cuda_propagation_bm,
+        detray::benchmarks::cuda_propagator_type<
+            default_metadata, field_bknd_t, detray::benchmarks::empty_chain>>(
+        "WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, &empty_state,
+        track_samples, n_tracks, &dev_mr);
+
+    // Run benchmarks
+    ::benchmark::Initialize(&argc, argv);
+    ::benchmark::RunSpecifiedBenchmarks();
+    ::benchmark::Shutdown();
+}
diff --git a/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp
new file mode 100644
index 000000000..c867b6f19
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp
@@ -0,0 +1,76 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2023-2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// Benchmark include
+#include <benchmark/benchmark.h>
+
+// System include(s)
+#include <ostream>
+#include <string>
+
+namespace detray::benchmarks {
+
+/// Base type for detray benchmarks with google benchmark
+struct benchmark_base {
+    /// Local configuration type
+    struct configuration {
+        /// Size of data sample to be used in benchmark
+        int m_samples{100};
+        /// Run a number of operations before the benchmark
+        bool m_warmup = true;
+        /// Size of data in warm-up round (initially 10% of the sample size)
+        int m_n_warmup{static_cast<int>(0.1 * static_cast<double>(m_samples))};
+
+        /// Setters
+        /// @{
+        configuration& n_samples(int n) {
+            m_samples = n;
+            return *this;
+        }
+        configuration& do_warmup(bool b) {
+            m_warmup = b;
+            return *this;
+        }
+        configuration& n_warmup(int n) {
+            m_n_warmup = n;
+            m_warmup = (n > 0);  // an empty warm-up round means no warm-up
+            return *this;
+        }
+        /// @}
+
+        /// Getters
+        /// @{
+        constexpr int n_samples() const { return m_samples; }
+        constexpr bool do_warmup() const { return m_warmup; }
+        constexpr int n_warmup() const { return m_n_warmup; }
+        /// @}
+
+        private:
+        /// Print the benchmark setup
+        friend std::ostream& operator<<(std::ostream& os,
+                                        const configuration& cfg) {
+            os << " -> running:\t " << cfg.n_samples() << " samples"
+               << std::endl;
+            if (cfg.do_warmup()) {
+                os << " -> warmup: \t " << cfg.n_warmup() << " samples"
+                   << std::endl;
+            }
+            os << std::endl;
+            return os;
+        }
+    };
+
+    /// Default construction
+    benchmark_base() = default;
+
+    /// Default destructor
+    virtual ~benchmark_base() = default;
+};
+
+}  // namespace detray::benchmarks
diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt
new file mode 100644
index 000000000..c2bc1a0d8
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Detray library, part of the ACTS project (R&D line)
+#
+# (c) 2024 CERN for the benefit of the ACTS project
+#
+# Mozilla Public License Version 2.0
+
+# Set the CPU build flags.
+include(detray-compiler-options-cpp)
+
+# Set up a test library, which the "new style" benchmarks and tests could use.
+add_library(detray_benchmark_cpu INTERFACE "propagation_benchmark.hpp")
+
+add_library(detray::benchmark_cpu ALIAS detray_benchmark_cpu)
+
+target_link_libraries(detray_benchmark_cpu INTERFACE detray::benchmarks)
diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp
new file mode 100644
index 000000000..94c78c950
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp
@@ -0,0 +1,131 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2023-2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// Project include(s)
+#include "detray/definitions/detail/algebra.hpp"
+#include "detray/tracks/tracks.hpp"
+
+// Detray benchmark include(s)
+#include "detray/benchmarks/benchmark_base.hpp"
+#include "detray/benchmarks/propagation_benchmark_config.hpp"
+#include "detray/benchmarks/propagation_benchmark_utils.hpp"
+
+// Benchmark include
+#include <benchmark/benchmark.h>
+
+// System include(s)
+#include <algorithm>
+#include <cassert>
+#include <ranges>
+#include <string>
+
+namespace detray::benchmarks {
+
+/// Benchmark functor that runs the track propagation on the host
+template <typename propagator_t, typename bfield_t,
+          detray::benchmarks::propagation_opt kOPT =
+              detray::benchmarks::propagation_opt::e_unsync>
+struct host_propagation_bm : public benchmark_base {
+    /// Detector dependent types
+    using algebra_t = typename propagator_t::detector_type::algebra_type;
+    using scalar_t = dscalar<algebra_t>;
+    using vector3_t = dvector3D<algebra_t>;
+
+    /// Local configuration type
+    using configuration = propagation_benchmark_config;
+
+    /// The benchmark configuration
+    configuration m_cfg{};
+
+    /// Default construction
+    host_propagation_bm() = default;
+
+    /// Construct from an externally provided configuration @param cfg
+    explicit host_propagation_bm(const configuration &cfg) : m_cfg{cfg} {}
+
+    /// @return the benchmark configuration
+    configuration &config() { return m_cfg; }
+
+    /// Prepare data and run benchmark loop
+    inline void operator()(::benchmark::State &state,
+                           dvector<free_track_parameters<algebra_t>> *tracks,
+                           const typename propagator_t::detector_type *det,
+                           const bfield_t *bfield,
+                           typename propagator_t::actor_chain_type::state_tuple
+                               *input_actor_states) const {
+        using actor_chain_t = typename propagator_t::actor_chain_type;
+        using actor_states_t = typename actor_chain_t::state_tuple;
+        assert(tracks != nullptr);
+        assert(det != nullptr);
+        assert(bfield != nullptr);
+        assert(input_actor_states != nullptr);
+
+        const int n_samples{m_cfg.benchmark().n_samples()};
+        const int n_warmup{m_cfg.benchmark().n_warmup()};
+
+        assert(static_cast<std::size_t>(n_samples) <= tracks->size());
+
+        // Create propagator
+        propagator_t p{m_cfg.propagation()};
+
+        // Call the host propagation
+        auto run_propagation = [&p, det, bfield, input_actor_states](
+                                   free_track_parameters<algebra_t> &track) {
+            // Fresh copy of actor states
+            actor_states_t actor_states(*input_actor_states);
+            // Tuple of references to pass to the propagator
+            typename actor_chain_t::state actor_state_refs =
+                actor_chain_t::setup_actor_states(actor_states);
+
+            typename propagator_t::state p_state(track, *bfield, *det);
+            // Particle hypothesis
+            auto &ptc = p_state._stepping.particle_hypothesis();
+            p_state.set_particle(update_particle_hypothesis(ptc, track));
+
+            // Run propagation
+            if constexpr (kOPT ==
+                          detray::benchmarks::propagation_opt::e_unsync) {
+                ::benchmark::DoNotOptimize(
+                    p.propagate(p_state, actor_state_refs));
+            } else if constexpr (kOPT ==
+                                 detray::benchmarks::propagation_opt::e_sync) {
+                ::benchmark::DoNotOptimize(
+                    p.propagate_sync(p_state, actor_state_refs));
+            }
+        };
+
+        // Warm-up (skipped for an empty warm-up round: no division by zero)
+        if (m_cfg.benchmark().do_warmup() && n_warmup > 0) {
+            // Visit every 'stride'-th track of the benchmark sample
+            auto stride{n_samples / n_warmup};
+            stride = (stride == 0) ? 10 : stride;
+            assert(stride > 0);
+
+#pragma omp parallel for schedule(dynamic)
+            for (int i = 0; i < n_samples; i += stride) {
+                run_propagation((*tracks)[static_cast<std::size_t>(i)]);
+            }
+        }
+
+        // Run the benchmark
+        std::size_t total_tracks = 0u;
+        for (auto _ : state) {
+#pragma omp parallel for schedule(dynamic)
+            for (int i = 0; i < n_samples; ++i) {
+                run_propagation((*tracks)[static_cast<std::size_t>(i)]);
+            }
+            total_tracks += static_cast<std::size_t>(n_samples);
+        }
+        // Report throughput
+        state.counters["TracksPropagated"] = benchmark::Counter(
+            static_cast<double>(total_tracks), benchmark::Counter::kIsRate);
+    }
+};
+
+}  // namespace detray::benchmarks
diff --git a/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt
new file mode 100644
index 000000000..71bce8dfa
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Detray library, part of the ACTS project (R&D line)
+#
+# (c) 2024 CERN for the benefit of the ACTS project
+#
+# Mozilla Public License Version 2.0
+
+if(DETRAY_BUILD_CUDA)
+    add_subdirectory(cuda)
+endif()
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt
new file mode 100644
index 000000000..9548b0dd7
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Detray library, part of the ACTS project (R&D line)
+#
+# (c) 2024 CERN for the benefit of the ACTS project
+#
+# Mozilla Public License Version 2.0
+
+# C++17 support for CUDA requires CMake 3.18.
+cmake_minimum_required(VERSION 3.18)
+
+# Enable CUDA as a language.
+enable_language(CUDA)
+
+# Set the CUDA build flags.
+include(detray-compiler-options-cuda)
+
+# Set up a benchmark library for CUDA
+add_library(
+    detray_benchmark_cuda
+    STATIC
+    "propagation_benchmark.hpp"
+    "propagation_benchmark.cu"
+)
+
+add_library(detray::benchmark_cuda ALIAS detray_benchmark_cuda)
+
+target_link_libraries(
+    detray_benchmark_cuda
+    PUBLIC detray::benchmarks detray::core_array vecmem::cuda
+)
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu
new file mode 100644
index 000000000..fd993bbdb
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu
@@ -0,0 +1,151 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2022-2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp"
+#include "detray/core/detector_metadata.hpp"
+#include "detray/definitions/detail/cuda_definitions.hpp"
+#include "detray/detectors/toy_metadata.hpp"
+
+namespace detray::benchmarks {
+
+/// Benchmark kernel: every thread propagates the track at its global index
+template <typename propagator_t, detray::benchmarks::propagation_opt kOPT>
+__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel(
+    propagation::config cfg,
+    typename propagator_t::detector_type::view_type det_view,
+    typename propagator_t::stepper_type::magnetic_field_type field_view,
+    typename propagator_t::actor_chain_type::state_tuple
+        *device_actor_state_ptr,
+    vecmem::data::vector_view<
+        free_track_parameters<typename propagator_t::algebra_type>>
+        tracks_view) {
+    using detector_device_t =
+        detector<typename propagator_t::detector_type::metadata,
+                 device_container_types>;
+    using algebra_t = typename detector_device_t::algebra_type;
+    using actor_chain_t = typename propagator_t::actor_chain_type;
+    using propagator_device_t =
+        propagator<typename propagator_t::stepper_type,
+                   navigator<detector_device_t>, actor_chain_t>;
+
+    detector_device_t det(det_view);
+    vecmem::device_vector<free_track_parameters<algebra_t>> tracks(tracks_view);
+
+    const unsigned int gid = threadIdx.x + blockIdx.x * blockDim.x;
+    if (gid >= tracks.size()) {
+        return;
+    }
+
+    // Create propagator
+    propagator_device_t p{cfg};
+
+    // Create the actor states on a fresh copy
+    typename actor_chain_t::state_tuple actor_states = *device_actor_state_ptr;
+    auto actor_state_refs = actor_chain_t::setup_actor_states(actor_states);
+
+    // Create the propagator state
+    typename propagator_device_t::state p_state(tracks.at(gid), field_view,
+                                                det);
+
+    // Particle hypothesis
+    auto &ptc = p_state._stepping.particle_hypothesis();
+    p_state.set_particle(update_particle_hypothesis(ptc, tracks.at(gid)));
+
+    // Run propagation
+    if constexpr (kOPT == detray::benchmarks::propagation_opt::e_unsync) {
+        p.propagate(p_state, actor_state_refs);
+    } else if constexpr (kOPT == detray::benchmarks::propagation_opt::e_sync) {
+        p.propagate_sync(p_state, actor_state_refs);
+    }
+}
+
+template <typename propagator_t>
+typename propagator_t::actor_chain_type::state_tuple *setup_actor_states(
+    typename propagator_t::actor_chain_type::state_tuple *input_actor_states) {
+    // Copy the actor state blueprint to the device. The returned pointer is
+    // device memory that has to be freed with 'release_actor_states'
+    using actor_state_t = typename propagator_t::actor_chain_type::state_tuple;
+    actor_state_t *device_actor_state_ptr{nullptr};
+
+    // Check the CUDA calls in release builds too (asserts vanish with NDEBUG)
+    DETRAY_CUDA_ERROR_CHECK(
+        cudaMalloc(reinterpret_cast<void **>(&device_actor_state_ptr),
+                   sizeof(actor_state_t)));
+    DETRAY_CUDA_ERROR_CHECK(
+        cudaMemcpy(device_actor_state_ptr, input_actor_states,
+                   sizeof(actor_state_t), cudaMemcpyHostToDevice));
+
+    return device_actor_state_ptr;
+}
+
+template <typename propagator_t>
+void release_actor_states(typename propagator_t::actor_chain_type::state_tuple
+                              *device_actor_state_ptr) {
+    // Free the device blueprint; failures are reported in release builds too
+    DETRAY_CUDA_ERROR_CHECK(cudaFree(device_actor_state_ptr));
+}
+
+template <typename propagator_t, detray::benchmarks::propagation_opt kOPT>
+void run_propagation_kernel(
+    const propagation::config &cfg,
+    typename propagator_t::detector_type::view_type det_view,
+    typename propagator_t::stepper_type::magnetic_field_type field_view,
+    typename propagator_t::actor_chain_type::state_tuple
+        *device_actor_state_ptr,
+    vecmem::data::vector_view<
+        free_track_parameters<typename propagator_t::algebra_type>>
+        tracks_view,
+    const int n_samples) {
+    // A launch with zero blocks is an invalid configuration: nothing to run
+    if (n_samples <= 0) {
+        return;
+    }
+    constexpr int thread_dim = 256;
+    const int block_dim = (n_samples + thread_dim - 1) / thread_dim;
+    propagator_benchmark_kernel<propagator_t, kOPT><<<block_dim, thread_dim>>>(
+        cfg, det_view, field_view, device_actor_state_ptr, tracks_view);
+    // Check for launch errors, then wait until the kernel has finished
+    DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
+    DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
+}
+
+/// Macro declaring the template instantiations for the different detector types
+#define DECLARE_PROPAGATION_BENCHMARK(METADATA, CHAIN, FIELD, OPT)             \
+                                                                               \
+    template void                                                              \
+    run_propagation_kernel<cuda_propagator_type<METADATA, FIELD, CHAIN>, OPT>( \
+        const propagation::config &, detector<METADATA>::view_type,            \
+        covfie::field_view<FIELD>,                                             \
+        cuda_propagator_type<METADATA, FIELD,                                  \
+                             CHAIN>::actor_chain_type::state_tuple *,          \
+        vecmem::data::vector_view<                                             \
+            free_track_parameters<detector<METADATA>::algebra_type>>,          \
+        const int);                                                            \
+                                                                               \
+    template cuda_propagator_type<METADATA, FIELD,                             \
+                                  CHAIN>::actor_chain_type::state_tuple *      \
+    setup_actor_states<cuda_propagator_type<METADATA, FIELD, CHAIN>>(          \
+        cuda_propagator_type<METADATA, FIELD,                                  \
+                             CHAIN>::actor_chain_type::state_tuple *);         \
+                                                                               \
+    template void                                                              \
+    release_actor_states<cuda_propagator_type<METADATA, FIELD, CHAIN>>(        \
+        cuda_propagator_type<METADATA, FIELD,                                  \
+                             CHAIN>::actor_chain_type::state_tuple *);
+
+DECLARE_PROPAGATION_BENCHMARK(default_metadata, empty_chain,
+                              bfield::const_bknd_t,
+                              detray::benchmarks::propagation_opt::e_unsync)
+DECLARE_PROPAGATION_BENCHMARK(default_metadata, default_chain,
+                              bfield::const_bknd_t,
+                              detray::benchmarks::propagation_opt::e_unsync)
+DECLARE_PROPAGATION_BENCHMARK(toy_metadata, empty_chain, bfield::const_bknd_t,
+                              detray::benchmarks::propagation_opt::e_unsync)
+DECLARE_PROPAGATION_BENCHMARK(toy_metadata, default_chain, bfield::const_bknd_t,
+                              detray::benchmarks::propagation_opt::e_unsync)
+
+}  // namespace detray::benchmarks
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp
new file mode 100644
index 000000000..9442ea810
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp
@@ -0,0 +1,179 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// Project include(s)
+#include "detray/definitions/detail/algebra.hpp"
+#include "detray/detectors/bfield.hpp"
+#include "detray/navigation/navigator.hpp"
+#include "detray/propagator/actor_chain.hpp"
+#include "detray/propagator/actors/aborters.hpp"
+#include "detray/propagator/actors/parameter_resetter.hpp"
+#include "detray/propagator/actors/parameter_transporter.hpp"
+#include "detray/propagator/actors/pointwise_material_interactor.hpp"
+#include "detray/propagator/propagator.hpp"
+#include "detray/propagator/rk_stepper.hpp"
+#include "detray/tracks/tracks.hpp"
+
+// Detray benchmark include(s)
+#include "detray/benchmarks/benchmark_base.hpp"
+#include "detray/benchmarks/propagation_benchmark_config.hpp"
+#include "detray/benchmarks/propagation_benchmark_utils.hpp"
+
+// Vecmem include(s)
+#include <vecmem/memory/cuda/device_memory_resource.hpp>
+#include <vecmem/memory/host_memory_resource.hpp>
+#include <vecmem/memory/memory_resource.hpp>
+#include <vecmem/utils/cuda/copy.hpp>
+
+// Benchmark include
+#include <benchmark/benchmark.h>
+
+// System include(s)
+#include <algorithm>
+#include <cassert>
+#include <random>
+#include <string>
+
+namespace detray::benchmarks {
+
+// Define propagator type
+template <typename algebra_t>
+using empty_chain = actor_chain<>;
+
+template <typename algebra_t>
+using default_chain = actor_chain<dtuple, parameter_transporter<algebra_t>,
+                                  pointwise_material_interactor<algebra_t>,
+                                  parameter_resetter<algebra_t>>;
+
+template <typename metadata_t, typename bfield_t,
+          template <typename> class actor_chain_t>
+using cuda_propagator_type =
+    propagator<rk_stepper<covfie::field_view<bfield_t>,
+                          typename detector<metadata_t>::algebra_type>,
+               navigator<detector<metadata_t>>,
+               actor_chain_t<typename detector<metadata_t>::algebra_type>>;
+
+/// Launch the propagation kernel for benchmarking
+///
+/// @tparam kOPT which propagation to run (sync vs. unsync)
+///
+/// Arguments, in order: the propagation configuration, the detector vecmem
+/// view, the magnetic field view, the device pointer to the actor state
+/// blueprint (see @c setup_actor_states ), the track collection view and the
+/// number of tracks that determines the size of the launch grid
+template <typename propagator_t,
+          detray::benchmarks::propagation_opt kOPT =
+              detray::benchmarks::propagation_opt::e_unsync>
+void run_propagation_kernel(
+    const propagation::config &,
+    typename propagator_t::detector_type::view_type,
+    typename propagator_t::stepper_type::magnetic_field_type,
+    typename propagator_t::actor_chain_type::state_tuple *,
+    vecmem::data::vector_view<
+        free_track_parameters<typename propagator_t::algebra_type>>,
+    const int);
+
+/// Allocate actor state blueprint on device
+template <typename propagator_t>
+typename propagator_t::actor_chain_type::state_tuple *setup_actor_states(
+    typename propagator_t::actor_chain_type::state_tuple *);
+
+/// Release actor state blueprint
+template <typename propagator_t>
+void release_actor_states(
+    typename propagator_t::actor_chain_type::state_tuple *);
+
+/// Device propagation benchmark
+template <typename propagator_t, typename bfield_bknd_t,
+          detray::benchmarks::propagation_opt kOPT =
+              detray::benchmarks::propagation_opt::e_unsync>
+struct cuda_propagation_bm : public benchmark_base {
+    /// Detector dependent types
+    using algebra_t = typename propagator_t::detector_type::algebra_type;
+    using scalar_t = dscalar<algebra_t>;
+    using vector3_t = dvector3D<algebra_t>;
+
+    /// Local configuration type
+    using configuration = propagation_benchmark_config;
+
+    /// The benchmark configuration
+    configuration m_cfg{};
+
+    /// Default construction
+    cuda_propagation_bm() = default;
+
+    /// Construct from an externally provided configuration @param cfg
+    explicit cuda_propagation_bm(const configuration &cfg) : m_cfg{cfg} {}
+
+    /// @return the benchmark configuration
+    configuration &config() { return m_cfg; }
+
+    /// Prepare data and run benchmark loop
+    inline void operator()(::benchmark::State &state,
+                           vecmem::memory_resource *dev_mr,
+                           dvector<free_track_parameters<algebra_t>> *tracks,
+                           const typename propagator_t::detector_type *det,
+                           const bfield_bknd_t *bfield,
+                           typename propagator_t::actor_chain_type::state_tuple
+                               *input_actor_states) const {
+
+        assert(dev_mr != nullptr);
+        assert(tracks != nullptr);
+        assert(det != nullptr);
+        assert(bfield != nullptr);
+        assert(input_actor_states != nullptr);
+
+        // Helper object for performing memory copies (to CUDA devices)
+        vecmem::cuda::copy cuda_cpy;
+
+        const int n_samples{m_cfg.benchmark().n_samples()};
+        const int n_warmup{m_cfg.benchmark().n_warmup()};
+
+        assert(static_cast<std::size_t>(n_samples) <= tracks->size());
+
+        // Copy the track collection to device
+        auto track_buffer =
+            detray::get_buffer(vecmem::get_data(*tracks), *dev_mr, cuda_cpy);
+
+        // Copy the detector to device and get its view
+        auto det_buffer = detray::get_buffer(*det, *dev_mr, cuda_cpy);
+        auto det_view = detray::get_data(det_buffer);
+
+        // Copy blueprint actor states to device
+        auto *device_actor_state_ptr =
+            setup_actor_states<propagator_t>(input_actor_states);
+
+        // Do a small warm up run, if requested (never launch an empty grid)
+        if (m_cfg.benchmark().do_warmup() && n_warmup > 0) {
+            auto warmup_track_buffer = detray::get_buffer(
+                vecmem::get_data(*tracks), *dev_mr, cuda_cpy);
+            run_propagation_kernel<propagator_t, kOPT>(
+                m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr,
+                warmup_track_buffer, math::min(n_warmup, n_samples));
+        }
+
+        std::size_t total_tracks = 0u;
+        for (auto _ : state) {
+            // Launch the propagator test for GPU device
+            run_propagation_kernel<propagator_t, kOPT>(
+                m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr,
+                track_buffer, n_samples);
+
+            total_tracks += static_cast<std::size_t>(n_samples);
+        }
+
+        // Report throughput
+        state.counters["TracksPropagated"] = benchmark::Counter(
+            static_cast<double>(total_tracks), benchmark::Counter::kIsRate);
+
+        release_actor_states<propagator_t>(device_actor_state_ptr);
+    }
+};
+
+}  // namespace detray::benchmarks
diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp
new file mode 100644
index 000000000..32dbcc617
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp
@@ -0,0 +1,57 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// Project include(s)
+#include "detray/benchmarks/benchmark_base.hpp"
+#include "detray/propagator/propagation_config.hpp"
+
+// System include(s)
+#include <string>
+#include <string_view>
+
+namespace detray::benchmarks {
+
+/// Configuration for propagation benchmarks
+struct propagation_benchmark_config {
+    /// Prefix for the benchmark name
+    std::string m_name{"BM_PROPAGATION"};
+    /// Benchmark configuration
+    benchmark_base::configuration m_benchmark{};
+    /// Propagation configuration
+    propagation::config m_propagation{};
+
+    /// Default construction
+    propagation_benchmark_config() = default;
+
+    /// Construct from a base benchmark configuration @param bench_cfg
+    explicit propagation_benchmark_config(
+        const benchmark_base::configuration& bench_cfg)
+        : m_benchmark(bench_cfg) {}
+
+    /// Getters
+    /// @{
+    const std::string& name() const { return m_name; }
+    const propagation::config& propagation() const { return m_propagation; }
+    propagation::config& propagation() { return m_propagation; }
+    const benchmark_base::configuration& benchmark() const {
+        return m_benchmark;
+    }
+    benchmark_base::configuration& benchmark() { return m_benchmark; }
+    /// @}
+
+    /// Setters
+    /// @{
+    propagation_benchmark_config& name(const std::string_view n) {
+        m_name = n;
+        return *this;
+    }
+    /// @}
+};
+
+}  // namespace detray::benchmarks
diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp
new file mode 100644
index 000000000..89bbb6bb2
--- /dev/null
+++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp
@@ -0,0 +1,223 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// Project include(s)
+#include "detray/definitions/detail/algebra.hpp"
+#include "detray/navigation/navigator.hpp"
+#include "detray/propagator/actor_chain.hpp"
+#include "detray/propagator/propagator.hpp"
+#include "detray/tracks/tracks.hpp"
+#include "detray/utils/tuple.hpp"
+
+// Vecmem include(s)
+#include <vecmem/memory/memory_resource.hpp>
+
+// Benchmark include
+#include <benchmark/benchmark.h>
+
+// System include(s)
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace detray::benchmarks {
+
+/// Selects which propagate function a benchmark runs
+enum class propagation_opt : int {
+    e_unsync = 0,  ///< NOTE(review): presumably plain `propagate` - confirm
+    e_sync,        ///< (= 1) NOTE(review): presumably `propagate_sync`
+};
+
+/// @returns the default track generation configuration for detray benchmarks,
+/// with the track count set to the largest entry of @param n_tracks (if any)
+template <typename track_generator_t>
+inline typename track_generator_t::configuration get_default_trk_gen_config(
+    const std::vector<int> &n_tracks) {
+    using track_t = typename track_generator_t::track_type;
+    using scalar_t = dscalar<typename track_t::algebra_type>;
+
+    typename track_generator_t::configuration trk_cfg{};
+    // An empty request has no maximum (dereferencing the result would be
+    // undefined behaviour): keep the generator's own track count in that case
+    if (!n_tracks.empty()) {
+        const int n_trks{*std::ranges::max_element(n_tracks)};
+        trk_cfg.n_tracks(static_cast<std::size_t>(n_trks));
+    }
+    trk_cfg.randomize_charge(true);
+    trk_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
+    trk_cfg.eta_range(-3.f, 3.f);
+    trk_cfg.mom_range(1.f * unit<scalar_t>::GeV, 100.f * unit<scalar_t>::GeV);
+    trk_cfg.origin({0.f, 0.f, 0.f});
+    trk_cfg.origin_stddev({0.f * unit<scalar_t>::mm, 0.f * unit<scalar_t>::mm,
+                           0.f * unit<scalar_t>::mm});
+    return trk_cfg;
+}
+
+/// Run the track generator once and collect all of its tracks in a vector
+///
+/// @param mr memory resource that is handed to the track vector
+/// @param cfg the configuration of the track generator
+/// @param do_sort order the tracks by their value of |pi_2 - theta|
+///
+/// @returns the collection of generated tracks
+template <typename track_generator_t>
+inline auto generate_tracks(
+    vecmem::memory_resource *mr,
+    const typename track_generator_t::configuration &cfg = {},
+    bool do_sort = true) {
+
+    using track_t = typename track_generator_t::track_type;
+    using scalar_t = dscalar<typename track_t::algebra_type>;
+
+    // Fill the output container straight from the generator range
+    dvector<track_t> tracks(mr);
+    for (const auto &trk : track_generator_t{cfg}) {
+        tracks.push_back(trk);
+    }
+
+    if (!do_sort) {
+        return tracks;
+    }
+
+    // Sorting key: absolute difference between pi_2 and the track's theta
+    const auto theta_dist = [](const auto &trk) {
+        constexpr auto pi_2{constant<scalar_t>::pi_2};
+        return math::fabs(pi_2 - vector::theta(trk.dir()));
+    };
+    std::ranges::sort(tracks, [&theta_dist](const auto &lhs, const auto &rhs) {
+        return theta_dist(lhs) < theta_dist(rhs);
+    });
+
+    return tracks;
+}
+
+/// Generate one sample of track states per entry of @param n_tracks (= sample
+/// size). @param cfg is only copied from, hence const (binds the `{}` default)
+template <typename track_generator_t>
+inline auto generate_track_samples(
+    vecmem::memory_resource *mr, const std::vector<int> &n_tracks,
+    const typename track_generator_t::configuration &cfg = {},
+    bool do_sort = true) {
+    using track_t = typename track_generator_t::track_type;
+
+    std::vector<dvector<track_t>> track_samples{};
+    track_samples.reserve(n_tracks.size());
+
+    // Only the track count differs between the samples: adjust it on a copy
+    auto tmp_cfg{cfg};
+    for (const int n : n_tracks) {
+        tmp_cfg.n_tracks(static_cast<std::size_t>(n));
+        track_samples.push_back(
+            generate_tracks<track_generator_t>(mr, tmp_cfg, do_sort));
+    }
+    return track_samples;
+}
+
+/// Register a propagation benchmark of type @tparam benchmark_t for every
+/// entry in @param n_samples (the number of tracks to run per benchmark)
+///
+/// @tparam propagator_t full propagator type
+/// @tparam detector_t host detector type
+/// @tparam bfield_bknd_t covfie magnetic field type
+/// @tparam kOPT which propagate function to run
+///
+/// @param name name for the benchmark
+/// @param bench_cfg basic benchmark configuration (n_samples is overwritten)
+/// @param prop_cfg propagation configuration
+/// @param det the detector
+/// @param bfield the covfie field
+/// @param actor_states tuple of all actor states (order as in actor_chain_t)
+/// @param track_samples the pre-computed test tracks, one sample per entry
+/// @param dev_mr memory resource, only passed to benchmarks that require it
+template <template <typename, typename, detray::benchmarks::propagation_opt>
+          class benchmark_t,
+          typename propagator_t, typename detector_t, typename bfield_bknd_t,
+          detray::benchmarks::propagation_opt kOPT =
+              detray::benchmarks::propagation_opt::e_unsync>
+inline void register_benchmark(
+    const std::string &name, benchmark_base::configuration &bench_cfg,
+    propagation::config &prop_cfg, const detector_t &det, bfield_bknd_t &bfield,
+    typename propagator_t::actor_chain_type::state_tuple *actor_states,
+    std::vector<
+        dvector<free_track_parameters<typename detector_t::algebra_type>>>
+        &track_samples,
+    const std::vector<int> &n_samples = {10000},
+    vecmem::memory_resource *dev_mr = nullptr) {
+
+    using algebra_t = typename detector_t::algebra_type;
+    using propagation_benchmark_t =
+        benchmark_t<propagator_t, bfield_bknd_t, kOPT>;
+
+    assert(track_samples.size() == n_samples.size());
+
+    for (const auto [i, n] : detray::views::enumerate(n_samples)) {
+
+        auto &tracks = track_samples[i];
+        assert(static_cast<std::size_t>(n) <= tracks.size());
+
+        bench_cfg.n_samples(n);
+
+        typename propagation_benchmark_t::configuration prop_bm_cfg{bench_cfg};
+        prop_bm_cfg.propagation() = prop_cfg;
+
+        // Configure the benchmark
+        propagation_benchmark_t prop_benchmark{prop_bm_cfg};
+
+        std::string bench_name = prop_benchmark.config().name() + "_" + name +
+                                 "_" + std::to_string(n) + "_TRACKS";
+
+        std::cout << bench_name << "\n" << bench_cfg;
+
+        // Cpu benchmark: the functor is invocable without a memory resource
+        if constexpr (std::is_invocable_v<
+                          decltype(prop_benchmark), ::benchmark::State &,
+                          dvector<free_track_parameters<algebra_t>> *,
+                          const detector_t *, const bfield_bknd_t *,
+                          typename propagator_t::actor_chain_type::state_tuple
+                              *>) {
+            ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark,
+                                           &tracks, &det, &bfield,
+                                           actor_states);
+            //->MeasureProcessCPUTime();
+        } else {
+            // Otherwise the memory resource is passed as additional argument
+            ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark,
+                                           dev_mr, &tracks, &det, &bfield,
+                                           actor_states);
+            //->MeasureProcessCPUTime();
+        }
+    }
+}
+
+/// Register a propagation benchmark of type @tparam benchmark_t, with the
+/// propagator type assembled from the stepper, a navigator and the actors
+///
+/// @tparam stepper_t the stepper to use for track parameter transport
+/// @tparam actor_chain_t types of actors
+template <template <typename, typename, detray::benchmarks::propagation_opt>
+          class benchmark_t,
+          typename stepper_t, typename actor_chain_t, typename detector_t,
+          typename bfield_bknd_t,
+          detray::benchmarks::propagation_opt kOPT =
+              detray::benchmarks::propagation_opt::e_unsync>
+inline void register_benchmark(
+    const std::string &name, benchmark_base::configuration &bench_cfg,
+    propagation::config &prop_cfg, const detector_t &det, bfield_bknd_t &bfield,
+    typename actor_chain_t::state_tuple *actor_states,
+    std::vector<
+        dvector<free_track_parameters<typename detector_t::algebra_type>>>
+        &tracks,
+    const std::vector<int> &n_samples = {10000}) {
+    using navigator_t = navigator<detector_t>;
+    using propagator_t = propagator<stepper_t, navigator_t, actor_chain_t>;
+    // Delegate to the propagator-based overload, without a memory resource
+    register_benchmark<benchmark_t, propagator_t, detector_t, bfield_bknd_t,
+                       kOPT>(name, bench_cfg, prop_cfg, det, bfield,
+                             actor_states, tracks, n_samples, nullptr);
+}
+
+}  // namespace detray::benchmarks
diff --git a/tests/include/detray/test/common/detector_scan_config.hpp b/tests/include/detray/test/common/detector_scan_config.hpp
index f471133c1..b184d7b9b 100644
--- a/tests/include/detray/test/common/detector_scan_config.hpp
+++ b/tests/include/detray/test/common/detector_scan_config.hpp
@@ -26,8 +26,8 @@ namespace detray::test {
 template <typename track_generator_t>
 struct detector_scan_config : public test::fixture_base<>::configuration {
     using base_type = test::fixture_base<>;
-    using scalar_type = typename base_type::scalar;
-    using vector3_type = typename base_type::vector3;
+    using scalar_type = typename base_type::scalar_type;
+    using vector3_type = typename base_type::vector3_type;
     using trk_gen_config_t = typename track_generator_t::configuration;
 
     /// Name of the test
diff --git a/tests/include/detray/test/common/fixture_base.hpp b/tests/include/detray/test/common/fixture_base.hpp
index 2550bf1fc..33a861b99 100644
--- a/tests/include/detray/test/common/fixture_base.hpp
+++ b/tests/include/detray/test/common/fixture_base.hpp
@@ -31,12 +31,12 @@ class fixture_base : public scope {
     public:
     /// Linear algebra typedefs
     /// @{
-    using algebra_t = ALGEBRA_PLUGIN<test::scalar>;
-    using scalar = detray::scalar;
-    using point2 = test::point2;
-    using point3 = test::point3;
-    using vector3 = test::vector3;
-    using transform3 = test::transform3;
+    using algebra_type = ALGEBRA_PLUGIN<test::scalar>;
+    using scalar_type = dscalar<algebra_type>;
+    using point2_type = dpoint2D<algebra_type>;
+    using point3_type = dpoint3D<algebra_type>;
+    using vector3_type = dvector3D<algebra_type>;
+    using transform3_type = dtransform3D<algebra_type>;
     /// @}
 
     /// Local configuration type
diff --git a/tests/include/detray/test/common/material_validation_config.hpp b/tests/include/detray/test/common/material_validation_config.hpp
index 14ea10262..e14525263 100644
--- a/tests/include/detray/test/common/material_validation_config.hpp
+++ b/tests/include/detray/test/common/material_validation_config.hpp
@@ -25,8 +25,8 @@ namespace detray::test {
 /// @brief Configuration for a detector scan test.
 struct material_validation_config : public test::fixture_base<>::configuration {
     using base_type = test::fixture_base<>;
-    using scalar_type = typename base_type::scalar;
-    using vector3_type = typename base_type::vector3;
+    using scalar_type = typename base_type::scalar_type;
+    using vector3_type = typename base_type::vector3_type;
 
     /// Name of the test
     std::string m_name{"material_validation"};
diff --git a/tests/include/detray/test/common/navigation_validation_config.hpp b/tests/include/detray/test/common/navigation_validation_config.hpp
index b76f89ce0..c18f12923 100644
--- a/tests/include/detray/test/common/navigation_validation_config.hpp
+++ b/tests/include/detray/test/common/navigation_validation_config.hpp
@@ -26,8 +26,8 @@ namespace detray::test {
 struct navigation_validation_config
     : public test::fixture_base<>::configuration {
     using base_type = test::fixture_base<>;
-    using scalar_type = typename base_type::scalar;
-    using vector3_type = typename base_type::vector3;
+    using scalar_type = typename base_type::scalar_type;
+    using vector3_type = typename base_type::vector3_type;
 
     /// Name of the test
     std::string m_name{"navigation_validation"};
diff --git a/tests/integration_tests/cpu/propagator/propagator.cpp b/tests/integration_tests/cpu/propagator/propagator.cpp
index 206c03535..999f4c56b 100644
--- a/tests/integration_tests/cpu/propagator/propagator.cpp
+++ b/tests/integration_tests/cpu/propagator/propagator.cpp
@@ -265,19 +265,19 @@ TEST_P(PropagatorWithRkStepper, rk4_propagator_const_bfield) {
 
         // Propagate the entire detector
         ASSERT_TRUE(
-            p.propagate(state, actor_chain_t::make_ref_tuple(actor_states)))
+            p.propagate(state, actor_chain_t::setup_actor_states(actor_states)))
             //<< state.debug_stream.str() << std::endl;
             << state._navigation.inspector().to_string() << std::endl;
 
         // Test propagate sync method
         ASSERT_TRUE(p.propagate_sync(
-            sync_state, actor_chain_t::make_ref_tuple(actor_states_sync)))
+            sync_state, actor_chain_t::setup_actor_states(actor_states_sync)))
             //<< state.debug_stream.str() << std::endl;
             << sync_state._navigation.inspector().to_string() << std::endl;
 
         // Propagate with path limit
         ASSERT_FALSE(p.propagate(
-            lim_state, actor_chain_t::make_ref_tuple(actor_states_lim)))
+            lim_state, actor_chain_t::setup_actor_states(actor_states_lim)))
             //<< lim_state.debug_stream.str() << std::endl;
             << lim_state._navigation.inspector().to_string() << std::endl;