diff --git a/examples/alpaka/nbody_vc/nbody.cpp b/examples/alpaka/nbody_vc/nbody.cpp index 1d8a84105c..38adefc3c1 100644 --- a/examples/alpaka/nbody_vc/nbody.cpp +++ b/examples/alpaka/nbody_vc/nbody.cpp @@ -109,11 +109,16 @@ inline constexpr auto canUseVcWithMapping = false; template inline constexpr auto canUseVcWithMapping, Elems> = true; -template -inline constexpr auto - canUseVcWithMapping, Elems> = Lanes - >= Elems&& Lanes % Elems - == 0; +template < + typename ArrayDomain, + typename DatumDomain, + std::size_t Lanes, + bool LaneAccessOnly, + typename Linearize, + std::size_t Elems> +inline constexpr auto canUseVcWithMapping< + llama::mapping::AoSoA, + Elems> = Lanes >= Elems&& Lanes % Elems == 0; template struct VecType @@ -143,6 +148,8 @@ struct UpdateKernel "UpdateKernel only works with compatible mappings like SoA or AoSoAs"); auto sharedView = [&] { + // TODO: we could optimize here, since pPInteraction only needs position and mass, no velocity. the mapping + // could discard these properties const auto sharedMapping = llama::mapping::SoA(typename View::ArrayDomain{BlockSize}, typename View::DatumDomain{}); @@ -244,7 +251,8 @@ int main() if constexpr (MAPPING == 2) return llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}}; if constexpr (MAPPING == 3) - return llama::mapping::AoSoA, Particle, aosoaLanes>{arrayDomain}; + return llama::mapping:: + AoSoA, Particle, aosoaLanes, aosoaLanes == elements>{arrayDomain}; if constexpr (MAPPING == 4) return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}}; if constexpr (MAPPING == 5) diff --git a/include/llama/mapping/AoSoA.hpp b/include/llama/mapping/AoSoA.hpp index a7a7e6cb99..25a4ff1fc9 100644 --- a/include/llama/mapping/AoSoA.hpp +++ b/include/llama/mapping/AoSoA.hpp @@ -6,17 +6,18 @@ namespace llama::mapping { - /// Array of struct of arrays mapping. Used to create a \ref View via \ref - /// allocView. - /// \tparam Lanes The size of the inner arrays of this array of struct of - /// arrays. - /// \tparam LinearizeArrayDomainFunctor Defines how the - /// user domain should be mapped into linear numbers and how big the linear - /// domain gets. + /// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView. + /// \tparam Lanes The size of the inner arrays of this array of struct of arrays. + /// \tparam LaneAccessOnly If true, the mapping will return the same reference to the beginning of a lane and for + /// all elements mapped into the same lane. This simplifies the mapping function in case the AoSoA mapping is used + /// in conjunction with a vectorization library. + /// \tparam LinearizeArrayDomainFunctor Defines how the user domain should be mapped into linear numbers and how big + /// the linear domain gets. template < typename T_ArrayDomain, typename T_DatumDomain, std::size_t Lanes, + bool LaneAccessOnly = false, typename LinearizeArrayDomainFunctor = LinearizeArrayDomainCpp> struct AoSoA { @@ -41,10 +42,13 @@ namespace llama::mapping { const auto flatArrayIndex = LinearizeArrayDomainFunctor{}(coord, arrayDomainSize); const auto blockIndex = flatArrayIndex / Lanes; - const auto laneIndex = flatArrayIndex % Lanes; - const auto offset = (sizeOf * Lanes) * blockIndex - + offsetOf> * Lanes - + sizeof(GetType>) * laneIndex; + auto offset = (sizeOf * Lanes) * blockIndex + + offsetOf> * Lanes; + if constexpr (!LaneAccessOnly) + { + const auto laneIndex = flatArrayIndex % Lanes; + offset += sizeof(GetType>) * laneIndex; + } return {0, offset}; }