Skip to content

Commit

Permalink
add an optimization to AoSoA that simplifies the mapping when used to…
Browse files Browse the repository at this point in the history
…gether with Vc
  • Loading branch information
bernhardmgruber committed Nov 17, 2020
1 parent fa91cc2 commit 99e0030
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 17 deletions.
20 changes: 14 additions & 6 deletions examples/alpaka/nbody_vc/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,16 @@ inline constexpr auto canUseVcWithMapping = false;
// Partial specialization for SoA mappings: the flag is true for every SoA mapping, independent of Elems.
template <typename ArrayDomain, typename DatumDomain, typename Linearize, std::size_t Elems>
inline constexpr auto canUseVcWithMapping<llama::mapping::SoA<ArrayDomain, DatumDomain, Linearize>, Elems> = true;

// Partial specialization for AoSoA mappings declared as AoSoA<ArrayDomain, DatumDomain, Lanes, Linearize>.
// The flag is true only when Lanes is at least Elems and Lanes is evenly divisible by Elems.
template <typename ArrayDomain, typename DatumDomain, std::size_t Lanes, typename Linearize, std::size_t Elems>
inline constexpr auto
canUseVcWithMapping<llama::mapping::AoSoA<ArrayDomain, DatumDomain, Lanes, Linearize>, Elems> = Lanes
>= Elems&& Lanes % Elems
== 0;
// Partial specialization for AoSoA mappings declared as
// AoSoA<ArrayDomain, DatumDomain, Lanes, LaneAccessOnly, Linearize>.
// The flag is true only when Lanes is at least Elems and Lanes is evenly divisible by Elems;
// the value of LaneAccessOnly does not influence the result.
template <
typename ArrayDomain,
typename DatumDomain,
std::size_t Lanes,
bool LaneAccessOnly,
typename Linearize,
std::size_t Elems>
inline constexpr auto canUseVcWithMapping<
llama::mapping::AoSoA<ArrayDomain, DatumDomain, Lanes, LaneAccessOnly, Linearize>,
Elems> = Lanes >= Elems&& Lanes % Elems == 0;

template <std::size_t Elems>
struct VecType
Expand Down Expand Up @@ -143,6 +148,8 @@ struct UpdateKernel
"UpdateKernel only works with compatible mappings like SoA or AoSoAs");

auto sharedView = [&] {
// TODO: we could optimize here, since pPInteraction only needs position and mass, but not velocity. The mapping
// could discard the unneeded properties.
const auto sharedMapping
= llama::mapping::SoA(typename View::ArrayDomain{BlockSize}, typename View::DatumDomain{});

Expand Down Expand Up @@ -244,7 +251,8 @@ int main()
if constexpr (MAPPING == 2)
return llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}};
if constexpr (MAPPING == 3)
return llama::mapping::AoSoA<std::decay_t<decltype(arrayDomain)>, Particle, aosoaLanes>{arrayDomain};
return llama::mapping::
AoSoA<std::decay_t<decltype(arrayDomain)>, Particle, aosoaLanes, aosoaLanes == elements>{arrayDomain};
if constexpr (MAPPING == 4)
return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}};
if constexpr (MAPPING == 5)
Expand Down
26 changes: 15 additions & 11 deletions include/llama/mapping/AoSoA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@

namespace llama::mapping
{
/// Array of struct of arrays mapping. Used to create a \ref View via \ref
/// allocView.
/// \tparam Lanes The size of the inner arrays of this array of struct of
/// arrays.
/// \tparam LinearizeArrayDomainFunctor Defines how the
/// user domain should be mapped into linear numbers and how big the linear
/// domain gets.
/// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView.
/// \tparam Lanes The size of the inner arrays of this array of struct of arrays.
/// \tparam LaneAccessOnly If true, the mapping returns the same reference, pointing to the beginning of the lane, for
/// all elements mapped into the same lane. This simplifies the mapping function in case the AoSoA mapping is used
/// in conjunction with a vectorization library.
/// \tparam LinearizeArrayDomainFunctor Defines how the user domain should be mapped into linear numbers and how big
/// the linear domain gets.
template <
typename T_ArrayDomain,
typename T_DatumDomain,
std::size_t Lanes,
bool LaneAccessOnly = false,
typename LinearizeArrayDomainFunctor = LinearizeArrayDomainCpp>
struct AoSoA
{
Expand All @@ -41,10 +42,13 @@ namespace llama::mapping
{
const auto flatArrayIndex = LinearizeArrayDomainFunctor{}(coord, arrayDomainSize);
const auto blockIndex = flatArrayIndex / Lanes;
const auto laneIndex = flatArrayIndex % Lanes;
const auto offset = (sizeOf<DatumDomain> * Lanes) * blockIndex
+ offsetOf<DatumDomain, DatumCoord<DatumDomainCoord...>> * Lanes
+ sizeof(GetType<DatumDomain, DatumCoord<DatumDomainCoord...>>) * laneIndex;
auto offset = (sizeOf<DatumDomain> * Lanes) * blockIndex
+ offsetOf<DatumDomain, DatumCoord<DatumDomainCoord...>> * Lanes;
if constexpr (!LaneAccessOnly)
{
const auto laneIndex = flatArrayIndex % Lanes;
offset += sizeof(GetType<DatumDomain, DatumCoord<DatumDomainCoord...>>) * laneIndex;
}
return {0, offset};
}

Expand Down

0 comments on commit 99e0030

Please sign in to comment.