Enable multiple blobs for SoA mapping #111

Merged (8 commits, Oct 12, 2020)
6 changes: 4 additions & 2 deletions examples/alpaka/nbody/nbody.cpp
@@ -16,7 +16,7 @@
#include <random>
#include <utility>

-constexpr auto MAPPING = 0; /// 0 native AoS, 1 native SoA, 2 tree AoS, 3 tree SoA
+constexpr auto MAPPING = 0; ///< 0 native AoS, 1 native SoA, 2 native SoA (separate blobs), 3 tree AoS, 4 tree SoA
constexpr auto USE_SHARED = true; ///< defines whether shared memory shall be used
constexpr auto USE_SHARED_TREE = true; ///< defines whether the shared memory shall use tree mapping or
///< native mapping
@@ -187,8 +187,10 @@ int main(int argc, char** argv)
if constexpr (MAPPING == 1)
return llama::mapping::SoA{arrayDomain, Particle{}};
if constexpr (MAPPING == 2)
-return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}};
+return llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}};
if constexpr (MAPPING == 3)
+return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}};
+if constexpr (MAPPING == 4)
return llama::mapping::tree::Mapping{
arrayDomain,
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
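A note for readers trying the new mode: MAPPING == 2 now selects the native SoA mapping with one blob per leaf field, enabled by the extra std::true_type{} tag argument, and the two tree mappings move to 3 and 4. The following is a minimal sketch, not part of this diff, of how the selected mapping is typically turned into a view; it assumes llama::allocView and this example's Particle, arrayDomain, and tags:

// Sketch only (assumed helpers: llama::allocView plus tag::Pos/tag::X from this example)
const auto mapping = llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}};
auto view = llama::allocView(mapping); // allocates mapping.blobCount buffers, one per leaf field
view(0u)(tag::Pos{}, tag::X{}) = 1.0;  // element access syntax is unaffected by the blob split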
7 changes: 5 additions & 2 deletions examples/alpaka/vectoradd/vectoradd.cpp
@@ -16,7 +16,8 @@
#include <random>
#include <utility>

-constexpr auto MAPPING = 0; /// 0 native AoS, 1 native SoA, 2 tree AoS, 3 tree SoA
+constexpr auto MAPPING
+    = 1; ///< 0 native AoS, 1 native SoA, 2 native SoA (separate blobs, does not work yet), 3 tree AoS, 4 tree SoA
constexpr auto PROBLEM_SIZE = 64 * 1024 * 1024;
constexpr auto BLOCK_SIZE = 256;
constexpr auto STEPS = 10;
@@ -86,8 +87,10 @@ int main(int argc, char** argv)
if constexpr (MAPPING == 1)
return llama::mapping::SoA{arrayDomain, Vector{}};
if constexpr (MAPPING == 2)
-return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Vector{}};
+return llama::mapping::SoA{arrayDomain, Vector{}, std::true_type{}};
if constexpr (MAPPING == 3)
+return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Vector{}};
+if constexpr (MAPPING == 4)
return llama::mapping::tree::Mapping{
arrayDomain,
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
4 changes: 2 additions & 2 deletions examples/nbody/CMakeLists.txt
@@ -10,8 +10,8 @@ target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE
-fno-math-errno # sqrt prevents vectorization otherwise
-# -march=native
-# -ffast-math
+-march=native
+-ffast-math
)
endif()

6 changes: 4 additions & 2 deletions examples/nbody/nbody.cpp
@@ -7,7 +7,7 @@

// needs -fno-math-errno, so std::sqrt() can be vectorized

-constexpr auto MAPPING = 1; ///< 0 native AoS, 1 native SoA, 2 tree AoS, 3 tree SoA
+constexpr auto MAPPING = 2; ///< 0 native AoS, 1 native SoA, 2 native SoA (separate blobs), 3 tree AoS, 4 tree SoA
constexpr auto PROBLEM_SIZE = 16 * 1024; ///< total number of particles
constexpr auto STEPS = 5; ///< number of steps to calculate
constexpr auto TRACE = false;
@@ -86,8 +86,10 @@ namespace usellama
if constexpr (MAPPING == 1)
return llama::mapping::SoA{arrayDomain, Particle{}};
if constexpr (MAPPING == 2)
-return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}};
+return llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}};
if constexpr (MAPPING == 3)
+return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Particle{}};
+if constexpr (MAPPING == 4)
return llama::mapping::tree::Mapping{
arrayDomain,
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
6 changes: 4 additions & 2 deletions examples/vectoradd/vectoradd.cpp
@@ -3,7 +3,7 @@
#include <llama/llama.hpp>
#include <utility>

-constexpr auto MAPPING = 3; /// 0 native AoS, 1 native SoA, 2 tree AoS, 3 tree SoA
+constexpr auto MAPPING = 2; ///< 0 native AoS, 1 native SoA, 2 native SoA (separate blobs), 3 tree AoS, 4 tree SoA
constexpr auto PROBLEM_SIZE = 64 * 1024 * 1024; ///< problem size
constexpr auto STEPS = 10; ///< number of vector adds to perform

@@ -48,8 +48,10 @@ namespace usellama
if constexpr (MAPPING == 1)
return llama::mapping::SoA{arrayDomain, Vector{}};
if constexpr (MAPPING == 2)
-return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Vector{}};
+return llama::mapping::SoA{arrayDomain, Vector{}, std::true_type{}};
if constexpr (MAPPING == 3)
+return llama::mapping::tree::Mapping{arrayDomain, llama::Tuple{}, Vector{}};
+if constexpr (MAPPING == 4)
return llama::mapping::tree::Mapping{
arrayDomain,
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
18 changes: 9 additions & 9 deletions include/llama/Array.hpp
@@ -19,28 +19,28 @@ namespace llama
static constexpr std::size_t rank = N;
T element[N > 0 ? N : 1];

-LLAMA_FN_HOST_ACC_INLINE T* begin()
+LLAMA_FN_HOST_ACC_INLINE constexpr T* begin()
{
return &element[0];
}

-LLAMA_FN_HOST_ACC_INLINE const T* begin() const
+LLAMA_FN_HOST_ACC_INLINE constexpr const T* begin() const
{
return &element[0];
}

-LLAMA_FN_HOST_ACC_INLINE T* end()
+LLAMA_FN_HOST_ACC_INLINE constexpr T* end()
{
return &element[N];
};

-LLAMA_FN_HOST_ACC_INLINE const T* end() const
+LLAMA_FN_HOST_ACC_INLINE constexpr const T* end() const
{
return &element[N];
};

template <typename IndexType>
-LLAMA_FN_HOST_ACC_INLINE auto operator[](IndexType&& idx) -> T&
+LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](IndexType&& idx) -> T&
{
return element[idx];
}
@@ -51,15 +51,15 @@ namespace llama
return element[idx];
}

-LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const Array<T, N>& a, const Array<T, N>& b) -> bool
+LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator==(const Array<T, N>& a, const Array<T, N>& b) -> bool
{
for (std::size_t i = 0; i < N; ++i)
if (a.element[i] != b.element[i])
return false;
return true;
}

-LLAMA_FN_HOST_ACC_INLINE friend auto operator+(const Array<T, N>& a, const Array<T, N>& b) -> Array
+LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array<T, N>& a, const Array<T, N>& b) -> Array
{
Array temp;
for (std::size_t i = 0; i < N; ++i)
@@ -68,13 +68,13 @@
}

template <std::size_t I>
-auto get() -> T&
+constexpr auto get() -> T&
{
return element[I];
}

template <std::size_t I>
-auto get() const -> const T&
+constexpr auto get() const -> const T&
{
return element[I];
}
34 changes: 25 additions & 9 deletions include/llama/DumpMapping.hpp
@@ -60,9 +60,9 @@ namespace llama
}

template <typename Mapping, typename ArrayDomain, std::size_t... Coords>
-auto mappingOffset(const Mapping& mapping, const ArrayDomain& udCoord, DatumCoord<Coords...>)
+auto mappingBlobNrAndOffset(const Mapping& mapping, const ArrayDomain& udCoord, DatumCoord<Coords...>)
{
-return mapping.template getBlobNrAndOffset<Coords...>(udCoord).offset;
+return mapping.template getBlobNrAndOffset<Coords...>(udCoord);
}
} // namespace internal

@@ -82,7 +82,7 @@ namespace llama
ArrayDomain udCoord;
std::vector<std::size_t> ddIndices;
std::vector<std::string> ddTags;
-std::size_t offset;
+NrAndOffset nrAndOffset;
std::size_t size;
};
std::vector<DatumInfo> infos;
@@ -95,7 +95,7 @@
udCoord,
internal::toVec(coord),
internal::tagsAsStrings<DatumDomain>(coord),
-internal::mappingOffset(mapping, udCoord, coord),
+internal::mappingBlobNrAndOffset(mapping, udCoord, coord),
size});
});
}
@@ -140,8 +140,16 @@

for (const auto& info : infos)
{
-const auto x = (info.offset % wrapByteCount) * byteSizeInPixel;
-const auto y = (info.offset / wrapByteCount) * byteSizeInPixel;
+std::size_t blobY = 0;
+for (auto i = 0; i < info.nrAndOffset.nr; i++)
+{
+auto blobRows = (mapping.getBlobSize(i) + wrapByteCount - 1) / wrapByteCount;
+blobRows++; // one row gap between blobs
+blobY += blobRows * byteSizeInPixel;
+}
+
+const auto x = (info.nrAndOffset.offset % wrapByteCount) * byteSizeInPixel;
+const auto y = (info.nrAndOffset.offset / wrapByteCount) * byteSizeInPixel + blobY;

const auto fill = boost::hash_value(info.ddIndices) & 0xFFFFFF;

@@ -193,7 +201,7 @@
ArrayDomain udCoord;
std::vector<std::size_t> ddIndices;
std::vector<std::string> ddTags;
-std::size_t offset;
+NrAndOffset nrAndOffset;
std::size_t size;
};
std::vector<DatumInfo> infos;
@@ -206,11 +214,13 @@
udCoord,
internal::toVec(coord),
internal::tagsAsStrings<DatumDomain>(coord),
-internal::mappingOffset(mapping, udCoord, coord),
+internal::mappingBlobNrAndOffset(mapping, udCoord, coord),
size});
});
}
-std::sort(begin(infos), end(infos), [](const DatumInfo& a, const DatumInfo& b) { return a.offset < b.offset; });
+std::sort(begin(infos), end(infos), [](const DatumInfo& a, const DatumInfo& b) {
+return std::tie(a.nrAndOffset.nr, a.nrAndOffset.offset) < std::tie(b.nrAndOffset.nr, b.nrAndOffset.offset);
+});

auto formatDDTags = [](const std::vector<std::string>& tags) {
std::string s;
@@ -308,8 +318,14 @@
</header>
)");

+auto currentBlobNr = std::numeric_limits<std::size_t>::max();
for (const auto& info : infos)
{
+if (currentBlobNr != info.nrAndOffset.nr)
+{
+currentBlobNr = info.nrAndOffset.nr;
+svg += fmt::format("<h1>Blob: {}</h1>", currentBlobNr);
+}
const auto width = byteSizeInPixel * info.size;
svg += fmt::format(
R"(<div class="box {0}" title="{1} {2}">{1} {2}</div>)",
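The vertical placement logic above is plain ceiling division: every blob before the current one contributes ceil(blobSize / wrapByteCount) rows plus one spacer row. A self-contained sketch of the same arithmetic with made-up sizes, for anyone checking the math:

// Standalone sketch of the blob stacking math above; all sizes are hypothetical.
#include <cstddef>
#include <iostream>

int main()
{
    const std::size_t wrapByteCount = 64;  // bytes drawn per row
    const std::size_t byteSizeInPixel = 8; // pixels per byte
    const std::size_t blobSizes[] = {300, 120};

    std::size_t blobY = 0; // y pixel at which the current blob starts
    for (std::size_t i = 0; i < 2; i++)
    {
        const auto rows = (blobSizes[i] + wrapByteCount - 1) / wrapByteCount; // ceil
        std::cout << "blob " << i << ": y = " << blobY << "px, " << rows << " rows\n";
        blobY += (rows + 1) * byteSizeInPixel; // +1 spacer row between blobs
    }
}

So blob 0 (300 bytes) spans 5 rows starting at y = 0, and blob 1 starts at y = (5 + 1) * 8 = 48, exactly what the per-datum loop above computes.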
72 changes: 63 additions & 9 deletions include/llama/mapping/SoA.hpp
@@ -16,34 +16,88 @@ namespace llama::mapping
template <
typename T_ArrayDomain,
typename T_DatumDomain,
+typename SeparateBuffers = std::false_type, // TODO: make this a bool. Needs work in SplitMapping
typename LinearizeArrayDomainFunctor = LinearizeArrayDomainCpp>
struct SoA
{
using ArrayDomain = T_ArrayDomain;
using DatumDomain = T_DatumDomain;
-static constexpr std::size_t blobCount = 1;
+static constexpr std::size_t blobCount = []() constexpr
+{
+if constexpr (SeparateBuffers::value)
+{
+std::size_t count = 0;
+forEach<DatumDomain>([&](auto) constexpr { count++; });
+return count;
+}
+else
+return 1;
+}
+();

SoA() = default;

LLAMA_FN_HOST_ACC_INLINE
-SoA(ArrayDomain size, DatumDomain = {}) : arrayDomainSize(size)
+SoA(ArrayDomain size, DatumDomain = {}, SeparateBuffers = {}) : arrayDomainSize(size)
{
}

LLAMA_FN_HOST_ACC_INLINE
-auto getBlobSize(std::size_t) const -> std::size_t
+auto getBlobSize(std::size_t blobIndex) const -> std::size_t
{
-return LinearizeArrayDomainFunctor{}.size(arrayDomainSize) * sizeOf<DatumDomain>;
+if constexpr (SeparateBuffers::value)
+{
+static constexpr llama::Array<std::size_t, blobCount> typeSizes = []() constexpr
+{
+llama::Array<std::size_t, blobCount> r{};
+std::size_t i = 0;
+forEach<DatumDomain>([&](auto coord) constexpr {
+r[i++] = sizeof(GetType<DatumDomain, decltype(coord)>);
+});
+return r;
+}
+();
+return LinearizeArrayDomainFunctor{}.size(arrayDomainSize) * typeSizes[blobIndex];
+}
+else
+return LinearizeArrayDomainFunctor{}.size(arrayDomainSize) * sizeOf<DatumDomain>;
}

template <std::size_t... DatumDomainCoord>
LLAMA_FN_HOST_ACC_INLINE auto getBlobNrAndOffset(ArrayDomain coord) const -> NrAndOffset
{
-LLAMA_FORCE_INLINE_RECURSIVE
-const auto offset = LinearizeArrayDomainFunctor{}(coord, arrayDomainSize)
-* sizeof(GetType<DatumDomain, DatumCoord<DatumDomainCoord...>>)
-+ offsetOf<DatumDomain, DatumDomainCoord...> * LinearizeArrayDomainFunctor{}.size(arrayDomainSize);
-return {0, offset};
+if constexpr (SeparateBuffers::value)
+{
+using TargetDatumCoord = DatumCoord<DatumDomainCoord...>;
+constexpr auto blob = [&]() constexpr
+{
+std::size_t index = 0;
+bool found = false;
+forEach<DatumDomain>([&](auto c) constexpr {
+if constexpr (std::is_same_v<decltype(c), TargetDatumCoord>)
+found = true;
+else if (!found)
+index++;
+});
+if (!found)
+throw "Passed TargetDatumCoord must be in datum domain";
+return index;
+}
+();
+
+LLAMA_FORCE_INLINE_RECURSIVE
+const auto offset = LinearizeArrayDomainFunctor{}(coord, arrayDomainSize)
+* sizeof(GetType<DatumDomain, DatumCoord<DatumDomainCoord...>>);
+return {blob, offset};
+}
+else
+{
+LLAMA_FORCE_INLINE_RECURSIVE
+const auto offset = LinearizeArrayDomainFunctor{}(coord, arrayDomainSize)
+* sizeof(GetType<DatumDomain, DatumCoord<DatumDomainCoord...>>)
++ offsetOf<DatumDomain, DatumDomainCoord...> * LinearizeArrayDomainFunctor{}.size(arrayDomainSize);
+return {0, offset};
+}
}

ArrayDomain arrayDomainSize;
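Interface-wise, the new SeparateBuffers parameter only changes blobCount and the blob arithmetic; construction and element access stay the same. A hedged usage sketch follows (the ArrayDomain spelling and 1D extent are assumptions; Particle is the datum domain from the examples above):

// Sketch: single-blob vs. multiblob SoA; llama::ArrayDomain<1> spelling is assumed.
auto arrayDomain = llama::ArrayDomain<1>{1024};

auto soa = llama::mapping::SoA{arrayDomain, Particle{}};
static_assert(decltype(soa)::blobCount == 1); // one buffer, fields at different offsets

auto soaMB = llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}};
// blobCount now equals the number of leaf fields in Particle; blob i holds
// 1024 * sizeof(field i) bytes, and getBlobNrAndOffset returns that blob's index.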
2 changes: 1 addition & 1 deletion include/llama/mapping/SplitMapping.hpp
@@ -77,7 +77,7 @@

LLAMA_FN_HOST_ACC_INLINE auto getBlobSize(std::size_t) const -> std::size_t
{
-return mapping1BlobSize + mapping2.getBlobSize();
+return mapping1BlobSize + mapping2.getBlobSize(0);
}

template <std::size_t... DatumDomainCoord>
6 changes: 5 additions & 1 deletion tests/dump.cpp
@@ -81,6 +81,11 @@ TEST_CASE("dump.SoA")
dump(llama::mapping::SoA{arrayDomain, Particle{}}, "SoAMapping");
}

TEST_CASE("dump.SoA.MultiBlob")
{
dump(llama::mapping::SoA{arrayDomain, Particle{}, std::true_type{}}, "SoAMappingMultiBlob");
}

TEST_CASE("dump.AoSoA.8")
{
dump(llama::mapping::AoSoA<ArrayDomain, Particle, 8>{arrayDomain}, "AoSoAMapping8");
@@ -91,7 +96,6 @@ TEST_CASE("dump.AoSoA.32")
dump(llama::mapping::AoSoA<ArrayDomain, Particle, 32>{arrayDomain}, "AoSoAMapping32");
}

-
TEST_CASE("dump.SplitMapping")
{
dump(