Skip to content

Commit

Permalink
WIP: rewrite vectoradd example as SoA benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Dec 12, 2022
1 parent d51d755 commit d13540a
Showing 1 changed file with 65 additions and 55 deletions.
120 changes: 65 additions & 55 deletions examples/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,63 +6,73 @@
#include <iostream>
#include <llama/llama.hpp>

constexpr auto mappingIndex = 2; ///< 0 native AoS, 1 native SoA, 2 native SoA (separate blobs), 3 tree AoS, 4 tree SoA
constexpr auto problemSize = 64 * 1024 * 1024; ///< problem size
constexpr auto steps = 10; ///< number of vector adds to perform

using FP = float;
// use different types for the various struct members so that alignment/padding plays a role
using X_t = float;
using Y_t = double;
using Z_t = std::uint16_t;

namespace usellama
{
// clang-format off
/// Empty tag types naming the three fields of the record. Each tag type is
/// declared together with an instance (x, y, z), so a field can be selected
/// either with the instance, e.g. `view(i)(tag::x)`, or with a temporary of the
/// tag type, e.g. `view(i)(tag::X{})`.
namespace tag
{
    struct X{} x;
    struct Y{} y;
    struct Z{} z;
} // namespace tag

/// Record type of one vector element with the fields X, Y and Z.
/// The fields use the three different element types X_t, Y_t and Z_t.
using Vector = llama::Record<
    llama::Field<tag::X, X_t>,
    llama::Field<tag::Y, Y_t>,
    llama::Field<tag::Z, Z_t>
>;
// clang-format on

template<typename View>
void add(const View& a, const View& b, View& c)
void compute(const View& a, const View& b, View& c)
{
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; i++)
{
c(i)(tag::X{}) = a(i)(tag::X{}) + b(i)(tag::X{});
c(i)(tag::Y{}) = a(i)(tag::Y{}) - b(i)(tag::Y{});
c(i)(tag::Z{}) = a(i)(tag::Z{}) * b(i)(tag::Z{});
c(i)(tag::x) = a(i)(tag::x) + b(i)(tag::x);
c(i)(tag::z) = a(i)(tag::y) - b(i)(tag::y);
c(i)(tag::z) = a(i)(tag::z) * b(i)(tag::z);
}
}

template<int MappingIndex>
auto main(std::ofstream& plotFile) -> int
{
std::cout << "\nLLAMA\n";
const auto mappingname = [&]
{
if constexpr(MappingIndex == 0)
return "AoS";
if constexpr(MappingIndex == 1)
return "SoA SB P";
if constexpr(MappingIndex == 2)
return "SoA SB A";
if constexpr(MappingIndex == 3)
return "SoA MB";
}();

std::cout << "\nLLAMA " << mappingname << "\n";
Stopwatch watch;

const auto mapping = [&]
{
using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 1>;
const auto extents = ArrayExtents{problemSize};
if constexpr(mappingIndex == 0)
if constexpr(MappingIndex == 0)
return llama::mapping::AoS{extents, Vector{}};
if constexpr(mappingIndex == 1)
return llama::mapping::SoA{extents, Vector{}};
if constexpr(mappingIndex == 2)
if constexpr(MappingIndex == 1)
return llama::mapping::SoA<ArrayExtents, Vector, false, false>{extents};
if constexpr(MappingIndex == 2)
return llama::mapping::SoA<ArrayExtents, Vector, false, true>{extents};
if constexpr(MappingIndex == 3)
return llama::mapping::SoA<ArrayExtents, Vector, true>{extents};
if constexpr(mappingIndex == 3)
return llama::mapping::tree::Mapping{extents, llama::Tuple{}, Vector{}};
if constexpr(mappingIndex == 4)
return llama::mapping::tree::Mapping{
extents,
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
Vector{}};
}();

auto a = allocViewUninitialized(mapping);
Expand All @@ -73,9 +83,9 @@ namespace usellama
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; ++i)
{
const auto value = static_cast<FP>(i);
a[i](tag::X{}) = value; // X
a[i](tag::Y{}) = value; // Y
const auto value = static_cast<X_t>(i);
a[i](tag::x) = value; // X
a[i](tag::y) = value; // Y
a[i](llama::RecordCoord<2>{}) = value; // Z
b(i) = value; // writes to all (X, Y, Z)
}
Expand All @@ -84,8 +94,8 @@ namespace usellama
double acc = 0;
for(std::size_t s = 0; s < steps; ++s)
{
add(a, b, c);
acc += watch.printAndReset("add");
compute(a, b, c);
acc += watch.printAndReset("compute");
}
plotFile << "LLAMA\t" << acc / steps << '\n';

Expand All @@ -97,12 +107,12 @@ namespace manualAoS
{
/// Plain struct holding the x, y and z component of one element; the manual
/// AoS code works on arrays of this type.
struct Vector
{
    // NOTE(review): all three members use X_t here, whereas the LLAMA record
    // uses X_t, Y_t and Z_t for its fields — confirm whether this is intended.
    X_t x;
    X_t y;
    X_t z;
};

inline void add(const Vector* a, const Vector* b, Vector* c)
inline void compute(const Vector* a, const Vector* b, Vector* c)
{
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; i++)
Expand All @@ -126,7 +136,7 @@ namespace manualAoS
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; ++i)
{
const auto value = static_cast<FP>(i);
const auto value = static_cast<X_t>(i);
a[i].x = value;
a[i].y = value;
a[i].z = value;
Expand All @@ -139,8 +149,8 @@ namespace manualAoS
double acc = 0;
for(std::size_t s = 0; s < steps; ++s)
{
add(a.data(), b.data(), c.data());
acc += watch.printAndReset("add");
compute(a.data(), b.data(), c.data());
acc += watch.printAndReset("compute");
}
plotFile << "AoS\t" << acc / steps << '\n';

Expand All @@ -150,16 +160,16 @@ namespace manualAoS

namespace manualSoA
{
inline void add(
const FP* ax,
const FP* ay,
const FP* az,
const FP* bx,
const FP* by,
const FP* bz,
FP* cx,
FP* cy,
FP* cz)
inline void compute(
const X_t* ax,
const X_t* ay,
const X_t* az,
const X_t* bx,
const X_t* by,
const X_t* bz,
X_t* cx,
X_t* cy,
X_t* cz)
{
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; i++)
Expand Down Expand Up @@ -190,7 +200,7 @@ namespace manualSoA
LLAMA_INDEPENDENT_DATA
for(std::size_t i = 0; i < problemSize; ++i)
{
const auto value = static_cast<FP>(i);
const auto value = static_cast<X_t>(i);
ax[i] = value;
ay[i] = value;
az[i] = value;
Expand All @@ -203,8 +213,8 @@ namespace manualSoA
double acc = 0;
for(std::size_t s = 0; s < steps; ++s)
{
add(ax.data(), ay.data(), az.data(), bx.data(), by.data(), bz.data(), cx.data(), cy.data(), cz.data());
acc += watch.printAndReset("add");
compute(ax.data(), ay.data(), az.data(), bx.data(), by.data(), bz.data(), cx.data(), cy.data(), cz.data());
acc += watch.printAndReset("compute");
}
plotFile << "SoA\t" << acc / steps << '\n';

Expand All @@ -219,14 +229,14 @@ namespace manualAoSoA

/// One block of the manual AoSoA layout: `lanes` x values, followed by `lanes`
/// y values, followed by `lanes` z values. The block is aligned to 64 bytes.
struct alignas(64) VectorBlock
{
    X_t x[lanes];
    X_t y[lanes];
    X_t z[lanes];
};

constexpr auto blocks = problemSize / lanes;

inline void add(const VectorBlock* a, const VectorBlock* b, VectorBlock* c)
inline void compute(const VectorBlock* a, const VectorBlock* b, VectorBlock* c)
{
for(std::size_t bi = 0; bi < problemSize / lanes; bi++)
{
Expand Down Expand Up @@ -274,8 +284,8 @@ namespace manualAoSoA
double acc = 0;
for(std::size_t s = 0; s < steps; ++s)
{
add(a.data(), b.data(), c.data());
acc += watch.printAndReset("add");
compute(a.data(), b.data(), c.data());
acc += watch.printAndReset("compute");
}
plotFile << "AoSoA\t" << acc / steps << '\n';

Expand Down

0 comments on commit d13540a

Please sign in to comment.