Skip to content

Commit

Permalink
run clang-format
Browse files: browse the repository at this point in the history
  • Loading branch information
Third Party authored and bernhardmgruber committed Apr 19, 2021
1 parent 829bdb3 commit 8b5e466
Show file tree
Hide file tree
Showing 21 changed files with 452 additions and 288 deletions.
3 changes: 2 additions & 1 deletion examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ struct BlurKernel
{
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);

[[maybe_unused]] auto sharedView = [&] {
[[maybe_unused]] auto sharedView = [&]
{
if constexpr (SHARED)
{
// Using SoA for the shared memory
Expand Down
15 changes: 10 additions & 5 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ struct UpdateKernel
template <typename Acc, typename View>
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View particles) const
{
auto sharedView = [&] {
auto sharedView = [&]
{
// if there is only 1 thread per block, use stack instead of shared memory
if constexpr (BlockSize == 1)
return llama::allocViewStack<View::ArrayDomain::rank, typename View::DatumDomain>();
else
{
constexpr auto sharedMapping = [] {
constexpr auto sharedMapping = []
{
constexpr auto arrayDomain = llama::ArrayDomain{BlockSize};
if constexpr (MappingSM == AoS)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand All @@ -176,7 +178,8 @@ struct UpdateKernel
const auto tbi = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];

// TODO: we could optimize here, because only velocity is ever updated
auto pi = [&] {
auto pi = [&]
{
constexpr auto arrayDomain = llama::ArrayDomain{Elems};
constexpr auto mapping
= llama::mapping::SoA<typename View::ArrayDomain, typename View::DatumDomain, false>{arrayDomain};
Expand Down Expand Up @@ -242,7 +245,8 @@ void run(std::ostream& plotFile)
using PltfAcc = alpaka::Pltf<DevAcc>;
using Queue = alpaka::Queue<DevAcc, alpaka::Blocking>;

auto mappingName = [](int m) -> std::string {
auto mappingName = [](int m) -> std::string
{
if (m == 0)
return "AoS";
if (m == 1)
Expand All @@ -258,7 +262,8 @@ void run(std::ostream& plotFile)
const DevHost devHost(alpaka::getDevByIdx<PltfHost>(0u));
Queue queue(devAcc);

auto mapping = [] {
auto mapping = []
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (MappingGM == AoS)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand Down
3 changes: 2 additions & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ try
// LLAMA
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};

const auto mapping = [&] {
const auto mapping = [&]
{
if constexpr (MAPPING == 0)
return llama::mapping::AoS{arrayDomain, Vector{}};
if constexpr (MAPPING == 1)
Expand Down
3 changes: 2 additions & 1 deletion examples/bufferguard/bufferguard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ void run(const std::string& mappingName)
std::cout << "\nView 2:\n";
printView(view2, rows, cols);

auto copyBlobs = [&](auto& srcView, auto& dstView, auto srcBlobs, auto dstBlobs) {
auto copyBlobs = [&](auto& srcView, auto& dstView, auto srcBlobs, auto dstBlobs)
{
static_assert(srcBlobs.size() == dstBlobs.size());
for (auto i = 0; i < srcBlobs.size(); i++)
{
Expand Down
3 changes: 2 additions & 1 deletion examples/heatequation/heatequation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ try
auto uNext = llama::allocView(mapping);
auto uCurr = llama::allocView(mapping);

auto run = [&](std::string_view updateName, auto update) {
auto run = [&](std::string_view updateName, auto update)
{
// init
for (uint32_t i = 0; i < extent; i++)
uCurr[i] = exactSolution(i * dx, 0.0);
Expand Down
65 changes: 42 additions & 23 deletions examples/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ namespace usellama
template <int Mapping, bool UseAccumulator, std::size_t AoSoALanes = 8 /*AVX2*/>
auto main(std::ostream& plotFile) -> int
{
auto mappingName = [](int m) -> std::string {
auto mappingName = [](int m) -> std::string
{
if (m == 0)
return "AoS";
if (m == 1)
Expand All @@ -127,7 +128,8 @@ namespace usellama
title += " Acc";
std::cout << title << "\n";
Stopwatch watch;
auto mapping = [&] {
auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (Mapping == 0)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand All @@ -149,14 +151,16 @@ namespace usellama
if constexpr (DUMP_MAPPING)
std::ofstream(title + ".svg") << llama::toSvg(mapping);

auto tmapping = [&] {
auto tmapping = [&]
{
if constexpr (TRACE)
return llama::mapping::Trace{std::move(mapping)};
else
return std::move(mapping);
}();

auto hmapping = [&] {
auto hmapping = [&]
{
if constexpr (HEATMAP)
return llama::mapping::Heatmap{std::move(tmapping)};
else
Expand Down Expand Up @@ -730,7 +734,8 @@ namespace manualAoSoA_manualAVX
const __m256 distSqr
= _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vEPS2, xdistanceSqr), ydistanceSqr), zdistanceSqr);
const __m256 distSixth = _mm256_mul_ps(_mm256_mul_ps(distSqr, distSqr), distSqr);
const __m256 invDistCube = [&] {
const __m256 invDistCube = [&]
{
if constexpr (ALLOW_RSQRT)
{
const __m256 r = _mm256_rsqrt_ps(distSixth);
Expand Down Expand Up @@ -1019,7 +1024,8 @@ namespace manualAoSoA_Vc
const vec zdistanceSqr = zdistance * zdistance;
const vec distSqr = EPS2 + xdistanceSqr + ydistanceSqr + zdistanceSqr;
const vec distSixth = distSqr * distSqr * distSqr;
const vec invDistCube = [&] {
const vec invDistCube = [&]
{
if constexpr (ALLOW_RSQRT)
{
const vec r = Vc::rsqrt(distSixth);
Expand Down Expand Up @@ -1383,25 +1389,33 @@ try

int r = 0;
using namespace boost::mp11;
mp_for_each<mp_iota_c<5>>([&](auto i) {
// only AoSoA (3) needs lanes
using Lanes
= std::conditional_t<decltype(i)::value == 3, mp_list_c<std::size_t, 8, 16>, mp_list_c<std::size_t, 0>>;
mp_for_each<Lanes>([&, i](auto lanes) {
mp_for_each<mp_list_c<bool, false, true>>([&, i](auto useAccumulator) {
r += usellama::main<decltype(i)::value, decltype(useAccumulator)::value, decltype(lanes)::value>(
plotFile);
});
mp_for_each<mp_iota_c<5>>(
[&](auto i)
{
// only AoSoA (3) needs lanes
using Lanes
= std::conditional_t<decltype(i)::value == 3, mp_list_c<std::size_t, 8, 16>, mp_list_c<std::size_t, 0>>;
mp_for_each<Lanes>(
[&, i](auto lanes)
{
mp_for_each<mp_list_c<bool, false, true>>(
[&, i](auto useAccumulator) {
r += usellama::
main<decltype(i)::value, decltype(useAccumulator)::value, decltype(lanes)::value>(
plotFile);
});
});
});
});
r += manualAoS::main<false>(plotFile);
r += manualAoS::main<true>(plotFile);
r += manualSoA::main<false>(plotFile);
r += manualSoA::main<true>(plotFile);
mp_for_each<mp_list_c<std::size_t, 8, 16>>([&](auto lanes) {
r += manualAoSoA::main<false, false, decltype(lanes)::value>(plotFile);
r += manualAoSoA::main<true, false, decltype(lanes)::value>(plotFile);
});
mp_for_each<mp_list_c<std::size_t, 8, 16>>(
[&](auto lanes)
{
r += manualAoSoA::main<false, false, decltype(lanes)::value>(plotFile);
r += manualAoSoA::main<true, false, decltype(lanes)::value>(plotFile);
});
// r += manualAoSoA::main<false, true>(plotFile);
// r += manualAoSoA::main<true, true>(plotFile);
#ifdef __AVX2__
Expand All @@ -1417,9 +1431,14 @@ try
{
if (useUpdate1 && tiled)
continue;
mp_for_each<mp_list_c<bool, false, true>>([&](auto useAccumulator) {
r += manualAoSoA_Vc::main<decltype(useAccumulator)::value>(plotFile, threads, useUpdate1, tiled);
});
mp_for_each<mp_list_c<bool, false, true>>(
[&](auto useAccumulator) {
r += manualAoSoA_Vc::main<decltype(useAccumulator)::value>(
plotFile,
threads,
useUpdate1,
tiled);
});
}
#endif

Expand Down
19 changes: 12 additions & 7 deletions examples/nbody_benchmark/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ void run(std::ostream& plotFile)

constexpr FP ts = 0.0001f;

auto mapping = [&] {
auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (Mapping == 0)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand Down Expand Up @@ -155,13 +156,17 @@ try
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << "\"alignment\"\t\"AoS\"\t\"SoA\"\t\"SoA MB\"\n";

mp_for_each<mp_iota_c<28>>([&](auto ae) {
mp_for_each<mp_list_c<std::size_t, 0, 1, 2>>([&](auto m) {
constexpr auto mapping = decltype(m)::value;
constexpr auto alignment = std::size_t{1} << decltype(ae)::value;
run<mapping, alignment>(plotFile);
mp_for_each<mp_iota_c<28>>(
[&](auto ae)
{
mp_for_each<mp_list_c<std::size_t, 0, 1, 2>>(
[&](auto m)
{
constexpr auto mapping = decltype(m)::value;
constexpr auto alignment = std::size_t{1} << decltype(ae)::value;
run<mapping, alignment>(plotFile);
});
});
});

std::cout << "Plot with: ./nbody.sh\n";
std::ofstream{"nbody.sh"} << fmt::format(
Expand Down
3 changes: 2 additions & 1 deletion examples/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ namespace usellama
std::cout << "\nLLAMA\n";
Stopwatch watch;

const auto mapping = [&] {
const auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (MAPPING == 0)
return llama::mapping::AoS{arrayDomain, Vector{}};
Expand Down
Loading

0 comments on commit 8b5e466

Please sign in to comment.