convert daxpy example to alpaka
bernhardmgruber committed Feb 11, 2022
1 parent c441afc commit 2fc3972
Showing 3 changed files with 85 additions and 25 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -86,7 +86,6 @@ if (LLAMA_BUILD_EXAMPLES)
add_subdirectory("examples/bytesplit")
add_subdirectory("examples/bitpackint")
add_subdirectory("examples/bitpackfloat")
add_subdirectory("examples/daxpy")

# alpaka examples
find_package(alpaka 0.7.0 QUIET)
@@ -95,6 +94,7 @@ if (LLAMA_BUILD_EXAMPLES)
add_subdirectory("examples/alpaka/vectoradd")
add_subdirectory("examples/alpaka/asyncblur")
add_subdirectory("examples/alpaka/pic")
add_subdirectory("examples/alpaka/daxpy")
elseif()
message(WARNING "Could not find alpaka. Alpaka examples are disabled.")
endif()
8 changes: 4 additions & 4 deletions examples/daxpy/CMakeLists.txt → examples/alpaka/daxpy/CMakeLists.txt
@@ -1,15 +1,15 @@
cmake_minimum_required (VERSION 3.15)
project(llama-daxpy CXX)
project(llama-alpaka-daxpy CXX)

#find_package(Vc QUIET)
find_package(OpenMP REQUIRED)
if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()

add_executable(${PROJECT_NAME} daxpy.cpp)
find_package(alpaka 0.7.0 REQUIRED)
alpaka_add_executable(${PROJECT_NAME} daxpy.cpp ../../common/Stopwatch.hpp ../../common/hostname.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama OpenMP::OpenMP_CXX)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama OpenMP::OpenMP_CXX alpaka::alpaka)

if (MSVC)
target_compile_options(${PROJECT_NAME} PRIVATE /arch:AVX2 /fp:fast)
100 changes: 80 additions & 20 deletions examples/daxpy/daxpy.cpp → examples/alpaka/daxpy/daxpy.cpp
@@ -1,20 +1,25 @@
#include "../common/Stopwatch.hpp"
#include "../common/hostname.hpp"
#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <fmt/core.h>
#include <fstream>
#include <iomanip>
#include <llama/llama.hpp>
#include <omp.h>
#include <vector>

constexpr auto PROBLEM_SIZE = 1024 * 1024 * 128;
constexpr auto PROBLEM_SIZE = std::size_t{1024 * 1024 * 128};
constexpr auto BLOCK_SIZE = std::size_t{256};
constexpr auto STEPS = 5;
constexpr auto alpha = 3.14;

static_assert(PROBLEM_SIZE % BLOCK_SIZE == 0);

void daxpy(std::ofstream& plotFile)
{
const auto* title = "std::vector";
const auto* title = "baseline std::vector";
std::cout << title << "\n";

Stopwatch watch;
@@ -41,37 +46,92 @@ void daxpy(std::ofstream& plotFile)
plotFile << std::quoted(title) << "\t" << sum / STEPS << '\n';
}

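// Compile-time switch: detect the CUDA GPU accelerator so the work division below can use a larger block size on GPUs.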
template<typename Acc>
inline constexpr bool isGPU = false;

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
template<typename Dim, typename Idx>
inline constexpr bool isGPU<alpaka::AccGpuCudaRt<Dim, Idx>> = true;
#endif

template<typename Mapping>
void daxpy_llama(std::string mappingName, std::ofstream& plotFile, Mapping mapping)
void daxpy_alpaka_llama(std::string mappingName, std::ofstream& plotFile, Mapping mapping)
{
std::size_t storageSize = 0;
for(std::size_t i = 0; i < mapping.blobCount; i++)
storageSize += mapping.blobSize(i);

auto title = "LLAMA " + std::move(mappingName);
auto title = "alpaka/LLAMA " + std::move(mappingName);
fmt::print("{0} (blobs size: {1}MiB)\n", title, storageSize / 1024 / 1024);

using Dim = alpaka::DimInt<1>;
using Size = std::size_t;
using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
using Dev = alpaka::Dev<Acc>;
using Queue = alpaka::Queue<Dev, alpaka::Blocking>;
const auto devAcc = alpaka::getDevByIdx<alpaka::Pltf<Dev>>(0u);
const auto devHost = alpaka::getDevByIdx<alpaka::PltfCpu>(0u);
auto queue = Queue(devAcc);

Stopwatch watch;
auto x = llama::allocViewUninitialized(mapping);
auto y = llama::allocViewUninitialized(mapping);
auto z = llama::allocViewUninitialized(mapping);
watch.printAndReset("alloc");
watch.printAndReset("alloc host");

for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
{
x[i] = static_cast<double>(i);
y[i] = static_cast<double>(i);
}
watch.printAndReset("init");
watch.printAndReset("init host");

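// Allocate raw device byte buffers of the blob size, one per view (x, y, z).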
static_assert(Mapping::blobCount == 1); // make our life simpler
const auto bufferSize = mapping.blobSize(0);
const auto extents = alpaka::Vec<Dim, Size>{bufferSize};
auto bufferX = alpaka::allocBuf<std::byte, Size>(devAcc, extents);
auto bufferY = alpaka::allocBuf<std::byte, Size>(devAcc, extents);
auto bufferZ = alpaka::allocBuf<std::byte, Size>(devAcc, extents);
watch.printAndReset("alloc device");

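// Wrap the host blobs in alpaka views and copy them into the device buffers.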
{
auto vx = alpaka::createView(devHost, &x.storageBlobs[0][0], extents);
auto vy = alpaka::createView(devHost, &y.storageBlobs[0][0], extents);
alpaka::memcpy(queue, bufferX, vx, extents);
alpaka::memcpy(queue, bufferY, vy, extents);
}
watch.printAndReset("copy H->D");

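// Construct LLAMA views on top of the raw device buffers, reusing the same mapping.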
auto viewX = llama::View{mapping, llama::Array{alpaka::getPtrNative(bufferX)}};
auto viewY = llama::View{mapping, llama::Array{alpaka::getPtrNative(bufferY)}};
auto viewZ = llama::View{mapping, llama::Array{alpaka::getPtrNative(bufferZ)}};

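// One thread per element: full blocks on the GPU, single-threaded blocks on CPU accelerators.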
constexpr auto blockSize = isGPU<Acc> ? BLOCK_SIZE : 1;
const auto workdiv = alpaka::WorkDivMembers<Dim, Size>(
alpaka::Vec<Dim, Size>{PROBLEM_SIZE / blockSize},
alpaka::Vec<Dim, Size>{blockSize},
alpaka::Vec<Dim, Size>{Size{1}});
watch = {};

double sum = 0;
for(std::size_t s = 0; s < STEPS; ++s)
{
#pragma omp parallel for
for(std::ptrdiff_t i = 0; i < PROBLEM_SIZE; i++)
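// The OpenMP loop becomes an alpaka kernel: each thread computes one element from its global thread index.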
auto kernel
= [] ALPAKA_FN_ACC(const Acc& acc, decltype(viewX) x, decltype(viewY) y, double alpha, decltype(viewZ) z)
{
const auto [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
z[i] = alpha * x[i] + y[i];
};
alpaka::exec<Acc>(queue, workdiv, kernel, viewX, viewY, alpha, viewZ);
sum += watch.printAndReset("daxpy");
}

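// Copy the result back into the host view's blob.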
{
auto vz = alpaka::createView(devHost, &z.storageBlobs[0][0], extents);
alpaka::memcpy(queue, vz, bufferZ, extents);
}
watch.printAndReset("copy D->H");

plotFile << std::quoted(title) << "\t" << sum / STEPS << '\n';
}

@@ -114,36 +174,36 @@ set ylabel "runtime [s]"
daxpy(plotFile);

const auto extents = llama::ArrayExtents{PROBLEM_SIZE};
daxpy_llama("AoS", plotFile, llama::mapping::AoS{extents, double{}});
daxpy_llama("SoA", plotFile, llama::mapping::SoA{extents, double{}});
daxpy_llama(
daxpy_alpaka_llama("AoS", plotFile, llama::mapping::AoS{extents, double{}});
daxpy_alpaka_llama("SoA", plotFile, llama::mapping::SoA<llama::ArrayExtentsDynamic<1>, double, false>{extents});
daxpy_alpaka_llama(
"Bytesplit",
plotFile,
llama::mapping::Bytesplit<llama::ArrayExtentsDynamic<1>, double, llama::mapping::BindAoS<>::fn>{extents});
daxpy_llama(
daxpy_alpaka_llama(
"ChangeType D->F",
plotFile,
llama::mapping::ChangeType<
llama::ArrayExtentsDynamic<1>,
double,
llama::mapping::BindAoS<>::fn,
boost::mp11::mp_list<boost::mp11::mp_list<double, float>>>{extents});
daxpy_llama("Bitpack 52^{11}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 11, 52, double{}});
daxpy_llama(
daxpy_alpaka_llama("Bitpack 52^{11}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 11, 52, double{}});
daxpy_alpaka_llama(
"Bitpack 52^{11} CT",
plotFile,
llama::mapping::
BitPackedFloatSoA<llama::ArrayExtentsDynamic<1>, double, llama::Constant<11>, llama::Constant<52>>{
extents});
daxpy_llama("Bitpack 23^{8}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 8, 23, double{}});
daxpy_llama(
daxpy_alpaka_llama("Bitpack 23^{8}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 8, 23, double{}});
daxpy_alpaka_llama(
"Bitpack 23^{8} CT",
plotFile,
llama::mapping::
BitPackedFloatSoA<llama::ArrayExtentsDynamic<1>, double, llama::Constant<8>, llama::Constant<23>>{
extents});
daxpy_llama("Bitpack 10^{5}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 5, 10, double{}});
daxpy_llama(
daxpy_alpaka_llama("Bitpack 10^{5}", plotFile, llama::mapping::BitPackedFloatSoA{extents, 5, 10, double{}});
daxpy_alpaka_llama(
"Bitpack 10^{5} CT",
plotFile,
llama::mapping::
