Skip to content

Commit

Permalink
update to recent alpaka API changes
Browse files: browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Oct 22, 2020
1 parent (f240fd2), commit eb18cf5
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 95 deletions.
66 changes: 33 additions & 33 deletions examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ auto viewAlpakaBuffer(
Mapping& mapping,
AlpakaBuffer& buffer) // taking mapping by & on purpose, so Mapping can deduce const
{
return llama::View<Mapping, std::byte*>{mapping, {alpaka::mem::view::getPtrNative(buffer)}};
return llama::View<Mapping, std::byte*>{mapping, {alpaka::getPtrNative(buffer)}};
}

// clang-format off
Expand Down Expand Up @@ -77,7 +77,7 @@ struct BlurKernel
template <typename Acc, typename View>
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View oldImage, View newImage) const
{
const auto ti = alpaka::idx::getIdx<alpaka::Grid, alpaka::Threads>(acc);
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);

[[maybe_unused]] auto sharedView = [&] {
if constexpr (SHARED)
Expand All @@ -89,18 +89,18 @@ struct BlurKernel
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
typename View::DatumDomain{});
constexpr auto sharedMemSize = llama::sizeOf<PixelOnAcc> * sharedChunkSize * sharedChunkSize;
auto& sharedMem = alpaka::block::shared::st::allocVar<std::byte[sharedMemSize], __COUNTER__>(acc);
auto& sharedMem = alpaka::allocVar<std::byte[sharedMemSize], __COUNTER__>(acc);
return llama::View{sharedMapping, llama::Array{&sharedMem[0]}};
}
else
return int{}; // dummy
}();

[[maybe_unused]] const auto bi = alpaka::idx::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
[[maybe_unused]] const auto bi = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
if constexpr (SHARED)
{
constexpr auto threadsPerBlock = ElemsPerBlock / Elems;
const auto threadIdxInBlock = alpaka::idx::getIdx<alpaka::Block, alpaka::Threads>(acc);
const auto threadIdxInBlock = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);

const std::size_t bStart[2]
= {bi[0] * ElemsPerBlock + threadIdxInBlock[0], bi[1] * ElemsPerBlock + threadIdxInBlock[1]};
Expand All @@ -114,7 +114,7 @@ struct BlurKernel
for (auto x = bStart[1]; x < bEnd[1]; x += threadsPerBlock)
sharedView(y - bi[0] * ElemsPerBlock, x - bi[1] * ElemsPerBlock) = oldImage(y, x);

alpaka::block::sync::syncBlockThreads(acc);
alpaka::syncBlockThreads(acc);
}

const std::size_t start[2] = {ti[0] * Elems, ti[1] * Elems};
Expand Down Expand Up @@ -157,20 +157,20 @@ struct BlurKernel
int main(int argc, char** argv)
{
// ALPAKA
using Dim = alpaka::dim::DimInt<2>;
using Dim = alpaka::DimInt<2>;

using Acc = alpaka::example::ExampleDefaultAcc<Dim, std::size_t>;
// using Acc = alpaka::acc::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::acc::AccCpuSerial<Dim, Size>;
using Acc = alpaka::ExampleDefaultAcc<Dim, std::size_t>;
// using Acc = alpaka::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::AccCpuSerial<Dim, Size>;

using Queue
= alpaka::queue::Queue<Acc, std::conditional_t<ASYNC, alpaka::queue::NonBlocking, alpaka::queue::Blocking>>;
using DevHost = alpaka::dev::DevCpu;
using DevAcc = alpaka::dev::Dev<Acc>;
using PltfHost = alpaka::pltf::Pltf<DevHost>;
using PltfAcc = alpaka::pltf::Pltf<DevAcc>;
const DevAcc devAcc = alpaka::pltf::getDevByIdx<PltfAcc>(0);
const DevHost devHost = alpaka::pltf::getDevByIdx<PltfHost>(0);
= alpaka::Queue<Acc, std::conditional_t<ASYNC, alpaka::NonBlocking, alpaka::Blocking>>;
using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
using PltfHost = alpaka::Pltf<DevHost>;
using PltfAcc = alpaka::Pltf<DevAcc>;
const DevAcc devAcc = alpaka::getDevByIdx<PltfAcc>(0);
const DevHost devHost = alpaka::getDevByIdx<PltfHost>(0);
std::vector<Queue> queue;
for (std::size_t i = 0; i < CHUNK_COUNT; ++i)
queue.push_back(Queue(devAcc));
Expand Down Expand Up @@ -225,25 +225,25 @@ int main(int argc, char** argv)

Chrono chrono;

auto hostBuffer = alpaka::mem::buf::alloc<std::byte, std::size_t>(devHost, hostBufferSize);
auto hostBuffer = alpaka::allocBuf<std::byte, std::size_t>(devHost, hostBufferSize);
auto hostView = viewAlpakaBuffer(hostMapping, hostBuffer);

std::vector<alpaka::mem::buf::Buf<DevHost, std::byte, alpaka::dim::DimInt<1>, std::size_t>> hostChunkBuffer;
std::vector<alpaka::Buf<DevHost, std::byte, alpaka::DimInt<1>, std::size_t>> hostChunkBuffer;
std::vector<llama::View<decltype(devMapping), std::byte*>> hostChunkView;

std::vector<alpaka::mem::buf::Buf<DevAcc, std::byte, alpaka::dim::DimInt<1>, std::size_t>> devOldBuffer,
std::vector<alpaka::Buf<DevAcc, std::byte, alpaka::DimInt<1>, std::size_t>> devOldBuffer,
devNewBuffer;
std::vector<llama::View<decltype(devMapping), std::byte*>> devOldView, devNewView;

for (std::size_t i = 0; i < CHUNK_COUNT; ++i)
{
hostChunkBuffer.push_back(alpaka::mem::buf::alloc<std::byte, std::size_t>(devHost, devBufferSize));
hostChunkBuffer.push_back(alpaka::allocBuf<std::byte, std::size_t>(devHost, devBufferSize));
hostChunkView.push_back(viewAlpakaBuffer(devMapping, hostChunkBuffer.back()));

devOldBuffer.push_back(alpaka::mem::buf::alloc<std::byte, std::size_t>(devAcc, devBufferSize));
devOldBuffer.push_back(alpaka::allocBuf<std::byte, std::size_t>(devAcc, devBufferSize));
devOldView.push_back(viewAlpakaBuffer(devMapping, devOldBuffer.back()));

devNewBuffer.push_back(alpaka::mem::buf::alloc<std::byte, std::size_t>(devAcc, devBufferSize));
devNewBuffer.push_back(alpaka::allocBuf<std::byte, std::size_t>(devAcc, devBufferSize));
devNewView.push_back(viewAlpakaBuffer(devMapping, devNewBuffer.back()));
}

Expand Down Expand Up @@ -283,16 +283,16 @@ int main(int argc, char** argv)
}

chrono.printAndReset("Init");
const auto elems = alpaka::vec::Vec<Dim, size_t>(elemCount, elemCount);
const auto threads = alpaka::vec::Vec<Dim, size_t>(threadCount, threadCount);
const auto blocks = alpaka::vec::Vec<Dim, size_t>(
const auto elems = alpaka::Vec<Dim, size_t>(elemCount, elemCount);
const auto threads = alpaka::Vec<Dim, size_t>(threadCount, threadCount);
const auto blocks = alpaka::Vec<Dim, size_t>(
static_cast<size_t>((CHUNK_SIZE + ELEMS_PER_BLOCK - 1) / ELEMS_PER_BLOCK),
static_cast<size_t>((CHUNK_SIZE + ELEMS_PER_BLOCK - 1) / ELEMS_PER_BLOCK));
const alpaka::vec::Vec<Dim, size_t> chunks(
const alpaka::Vec<Dim, size_t> chunks(
static_cast<size_t>((img_y + CHUNK_SIZE - 1) / CHUNK_SIZE),
static_cast<size_t>((img_x + CHUNK_SIZE - 1) / CHUNK_SIZE));

const auto workdiv = alpaka::workdiv::WorkDivMembers<Dim, size_t>{blocks, threads, elems};
const auto workdiv = alpaka::WorkDivMembers<Dim, size_t>{blocks, threads, elems};

struct VirtualHostElement
{
Expand Down Expand Up @@ -321,7 +321,7 @@ int main(int argc, char** argv)
auto chunkIt = virtualHostList.begin();
for (chunkNr = 0; chunkNr < CHUNK_COUNT; ++chunkNr)
{
if (alpaka::queue::empty(queue[chunkNr]))
if (alpaka::empty(queue[chunkNr]))
{
// Copy data back
LLAMA_INDEPENDENT_DATA
Expand Down Expand Up @@ -351,23 +351,23 @@ int main(int argc, char** argv)
for (std::size_t x = 0; x < validMiniSize[1]; ++x)
hostChunkView[chunkNr](y, x) = virtualHost(y, x);
}
alpaka::mem::view::copy(queue[chunkNr], devOldBuffer[chunkNr], hostChunkBuffer[chunkNr], devBufferSize);
alpaka::memcpy(queue[chunkNr], devOldBuffer[chunkNr], hostChunkBuffer[chunkNr], devBufferSize);

alpaka::kernel::exec<Acc>(
alpaka::exec<Acc>(
queue[chunkNr],
workdiv,
BlurKernel<elemCount, KERNEL_SIZE, ELEMS_PER_BLOCK>{},
devOldView[chunkNr],
devNewView[chunkNr]);

alpaka::mem::view::copy(queue[chunkNr], hostChunkBuffer[chunkNr], devNewBuffer[chunkNr], devBufferSize);
alpaka::memcpy(queue[chunkNr], hostChunkBuffer[chunkNr], devNewBuffer[chunkNr], devBufferSize);
}

// Wait for not finished tasks on accelerator
auto chunkIt = virtualHostList.begin();
for (std::size_t chunkNr = 0; chunkNr < CHUNK_COUNT; ++chunkNr)
{
alpaka::wait::wait(queue[chunkNr]);
alpaka::wait(queue[chunkNr]);
// Copy data back
for (std::size_t y = 0; y < chunkIt->validMiniSize[0] - 2 * KERNEL_SIZE; ++y)
{
Expand Down
60 changes: 30 additions & 30 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,16 @@ struct UpdateKernel
else
{
constexpr auto sharedMemSize = llama::sizeOf<typename View::DatumDomain> * BlockSize;
auto& sharedMem = alpaka::block::shared::st::allocVar<std::byte[sharedMemSize], __COUNTER__>(acc);
auto& sharedMem = alpaka::allocVar<std::byte[sharedMemSize], __COUNTER__>(acc);
return llama::View{sharedMapping, llama::Array{&sharedMem[0]}};
}
}
else
return int{}; // dummy
}();

const auto ti = alpaka::idx::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
const auto tbi = alpaka::idx::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
const auto tbi = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];

const auto start = ti * Elems;
const auto end = alpaka::math::min(acc, start + Elems, ProblemSize);
Expand All @@ -118,7 +118,7 @@ struct UpdateKernel
LLAMA_INDEPENDENT_DATA
for (auto pos2 = decltype(end2)(0); pos2 + ti < end2; pos2 += BlockSize / Elems)
sharedView(pos2 + tbi) = particles(start2 + pos2 + tbi);
alpaka::block::sync::syncBlockThreads(acc);
alpaka::syncBlockThreads(acc);
}
LLAMA_INDEPENDENT_DATA
for (auto pos2 = decltype(end2)(0); pos2 < end2; ++pos2)
Expand All @@ -129,7 +129,7 @@ struct UpdateKernel
else
pPInteraction(particles(i), particles(start2 + pos2), ts);
if constexpr (USE_SHARED)
alpaka::block::sync::syncBlockThreads(acc);
alpaka::syncBlockThreads(acc);
}
}
};
Expand All @@ -142,7 +142,7 @@ struct MoveKernel
template <typename Acc, typename View>
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View particles, FP ts) const
{
const auto ti = alpaka::idx::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];

const auto start = ti * Elems;
const auto end = alpaka::math::min(acc, start + Elems, ProblemSize);
Expand All @@ -155,20 +155,20 @@ struct MoveKernel

int main(int argc, char** argv)
{
using Dim = alpaka::dim::DimInt<1>;
using Dim = alpaka::DimInt<1>;
using Size = std::size_t;

using Acc = alpaka::example::ExampleDefaultAcc<Dim, Size>;
// using Acc = alpaka::acc::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::acc::AccCpuSerial<Dim, Size>;

using DevHost = alpaka::dev::DevCpu;
using DevAcc = alpaka::dev::Dev<Acc>;
using PltfHost = alpaka::pltf::Pltf<DevHost>;
using PltfAcc = alpaka::pltf::Pltf<DevAcc>;
using Queue = alpaka::queue::Queue<DevAcc, alpaka::queue::Blocking>;
const DevAcc devAcc(alpaka::pltf::getDevByIdx<PltfAcc>(0u));
const DevHost devHost(alpaka::pltf::getDevByIdx<PltfHost>(0u));
using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
// using Acc = alpaka::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::AccCpuSerial<Dim, Size>;

using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
using PltfHost = alpaka::Pltf<DevHost>;
using PltfAcc = alpaka::Pltf<DevAcc>;
using Queue = alpaka::Queue<DevAcc, alpaka::Blocking>;
const DevAcc devAcc(alpaka::getDevByIdx<PltfAcc>(0u));
const DevHost devHost(alpaka::getDevByIdx<PltfHost>(0u));
Queue queue(devAcc);

// NBODY
Expand Down Expand Up @@ -204,13 +204,13 @@ int main(int argc, char** argv)

const auto bufferSize = Size(mapping.getBlobSize(0));

auto hostBuffer = alpaka::mem::buf::alloc<std::byte, Size>(devHost, bufferSize);
auto accBuffer = alpaka::mem::buf::alloc<std::byte, Size>(devAcc, bufferSize);
auto hostBuffer = alpaka::allocBuf<std::byte, Size>(devHost, bufferSize);
auto accBuffer = alpaka::allocBuf<std::byte, Size>(devAcc, bufferSize);

chrono.printAndReset("Alloc");

auto hostView = llama::View{mapping, llama::Array{alpaka::mem::view::getPtrNative(hostBuffer)}};
auto accView = llama::View{mapping, llama::Array{alpaka::mem::view::getPtrNative(accBuffer)}};
auto hostView = llama::View{mapping, llama::Array{alpaka::getPtrNative(hostBuffer)}};
auto accView = llama::View{mapping, llama::Array{alpaka::getPtrNative(accBuffer)}};

chrono.printAndReset("Views");

Expand All @@ -233,29 +233,29 @@ int main(int argc, char** argv)

chrono.printAndReset("Init");

alpaka::mem::view::copy(queue, accBuffer, hostBuffer, bufferSize);
alpaka::memcpy(queue, accBuffer, hostBuffer, bufferSize);
chrono.printAndReset("Copy H->D");

const alpaka::vec::Vec<Dim, Size> Elems(static_cast<Size>(elemCount));
const alpaka::vec::Vec<Dim, Size> threads(static_cast<Size>(threadCount));
const alpaka::Vec<Dim, Size> Elems(static_cast<Size>(elemCount));
const alpaka::Vec<Dim, Size> threads(static_cast<Size>(threadCount));
constexpr auto innerCount = elemCount * threadCount;
const alpaka::vec::Vec<Dim, Size> blocks(static_cast<Size>((PROBLEM_SIZE + innerCount - 1u) / innerCount));
const alpaka::Vec<Dim, Size> blocks(static_cast<Size>((PROBLEM_SIZE + innerCount - 1u) / innerCount));

const auto workdiv = alpaka::workdiv::WorkDivMembers<Dim, Size>{blocks, threads, Elems};
const auto workdiv = alpaka::WorkDivMembers<Dim, Size>{blocks, threads, Elems};

for (std::size_t s = 0; s < STEPS; ++s)
{
UpdateKernel<PROBLEM_SIZE, elemCount, BLOCK_SIZE> updateKernel;
alpaka::kernel::exec<Acc>(queue, workdiv, updateKernel, accView, ts);
alpaka::exec<Acc>(queue, workdiv, updateKernel, accView, ts);

chrono.printAndReset("Update kernel");

MoveKernel<PROBLEM_SIZE, elemCount> moveKernel;
alpaka::kernel::exec<Acc>(queue, workdiv, moveKernel, accView, ts);
alpaka::exec<Acc>(queue, workdiv, moveKernel, accView, ts);
chrono.printAndReset("Move kernel");
}

alpaka::mem::view::copy(queue, hostBuffer, accBuffer, bufferSize);
alpaka::memcpy(queue, hostBuffer, accBuffer, bufferSize);
chrono.printAndReset("Copy D->H");

return 0;
Expand Down
Loading

0 comments on commit eb18cf5

Please sign in to comment.