Skip to content

Commit

Permalink
support extents with arbitrary value types
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed May 9, 2022
1 parent 55f841c commit c0c49c1
Show file tree
Hide file tree
Showing 58 changed files with 666 additions and 444 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if (BUILD_TESTING)
target_compile_options(tests PRIVATE /permissive- /constexpr:steps10000000 /diagnostics:caret)
else()
target_compile_features(tests PRIVATE cxx_std_20)
target_compile_options(tests PRIVATE -Wall -Wextra -Wno-missing-braces)
target_compile_options(tests PRIVATE -Wall -Wextra -Werror=narrowing -Wno-missing-braces)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
target_compile_options(tests PRIVATE -fconstexpr-steps=10000000)
Expand Down
2 changes: 1 addition & 1 deletion docs/pages/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Array dimensions

.. doxygenstruct:: llama::ArrayExtents
.. doxygentypedef:: llama::ArrayExtentsDynamic
.. doxygentypedef:: llama::ArrayExtentsStatic
.. doxygentypedef:: llama::ArrayExtentsNCube
.. doxygenstruct:: llama::ArrayIndex

.. doxygenstruct:: llama::ArrayIndexIterator
Expand Down
8 changes: 5 additions & 3 deletions examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,10 @@ struct BlurKernel
{
// Using SoA for the shared memory
constexpr auto sharedChunkSize = ElemsPerBlock + 2 * KernelSize;
constexpr auto sharedMapping = llama::mapping::
SoA<llama::ArrayExtents<sharedChunkSize, sharedChunkSize>, typename View::RecordDim, false>{};
constexpr auto sharedMapping = llama::mapping::SoA<
llama::ArrayExtents<std::size_t, sharedChunkSize, sharedChunkSize>,
typename View::RecordDim,
false>{};
auto& sharedMem = alpaka::declareSharedVar<std::byte[sharedMapping.blobSize(0)], __COUNTER__>(acc);
return llama::View(sharedMapping, llama::Array<std::byte*, 1>{&sharedMem[0]});
}
Expand Down Expand Up @@ -211,7 +213,7 @@ try
const auto hostMapping
= llama::mapping::tree::Mapping{llama::ArrayExtents{buffer_y, buffer_x}, treeOperationList, Pixel{}};
const auto devMapping = llama::mapping::tree::Mapping{
llama::ArrayExtents<CHUNK_SIZE + 2 * KERNEL_SIZE, CHUNK_SIZE + 2 * KERNEL_SIZE>{},
llama::ArrayExtents<std::size_t, CHUNK_SIZE + 2 * KERNEL_SIZE, CHUNK_SIZE + 2 * KERNEL_SIZE>{},
treeOperationList,
PixelOnAcc{}};
using DevMapping = std::decay_t<decltype(devMapping)>;
Expand Down
28 changes: 14 additions & 14 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ LLAMA_FN_HOST_ACC_INLINE auto store(FP& dst, Vec v)
v.store(&dst);
}

template<std::size_t Elems>
template<int Elems>
struct VecType
{
// TODO(bgruber): we need a vector type that also works on GPUs
Expand All @@ -116,7 +116,7 @@ struct VecType<1>
using type = FP;
};

template<std::size_t Elems, typename ViewParticleI, typename VirtualParticleJ>
template<int Elems, typename ViewParticleI, typename VirtualParticleJ>
LLAMA_FN_HOST_ACC_INLINE void pPInteraction(ViewParticleI pi, VirtualParticleJ pj)
{
using Vec = typename VecType<Elems>::type;
Expand All @@ -143,7 +143,7 @@ LLAMA_FN_HOST_ACC_INLINE void pPInteraction(ViewParticleI pi, VirtualParticleJ p
store<Vec>(pi(tag::Vel{}, tag::Z{}), zdistanceSqr * sts + load<Vec>(pi(tag::Vel{}, tag::Z{})));
}

template<std::size_t ProblemSize, std::size_t Elems, std::size_t BlockSize, Mapping MappingSM>
template<int ProblemSize, int Elems, int BlockSize, Mapping MappingSM>
struct UpdateKernel
{
template<typename Acc, typename View>
Expand All @@ -158,7 +158,7 @@ struct UpdateKernel
{
constexpr auto sharedMapping = []
{
using ArrayExtents = llama::ArrayExtents<BlockSize>;
using ArrayExtents = llama::ArrayExtents<int, BlockSize>;
if constexpr(MappingSM == AoS)
return llama::mapping::AoS<ArrayExtents, Particle>{};
if constexpr(MappingSM == SoA)
Expand All @@ -181,7 +181,7 @@ struct UpdateKernel
auto pi = [&]
{
constexpr auto mapping
= llama::mapping::SoA<llama::ArrayExtents<Elems>, typename View::RecordDim, false>{};
= llama::mapping::SoA<llama::ArrayExtents<int, Elems>, typename View::RecordDim, false>{};
return llama::allocViewUninitialized(mapping, llama::bloballoc::Stack<mapping.blobSize(0)>{});
}();
// TODO(bgruber): vector load
Expand All @@ -190,26 +190,26 @@ struct UpdateKernel
pi(e) = particles(ti * Elems + e);

LLAMA_INDEPENDENT_DATA
for(std::size_t blockOffset = 0; blockOffset < ProblemSize; blockOffset += BlockSize)
for(int blockOffset = 0; blockOffset < ProblemSize; blockOffset += BlockSize)
{
LLAMA_INDEPENDENT_DATA
for(auto j = tbi; j < BlockSize; j += THREADS_PER_BLOCK)
for(int j = tbi; j < BlockSize; j += THREADS_PER_BLOCK)
sharedView(j) = particles(blockOffset + j);
alpaka::syncBlockThreads(acc);

LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < BlockSize; ++j)
for(int j = 0; j < BlockSize; ++j)
pPInteraction<Elems>(pi(0u), sharedView(j));
alpaka::syncBlockThreads(acc);
}
// TODO(bgruber): vector store
LLAMA_INDEPENDENT_DATA
for(auto e = 0u; e < Elems; e++)
for(int e = 0u; e < Elems; e++)
particles(ti * Elems + e) = pi(e);
}
};

template<std::size_t ProblemSize, std::size_t Elems>
template<int ProblemSize, int Elems>
struct MoveKernel
{
template<typename Acc, typename View>
Expand All @@ -235,7 +235,7 @@ template<template<typename, typename> typename AccTemplate, Mapping MappingGM, M
void run(std::ostream& plotFile)
{
using Dim = alpaka::DimInt<1>;
using Size = std::size_t;
using Size = int;
using Acc = AccTemplate<Dim, Size>;
using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
Expand All @@ -262,7 +262,7 @@ void run(std::ostream& plotFile)

auto mapping = []
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1, int>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(MappingGM == AoS)
return llama::mapping::AoS<ArrayExtents, Particle>{extents};
Expand Down Expand Up @@ -290,7 +290,7 @@ void run(std::ostream& plotFile)

std::mt19937_64 generator;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
llama::One<Particle> p;
p(tag::Pos(), tag::X()) = distribution(generator);
Expand All @@ -315,7 +315,7 @@ void run(std::ostream& plotFile)

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
auto updateKernel = UpdateKernel<PROBLEM_SIZE, DESIRED_ELEMENTS_PER_THREAD, THREADS_PER_BLOCK, MappingSM>{};
alpaka::exec<Acc>(queue, workdiv, updateKernel, accView);
Expand Down
2 changes: 1 addition & 1 deletion examples/alpaka/pic/pic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ auto setup(Queue& queue, const Dev& dev, const DevHost& devHost)

auto particleMapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto particleExtents = ArrayExtents{numpart};
if constexpr(ParticleMapping == 0)
return llama::mapping::AoS<ArrayExtents, Particle>{particleExtents};
Expand Down
2 changes: 1 addition & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ try
// LLAMA
const auto mapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(MAPPING == 0)
return llama::mapping::AoS<ArrayExtents, Vector>{extents};
Expand Down
2 changes: 1 addition & 1 deletion examples/bitpackfloat/bitpackfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ auto main() -> int
constexpr auto exponentBits = 5;
constexpr auto mantissaBits = 13;
const auto mapping
= llama::mapping::BitPackedFloatSoA{llama::ArrayExtents<llama::dyn>{N}, exponentBits, mantissaBits, Vector{}};
= llama::mapping::BitPackedFloatSoA{llama::ArrayExtents{N}, exponentBits, mantissaBits, Vector{}};

auto view = llama::allocView(mapping);

Expand Down
2 changes: 1 addition & 1 deletion examples/bitpackint/bitpackint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ auto main() -> int

// extract into a view of full size integers
auto viewExtracted
= llama::allocViewUninitialized(llama::mapping::AoS<llama::ArrayExtents<llama::dyn>, Vector>{{N}});
= llama::allocViewUninitialized(llama::mapping::AoS<llama::ArrayExtentsDynamic<1>, Vector>{{N}});
llama::copy(view, viewExtracted);
if(!std::equal(view.begin(), view.end(), viewExtracted.begin(), viewExtracted.end()))
fmt::print("ERROR: unpacked view is different\n");
Expand Down
9 changes: 5 additions & 4 deletions examples/bufferguard/bufferguard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct GuardMapping2D : llama::ArrayExtentsDynamic<2>

template<std::size_t... RecordCoords>
constexpr auto blobNrAndOffset(ArrayIndex ai, llama::RecordCoord<RecordCoords...> rc = {}) const
-> llama::NrAndOffset
-> llama::NrAndOffset<std::size_t>
{
// [0][0] is at left top
const auto [row, col] = ai;
Expand Down Expand Up @@ -144,7 +144,8 @@ struct GuardMapping2D : llama::ArrayExtentsDynamic<2>
}

private:
constexpr auto offsetBlobNr(llama::NrAndOffset nao, std::size_t blobNrOffset) const -> llama::NrAndOffset
constexpr auto offsetBlobNr(llama::NrAndOffset<std::size_t> nao, std::size_t blobNrOffset) const
-> llama::NrAndOffset<std::size_t>
{
nao.nr += blobNrOffset;
return nao;
Expand Down Expand Up @@ -202,8 +203,8 @@ void run(const std::string& mappingName)
{
std::cout << "\n===== Mapping " << mappingName << " =====\n\n";

constexpr auto rows = 7;
constexpr auto cols = 5;
constexpr std::size_t rows = 7;
constexpr std::size_t cols = 5;
const auto extents = llama::ArrayExtents{rows, cols};
const auto mapping = GuardMapping2D<Mapping, Vector>{extents};
std::ofstream{"bufferguard_" + mappingName + ".svg"} << llama::toSvg(mapping);
Expand Down
20 changes: 10 additions & 10 deletions examples/cuda/nbody/nbody.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ __global__ void updateSM(View particles)
{
constexpr auto sharedMapping = []
{
using ArrayExtents = llama::ArrayExtents<SHARED_ELEMENTS_PER_BLOCK>;
using ArrayExtents = llama::ArrayExtents<int, SHARED_ELEMENTS_PER_BLOCK>;
if constexpr(MappingSM == 0)
return llama::mapping::AoS<ArrayExtents, SharedMemoryParticle>{};
if constexpr(MappingSM == 1)
Expand All @@ -111,15 +111,15 @@ __global__ void updateSM(View particles)
const auto tbi = blockIdx.x;

llama::One<Particle> pi = particles(ti);
for(std::size_t blockOffset = 0; blockOffset < PROBLEM_SIZE; blockOffset += SHARED_ELEMENTS_PER_BLOCK)
for(int blockOffset = 0; blockOffset < PROBLEM_SIZE; blockOffset += SHARED_ELEMENTS_PER_BLOCK)
{
LLAMA_INDEPENDENT_DATA
for(auto j = tbi; j < SHARED_ELEMENTS_PER_BLOCK; j += THREADS_PER_BLOCK)
for(int j = tbi; j < SHARED_ELEMENTS_PER_BLOCK; j += THREADS_PER_BLOCK)
sharedView(j) = particles(blockOffset + j);
__syncthreads();

LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < SHARED_ELEMENTS_PER_BLOCK; ++j)
for(int j = 0; j < SHARED_ELEMENTS_PER_BLOCK; ++j)
pPInteraction(pi, sharedView(j));
__syncthreads();
}
Expand All @@ -133,7 +133,7 @@ __global__ void update(View particles)

llama::One<Particle> pi = particles(ti);
LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < PROBLEM_SIZE; ++j)
for(int j = 0; j < PROBLEM_SIZE; ++j)
pPInteraction(pi, particles(j));
particles(ti)(tag::Vel{}) = pi(tag::Vel{});
}
Expand Down Expand Up @@ -178,7 +178,7 @@ try

auto mapping = []
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1, int>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(Mapping == 0)
return llama::mapping::AoS<ArrayExtents, Particle>{extents};
Expand Down Expand Up @@ -224,7 +224,7 @@ try

std::default_random_engine engine;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
llama::One<Particle> p;
p(tag::Pos(), tag::X()) = distribution(engine);
Expand Down Expand Up @@ -267,7 +267,7 @@ try

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
if constexpr(RUN_UPATE)
{
Expand Down Expand Up @@ -388,7 +388,7 @@ namespace manual

std::default_random_engine engine;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
hostPositions[i].x = distribution(engine);
hostPositions[i].y = distribution(engine);
Expand Down Expand Up @@ -426,7 +426,7 @@ namespace manual

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
if constexpr(RUN_UPATE)
{
Expand Down
4 changes: 2 additions & 2 deletions examples/cuda/pitch/pitch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ namespace llamaex
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(
typename Base::ArrayIndex ai,
RecordCoord<RecordCoords...> = {}) const -> NrAndOffset
RecordCoord<RecordCoords...> = {}) const -> NrAndOffset<std::size_t>
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
Expand All @@ -134,7 +134,7 @@ try
prop.totalGlobalMem / 1024 / 1024,
prop.sharedMemPerBlock / 1024);

const auto extents = llama::ArrayExtents{600, 800}; // height, width
const auto extents = llama::ArrayExtents<std::size_t, llama::dyn, llama::dyn>{600, 800}; // height, width
const auto widthBytes = extents[1] * sizeof(RGB);

const auto blockDim = dim3{16, 32, 1};
Expand Down
2 changes: 1 addition & 1 deletion examples/nbody_benchmark/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void run(std::ostream& plotFile)

const auto mapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(Mapping == 0)
return llama::mapping::AoS<ArrayExtents, Particle>{extents};
Expand Down
14 changes: 7 additions & 7 deletions examples/simpletest/simpletest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ auto main() -> int
try
{
// Defining two array dimensions
using ArrayExtents = llama::ArrayExtentsDynamic<2>;
using ArrayExtents = llama::ArrayExtentsDynamic<2, int>;
// Setting the run time size of the array dimensions to 8192 * 8192
auto extents = ArrayExtents{8192, 8192};

Expand Down Expand Up @@ -177,11 +177,11 @@ try
<< '\n';

// iterating over the array dimensions at run time to do some stuff with the allocated data
for(size_t x = 0; x < extents[0]; ++x)
for(int x = 0; x < extents[0]; ++x)
// telling the compiler that all data in the following loop is independent to each other and thus can be
// vectorized
LLAMA_INDEPENDENT_DATA
for(size_t y = 0; y < extents[1]; ++y)
for(int y = 0; y < extents[1]; ++y)
{
// Defining a functor for a given virtual record
SetZeroFunctor<decltype(view(x, y))> szf{view(x, y)};
Expand All @@ -194,9 +194,9 @@ try
view({x, y}) = double(x + y) / double(extents[0] + extents[1]);
}

for(size_t x = 0; x < extents[0]; ++x)
for(int x = 0; x < extents[0]; ++x)
LLAMA_INDEPENDENT_DATA
for(size_t y = 0; y < extents[1]; ++y)
for(int y = 0; y < extents[1]; ++y)
{
// Showing different options of access data with llama. Internally all do the same data- and mappingwise
auto record = view(x, y);
Expand All @@ -209,9 +209,9 @@ try
}
double sum = 0.0;
LLAMA_INDEPENDENT_DATA
for(size_t x = 0; x < extents[0]; ++x)
for(int x = 0; x < extents[0]; ++x)
LLAMA_INDEPENDENT_DATA
for(size_t y = 0; y < extents[1]; ++y)
for(int y = 0; y < extents[1]; ++y)
sum += view(x, y)(llama::RecordCoord<1, 0>{});
std::cout << "Sum: " << sum << '\n';

Expand Down
2 changes: 1 addition & 1 deletion examples/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace usellama

const auto mapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(MAPPING == 0)
return llama::mapping::AoS{extents, Vector{}};
Expand Down
Loading

0 comments on commit c0c49c1

Please sign in to comment.