Support extents with arbitrary value types #488

Merged · 1 commit · May 12, 2022
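In short: this PR makes the value type of an extent an explicit template parameter. llama::ArrayExtents, the related aliases and llama::ArrayIndex, and helpers such as llama::NrAndOffset no longer hard-code std::size_t, so an index space can be described with int or any other integral type; the API docs also swap the ArrayExtentsStatic alias for ArrayExtentsNCube, and the test build gains -Werror=narrowing. A minimal before/after sketch of the pattern that repeats through the diff below (spellings taken from the changed examples, not from the library headers):

    #include <llama/llama.hpp>

    constexpr int chunkSize = 16;

    // before this PR: extent values were always std::size_t
    // using Shared = llama::ArrayExtents<chunkSize, chunkSize>;

    // after this PR: the value type is the first template argument
    using SharedSizeT = llama::ArrayExtents<std::size_t, chunkSize, chunkSize>;
    using SharedInt   = llama::ArrayExtents<int, chunkSize, chunkSize>; // e.g. signed 32-bit indices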
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -41,7 +41,7 @@ if (BUILD_TESTING)
target_compile_options(tests PRIVATE /permissive- /constexpr:steps10000000 /diagnostics:caret)
else()
target_compile_features(tests PRIVATE cxx_std_20)
target_compile_options(tests PRIVATE -Wall -Wextra -Wno-missing-braces)
target_compile_options(tests PRIVATE -Wall -Wextra -Werror=narrowing -Wno-missing-braces)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
target_compile_options(tests PRIVATE -fconstexpr-steps=10000000)
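The added -Werror=narrowing presumably makes the tests fail to compile if an extent value would be silently truncated now that extents can carry narrower value types. A standalone illustration of the kind of conversion it promotes to a hard failure (hypothetical code, not taken from this PR):

    #include <cstddef>

    struct IntExtents { int n; };      // stand-in for an extents type with an int value type

    int main()
    {
        std::size_t runtimeSize = 1024;
        IntExtents e{runtimeSize};     // narrowing std::size_t -> int inside braces:
                                       // diagnosed by -Wnarrowing, a hard error under -Werror=narrowing
        (void) e;
    }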
2 changes: 1 addition & 1 deletion docs/pages/api.rst
@@ -45,7 +45,7 @@ Array dimensions

.. doxygenstruct:: llama::ArrayExtents
.. doxygentypedef:: llama::ArrayExtentsDynamic
.. doxygentypedef:: llama::ArrayExtentsStatic
.. doxygentypedef:: llama::ArrayExtentsNCube
.. doxygenstruct:: llama::ArrayIndex

.. doxygenstruct:: llama::ArrayIndexIterator
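ArrayExtentsNCube replaces ArrayExtentsStatic in the API reference; going by the name, it is the convenience alias for extents whose dimensions all share one compile-time size, now carrying the value type as well. A hedged usage sketch (the alias's parameter order is an assumption; only the expanded spelling below appears in this diff):

    #include <llama/llama.hpp>

    // assumed signature: ArrayExtentsNCube<value type, rank, extent per dimension>
    using Cube   = llama::ArrayExtentsNCube<int, 3, 16>;
    // the equivalent spelled-out form, as used in the changed examples:
    using Manual = llama::ArrayExtents<int, 16, 16, 16>;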
8 changes: 5 additions & 3 deletions examples/alpaka/asyncblur/asyncblur.cpp
@@ -85,8 +85,10 @@ struct BlurKernel
{
// Using SoA for the shared memory
constexpr auto sharedChunkSize = ElemsPerBlock + 2 * KernelSize;
constexpr auto sharedMapping = llama::mapping::
SoA<llama::ArrayExtents<sharedChunkSize, sharedChunkSize>, typename View::RecordDim, false>{};
constexpr auto sharedMapping = llama::mapping::SoA<
llama::ArrayExtents<std::size_t, sharedChunkSize, sharedChunkSize>,
typename View::RecordDim,
false>{};
auto& sharedMem = alpaka::declareSharedVar<std::byte[sharedMapping.blobSize(0)], __COUNTER__>(acc);
return llama::View(sharedMapping, llama::Array<std::byte*, 1>{&sharedMem[0]});
}
@@ -211,7 +213,7 @@ try
const auto hostMapping
= llama::mapping::tree::Mapping{llama::ArrayExtents{buffer_y, buffer_x}, treeOperationList, Pixel{}};
const auto devMapping = llama::mapping::tree::Mapping{
llama::ArrayExtents<CHUNK_SIZE + 2 * KERNEL_SIZE, CHUNK_SIZE + 2 * KERNEL_SIZE>{},
llama::ArrayExtents<std::size_t, CHUNK_SIZE + 2 * KERNEL_SIZE, CHUNK_SIZE + 2 * KERNEL_SIZE>{},
treeOperationList,
PixelOnAcc{}};
using DevMapping = std::decay_t<decltype(devMapping)>;
28 changes: 14 additions & 14 deletions examples/alpaka/nbody/nbody.cpp
@@ -102,7 +102,7 @@ LLAMA_FN_HOST_ACC_INLINE auto store(FP& dst, Vec v)
v.store(&dst);
}

template<std::size_t Elems>
template<int Elems>
struct VecType
{
// TODO(bgruber): we need a vector type that also works on GPUs
@@ -116,7 +116,7 @@ struct VecType<1>
using type = FP;
};

template<std::size_t Elems, typename ViewParticleI, typename VirtualParticleJ>
template<int Elems, typename ViewParticleI, typename VirtualParticleJ>
LLAMA_FN_HOST_ACC_INLINE void pPInteraction(ViewParticleI pi, VirtualParticleJ pj)
{
using Vec = typename VecType<Elems>::type;
@@ -143,7 +143,7 @@ LLAMA_FN_HOST_ACC_INLINE void pPInteraction(ViewParticleI pi, VirtualParticleJ p
store<Vec>(pi(tag::Vel{}, tag::Z{}), zdistanceSqr * sts + load<Vec>(pi(tag::Vel{}, tag::Z{})));
}

template<std::size_t ProblemSize, std::size_t Elems, std::size_t BlockSize, Mapping MappingSM>
template<int ProblemSize, int Elems, int BlockSize, Mapping MappingSM>
struct UpdateKernel
{
template<typename Acc, typename View>
@@ -158,7 +158,7 @@ struct UpdateKernel
{
constexpr auto sharedMapping = []
{
using ArrayExtents = llama::ArrayExtents<BlockSize>;
using ArrayExtents = llama::ArrayExtents<int, BlockSize>;
if constexpr(MappingSM == AoS)
return llama::mapping::AoS<ArrayExtents, Particle>{};
if constexpr(MappingSM == SoA)
@@ -181,7 +181,7 @@ struct UpdateKernel
auto pi = [&]
{
constexpr auto mapping
= llama::mapping::SoA<llama::ArrayExtents<Elems>, typename View::RecordDim, false>{};
= llama::mapping::SoA<llama::ArrayExtents<int, Elems>, typename View::RecordDim, false>{};
return llama::allocViewUninitialized(mapping, llama::bloballoc::Stack<mapping.blobSize(0)>{});
}();
// TODO(bgruber): vector load
@@ -190,26 +190,26 @@ struct UpdateKernel
pi(e) = particles(ti * Elems + e);

LLAMA_INDEPENDENT_DATA
for(std::size_t blockOffset = 0; blockOffset < ProblemSize; blockOffset += BlockSize)
for(int blockOffset = 0; blockOffset < ProblemSize; blockOffset += BlockSize)
{
LLAMA_INDEPENDENT_DATA
for(auto j = tbi; j < BlockSize; j += THREADS_PER_BLOCK)
for(int j = tbi; j < BlockSize; j += THREADS_PER_BLOCK)
sharedView(j) = particles(blockOffset + j);
alpaka::syncBlockThreads(acc);

LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < BlockSize; ++j)
for(int j = 0; j < BlockSize; ++j)
pPInteraction<Elems>(pi(0u), sharedView(j));
alpaka::syncBlockThreads(acc);
}
// TODO(bgruber): vector store
LLAMA_INDEPENDENT_DATA
for(auto e = 0u; e < Elems; e++)
for(int e = 0u; e < Elems; e++)
particles(ti * Elems + e) = pi(e);
}
};

template<std::size_t ProblemSize, std::size_t Elems>
template<int ProblemSize, int Elems>
struct MoveKernel
{
template<typename Acc, typename View>
@@ -235,7 +235,7 @@ template<template<typename, typename> typename AccTemplate, Mapping MappingGM, M
void run(std::ostream& plotFile)
{
using Dim = alpaka::DimInt<1>;
using Size = std::size_t;
using Size = int;
using Acc = AccTemplate<Dim, Size>;
using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
@@ -262,7 +262,7 @@ void run(std::ostream& plotFile)

auto mapping = []
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1, int>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(MappingGM == AoS)
return llama::mapping::AoS<ArrayExtents, Particle>{extents};
@@ -290,7 +290,7 @@

std::mt19937_64 generator;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
llama::One<Particle> p;
p(tag::Pos(), tag::X()) = distribution(generator);
@@ -315,7 +315,7 @@

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
auto updateKernel = UpdateKernel<PROBLEM_SIZE, DESIRED_ELEMENTS_PER_THREAD, THREADS_PER_BLOCK, MappingSM>{};
alpaka::exec<Acc>(queue, workdiv, updateKernel, accView);
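The alpaka n-body example now uses int for its size template parameters, loop counters, and alpaka's accelerator index type, exercising signed 32-bit extents end to end. Stripped of the kernel machinery, the shared-memory mapping choice above boils down to roughly this sketch (record dimension shortened; the real example uses its Particle record):

    #include <llama/llama.hpp>

    struct X {};
    struct Y {};
    using P = llama::Record<llama::Field<X, float>, llama::Field<Y, float>>;

    // block-local mappings over an int-typed, one-dimensional static extent
    constexpr int blockSize = 256;
    using SharedExtents = llama::ArrayExtents<int, blockSize>;
    constexpr auto aos = llama::mapping::AoS<SharedExtents, P>{};
    constexpr auto soa = llama::mapping::SoA<SharedExtents, P, false>{}; // false: one blob, as in the kernel above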
2 changes: 1 addition & 1 deletion examples/alpaka/pic/pic.cpp
@@ -309,7 +309,7 @@ auto setup(Queue& queue, const Dev& dev, const DevHost& devHost)

auto particleMapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto particleExtents = ArrayExtents{numpart};
if constexpr(ParticleMapping == 0)
return llama::mapping::AoS<ArrayExtents, Particle>{particleExtents};
2 changes: 1 addition & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
@@ -82,7 +82,7 @@ try
// LLAMA
const auto mapping = [&]
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(MAPPING == 0)
return llama::mapping::AoS<ArrayExtents, Vector>{extents};
2 changes: 1 addition & 1 deletion examples/bitpackfloat/bitpackfloat.cpp
@@ -21,7 +21,7 @@ auto main() -> int
constexpr auto exponentBits = 5;
constexpr auto mantissaBits = 13;
const auto mapping
= llama::mapping::BitPackedFloatSoA{llama::ArrayExtents<llama::dyn>{N}, exponentBits, mantissaBits, Vector{}};
= llama::mapping::BitPackedFloatSoA{llama::ArrayExtents{N}, exponentBits, mantissaBits, Vector{}};

auto view = llama::allocView(mapping);

5 changes: 3 additions & 2 deletions examples/bitpackint/bitpackint.cpp
@@ -22,7 +22,8 @@ auto main() -> int
constexpr auto N = 128;
constexpr auto bits = 7;
const auto mapping
= llama::mapping::BitPackedIntSoA<llama::ArrayExtentsDynamic<1>, Vector, llama::Constant<bits>>{{N}};
= llama::mapping::BitPackedIntSoA<llama::ArrayExtentsDynamic<std::size_t, 1>, Vector, llama::Constant<bits>>{
{N}};

auto view = llama::allocView(mapping);

@@ -49,7 +50,7 @@ auto main() -> int

// extract into a view of full size integers
auto viewExtracted
= llama::allocViewUninitialized(llama::mapping::AoS<llama::ArrayExtents<llama::dyn>, Vector>{{N}});
= llama::allocViewUninitialized(llama::mapping::AoS<llama::ArrayExtentsDynamic<std::size_t, 1>, Vector>{{N}});
llama::copy(view, viewExtracted);
if(!std::equal(view.begin(), view.end(), viewExtracted.begin(), viewExtracted.end()))
fmt::print("ERROR: unpacked view is different\n");
27 changes: 14 additions & 13 deletions examples/bufferguard/bufferguard.cpp
@@ -21,16 +21,16 @@ using Vector = llama::Record<
// clang-format on

template<template<typename, typename> typename InnerMapping, typename TRecordDim>
struct GuardMapping2D : llama::ArrayExtentsDynamic<2>
struct GuardMapping2D : llama::ArrayExtentsDynamic<std::size_t, 2>
{
using ArrayExtents = llama::ArrayExtentsDynamic<2>;
using ArrayIndex = llama::ArrayIndex<2>;
using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 2>;
using ArrayIndex = llama::ArrayIndex<std::size_t, 2>;
using RecordDim = TRecordDim;

constexpr GuardMapping2D() = default;

constexpr explicit GuardMapping2D(ArrayExtents extents, RecordDim = {})
: llama::ArrayExtentsDynamic<2>(extents)
: llama::ArrayExtentsDynamic<std::size_t, 2>(extents)
, left({extents[0] - 2})
, right({extents[0] - 2})
, top({extents[1] - 2})
@@ -69,7 +69,7 @@ struct GuardMapping2D : llama::ArrayExtentsDynamic<2>

template<std::size_t... RecordCoords>
constexpr auto blobNrAndOffset(ArrayIndex ai, llama::RecordCoord<RecordCoords...> rc = {}) const
-> llama::NrAndOffset
-> llama::NrAndOffset<std::size_t>
{
// [0][0] is at left top
const auto [row, col] = ai;
@@ -144,7 +144,8 @@ struct GuardMapping2D : llama::ArrayExtentsDynamic<2>
}

private:
constexpr auto offsetBlobNr(llama::NrAndOffset nao, std::size_t blobNrOffset) const -> llama::NrAndOffset
constexpr auto offsetBlobNr(llama::NrAndOffset<std::size_t> nao, std::size_t blobNrOffset) const
-> llama::NrAndOffset<std::size_t>
{
nao.nr += blobNrOffset;
return nao;
@@ -162,11 +163,11 @@ struct GuardMapping2D : llama::ArrayExtentsDynamic<2>
llama::mapping::One<llama::ArrayExtents<>, RecordDim> leftBot;
llama::mapping::One<llama::ArrayExtents<>, RecordDim> rightTop;
llama::mapping::One<llama::ArrayExtents<>, RecordDim> rightBot;
InnerMapping<llama::ArrayExtentsDynamic<1>, RecordDim> left;
InnerMapping<llama::ArrayExtentsDynamic<1>, RecordDim> right;
InnerMapping<llama::ArrayExtentsDynamic<1>, RecordDim> top;
InnerMapping<llama::ArrayExtentsDynamic<1>, RecordDim> bot;
InnerMapping<llama::ArrayExtentsDynamic<2>, RecordDim> center;
InnerMapping<llama::ArrayExtentsDynamic<std::size_t, 1>, RecordDim> left;
InnerMapping<llama::ArrayExtentsDynamic<std::size_t, 1>, RecordDim> right;
InnerMapping<llama::ArrayExtentsDynamic<std::size_t, 1>, RecordDim> top;
InnerMapping<llama::ArrayExtentsDynamic<std::size_t, 1>, RecordDim> bot;
InnerMapping<llama::ArrayExtentsDynamic<std::size_t, 2>, RecordDim> center;

static constexpr auto leftTopOff = std::size_t{0};
static constexpr auto leftBotOff = leftTopOff + decltype(leftTop)::blobCount;
@@ -202,8 +203,8 @@
{
std::cout << "\n===== Mapping " << mappingName << " =====\n\n";

constexpr auto rows = 7;
constexpr auto cols = 5;
constexpr std::size_t rows = 7;
constexpr std::size_t cols = 5;
const auto extents = llama::ArrayExtents{rows, cols};
const auto mapping = GuardMapping2D<Mapping, Vector>{extents};
std::ofstream{"bufferguard_" + mappingName + ".svg"} << llama::toSvg(mapping);
2 changes: 1 addition & 1 deletion examples/bytesplit/bytesplit.cpp
@@ -26,7 +26,7 @@ using Data = llama::Record<
auto main() -> int
{
constexpr auto N = 128;
using ArrayExtents = llama::ArrayExtentsDynamic<1>;
using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 1>;
const auto mapping = llama::mapping::Bytesplit<ArrayExtents, Data, llama::mapping::BindSoA<false>::fn>{{N}};

auto view = llama::allocView(mapping);
20 changes: 10 additions & 10 deletions examples/cuda/nbody/nbody.cu
@@ -86,7 +86,7 @@ __global__ void updateSM(View particles)
{
constexpr auto sharedMapping = []
{
using ArrayExtents = llama::ArrayExtents<SHARED_ELEMENTS_PER_BLOCK>;
using ArrayExtents = llama::ArrayExtents<int, SHARED_ELEMENTS_PER_BLOCK>;
if constexpr(MappingSM == 0)
return llama::mapping::AoS<ArrayExtents, SharedMemoryParticle>{};
if constexpr(MappingSM == 1)
@@ -111,15 +111,15 @@ __global__ void updateSM(View particles)
const auto tbi = blockIdx.x;

llama::One<Particle> pi = particles(ti);
for(std::size_t blockOffset = 0; blockOffset < PROBLEM_SIZE; blockOffset += SHARED_ELEMENTS_PER_BLOCK)
for(int blockOffset = 0; blockOffset < PROBLEM_SIZE; blockOffset += SHARED_ELEMENTS_PER_BLOCK)
{
LLAMA_INDEPENDENT_DATA
for(auto j = tbi; j < SHARED_ELEMENTS_PER_BLOCK; j += THREADS_PER_BLOCK)
for(int j = tbi; j < SHARED_ELEMENTS_PER_BLOCK; j += THREADS_PER_BLOCK)
sharedView(j) = particles(blockOffset + j);
__syncthreads();

LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < SHARED_ELEMENTS_PER_BLOCK; ++j)
for(int j = 0; j < SHARED_ELEMENTS_PER_BLOCK; ++j)
pPInteraction(pi, sharedView(j));
__syncthreads();
}
@@ -133,7 +133,7 @@ __global__ void update(View particles)

llama::One<Particle> pi = particles(ti);
LLAMA_INDEPENDENT_DATA
for(auto j = std::size_t{0}; j < PROBLEM_SIZE; ++j)
for(int j = 0; j < PROBLEM_SIZE; ++j)
pPInteraction(pi, particles(j));
particles(ti)(tag::Vel{}) = pi(tag::Vel{});
}
@@ -178,7 +178,7 @@ try

auto mapping = []
{
using ArrayExtents = llama::ArrayExtents<llama::dyn>;
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
const auto extents = ArrayExtents{PROBLEM_SIZE};
if constexpr(Mapping == 0)
return llama::mapping::AoS<ArrayExtents, Particle>{extents};
@@ -224,7 +224,7 @@

std::default_random_engine engine;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
llama::One<Particle> p;
p(tag::Pos(), tag::X()) = distribution(engine);
@@ -267,7 +267,7 @@

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
if constexpr(RUN_UPATE)
{
@@ -388,7 +388,7 @@ namespace manual

std::default_random_engine engine;
std::normal_distribution<FP> distribution(FP(0), FP(1));
for(std::size_t i = 0; i < PROBLEM_SIZE; ++i)
for(int i = 0; i < PROBLEM_SIZE; ++i)
{
hostPositions[i].x = distribution(engine);
hostPositions[i].y = distribution(engine);
@@ -426,7 +426,7 @@ namespace manual

double sumUpdate = 0;
double sumMove = 0;
for(std::size_t s = 0; s < STEPS; ++s)
for(int s = 0; s < STEPS; ++s)
{
if constexpr(RUN_UPATE)
{
6 changes: 3 additions & 3 deletions examples/cuda/pitch/pitch.cu
@@ -107,7 +107,7 @@ namespace llamaex
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(
typename Base::ArrayIndex ai,
RecordCoord<RecordCoords...> = {}) const -> NrAndOffset
RecordCoord<RecordCoords...> = {}) const -> NrAndOffset<std::size_t>
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
@@ -134,7 +134,7 @@ try
prop.totalGlobalMem / 1024 / 1024,
prop.sharedMemPerBlock / 1024);

const auto extents = llama::ArrayExtents{600, 800}; // height, width
const auto extents = llama::ArrayExtents<std::size_t, llama::dyn, llama::dyn>{600, 800}; // height, width
const auto widthBytes = extents[1] * sizeof(RGB);

const auto blockDim = dim3{16, 32, 1};
@@ -151,7 +151,7 @@ try
checkError(cudaMallocPitch(&mem, &rowPitch, widthBytes, extents[0]));
fmt::print("Row pitch: {} B ({} B padding)\n", rowPitch, rowPitch - widthBytes);

auto mapping = llamaex::PitchedAoS<llama::ArrayExtentsDynamic<2>, RGB>{extents, rowPitch};
auto mapping = llamaex::PitchedAoS<llama::ArrayExtentsDynamic<std::size_t, 2>, RGB>{extents, rowPitch};
assert(mapping.blobSize(0) == rowPitch * extents[0]);
auto view = llama::View{mapping, llama::Array{mem}};
