Add View::extents() #718

Merged · 1 commit · Feb 27, 2023
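This PR adds a `View::extents()` convenience member that simply forwards to `mapping().extents()` and switches all call sites to the shorter spelling. A minimal before/after sketch, assuming the LLAMA API around the time of this PR (the record dimension, tags, and extents below are illustrative and not taken from the diff):

```cpp
#include <llama/llama.hpp>

// Illustrative record dimension; any record dimension works the same way.
struct X{};
struct Y{};
using Vec = llama::Record<llama::Field<X, float>, llama::Field<Y, float>>;

int main()
{
    using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 1>;
    const auto mapping = llama::mapping::AoS<ArrayExtents, Vec>{ArrayExtents{1024}};
    auto view = llama::allocView(mapping);

    // Before this PR: the array extents had to be queried through the mapping.
    const auto [nOld] = view.mapping().extents();

    // After this PR: View forwards to its mapping, so this is equivalent and shorter.
    const auto [nNew] = view.extents();

    return nOld == nNew ? 0 : 1;
}
```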
8 changes: 4 additions & 4 deletions examples/alpaka/asyncblur/asyncblur.cpp
@@ -88,8 +88,8 @@ struct BlurKernel
const int bStart[2]
= {bi[0] * ElemsPerBlock + threadIdxInBlock[0], bi[1] * ElemsPerBlock + threadIdxInBlock[1]};
const int bEnd[2] = {
- alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[0]),
- alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[1]),
+ alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[0]),
+ alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[1]),
};
LLAMA_INDEPENDENT_DATA
for(auto y = bStart[0]; y < bEnd[0]; y += threadsPerBlock)
@@ -102,8 +102,8 @@ struct BlurKernel

const int start[2] = {ti[0] * Elems, ti[1] * Elems};
const int end[2] = {
- alpaka::math::min(acc, start[0] + Elems, oldImage.mapping().extents()[0] - 2 * KernelSize),
- alpaka::math::min(acc, start[1] + Elems, oldImage.mapping().extents()[1] - 2 * KernelSize),
+ alpaka::math::min(acc, start[0] + Elems, oldImage.extents()[0] - 2 * KernelSize),
+ alpaka::math::min(acc, start[1] + Elems, oldImage.extents()[1] - 2 * KernelSize),
};

LLAMA_INDEPENDENT_DATA
10 changes: 5 additions & 5 deletions examples/alpaka/pic/pic.cpp
@@ -193,10 +193,10 @@ void output(int n, const ParticleView& particles)
};
auto addFloat = [&](float f) { buffer.push_back(swapBytes(f)); };

- const auto pointCount = particles.mapping().extents()[0];
+ const auto pointCount = particles.extents()[0];
outP << "POINTS " << pointCount << " float\n";
buffer.reserve(pointCount * 3);
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
{
auto p = particles(i);
addFloat(0);
@@ -207,7 +207,7 @@ void output(int n, const ParticleView& particles)

outP << "POINT_DATA " << pointCount << "\nVECTORS velocity float\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
{
auto p = particles(i);
addFloat(p(U{}, Z{}));
@@ -218,13 +218,13 @@

outP << "SCALARS q float 1\nLOOKUP_TABLE default\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
addFloat(particles(i)(Q{}));
flushBuffer();

outP << "SCALARS m float 1\nLOOKUP_TABLE default\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
addFloat(particles(i)(M{}));
flushBuffer();
}
2 changes: 1 addition & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
@@ -53,7 +53,7 @@ struct ComputeKernel
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View a, View b) const
{
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
- const auto [n] = a.mapping().extents();
+ const auto [n] = a.extents();
const auto start = ti * Elems;
const auto end = alpaka::math::min(acc, start + Elems, n);

2 changes: 1 addition & 1 deletion examples/memmap/memmap.cpp
@@ -40,7 +40,7 @@ auto computeCentroid(const View& triangles)
llama::One<Vertex> centroid{};
for(const auto& t : triangles)
centroid += t(tag::a) + t(tag::b) + t(tag::c);
- return centroid / triangles.mapping().extents()[0] / 3;
+ return centroid / triangles.extents()[0] / 3;
}

auto main(int argc, const char* argv[]) -> int
2 changes: 1 addition & 1 deletion examples/root/lhcb_analysis/lhcb.cpp
@@ -176,7 +176,7 @@ namespace
auto hists = std::vector<TH1D>(omp_get_max_threads(), TH1D("B_mass", mappingName.c_str(), 500, 5050, 5500));

auto begin = std::chrono::steady_clock::now();
- const RE::NTupleSize_t n = view.mapping().extents()[0];
+ const RE::NTupleSize_t n = view.extents()[0];
#pragma omp parallel for
for(RE::NTupleSize_t i = 0; i < n; i++)
{
2 changes: 1 addition & 1 deletion examples/vectoradd/vectoradd.cpp
@@ -38,7 +38,7 @@ namespace usellama
template<typename View>
[[gnu::noinline]] void compute(const View& a, const View& b, View& c)
{
- const auto [n] = c.mapping().extents();
+ const auto [n] = c.extents();

for(std::size_t i = 0; i < n; i++)
{
6 changes: 3 additions & 3 deletions examples/viewcopy/viewcopy.cpp
@@ -53,8 +53,8 @@ void stdCopy(const llama::View<SrcMapping, SrcBlobType>& srcView, llama::View<Ds
{
static_assert(std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>);

- if(srcView.mapping().extents() != dstView.mapping().extents())
- throw std::runtime_error{"Array dimensions sizes are different"};
+ if(srcView.extents() != dstView.extents())
+ throw std::runtime_error{"Array extents are different"};

std::copy(srcView.begin(), srcView.end(), dstView.begin());
}
@@ -106,7 +106,7 @@ template<typename Mapping, typename BlobType>
auto hash(const llama::View<Mapping, BlobType>& view)
{
std::size_t acc = 0;
- for(auto ad : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ad : llama::ArrayIndexRange{view.extents()})
llama::forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) { boost::hash_combine(acc, view(ad)(rc)); });
return acc;
}
11 changes: 6 additions & 5 deletions include/llama/BlobAllocators.hpp
@@ -149,6 +149,12 @@ namespace llama::bloballoc
/// on the view before passing it to the kernel.
struct CudaMalloc
{
+ inline static const auto deleter = [](void* p)
+ {
+ if(const auto code = cudaFree(p); code != cudaSuccess)
+ throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
+ };

template<std::size_t FieldAlignment>
inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count) const
{
@@ -157,11 +163,6 @@ namespace llama::bloballoc
throw std::runtime_error(std::string{"cudaMalloc failed with code "} + cudaGetErrorString(code));
if(reinterpret_cast<std::uintptr_t>(p) & (FieldAlignment - 1 != 0u))
throw std::runtime_error{"cudaMalloc does not align sufficiently"};
- auto deleter = [](void* p)
- {
- if(const auto code = cudaFree(p); code != cudaSuccess)
- throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
- };
return std::unique_ptr<std::byte[], decltype(deleter)>(p, deleter);
}
};
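Aside: the `CudaMalloc` change hoists the `cudaFree` deleter from a local lambda inside `operator()` into an `inline static` member. A plausible consequence (my reading, not stated in the PR) is that the returned blob's `std::unique_ptr` deleter is now one fixed, nameable closure type rather than a fresh lambda type per call. A hypothetical sketch under that assumption:

```cpp
#include <llama/llama.hpp> // provides llama::bloballoc::CudaMalloc when built as CUDA code
#include <memory>

// Hypothetical alias (not part of the PR): with the deleter as a static member,
// decltype names a single closure type, so the blob type can be spelled out.
using CudaBlob = std::unique_ptr<std::byte[], decltype(llama::bloballoc::CudaMalloc::deleter)>;
```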
10 changes: 5 additions & 5 deletions include/llama/Copy.hpp
@@ -57,7 +57,7 @@ namespace llama
internal::assertTrivialCopyable<typename Mapping::RecordDim>();

// TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

// TODO(bgruber): this is maybe not the best parallel copying strategy
@@ -85,7 +85,7 @@ namespace llama
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
"The source and destination record dimensions must be the same");

- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE
@@ -95,7 +95,7 @@
};

constexpr auto dims = SrcMapping::ArrayExtents::rank;
- const auto extents = srcView.mapping().extents().toArray();
+ const auto extents = srcView.extents().toArray();
const auto workPerThread = (extents[0] + threadCount - 1) / threadCount;
const auto start = threadId * workPerThread;
const auto end = std::min((threadId + 1) * workPerThread, static_cast<std::size_t>(extents[0]));
@@ -162,7 +162,7 @@ namespace llama
static constexpr auto lanesSrc = internal::aosoaLanes<SrcMapping>;
static constexpr auto lanesDst = internal::aosoaLanes<DstMapping>;

- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

static constexpr auto srcIsAoSoA = lanesSrc != std::numeric_limits<std::size_t>::max();
@@ -176,7 +176,7 @@
!dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");

- const auto flatSize = product(dstView.mapping().extents());
+ const auto flatSize = product(dstView.extents());

// TODO(bgruber): implement the following by adding additional copy loops for the remaining elements
if(!srcIsAoSoA && flatSize % lanesDst != 0)
4 changes: 2 additions & 2 deletions include/llama/Simd.hpp
@@ -206,7 +206,7 @@ namespace llama
}
else
{
- auto b = ArrayIndexIterator{srcRef.view.mapping().extents(), srcRef.arrayIndex()};
+ auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()};
ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
for(auto i = 0; i < Traits::lanes; i++)
reinterpret_cast<FieldType*>(&elemSimd)[i]
@@ -248,7 +248,7 @@
// TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which
// direction should we collect SIMD values?
const ElementSimd elemSimd = srcSimd(rc);
- auto b = ArrayIndexIterator{dstRef.view.mapping().extents(), dstRef.arrayIndex()};
+ auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()};
for(auto i = 0; i < Traits::lanes; i++)
dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc))
= reinterpret_cast<const FieldType*>(&elemSimd)[i]; // scalar store
2 changes: 1 addition & 1 deletion include/llama/Vector.hpp
@@ -182,7 +182,7 @@ namespace llama

LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> size_type
{
- return m_view.mapping().extents()[0];
+ return m_view.extents()[0];
}

// NOLINTNEXTLINE(readability-identifier-naming)
15 changes: 10 additions & 5 deletions include/llama/View.hpp
@@ -127,7 +127,7 @@ namespace llama
using View = View<Mapping, BlobType, Accessor>;
using RecordDim = typename View::RecordDim;
forEachADCoord(
- view.mapping().extents(),
+ view.extents(),
[&]([[maybe_unused]] typename View::ArrayIndex ai)
{
if constexpr(isRecordDim<RecordDim>)
@@ -458,6 +458,11 @@ namespace llama
return static_cast<const Mapping&>(*this);
}

+ LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents
+ {
+ return mapping().extents();
+ }

LLAMA_FN_HOST_ACC_INLINE auto accessor() -> Accessor&
{
return static_cast<Accessor&>(*this);
@@ -569,25 +574,25 @@
LLAMA_FN_HOST_ACC_INLINE
auto begin() -> iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto begin() const -> const_iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto end() -> iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto end() const -> const_iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
}

Array<BlobType, Mapping::blobCount> storageBlobs;
4 changes: 2 additions & 2 deletions tests/common.hpp
@@ -144,7 +144,7 @@ void iotaFillView(View& view)
{
std::int64_t value = 0;
using RecordDim = typename View::RecordDim;
- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
{
if constexpr(llama::isRecordDim<RecordDim>)
{
@@ -169,7 +169,7 @@ void iotaCheckView(View& view)
{
std::int64_t value = 0;
using RecordDim = typename View::RecordDim;
- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
{
if constexpr(llama::isRecordDim<RecordDim>)
{
2 changes: 1 addition & 1 deletion tests/mapping.Null.cpp
@@ -13,7 +13,7 @@ TEST_CASE("mapping.Null")
auto view = llama::allocView(mapping);
iotaFillView(view);

- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
llama::forEachLeafCoord<Particle>(
[&](auto rc)
{
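Taken together, the call-site changes all follow one pattern: wherever the array extents are needed for iteration or bounds checks, `view.mapping().extents()` becomes `view.extents()`. A minimal sketch of the iteration idiom used in the tests, assuming a 1D view over an illustrative single-field record dimension (names are not taken from the PR):

```cpp
#include <llama/llama.hpp>

struct Val{};
using RecordDim = llama::Record<llama::Field<Val, float>>;

int main()
{
    using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 1>;
    auto view = llama::allocView(llama::mapping::AoS<ArrayExtents, RecordDim>{ArrayExtents{16}});

    // Previously spelled llama::ArrayIndexRange{view.mapping().extents()}.
    for(auto ai : llama::ArrayIndexRange{view.extents()})
        llama::forEachLeafCoord<RecordDim>([&](auto rc) { view(ai)(rc) = 0.0f; });
}
```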