From dfd7facc0577f0cd1bd53620ede431834d7ffe37 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 27 Feb 2023 15:46:35 +0100 Subject: [PATCH] Add View::extents() --- examples/alpaka/asyncblur/asyncblur.cpp | 8 ++++---- examples/alpaka/pic/pic.cpp | 10 +++++----- examples/alpaka/vectoradd/vectoradd.cpp | 2 +- examples/memmap/memmap.cpp | 2 +- examples/root/lhcb_analysis/lhcb.cpp | 2 +- examples/vectoradd/vectoradd.cpp | 2 +- examples/viewcopy/viewcopy.cpp | 6 +++--- include/llama/BlobAllocators.hpp | 11 ++++++----- include/llama/Copy.hpp | 10 +++++----- include/llama/Simd.hpp | 4 ++-- include/llama/Vector.hpp | 2 +- include/llama/View.hpp | 15 ++++++++++----- tests/common.hpp | 4 ++-- tests/mapping.Null.cpp | 2 +- 14 files changed, 43 insertions(+), 37 deletions(-) diff --git a/examples/alpaka/asyncblur/asyncblur.cpp b/examples/alpaka/asyncblur/asyncblur.cpp index 32bd8a558b..30e7ea7871 100644 --- a/examples/alpaka/asyncblur/asyncblur.cpp +++ b/examples/alpaka/asyncblur/asyncblur.cpp @@ -88,8 +88,8 @@ struct BlurKernel const int bStart[2] = {bi[0] * ElemsPerBlock + threadIdxInBlock[0], bi[1] * ElemsPerBlock + threadIdxInBlock[1]}; const int bEnd[2] = { - alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[0]), - alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[1]), + alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[0]), + alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[1]), }; LLAMA_INDEPENDENT_DATA for(auto y = bStart[0]; y < bEnd[0]; y += threadsPerBlock) @@ -102,8 +102,8 @@ struct BlurKernel const int start[2] = {ti[0] * Elems, ti[1] * Elems}; const int end[2] = { - alpaka::math::min(acc, start[0] + Elems, oldImage.mapping().extents()[0] - 2 * KernelSize), - alpaka::math::min(acc, start[1] + Elems, oldImage.mapping().extents()[1] - 2 * KernelSize), + alpaka::math::min(acc, start[0] + Elems, oldImage.extents()[0] - 2 * KernelSize), + alpaka::math::min(acc, start[1] + Elems, oldImage.extents()[1] - 2 * KernelSize), }; LLAMA_INDEPENDENT_DATA diff --git a/examples/alpaka/pic/pic.cpp b/examples/alpaka/pic/pic.cpp index 66d9f96226..17515d880c 100644 --- a/examples/alpaka/pic/pic.cpp +++ b/examples/alpaka/pic/pic.cpp @@ -193,10 +193,10 @@ void output(int n, const ParticleView& particles) }; auto addFloat = [&](float f) { buffer.push_back(swapBytes(f)); }; - const auto pointCount = particles.mapping().extents()[0]; + const auto pointCount = particles.extents()[0]; outP << "POINTS " << pointCount << " float\n"; buffer.reserve(pointCount * 3); - for(auto i : llama::ArrayIndexRange{particles.mapping().extents()}) + for(auto i : llama::ArrayIndexRange{particles.extents()}) { auto p = particles(i); addFloat(0); @@ -207,7 +207,7 @@ void output(int n, const ParticleView& particles) outP << "POINT_DATA " << pointCount << "\nVECTORS velocity float\n"; buffer.clear(); - for(auto i : llama::ArrayIndexRange{particles.mapping().extents()}) + for(auto i : llama::ArrayIndexRange{particles.extents()}) { auto p = particles(i); addFloat(p(U{}, Z{})); @@ -218,13 +218,13 @@ void output(int n, const ParticleView& particles) outP << "SCALARS q float 1\nLOOKUP_TABLE default\n"; buffer.clear(); - for(auto i : llama::ArrayIndexRange{particles.mapping().extents()}) + for(auto i : llama::ArrayIndexRange{particles.extents()}) addFloat(particles(i)(Q{})); flushBuffer(); outP << "SCALARS m float 1\nLOOKUP_TABLE default\n"; buffer.clear(); - for(auto i : llama::ArrayIndexRange{particles.mapping().extents()}) + for(auto i : llama::ArrayIndexRange{particles.extents()}) addFloat(particles(i)(M{})); flushBuffer(); } diff --git a/examples/alpaka/vectoradd/vectoradd.cpp b/examples/alpaka/vectoradd/vectoradd.cpp index 9f589e3c6d..7db34ee7b1 100644 --- a/examples/alpaka/vectoradd/vectoradd.cpp +++ b/examples/alpaka/vectoradd/vectoradd.cpp @@ -53,7 +53,7 @@ struct ComputeKernel LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View a, View b) const { const auto ti = alpaka::getIdx(acc)[0]; - const auto [n] = a.mapping().extents(); + const auto [n] = a.extents(); const auto start = ti * Elems; const auto end = alpaka::math::min(acc, start + Elems, n); diff --git a/examples/memmap/memmap.cpp b/examples/memmap/memmap.cpp index 655b1e5fd0..a001df1d27 100644 --- a/examples/memmap/memmap.cpp +++ b/examples/memmap/memmap.cpp @@ -40,7 +40,7 @@ auto computeCentroid(const View& triangles) llama::One centroid{}; for(const auto& t : triangles) centroid += t(tag::a) + t(tag::b) + t(tag::c); - return centroid / triangles.mapping().extents()[0] / 3; + return centroid / triangles.extents()[0] / 3; } auto main(int argc, const char* argv[]) -> int diff --git a/examples/root/lhcb_analysis/lhcb.cpp b/examples/root/lhcb_analysis/lhcb.cpp index 6365b5d89a..130a5a014a 100644 --- a/examples/root/lhcb_analysis/lhcb.cpp +++ b/examples/root/lhcb_analysis/lhcb.cpp @@ -176,7 +176,7 @@ namespace auto hists = std::vector(omp_get_max_threads(), TH1D("B_mass", mappingName.c_str(), 500, 5050, 5500)); auto begin = std::chrono::steady_clock::now(); - const RE::NTupleSize_t n = view.mapping().extents()[0]; + const RE::NTupleSize_t n = view.extents()[0]; #pragma omp parallel for for(RE::NTupleSize_t i = 0; i < n; i++) { diff --git a/examples/vectoradd/vectoradd.cpp b/examples/vectoradd/vectoradd.cpp index c06211c512..813ac4006f 100644 --- a/examples/vectoradd/vectoradd.cpp +++ b/examples/vectoradd/vectoradd.cpp @@ -38,7 +38,7 @@ namespace usellama template [[gnu::noinline]] void compute(const View& a, const View& b, View& c) { - const auto [n] = c.mapping().extents(); + const auto [n] = c.extents(); for(std::size_t i = 0; i < n; i++) { diff --git a/examples/viewcopy/viewcopy.cpp b/examples/viewcopy/viewcopy.cpp index fc2be3f5e1..7f7d5eb9f4 100644 --- a/examples/viewcopy/viewcopy.cpp +++ b/examples/viewcopy/viewcopy.cpp @@ -53,8 +53,8 @@ void stdCopy(const llama::View& srcView, llama::View); - if(srcView.mapping().extents() != dstView.mapping().extents()) - throw std::runtime_error{"Array dimensions sizes are different"}; + if(srcView.extents() != dstView.extents()) + throw std::runtime_error{"Array extents are different"}; std::copy(srcView.begin(), srcView.end(), dstView.begin()); } @@ -106,7 +106,7 @@ template auto hash(const llama::View& view) { std::size_t acc = 0; - for(auto ad : llama::ArrayIndexRange{view.mapping().extents()}) + for(auto ad : llama::ArrayIndexRange{view.extents()}) llama::forEachLeafCoord([&](auto rc) { boost::hash_combine(acc, view(ad)(rc)); }); return acc; } diff --git a/include/llama/BlobAllocators.hpp b/include/llama/BlobAllocators.hpp index 52ffe02d75..058a3d4c67 100644 --- a/include/llama/BlobAllocators.hpp +++ b/include/llama/BlobAllocators.hpp @@ -149,6 +149,12 @@ namespace llama::bloballoc /// on the view before passing it to the kernel. struct CudaMalloc { + inline static const auto deleter = [](void* p) + { + if(const auto code = cudaFree(p); code != cudaSuccess) + throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code)); + }; + template inline auto operator()(std::integral_constant, std::size_t count) const { @@ -157,11 +163,6 @@ namespace llama::bloballoc throw std::runtime_error(std::string{"cudaMalloc failed with code "} + cudaGetErrorString(code)); if(reinterpret_cast(p) & (FieldAlignment - 1 != 0u)) throw std::runtime_error{"cudaMalloc does not align sufficiently"}; - auto deleter = [](void* p) - { - if(const auto code = cudaFree(p); code != cudaSuccess) - throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code)); - }; return std::unique_ptr(p, deleter); } }; diff --git a/include/llama/Copy.hpp b/include/llama/Copy.hpp index 9c0b19dc43..30c3963041 100644 --- a/include/llama/Copy.hpp +++ b/include/llama/Copy.hpp @@ -57,7 +57,7 @@ namespace llama internal::assertTrivialCopyable(); // TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions - if(srcView.mapping().extents() != dstView.mapping().extents()) + if(srcView.extents() != dstView.extents()) throw std::runtime_error{"Array dimensions sizes are different"}; // TODO(bgruber): this is maybe not the best parallel copying strategy @@ -85,7 +85,7 @@ namespace llama std::is_same_v, "The source and destination record dimensions must be the same"); - if(srcView.mapping().extents() != dstView.mapping().extents()) + if(srcView.extents() != dstView.extents()) throw std::runtime_error{"Array dimensions sizes are different"}; auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE @@ -95,7 +95,7 @@ namespace llama }; constexpr auto dims = SrcMapping::ArrayExtents::rank; - const auto extents = srcView.mapping().extents().toArray(); + const auto extents = srcView.extents().toArray(); const auto workPerThread = (extents[0] + threadCount - 1) / threadCount; const auto start = threadId * workPerThread; const auto end = std::min((threadId + 1) * workPerThread, static_cast(extents[0])); @@ -162,7 +162,7 @@ namespace llama static constexpr auto lanesSrc = internal::aosoaLanes; static constexpr auto lanesDst = internal::aosoaLanes; - if(srcView.mapping().extents() != dstView.mapping().extents()) + if(srcView.extents() != dstView.extents()) throw std::runtime_error{"Array dimensions sizes are different"}; static constexpr auto srcIsAoSoA = lanesSrc != std::numeric_limits::max(); @@ -176,7 +176,7 @@ namespace llama !dstIsAoSoA || std::tuple_size_v == 1, "Implementation assumes AoSoA with single blob"); - const auto flatSize = product(dstView.mapping().extents()); + const auto flatSize = product(dstView.extents()); // TODO(bgruber): implement the following by adding additional copy loops for the remaining elements if(!srcIsAoSoA && flatSize % lanesDst != 0) diff --git a/include/llama/Simd.hpp b/include/llama/Simd.hpp index 2f4b7d2be8..d0f17f3a81 100644 --- a/include/llama/Simd.hpp +++ b/include/llama/Simd.hpp @@ -206,7 +206,7 @@ namespace llama } else { - auto b = ArrayIndexIterator{srcRef.view.mapping().extents(), srcRef.arrayIndex()}; + auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()}; ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy for(auto i = 0; i < Traits::lanes; i++) reinterpret_cast(&elemSimd)[i] @@ -248,7 +248,7 @@ namespace llama // TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which // direction should we collect SIMD values? const ElementSimd elemSimd = srcSimd(rc); - auto b = ArrayIndexIterator{dstRef.view.mapping().extents(), dstRef.arrayIndex()}; + auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()}; for(auto i = 0; i < Traits::lanes; i++) dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc)) = reinterpret_cast(&elemSimd)[i]; // scalar store diff --git a/include/llama/Vector.hpp b/include/llama/Vector.hpp index facde55f81..ce4d8dde2a 100644 --- a/include/llama/Vector.hpp +++ b/include/llama/Vector.hpp @@ -182,7 +182,7 @@ namespace llama LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> size_type { - return m_view.mapping().extents()[0]; + return m_view.extents()[0]; } // NOLINTNEXTLINE(readability-identifier-naming) diff --git a/include/llama/View.hpp b/include/llama/View.hpp index 5146ac6f7c..036a7a5b5e 100644 --- a/include/llama/View.hpp +++ b/include/llama/View.hpp @@ -127,7 +127,7 @@ namespace llama using View = View; using RecordDim = typename View::RecordDim; forEachADCoord( - view.mapping().extents(), + view.extents(), [&]([[maybe_unused]] typename View::ArrayIndex ai) { if constexpr(isRecordDim) @@ -458,6 +458,11 @@ namespace llama return static_cast(*this); } + LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents + { + return mapping().extents(); + } + LLAMA_FN_HOST_ACC_INLINE auto accessor() -> Accessor& { return static_cast(*this); @@ -569,25 +574,25 @@ namespace llama LLAMA_FN_HOST_ACC_INLINE auto begin() -> iterator { - return {ArrayIndexRange{mapping().extents()}.begin(), this}; + return {ArrayIndexRange{extents()}.begin(), this}; } LLAMA_FN_HOST_ACC_INLINE auto begin() const -> const_iterator { - return {ArrayIndexRange{mapping().extents()}.begin(), this}; + return {ArrayIndexRange{extents()}.begin(), this}; } LLAMA_FN_HOST_ACC_INLINE auto end() -> iterator { - return {ArrayIndexRange{mapping().extents()}.end(), this}; + return {ArrayIndexRange{extents()}.end(), this}; } LLAMA_FN_HOST_ACC_INLINE auto end() const -> const_iterator { - return {ArrayIndexRange{mapping().extents()}.end(), this}; + return {ArrayIndexRange{extents()}.end(), this}; } Array storageBlobs; diff --git a/tests/common.hpp b/tests/common.hpp index eece288c4b..e9382e03ca 100644 --- a/tests/common.hpp +++ b/tests/common.hpp @@ -144,7 +144,7 @@ void iotaFillView(View& view) { std::int64_t value = 0; using RecordDim = typename View::RecordDim; - for(auto ai : llama::ArrayIndexRange{view.mapping().extents()}) + for(auto ai : llama::ArrayIndexRange{view.extents()}) { if constexpr(llama::isRecordDim) { @@ -169,7 +169,7 @@ void iotaCheckView(View& view) { std::int64_t value = 0; using RecordDim = typename View::RecordDim; - for(auto ai : llama::ArrayIndexRange{view.mapping().extents()}) + for(auto ai : llama::ArrayIndexRange{view.extents()}) { if constexpr(llama::isRecordDim) { diff --git a/tests/mapping.Null.cpp b/tests/mapping.Null.cpp index bfa6053d9e..a0739967be 100644 --- a/tests/mapping.Null.cpp +++ b/tests/mapping.Null.cpp @@ -13,7 +13,7 @@ TEST_CASE("mapping.Null") auto view = llama::allocView(mapping); iotaFillView(view); - for(auto ai : llama::ArrayIndexRange{view.mapping().extents()}) + for(auto ai : llama::ArrayIndexRange{view.extents()}) llama::forEachLeafCoord( [&](auto rc) {