From dfd7facc0577f0cd1bd53620ede431834d7ffe37 Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
Date: Mon, 27 Feb 2023 15:46:35 +0100
Subject: [PATCH] Add View::extents()

---
 examples/alpaka/asyncblur/asyncblur.cpp |  8 ++++----
 examples/alpaka/pic/pic.cpp             | 10 +++++-----
 examples/alpaka/vectoradd/vectoradd.cpp |  2 +-
 examples/memmap/memmap.cpp              |  2 +-
 examples/root/lhcb_analysis/lhcb.cpp    |  2 +-
 examples/vectoradd/vectoradd.cpp        |  2 +-
 examples/viewcopy/viewcopy.cpp          |  6 +++---
 include/llama/BlobAllocators.hpp        | 11 ++++++-----
 include/llama/Copy.hpp                  | 10 +++++-----
 include/llama/Simd.hpp                  |  4 ++--
 include/llama/Vector.hpp                |  2 +-
 include/llama/View.hpp                  | 15 ++++++++++-----
 tests/common.hpp                        |  4 ++--
 tests/mapping.Null.cpp                  |  2 +-
 14 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/examples/alpaka/asyncblur/asyncblur.cpp b/examples/alpaka/asyncblur/asyncblur.cpp
index 32bd8a558b..30e7ea7871 100644
--- a/examples/alpaka/asyncblur/asyncblur.cpp
+++ b/examples/alpaka/asyncblur/asyncblur.cpp
@@ -88,8 +88,8 @@ struct BlurKernel
             const int bStart[2]
                 = {bi[0] * ElemsPerBlock + threadIdxInBlock[0], bi[1] * ElemsPerBlock + threadIdxInBlock[1]};
             const int bEnd[2] = {
-                alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[0]),
-                alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[1]),
+                alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[0]),
+                alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[1]),
             };
             LLAMA_INDEPENDENT_DATA
             for(auto y = bStart[0]; y < bEnd[0]; y += threadsPerBlock)
@@ -102,8 +102,8 @@ struct BlurKernel
 
         const int start[2] = {ti[0] * Elems, ti[1] * Elems};
         const int end[2] = {
-            alpaka::math::min(acc, start[0] + Elems, oldImage.mapping().extents()[0] - 2 * KernelSize),
-            alpaka::math::min(acc, start[1] + Elems, oldImage.mapping().extents()[1] - 2 * KernelSize),
+            alpaka::math::min(acc, start[0] + Elems, oldImage.extents()[0] - 2 * KernelSize),
+            alpaka::math::min(acc, start[1] + Elems, oldImage.extents()[1] - 2 * KernelSize),
         };
 
         LLAMA_INDEPENDENT_DATA
diff --git a/examples/alpaka/pic/pic.cpp b/examples/alpaka/pic/pic.cpp
index 66d9f96226..17515d880c 100644
--- a/examples/alpaka/pic/pic.cpp
+++ b/examples/alpaka/pic/pic.cpp
@@ -193,10 +193,10 @@ void output(int n, const ParticleView& particles)
     };
     auto addFloat = [&](float f) { buffer.push_back(swapBytes(f)); };
 
-    const auto pointCount = particles.mapping().extents()[0];
+    const auto pointCount = particles.extents()[0];
     outP << "POINTS " << pointCount << " float\n";
     buffer.reserve(pointCount * 3);
-    for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+    for(auto i : llama::ArrayIndexRange{particles.extents()})
     {
         auto p = particles(i);
         addFloat(0);
@@ -207,7 +207,7 @@ void output(int n, const ParticleView& particles)
 
     outP << "POINT_DATA " << pointCount << "\nVECTORS velocity float\n";
     buffer.clear();
-    for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+    for(auto i : llama::ArrayIndexRange{particles.extents()})
     {
         auto p = particles(i);
         addFloat(p(U{}, Z{}));
@@ -218,13 +218,13 @@ void output(int n, const ParticleView& particles)
 
     outP << "SCALARS q float 1\nLOOKUP_TABLE default\n";
     buffer.clear();
-    for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+    for(auto i : llama::ArrayIndexRange{particles.extents()})
         addFloat(particles(i)(Q{}));
     flushBuffer();
 
     outP << "SCALARS m float 1\nLOOKUP_TABLE default\n";
     buffer.clear();
-    for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+    for(auto i : llama::ArrayIndexRange{particles.extents()})
         addFloat(particles(i)(M{}));
     flushBuffer();
 }
diff --git a/examples/alpaka/vectoradd/vectoradd.cpp b/examples/alpaka/vectoradd/vectoradd.cpp
index 9f589e3c6d..7db34ee7b1 100644
--- a/examples/alpaka/vectoradd/vectoradd.cpp
+++ b/examples/alpaka/vectoradd/vectoradd.cpp
@@ -53,7 +53,7 @@ struct ComputeKernel
     LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View a, View b) const
     {
         const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
-        const auto [n] = a.mapping().extents();
+        const auto [n] = a.extents();
         const auto start = ti * Elems;
         const auto end = alpaka::math::min(acc, start + Elems, n);
 
diff --git a/examples/memmap/memmap.cpp b/examples/memmap/memmap.cpp
index 655b1e5fd0..a001df1d27 100644
--- a/examples/memmap/memmap.cpp
+++ b/examples/memmap/memmap.cpp
@@ -40,7 +40,7 @@ auto computeCentroid(const View& triangles)
     llama::One<Vertex> centroid{};
     for(const auto& t : triangles)
         centroid += t(tag::a) + t(tag::b) + t(tag::c);
-    return centroid / triangles.mapping().extents()[0] / 3;
+    return centroid / triangles.extents()[0] / 3;
 }
 
 auto main(int argc, const char* argv[]) -> int
diff --git a/examples/root/lhcb_analysis/lhcb.cpp b/examples/root/lhcb_analysis/lhcb.cpp
index 6365b5d89a..130a5a014a 100644
--- a/examples/root/lhcb_analysis/lhcb.cpp
+++ b/examples/root/lhcb_analysis/lhcb.cpp
@@ -176,7 +176,7 @@ namespace
         auto hists = std::vector<TH1D>(omp_get_max_threads(), TH1D("B_mass", mappingName.c_str(), 500, 5050, 5500));
 
         auto begin = std::chrono::steady_clock::now();
-        const RE::NTupleSize_t n = view.mapping().extents()[0];
+        const RE::NTupleSize_t n = view.extents()[0];
 #pragma omp parallel for
         for(RE::NTupleSize_t i = 0; i < n; i++)
         {
diff --git a/examples/vectoradd/vectoradd.cpp b/examples/vectoradd/vectoradd.cpp
index c06211c512..813ac4006f 100644
--- a/examples/vectoradd/vectoradd.cpp
+++ b/examples/vectoradd/vectoradd.cpp
@@ -38,7 +38,7 @@ namespace usellama
     template<typename View>
     [[gnu::noinline]] void compute(const View& a, const View& b, View& c)
     {
-        const auto [n] = c.mapping().extents();
+        const auto [n] = c.extents();
 
         for(std::size_t i = 0; i < n; i++)
         {
diff --git a/examples/viewcopy/viewcopy.cpp b/examples/viewcopy/viewcopy.cpp
index fc2be3f5e1..7f7d5eb9f4 100644
--- a/examples/viewcopy/viewcopy.cpp
+++ b/examples/viewcopy/viewcopy.cpp
@@ -53,8 +53,8 @@ void stdCopy(const llama::View<SrcMapping, SrcBlobType>& srcView, llama::View<Ds
 {
     static_assert(std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>);
 
-    if(srcView.mapping().extents() != dstView.mapping().extents())
-        throw std::runtime_error{"Array dimensions sizes are different"};
+    if(srcView.extents() != dstView.extents())
+        throw std::runtime_error{"Array extents are different"};
 
     std::copy(srcView.begin(), srcView.end(), dstView.begin());
 }
@@ -106,7 +106,7 @@ template<typename Mapping, typename BlobType>
 auto hash(const llama::View<Mapping, BlobType>& view)
 {
     std::size_t acc = 0;
-    for(auto ad : llama::ArrayIndexRange{view.mapping().extents()})
+    for(auto ad : llama::ArrayIndexRange{view.extents()})
         llama::forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) { boost::hash_combine(acc, view(ad)(rc)); });
     return acc;
 }
diff --git a/include/llama/BlobAllocators.hpp b/include/llama/BlobAllocators.hpp
index 52ffe02d75..058a3d4c67 100644
--- a/include/llama/BlobAllocators.hpp
+++ b/include/llama/BlobAllocators.hpp
@@ -149,6 +149,12 @@ namespace llama::bloballoc
     /// on the view before passing it to the kernel.
     struct CudaMalloc
     {
+        inline static const auto deleter = [](void* p)
+        {
+            if(const auto code = cudaFree(p); code != cudaSuccess)
+                throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
+        };
+
         template<std::size_t FieldAlignment>
         inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count) const
         {
@@ -157,11 +163,6 @@ namespace llama::bloballoc
                 throw std::runtime_error(std::string{"cudaMalloc failed with code "} + cudaGetErrorString(code));
             if(reinterpret_cast<std::uintptr_t>(p) & (FieldAlignment - 1 != 0u))
                 throw std::runtime_error{"cudaMalloc does not align sufficiently"};
-            auto deleter = [](void* p)
-            {
-                if(const auto code = cudaFree(p); code != cudaSuccess)
-                    throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
-            };
             return std::unique_ptr<std::byte[], decltype(deleter)>(p, deleter);
         }
     };
diff --git a/include/llama/Copy.hpp b/include/llama/Copy.hpp
index 9c0b19dc43..30c3963041 100644
--- a/include/llama/Copy.hpp
+++ b/include/llama/Copy.hpp
@@ -57,7 +57,7 @@ namespace llama
         internal::assertTrivialCopyable<typename Mapping::RecordDim>();
 
         // TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
-        if(srcView.mapping().extents() != dstView.mapping().extents())
+        if(srcView.extents() != dstView.extents())
             throw std::runtime_error{"Array dimensions sizes are different"};
 
         // TODO(bgruber): this is maybe not the best parallel copying strategy
@@ -85,7 +85,7 @@ namespace llama
             std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
             "The source and destination record dimensions must be the same");
 
-        if(srcView.mapping().extents() != dstView.mapping().extents())
+        if(srcView.extents() != dstView.extents())
             throw std::runtime_error{"Array dimensions sizes are different"};
 
         auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE
@@ -95,7 +95,7 @@ namespace llama
         };
 
         constexpr auto dims = SrcMapping::ArrayExtents::rank;
-        const auto extents = srcView.mapping().extents().toArray();
+        const auto extents = srcView.extents().toArray();
         const auto workPerThread = (extents[0] + threadCount - 1) / threadCount;
         const auto start = threadId * workPerThread;
         const auto end = std::min((threadId + 1) * workPerThread, static_cast<std::size_t>(extents[0]));
@@ -162,7 +162,7 @@ namespace llama
         static constexpr auto lanesSrc = internal::aosoaLanes<SrcMapping>;
         static constexpr auto lanesDst = internal::aosoaLanes<DstMapping>;
 
-        if(srcView.mapping().extents() != dstView.mapping().extents())
+        if(srcView.extents() != dstView.extents())
             throw std::runtime_error{"Array dimensions sizes are different"};
 
         static constexpr auto srcIsAoSoA = lanesSrc != std::numeric_limits<std::size_t>::max();
@@ -176,7 +176,7 @@ namespace llama
             !dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1,
             "Implementation assumes AoSoA with single blob");
 
-        const auto flatSize = product(dstView.mapping().extents());
+        const auto flatSize = product(dstView.extents());
 
         // TODO(bgruber): implement the following by adding additional copy loops for the remaining elements
         if(!srcIsAoSoA && flatSize % lanesDst != 0)
diff --git a/include/llama/Simd.hpp b/include/llama/Simd.hpp
index 2f4b7d2be8..d0f17f3a81 100644
--- a/include/llama/Simd.hpp
+++ b/include/llama/Simd.hpp
@@ -206,7 +206,7 @@ namespace llama
             }
             else
             {
-                auto b = ArrayIndexIterator{srcRef.view.mapping().extents(), srcRef.arrayIndex()};
+                auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()};
                 ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
                 for(auto i = 0; i < Traits::lanes; i++)
                     reinterpret_cast<FieldType*>(&elemSimd)[i]
@@ -248,7 +248,7 @@ namespace llama
                 // TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which
                 // direction should we collect SIMD values?
                 const ElementSimd elemSimd = srcSimd(rc);
-                auto b = ArrayIndexIterator{dstRef.view.mapping().extents(), dstRef.arrayIndex()};
+                auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()};
                 for(auto i = 0; i < Traits::lanes; i++)
                     dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc))
                         = reinterpret_cast<const FieldType*>(&elemSimd)[i]; // scalar store
diff --git a/include/llama/Vector.hpp b/include/llama/Vector.hpp
index facde55f81..ce4d8dde2a 100644
--- a/include/llama/Vector.hpp
+++ b/include/llama/Vector.hpp
@@ -182,7 +182,7 @@ namespace llama
 
         LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> size_type
         {
-            return m_view.mapping().extents()[0];
+            return m_view.extents()[0];
         }
 
         // NOLINTNEXTLINE(readability-identifier-naming)
diff --git a/include/llama/View.hpp b/include/llama/View.hpp
index 5146ac6f7c..036a7a5b5e 100644
--- a/include/llama/View.hpp
+++ b/include/llama/View.hpp
@@ -127,7 +127,7 @@ namespace llama
         using View = View<Mapping, BlobType, Accessor>;
         using RecordDim = typename View::RecordDim;
         forEachADCoord(
-            view.mapping().extents(),
+            view.extents(),
             [&]([[maybe_unused]] typename View::ArrayIndex ai)
             {
                 if constexpr(isRecordDim<RecordDim>)
@@ -458,6 +458,11 @@ namespace llama
             return static_cast<const Mapping&>(*this);
         }
 
+        LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents
+        {
+            return mapping().extents();
+        }
+
         LLAMA_FN_HOST_ACC_INLINE auto accessor() -> Accessor&
         {
             return static_cast<Accessor&>(*this);
@@ -569,25 +574,25 @@ namespace llama
         LLAMA_FN_HOST_ACC_INLINE
         auto begin() -> iterator
         {
-            return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+            return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
         }
 
         LLAMA_FN_HOST_ACC_INLINE
         auto begin() const -> const_iterator
         {
-            return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+            return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
         }
 
         LLAMA_FN_HOST_ACC_INLINE
         auto end() -> iterator
         {
-            return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+            return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
         }
 
         LLAMA_FN_HOST_ACC_INLINE
         auto end() const -> const_iterator
         {
-            return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+            return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
         }
 
         Array<BlobType, Mapping::blobCount> storageBlobs;
diff --git a/tests/common.hpp b/tests/common.hpp
index eece288c4b..e9382e03ca 100644
--- a/tests/common.hpp
+++ b/tests/common.hpp
@@ -144,7 +144,7 @@ void iotaFillView(View& view)
 {
     std::int64_t value = 0;
     using RecordDim = typename View::RecordDim;
-    for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+    for(auto ai : llama::ArrayIndexRange{view.extents()})
     {
         if constexpr(llama::isRecordDim<RecordDim>)
         {
@@ -169,7 +169,7 @@ void iotaCheckView(View& view)
 {
     std::int64_t value = 0;
     using RecordDim = typename View::RecordDim;
-    for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+    for(auto ai : llama::ArrayIndexRange{view.extents()})
     {
         if constexpr(llama::isRecordDim<RecordDim>)
         {
diff --git a/tests/mapping.Null.cpp b/tests/mapping.Null.cpp
index bfa6053d9e..a0739967be 100644
--- a/tests/mapping.Null.cpp
+++ b/tests/mapping.Null.cpp
@@ -13,7 +13,7 @@ TEST_CASE("mapping.Null")
     auto view = llama::allocView(mapping);
     iotaFillView(view);
 
-    for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+    for(auto ai : llama::ArrayIndexRange{view.extents()})
         llama::forEachLeafCoord<Particle>(
             [&](auto rc)
             {