From 1fea13bb87f40673f0ad8fbfe18f1bbcb4e54ff4 Mon Sep 17 00:00:00 2001
From: Julian Lenz <j.lenz@hzdr.de>
Date: Fri, 8 Nov 2024 09:49:59 +0100
Subject: [PATCH 1/2] Copy .clang-format from alpaka

---
 .clang-format | 110 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 27 deletions(-)

diff --git a/.clang-format b/.clang-format
index 63ec7485..7249ac3a 100644
--- a/.clang-format
+++ b/.clang-format
@@ -4,15 +4,16 @@ Standard: c++20
 DisableFormat: false
 AccessModifierOffset: -4
 AlignAfterOpenBracket: AlwaysBreak
+AlignArrayOfStructures: None
 AlignConsecutiveAssignments: false
 AlignConsecutiveBitFields: false
 AlignConsecutiveDeclarations: false
 AlignConsecutiveMacros: false
 AlignEscapedNewlines: Right
-AlignOperands: DontAlign
-AlignTrailingComments: false
+AlignOperands: Align
+AlignTrailingComments:
+  Kind: Never
 AllowAllArgumentsOnNextLine: false
-AllowAllConstructorInitializersOnNextLine: false
 AllowAllParametersOfDeclarationOnNextLine: false
 AllowShortBlocksOnASingleLine: Never
 AllowShortCaseLabelsOnASingleLine: false
@@ -26,59 +27,90 @@ AlwaysBreakBeforeMultilineStrings: false
 AlwaysBreakTemplateDeclarations: Yes
 BinPackArguments: false
 BinPackParameters: false
+BitFieldColonSpacing: Both
+BreakAfterAttributes: Never
 BreakBeforeBinaryOperators: All
 BreakBeforeBraces: Allman
+BreakBeforeConceptDeclarations: Always
+BreakBeforeInlineASMColon: OnlyMultiline
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializers: BeforeComma
 BreakInheritanceList: BeforeComma
 BreakStringLiterals: true
 ColumnLimit: 119
-CommentPragmas: "^ COMMENT pragma:"
+CommentPragmas: '^ COMMENT pragma:'
 CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
-DeriveLineEnding: true
 DerivePointerAlignment: false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: Always
 ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: true
 IncludeBlocks: Regroup
-IncludeIsMainRegex: "(Test)?$"
-IncludeIsMainSourceRegex: ""
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
 IndentCaseBlocks: true
 IndentCaseLabels: false
 IndentExternBlock: AfterExternBlock
 IndentGotoLabels: true
 IndentPPDirectives: AfterHash
+IndentRequiresClause: false
 IndentWidth: 4
 IndentWrappedFunctionNames: false
+InsertBraces: false
+InsertNewlineAtEOF: true
+IntegerLiteralSeparator:
+  Binary: 4
+  Decimal: 3
+  DecimalMinDigits: 7
+  Hex: 4
 KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ""
-MacroBlockEnd: ""
+LambdaBodyIndentation: Signature
+LineEnding: DeriveLF
+MacroBlockBegin: ''
+MacroBlockEnd: ''
 MaxEmptyLinesToKeep: 2
 NamespaceIndentation: All
+PackConstructorInitializers: CurrentLine
 PenaltyBreakAssignment: 2
 PenaltyBreakBeforeFirstCallParameter: 19
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0 # default made explicit here
 PenaltyBreakString: 1000
 PenaltyBreakTemplateDeclaration: 10
 PenaltyExcessCharacter: 1000000
+PenaltyIndentedWhitespace: 0 # default made explicit here
 PenaltyReturnTypeOnItsOwnLine: 1000
 PointerAlignment: Left
+PPIndentWidth: -1 # follow IndentWidth
+QualifierAlignment: Custom
+QualifierOrder: ['friend', 'static', 'inline', 'constexpr', 'type', 'const', 'volatile', 'restrict']
+ReferenceAlignment: Pointer # follow PointerAlignment
 ReflowComments: true
+RemoveBracesLLVM: false
+RemoveSemicolon: false
+RequiresClausePosition: WithPreceding
+RequiresExpressionIndentation: OuterScope
+ShortNamespaceLines: 0
 SortIncludes: true
-SortUsingDeclarations: true
+SortUsingDeclarations: Lexicographic
+SeparateDefinitionBlocks: Always
 SpaceAfterCStyleCast: true
 SpaceAfterLogicalNot: false
 SpaceAfterTemplateKeyword: false
+SpaceAroundPointerQualifiers: Default # follow PointerAlignment
 SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
 SpaceBeforeCpp11BracedList: false
 SpaceBeforeCtorInitializerColon: true
 SpaceBeforeInheritanceColon: true
 SpaceBeforeParens: Never
 SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
 SpaceInEmptyBlock: false
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 1
@@ -86,24 +118,48 @@ SpacesInAngles: false
 SpacesInConditionalStatement: false
 SpacesInContainerLiterals: false
 SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-SpaceBeforeSquareBrackets: false
 TabWidth: 4
 UseCRLF: false
 UseTab: Never
-# Project specific options -- uncomment and modify as needed
-#IncludeCategories:
-#  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
-#    Priority:        2
-#    SortPriority:    0
-#  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
-#    Priority:        3
-#    SortPriority:    0
-#  - Regex:           '.*'
-#    Priority:        1
-#    SortPriority:    0
-
-# Future options - not supported in clang-format 11
-# BitFieldColonSpacing: Both
-# OperandAlignmentStyle: Align
+# Project specific options
+#AttributeMacros: []
+#ForEachMacros: []
+#IfMacros: []
+IncludeCategories:
+  # Local headers (in "") above all else
+  - Regex: '"([A-Za-z0-9.\/-_])+"'
+    Priority: 1
+  # "alpaka/foo.hpp" after local headers (occur inside alpaka)
+  - Regex: '"alpaka/([A-Za-z0-9.\/-_])+"'
+    Priority: 2
+  # <alpaka/foo.hpp> after local headers (occur outside alpaka in examples and test)
+  - Regex: '<alpaka/([A-Za-z0-9.\/-_])+>'
+    Priority: 3
+  # C++ standard library headers are the last group to be included
+  - Regex: '<([A-Za-z0-9\/-_])+>'
+    Priority: 5
+  # Includes that made it this far are third-party headers and will be placed
+  # below alpaka's includes
+  - Regex: '<([A-Za-z0-9.\/-_])+>'
+    Priority: 4
+# Macros: []
+# NamespaceMacros: []
+StatementAttributeLikeMacros:
+  - 'ALPAKA_DEVICE_VOLATILE'
+  - 'ALPAKA_FN_ACC'
+  - 'ALPAKA_FN_EXTERN'
+  - 'ALPAKA_FN_HOST'
+  - 'ALPAKA_FN_HOST_ACC'
+  - 'ALPAKA_FN_INLINE'
+  - 'ALPAKA_STATIC_ACC_MEM_CONSTANT'
+  - 'ALPAKA_STATIC_ACC_MEM_GLOBAL'
+  - 'ALPAKA_UNROLL'
+  - 'ALPAKA_VECTORIZE_HINT'
+#StatementMacros: []
+#TypenameMacros: []
+#WhitespaceSensitiveMacros: []

From fbefa18015414c70847476570f60b6570e2e377f Mon Sep 17 00:00:00 2001
From: Third Party <mallocMC@hzdr.de>
Date: Fri, 8 Nov 2024 09:51:50 +0100
Subject: [PATCH 2/2] Run clang-format

---
 examples/mallocMC_example01.cpp               |  44 ++--
 examples/mallocMC_example03.cpp               |  16 +-
 .../mallocMC/alignmentPolicies/Noop.hpp       |   1 +
 .../mallocMC/alignmentPolicies/Shrink.hpp     |   2 +
 src/include/mallocMC/allocator.hpp            |   5 +-
 .../mallocMC/creationPolicies/OldMalloc.hpp   |   5 +-
 .../mallocMC/creationPolicies/Scatter.hpp     | 222 +++++++++---------
 src/include/mallocMC/device_allocator.hpp     |   9 +-
 .../mallocMC/distributionPolicies/Noop.hpp    |   7 +-
 .../distributionPolicies/XMallocSIMD.hpp      |   9 +-
 .../mallocMC/mallocMC_allocator_handle.hpp    |   6 +-
 src/include/mallocMC/mallocMC_utils.hpp       |   6 +-
 .../oOMPolicies/BadAllocException.hpp         |   1 +
 .../mallocMC/oOMPolicies/ReturnNull.hpp       |   1 +
 .../reservePoolPolicies/AlpakaBuf.hpp         |   3 +-
 .../reservePoolPolicies/CudaSetLimits.hpp     |   3 +-
 tests/dimensions.cpp                          |  55 ++---
 tests/policies.cpp                            |   5 +-
 tests/verify_heap.cpp                         | 112 ++++-----
 tests/verify_heap_config.hpp                  |   1 +
 20 files changed, 270 insertions(+), 243 deletions(-)

diff --git a/examples/mallocMC_example01.cpp b/examples/mallocMC_example01.cpp
index f64c99b0..c991f357 100644
--- a/examples/mallocMC_example01.cpp
+++ b/examples/mallocMC_example01.cpp
@@ -26,12 +26,14 @@
   THE SOFTWARE.
 */
 
-#include <algorithm>
 #include <alpaka/alpaka.hpp>
 #include <alpaka/example/ExampleDefaultAcc.hpp>
+
+#include <mallocMC/mallocMC.hpp>
+
+#include <algorithm>
 #include <cassert>
 #include <iostream>
-#include <mallocMC/mallocMC.hpp>
 #include <numeric>
 
 using Dim = alpaka::DimInt<1>;
@@ -84,7 +86,7 @@ auto main() -> int
     constexpr auto length = 100;
 
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
 
     auto const devProps = alpaka::getAccDevProps<Acc>(dev);
@@ -103,13 +105,13 @@ auto main() -> int
     // create arrays of arrays on the device
     {
         auto createArrayPointers
-            = [] ALPAKA_FN_ACC(const Acc& acc, int x, int y, ScatterAllocator::AllocatorHandle allocHandle)
+            = [] ALPAKA_FN_ACC(Acc const& acc, int x, int y, ScatterAllocator::AllocatorHandle allocHandle)
         {
             arA = (int**) allocHandle.malloc(acc, sizeof(int*) * x * y);
             arB = (int**) allocHandle.malloc(acc, sizeof(int*) * x * y);
             arC = (int**) allocHandle.malloc(acc, sizeof(int*) * x * y);
         };
-        const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
+        auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
         alpaka::enqueue(
             queue,
             alpaka::createTaskKernel<Acc>(
@@ -122,9 +124,9 @@ auto main() -> int
 
     // fill 2 of them all with ascending values
     {
-        auto fillArrays = [] ALPAKA_FN_ACC(const Acc& acc, int length, ScatterAllocator::AllocatorHandle allocHandle)
+        auto fillArrays = [] ALPAKA_FN_ACC(Acc const& acc, int length, ScatterAllocator::AllocatorHandle allocHandle)
         {
-            const auto id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
+            auto const id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
 
             arA[id] = (int*) allocHandle.malloc(acc, length * sizeof(int));
             arB[id] = (int*) allocHandle.malloc(acc, length * sizeof(int));
@@ -136,7 +138,7 @@ auto main() -> int
                 arB[id][i] = static_cast<int>(id * length + i);
             }
         };
-        const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
+        auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
         alpaka::enqueue(
             queue,
             alpaka::createTaskKernel<Acc>(workDiv, fillArrays, length, scatterAlloc.getAllocatorHandle()));
@@ -147,9 +149,9 @@ auto main() -> int
     {
         auto sumsBufferAcc = alpaka::allocBuf<int, Idx>(dev, Idx{block * grid});
 
-        auto addArrays = [] ALPAKA_FN_ACC(const Acc& acc, int length, int* sums)
+        auto addArrays = [] ALPAKA_FN_ACC(Acc const& acc, int length, int* sums)
         {
-            const auto id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
+            auto const id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
 
             sums[id] = 0;
             for(int i = 0; i < length; ++i)
@@ -158,25 +160,25 @@ auto main() -> int
                 sums[id] += arC[id][i];
             }
         };
-        const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
+        auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
         alpaka::enqueue(
             queue,
             alpaka::createTaskKernel<Acc>(workDiv, addArrays, length, alpaka::getPtrNative(sumsBufferAcc)));
 
         auto const platformCPU = alpaka::Platform<alpaka::DevCpu>{};
-        const auto hostDev = alpaka::getDevByIdx(platformCPU, 0);
+        auto const hostDev = alpaka::getDevByIdx(platformCPU, 0);
 
         auto sumsBufferHost = alpaka::allocBuf<int, Idx>(hostDev, Idx{block * grid});
         alpaka::memcpy(queue, sumsBufferHost, sumsBufferAcc, Idx{block * grid});
         alpaka::wait(queue);
 
-        const auto* sumsPtr = alpaka::getPtrNative(sumsBufferHost);
-        const auto sum = std::accumulate(sumsPtr, sumsPtr + block * grid, size_t{0});
+        auto const* sumsPtr = alpaka::getPtrNative(sumsBufferHost);
+        auto const sum = std::accumulate(sumsPtr, sumsPtr + block * grid, size_t{0});
         std::cout << "The sum of the arrays on GPU is " << sum << '\n';
     }
 
-    const auto n = static_cast<size_t>(block * grid * length);
-    const auto gaussian = n * (n - 1);
+    auto const n = static_cast<size_t>(block * grid * length);
+    auto const gaussian = n * (n - 1);
     std::cout << "The gaussian sum as comparison: " << gaussian << '\n';
 
     /*constexpr*/ if(mallocMC::Traits<ScatterAllocator>::providesAvailableSlots)
@@ -187,25 +189,25 @@ auto main() -> int
     }
 
     {
-        auto freeArrays = [] ALPAKA_FN_ACC(const Acc& acc, ScatterAllocator::AllocatorHandle allocHandle)
+        auto freeArrays = [] ALPAKA_FN_ACC(Acc const& acc, ScatterAllocator::AllocatorHandle allocHandle)
         {
-            const auto id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
+            auto const id = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
             allocHandle.free(acc, arA[id]);
             allocHandle.free(acc, arB[id]);
             allocHandle.free(acc, arC[id]);
         };
-        const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
+        auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{grid}, Idx{block}, Idx{1}};
         alpaka::enqueue(queue, alpaka::createTaskKernel<Acc>(workDiv, freeArrays, scatterAlloc.getAllocatorHandle()));
     }
 
     {
-        auto freeArrayPointers = [] ALPAKA_FN_ACC(const Acc& acc, ScatterAllocator::AllocatorHandle allocHandle)
+        auto freeArrayPointers = [] ALPAKA_FN_ACC(Acc const& acc, ScatterAllocator::AllocatorHandle allocHandle)
         {
             allocHandle.free(acc, arA);
             allocHandle.free(acc, arB);
             allocHandle.free(acc, arC);
         };
-        const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
+        auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
         alpaka::enqueue(
             queue,
             alpaka::createTaskKernel<Acc>(workDiv, freeArrayPointers, scatterAlloc.getAllocatorHandle()));
diff --git a/examples/mallocMC_example03.cpp b/examples/mallocMC_example03.cpp
index f2262b63..6551e0d7 100644
--- a/examples/mallocMC_example03.cpp
+++ b/examples/mallocMC_example03.cpp
@@ -26,12 +26,14 @@
   THE SOFTWARE.
 */
 
-#include <algorithm>
 #include <alpaka/alpaka.hpp>
 #include <alpaka/example/ExampleDefaultAcc.hpp>
+
+#include <mallocMC/mallocMC.hpp>
+
+#include <algorithm>
 #include <cassert>
 #include <iostream>
-#include <mallocMC/mallocMC.hpp>
 #include <numeric>
 #include <vector>
 
@@ -75,14 +77,14 @@ ALPAKA_STATIC_ACC_MEM_GLOBAL int* arA = nullptr;
 
 struct ExampleKernel
 {
-    ALPAKA_FN_ACC void operator()(const Acc& acc, ScatterAllocator::AllocatorHandle allocHandle) const
+    ALPAKA_FN_ACC void operator()(Acc const& acc, ScatterAllocator::AllocatorHandle allocHandle) const
     {
-        const auto id = static_cast<uint32_t>(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0]);
+        auto const id = static_cast<uint32_t>(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0]);
         if(id == 0)
             arA = (int*) allocHandle.malloc(acc, sizeof(int) * 32);
         // wait the the malloc from thread zero is not changing the result for some threads
         alpaka::syncBlockThreads(acc);
-        const auto slots = allocHandle.getAvailableSlots(acc, 1);
+        auto const slots = allocHandle.getAvailableSlots(acc, 1);
         if(arA != nullptr)
         {
             arA[id] = id;
@@ -101,14 +103,14 @@ struct ExampleKernel
 auto main() -> int
 {
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
     auto const devProps = alpaka::getAccDevProps<Acc>(dev);
     unsigned const block = std::min(static_cast<size_t>(32u), static_cast<size_t>(devProps.m_blockThreadCountMax));
 
     ScatterAllocator scatterAlloc(dev, queue, 1U * 1024U * 1024U * 1024U); // 1GB for device-side malloc
 
-    const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{block}, Idx{1}};
+    auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{block}, Idx{1}};
     alpaka::enqueue(queue, alpaka::createTaskKernel<Acc>(workDiv, ExampleKernel{}, scatterAlloc.getAllocatorHandle()));
 
     std::cout << "Slots from Host: " << scatterAlloc.getAvailableSlots(dev, queue, 1) << '\n';
diff --git a/src/include/mallocMC/alignmentPolicies/Noop.hpp b/src/include/mallocMC/alignmentPolicies/Noop.hpp
index 5b631d3c..ee176187 100644
--- a/src/include/mallocMC/alignmentPolicies/Noop.hpp
+++ b/src/include/mallocMC/alignmentPolicies/Noop.hpp
@@ -30,6 +30,7 @@
 #include "Noop.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cstdint>
 #include <string>
 #include <tuple>
diff --git a/src/include/mallocMC/alignmentPolicies/Shrink.hpp b/src/include/mallocMC/alignmentPolicies/Shrink.hpp
index 4fd01184..0eb495e9 100644
--- a/src/include/mallocMC/alignmentPolicies/Shrink.hpp
+++ b/src/include/mallocMC/alignmentPolicies/Shrink.hpp
@@ -34,6 +34,7 @@
 #include "Shrink.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cstdint>
 #include <iostream>
 #include <sstream>
@@ -50,6 +51,7 @@ namespace mallocMC
             {
                 using type = unsigned int;
             };
+
             template<>
             struct __PointerEquivalent<8>
             {
diff --git a/src/include/mallocMC/allocator.hpp b/src/include/mallocMC/allocator.hpp
index 51375588..9fe1f41b 100644
--- a/src/include/mallocMC/allocator.hpp
+++ b/src/include/mallocMC/allocator.hpp
@@ -35,6 +35,7 @@
 #include "mallocMC_utils.hpp"
 
 #include <alpaka/alpaka.hpp>
+
 #include <cstdint>
 #include <memory>
 #include <sstream>
@@ -140,7 +141,7 @@ namespace mallocMC
          * compile time. The volatile workaround has no negative effects on the
          * register usage in CUDA.
          */
-        alloc(AlpakaDevice& dev, AlpakaQueue& queue, volatile size_t size)
+        alloc(AlpakaDevice& dev, AlpakaQueue& queue, size_t volatile size)
         {
             void* pool = reservePolicy.setMemPool(dev, size);
             std::tie(pool, size) = AlignmentPolicy::alignPool(pool, size);
@@ -173,7 +174,7 @@ namespace mallocMC
 
         /* forbid to copy the allocator */
         ALPAKA_FN_HOST
-        Allocator(const Allocator&) = delete;
+        Allocator(Allocator const&) = delete;
 
     public:
         template<typename AlpakaDevice, typename AlpakaQueue>
diff --git a/src/include/mallocMC/creationPolicies/OldMalloc.hpp b/src/include/mallocMC/creationPolicies/OldMalloc.hpp
index c8729d54..c75534a1 100644
--- a/src/include/mallocMC/creationPolicies/OldMalloc.hpp
+++ b/src/include/mallocMC/creationPolicies/OldMalloc.hpp
@@ -30,6 +30,7 @@
 #include "OldMalloc.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cstdint>
 
 namespace mallocMC
@@ -51,13 +52,13 @@ namespace mallocMC
             static constexpr auto providesAvailableSlots = false;
 
             template<typename AlignmentPolicy, typename AlpakaAcc>
-            ALPAKA_FN_ACC auto create(const AlpakaAcc& acc, uint32 bytes) const -> void*
+            ALPAKA_FN_ACC auto create(AlpakaAcc const& acc, uint32 bytes) const -> void*
             {
                 return ::malloc(static_cast<size_t>(bytes));
             }
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void destroy(const AlpakaAcc& /*acc*/, void* mem) const
+            ALPAKA_FN_ACC void destroy(AlpakaAcc const& /*acc*/, void* mem) const
             {
                 ::free(mem);
             }
diff --git a/src/include/mallocMC/creationPolicies/Scatter.hpp b/src/include/mallocMC/creationPolicies/Scatter.hpp
index feeb4d3e..0de95765 100644
--- a/src/include/mallocMC/creationPolicies/Scatter.hpp
+++ b/src/include/mallocMC/creationPolicies/Scatter.hpp
@@ -36,8 +36,9 @@
 #include "../mallocMC_utils.hpp"
 #include "Scatter.hpp"
 
-#include <algorithm>
 #include <alpaka/alpaka.hpp>
+
+#include <algorithm>
 #include <atomic>
 #include <cassert>
 #include <cstdint> /* uint32_t */
@@ -111,11 +112,13 @@ namespace mallocMC
         public:
             using HeapProperties = T_Config;
             using HashingProperties = T_Hashing;
+
             struct Properties
                 : HeapProperties
                 , HashingProperties
             {
             };
+
             static constexpr auto providesAvailableSlots = true;
 
         private:
@@ -164,6 +167,7 @@ namespace mallocMC
 
         private:
 #if _DEBUG || ANALYSEHEAP
+
         public:
 #endif
             /* HierarchyThreshold defines the largest chunk size which can be stored in a segment with hierarchy.
@@ -252,7 +256,7 @@ namespace mallocMC
                      * volatile avoids that the data is changed within L1 Cache and therefore is hidden for other
                      * threads.
                      */
-                    volatile uint32* write = (uint32*) (data + pagesize - (int) (sizeof(uint32) * maxOnPageMasks));
+                    uint32 volatile* write = (uint32*) (data + pagesize - (int) (sizeof(uint32) * maxOnPageMasks));
                     while(write < (uint32*) (data + pagesize))
                         *write++ = 0;
                 }
@@ -261,14 +265,14 @@ namespace mallocMC
             // the data used by the allocator
 
             volatile PTE* _ptes;
-            volatile uint32* _regions;
+            uint32 volatile* _regions;
             Page* _page;
             size_t _memsize;
             uint32 _numpages;
             uint32 _accessblocks;
             uint32 _pagebasedMutex;
-            volatile uint32 _firstFreePageBased;
-            volatile uint32 _firstfreeblock;
+            uint32 volatile _firstFreePageBased;
+            uint32 volatile _firstfreeblock;
 
             /**
              * randInit should create an random offset which can be used
@@ -292,13 +296,13 @@ namespace mallocMC
              */
             static ALPAKA_FN_ACC inline auto nextspot(uint32 bitfield, uint32 spot, uint32 spots) -> uint32
             {
-                const uint32 low_part = (spot + 1) == sizeof(uint32) * CHAR_BIT ? 0u : (bitfield >> (spot + 1));
-                const uint32 high_part = (bitfield << (spots - (spot + 1)));
-                const uint32 selection_mask = spots == sizeof(uint32) * CHAR_BIT ? ~0 : ((1u << spots) - 1);
+                uint32 const low_part = (spot + 1) == sizeof(uint32) * CHAR_BIT ? 0u : (bitfield >> (spot + 1));
+                uint32 const high_part = (bitfield << (spots - (spot + 1)));
+                uint32 const selection_mask = spots == sizeof(uint32) * CHAR_BIT ? ~0 : ((1u << spots) - 1);
                 // wrap around the bitfields from the current spot to the left
                 bitfield = (high_part | low_part) & selection_mask;
                 // compute the step from the current spot in the bitfield
-                const uint32 step = ffs(~bitfield);
+                uint32 const step = ffs(~bitfield);
                 // and return the new spot
                 return (spot + step) % spots;
             }
@@ -327,14 +331,14 @@ namespace mallocMC
              * otherwise -1
              */
             template<typename AlpakaAcc>
-            static ALPAKA_FN_ACC inline auto usespot(const AlpakaAcc& acc, uint32* bitfield, uint32 spots) -> int
+            static ALPAKA_FN_ACC inline auto usespot(AlpakaAcc const& acc, uint32* bitfield, uint32 spots) -> int
             {
                 // get first spot
                 uint32 spot = randInit() % spots;
                 for(;;)
                 {
-                    const uint32 mask = 1u << spot;
-                    const uint32 old = alpaka::atomicOp<alpaka::AtomicOr>(acc, bitfield, mask);
+                    uint32 const mask = 1u << spot;
+                    uint32 const old = alpaka::atomicOp<alpaka::AtomicOr>(acc, bitfield, mask);
                     if((old & mask) == 0)
                         return spot;
                     // note: popc(old) == spots should be sufficient,
@@ -364,7 +368,7 @@ namespace mallocMC
              */
             template<typename AlpakaAcc>
             static ALPAKA_FN_ACC inline auto calcAdditionalChunks(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 uint32 fullsegments,
                 uint32 segmentsize,
                 uint32 chunksize) -> uint32
@@ -396,22 +400,22 @@ namespace mallocMC
              */
             template<typename AlpakaAcc>
             ALPAKA_FN_ACC inline auto addChunkHierarchy(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 uint32 chunksize,
                 uint32 fullsegments,
                 uint32 additional_chunks,
                 uint32 page) -> void*
             {
-                const uint32 segments = fullsegments + (additional_chunks > 0 ? 1 : 0);
+                uint32 const segments = fullsegments + (additional_chunks > 0 ? 1 : 0);
                 uint32 spot = randInit() % segments;
-                const uint32 mask = _ptes[page].bitmask;
+                uint32 const mask = _ptes[page].bitmask;
                 if((mask & (1u << spot)) != 0)
                     spot = nextspot(mask, spot, segments);
-                const uint32 tries = segments - popc(mask);
+                uint32 const tries = segments - popc(mask);
                 uint32* onpagemasks = onPageMasksPosition(page, segments);
                 for(uint32 i = 0; i < tries; ++i)
                 {
-                    const int hspot = usespot(acc, &onpagemasks[spot], spot < fullsegments ? 32 : additional_chunks);
+                    int const hspot = usespot(acc, &onpagemasks[spot], spot < fullsegments ? 32 : additional_chunks);
                     if(hspot != -1)
                         return _page[page].data + (32 * spot + hspot) * chunksize;
                     alpaka::atomicOp<alpaka::AtomicOr>(acc, (uint32*) &_ptes[page].bitmask, 1u << spot);
@@ -431,12 +435,12 @@ namespace mallocMC
              */
             template<typename AlpakaAcc>
             ALPAKA_FN_ACC inline auto addChunkNoHierarchy(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 uint32 chunksize,
                 uint32 page,
                 uint32 spots) -> void*
             {
-                const int spot = usespot(acc, (uint32*) &_ptes[page].bitmask, spots);
+                int const spot = usespot(acc, (uint32*) &_ptes[page].bitmask, spots);
                 if(spot == -1)
                     return 0; // that should be impossible :)
                 return _page[page].data + spot * chunksize;
@@ -453,7 +457,7 @@ namespace mallocMC
              */
             template<typename AlpakaAcc, typename T_ChunkSizeRangeCheck>
             ALPAKA_FN_ACC inline auto tryUsePage(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 uint32 page,
                 uint32 chunksize,
                 T_ChunkSizeRangeCheck&& isChunkSizeInRange) -> void*
@@ -461,7 +465,7 @@ namespace mallocMC
                 void* chunk_ptr = nullptr;
 
                 // increse the fill level
-                const uint32 filllevel = alpaka::atomicOp<alpaka::AtomicAdd>(acc, (uint32*) &(_ptes[page].count), 1u);
+                uint32 const filllevel = alpaka::atomicOp<alpaka::AtomicAdd>(acc, (uint32*) &(_ptes[page].count), 1u);
 
                 // if resetfreedpages == false we do not need to re-check chunksize
                 bool tryAllocMem = !resetfreedpages;
@@ -477,7 +481,7 @@ namespace mallocMC
                          * In case the page is now free (chunksize == 0) we acquire the new chunk size.
                          * In cases where the page has already a chunksize we test if the chunksize fits our needs.
                          */
-                        const uint32 oldChunksize = alpaka::atomicOp<alpaka::AtomicCas>(
+                        uint32 const oldChunksize = alpaka::atomicOp<alpaka::AtomicCas>(
                             acc,
                             (uint32*) &_ptes[page].chunksize,
                             0u,
@@ -501,16 +505,16 @@ namespace mallocMC
                     {
                         // more chunks than can be covered by the pte's single
                         // bitfield can be used
-                        const uint32 segmentsize = chunksize * 32 + sizeof(uint32);
-                        const uint32 fullsegments = alpaka::math::min(acc, 32u, pagesize / segmentsize);
-                        const uint32 additional_chunks
+                        uint32 const segmentsize = chunksize * 32 + sizeof(uint32);
+                        uint32 const fullsegments = alpaka::math::min(acc, 32u, pagesize / segmentsize);
+                        uint32 const additional_chunks
                             = calcAdditionalChunks(acc, fullsegments, segmentsize, chunksize);
                         if(filllevel < fullsegments * 32 + additional_chunks)
                             chunk_ptr = addChunkHierarchy(acc, chunksize, fullsegments, additional_chunks, page);
                     }
                     else
                     {
-                        const uint32 chunksinpage = alpaka::math::min(acc, pagesize / chunksize, 32u);
+                        uint32 const chunksinpage = alpaka::math::min(acc, pagesize / chunksize, 32u);
                         if(filllevel < chunksinpage)
                             chunk_ptr = addChunkNoHierarchy(acc, chunksize, page, chunksinpage);
                     }
@@ -539,18 +543,18 @@ namespace mallocMC
              * obtain a free chunk
              */
             template<typename AlignmentPolicy, typename AlpakaAcc>
-            ALPAKA_FN_ACC auto allocChunked(const AlpakaAcc& acc, uint32 bytes) -> void*
+            ALPAKA_FN_ACC auto allocChunked(AlpakaAcc const& acc, uint32 bytes) -> void*
             {
                 // use the minimal allocation size to increase the hit rate for small allocations.
-                const uint32 paddedMinChunkSize = AlignmentPolicy::applyPadding(minChunkSize);
-                const uint32 minAllocation = alpaka::math::max(acc, bytes, paddedMinChunkSize);
-                const uint32 numpages = _numpages;
-                const uint32 pagesperblock = numpages / _accessblocks;
-                const uint32 reloff = warpSize * minAllocation / pagesize;
-                const uint32 start_page_in_block = (minAllocation * hashingK + hashingDistMP * smid()
+                uint32 const paddedMinChunkSize = AlignmentPolicy::applyPadding(minChunkSize);
+                uint32 const minAllocation = alpaka::math::max(acc, bytes, paddedMinChunkSize);
+                uint32 const numpages = _numpages;
+                uint32 const pagesperblock = numpages / _accessblocks;
+                uint32 const reloff = warpSize * minAllocation / pagesize;
+                uint32 const start_page_in_block = (minAllocation * hashingK + hashingDistMP * smid()
                                                     + (hashingDistWP + hashingDistWPRel * reloff) * warpid())
-                    % pagesperblock;
-                const uint32 maxchunksize = alpaka::math::min(
+                                                   % pagesperblock;
+                uint32 const maxchunksize = alpaka::math::min(
                     acc,
                     +pagesize,
                     /* this clumping means that allocations of paddedMinChunkSize could have a waste exceeding the
@@ -562,7 +566,7 @@ namespace mallocMC
                  *   - different for each thread to reduce memory read/write conflicts
                  *   - index calculated by the hash function
                  */
-                const uint32 global_start_page = start_page_in_block + _firstfreeblock * pagesperblock;
+                uint32 const global_start_page = start_page_in_block + _firstfreeblock * pagesperblock;
 
                 uint32 checklevel = regionsize * 3 / 4;
                 /* Finding a free segment is using a two step approach.
@@ -583,9 +587,9 @@ namespace mallocMC
                      */
                     do
                     {
-                        const uint32 region = global_page / regionsize;
-                        const uint32 regionfilllevel = _regions[region];
-                        const uint32 region_offset = region * regionsize;
+                        uint32 const region = global_page / regionsize;
+                        uint32 const regionfilllevel = _regions[region];
+                        uint32 const region_offset = region * regionsize;
                         if(regionfilllevel < checklevel)
                         {
                             uint32 page_in_region = global_page;
@@ -634,7 +638,7 @@ namespace mallocMC
                         // check if we jumped into the next access block
                         if(global_page % pagesperblock == 0u)
                         {
-                            const uint32 access_block_id = global_page / pagesperblock;
+                            uint32 const access_block_id = global_page / pagesperblock;
                             // randomize the thread writing the info
                             // Data races are not critical.
                             if(access_block_id > _firstfreeblock)
@@ -654,7 +658,7 @@ namespace mallocMC
              * The last thread reducing the page count to zero should call this method.
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void tryCleanPage(const AlpakaAcc& acc, uint32 page)
+            ALPAKA_FN_ACC void tryCleanPage(AlpakaAcc const& acc, uint32 page)
             {
                 if constexpr(resetfreedpages)
                 {
@@ -672,7 +676,7 @@ namespace mallocMC
 
                     if(oldfilllevel == 0)
                     {
-                        const uint32 chunksize
+                        uint32 const chunksize
                             = alpaka::atomicOp<alpaka::AtomicCas>(acc, (uint32*) &_ptes[page].chunksize, 0u, 0u);
                         // if chunksize == 0 than another thread cleaned the page already
                         if(chunksize != 0)
@@ -724,19 +728,19 @@ namespace mallocMC
              * @param chunksize the chunksize used for the page
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void deallocChunked(const AlpakaAcc& acc, void* mem, uint32 page, uint32 chunksize)
+            ALPAKA_FN_ACC void deallocChunked(AlpakaAcc const& acc, void* mem, uint32 page, uint32 chunksize)
             {
-                const auto inpage_offset = static_cast<uint32>((char*) mem - _page[page].data);
+                auto const inpage_offset = static_cast<uint32>((char*) mem - _page[page].data);
                 if(chunksize <= HierarchyThreshold)
                 {
                     // one more level in hierarchy
-                    const uint32 segmentsize = chunksize * 32 + sizeof(uint32);
-                    const uint32 fullsegments = alpaka::math::min(acc, 32u, pagesize / segmentsize);
-                    const uint32 additional_chunks = calcAdditionalChunks(acc, fullsegments, segmentsize, chunksize);
-                    const uint32 segment = inpage_offset / (chunksize * 32);
-                    const uint32 withinsegment = (inpage_offset - segment * (chunksize * 32)) / chunksize;
+                    uint32 const segmentsize = chunksize * 32 + sizeof(uint32);
+                    uint32 const fullsegments = alpaka::math::min(acc, 32u, pagesize / segmentsize);
+                    uint32 const additional_chunks = calcAdditionalChunks(acc, fullsegments, segmentsize, chunksize);
+                    uint32 const segment = inpage_offset / (chunksize * 32);
+                    uint32 const withinsegment = (inpage_offset - segment * (chunksize * 32)) / chunksize;
                     // mark it as free
-                    const uint32 nMasks = fullsegments + (additional_chunks > 0 ? 1 : 0);
+                    uint32 const nMasks = fullsegments + (additional_chunks > 0 ? 1 : 0);
                     uint32* onpagemasks = onPageMasksPosition(page, nMasks);
                     uint32 old
                         = alpaka::atomicOp<alpaka::AtomicAnd>(acc, &onpagemasks[segment], ~(1u << withinsegment));
@@ -747,7 +751,7 @@ namespace mallocMC
                 }
                 else
                 {
-                    const uint32 segment = inpage_offset / chunksize;
+                    uint32 const segment = inpage_offset / chunksize;
                     alpaka::atomicOp<alpaka::AtomicAnd>(acc, (uint32*) &_ptes[page].bitmask, ~(1u << segment));
                 }
 
@@ -760,10 +764,10 @@ namespace mallocMC
                 // many threads, so..
                 if(oldfilllevel == pagesize / 2 / chunksize)
                 {
-                    const uint32 region = page / regionsize;
+                    uint32 const region = page / regionsize;
                     alpaka::atomicOp<alpaka::AtomicExch>(acc, (uint32*) (_regions + region), 0u);
-                    const uint32 pagesperblock = _numpages / _accessblocks;
-                    const uint32 block = page / pagesperblock;
+                    uint32 const pagesperblock = _numpages / _accessblocks;
+                    uint32 const block = page / pagesperblock;
                     if(warpid() + laneid() == 0)
                         alpaka::atomicOp<alpaka::AtomicMin>(acc, (uint32*) &_firstfreeblock, block);
                 }
@@ -777,12 +781,12 @@ namespace mallocMC
              * @return true on success, false if one of the pages is not free
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto markpages(const AlpakaAcc& acc, uint32 startpage, uint32 pages, uint32 bytes) -> bool
+            ALPAKA_FN_ACC auto markpages(AlpakaAcc const& acc, uint32 startpage, uint32 pages, uint32 bytes) -> bool
             {
                 uint32 abord = std::numeric_limits<uint32>::max();
                 for(uint32 trypage = startpage; trypage < startpage + pages; ++trypage)
                 {
-                    const uint32 old
+                    uint32 const old
                         = alpaka::atomicOp<alpaka::AtomicCas>(acc, (uint32*) &_ptes[trypage].chunksize, 0u, bytes);
                     if(old != 0)
                     {
@@ -808,12 +812,12 @@ namespace mallocMC
              */
             template<typename AlpakaAcc>
             ALPAKA_FN_ACC auto allocPageBasedSingleRegion(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 uint32 startpage,
                 uint32 endpage,
                 uint32 bytes) -> void*
             {
-                const uint32 pagestoalloc = divup(bytes, pagesize);
+                uint32 const pagestoalloc = divup(bytes, pagesize);
                 uint32 freecount = 0;
                 bool left_free = false;
                 for(uint32 search_page = startpage + 1; search_page > endpage;)
@@ -856,13 +860,13 @@ namespace mallocMC
              * function concurrently
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto allocPageBasedSingle(const AlpakaAcc& acc, uint32 bytes) -> void*
+            ALPAKA_FN_ACC auto allocPageBasedSingle(AlpakaAcc const& acc, uint32 bytes) -> void*
             {
                 // acquire mutex
                 while(alpaka::atomicOp<alpaka::AtomicExch>(acc, &_pagebasedMutex, 1u) != 0)
                     ;
                 // search for free spot from the back
-                const uint32 spage = _firstFreePageBased;
+                uint32 const spage = _firstFreePageBased;
                 void* res = allocPageBasedSingleRegion(acc, spage, 0, bytes);
                 if(res == 0)
                     // also check the rest of the pages
@@ -872,6 +876,7 @@ namespace mallocMC
                 alpaka::atomicOp<alpaka::AtomicExch>(acc, &_pagebasedMutex, 0u);
                 return res;
             }
+
             /**
              * allocPageBased tries to allocate the demanded number of bytes on
              * a continues sequence of pages
@@ -880,7 +885,7 @@ namespace mallocMC
              * use all the requested pages
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto allocPageBased(const AlpakaAcc& acc, uint32 bytes) -> void*
+            ALPAKA_FN_ACC auto allocPageBased(AlpakaAcc const& acc, uint32 bytes) -> void*
             {
                 // this is rather slow, but we dont expect that to happen often
                 // anyway
@@ -888,11 +893,11 @@ namespace mallocMC
                 // only one thread per warp can acquire the mutex
                 void* res = 0;
                 // based on the alpaka backend the lanemask type can be 64bit
-                const auto mask = activemask();
-                const uint32_t num = popc(mask);
+                auto const mask = activemask();
+                uint32_t const num = popc(mask);
                 // based on the alpaka backend the lanemask type can be 64bit
-                const auto lanemask = lanemask_lt();
-                const uint32_t local_id = popc(lanemask & mask);
+                auto const lanemask = lanemask_lt();
+                uint32_t const local_id = popc(lanemask & mask);
                 for(unsigned int active = 0; active < num; ++active)
                     if(active == local_id)
                         res = allocPageBasedSingle(acc, bytes);
@@ -906,9 +911,9 @@ namespace mallocMC
              * @param bytes the number of bytes to be freed
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void deallocPageBased(const AlpakaAcc& acc, void* mem, uint32 page, uint32 bytes)
+            ALPAKA_FN_ACC void deallocPageBased(AlpakaAcc const& acc, void* mem, uint32 page, uint32 bytes)
             {
-                const uint32 pages = divup(bytes, pagesize);
+                uint32 const pages = divup(bytes, pagesize);
                 for(uint32 p = page; p < page + pages; ++p)
                     _page[p].init();
 
@@ -927,7 +932,7 @@ namespace mallocMC
              * @return pointer to the allocated memory
              */
             template<typename AlignmentPolicy, typename AlpakaAcc>
-            ALPAKA_FN_ACC auto create(const AlpakaAcc& acc, uint32 bytes) -> void*
+            ALPAKA_FN_ACC auto create(AlpakaAcc const& acc, uint32 bytes) -> void*
             {
                 if(bytes == 0)
                     return 0;
@@ -950,28 +955,28 @@ namespace mallocMC
              * @param mempointer to the memory region to free
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void destroy(const AlpakaAcc& acc, void* mem)
+            ALPAKA_FN_ACC void destroy(AlpakaAcc const& acc, void* mem)
             {
                 if(mem == 0)
                     return;
                 // lets see on which page we are on
-                const auto page = static_cast<uint32>(((char*) mem - (char*) _page) / pagesize);
+                auto const page = static_cast<uint32>(((char*) mem - (char*) _page) / pagesize);
                 /* Emulate atomic read.
                  * In older implementations we read the chunksize without atomics which can result in data races.
                  */
-                const uint32 chunksize
+                uint32 const chunksize
                     = alpaka::atomicOp<alpaka::AtomicCas>(acc, (uint32*) &_ptes[page].chunksize, 0u, 0u);
 
                 // is the pointer the beginning of a chunk?
-                const auto inpage_offset = static_cast<uint32>((char*) mem - _page[page].data);
-                const uint32 block = inpage_offset / chunksize;
-                const uint32 inblockoffset = inpage_offset - block * chunksize;
+                auto const inpage_offset = static_cast<uint32>((char*) mem - _page[page].data);
+                uint32 const block = inpage_offset / chunksize;
+                uint32 const inblockoffset = inpage_offset - block * chunksize;
                 if(inblockoffset != 0)
                 {
                     uint32* counter = (uint32*) (_page[page].data + block * chunksize);
                     // coalesced mem free
 
-                    const uint32 old = alpaka::atomicOp<alpaka::AtomicSub>(acc, counter, 1u);
+                    uint32 const old = alpaka::atomicOp<alpaka::AtomicSub>(acc, counter, 1u);
                     if(old != 1)
                         return;
                     mem = (void*) counter;
@@ -992,13 +997,13 @@ namespace mallocMC
              * @param memsize size of the memory in bytes
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC void initDeviceFunction(const AlpakaAcc& acc, void* memory, size_t memsize)
+            ALPAKA_FN_ACC void initDeviceFunction(AlpakaAcc const& acc, void* memory, size_t memsize)
             {
-                const auto linid = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc).sum();
-                const auto totalThreads = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc).prod();
+                auto const linid = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc).sum();
+                auto const totalThreads = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc).prod();
 
                 uint32 numregions = ((unsigned long long) memsize)
-                    / (((unsigned long long) regionsize) * (sizeof(PTE) + pagesize) + sizeof(uint32));
+                                    / (((unsigned long long) regionsize) * (sizeof(PTE) + pagesize) + sizeof(uint32));
 
                 uint32 numpages = numregions * regionsize;
                 // pointer is copied (copy is called page)
@@ -1026,11 +1031,11 @@ namespace mallocMC
                 // However, we do not have to use the exact requested block size.
                 // So we redistribute actual memory between the chosen number of blocks
                 // and ensure that all blocks have the same number of regions.
-                const auto memorysize = static_cast<size_t>(numpages) * pagesize;
-                const auto numblocks = memorysize / accessblocksize;
-                const auto memoryperblock = memorysize / numblocks;
-                const auto pagesperblock = memoryperblock / pagesize;
-                const auto regionsperblock = pagesperblock / regionsize;
+                auto const memorysize = static_cast<size_t>(numpages) * pagesize;
+                auto const numblocks = memorysize / accessblocksize;
+                auto const memoryperblock = memorysize / numblocks;
+                auto const pagesperblock = memoryperblock / pagesize;
+                auto const regionsperblock = pagesperblock / regionsize;
                 numregions = numblocks * regionsperblock;
                 numpages = numregions * regionsize;
 
@@ -1108,7 +1113,7 @@ namespace mallocMC
                                                 "or AlignmentPolicy.");
                 }
                 auto initKernel = [] ALPAKA_FN_ACC(
-                                      const AlpakaAcc& m_acc,
+                                      AlpakaAcc const& m_acc,
                                       T_DeviceAllocator* m_heap,
                                       void* m_heapmem,
                                       size_t m_memsize)
@@ -1127,7 +1132,7 @@ namespace mallocMC
                 threadsPerBlock[Dim::value - 1]
                     = std::min(static_cast<size_t>(256u), static_cast<size_t>(devProps.m_blockThreadCountMax));
 
-                const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{
+                auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{
                     VecType::ones(),
                     threadsPerBlock,
                     VecType::ones()}; // Dim may be any dimension, but workDiv is 1D
@@ -1149,26 +1154,26 @@ namespace mallocMC
              *        page.
              */
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto countFreeChunksInPage(const AlpakaAcc& acc, uint32 page, uint32 chunksize) -> unsigned
+            ALPAKA_FN_ACC auto countFreeChunksInPage(AlpakaAcc const& acc, uint32 page, uint32 chunksize) -> unsigned
             {
-                const uint32 filledChunks = _ptes[page].count;
+                uint32 const filledChunks = _ptes[page].count;
                 if(chunksize <= HierarchyThreshold)
                 {
-                    const uint32 segmentsize = chunksize * 32 + sizeof(uint32); // each segment can hold 32
+                    uint32 const segmentsize = chunksize * 32 + sizeof(uint32); // each segment can hold 32
                                                                                 // 2nd-level chunks
-                    const uint32 fullsegments = alpaka::math::min(
+                    uint32 const fullsegments = alpaka::math::min(
                         acc,
                         32u,
                         pagesize / segmentsize); // there might be space for
                                                  // more than 32 segments
                                                  // with 32 2nd-level chunks
-                    const uint32 additional_chunks = calcAdditionalChunks(acc, fullsegments, segmentsize, chunksize);
-                    const uint32 level2Chunks = fullsegments * 32 + additional_chunks;
+                    uint32 const additional_chunks = calcAdditionalChunks(acc, fullsegments, segmentsize, chunksize);
+                    uint32 const level2Chunks = fullsegments * 32 + additional_chunks;
                     return level2Chunks - filledChunks;
                 }
                 else
                 {
-                    const uint32 chunksinpage = alpaka::math::min(
+                    uint32 const chunksinpage = alpaka::math::min(
                         acc,
                         pagesize / chunksize,
                         32u); // without hierarchy, there can not be more than
@@ -1196,7 +1201,7 @@ namespace mallocMC
              */
             template<typename AlignmentPolicy, typename AlpakaAcc>
             ALPAKA_FN_ACC auto getAvailaibleSlotsDeviceFunction(
-                const AlpakaAcc& acc,
+                AlpakaAcc const& acc,
                 size_t slotSize,
                 uint32 gid,
                 uint32 stride) -> unsigned
@@ -1206,7 +1211,7 @@ namespace mallocMC
                 { // multiple slots per page
                     for(uint32 currentpage = gid; currentpage < _numpages; currentpage += stride)
                     {
-                        const uint32 maxchunksize = alpaka::math::min(acc, +pagesize, wastefactor * (uint32) slotSize);
+                        uint32 const maxchunksize = alpaka::math::min(acc, +pagesize, wastefactor * (uint32) slotSize);
 
                         uint32 chunksize = _ptes[currentpage].chunksize;
                         if(chunksize >= slotSize && chunksize <= maxchunksize)
@@ -1235,7 +1240,7 @@ namespace mallocMC
                 { // 1 slot needs multiple pages
                     if(gid > 0)
                         return 0; // do this serially
-                    const uint32 pagestoalloc = divup((uint32) slotSize, pagesize);
+                    uint32 const pagestoalloc = divup((uint32) slotSize, pagesize);
                     uint32 freecount = 0;
                     for(uint32 currentpage = _numpages; currentpage > 0;)
                     { // this already includes all superblocks
@@ -1270,6 +1275,7 @@ namespace mallocMC
              * @param slotSize the size of allocatable elements to count
              * @param obj a reference to the allocator instance (host-side)
              */
+
         public:
             template<typename AlpakaAcc, typename AlpakaDevice, typename AlpakaQueue, typename T_DeviceAllocator>
             static auto getAvailableSlotsHost(
@@ -1282,15 +1288,15 @@ namespace mallocMC
                 alpaka::memset(queue, d_slots, 0, 1);
 
                 auto getAvailableSlotsKernel = [] ALPAKA_FN_ACC(
-                                                   const AlpakaAcc& acc,
+                                                   AlpakaAcc const& acc,
                                                    T_DeviceAllocator* heapPtr,
                                                    size_t numBytes,
                                                    unsigned* slots) -> void
                 {
-                    const auto gid = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc).sum();
+                    auto const gid = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc).sum();
 
-                    const auto nWorker = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc).prod();
-                    const unsigned temp = heapPtr->template getAvailaibleSlotsDeviceFunction<
+                    auto const nWorker = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc).prod();
+                    unsigned const temp = heapPtr->template getAvailaibleSlotsDeviceFunction<
                         typename T_DeviceAllocator::AlignmentPolicy>(acc, numBytes, gid, nWorker);
                     if(temp)
                         alpaka::atomicOp<alpaka::AtomicAdd>(acc, slots, temp);
@@ -1310,7 +1316,7 @@ namespace mallocMC
                 threadsPerBlock[Dim::value - 1]
                     = std::min(static_cast<size_t>(256u), static_cast<size_t>(devProps.m_blockThreadCountMax));
 
-                const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{
+                auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{
                     numBlocks,
                     threadsPerBlock,
                     VecType::ones()}; // Dim may be any dimension, but workDiv is 1D
@@ -1325,7 +1331,7 @@ namespace mallocMC
                         alpaka::getPtrNative(d_slots)));
 
                 auto const platform = alpaka::Platform<alpaka::DevCpu>{};
-                const auto hostDev = alpaka::getDevByIdx(platform, 0);
+                auto const hostDev = alpaka::getDevByIdx(platform, 0);
 
                 auto h_slots = alpaka::allocBuf<unsigned, int>(hostDev, 1);
                 alpaka::memcpy(queue, h_slots, d_slots, 1);
@@ -1349,13 +1355,13 @@ namespace mallocMC
              * @param slotSize the size of allocatable elements to count
              */
             template<typename AlignmentPolicy, typename AlpakaAcc>
-            ALPAKA_FN_ACC auto getAvailableSlotsAccelerator(const AlpakaAcc& acc, size_t slotSize) -> unsigned
+            ALPAKA_FN_ACC auto getAvailableSlotsAccelerator(AlpakaAcc const& acc, size_t slotSize) -> unsigned
             {
-                const int wId = warpid_withinblock(acc); // do not use warpid-function, since
+                int const wId = warpid_withinblock(acc); // do not use warpid-function, since
                                                          // this value is not guaranteed to
                                                          // be stable across warp lifetime
 
-                const uint32 activeThreads = popc(activemask());
+                uint32 const activeThreads = popc(activemask());
 
                 auto& activePerWarp = alpaka::declareSharedVar<
                     std::uint32_t[maxThreadsPerBlock / warpSize],
@@ -1373,11 +1379,11 @@ namespace mallocMC
                 // the active threads obtain an id from 0 to activeThreads-1
                 if(slotSize == 0)
                     return 0;
-                const auto linearId = alpaka::atomicOp<alpaka::AtomicAdd>(acc, &activePerWarp[wId], 1u);
+                auto const linearId = alpaka::atomicOp<alpaka::AtomicAdd>(acc, &activePerWarp[wId], 1u);
 
                 // printf("Block %d, id %d: activeThreads=%d
                 // linearId=%d\n",blockIdx.x,threadIdx.x,activeThreads,linearId);
-                const unsigned temp = this->template getAvailaibleSlotsDeviceFunction<AlignmentPolicy>(
+                unsigned const temp = this->template getAvailaibleSlotsDeviceFunction<AlignmentPolicy>(
                     acc,
                     slotSize,
                     linearId,
diff --git a/src/include/mallocMC/device_allocator.hpp b/src/include/mallocMC/device_allocator.hpp
index 8bb6c2f4..52e4e736 100644
--- a/src/include/mallocMC/device_allocator.hpp
+++ b/src/include/mallocMC/device_allocator.hpp
@@ -33,6 +33,7 @@
 #include "mallocMC_utils.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cstdint>
 #include <cstdio>
 
@@ -70,11 +71,11 @@ namespace mallocMC
         void* pool;
 
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC auto malloc(const AlpakaAcc& acc, size_t bytes) -> void*
+        ALPAKA_FN_ACC auto malloc(AlpakaAcc const& acc, size_t bytes) -> void*
         {
             bytes = AlignmentPolicy::applyPadding(bytes);
             DistributionPolicy distributionPolicy(acc);
-            const uint32 req_size = distributionPolicy.collect(acc, bytes);
+            uint32 const req_size = distributionPolicy.collect(acc, bytes);
             void* memBlock = CreationPolicy::template create<AlignmentPolicy>(acc, req_size);
             if(CreationPolicy::isOOM(memBlock, req_size))
                 memBlock = OOMPolicy::handleOOM(memBlock);
@@ -82,7 +83,7 @@ namespace mallocMC
         }
 
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC void free(const AlpakaAcc& acc, void* p)
+        ALPAKA_FN_ACC void free(AlpakaAcc const& acc, void* p)
         {
             CreationPolicy::destroy(acc, p);
         }
@@ -96,7 +97,7 @@ namespace mallocMC
          * device side 0 will be returned.
          */
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC auto getAvailableSlots(const AlpakaAcc& acc, size_t slotSize) -> unsigned
+        ALPAKA_FN_ACC auto getAvailableSlots(AlpakaAcc const& acc, size_t slotSize) -> unsigned
         {
             slotSize = AlignmentPolicy::applyPadding(slotSize);
             if constexpr(Traits<DeviceAllocator>::providesAvailableSlots)
diff --git a/src/include/mallocMC/distributionPolicies/Noop.hpp b/src/include/mallocMC/distributionPolicies/Noop.hpp
index 19cc471d..2f43640f 100644
--- a/src/include/mallocMC/distributionPolicies/Noop.hpp
+++ b/src/include/mallocMC/distributionPolicies/Noop.hpp
@@ -30,6 +30,7 @@
 #include "Noop.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cstdint>
 #include <string>
 
@@ -49,18 +50,18 @@ namespace mallocMC
 
         public:
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC Noop(const AlpakaAcc& /*acc*/)
+            ALPAKA_FN_ACC Noop(AlpakaAcc const& /*acc*/)
             {
             }
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto collect(const AlpakaAcc& /*acc*/, uint32 bytes) const -> uint32
+            ALPAKA_FN_ACC auto collect(AlpakaAcc const& /*acc*/, uint32 bytes) const -> uint32
             {
                 return bytes;
             }
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto distribute(const AlpakaAcc& /*acc*/, void* allocatedMem) const -> void*
+            ALPAKA_FN_ACC auto distribute(AlpakaAcc const& /*acc*/, void* allocatedMem) const -> void*
             {
                 return allocatedMem;
             }
diff --git a/src/include/mallocMC/distributionPolicies/XMallocSIMD.hpp b/src/include/mallocMC/distributionPolicies/XMallocSIMD.hpp
index a67dd333..eb4f3d59 100644
--- a/src/include/mallocMC/distributionPolicies/XMallocSIMD.hpp
+++ b/src/include/mallocMC/distributionPolicies/XMallocSIMD.hpp
@@ -37,6 +37,7 @@
 #include "XMallocSIMD.hpp"
 
 #include <alpaka/alpaka.hpp>
+
 #include <cstdint>
 #include <limits>
 #include <sstream>
@@ -88,7 +89,7 @@ namespace mallocMC
             using Properties = T_Config;
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC XMallocSIMD(const AlpakaAcc& acc)
+            ALPAKA_FN_ACC XMallocSIMD(AlpakaAcc const& acc)
                 : can_use_coalescing(false)
                 , warpid(warpid_withinblock(acc))
                 , myoffset(0)
@@ -116,7 +117,7 @@ namespace mallocMC
             static constexpr uint32 _pagesize = pagesize;
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto collect(const AlpakaAcc& acc, uint32 bytes) -> uint32
+            ALPAKA_FN_ACC auto collect(AlpakaAcc const& acc, uint32 bytes) -> uint32
             {
                 can_use_coalescing = false;
                 myoffset = 0;
@@ -129,7 +130,7 @@ namespace mallocMC
 
                 // second half: make sure that all coalesced allocations can fit
                 // within one page necessary for offset calculation
-                const bool coalescible = bytes > 0 && bytes < (pagesize / 32);
+                bool const coalescible = bytes > 0 && bytes < (pagesize / 32);
 
 #if(MALLOCMC_DEVICE_COMPILE)
                 threadcount = popc(ballot(coalescible));
@@ -150,7 +151,7 @@ namespace mallocMC
             }
 
             template<typename AlpakaAcc>
-            ALPAKA_FN_ACC auto distribute(const AlpakaAcc& acc, void* allocatedMem) -> void*
+            ALPAKA_FN_ACC auto distribute(AlpakaAcc const& acc, void* allocatedMem) -> void*
             {
                 auto& warp_res = alpaka::declareSharedVar<char * [maxThreadsPerBlock / warpSize], __COUNTER__>(acc);
 
diff --git a/src/include/mallocMC/mallocMC_allocator_handle.hpp b/src/include/mallocMC/mallocMC_allocator_handle.hpp
index 4d5361a7..1da222fa 100644
--- a/src/include/mallocMC/mallocMC_allocator_handle.hpp
+++ b/src/include/mallocMC/mallocMC_allocator_handle.hpp
@@ -44,19 +44,19 @@ namespace mallocMC
         }
 
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC auto malloc(const AlpakaAcc& acc, size_t size) -> void*
+        ALPAKA_FN_ACC auto malloc(AlpakaAcc const& acc, size_t size) -> void*
         {
             return devAllocator->malloc(acc, size);
         }
 
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC void free(const AlpakaAcc& acc, void* p)
+        ALPAKA_FN_ACC void free(AlpakaAcc const& acc, void* p)
         {
             devAllocator->free(acc, p);
         }
 
         template<typename AlpakaAcc>
-        ALPAKA_FN_ACC auto getAvailableSlots(const AlpakaAcc& acc, size_t slotSize) -> unsigned
+        ALPAKA_FN_ACC auto getAvailableSlots(AlpakaAcc const& acc, size_t slotSize) -> unsigned
         {
             return devAllocator->getAvailableSlots(acc, slotSize);
         }
diff --git a/src/include/mallocMC/mallocMC_utils.hpp b/src/include/mallocMC/mallocMC_utils.hpp
index 6bf953b4..c1c9b24f 100644
--- a/src/include/mallocMC/mallocMC_utils.hpp
+++ b/src/include/mallocMC/mallocMC_utils.hpp
@@ -62,6 +62,7 @@ namespace mallocMC
     public:
         using type = unsigned int;
     };
+
     template<>
     class __PointerEquivalent<8>
     {
@@ -161,7 +162,6 @@ namespace mallocMC
 #endif
     }
 
-
     ALPAKA_FN_ACC inline auto activemask()
     {
 #if defined(__CUDA_ARCH__)
@@ -194,9 +194,9 @@ namespace mallocMC
      * @return warp id within the block
      */
     template<typename AlpakaAcc>
-    ALPAKA_FN_ACC inline auto warpid_withinblock(const AlpakaAcc& acc) -> std::uint32_t
+    ALPAKA_FN_ACC inline auto warpid_withinblock(AlpakaAcc const& acc) -> std::uint32_t
     {
-        const auto localId = alpaka::mapIdx<1>(
+        auto const localId = alpaka::mapIdx<1>(
             alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc),
             alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc))[0];
         return localId / warpSize;
diff --git a/src/include/mallocMC/oOMPolicies/BadAllocException.hpp b/src/include/mallocMC/oOMPolicies/BadAllocException.hpp
index f3a3bb75..7d7dfcad 100644
--- a/src/include/mallocMC/oOMPolicies/BadAllocException.hpp
+++ b/src/include/mallocMC/oOMPolicies/BadAllocException.hpp
@@ -30,6 +30,7 @@
 #include "BadAllocException.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <cassert>
 #include <string>
 
diff --git a/src/include/mallocMC/oOMPolicies/ReturnNull.hpp b/src/include/mallocMC/oOMPolicies/ReturnNull.hpp
index 5ddd698a..dbea98e7 100644
--- a/src/include/mallocMC/oOMPolicies/ReturnNull.hpp
+++ b/src/include/mallocMC/oOMPolicies/ReturnNull.hpp
@@ -30,6 +30,7 @@
 #include "ReturnNull.hpp"
 
 #include <alpaka/core/Common.hpp>
+
 #include <string>
 
 namespace mallocMC
diff --git a/src/include/mallocMC/reservePoolPolicies/AlpakaBuf.hpp b/src/include/mallocMC/reservePoolPolicies/AlpakaBuf.hpp
index ca8bcb40..5ffa1980 100644
--- a/src/include/mallocMC/reservePoolPolicies/AlpakaBuf.hpp
+++ b/src/include/mallocMC/reservePoolPolicies/AlpakaBuf.hpp
@@ -28,6 +28,7 @@
 #pragma once
 
 #include <alpaka/alpaka.hpp>
+
 #include <memory>
 #include <string>
 
@@ -39,7 +40,7 @@ namespace mallocMC
         struct AlpakaBuf
         {
             template<typename AlpakaDev>
-            auto setMemPool(const AlpakaDev& dev, size_t memsize) -> void*
+            auto setMemPool(AlpakaDev const& dev, size_t memsize) -> void*
             {
                 poolBuffer = std::make_unique<PoolBufferType>(alpaka::allocBuf<unsigned char, size_t>(dev, memsize));
                 return alpaka::getPtrNative(*poolBuffer);
diff --git a/src/include/mallocMC/reservePoolPolicies/CudaSetLimits.hpp b/src/include/mallocMC/reservePoolPolicies/CudaSetLimits.hpp
index d2d9be66..b77132f9 100644
--- a/src/include/mallocMC/reservePoolPolicies/CudaSetLimits.hpp
+++ b/src/include/mallocMC/reservePoolPolicies/CudaSetLimits.hpp
@@ -32,6 +32,7 @@
 #    include "CudaSetLimits.hpp"
 
 #    include <cuda_runtime_api.h>
+
 #    include <mutex>
 #    include <string>
 
@@ -57,7 +58,7 @@ namespace mallocMC
         struct CudaSetLimits
         {
             template<typename AlpakaDev>
-            auto setMemPool(const AlpakaDev& dev, size_t memsize) -> void*
+            auto setMemPool(AlpakaDev const& dev, size_t memsize) -> void*
             {
                 cudaDeviceSetLimit(cudaLimitMallocHeapSize, memsize);
                 return nullptr;
diff --git a/tests/dimensions.cpp b/tests/dimensions.cpp
index bd7e3343..a39ff448 100644
--- a/tests/dimensions.cpp
+++ b/tests/dimensions.cpp
@@ -26,6 +26,7 @@
 */
 
 #include <alpaka/alpaka.hpp>
+
 #include <catch2/catch.hpp>
 #include <mallocMC/mallocMC.hpp>
 
@@ -77,7 +78,7 @@ void test1D()
         mallocMC::AlignmentPolicies::Shrink<AlignmentConfig>>;
 
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
 
     constexpr auto N = 16;
@@ -90,7 +91,7 @@ void test1D()
         queue,
         alpaka::createTaskKernel<Acc>(
             alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             { deviceArray = (int**) allocHandle.malloc(acc, sizeof(int*) * dim * dim); },
             N,
             scatterAlloc.getAllocatorHandle()));
@@ -100,15 +101,15 @@ void test1D()
         queue,
         alpaka::createTaskKernel<Acc>(
             alpaka::WorkDivMembers<Dim, Idx>{Idx{N}, Idx{N}, Idx{1}},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
+                auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
                 deviceArray[i] = (int*) allocHandle.malloc(acc, sizeof(int));
             },
             scatterAlloc.getAllocatorHandle()));
 
-    const auto slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
-    const auto heapInfo = scatterAlloc.getHeapLocations().at(0);
+    auto const slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
+    auto const heapInfo = scatterAlloc.getHeapLocations().at(0);
     std::cout << alpaka::trait::GetAccName<Acc>::getAccName() << " slots: " << slots << " heap size: " << heapInfo.size
               << '\n';
 
@@ -117,9 +118,9 @@ void test1D()
         queue,
         alpaka::createTaskKernel<Acc>(
             alpaka::WorkDivMembers<Dim, Idx>{Idx{N}, Idx{N}, Idx{1}},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
+                auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
                 allocHandle.free(acc, deviceArray[i]);
             },
             scatterAlloc.getAllocatorHandle()));
@@ -129,7 +130,7 @@ void test1D()
         queue,
         alpaka::createTaskKernel<Acc>(
             alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             { allocHandle.free(acc, deviceArray); },
             scatterAlloc.getAllocatorHandle()));
 }
@@ -149,7 +150,7 @@ void test2D()
         mallocMC::AlignmentPolicies::Shrink<AlignmentConfig>>;
 
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
 
     constexpr auto N = 8;
@@ -165,7 +166,7 @@ void test2D()
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             { deviceArray = (int**) allocHandle.malloc(acc, sizeof(int*) * dim * dim * dim * dim); },
             N,
             scatterAlloc.getAllocatorHandle()));
@@ -178,16 +179,16 @@ void test2D()
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
+                auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
                 deviceArray[idx[0] * dim * dim + idx[1]] = (int*) allocHandle.malloc(acc, sizeof(int));
             },
             N,
             scatterAlloc.getAllocatorHandle()));
 
-    const auto slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
-    const auto heapInfo = scatterAlloc.getHeapLocations().at(0);
+    auto const slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
+    auto const heapInfo = scatterAlloc.getHeapLocations().at(0);
     std::cout << alpaka::trait::GetAccName<Acc>::getAccName() << " slots: " << slots << " heap size: " << heapInfo.size
               << '\n';
 
@@ -199,9 +200,9 @@ void test2D()
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
+                auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
                 allocHandle.free(acc, deviceArray[idx[0] * dim * dim + idx[1]]);
             },
             N,
@@ -215,7 +216,7 @@ void test2D()
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             { allocHandle.free(acc, deviceArray); },
             scatterAlloc.getAllocatorHandle()));
 }
@@ -235,7 +236,7 @@ void test3D()
         mallocMC::AlignmentPolicies::Shrink<AlignmentConfig>>;
 
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
 
     constexpr auto N = 4;
@@ -251,7 +252,7 @@ void test3D()
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             { deviceArray = (int**) allocHandle.malloc(acc, sizeof(int*) * dim * dim * dim * dim * dim * dim); },
             N,
             scatterAlloc.getAllocatorHandle()));
@@ -265,17 +266,17 @@ void test3D()
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
+                auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
                 deviceArray[idx[0] * dim * dim * dim * dim + idx[1] * dim * dim + idx[0]]
                     = (int*) allocHandle.malloc(acc, sizeof(int));
             },
             N,
             scatterAlloc.getAllocatorHandle()));
 
-    const auto slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
-    const auto heapInfo = scatterAlloc.getHeapLocations().at(0);
+    auto const slots = scatterAlloc.getAvailableSlots(dev, queue, sizeof(int));
+    auto const heapInfo = scatterAlloc.getHeapLocations().at(0);
     std::cout << alpaka::trait::GetAccName<Acc>::getAccName() << " slots: " << slots << " heap size: " << heapInfo.size
               << '\n';
 
@@ -288,9 +289,9 @@ void test3D()
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(N),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, int dim, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
-                const auto idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
+                auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
                 allocHandle.free(acc, deviceArray[idx[0] * dim * dim * dim * dim + idx[1] * dim * dim + idx[0]]);
             },
             N,
@@ -304,7 +305,7 @@ void test3D()
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1),
                 alpaka::Vec<Dim, Idx>::all(1)},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             { allocHandle.free(acc, deviceArray); },
             scatterAlloc.getAllocatorHandle()));
 }
diff --git a/tests/policies.cpp b/tests/policies.cpp
index cbb6f453..5e9c9cb3 100644
--- a/tests/policies.cpp
+++ b/tests/policies.cpp
@@ -26,6 +26,7 @@
 */
 
 #include <alpaka/alpaka.hpp>
+
 #include <catch2/catch.hpp>
 #include <mallocMC/mallocMC.hpp>
 
@@ -64,7 +65,7 @@ template<typename ScatterAllocator>
 void run()
 {
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
 
     auto queue = alpaka::Queue<Acc, alpaka::Blocking>{dev};
 
@@ -73,7 +74,7 @@ void run()
         queue,
         alpaka::createTaskKernel<Acc>(
             alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}},
-            [] ALPAKA_FN_ACC(const Acc& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
+            [] ALPAKA_FN_ACC(Acc const& acc, typename ScatterAllocator::AllocatorHandle allocHandle)
             {
                 auto* ptr = allocHandle.malloc(acc, sizeof(int) * 1000);
                 allocHandle.free(acc, ptr);
diff --git a/tests/verify_heap.cpp b/tests/verify_heap.cpp
index b6f1a103..7d696a10 100644
--- a/tests/verify_heap.cpp
+++ b/tests/verify_heap.cpp
@@ -32,11 +32,13 @@ constexpr auto ELEMS_PER_SLOT = 750;
 
 #include "verify_heap_config.hpp"
 
-#include <algorithm>
 #include <alpaka/alpaka.hpp>
+
+#include <mallocMC/mallocMC_utils.hpp>
+
+#include <algorithm>
 #include <cstdio>
 #include <iostream>
-#include <mallocMC/mallocMC_utils.hpp>
 #include <sstream>
 #include <typeinfo>
 #include <vector>
@@ -50,8 +52,8 @@ bool verbose = false;
 // the type of the elements to allocate
 using allocElem_t = unsigned long long;
 
-auto run_heap_verification(const size_t, const unsigned, unsigned, const bool) -> bool;
-void parse_cmdline(const int, char**, size_t*, unsigned*, unsigned*, bool*);
+auto run_heap_verification(size_t const, unsigned const, unsigned, bool const) -> bool;
+void parse_cmdline(int const, char**, size_t*, unsigned*, unsigned*, bool*);
 void print_help(char**);
 
 // used to create an empty stream for non-verbose output
@@ -92,7 +94,7 @@ auto main(int argc, char** argv) -> int
 
     parse_cmdline(argc, argv, &heapInMB, &threads, &blocks, &machine_readable);
 
-    const auto correct = run_heap_verification(heapInMB, threads, blocks, machine_readable);
+    auto const correct = run_heap_verification(heapInMB, threads, blocks, machine_readable);
     if(!machine_readable || verbose)
     {
         if(correct)
@@ -120,7 +122,7 @@ auto main(int argc, char** argv) -> int
  * @param blocks will be filled with number of blocks, if given as a parameter
  */
 void parse_cmdline(
-    const int argc,
+    int const argc,
     char** argv,
     size_t* heapInMB,
     unsigned* threads,
@@ -241,22 +243,22 @@ void print_help(char** argv)
 struct Check_content
 {
     ALPAKA_FN_ACC void operator()(
-        const Acc& acc,
+        Acc const& acc,
         allocElem_t** data,
         unsigned long long* counter,
         unsigned long long* globalSum,
-        const size_t nSlots,
+        size_t const nSlots,
         int* correct) const
     {
         unsigned long long sum = 0;
         while(true)
         {
-            const size_t pos = alpaka::atomicOp<alpaka::AtomicAdd>(acc, counter, 1ull);
+            size_t const pos = alpaka::atomicOp<alpaka::AtomicAdd>(acc, counter, 1ull);
             if(pos >= nSlots)
             {
                 break;
             }
-            const size_t offset = pos * ELEMS_PER_SLOT;
+            size_t const offset = pos * ELEMS_PER_SLOT;
             for(size_t i = 0; i < ELEMS_PER_SLOT; ++i)
             {
                 if(static_cast<allocElem_t>(data[pos][i]) != static_cast<allocElem_t>(offset + i))
@@ -290,10 +292,10 @@ struct Check_content
 struct Check_content_fast
 {
     ALPAKA_FN_ACC void operator()(
-        const Acc& acc,
+        Acc const& acc,
         allocElem_t** data,
         unsigned long long* counter,
-        const size_t nSlots,
+        size_t const nSlots,
         int* correct) const
     {
         int c = 1;
@@ -304,7 +306,7 @@ struct Check_content_fast
             {
                 break;
             }
-            const size_t offset = pos * ELEMS_PER_SLOT;
+            size_t const offset = pos * ELEMS_PER_SLOT;
             for(size_t i = 0; i < ELEMS_PER_SLOT; ++i)
             {
                 if(static_cast<allocElem_t>(data[pos][i]) != static_cast<allocElem_t>(offset + i))
@@ -333,7 +335,7 @@ struct Check_content_fast
 struct AllocAll
 {
     ALPAKA_FN_ACC void operator()(
-        const Acc& acc,
+        Acc const& acc,
         allocElem_t** data,
         unsigned long long* counter,
         unsigned long long* globalSum,
@@ -347,7 +349,7 @@ struct AllocAll
                 break;
 
             size_t pos = alpaka::atomicOp<alpaka::AtomicAdd>(acc, counter, 1ull);
-            const size_t offset = pos * ELEMS_PER_SLOT;
+            size_t const offset = pos * ELEMS_PER_SLOT;
             for(size_t i = 0; i < ELEMS_PER_SLOT; ++i)
             {
                 p[i] = static_cast<allocElem_t>(offset + i);
@@ -371,10 +373,10 @@ struct AllocAll
 struct DeallocAll
 {
     ALPAKA_FN_ACC void operator()(
-        const Acc& acc,
+        Acc const& acc,
         allocElem_t** data,
         unsigned long long* counter,
-        const size_t nSlots,
+        size_t const nSlots,
         ScatterAllocator::AllocatorHandle mMC) const
     {
         while(true)
@@ -398,7 +400,7 @@ struct DeallocAll
  */
 struct DamageElement
 {
-    ALPAKA_FN_ACC void operator()(const Acc& acc, allocElem_t** data) const
+    ALPAKA_FN_ACC void operator()(Acc const& acc, allocElem_t** data) const
     {
         data[1][0] = static_cast<allocElem_t>(5 * ELEMS_PER_SLOT - 1);
     }
@@ -419,13 +421,13 @@ struct DamageElement
  * @param threads the number of CUDA threads per block
  */
 void allocate(
-    const Device& dev,
+    Device const& dev,
     Queue& queue,
     alpaka::Buf<Device, allocElem_t*, Dim, Idx>& d_testData,
     unsigned long long* nSlots,
     unsigned long long* sum,
-    const unsigned blocks,
-    const unsigned threads,
+    unsigned const blocks,
+    unsigned const threads,
     ScatterAllocator& mMC)
 {
     dout() << "allocating on device...";
@@ -436,7 +438,7 @@ void allocate(
     alpaka::memset(queue, d_sum, 0, 1);
     alpaka::memset(queue, d_nSlots, 0, 1);
 
-    const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
+    auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
     alpaka::enqueue(
         queue,
         alpaka::createTaskKernel<Acc>(
@@ -448,7 +450,7 @@ void allocate(
             mMC.getAllocatorHandle()));
 
     auto const platform = alpaka::Platform<alpaka::DevCpu>{};
-    const auto hostDev = alpaka::getDevByIdx(platform, 0);
+    auto const hostDev = alpaka::getDevByIdx(platform, 0);
 
     auto h_sum = alpaka::allocBuf<unsigned long long, Idx>(hostDev, Idx{1});
     auto h_nSlots = alpaka::allocBuf<unsigned long long, Idx>(hostDev, Idx{1});
@@ -477,17 +479,17 @@ void allocate(
  * @return true if the verification was successful, false otherwise
  */
 auto verify(
-    const Device& dev,
+    Device const& dev,
     Queue& queue,
     alpaka::Buf<Device, allocElem_t*, Dim, Idx>& d_testData,
-    const unsigned long long nSlots,
-    const unsigned blocks,
-    const unsigned threads) -> bool
+    unsigned long long const nSlots,
+    unsigned const blocks,
+    unsigned const threads) -> bool
 {
     dout() << "verifying on device... ";
 
     auto const platform = alpaka::Platform<alpaka::DevCpu>{};
-    const auto hostDev = alpaka::getDevByIdx(platform, 0);
+    auto const hostDev = alpaka::getDevByIdx(platform, 0);
 
     auto h_correct = alpaka::allocBuf<int, Idx>(hostDev, Idx{1});
     *alpaka::getPtrNative(h_correct) = 1;
@@ -503,7 +505,7 @@ auto verify(
     // can be replaced by a call to check_content_fast,
     // if the gaussian sum (see below) is not used and you
     // want to be a bit faster
-    const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
+    auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
     alpaka::enqueue(
         queue,
         alpaka::createTaskKernel<Acc>(
@@ -518,7 +520,7 @@ auto verify(
     alpaka::memcpy(queue, h_correct, d_correct, 1);
     alpaka::wait(queue);
 
-    const auto correct = *alpaka::getPtrNative(h_correct);
+    auto const correct = *alpaka::getPtrNative(h_correct);
     dout() << (correct ? "done\n" : "failed\n");
     return correct != 0;
 }
@@ -529,22 +531,22 @@ auto verify(
  * for params, see run_heap_verification-internal parameters
  */
 void print_machine_readable(
-    const unsigned pagesize,
-    const unsigned accessblocks,
-    const unsigned regionsize,
-    const unsigned wastefactor,
-    const bool resetfreedpages,
-    const unsigned blocks,
-    const unsigned threads,
-    const unsigned elemsPerSlot,
-    const size_t allocElemSize,
-    const size_t heapSize,
-    const size_t maxSpace,
-    const size_t maxSlots,
-    const unsigned long long usedSlots,
-    const float allocFrac,
-    const size_t wasted,
-    const bool correct)
+    unsigned const pagesize,
+    unsigned const accessblocks,
+    unsigned const regionsize,
+    unsigned const wastefactor,
+    bool const resetfreedpages,
+    unsigned const blocks,
+    unsigned const threads,
+    unsigned const elemsPerSlot,
+    size_t const allocElemSize,
+    size_t const heapSize,
+    size_t const maxSpace,
+    size_t const maxSlots,
+    unsigned long long const usedSlots,
+    float const allocFrac,
+    size_t const wasted,
+    bool const correct)
 {
     std::string sep = ",";
     std::stringstream h;
@@ -616,21 +618,21 @@ void print_machine_readable(
  * @return true if the verification was successful,
  *         false otherwise
  */
-auto run_heap_verification(const size_t heapMB, const unsigned blocks, unsigned threads, const bool machine_readable)
+auto run_heap_verification(size_t const heapMB, unsigned const blocks, unsigned threads, bool const machine_readable)
     -> bool
 {
     auto const platform = alpaka::Platform<Acc>{};
-    const auto dev = alpaka::getDevByIdx(platform, 0);
+    auto const dev = alpaka::getDevByIdx(platform, 0);
     auto queue = Queue{dev};
 
     auto const devProps = alpaka::getAccDevProps<Acc>(dev);
     threads = std::min(static_cast<size_t>(threads), static_cast<size_t>(devProps.m_blockThreadCountMax));
 
-    const size_t heapSize = size_t(1024U * 1024U) * heapMB;
-    const size_t slotSize = sizeof(allocElem_t) * ELEMS_PER_SLOT;
-    const size_t nPointers = (heapSize + slotSize - 1) / slotSize;
-    const size_t maxSlots = heapSize / slotSize;
-    const size_t maxSpace = maxSlots * slotSize + nPointers * sizeof(allocElem_t*);
+    size_t const heapSize = size_t(1024U * 1024U) * heapMB;
+    size_t const slotSize = sizeof(allocElem_t) * ELEMS_PER_SLOT;
+    size_t const nPointers = (heapSize + slotSize - 1) / slotSize;
+    size_t const maxSlots = heapSize / slotSize;
+    size_t const maxSpace = maxSlots * slotSize + nPointers * sizeof(allocElem_t*);
     bool correct = true;
 
     dout() << "CreationPolicy Arguments:\n";
@@ -675,7 +677,7 @@ auto run_heap_verification(const size_t heapMB, const unsigned blocks, unsigned
         // damaging one cell
         dout() << "damaging of element... ";
         {
-            const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
+            auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{1}, Idx{1}, Idx{1}};
             alpaka::enqueue(
                 queue,
                 alpaka::createTaskKernel<Acc>(workDiv, DamageElement{}, alpaka::getPtrNative(d_testData)));
@@ -692,7 +694,7 @@ auto run_heap_verification(const size_t heapMB, const unsigned blocks, unsigned
         auto d_dealloc_counter = alpaka::allocBuf<unsigned long long, Idx>(dev, Idx{1});
         alpaka::memset(queue, d_dealloc_counter, 0, 1);
         {
-            const auto workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
+            auto const workDiv = alpaka::WorkDivMembers<Dim, Idx>{Idx{blocks}, Idx{threads}, Idx{1}};
             alpaka::enqueue(
                 queue,
                 alpaka::createTaskKernel<Acc>(
diff --git a/tests/verify_heap_config.hpp b/tests/verify_heap_config.hpp
index 4874b161..1ca2b8bb 100644
--- a/tests/verify_heap_config.hpp
+++ b/tests/verify_heap_config.hpp
@@ -30,6 +30,7 @@
 
 #include <alpaka/alpaka.hpp>
 #include <alpaka/example/ExampleDefaultAcc.hpp>
+
 #include <mallocMC/mallocMC.hpp>
 
 using Dim = alpaka::DimInt<1>;