From e5c3fe0411f344d23a9e93ed88f3ced3aadf18e3 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 10 Jul 2024 12:49:26 -0700 Subject: [PATCH] New parameter amrex::init_snan Uninitialized FArrayBox, BaseFab, PODVectors, Gpu::DeviceVector, etc. will be initialized to signaling NaNs, if this parameter is true. The default value for the new parameter is false unless AMREX_DEBUG or AMREX_TESTING is true. Note that previously this was only performed for FArrayBox, and it was controlled by fab.init_snan. --- .github/workflows/gcc.yml | 2 +- Src/Base/AMReX.H | 5 ++++ Src/Base/AMReX.cpp | 10 ++++++++ Src/Base/AMReX_BaseFab.H | 32 ++++++++++++++++++++++++ Src/Base/AMReX_FArrayBox.cpp | 3 +++ Src/Base/AMReX_MemPool.H | 40 ++++++++++++++++++++++++++++++ Src/Base/AMReX_MemPool.cpp | 23 ++--------------- Src/Base/AMReX_PODVector.H | 48 +++++++++++++++++++++++++++++++++--- 8 files changed, 137 insertions(+), 26 deletions(-) diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 7e4fe8d40bf..5d8c9ad9c30 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -641,7 +641,7 @@ jobs: - name: Run tests run: | cd build - ctest --output-on-failure -R + ctest --output-on-failure test_hdf5: name: GNU@9.3 HDF5 I/O Test [tests] diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index ee6605f6bd8..ba131a9d94e 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -66,6 +66,8 @@ namespace amrex extern AMREX_EXPORT ErrorHandler error_handler; extern AMREX_EXPORT int abort_on_unused_inputs; + + extern AMREX_EXPORT bool init_snan; } /** the AMReX "git describe" version */ @@ -199,6 +201,9 @@ namespace amrex [[nodiscard]] int Verbose () noexcept; void SetVerbose (int v) noexcept; + [[nodiscard]] bool InitSNaN () noexcept; + void SetInitSNaN (bool v) noexcept; + // ! Get the entire command line including the executable [[nodiscard]] std::string get_command (); diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index 0951023a1fe..67a3e39aa58 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -113,6 +113,11 @@ namespace system std::ostream* osout = &std::cout; std::ostream* oserr = &std::cerr; ErrorHandler error_handler = nullptr; +#if defined(AMREX_DEBUG) || defined(AMREX_TESTING) + bool init_snan = true; +#else + bool init_snan = false; +#endif } } @@ -156,6 +161,10 @@ int amrex::Verbose () noexcept { return amrex::system::verbose; } void amrex::SetVerbose (int v) noexcept { amrex::system::verbose = v; } +bool amrex::InitSNaN () noexcept { return amrex::system::init_snan; } + +void amrex::SetInitSNaN (bool v) noexcept { amrex::system::init_snan = v; } + void amrex::SetErrorHandler (amrex::ErrorHandler f) { amrex::system::error_handler = f; } @@ -444,6 +453,7 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, ParmParse pp("amrex"); pp.queryAdd("v", system::verbose); pp.queryAdd("verbose", system::verbose); + pp.queryAdd("init_snan", system::init_snan); } if (system::verbose > 0) { diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index c1212fb7a0a..db9289894ca 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -457,6 +457,15 @@ public: void getVal (T* data, const IntVect& pos, int N, int numcomp) const noexcept; //! Same as above, except that starts at component 0 and copies all comps. void getVal (T* data, const IntVect& pos) const noexcept; + +#if defined(AMREX_USE_GPU) + template || std::is_same_v,int> FOO = 0> + void fill_snan () noexcept; + /** * \brief The setVal functions set sub-regions in the BaseFab to a * constant value. This most general form specifies the sub-box, @@ -1829,6 +1838,15 @@ BaseFab::shiftHalf (int idir, int n_cell) noexcept return *this; } +template +template || std::is_same_v, int> FOO> +void +BaseFab::fill_snan () noexcept +{ + amrex::fill_snan(this->dptr, this->truesize); +} + template template void @@ -1924,6 +1942,20 @@ BaseFab::define () placementNew(this->dptr, this->truesize); amrex::update_fab_stats(this->domain.numPts(), this->truesize, sizeof(T)); + + if constexpr (std::is_same_v || std::is_same_v) { + if (amrex::InitSNaN() && this->truesize > 0) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && arena()->isDeviceAccessible()) { + this->template fill_snan(); + Gpu::streamSynchronize(); + } else +#endif + { + this->template fill_snan(); + } + } + } } template diff --git a/Src/Base/AMReX_FArrayBox.cpp b/Src/Base/AMReX_FArrayBox.cpp index ecb7fc0f4fc..023cfff2738 100644 --- a/Src/Base/AMReX_FArrayBox.cpp +++ b/Src/Base/AMReX_FArrayBox.cpp @@ -144,6 +144,9 @@ FArrayBox::FArrayBox (const Box& b, int ncomp, Real const* p) noexcept void FArrayBox::initVal () noexcept { + // If amrex::InitSNaN is true, snans have been filled by BaseFab. + if (amrex::InitSNaN()) { return; } + Real * p = dataPtr(); Long s = size(); if (p && s > 0) { diff --git a/Src/Base/AMReX_MemPool.H b/Src/Base/AMReX_MemPool.H index 9a3b92fae04..5a87793aa8f 100644 --- a/Src/Base/AMReX_MemPool.H +++ b/Src/Base/AMReX_MemPool.H @@ -2,8 +2,12 @@ #define BL_MEMPOOL_H #include +#include #include +#include +#include + extern "C" { void amrex_mempool_init (); void amrex_mempool_finalize (); @@ -14,4 +18,40 @@ extern "C" { void amrex_array_init_snan (amrex_real* p, size_t nelems); } +namespace amrex { + template || std::is_same_v, int> FOO = 0> + void fill_snan (T* p, std::size_t nelems) + { + if (p == nullptr || nelems == 0) { return; } +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && run_on == RunOn::Device) { + amrex::ParallelFor(nelems, [=] AMREX_GPU_DEVICE (Long i) noexcept + { + p[i] = std::numeric_limits::signaling_NaN(); + }); + } else +#endif + { + if constexpr (std::is_same_v) { +#ifdef UINT32_MAX + const uint32_t snan = UINT32_C(0x7fa00000); + static_assert(sizeof(float) == sizeof(uint32_t), "MemPool: sizeof float != sizeof uint32_t"); + for (size_t i = 0; i < nelems; ++i) { + std::memcpy(p++, &snan, sizeof(float)); + } +#endif + } else if constexpr (std::is_same_v) { +#ifdef UINT64_MAX + const uint64_t snan = UINT64_C(0x7ff0000080000001); + static_assert(sizeof(double) == sizeof(uint64_t), "MemPool: sizeof double != sizeof uint64_t"); + for (size_t i = 0; i < nelems; ++i) { + std::memcpy(p++, &snan, sizeof(double)); + } +#endif + } + } + } +} + #endif diff --git a/Src/Base/AMReX_MemPool.cpp b/Src/Base/AMReX_MemPool.cpp index e1e26ce7b6d..9b03e6eca4a 100644 --- a/Src/Base/AMReX_MemPool.cpp +++ b/Src/Base/AMReX_MemPool.cpp @@ -114,26 +114,7 @@ void amrex_real_array_init (Real* p, size_t nelems) void amrex_array_init_snan (Real* p, size_t nelems) { -#ifdef BL_USE_DOUBLE - -#ifdef UINT64_MAX - const uint64_t snan = UINT64_C(0x7ff0000080000001); - static_assert(sizeof(double) == sizeof(uint64_t), "MemPool: sizeof double != sizeof uint64_t"); - for (size_t i = 0; i < nelems; ++i) { - std::memcpy(p++, &snan, sizeof(double)); - } -#endif - -#else - -#ifdef UINT32_MAX - const uint32_t snan = UINT32_C(0x7fa00000); - static_assert(sizeof(float) == sizeof(uint32_t), "MemPool: sizeof float != sizeof uint32_t"); - for (size_t i = 0; i < nelems; ++i) { - std::memcpy(p++, &snan, sizeof(float)); - } -#endif - -#endif + amrex::fill_snan(p, nelems); } + } diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 86791b09c03..464bb552ee1 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -2,10 +2,12 @@ #define AMREX_PODVECTOR_H_ #include +#include #include #include #include #include +#include #include #include @@ -196,6 +198,33 @@ namespace amrex #endif std::memmove(dst, src, count); } + + template class Allocator> + void maybe_init_snan (T* data, Size count, Allocator const& allocator) + { + amrex::ignore_unused(data, count, allocator); + if constexpr (std::is_same_v> || + std::is_same_v>) { + if (amrex::InitSNaN()) { +#ifdef AMREX_USE_GPU + if constexpr (RunOnGpu>::value) { + amrex::fill_snan(data, count); + Gpu::streamSynchronize(); + return; + } else if constexpr (IsPolymorphicArenaAllocator>::value) { + if (allocator.arena()->isManaged() || + allocator.arena()->isDevice()) + { + amrex::fill_snan(data, count); + Gpu::streamSynchronize(); + return; + } + } +#endif + amrex::fill_snan(data, count); + } + } + } } namespace VectorGrowthStrategy @@ -254,6 +283,7 @@ namespace amrex { if (a_size != 0) { m_data = allocate(m_size); + detail::maybe_init_snan(m_data, m_size, (Allocator const&)(*this)); } } @@ -594,16 +624,18 @@ namespace amrex void resize (size_type a_new_size) { - if (m_capacity < a_new_size) { - reserve(a_new_size); + auto old_size = m_size; + resize_without_init_snan(a_new_size); + if (old_size < a_new_size) { + detail::maybe_init_snan(m_data + old_size, + m_size - old_size, (Allocator const&)(*this)); } - m_size = a_new_size; } void resize (size_type a_new_size, const T& a_val) { size_type old_size = m_size; - resize(a_new_size); + resize_without_init_snan(a_new_size); if (old_size < a_new_size) { detail::uninitializedFillNImpl(m_data + old_size, @@ -738,6 +770,14 @@ namespace amrex m_size = new_size; m_capacity = new_capacity; } + + void resize_without_init_snan (size_type a_new_size) + { + if (m_capacity < a_new_size) { + reserve(a_new_size); + } + m_size = a_new_size; + } }; }