From daa9b83b8542cfd904f6b249d99af0ec909b4a91 Mon Sep 17 00:00:00 2001 From: Danny Taller <66029857+dtaller@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:24:34 -0800 Subject: [PATCH 1/4] fix gpu errors (#306) * Explicitly define constructors instead of using the compiler generated ones (works around compiler bugs) --- src/care/host_device_map.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/care/host_device_map.h b/src/care/host_device_map.h index a63bb386..a24da555 100644 --- a/src/care/host_device_map.h +++ b/src/care/host_device_map.h @@ -66,7 +66,7 @@ namespace care { class host_device_map< key_type, mapped_type, RAJA::seq_exec> { public: // default constructor - host_device_map() noexcept = default; + host_device_map() noexcept {}; // constructor taking max number of entries host_device_map(size_t max_entries) : host_device_map{} { @@ -86,7 +86,15 @@ namespace care { } // copy constructor - host_device_map(host_device_map const & other) noexcept = default; + host_device_map(host_device_map const & other) noexcept : + m_map(other.m_map), + m_size(other.m_size), + m_iterator(other.m_iterator), + m_next_iterator_index(other.m_next_iterator_index), + m_max_size(other.m_max_size), + m_signal(other.m_signal) + { + } // move constructor host_device_map(host_device_map && other) noexcept { @@ -387,7 +395,7 @@ namespace care { { public: // default constructor - host_device_map() noexcept = default; + host_device_map() noexcept {}; // constructor host_device_map(size_t max_entries) : host_device_map{} { @@ -406,9 +414,16 @@ namespace care { } // copy constructor - host_device_map(host_device_map const & other) noexcept = default; + host_device_map(host_device_map const & other) noexcept : + m_size_ptr(other.m_size_ptr), + m_size(other.m_size), + m_map(other.m_map), + m_max_size(other.m_max_size), + m_signal(other.m_signal) + { + } - // move constructor + // move constructor host_device_map(host_device_map && other) noexcept { delete m_size_ptr; m_size_ptr = other.m_size_ptr; From 2cbb58cdc647a814aa238f5ae8becb71d4f9ff4a Mon Sep 17 00:00:00 2001 From: Alan Dayton <6393677+adayton1@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:57:21 -0800 Subject: [PATCH 2/4] Remove wrapper for make_managed_from_factory (#308) --- src/care/managed_ptr.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/care/managed_ptr.h b/src/care/managed_ptr.h index ea3b2a43..7e6dde1a 100644 --- a/src/care/managed_ptr.h +++ b/src/care/managed_ptr.h @@ -25,13 +25,6 @@ namespace care{ inline managed_ptr make_managed(Args&&... args) { return chai::make_managed(std::forward(args)...); } - - template - inline managed_ptr make_managed_from_factory(F&& f, Args&&... args) { - return chai::make_managed_from_factory(std::forward(f), std::forward(args)...); - } } #else // defined(CARE_ENABLE_MANAGED_PTR) From 68f189f0ea4167681368847f21ac7a2f8f8d15a9 Mon Sep 17 00:00:00 2001 From: Ben Liu <38140930+liu15@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:28:22 -0800 Subject: [PATCH 3/4] Fix for clang query (#309) --- src/care/host_device_map.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/care/host_device_map.h b/src/care/host_device_map.h index a24da555..8a41dc0f 100644 --- a/src/care/host_device_map.h +++ b/src/care/host_device_map.h @@ -284,7 +284,8 @@ namespace care { inline CARE_HOST_DEVICE mapped_type at(key_type key) const { int index = care::BinarySearch(m_gpu_map.keys(),0,m_size,key); if (index >= 0) { - return m_gpu_map.values()[index]; + const care::local_ptr& values = m_gpu_map.values(); + return values[index]; } else { return m_signal; @@ -351,13 +352,15 @@ namespace care { // lookups (valid after a sort() call) are done by binary searching the keys and using the // index of the located key to grab the appropriate value inline CARE_DEVICE mapped_type & value_at(int index) const { - return m_gpu_map.values()[index]; + const care::local_ptr& values = m_gpu_map.values(); + return values[index]; } // lookups (valid after a sort() call) are done by binary searching the keys and using the // index of the located key to grab the appropriate value inline CARE_DEVICE key_type const & key_at(int index) const { - return m_gpu_map.keys()[index]; + const care::local_ptr& keys = m_gpu_map.keys(); + return keys[index]; } inline CARE_DEVICE iterator iterator_at(int index) const { From b79bf2b31f78fc556923be10b5ef5f2556f0ad95 Mon Sep 17 00:00:00 2001 From: Alan Dayton <6393677+adayton1@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:18:17 -0800 Subject: [PATCH 4/4] Remove TSAN_ONLY_ATOMIC stuff (#310) --- RELEASE_NOTES.md | 3 --- cmake/SetupOptions.cmake | 2 -- src/care/atomic.h | 47 ---------------------------------------- src/care/config.h.in | 1 - 4 files changed, 53 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index d3e45687..0be796f2 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -16,10 +16,7 @@ The format of this file is based on [Keep a Changelog](http://keepachangelog.com ### Added - Added CARE\_DEEP\_COPY\_RAW\_PTR configuration option. - -### Added - Added ATOMIC\_SUB, ATOMIC\_LOAD, ATOMIC\_STORE, ATOMIC\_EXCHANGE, and ATOMIC\_CAS macros. -- Added TSAN\_ONLY\_ATOMIC\_\* macros to suppress tsan data race reports. Controlled by CARE\_ENABLE\_TSAN\_ONLY\_ATOMICS configuration option. ### Removed - Removed Accessor template parameter from host\_device\_ptr. diff --git a/cmake/SetupOptions.cmake b/cmake/SetupOptions.cmake index d70b436c..2ef13d5f 100644 --- a/cmake/SetupOptions.cmake +++ b/cmake/SetupOptions.cmake @@ -29,8 +29,6 @@ option(CARE_NEVER_USE_RAJA_PARALLEL_SCAN "Disable RAJA parallel scans in SCAN lo option(CARE_ENABLE_FUSER_BIN_32 "Enable the 32 register fusible loop bin." OFF) option(CARE_ENABLE_PARALLEL_LOOP_BACKWARDS "Reverse the start and end for parallel loops." OFF) option(CARE_ENABLE_STALE_DATA_CHECK "Enable checking for stale host data. Only applicable for GPU (or GPU simulation) builds." OFF) -# TODO: Investigate correctness and performance impact of this option -option(CARE_ENABLE_TSAN_ONLY_ATOMICS "Enable atomics for ThreadSanitizer (TSAN) build." OFF) # Extra components cmake_dependent_option(CARE_ENABLE_TESTS "Build CARE tests" diff --git a/src/care/atomic.h b/src/care/atomic.h index b4ab7efb..74f6aabc 100644 --- a/src/care/atomic.h +++ b/src/care/atomic.h @@ -25,51 +25,4 @@ using RAJAAtomic = RAJA::auto_atomic; #define ATOMIC_EXCHANGE(ref, val) RAJA::atomicExchange(&(ref), val) #define ATOMIC_CAS(ref, compare, val) RAJA::atomicCAS(&(ref), compare, val) -/// -/// Macros that use atomics for a ThreadSanitizer build to avoid false -/// positives, but otherwise do a non-atomic operation (for cases where -/// the order of execution does not matter, such as multiple threads -/// setting the same variable to the same value). -/// -/// WARNING: The returned previous value for the TSAN_ONLY_ATOMIC_* macros -/// should generally not be used in a parallel context, since -/// another thread may have modified the value at the given memory -/// location in between the current thread's read and write. If the -/// return value is needed, use the ATOMIC_* macros instead. -/// -/// TODO: Evaluate whether the compiler actually does the right thing without -/// atomics and whether using atomics detracts from performance. -/// -#if defined(CARE_ENABLE_TSAN_ONLY_ATOMICS) - -#define TSAN_ONLY_ATOMIC_ADD(ref, inc) ATOMIC_ADD(ref, inc) -#define TSAN_ONLY_ATOMIC_SUB(ref, inc) ATOMIC_SUB(ref, inc) -#define TSAN_ONLY_ATOMIC_MIN(ref, val) ATOMIC_MIN(ref, val) -#define TSAN_ONLY_ATOMIC_MAX(ref, val) ATOMIC_MAX(ref, val) -#define TSAN_ONLY_ATOMIC_OR(ref, val) ATOMIC_OR(ref, val) -#define TSAN_ONLY_ATOMIC_AND(ref, val) ATOMIC_AND(ref, val) -#define TSAN_ONLY_ATOMIC_XOR(ref, val) ATOMIC_XOR(ref, val) -#define TSAN_ONLY_ATOMIC_LOAD(ref) ATOMIC_LOAD(ref) -#define TSAN_ONLY_ATOMIC_STORE(ref, val) ATOMIC_STORE(ref, val) -#define TSAN_ONLY_ATOMIC_EXCHANGE(ref, val) ATOMIC_EXCHANGE(ref, val) -#define TSAN_ONLY_ATOMIC_CAS(ref, compare, val) ATOMIC_CAS(ref, compare, val) - -#else - -using TSANOnlyAtomic = RAJA::seq_atomic; - -#define TSAN_ONLY_ATOMIC_ADD(ref, inc) RAJA::atomicAdd(&(ref), inc) -#define TSAN_ONLY_ATOMIC_SUB(ref, inc) RAJA::atomicSub(&(ref), inc) -#define TSAN_ONLY_ATOMIC_MIN(ref, val) RAJA::atomicMin(&(ref), val) -#define TSAN_ONLY_ATOMIC_MAX(ref, val) RAJA::atomicMax(&(ref), val) -#define TSAN_ONLY_ATOMIC_OR(ref, val) RAJA::atomicOr(&(ref), val) -#define TSAN_ONLY_ATOMIC_AND(ref, val) RAJA::atomicAnd(&(ref), val) -#define TSAN_ONLY_ATOMIC_XOR(ref, val) RAJA::atomicXor(&(ref), val) -#define TSAN_ONLY_ATOMIC_LOAD(ref) RAJA::atomicLoad(&(ref)) -#define TSAN_ONLY_ATOMIC_STORE(ref, val) RAJA::atomicStore(&(ref), val) -#define TSAN_ONLY_ATOMIC_EXCHANGE(ref, val) RAJA::atomicExchange(&(ref), val) -#define TSAN_ONLY_ATOMIC_CAS(ref, compare, val) RAJA::atomicCAS(&(ref), compare, val) - -#endif - #endif // CARE_ATOMIC_H diff --git a/src/care/config.h.in b/src/care/config.h.in index 24e99a64..c26353ea 100644 --- a/src/care/config.h.in +++ b/src/care/config.h.in @@ -27,7 +27,6 @@ #cmakedefine01 CARE_ENABLE_PINNED_MEMORY_FOR_SCANS #cmakedefine CARE_GPU_MEMORY_IS_ACCESSIBLE_ON_CPU #cmakedefine CARE_ENABLE_STALE_DATA_CHECK -#cmakedefine CARE_ENABLE_TSAN_ONLY_ATOMICS // Optional dependencies #cmakedefine01 CARE_HAVE_LLNL_GLOBALID