Merge branch 'develop' into reduceLoops
adayton1 authored Dec 13, 2024
2 parents d932859 + b79bf2b commit c44e785
Showing 6 changed files with 26 additions and 68 deletions.
3 changes: 0 additions & 3 deletions RELEASE_NOTES.md
@@ -16,10 +16,7 @@ The format of this file is based on [Keep a Changelog](http://keepachangelog.com

### Added
- Added CARE\_DEEP\_COPY\_RAW\_PTR configuration option.
-
-### Added
- Added ATOMIC\_SUB, ATOMIC\_LOAD, ATOMIC\_STORE, ATOMIC\_EXCHANGE, and ATOMIC\_CAS macros.
-- Added TSAN\_ONLY\_ATOMIC\_\* macros to suppress tsan data race reports. Controlled by CARE\_ENABLE\_TSAN\_ONLY\_ATOMICS configuration option.

### Removed
- Removed Accessor template parameter from host\_device\_ptr.
2 changes: 0 additions & 2 deletions cmake/SetupOptions.cmake
@@ -29,8 +29,6 @@ option(CARE_NEVER_USE_RAJA_PARALLEL_SCAN "Disable RAJA parallel scans in SCAN lo
option(CARE_ENABLE_FUSER_BIN_32 "Enable the 32 register fusible loop bin." OFF)
option(CARE_ENABLE_PARALLEL_LOOP_BACKWARDS "Reverse the start and end for parallel loops." OFF)
option(CARE_ENABLE_STALE_DATA_CHECK "Enable checking for stale host data. Only applicable for GPU (or GPU simulation) builds." OFF)
-# TODO: Investigate correctness and performance impact of this option
-option(CARE_ENABLE_TSAN_ONLY_ATOMICS "Enable atomics for ThreadSanitizer (TSAN) build." OFF)

# Extra components
cmake_dependent_option(CARE_ENABLE_TESTS "Build CARE tests"
47 changes: 0 additions & 47 deletions src/care/atomic.h
@@ -25,51 +25,4 @@ using RAJAAtomic = RAJA::auto_atomic;
#define ATOMIC_EXCHANGE(ref, val) RAJA::atomicExchange<RAJAAtomic>(&(ref), val)
#define ATOMIC_CAS(ref, compare, val) RAJA::atomicCAS<RAJAAtomic>(&(ref), compare, val)

-///
-/// Macros that use atomics for a ThreadSanitizer build to avoid false
-/// positives, but otherwise do a non-atomic operation (for cases where
-/// the order of execution does not matter, such as multiple threads
-/// setting the same variable to the same value).
-///
-/// WARNING: The returned previous value for the TSAN_ONLY_ATOMIC_* macros
-///          should generally not be used in a parallel context, since
-///          another thread may have modified the value at the given memory
-///          location in between the current thread's read and write. If the
-///          return value is needed, use the ATOMIC_* macros instead.
-///
-/// TODO: Evaluate whether the compiler actually does the right thing without
-///       atomics and whether using atomics detracts from performance.
-///
-#if defined(CARE_ENABLE_TSAN_ONLY_ATOMICS)
-
-#define TSAN_ONLY_ATOMIC_ADD(ref, inc) ATOMIC_ADD(ref, inc)
-#define TSAN_ONLY_ATOMIC_SUB(ref, inc) ATOMIC_SUB(ref, inc)
-#define TSAN_ONLY_ATOMIC_MIN(ref, val) ATOMIC_MIN(ref, val)
-#define TSAN_ONLY_ATOMIC_MAX(ref, val) ATOMIC_MAX(ref, val)
-#define TSAN_ONLY_ATOMIC_OR(ref, val) ATOMIC_OR(ref, val)
-#define TSAN_ONLY_ATOMIC_AND(ref, val) ATOMIC_AND(ref, val)
-#define TSAN_ONLY_ATOMIC_XOR(ref, val) ATOMIC_XOR(ref, val)
-#define TSAN_ONLY_ATOMIC_LOAD(ref) ATOMIC_LOAD(ref)
-#define TSAN_ONLY_ATOMIC_STORE(ref, val) ATOMIC_STORE(ref, val)
-#define TSAN_ONLY_ATOMIC_EXCHANGE(ref, val) ATOMIC_EXCHANGE(ref, val)
-#define TSAN_ONLY_ATOMIC_CAS(ref, compare, val) ATOMIC_CAS(ref, compare, val)
-
-#else
-
-using TSANOnlyAtomic = RAJA::seq_atomic;
-
-#define TSAN_ONLY_ATOMIC_ADD(ref, inc) RAJA::atomicAdd<TSANOnlyAtomic>(&(ref), inc)
-#define TSAN_ONLY_ATOMIC_SUB(ref, inc) RAJA::atomicSub<TSANOnlyAtomic>(&(ref), inc)
-#define TSAN_ONLY_ATOMIC_MIN(ref, val) RAJA::atomicMin<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_MAX(ref, val) RAJA::atomicMax<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_OR(ref, val) RAJA::atomicOr<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_AND(ref, val) RAJA::atomicAnd<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_XOR(ref, val) RAJA::atomicXor<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_LOAD(ref) RAJA::atomicLoad<TSANOnlyAtomic>(&(ref))
-#define TSAN_ONLY_ATOMIC_STORE(ref, val) RAJA::atomicStore<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_EXCHANGE(ref, val) RAJA::atomicExchange<TSANOnlyAtomic>(&(ref), val)
-#define TSAN_ONLY_ATOMIC_CAS(ref, compare, val) RAJA::atomicCAS<TSANOnlyAtomic>(&(ref), compare, val)
-
#endif // CARE_ATOMIC_H
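
Note: the deleted comment block above warned that the previous value returned by the TSAN_ONLY_ATOMIC_* variants was unsafe to use in parallel, while the surviving ATOMIC_* macros perform the full read-modify-write atomically. A minimal sketch of a return-value-dependent use that therefore needs the real macros (claim_slot, next_free, and capacity are illustrative names, not part of CARE):

#include "care/atomic.h"

// Sketch: atomically reserve the next free slot in a fixed-capacity buffer.
// The returned previous value is this thread's unique index precisely because
// ATOMIC_ADD performs the read-modify-write as one atomic step, which is the
// property the deleted warning says the TSAN_ONLY_* variants lacked.
inline int claim_slot(int* next_free, int capacity)
{
   int slot = ATOMIC_ADD(*next_free, 1);
   return (slot < capacity) ? slot : -1;
}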
1 change: 0 additions & 1 deletion src/care/config.h.in
@@ -27,7 +27,6 @@
#cmakedefine01 CARE_ENABLE_PINNED_MEMORY_FOR_SCANS
#cmakedefine CARE_GPU_MEMORY_IS_ACCESSIBLE_ON_CPU
#cmakedefine CARE_ENABLE_STALE_DATA_CHECK
-#cmakedefine CARE_ENABLE_TSAN_ONLY_ATOMICS

// Optional dependencies
#cmakedefine01 CARE_HAVE_LLNL_GLOBALID
34 changes: 26 additions & 8 deletions src/care/host_device_map.h
@@ -66,7 +66,7 @@ namespace care {
class host_device_map< key_type, mapped_type, RAJA::seq_exec> {
public:
// default constructor
-host_device_map() noexcept = default;
+host_device_map() noexcept {};

// constructor taking max number of entries
host_device_map(size_t max_entries) : host_device_map{} {
@@ -86,7 +86,15 @@
}

// copy constructor
-host_device_map(host_device_map const & other) noexcept = default;
+host_device_map(host_device_map const & other) noexcept :
+   m_map(other.m_map),
+   m_size(other.m_size),
+   m_iterator(other.m_iterator),
+   m_next_iterator_index(other.m_next_iterator_index),
+   m_max_size(other.m_max_size),
+   m_signal(other.m_signal)
+{
+}

// move constructor
host_device_map(host_device_map && other) noexcept {
@@ -276,7 +284,8 @@
inline CARE_HOST_DEVICE mapped_type at(key_type key) const {
int index = care::BinarySearch<key_type>(m_gpu_map.keys(),0,m_size,key);
if (index >= 0) {
-return m_gpu_map.values()[index];
+const care::local_ptr<mapped_type>& values = m_gpu_map.values();
+return values[index];
}
else {
return m_signal;
@@ -343,13 +352,15 @@
// lookups (valid after a sort() call) are done by binary searching the keys and using the
// index of the located key to grab the appropriate value
inline CARE_DEVICE mapped_type & value_at(int index) const {
-return m_gpu_map.values()[index];
+const care::local_ptr<mapped_type>& values = m_gpu_map.values();
+return values[index];
}

// lookups (valid after a sort() call) are done by binary searching the keys and using the
// index of the located key to grab the appropriate value
inline CARE_DEVICE key_type const & key_at(int index) const {
-return m_gpu_map.keys()[index];
+const care::local_ptr<key_type>& keys = m_gpu_map.keys();
+return keys[index];
}

inline CARE_DEVICE iterator iterator_at(int index) const {
@@ -387,7 +398,7 @@
{
public:
// default constructor
-host_device_map() noexcept = default;
+host_device_map() noexcept {};

// constructor
host_device_map(size_t max_entries) : host_device_map{} {
@@ -406,9 +417,16 @@
}

// copy constructor
-host_device_map(host_device_map const & other) noexcept = default;
+host_device_map(host_device_map const & other) noexcept :
+   m_size_ptr(other.m_size_ptr),
+   m_size(other.m_size),
+   m_map(other.m_map),
+   m_max_size(other.m_max_size),
+   m_signal(other.m_signal)
+{
+}

-// move constructor
+// move constructor
host_device_map(host_device_map && other) noexcept {
delete m_size_ptr;
m_size_ptr = other.m_size_ptr;
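
Note: the comments in this file describe the lookup scheme: after sort(), a key is binary searched and the located index selects the value, with m_signal returned on a miss. A plain-C++ sketch of that scheme (lookup, sorted_keys, values, and signal are illustrative stand-ins for the map's members, not CARE code):

#include <algorithm>

// Sketch of the sorted-key lookup described above: binary search the keys,
// then use the matching index to grab the value; return the sentinel
// (mirroring m_signal) when the key is absent.
inline int lookup(const int* sorted_keys, const int* values, int size,
                  int key, int signal)
{
   const int* pos = std::lower_bound(sorted_keys, sorted_keys + size, key);
   if (pos != sorted_keys + size && *pos == key) {
      return values[pos - sorted_keys];
   }
   return signal;
}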
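The recurring change in at(), value_at(), and key_at() binds the local_ptr returned by value from m_gpu_map.values() or m_gpu_map.keys() to a named const reference before indexing it. One plausible reading is that this relies on C++ const-reference lifetime extension to keep the temporary handle alive through the indexing expression; a self-contained sketch of that mechanism (handle, make_handle, and read are illustrative, not CARE types):

#include <vector>

// Illustrative stand-in for care::local_ptr: a lightweight, by-value handle.
struct handle {
   int* data;
   int& operator[](int i) const { return data[i]; }
};

inline handle make_handle(std::vector<int>& v) { return handle{v.data()}; }

inline int read(std::vector<int>& v, int index)
{
   // Binding the returned temporary to a const reference extends its
   // lifetime to the end of the enclosing scope, so h stays valid for
   // the indexing that follows.
   const handle& h = make_handle(v);
   return h[index];
}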
7 changes: 0 additions & 7 deletions src/care/managed_ptr.h
@@ -25,13 +25,6 @@ namespace care{
inline managed_ptr<T> make_managed(Args&&... args) {
return chai::make_managed<T>(std::forward<Args>(args)...);
}
-
-template <typename T,
-          typename F,
-          typename... Args>
-inline managed_ptr<T> make_managed_from_factory(F&& f, Args&&... args) {
-   return chai::make_managed_from_factory<T>(std::forward<F>(f), std::forward<Args>(args)...);
-}
}

#else // defined(CARE_ENABLE_MANAGED_PTR)
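
Note: the care::make_managed wrapper kept above forwards to chai::make_managed, which constructs the object for both host and device execution so the resulting managed_ptr can be dereferenced inside GPU kernels as well as on the host. A minimal sketch using the CARE_HOST_DEVICE annotation that appears elsewhere in this diff; Base and Scale are illustrative types, not from the repository:

#include "care/managed_ptr.h"

class Base {
public:
   CARE_HOST_DEVICE virtual ~Base() {}
   CARE_HOST_DEVICE virtual double eval(double x) const = 0;
};

class Scale : public Base {
public:
   CARE_HOST_DEVICE Scale(double factor) : m_factor(factor) {}
   CARE_HOST_DEVICE double eval(double x) const override { return m_factor * x; }
private:
   double m_factor;
};

// The factory builds the Scale instance in each execution space; the
// returned managed_ptr is usable from host code and device kernels alike.
care::managed_ptr<Base> f = care::make_managed<Scale>(2.0);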
