Skip to content

Commit

Permalink
Fix deadlock for CUDA
Browse files Browse the repository at this point in the history
It has been noticed that Tests/GPU/CNS/Exec/RT hangs with
`amrex.the_arena_init_size=0 amrex.the_arena_release_threshold=0`. The issue
appears to be that CUDA host callback functions do not work well with cudaFree
in the main host thread. Note that we don't have any CUDA API calls in the host
callback function. Also note that cudaMalloc seems to work, and using a single
GPU stream also works.

A workaround is implemented to avoid cudaFree when there are host callback
functions inside an MFIter loop.
  • Loading branch information
WeiqunZhang committed Jul 22, 2024
1 parent cdacc4d commit 189d7b7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
13 changes: 11 additions & 2 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <AMReX_CArena.H>
#include <AMReX_BLassert.H>
#include <AMReX_Gpu.H>
#include <AMReX_MFIter.H>
#include <AMReX_ParallelReduce.H>

#ifdef AMREX_TINY_PROFILING
Expand Down Expand Up @@ -57,7 +58,11 @@ CArena::alloc_protected (std::size_t nbytes)
}
#endif

if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold) {
if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold
#ifdef AMREX_USE_GPU
&& (MFIter::currentDepth() == 0)
#endif
) {
freeUnused_protected();
}

Expand Down Expand Up @@ -393,7 +398,11 @@ CArena::hasFreeDeviceMemory (std::size_t sz)

std::size_t nbytes = Arena::align(sz == 0 ? 1 : sz);

if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold) {
if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold
#ifdef AMREX_USE_GPU
&& (MFIter::currentDepth() == 0)
#endif
) {
freeUnused_protected();
}

Expand Down
2 changes: 2 additions & 0 deletions Src/Base/AMReX_MFIter.H
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ public:

static int allowMultipleMFIters (int allow);

static int currentDepth ();

void Finalize ();

protected:
Expand Down
25 changes: 18 additions & 7 deletions Src/Base/AMReX_MFIter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ MFIter::allowMultipleMFIters (int allow)
return allow;
}

int
MFIter::currentDepth ()
{
int r;
#ifdef AMREX_USE_OMP
#pragma omp atomic read
#endif
r = MFIter::depth;
return r;
}

MFIter::MFIter (const FabArrayBase& fabarray_,
unsigned char flags_)
:
Expand Down Expand Up @@ -222,13 +233,6 @@ MFIter::Finalize ()
// mark as invalid
currentIndex = endIndex;

#ifdef AMREX_USE_OMP
#pragma omp master
#endif
{
depth = 0;
}

#ifdef BL_USE_TEAM
if ( ! (flags & NoTeamBarrier) )
ParallelDescriptor::MyTeam().MemoryBarrier();
Expand Down Expand Up @@ -257,6 +261,13 @@ MFIter::Finalize ()
if (m_fa) {
m_fa.reset(nullptr);
}

#ifdef AMREX_USE_OMP
#pragma omp single
#endif
{
depth = 0;
}
}

void
Expand Down

0 comments on commit 189d7b7

Please sign in to comment.