Skip to content

Commit

Permalink
Fix deadlock for CUDA (#4044)
Browse files Browse the repository at this point in the history
It has been noticed that Tests/GPU/CNS/Exec/RT hangs with
`amrex.the_arena_init_size=0 amrex.the_arena_release_threshold=0`. The
issue appears to be that CUDA host callback functions do not work well with
cudaFree in the main host thread. Note that we don't have any CUDA API
calls in the host callback function. Also note that cudaMalloc seems to
work, and that using a single GPU stream also works.

A workaround is implemented to avoid cudaFree inside an MFIter loop.
  • Loading branch information
WeiqunZhang authored Jul 22, 2024
1 parent 1232089 commit 4392b19
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
13 changes: 11 additions & 2 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <AMReX_CArena.H>
#include <AMReX_BLassert.H>
#include <AMReX_Gpu.H>
#include <AMReX_MFIter.H>
#include <AMReX_ParallelReduce.H>

#ifdef AMREX_TINY_PROFILING
Expand Down Expand Up @@ -57,7 +58,11 @@ CArena::alloc_protected (std::size_t nbytes)
}
#endif

if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold) {
if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold
#ifdef AMREX_USE_GPU
&& (MFIter::currentDepth() == 0)
#endif
) {
freeUnused_protected();
}

Expand Down Expand Up @@ -393,7 +398,11 @@ CArena::hasFreeDeviceMemory (std::size_t sz)

std::size_t nbytes = Arena::align(sz == 0 ? 1 : sz);

if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold) {
if (static_cast<Long>(m_used+nbytes) >= arena_info.release_threshold
#ifdef AMREX_USE_GPU
&& (MFIter::currentDepth() == 0)
#endif
) {
freeUnused_protected();
}

Expand Down
2 changes: 2 additions & 0 deletions Src/Base/AMReX_MFIter.H
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ public:

static int allowMultipleMFIters (int allow);

// Return the current value of the static MFIter depth counter.
// NOTE(review): presumably 0 means no MFIter loop is active -- confirm.
static int currentDepth ();

void Finalize ();

protected:
Expand Down
25 changes: 18 additions & 7 deletions Src/Base/AMReX_MFIter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ MFIter::allowMultipleMFIters (int allow)
return allow;
}

int
MFIter::currentDepth ()
{
int r;
#ifdef AMREX_USE_OMP
#pragma omp atomic read
#endif
r = MFIter::depth;
return r;
}

MFIter::MFIter (const FabArrayBase& fabarray_,
unsigned char flags_)
:
Expand Down Expand Up @@ -222,13 +233,6 @@ MFIter::Finalize ()
// mark as invalid
currentIndex = endIndex;

#ifdef AMREX_USE_OMP
#pragma omp master
#endif
{
depth = 0;
}

#ifdef BL_USE_TEAM
if ( ! (flags & NoTeamBarrier) )
ParallelDescriptor::MyTeam().MemoryBarrier();
Expand Down Expand Up @@ -257,6 +261,13 @@ MFIter::Finalize ()
if (m_fa) {
m_fa.reset(nullptr);
}

#ifdef AMREX_USE_OMP
#pragma omp master
#endif
{
depth = 0;
}
}

void
Expand Down

0 comments on commit 4392b19

Please sign in to comment.