Skip to content

Commit

Permalink
Fix overflow in ParallelFor(Long n, ...)
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlexanderSinn committed Aug 11, 2023
1 parent 6535763 commit d71d11c
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number | Diff line number | Diff line change
Expand Up @@ -157,7 +157,12 @@ namespace Gpu {
makeExecutionConfig (Long N) noexcept
{
ExecutionConfig ec(dim3{}, dim3{});
ec.numBlocks.x = (std::max(N,Long(1)) + MT - 1) / MT;
Long numBlocks = (std::max(N,Long(1)) + MT - 1) / MT;
// ensure that blockDim.x*gridDim.x does not overflow
numBlocks = std::min(numBlocks, Long(std::numeric_limits<unsigned int>::max()/MT));
// ensure that the maximum grid size of 2^31-1 won't be exceeded
numBlocks = std::min(numBlocks, Long(std::numeric_limits<int>::max()));
ec.numBlocks.x = numBlocks;
ec.numThreads.x = MT;
AMREX_ASSERT(MT % Gpu::Device::warp_size == 0);
return ec;
Expand All @@ -167,11 +172,7 @@ namespace Gpu {
ExecutionConfig
makeExecutionConfig (const Box& box) noexcept
{
ExecutionConfig ec(dim3{}, dim3{});
ec.numBlocks.x = (std::max(box.numPts(),Long(1)) + MT - 1) / MT;
ec.numThreads.x = MT;
AMREX_ASSERT(MT % Gpu::Device::warp_size == 0);
return ec;
return makeExecutionConfig<MT>(box.numPts());
}
#endif

Expand Down

0 comments on commit d71d11c

Please sign in to comment.