Skip to content

Commit

Permalink
Apply suggestions to more places
Browse files (browse the repository at this point in the history)
  • Loading branch information
WeiqunZhang committed Oct 16, 2024
1 parent bca9c1b commit 7510c31
Showing 1 changed file with 10 additions and 16 deletions.
26 changes: 10 additions & 16 deletions Src/Base/AMReX_GpuLaunchFunctsG.H
Original file line number | Diff line number | Diff line change
Expand Up @@ -753,21 +753,18 @@ void launch (T const& n, L const& f) noexcept
static_assert(sizeof(T) >= 2);
if (amrex::isEmpty(n)) { return; }
const auto& nec = Gpu::makeNExecutionConfigs<MT>(n);
T ndone = 0;
for (auto const& ec : nec) {
T nleft = n - ndone;
const T start_idx = T(ec.start_idx);
const T nleft = n - start_idx;
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
[=] AMREX_GPU_DEVICE () noexcept {
// This will not overflow, even though nblocks*MT might.
auto tid = T(MT)*T(blockIdx.x)+T(threadIdx.x);
if (tid < nleft) {
f(tid+ndone);
f(tid+start_idx);
}
});
if (Long(nleft) > ec.ntotalthreads) {
ndone += T(ec.ntotalthreads);
}
}

AMREX_GPU_ERROR_CHECK();
}

Expand All @@ -778,17 +775,16 @@ void launch (BoxND<dim> const& box, L const& f) noexcept
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
const BoxIndexerND<dim> indexer(box);
const auto type = box.ixType();
std::uint64_t ndone = 0;
for (auto const& ec : nec) {
const T start_idx = T(ec.start_idx);
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
[=] AMREX_GPU_DEVICE () noexcept {
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
if (icell < indexer.numPts()) {
auto iv = indexer.intVect(icell);
f(BoxND<dim>(iv,iv,type));
}
});
ndone += ec.ntotalthreads;
}
AMREX_GPU_ERROR_CHECK();
}
Expand Down Expand Up @@ -824,19 +820,18 @@ ParallelFor (Gpu::KernelInfo const&, BoxND<dim> const& box, L const& f) noexcept
if (amrex::isEmpty(box)) { return; }
const BoxIndexerND<dim> indexer(box);
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
std::uint64_t ndone = 0;
for (auto const& ec : nec) {
const T start_idx = T(ec.start_idx);
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
[=] AMREX_GPU_DEVICE () noexcept {
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
if (icell < indexer.numPts()) {
auto iv = indexer.intVect(icell);
detail::call_f_intvect_handler(f, iv,
Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x),
(std::uint64_t)blockDim.x)));
}
});
ndone += ec.ntotalthreads;
}
AMREX_GPU_ERROR_CHECK();
}
Expand All @@ -848,19 +843,18 @@ ParallelFor (Gpu::KernelInfo const&, BoxND<dim> const& box, T ncomp, L const& f)
if (amrex::isEmpty(box)) { return; }
const BoxIndexerND<dim> indexer(box);
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
std::uint64_t ndone = 0;
for (auto const& ec : nec) {
const T start_idx = T(ec.start_idx);
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
[=] AMREX_GPU_DEVICE () noexcept {
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
if (icell < indexer.numPts()) {
auto iv = indexer.intVect(icell);
detail::call_f_intvect_ncomp_handler(f, iv, ncomp,
Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x),
(std::uint64_t)blockDim.x)));
}
});
ndone += ec.ntotalthreads;
}
AMREX_GPU_ERROR_CHECK();
}
Expand Down

0 comments on commit 7510c31

Please sign in to comment.