Skip to content

Commit

Permalink
Try #1398:
Browse files Browse the repository at this point in the history
  • Loading branch information
bors[bot] authored Jul 28, 2023
2 parents 85ea29c + 7e7cb58 commit 92cf3ff
Showing 1 changed file with 93 additions and 13 deletions.
106 changes: 93 additions & 13 deletions src/Operators/finitedifference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3420,7 +3420,6 @@ function strip_space(bc::StencilBroadcasted{Style}, parent_space) where {Style}
)
end


function Base.copyto!(
out::Field,
bc::Union{
Expand All @@ -3437,23 +3436,104 @@ function Base.copyto!(
Nq = 1
Nh = 1
end
bounds = window_bounds(space, bc)
# executed
@cuda threads = (Nq, Nq) blocks = (Nh,) copyto_stencil_kernel!(
strip_space(out, space),
strip_space(bc, space),
(li, lw, rw, ri) = bounds = window_bounds(space, bc)
ninteriornodes = rw - lw + 1

max_threads = 256
nitemsbdy = Nq * Nq * Nh # # of independent boundary items
nitemsint = ninteriornodes * Nq * Nq * Nh # # of independent interior items
(nthreadsbdy, nblocksbdy) = Spaces._configure_threadblock(nitemsbdy)
(nthreadsint, nblocksint) = Spaces._configure_threadblock(nitemsint)
isnotperiodic = !Topologies.isperiodic(Spaces.vertical_topology(space))
strip_space_out = strip_space(out, space)
strip_space_bc = strip_space(bc, space)
# left window
isnotperiodic &&
@cuda threads = (nthreadsbdy,) blocks = (nblocksbdy,) copyto_stencil_lw_kernel!(
strip_space_out,
strip_space_bc,
axes(out),
bounds,
Nq,
Nh,
)
# interior nodes
@cuda threads = (nthreadsint,) blocks = (nblocksint,) copyto_stencil_interior_kernel!(
strip_space_out,
strip_space_bc,
axes(out),
bounds,
)
ninteriornodes,
Nq,
Nh,
)
# right window
isnotperiodic &&
@cuda threads = (nthreadsbdy,) blocks = (nblocksbdy,) copyto_stencil_rw_kernel!(
strip_space_out,
strip_space_bc,
axes(out),
bounds,
Nq,
Nh,
)
return out
end

function copyto_stencil_kernel!(out, bc, space, bds)
i = threadIdx().x
j = threadIdx().y
h = blockIdx().x
hidx = (i, j, h)
apply_stencil!(space, out, bc, hidx, bds)
"""
    copyto_stencil_lw_kernel!(out, bc, space, bds, Nq, Nh)

GPU kernel that evaluates the stencil broadcast `bc` on the left boundary window and
stores the results in `out`.

`bds` is destructured as `(li, lw, rw, ri)`; this kernel writes the vertical indices
`li:(lw - 1)`. Each thread with linear index `gid <= Nq * Nq * Nh` handles one
horizontal index `(i, j, h)` and loops over those vertical indices; all other
threads do nothing.

NOTE(review): `Nq` and `Nh` look like the number of nodes per horizontal direction
and the number of horizontal elements — confirm against the caller.
"""
function copyto_stencil_lw_kernel!(out, bc, space, bds, Nq, Nh)
    # 1-based linear thread index across the whole launch.
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # Threads whose index exceeds the number of work items are idle.
    if gid <= Nq * Nq * Nh
        (li, lw, _, _) = bds
        # Decompose `gid` into (i, j, h) with `i` varying fastest, then `j`, then `h`.
        h = cld(gid, Nq * Nq)
        j = cld(gid - (h - 1) * Nq * Nq, Nq)
        i = gid - (h - 1) * Nq * Nq - (j - 1) * Nq
        hidx = (i, j, h)
        lbw = LeftBoundaryWindow{Spaces.left_boundary_name(space)}()
        @inbounds for idx in li:(lw - 1)
            setidx!(space, out, idx, hidx, getidx(space, bc, lbw, idx, hidx))
        end
    end
    return nothing
end

"""
    copyto_stencil_interior_kernel!(out, bc, space, bds, nnodes, Nq, Nh)

GPU kernel that evaluates the stencil broadcast `bc` at interior vertical nodes and
stores the results in `out`.

Each thread with linear index `gid <= nnodes * Nq * Nq * Nh` handles exactly one
(vertical node, horizontal index) pair; all other threads do nothing. The linear
index is decomposed with the vertical node varying fastest, then `i`, then `j`,
then `h`. The vertical index written is offset by `lw`, the second entry of `bds`,
so the nodes covered are `lw:(lw + nnodes - 1)`.

NOTE(review): `Nq` and `Nh` look like the number of nodes per horizontal direction
and the number of horizontal elements — confirm against the caller.
"""
function copyto_stencil_interior_kernel!(out, bc, space, bds, nnodes, Nq, Nh)
    # 1-based linear thread index across the whole launch.
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # Threads whose index exceeds the number of work items are idle.
    if gid <= nnodes * Nq * Nq * Nh
        (_, lw, _, _) = bds
        # Strides of the linear index: work items per `j` slice and per `h` slice.
        nper_j = nnodes * Nq
        nper_h = nnodes * Nq * Nq
        h = cld(gid, nper_h)
        # Remaining 1-based offset inside the current `h` slice.
        rem_h = gid - (h - 1) * nper_h
        j = cld(rem_h, nper_j)
        # Remaining 1-based offset inside the current `j` slice.
        rem_j = rem_h - (j - 1) * nper_j
        i = cld(rem_j, nnodes)
        # Offset within the column, shifted so that offset 1 maps to index `lw`.
        ndidx = rem_j - (i - 1) * nnodes + lw - 1
        hidx = (i, j, h)
        setidx!(
            space,
            out,
            ndidx,
            hidx,
            getidx(space, bc, Interior(), ndidx, hidx),
        )
    end
    return nothing
end

"""
    copyto_stencil_rw_kernel!(out, bc, space, bds, Nq, Nh)

GPU kernel that evaluates the stencil broadcast `bc` on the right boundary window and
stores the results in `out`.

`bds` is destructured as `(li, lw, rw, ri)`; this kernel writes the vertical indices
`(rw + 1):ri`. Each thread with linear index `gid <= Nq * Nq * Nh` handles one
horizontal index `(i, j, h)` and loops over those vertical indices; all other
threads do nothing.

NOTE(review): `Nq` and `Nh` look like the number of nodes per horizontal direction
and the number of horizontal elements — confirm against the caller.
"""
function copyto_stencil_rw_kernel!(out, bc, space, bds, Nq, Nh)
    # 1-based linear thread index across the whole launch.
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # Threads whose index exceeds the number of work items are idle.
    if gid <= Nq * Nq * Nh
        (_, _, rw, ri) = bds
        # Decompose `gid` into (i, j, h) with `i` varying fastest, then `j`, then `h`.
        h = cld(gid, Nq * Nq)
        j = cld(gid - (h - 1) * Nq * Nq, Nq)
        i = gid - (h - 1) * Nq * Nq - (j - 1) * Nq
        hidx = (i, j, h)
        rbw = RightBoundaryWindow{Spaces.right_boundary_name(space)}()
        @inbounds for idx in (rw + 1):ri
            setidx!(space, out, idx, hidx, getidx(space, bc, rbw, idx, hidx))
        end
    end
    return nothing
end

Expand Down

0 comments on commit 92cf3ff

Please sign in to comment.