Skip to content

Commit

Permalink
Try #1398:
Browse files Browse the repository at this point in the history
  • Loading branch information
bors[bot] authored Jul 28, 2023
2 parents 85ea29c + 9b890a2 commit d569baf
Showing 1 changed file with 96 additions and 10 deletions.
106 changes: 96 additions & 10 deletions src/Operators/finitedifference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3420,7 +3420,6 @@ function strip_space(bc::StencilBroadcasted{Style}, parent_space) where {Style}
)
end


function Base.copyto!(
out::Field,
bc::Union{
Expand All @@ -3437,23 +3436,110 @@ function Base.copyto!(
Nq = 1
Nh = 1
end
bounds = window_bounds(space, bc)
# executed
@cuda threads = (Nq, Nq) blocks = (Nh,) copyto_stencil_kernel!(
(li, lw, rw, ri) = bounds = window_bounds(space, bc)

# left window
if !Topologies.isperiodic(Spaces.vertical_topology(space))
max_threads = 256
nitems = Nq * Nq * Nh
nthreads = min(max_threads, nitems)
nblocks = cld(nitems, nthreads)
@cuda threads = (nthreads,) blocks = (nblocks,) copyto_stencil_lw_kernel!(
strip_space(out, space),
strip_space(bc, space),
axes(out),
bounds,
Nq,
Nh,
)
end
# interior nodes
ninteriornodes = rw - lw + 1
max_threads = 256
nitems = ninteriornodes * Nq * Nq * Nh
nthreads = min(max_threads, nitems)
nblocks = cld(nitems, nthreads)
@cuda threads = (nthreads,) blocks = (nblocks,) copyto_stencil_interior_kernel!(
strip_space(out, space),
strip_space(bc, space),
axes(out),
bounds,
ninteriornodes,
Nq,
Nh,
)
# right window
if !Topologies.isperiodic(Spaces.vertical_topology(space))
max_threads = 256
nitems = Nq * Nq * Nh
nthreads = min(max_threads, nitems)
nblocks = cld(nitems, nthreads)
@cuda threads = (nthreads,) blocks = (nblocks,) copyto_stencil_rw_kernel!(
strip_space(out, space),
strip_space(bc, space),
axes(out),
bounds,
Nq,
Nh,
)
end
return out
end

function copyto_stencil_kernel!(out, bc, space, bds)
i = threadIdx().x
j = threadIdx().y
h = blockIdx().x
hidx = (i, j, h)
apply_stencil!(space, out, bc, hidx, bds)
"""
    copyto_stencil_lw_kernel!(out, bc, space, bds, Nq, Nh)

CUDA kernel that evaluates the stencil broadcast `bc` on the left boundary
window and stores the results in `out`.

Each thread derives a 1-based global linear id from its thread/block indices.
Ids up to `Nq * Nq * Nh` are decomposed into an `(i, j, h)` triple (with `i`
varying fastest); any surplus threads in the last block do nothing. The thread
then walks the indices `li:(lw - 1)` sequentially, evaluating `bc` with a
`LeftBoundaryWindow` tagged by `Spaces.left_boundary_name(space)`.

# Arguments
- `out`: destination written through `setidx!`.
- `bc`: stencil broadcast expression read through `getidx`.
- `space`: passed through to `setidx!`, `getidx` and
  `Spaces.left_boundary_name`.
- `bds`: 4-tuple `(li, lw, rw, ri)`; only `li` and `lw` are used here.
- `Nq`, `Nh`: extents of the `i`/`j` and `h` components of the id decomposition.

Always returns `nothing`.
"""
function copyto_stencil_lw_kernel!(out, bc, space, bds, Nq, Nh)
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # The launch may round the thread count up to a whole block; ignore the excess.
    if gid <= Nq * Nq * Nh
        (li, lw, _, _) = bds
        # Unflatten gid -> (i, j, h), all 1-based, i fastest.
        h = cld(gid, Nq * Nq)
        j = cld(gid - (h - 1) * Nq * Nq, Nq)
        i = gid - (h - 1) * Nq * Nq - (j - 1) * Nq
        hidx = (i, j, h)
        lbw = LeftBoundaryWindow{Spaces.left_boundary_name(space)}()
        @inbounds for idx in li:(lw - 1)
            setidx!(space, out, idx, hidx, getidx(space, bc, lbw, idx, hidx))
        end
    end
    return nothing
end

"""
    copyto_stencil_interior_kernel!(out, bc, space, bds, nnodes, Nq, Nh)

CUDA kernel that evaluates the stencil broadcast `bc` at interior indices and
stores the results in `out`, handling exactly one index per thread.

Each thread derives a 1-based global linear id from its thread/block indices.
Ids up to `nnodes * Nq * Nq * Nh` are decomposed into `(node, i, j, h)` with the
node offset varying fastest, then `i`, then `j`, then `h`; any surplus threads
in the last block do nothing. The node offset is shifted by `lw - 1`, so the
index passed to `getidx`/`setidx!` runs over `lw:(lw + nnodes - 1)`.

# Arguments
- `out`: destination written through `setidx!`.
- `bc`: stencil broadcast expression read through `getidx` with `Interior()`.
- `space`: passed through to `setidx!` and `getidx`.
- `bds`: 4-tuple of window bounds; only the second entry (`lw`) is used here.
- `nnodes`: number of interior indices handled per `(i, j, h)` triple.
- `Nq`, `Nh`: extents of the `i`/`j` and `h` components of the id decomposition.

Always returns `nothing`.
"""
function copyto_stencil_interior_kernel!(out, bc, space, bds, nnodes, Nq, Nh)
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # The launch may round the thread count up to a whole block; ignore the excess.
    if gid <= nnodes * Nq * Nq * Nh
        (_, lw, _, _) = bds
        # Peel off one coordinate at a time, slowest-varying first.
        h = cld(gid, nnodes * Nq * Nq)
        rem_h = gid - (h - 1) * nnodes * Nq * Nq   # in 1:(nnodes * Nq * Nq)
        j = cld(rem_h, nnodes * Nq)
        rem_j = rem_h - (j - 1) * nnodes * Nq      # in 1:(nnodes * Nq)
        i = cld(rem_j, nnodes)
        # The remaining 1-based node offset is shifted so it starts at lw.
        ndidx = rem_j - (i - 1) * nnodes + lw - 1
        hidx = (i, j, h)
        setidx!(
            space,
            out,
            ndidx,
            hidx,
            getidx(space, bc, Interior(), ndidx, hidx),
        )
    end
    return nothing
end

"""
    copyto_stencil_rw_kernel!(out, bc, space, bds, Nq, Nh)

CUDA kernel that evaluates the stencil broadcast `bc` on the right boundary
window and stores the results in `out`.

Each thread derives a 1-based global linear id from its thread/block indices.
Ids up to `Nq * Nq * Nh` are decomposed into an `(i, j, h)` triple (with `i`
varying fastest); any surplus threads in the last block do nothing. The thread
then walks the indices `(rw + 1):ri` sequentially, evaluating `bc` with a
`RightBoundaryWindow` tagged by `Spaces.right_boundary_name(space)`.

# Arguments
- `out`: destination written through `setidx!`.
- `bc`: stencil broadcast expression read through `getidx`.
- `space`: passed through to `setidx!`, `getidx` and
  `Spaces.right_boundary_name`.
- `bds`: 4-tuple `(li, lw, rw, ri)`; only `rw` and `ri` are used here.
- `Nq`, `Nh`: extents of the `i`/`j` and `h` components of the id decomposition.

Always returns `nothing`.
"""
function copyto_stencil_rw_kernel!(out, bc, space, bds, Nq, Nh)
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # The launch may round the thread count up to a whole block; ignore the excess.
    if gid <= Nq * Nq * Nh
        (_, _, rw, ri) = bds
        # Unflatten gid -> (i, j, h), all 1-based, i fastest.
        h = cld(gid, Nq * Nq)
        j = cld(gid - (h - 1) * Nq * Nq, Nq)
        i = gid - (h - 1) * Nq * Nq - (j - 1) * Nq
        hidx = (i, j, h)
        rbw = RightBoundaryWindow{Spaces.right_boundary_name(space)}()
        @inbounds for idx in (rw + 1):ri
            setidx!(space, out, idx, hidx, getidx(space, bc, rbw, idx, hidx))
        end
    end
    return nothing
end

Expand Down

0 comments on commit d569baf

Please sign in to comment.