Skip to content

Commit

Permalink
Fix CommonSpaces and MPI
Browse files Browse the repository at this point in the history
The `CommonGrids` module passes the context down to all the grids it
creates. This is a problem for `IntervalTopology`, because it can only be
created with a `SingletonCommsContext`.

This commit fixes this issue and adds tests checking that we can create
CommonSpaces with MPI and CUDA.
  • Loading branch information
Sbozzolo committed Feb 14, 2025
1 parent 53acb3f commit b34a3a9
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 27 deletions.
29 changes: 29 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,35 @@ steps:
agents:
slurm_gpus: 1

- label: "Unit: common spaces with CUDA"
key: "gpu_common_cuda_spaces"
command:
- "julia --color=yes --check-bounds=yes --project=.buildkite test/CommonSpaces/unit_common_spaces.jl"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1

- label: "Unit: common spaces with CUDA and MPI"
key: "gpu_common_cuda_mpi_spaces"
command:
- "srun julia --color=yes --check-bounds=yes --project=.buildkite test/CommonSpaces/unit_common_spaces.jl"
env:
CLIMACOMMS_CONTEXT: "MPI"
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_ntasks: 2

- label: "Unit: common spaces with MPI"
key: "common_cuda_mpi_spaces"
command:
- "srun julia --color=yes --check-bounds=yes --project=.buildkite test/CommonSpaces/unit_common_spaces.jl"
env:
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_ntasks: 2

- label: "Unit: distributed cuda spaces"
key: "gpu_distributed_extruded_cuda_spaces"
command:
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ ClimaCore.jl Release Notes
========================

main
-------

- Prior to this version, `CommonSpaces` could not be created with
`ClimaComms.MPICommsContext`. This is now fixed with PR
[2176](https://github.com/CliMA/ClimaCore.jl/pull/2176).


v0.14.24
-------

- A new `Adapt` wrapper was added, `to_device`, which allows users to adapt datalayouts, spaces, fields, and fieldvectors between the cpu and gpu. PR [2159](https://github.com/CliMA/ClimaCore.jl/pull/2159).
Expand Down
21 changes: 17 additions & 4 deletions src/CommonGrids/CommonGrids.jl
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ function ExtrudedCubedSphereGrid(
horizontal_layout_type,
enable_bubble,
)
z_topology = Topologies.IntervalTopology(context, z_mesh)
z_topology = Topologies.IntervalTopology(
ClimaComms.SingletonCommsContext(device),
z_mesh,
)
z_grid = Grids.FiniteDifferenceGrid(z_topology)
return Grids.ExtrudedFiniteDifferenceGrid(
h_grid,
Expand Down Expand Up @@ -307,6 +310,7 @@ function ColumnGrid(
),
) where {FT}
@assert ClimaComms.device(context) == device "The given device and context device do not match."
@assert context isa ClimaComms.SingletonCommsContext "Columns can only be created on Singleton contextes."
z_topology = Topologies.IntervalTopology(context, z_mesh)
return Grids.FiniteDifferenceGrid(z_topology)
end
Expand Down Expand Up @@ -439,7 +443,10 @@ function Box3DGrid(
horizontal_layout_type,
enable_bubble,
)
z_topology = Topologies.IntervalTopology(context, z_mesh)
z_topology = Topologies.IntervalTopology(
ClimaComms.SingletonCommsContext(device),
z_mesh,
)
z_grid = Grids.FiniteDifferenceGrid(z_topology)
return Grids.ExtrudedFiniteDifferenceGrid(
h_grid,
Expand Down Expand Up @@ -542,10 +549,16 @@ function SliceXZGrid(
@assert horizontal_layout_type <: DataLayouts.AbstractData
@assert ClimaComms.device(context) == device "The given device and context device do not match."

h_topology = Topologies.IntervalTopology(context, h_mesh)
h_topology = Topologies.IntervalTopology(
ClimaComms.SingletonCommsContext(device),
h_mesh,
)
h_grid =
Grids.SpectralElementGrid1D(h_topology, quad; horizontal_layout_type)
z_topology = Topologies.IntervalTopology(context, z_mesh)
z_topology = Topologies.IntervalTopology(
ClimaComms.SingletonCommsContext(device),
z_mesh,
)
z_grid = Grids.FiniteDifferenceGrid(z_topology)
return Grids.ExtrudedFiniteDifferenceGrid(
h_grid,
Expand Down
55 changes: 32 additions & 23 deletions test/CommonSpaces/unit_common_spaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ using ClimaCore:
DataLayouts
using Test

# Initialize MPI context
ClimaComms.init(ClimaComms.context())

@testset "Convenience constructors" begin
function warp_surface(coord)
# sin²(x) form ground elevation
Expand Down Expand Up @@ -75,14 +78,17 @@ using Test
@test grid isa Grids.SpectralElementGrid2D
@test Grids.topology(grid).mesh isa Meshes.EquiangularCubedSphere

space = ColumnSpace(;
z_elem = 10,
z_min = 0,
z_max = 1,
staggering = Grids.CellCenter(),
)
grid = Spaces.grid(space)
@test grid isa Grids.FiniteDifferenceGrid
# Column spaces are not supported with MPI
if !(ClimaComms.context() isa ClimaComms.MPICommsContext)
space = ColumnSpace(;
z_elem = 10,
z_min = 0,
z_max = 1,
staggering = Grids.CellCenter(),
)
grid = Spaces.grid(space)
@test grid isa Grids.FiniteDifferenceGrid
end

space = Box3DSpace(;
z_elem = 10,
Expand All @@ -104,21 +110,24 @@ using Test
@test grid.horizontal_grid isa Grids.SpectralElementGrid2D
@test Grids.topology(grid.horizontal_grid).mesh isa Meshes.RectilinearMesh

space = SliceXZSpace(;
z_elem = 10,
x_min = 0,
x_max = 1,
z_min = 0,
z_max = 1,
periodic_x = false,
n_quad_points = 4,
x_elem = 4,
staggering = Grids.CellCenter(),
)
grid = Spaces.grid(space)
@test grid isa Grids.ExtrudedFiniteDifferenceGrid
@test grid.horizontal_grid isa Grids.SpectralElementGrid1D
@test Grids.topology(grid.horizontal_grid).mesh isa Meshes.IntervalMesh
# Slices are currently not compatible with GPU
if !(ClimaComms.device() isa ClimaComms.CUDADevice)
space = SliceXZSpace(;
z_elem = 10,
x_min = 0,
x_max = 1,
z_min = 0,
z_max = 1,
periodic_x = false,
n_quad_points = 4,
x_elem = 4,
staggering = Grids.CellCenter(),
)
grid = Spaces.grid(space)
@test grid isa Grids.ExtrudedFiniteDifferenceGrid
@test grid.horizontal_grid isa Grids.SpectralElementGrid1D
@test Grids.topology(grid.horizontal_grid).mesh isa Meshes.IntervalMesh
end

space = RectangleXYSpace(;
x_min = 0,
Expand Down

0 comments on commit b34a3a9

Please sign in to comment.