diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 85a14e83fa..fc76fa3b31 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -939,7 +939,8 @@ steps: - label: ":flower_playing_cards: Rising Bubble 3D hybrid invariant (ρe)" key: "gpu_rising_bubble_3d_hybrid_invariant_rhoe" command: - - "nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl" +# - "nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl" + - "julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl" artifact_paths: - "examples/hybrid/box/output/gpu_bubble_3d_invariant_rhoe/*_low_*" agents: @@ -948,7 +949,8 @@ steps: - label: ":flower_playing_cards: Rising Bubble 3D hybrid invariant (ρe), custom resolution" key: "gpu_rising_bubble_3d_hybrid_invariant_rhoe_custom" command: - - "nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl Float64 custom 1000 1000 4 16 3 0.05 700.0" +# - "nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl Float64 custom 1000 1000 4 16 3 0.05 700.0" + - "julia --color=yes --project=examples examples/hybrid/box/bubble_3d_invariant_rhoe.jl Float64 custom 1000 1000 4 16 3 0.05 700.0" artifact_paths: - "examples/hybrid/box/output/gpu_bubble_3d_invariant_rhoe/*_custom_*" agents: @@ -1097,7 +1099,8 @@ steps: - label: ":computer: MPI Shallow-water 2D sphere barotropic instability alpha0" key: "cpu_mpi_shallowwater_2d_cg_sphere_barotropic_alpha0" command: - - "nsys profile --trace=nvtx,mpi --mpi-impl=openmpi --output=examples/sphere/output/cg_sphere_shallowwater_barotropic_instability_alpha0/report.%q{NPROCS} mpiexec julia --color=yes --project=examples examples/sphere/shallow_water.jl barotropic_instability" +# - "nsys profile --trace=nvtx,mpi --mpi-impl=openmpi --output=examples/sphere/output/cg_sphere_shallowwater_barotropic_instability_alpha0/report.%q{NPROCS} mpiexec julia --color=yes --project=examples examples/sphere/shallow_water.jl barotropic_instability" + - "mpiexec julia --color=yes --project=examples examples/sphere/shallow_water.jl barotropic_instability" artifact_paths: - "examples/sphere/output/cg_sphere_shallowwater_barotropic_instability_alpha0/*" env: @@ -1132,7 +1135,8 @@ steps: key: "cuda_shallowwater_2d_cg_sphere" command: - mkdir -p output/$$BUILDKITE_STEP_KEY - - nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/sphere/shallow_water_cuda.jl +# - nsys profile --trace=nvtx,cuda --output=output/$$BUILDKITE_STEP_KEY/report julia --color=yes --project=examples examples/sphere/shallow_water_cuda.jl + - julia --color=yes --project=examples examples/sphere/shallow_water_cuda.jl artifact_paths: - output/cuda_shallowwater_2d_cg_sphere agents: diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl index 7f67b5de66..57e3dd4dd5 100644 --- a/src/DataLayouts/cuda.jl +++ b/src/DataLayouts/cuda.jl @@ -97,7 +97,11 @@ end function Base.fill!( dest::IJFH{S, Nij, A}, val, -) where {S, Nij, A <: CUDA.CuArray} +) where { + S, + Nij, + A <: Union{CUDA.CuArray, SubArray{<:Any, <:Any, <:CUDA.CuArray}}, +} _, _, _, _, Nh = size(dest) if Nh > 0 CUDA.@cuda threads = (Nij, Nij) blocks = (Nh, 1) knl_fill!(dest, val) diff --git a/src/Operators/integrals.jl b/src/Operators/integrals.jl index c7c65d0b0f..9dddf35d35 100644 --- a/src/Operators/integrals.jl +++ b/src/Operators/integrals.jl @@ -7,31 +7,80 @@ The definite vertical column integral, `col∫field`, of field `field`. """ -function column_integral_definite!(col∫field::Fields.Field, field::Fields.Field) +column_integral_definite!(col∫field::Fields.Field, field::Fields.Field) = + column_integral_definite!(ClimaComms.device(axes(field)), col∫field, field) + +function column_integral_definite!( + ::ClimaComms.CUDADevice, + col∫field::Fields.Field, + field::Fields.Field, +) + Ni, Nj, _, _, Nh = size(Fields.field_values(col∫field)) + nthreads, nblocks = Spaces._configure_threadblock(Ni * Nj * Nh) + @cuda threads = nthreads blocks = nblocks column_integral_definite_kernel!( + col∫field, + field, + ) +end + +function column_integral_definite_kernel!( + col∫field::Fields.SpectralElementField, + field::Fields.ExtrudedFiniteDifferenceField, +) + idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x + Ni, Nj, _, _, Nh = size(Fields.field_values(field)) + if idx <= Ni * Nj * Nh + i, j, h = Spaces._get_idx((Ni, Nj, Nh), idx) + colfield = Spaces.column(field, i, j, h) + _column_integral_definite!(Spaces.column(col∫field, i, j, h), colfield) + end + return nothing +end + +column_integral_definite_kernel!( + col∫field::Fields.PointField, + field::Fields.FiniteDifferenceField, +) = _column_integral_definite!(col∫field, field) + +function column_integral_definite!( + ::ClimaComms.AbstractCPUDevice, + col∫field::Fields.SpectralElementField, + field::Fields.ExtrudedFiniteDifferenceField, +) Fields.bycolumn(axes(field)) do colidx - column_integral_definite!(col∫field[colidx], field[colidx]) + _column_integral_definite!(col∫field[colidx], field[colidx]) nothing end return nothing end -function column_integral_definite!( +column_integral_definite!( + ::ClimaComms.AbstractCPUDevice, + col∫field::Fields.PointField, + field::Fields.FiniteDifferenceField, +) = _column_integral_definite!(col∫field, field) + +function _column_integral_definite!( col∫field::Fields.PointField, field::Fields.ColumnField, ) - @inbounds col∫field[] = column_integral_definite(field) - return nothing -end + space = axes(field) + Δz_field = Fields.Δz_field(space) + Nv = Spaces.nlevels(space) -function column_integral_definite(field::Fields.ColumnField) - field_data = Fields.field_values(field) - Δz_data = Spaces.Δz_data(axes(field)) - Nv = Spaces.nlevels(axes(field)) - ∫field = zero(eltype(field)) - @inbounds for j in 1:Nv - ∫field += field_data[j] * Δz_data[j] + col∫field[] = 0 + @inbounds for idx in 1:Nv + col∫field[] += + reduction_getindex(field, idx) * reduction_getindex(Δz_field, idx) end - return ∫field + return nothing end +reduction_getindex(column_field, index) = @inbounds getidx( + axes(column_field), + column_field, + Interior(), + index - 1 + left_idx(axes(column_field)), +) + # TODO: add support for indefinite integrals diff --git a/src/Spaces/finitedifference.jl b/src/Spaces/finitedifference.jl index bdffe71dcf..26844fdeb5 100644 --- a/src/Spaces/finitedifference.jl +++ b/src/Spaces/finitedifference.jl @@ -212,14 +212,12 @@ Base.@propagate_inbounds function level( v::PlusHalf, ) @inbounds local_geometry = level(local_geometry_data(space), v.i + 1) - context = ClimaComms.context(space) - PointSpace(context, local_geometry) + PointSpace(local_geometry) end Base.@propagate_inbounds function level( space::CenterFiniteDifferenceSpace, v::Int, ) local_geometry = level(local_geometry_data(space), v) - context = ClimaComms.context(space) - PointSpace(context, local_geometry) + PointSpace(local_geometry) end diff --git a/src/Spaces/pointspace.jl b/src/Spaces/pointspace.jl index d68ab4e13c..4aee1a8ff4 100644 --- a/src/Spaces/pointspace.jl +++ b/src/Spaces/pointspace.jl @@ -7,19 +7,22 @@ local_geometry_data(space::AbstractPointSpace) = space.local_geometry A zero-dimensional space. """ -struct PointSpace{C <: ClimaComms.AbstractCommsContext, LG} <: - AbstractPointSpace - context::C +struct PointSpace{LG <: DataLayouts.Data0D} <: AbstractPointSpace local_geometry::LG end -ClimaComms.device(space::PointSpace) = ClimaComms.device(space.context) -ClimaComms.context(space::PointSpace) = space.context +# not strictly correct, but it is needed for mapreduce +ClimaComms.device(space::PointSpace) = + ClimaComms.device(ClimaComms.context(space)) +ClimaComms.context(space::PointSpace) = + ClimaComms.SingletonCommsContext(ClimaComms.CPUSingleThreaded()) +#= PointSpace(x::Geometry.LocalGeometry) = PointSpace(ClimaComms.CPUSingleThreaded(), x) PointSpace(x::Geometry.AbstractPoint) = PointSpace(ClimaComms.CPUSingleThreaded(), x) +=# function PointSpace(device::ClimaComms.AbstractDevice, x) context = ClimaComms.SingletonCommsContext(device) @@ -34,7 +37,7 @@ function PointSpace( ArrayType = ClimaComms.array_type(ClimaComms.device(context)) local_geometry_data = DataLayouts.DataF{LG}(Array{FT}) local_geometry_data[] = local_geometry - return PointSpace(context, Adapt.adapt(ArrayType, local_geometry_data)) + return PointSpace(Adapt.adapt(ArrayType, local_geometry_data)) end function PointSpace( diff --git a/src/Spaces/spectralelement.jl b/src/Spaces/spectralelement.jl index 96ec37a70e..81e45ca5ff 100644 --- a/src/Spaces/spectralelement.jl +++ b/src/Spaces/spectralelement.jl @@ -644,16 +644,14 @@ Base.@propagate_inbounds slab(space::AbstractSpectralElementSpace, h) = Base.@propagate_inbounds function column(space::SpectralElementSpace1D, i, h) local_geometry = column(local_geometry_data(space), i, h) - context = ClimaComms.context(space) - PointSpace(context, local_geometry) + PointSpace(local_geometry) end Base.@propagate_inbounds column(space::SpectralElementSpace1D, i, j, h) = column(space, i, h) Base.@propagate_inbounds function column(space::SpectralElementSpace2D, i, j, h) local_geometry = column(local_geometry_data(space), i, j, h) - context = ClimaComms.context(space) - PointSpace(context, local_geometry) + PointSpace(local_geometry) end # XXX: this cannot take `space` as it must be constructed beforehand so diff --git a/test/Fields/field.jl b/test/Fields/field.jl index cb711f0306..06e8b62c82 100644 --- a/test/Fields/field.jl +++ b/test/Fields/field.jl @@ -11,6 +11,7 @@ import ClimaCore: using LinearAlgebra: norm using Statistics: mean using ForwardDiff +using CUDA include( joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"), @@ -35,7 +36,7 @@ function spectral_space_2D(; n1 = 1, n2 = 1, Nij = 4) space = Spaces.SpectralElementSpace2D(grid_topology, quad) return space end - +#= @testset "1×1 2D domain space" begin Nij = 4 n1 = n2 = 1 @@ -269,7 +270,7 @@ end @testset "FieldVector array_type" begin device = ClimaComms.device() context = ClimaComms.SingletonCommsContext(device) - space = TU.PointSpace(Float32; context) + space = TU.SpectralElementSpace1D(Float32; context) xcenters = Fields.coordinate_field(space).x y = Fields.FieldVector(x = xcenters) @test ClimaComms.array_type(y) == ClimaComms.array_type(device) @@ -667,13 +668,13 @@ end C .= Ref(zero(eltype(C))) @test all(==(0.0), parent(C)) end +=# function integrate_bycolumn!(∫y, Y) Fields.bycolumn(axes(Y.y)) do colidx Operators.column_integral_definite!(∫y[colidx], Y.y[colidx]) nothing end end - """ convergence_rate(err, Δh) @@ -698,7 +699,7 @@ convergence_rate(err, Δh) = col_copy = similar(y[Fields.ColumnIndex((1, 1), 1)]) return Fields.Field(Fields.field_values(col_copy), axes(col_copy)) end - device = ClimaComms.CPUSingleThreaded() + device = ClimaComms.device() context = ClimaComms.SingletonCommsContext(device) for zelem in (2^2, 2^3, 2^4, 2^5) for space in TU.all_spaces(FT; zelem, context) @@ -711,7 +712,9 @@ convergence_rate(err, Δh) = zcf = Fields.coordinate_field(Y.y).z Δz = Fields.Δz_field(axes(zcf)) Δz_col = Δz[Fields.ColumnIndex((1, 1), 1)] - Δz_1 = parent(Δz_col)[1] + Δz_1 = CUDA.allowscalar() do + parent(Δz_col)[1] + end key = (space, zelem) if !haskey(results, key) results[key] =