From 87d2ce8b121605d7179dad3d2d2e556b28b966b5 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Wed, 23 Aug 2023 11:35:11 -0700 Subject: [PATCH 1/2] Add NVTX monitoring --- Project.toml | 2 + docs/Manifest.toml | 28 ++++-- docs/Project.toml | 2 + perf/Manifest.toml | 28 ++++-- perf/Project.toml | 2 + src/ClimaTimeSteppers.jl | 2 + src/solvers/imex_ark.jl | 186 ++++++++++++++++++++++++--------------- test/Project.toml | 2 + 8 files changed, 173 insertions(+), 79 deletions(-) diff --git a/Project.toml b/Project.toml index 7c214653..ec002c24 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.7.7" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" @@ -13,6 +14,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" +NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 3bccae5c..9d89a35c 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.8.5" manifest_format = "2.0" -project_hash = "8397e9864ab4184b6af749a437287a4a333e2a96" +project_hash = "f55f49aef4428e79942bc46383a85f4f16ffefaf" [[deps.ADTypes]] git-tree-sha1 = "dcfdf328328f2645531c4ddebf841228aef74130" @@ -201,10 +201,10 @@ uuid = "cf7c7e5a-b407-4c48-9047-11a94a308626" version = "0.2.4" [[deps.ClimaTimeSteppers]] -deps = ["CUDA", "ClimaComms", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "SciMLBase", "StaticArrays"] +deps = ["CUDA", "ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] path = ".." uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" -version = "0.7.6" +version = "0.7.7" [[deps.CloseOpenIntervals]] deps = ["Static", "StaticArrayInterface"] @@ -733,6 +733,12 @@ git-tree-sha1 = "6f2675ef130a300a112286de91973805fcc5ffbc" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.91+0" +[[deps.JuliaNVTXCallbacks_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f" +uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e" +version = "0.2.1+0" + [[deps.KLU]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] git-tree-sha1 = "764164ed65c30738750965d55652db9c94c59bfe" @@ -959,9 +965,9 @@ version = "0.1.8" [[deps.MPItrampoline_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] -git-tree-sha1 = "b3dcf8e1c610a10458df3c62038c8cc3a4d6291d" +git-tree-sha1 = "228d5366a7c89b3c81469592b6f4c612db693d50" uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" -version = "5.3.0+0" +version = "5.3.0+1" [[deps.MacroTools]] deps = ["Markdown", "Random"] @@ -1030,6 +1036,18 @@ git-tree-sha1 = "019f12e9a1a7880459d0173c182e6a99365d7ac1" uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" version = "4.5.1" +[[deps.NVTX]] +deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"] +git-tree-sha1 = "ab760fa11c7f12df87334ea9e0dffc54af3025bc" +uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f" +version = "0.3.2" + +[[deps.NVTX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b" +uuid = "e98f9f5b-d649-5603-91fd-7774390e6439" +version = "3.1.0+2" + [[deps.NaNMath]] deps = ["OpenLibm_jll"] git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" diff --git a/docs/Project.toml b/docs/Project.toml index 791fe510..97e7cc9b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,12 +4,14 @@ ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" ClimaCorePlots = "cf7c7e5a-b407-4c48-9047-11a94a308626" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" diff --git a/perf/Manifest.toml b/perf/Manifest.toml index f4cdc210..ed5df9ba 100644 --- a/perf/Manifest.toml +++ b/perf/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.8.5" manifest_format = "2.0" -project_hash = "007561ccc3a937bb99d6d0831f5143e3c6352aac" +project_hash = "d581b243d2c5ec1604854b01711d49073e60b208" [[deps.ADTypes]] git-tree-sha1 = "dcfdf328328f2645531c4ddebf841228aef74130" @@ -173,10 +173,10 @@ uuid = "d414da3d-4745-48bb-8d80-42e94e092884" version = "0.10.39" [[deps.ClimaTimeSteppers]] -deps = ["CUDA", "ClimaComms", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "SciMLBase", "StaticArrays"] +deps = ["CUDA", "ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] path = ".." uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" -version = "0.7.6" +version = "0.7.7" [[deps.CloseOpenIntervals]] deps = ["Static", "StaticArrayInterface"] @@ -584,6 +584,12 @@ git-tree-sha1 = "6a125e6a4cb391e0b9adbd1afa9e771c2179f8ef" uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a" version = "0.9.23" +[[deps.JuliaNVTXCallbacks_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f" +uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e" +version = "0.2.1+0" + [[deps.KernelAbstractions]] deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] git-tree-sha1 = "47be64f040a7ece575c2b5f53ca6da7b548d69f4" @@ -763,9 +769,9 @@ version = "0.1.8" [[deps.MPItrampoline_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] -git-tree-sha1 = "b3dcf8e1c610a10458df3c62038c8cc3a4d6291d" +git-tree-sha1 = "228d5366a7c89b3c81469592b6f4c612db693d50" uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" -version = "5.3.0+0" +version = "5.3.0+1" [[deps.MacroTools]] deps = ["Markdown", "Random"] @@ -823,6 +829,18 @@ git-tree-sha1 = "019f12e9a1a7880459d0173c182e6a99365d7ac1" uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" version = "4.5.1" +[[deps.NVTX]] +deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"] +git-tree-sha1 = "ab760fa11c7f12df87334ea9e0dffc54af3025bc" +uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f" +version = "0.3.2" + +[[deps.NVTX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b" +uuid = "e98f9f5b-d649-5603-91fd-7774390e6439" +version = "3.1.0+2" + [[deps.NaNMath]] deps = ["OpenLibm_jll"] git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" diff --git a/perf/Project.toml b/perf/Project.toml index 88dae0bd..89083e8b 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -5,6 +5,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" @@ -14,6 +15,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" +NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" PProf = "e4faabce-9ead-11e9-39d9-4379958e3056" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3" diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl index cd14a400..8ddf4914 100644 --- a/src/ClimaTimeSteppers.jl +++ b/src/ClimaTimeSteppers.jl @@ -51,6 +51,8 @@ using LinearOperators using StaticArrays using CUDA import ClimaComms +using Colors +using NVTX export AbstractAlgorithmName, AbstractAlgorithmConstraint, Unconstrained, SSP diff --git a/src/solvers/imex_ark.jl b/src/solvers/imex_ark.jl index 798489e3..c3b7efe6 100644 --- a/src/solvers/imex_ark.jl +++ b/src/solvers/imex_ark.jl @@ -1,3 +1,5 @@ +import NVTX + has_jac(T_imp!) = hasfield(typeof(T_imp!), :Wfact) && hasfield(typeof(T_imp!), :jac_prototype) && @@ -53,86 +55,120 @@ function step_u!(integrator, cache::IMEXARKCache) s = length(b_exp) if !isnothing(T_imp!) && !isnothing(newtons_method) - update!( - newtons_method, - newtons_method_cache, - NewTimeStep(t), - jacobian -> isnothing(γ) ? sdirk_error(name) : T_imp!.Wfact(jacobian, u, p, dt * γ, t), - ) + NVTX.@range "update!" color = colorant"yellow" begin + update!( + newtons_method, + newtons_method_cache, + NewTimeStep(t), + jacobian -> isnothing(γ) ? sdirk_error(name) : T_imp!.Wfact(jacobian, u, p, dt * γ, t), + ) + end end for i in 1:s - t_exp = t + dt * c_exp[i] - t_imp = t + dt * c_imp[i] - - @. U[i] = u + NVTX.@range "stage" payload = i begin + t_exp = t + dt * c_exp[i] + t_imp = t + dt * c_imp[i] - if !isnothing(T_lim!) # Update based on limited tendencies from previous stages - for j in 1:(i - 1) - iszero(a_exp[i, j]) && continue - @. U[i] += dt * a_exp[i, j] * T_lim[j] + NVTX.@range "assign U" color = colorant"yellow" begin + @. U[i] = u end - lim!(U[i], p, t_exp, u) - end - if !isnothing(T_exp!) # Update based on explicit tendencies from previous stages - for j in 1:(i - 1) - iszero(a_exp[i, j]) && continue - @. U[i] += dt * a_exp[i, j] * T_exp[j] + if !isnothing(T_lim!) # Update based on limited tendencies from previous stages + for j in 1:(i - 1) + iszero(a_exp[i, j]) && continue + NVTX.@range "lim update" color = colorant"yellow" begin + @. U[i] += dt * a_exp[i, j] * T_lim[j] + end + end + NVTX.@range "lim" color = colorant"yellow" begin + lim!(U[i], p, t_exp, u) + end end - end - if !isnothing(T_imp!) # Update based on implicit tendencies from previous stages - for j in 1:(i - 1) - iszero(a_imp[i, j]) && continue - @. U[i] += dt * a_imp[i, j] * T_imp[j] + if !isnothing(T_exp!) # Update based on explicit tendencies from previous stages + for j in 1:(i - 1) + iszero(a_exp[i, j]) && continue + NVTX.@range "exp update" color = colorant"yellow" begin + @. U[i] += dt * a_exp[i, j] * T_exp[j] + end + end end - end - dss!(U[i], p, t_exp) + if !isnothing(T_imp!) # Update based on implicit tendencies from previous stages + for j in 1:(i - 1) + iszero(a_imp[i, j]) && continue + NVTX.@range "imp update" color = colorant"yellow" begin + @. U[i] += dt * a_imp[i, j] * T_imp[j] + end + end + end - if !isnothing(T_imp!) && !iszero(a_imp[i, i]) # Implicit solve - @assert !isnothing(newtons_method) - @. temp = U[i] - # TODO: can/should we remove these closures? - implicit_equation_residual! = (residual, Ui) -> begin - T_imp!(residual, Ui, p, t_imp) - @. residual = temp + dt * a_imp[i, i] * residual - Ui + NVTX.@range "dss!" color = colorant"yellow" begin + dss!(U[i], p, t_exp) end - implicit_equation_jacobian! = (jacobian, Ui) -> T_imp!.Wfact(jacobian, Ui, p, dt * a_imp[i, i], t_imp) - solve_newton!( - newtons_method, - newtons_method_cache, - U[i], - implicit_equation_residual!, - implicit_equation_jacobian!, - ) - end - # We do not need to DSS U[i] again because the implicit solve should - # give the same results for redundant columns (as long as the implicit - # tendency only acts in the vertical direction). - - if !all(iszero, a_imp[:, i]) || !iszero(b_imp[i]) - if !isnothing(T_imp!) - if iszero(a_imp[i, i]) - # If its coefficient is 0, T_imp[i] is effectively being - # treated explicitly. - T_imp!(T_imp[i], U[i], p, t_imp) - else - # If T_imp[i] is being treated implicitly, ensure that it - # exactly satisfies the implicit equation. - @. T_imp[i] = (U[i] - temp) / (dt * a_imp[i, i]) + if !isnothing(T_imp!) && !iszero(a_imp[i, i]) # Implicit solve + @assert !isnothing(newtons_method) + NVTX.@range "assign temp" color = colorant"yellow" begin + @. temp = U[i] + end + # TODO: can/should we remove these closures? + implicit_equation_residual! = + (residual, Ui) -> begin + NVTX.@range "call T_imp!" color = colorant"yellow" begin + T_imp!(residual, Ui, p, t_imp) + end + NVTX.@range "residual" color = colorant"yellow" begin + @. residual = temp + dt * a_imp[i, i] * residual - Ui + end + end + implicit_equation_jacobian! = (jacobian, Ui) -> T_imp!.Wfact(jacobian, Ui, p, dt * a_imp[i, i], t_imp) + + NVTX.@range "solve_newton!" color = colorant"yellow" begin + solve_newton!( + newtons_method, + newtons_method_cache, + U[i], + implicit_equation_residual!, + implicit_equation_jacobian!, + ) end end - end - if !all(iszero, a_exp[:, i]) || !iszero(b_exp[i]) - if !isnothing(T_lim!) - T_lim!(T_lim[i], U[i], p, t_exp) + # We do not need to DSS U[i] again because the implicit solve should + # give the same results for redundant columns (as long as the implicit + # tendency only acts in the vertical direction). + + if !all(iszero, a_imp[:, i]) || !iszero(b_imp[i]) + if !isnothing(T_imp!) + if iszero(a_imp[i, i]) + # If its coefficient is 0, T_imp[i] is effectively being + # treated explicitly. + NVTX.@range "call T_imp!" color = colorant"yellow" begin + T_imp!(T_imp[i], U[i], p, t_imp) + end + else + # If T_imp[i] is being treated implicitly, ensure that it + # exactly satisfies the implicit equation. + NVTX.@range "back out T_imp!" color = colorant"yellow" begin + @. T_imp[i] = (U[i] - temp) / (dt * a_imp[i, i]) + end + end + end end - if !isnothing(T_exp!) - T_exp!(T_exp[i], U[i], p, t_exp) + + if !all(iszero, a_exp[:, i]) || !iszero(b_exp[i]) + if !isnothing(T_lim!) + NVTX.@range "call T_lim!" color = colorant"yellow" begin + T_lim!(T_lim[i], U[i], p, t_exp) + end + end + if !isnothing(T_exp!) + NVTX.@range "call T_exp!" color = colorant"yellow" begin + T_exp!(T_exp[i], U[i], p, t_exp) + end + end end end end @@ -140,30 +176,42 @@ function step_u!(integrator, cache::IMEXARKCache) t_final = t + dt if !isnothing(T_lim!) # Update based on limited tendencies from previous stages - @. temp = u + NVTX.@range "assign temp" color = colorant"yellow" begin + @. temp = u + end for j in 1:s iszero(b_exp[j]) && continue - @. temp += dt * b_exp[j] * T_lim[j] + NVTX.@range "update temp" color = colorant"yellow" begin + @. temp += dt * b_exp[j] * T_lim[j] + end + end + NVTX.@range "call lim!" color = colorant"yellow" begin + lim!(temp, p, t_final, u) end - lim!(temp, p, t_final, u) @. u = temp end if !isnothing(T_exp!) # Update based on explicit tendencies from previous stages for j in 1:s iszero(b_exp[j]) && continue - @. u += dt * b_exp[j] * T_exp[j] + NVTX.@range "increment u (exp)" color = colorant"yellow" begin + @. u += dt * b_exp[j] * T_exp[j] + end end end if !isnothing(T_imp!) # Update based on implicit tendencies from previous stages for j in 1:s iszero(b_imp[j]) && continue - @. u += dt * b_imp[j] * T_imp[j] + NVTX.@range "increment u (imp)" color = colorant"yellow" begin + @. u += dt * b_imp[j] * T_imp[j] + end end end - dss!(u, p, t_final) + NVTX.@range "dss!" color = colorant"yellow" begin + dss!(u, p, t_final) + end return u end diff --git a/test/Project.toml b/test/Project.toml index 852fd385..3cb87987 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,6 +4,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" @@ -13,6 +14,7 @@ Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" ODEConvergenceTester = "42a5c2e1-f365-4540-8ca5-3684de3ecd95" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" From 35d9c841dab86b6e0cad44d4fa66373bf7e2abc6 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Thu, 24 Aug 2023 11:23:30 -0700 Subject: [PATCH 2/2] Bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ec002c24..73dea350 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ClimaTimeSteppers" uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" authors = ["Climate Modeling Alliance"] -version = "0.7.7" +version = "0.7.8" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"