From 3f7b83e0af288b252d75fbd6705054cd09e20636 Mon Sep 17 00:00:00 2001 From: Tim Kelley Date: Sun, 19 Nov 2023 11:43:23 -0500 Subject: [PATCH] Test 1 for MPGArray structure --- src/Factorizations/mpglu!.jl | 37 +++++++++----------- src/MultiPrecisionArrays.jl | 3 ++ src/Solvers/mpgeslir.jl | 59 ++++++++++++++++++++++++++++++- src/Solvers/mpgmir.jl | 58 +++++++++++++++++++++++++------ src/Structs4MP/MPArray.jl | 3 +- src/Structs4MP/MPGArray.jl | 65 +++++++++++++++++++++++++++++++++++ test/DetailsTest/mplu_test.jl | 3 +- 7 files changed, 194 insertions(+), 34 deletions(-) create mode 100644 src/Structs4MP/MPGArray.jl diff --git a/src/Factorizations/mpglu!.jl b/src/Factorizations/mpglu!.jl index ce328fbe..8f47c34b 100644 --- a/src/Factorizations/mpglu!.jl +++ b/src/Factorizations/mpglu!.jl @@ -10,6 +10,18 @@ GMRES needs. You get a factorization object as output and can use ```\\``` to solve linear systems. """ +function mpglu!(MPGA::MPGArray) +AL=MPGA.AL +AH=MPGA.AH +VStore=MPGA.VStore +KStore=MPGA.KStore +res=MPGA.residual +TL=eltype(AL) +(TL == Float16) ? ALF = hlu!(AL) : ALF = lu!(AL) +MPF=MPGEFact(AH, AL, ALF, VStore, KStore, res, true) +return MPF +end + function mpglu!(MPH::MPArray; basissize=10) AH = MPH.AH TD = eltype(AH) @@ -34,28 +46,13 @@ mpglu(A::Array{TH,2}; TL=Float32, basissize=10) where TH <: Real Combines the constructor of the multiprecision GMRES-ready array with the factorization. -Combines the constructor of the multiprecision array with the -factorization. - -Step 1: build the MPArray -Step 2: factor the low precision copy, allocate storage for the Krylov -method, and return the factorization object +Step 1: build the MPGArray +Step 2: Call mpglu! to build the factorization object """ function mpglu(A::Array{TH,2}; TL=Float32, basissize=10) where TH <: Real -# -# If the high precision matrix is single, the low precision must be half. -# -(TH == Float32) && (TL = Float16) -# -# Unless you tell me otherwise, onthefly is true if low precision is half -# and false if low precision is single. -# -MPA=MPArray(A; TL=TL, onthefly=true) -# -# Factor the low precision copy and allocate storage -# to get the factorization object MPF -# -MPGF=mpglu!(MPA; basissize=basissize) +(TH==Float32) ? TL=Float16 : TL=TL +MPGA=MPGArray(A; basissize=basissize, TL=TL) +MPGF=mpglu!(MPGA) return MPGF end diff --git a/src/MultiPrecisionArrays.jl b/src/MultiPrecisionArrays.jl index 7fbfa1aa..18560b37 100644 --- a/src/MultiPrecisionArrays.jl +++ b/src/MultiPrecisionArrays.jl @@ -14,6 +14,7 @@ using Polyester include("Structs4MP/MPBase.jl") include("Structs4MP/MPArray.jl") +include("Structs4MP/MPGArray.jl") include("Structs4MP/MPHeavy.jl") MPFact = Union{MPLFact,MPHFact} @@ -116,12 +117,14 @@ export MPHArray # export MPLFact export MPGHFact +export MPGEFact export MPFact # # # #export MPEArray export MPFArray +export MPGArray export MPHFact export MPhatv export MPhptv diff --git a/src/Solvers/mpgeslir.jl b/src/Solvers/mpgeslir.jl index 4169dd96..ab40d3ca 100644 --- a/src/Solvers/mpgeslir.jl +++ b/src/Solvers/mpgeslir.jl @@ -8,13 +8,70 @@ This version is analogous to ```A\\b``` and combines the factorization and the solve. You start with MPA=MPArray(A) and then pass MPA to mpgeslir and combine the factorization and the solve. +You can also get the multiprecision factorization directly with +``` +MPF=mplu!(A) +``` +and then pass ```MPF``` to mpgeslir. + Unlike lu, this does overwrite the low precision part of MPA. I use this to get some timing results and it's also convenient if you want to do factor and solve in one statement. + +You can also get this with ```x = MPA\\b```. + +If you set the kwarg ```reporting``` to true you can get the IR +residual history. The output of +``` +x = MPA\\b +``` +or +``` +x=MPF\\b +``` +is the solition. The output of +``` +mout = \\(MPA,b; reporting=true) +``` +or +``` +mout = \\(MPF,b; reporting=true) +``` +is a structure. ```mpout.sol``` is the solution. ```mpout.rhist``` +is the residual history. mpout also contains the datatypes TH for +high precision and TL for low precision. + +## Example +```jldoctest +julia> using MultiPrecisionArrays.Examples + +julia> N=4096; A = I - 800.0 * Gmat(N); b=ones(N); + +julia> MPF=mplu(A); + +julia> mout=\\(MPF, b; reporting=true); + +julia> mout.rhist +6-element Vector{Float64}: + 1.00000e+00 + 5.36483e-02 + 1.57977e-05 + 5.10232e-09 + 7.76756e-12 + 9.90008e-12 + +# Stagnation after four IR iterations + +julia> [mout.TH mout.TL] +1×2 Matrix{DataType}: + Float64 Float32 + +``` """ function mpgeslir(MPA::MPArray, b; reporting = false, verbose = true) -#MPZ=deepcopy(MPA) +# Factor MPA and return Factorization object MPF=mplu!(MPA); +# Call mpgeslir for the solve xi=\(MPF, b; reporting=reporting, verbose=verbose) return xi end diff --git a/src/Solvers/mpgmir.jl b/src/Solvers/mpgmir.jl index 2bf1153e..0c57a200 100644 --- a/src/Solvers/mpgmir.jl +++ b/src/Solvers/mpgmir.jl @@ -21,7 +21,7 @@ debugging pleasure. When you do ``` -mpout = mpgmir(AF, b; reporting=true) +mpout = mpgmir(NAF, b; reporting=true) ``` You get a structure where @@ -39,31 +39,29 @@ Other parts of ```mpout``` are the high and low precisions ```jldoctest julia> using MultiPrecisionArrays.Examples -julia> N=4096; A = I - 800.0 * Gmat(N); +julia> N=4096; A = I - 800.0 * Gmat(N); b=ones(N); julia> MPA=MPArray(A); AF=mpglu!(MPA); -julia> b=ones(N); - julia> mpout=mpgmir(AF, b; reporting=true); julia> x=mpout.sol; norm(b-A*x,Inf) -4.08251e-12 +8.92664e-12 julia> mpout.rhist 4-element Vector{Float64}: 6.40000e+01 - 2.82712e-09 - 5.82236e-11 - 5.93385e-11 + 3.32046e-09 + 1.17624e-10 + 1.17893e-10 # Stagnation after the second iteration julia> mpout.khist 3-element Vector{Int64}: 4 + 5 4 - 4 -# Four Krylovs per iteration. +# 4-5 Krylovs per iteration. julia> mpout.TH Float64 @@ -71,6 +69,46 @@ Float64 julia> mpout.TL Float32 +# Repeat the experiment with low precision TL=Float16 (half) + +julia> MPA=MPArray(A; TL=Float16); AF=mpglu!(MPA); + +# You can use backslash too + +julia> mpout=\\(AF, b; reporting=true); + +julia> x=mpout.sol; norm(b-A*x,Inf) +8.65075e-12 +# Residual is as good as the TL=Float32 case. + +julia> mpout.rhist +5-element Vector{Float64}: + 6.40000e+01 + 2.00140e-03 + 2.05307e-07 + 1.16612e-10 + 1.18166e-10 + +# Stagnaton after 3 iterations + +julia> mpout.khist +5-element Vector{Int64}: + 10 + 10 + 10 + 10 +# The default basissize=10 so we are taking all the GMRES iterations we +# can at each iteration. + +julia> + +julia> mpout.TH +Float64 + +julia> mpout.TL +Float16 + + ``` """ diff --git a/src/Structs4MP/MPArray.jl b/src/Structs4MP/MPArray.jl index 82a2fdca..95301fd7 100644 --- a/src/Structs4MP/MPArray.jl +++ b/src/Structs4MP/MPArray.jl @@ -30,7 +30,7 @@ function MPArray(AH::Array{Float64,2}; TL = Float32, onthefly=nothing) end """ MPArray(AH::Array{Float32,2}; TL = Float16, onthefly=true) -Default single precision constructor for MPArray. +Default single precision constructor for MPArray with TL=Float16 If your high precision array is single, then your low precision array is half (Duh!). @@ -40,7 +40,6 @@ solves with on-the-fly interprecision transfer in this case because the bit of extra accuracy makes a difference and, at least for now, on-the-fly interprecision transfers are cheaper. - Data structures etc are the same as in the double-single/half case, but you don't have the option to go lower than half. diff --git a/src/Structs4MP/MPGArray.jl b/src/Structs4MP/MPGArray.jl new file mode 100644 index 00000000..bed7f53e --- /dev/null +++ b/src/Structs4MP/MPGArray.jl @@ -0,0 +1,65 @@ +""" +MPGArray(AH::Array{Float64,2}; TL = Float32, basissize=10) +Default constructor for MPGArray. Allocate the storage for +GMRES-IR + +C. T. Kelley 2023 + + +The MPGArray data structure is + +``` +struct MPGArray{TH<:AbstractFloat,TL<:AbstractFloat} + AH::Array{TH,2} + AL::Array{TL,2} + VStore::Array{TH,2} + KStore::NTuple + residual::Vector{TH} + onthefly::Bool +end +``` +The constructor just builds an MPGArray with TH=Float64. Set TL=Float16 +to get double/half IR. +""" + +struct MPGArray{TH<:AbstractFloat,TL<:AbstractFloat} + AH::Array{TH,2} + AL::Array{TL,2} + VStore::Array{TH,2} + KStore::NTuple + residual::Vector{TH} + onthefly::Bool +end + + +function MPGArray(AH::Array{Float64,2}; basissize=10, TL=Float32) +AL=TL.(AH) +(m,n)=size(AH) +res=ones(eltype(AH),n) +VStore=zeros(eltype(AH),n,basissize) +KStore=kstore(n,"gmres") +MPGA=MPGArray(AH, AL, VStore, KStore, res, true) +end + + +function MPGArray(AH::Array{Float32,2}; basissize=10, TL=Float16) +AL=TL.(AH) +(m,n)=size(AH) +res=ones(eltype(AH),n) +VStore=zeros(eltype(AH),n,basissize) +KStore=kstore(n,"gmres") +MPGA=MPGArray(AH, AL, VStore, KStore, res, true) +return MPGA +end + +function Xmpglu!(MPGA::MPGArray) +AL=MPGA.AL +AH=MPGA.AH +VStore=MPGA.VStore +KStore=MPGA.KStore +res=MPGA.residual +TL=eltype(AL) +(TL == Float16) ? ALF = hlu!(AL) : ALF = lu!(AL) +MPF=MPGEFact(AH, AL, ALF, VStore, KStore, res, true) +return MPF +end diff --git a/test/DetailsTest/mplu_test.jl b/test/DetailsTest/mplu_test.jl index 2d3c790b..85b9eeaf 100644 --- a/test/DetailsTest/mplu_test.jl +++ b/test/DetailsTest/mplu_test.jl @@ -29,7 +29,7 @@ function mpglu_test() AD=rand(10,10); MPD=MPArray(AD); MPF1=mpglu!(MPD); MPF2=mpglu(AD); eq64=test_eq(MPF1,MPF2) eq64 || println("mpglu t1 fails") -ADx=rand(10,10); MPDx=MPArray(ADx; TL=Float16); +ADx=rand(10,10); MPDx=MPGArray(ADx; TL=Float16); MPF1x=mpglu!(MPDx); MPF2x=mpglu(ADx; TL=Float16); eq64x=test_eq(MPF1x,MPF2x) eq64x || println("mpglu t2 fails") @@ -47,6 +47,7 @@ eqok=true for nf in fieldnames(MPLFact) gx=getfield(MF1,nf); hx =getfield(MF2,nf) eqok= ((gx==hx) && eqok) +eqok || println(nf) end return eqok end