From 3f7b83e0af288b252d75fbd6705054cd09e20636 Mon Sep 17 00:00:00 2001
From: Tim Kelley <ctk@ncsu.edu>
Date: Sun, 19 Nov 2023 11:43:23 -0500
Subject: [PATCH] Test 1 for MPGArray structure

---
 src/Factorizations/mpglu!.jl  | 37 +++++++++-----------
 src/MultiPrecisionArrays.jl   |  3 ++
 src/Solvers/mpgeslir.jl       | 59 ++++++++++++++++++++++++++++++-
 src/Solvers/mpgmir.jl         | 58 +++++++++++++++++++++++++------
 src/Structs4MP/MPArray.jl     |  3 +-
 src/Structs4MP/MPGArray.jl    | 65 +++++++++++++++++++++++++++++++++++
 test/DetailsTest/mplu_test.jl |  3 +-
 7 files changed, 194 insertions(+), 34 deletions(-)
 create mode 100644 src/Structs4MP/MPGArray.jl

diff --git a/src/Factorizations/mpglu!.jl b/src/Factorizations/mpglu!.jl
index ce328fbe..8f47c34b 100644
--- a/src/Factorizations/mpglu!.jl
+++ b/src/Factorizations/mpglu!.jl
@@ -10,6 +10,18 @@ GMRES needs.
 You get a factorization
 object as output and can use ```\\``` to solve linear systems.
 """
+function mpglu!(MPGA::MPGArray)
+AL=MPGA.AL
+AH=MPGA.AH
+VStore=MPGA.VStore
+KStore=MPGA.KStore
+res=MPGA.residual
+TL=eltype(AL)
+(TL == Float16) ? ALF = hlu!(AL) : ALF = lu!(AL)
+MPF=MPGEFact(AH, AL, ALF, VStore, KStore, res, true)
+return MPF
+end
+
 function mpglu!(MPH::MPArray; basissize=10)
 AH = MPH.AH
 TD = eltype(AH)
@@ -34,28 +46,13 @@ mpglu(A::Array{TH,2}; TL=Float32, basissize=10) where TH <: Real
 Combines the constructor of the multiprecision GMRES-ready array with the
 factorization.
 
-Combines the constructor of the multiprecision array with the
-factorization.
-
-Step 1: build the MPArray
-Step 2: factor the low precision copy, allocate storage for the Krylov
-method, and return the factorization object
+Step 1: build the MPGArray
+Step 2: Call mpglu! to build the factorization object
 """
 function mpglu(A::Array{TH,2}; TL=Float32, basissize=10) where TH <: Real
-#
-# If the high precision matrix is single, the low precision must be half.
-#
-(TH == Float32) && (TL = Float16)
-#
-# Unless you tell me otherwise, onthefly is true if low precision is half
-# and false if low precision is single.
-#
-MPA=MPArray(A; TL=TL, onthefly=true)
-#
-# Factor the low precision copy and allocate storage 
-# to get the factorization object MPF
-#
-MPGF=mpglu!(MPA; basissize=basissize)
+(TH==Float32) ? TL=Float16 : TL=TL
+MPGA=MPGArray(A; basissize=basissize, TL=TL)
+MPGF=mpglu!(MPGA)
 return MPGF
 end
 
diff --git a/src/MultiPrecisionArrays.jl b/src/MultiPrecisionArrays.jl
index 7fbfa1aa..18560b37 100644
--- a/src/MultiPrecisionArrays.jl
+++ b/src/MultiPrecisionArrays.jl
@@ -14,6 +14,7 @@ using Polyester
 
 include("Structs4MP/MPBase.jl")
 include("Structs4MP/MPArray.jl")
+include("Structs4MP/MPGArray.jl")
 include("Structs4MP/MPHeavy.jl")
 
 MPFact = Union{MPLFact,MPHFact}
@@ -116,12 +117,14 @@ export MPHArray
 #
 export MPLFact
 export MPGHFact
+export MPGEFact
 export MPFact
 #
 #
 #
 #export MPEArray
 export MPFArray
+export MPGArray
 export MPHFact
 export MPhatv
 export MPhptv
diff --git a/src/Solvers/mpgeslir.jl b/src/Solvers/mpgeslir.jl
index 4169dd96..ab40d3ca 100644
--- a/src/Solvers/mpgeslir.jl
+++ b/src/Solvers/mpgeslir.jl
@@ -8,13 +8,70 @@ This version is analogous to ```A\\b``` and combines the factorization
 and the solve. You start with MPA=MPArray(A) and then pass MPA
 to mpgeslir and combine the factorization and the solve. 
 
+You can also get the multiprecision factorization directly with
+```
+MPF=mplu(A)
+```
+and then pass ```MPF``` to mpgeslir.
+
 Unlike lu, this does overwrite the low precision part of MPA.
 I use this to get some timing results and it's also convenient
 if you want to do factor and solve in one statement. 
+
+You can also get this with ```x = MPA\\b```.
+
+If you set the kwarg ```reporting``` to true you can get the IR
+residual history. The output of 
+```
+x = MPA\\b
+```
+or
+```
+x=MPF\\b
+```
+is the solution. The output of 
+```
+mout = \\(MPA,b; reporting=true)
+```
+or
+```
+mout = \\(MPF,b; reporting=true)
+```
+is a structure. ```mout.sol``` is the solution. ```mout.rhist```
+is the residual history. mout also contains the datatypes TH for
+high precision and TL for low precision.
+
+## Example
+```jldoctest
+julia> using MultiPrecisionArrays.Examples
+
+julia> N=4096; A = I - 800.0 * Gmat(N); b=ones(N);
+
+julia> MPF=mplu(A);
+
+julia> mout=\\(MPF, b; reporting=true);
+
+julia> mout.rhist
+6-element Vector{Float64}:
+ 1.00000e+00
+ 5.36483e-02
+ 1.57977e-05
+ 5.10232e-09
+ 7.76756e-12
+ 9.90008e-12
+
+# Stagnation after four IR iterations
+
+julia> [mout.TH mout.TL]
+1×2 Matrix{DataType}:
+ Float64  Float32
+
+```
 """
 function mpgeslir(MPA::MPArray, b; reporting = false, verbose = true)
-#MPZ=deepcopy(MPA)
+# Factor MPA and return Factorization object
 MPF=mplu!(MPA);
+# Call mpgeslir for the solve
 xi=\(MPF, b; reporting=reporting, verbose=verbose)
 return xi
 end
diff --git a/src/Solvers/mpgmir.jl b/src/Solvers/mpgmir.jl
index 2bf1153e..0c57a200 100644
--- a/src/Solvers/mpgmir.jl
+++ b/src/Solvers/mpgmir.jl
@@ -21,7 +21,7 @@ debugging pleasure.
 
 When you do
 ```
-mpout = mpgmir(AF, b; reporting=true)
+mpout = mpgmir(AF, b; reporting=true)
 ```
 You get a structure where
 
@@ -39,31 +39,29 @@ Other parts of ```mpout``` are the high and low precisions
 ```jldoctest
 julia> using MultiPrecisionArrays.Examples
 
-julia> N=4096; A = I - 800.0 * Gmat(N);
+julia> N=4096; A = I - 800.0 * Gmat(N); b=ones(N);
 
 julia> MPA=MPArray(A); AF=mpglu!(MPA);
 
-julia> b=ones(N); 
-
 julia> mpout=mpgmir(AF, b; reporting=true);
 
 julia> x=mpout.sol; norm(b-A*x,Inf)
-4.08251e-12
+8.92664e-12
 
 julia> mpout.rhist
 4-element Vector{Float64}:
  6.40000e+01
- 2.82712e-09
- 5.82236e-11
- 5.93385e-11
+ 3.32046e-09
+ 1.17624e-10
+ 1.17893e-10
 # Stagnation after the second iteration
 
 julia> mpout.khist
 3-element Vector{Int64}:
  4
+ 5
  4
- 4
-# Four Krylovs per iteration.
+# 4-5 Krylovs per iteration.
 
 julia> mpout.TH
 Float64
@@ -71,6 +69,46 @@ Float64
 julia> mpout.TL
 Float32
 
+# Repeat the experiment with low precision TL=Float16 (half)
+
+julia> MPA=MPArray(A; TL=Float16); AF=mpglu!(MPA); 
+
+# You can use backslash too
+
+julia> mpout=\\(AF, b; reporting=true);
+
+julia> x=mpout.sol; norm(b-A*x,Inf)
+8.65075e-12
+# Residual is as good as the TL=Float32 case.
+
+julia> mpout.rhist
+5-element Vector{Float64}:
+ 6.40000e+01
+ 2.00140e-03
+ 2.05307e-07
+ 1.16612e-10
+ 1.18166e-10
+
+# Stagnation after 3 iterations
+
+julia> mpout.khist
+4-element Vector{Int64}:
+ 10
+ 10
+ 10
+ 10
+# The default basissize=10 so we are taking all the GMRES iterations we
+# can at each iteration.
+
+julia> 
+
+julia> mpout.TH
+Float64
+
+julia> mpout.TL
+Float16
+
+
 ```
 
 """
diff --git a/src/Structs4MP/MPArray.jl b/src/Structs4MP/MPArray.jl
index 82a2fdca..95301fd7 100644
--- a/src/Structs4MP/MPArray.jl
+++ b/src/Structs4MP/MPArray.jl
@@ -30,7 +30,7 @@ function MPArray(AH::Array{Float64,2}; TL = Float32, onthefly=nothing)
 end
 """
 MPArray(AH::Array{Float32,2}; TL = Float16, onthefly=true)
-Default single precision constructor for MPArray. 
+Default single precision constructor for MPArray with TL=Float16
 
 If your high precision array is single, then your low precision
 array is half (Duh!). 
@@ -40,7 +40,6 @@ solves with on-the-fly interprecision transfer in this case because
 the bit of extra accuracy makes a difference and, at least for now,
 on-the-fly interprecision transfers are cheaper.
 
-
 Data structures etc are the same as in the 
 double-single/half case, but you don't have the option to go lower than
 half.
diff --git a/src/Structs4MP/MPGArray.jl b/src/Structs4MP/MPGArray.jl
new file mode 100644
index 00000000..bed7f53e
--- /dev/null
+++ b/src/Structs4MP/MPGArray.jl
@@ -0,0 +1,65 @@
+"""
+MPGArray(AH::Array{Float64,2}; TL = Float32, basissize=10)
+Default constructor for MPGArray. Allocate the storage for 
+GMRES-IR
+
+C. T. Kelley 2023
+
+
+The MPGArray data structure is
+
+```
+struct MPGArray{TH<:AbstractFloat,TL<:AbstractFloat}
+    AH::Array{TH,2}
+    AL::Array{TL,2}
+    VStore::Array{TH,2}
+    KStore::NTuple
+    residual::Vector{TH}
+    onthefly::Bool
+end
+```
+The constructor just builds an MPGArray with TH=Float64. Set TL=Float16
+to get double/half IR.
+"""
+
+struct MPGArray{TH<:AbstractFloat,TL<:AbstractFloat}
+    AH::Array{TH,2}
+    AL::Array{TL,2}
+    VStore::Array{TH,2}
+    KStore::NTuple
+    residual::Vector{TH}
+    onthefly::Bool
+end
+
+
+function MPGArray(AH::Array{Float64,2}; basissize=10, TL=Float32)
+AL=TL.(AH)
+(m,n)=size(AH)
+res=ones(eltype(AH),n)
+VStore=zeros(eltype(AH),n,basissize)
+KStore=kstore(n,"gmres")
+MPGA=MPGArray(AH, AL, VStore, KStore, res, true)
+end
+
+
+function MPGArray(AH::Array{Float32,2}; basissize=10, TL=Float16)
+AL=TL.(AH)
+(m,n)=size(AH)
+res=ones(eltype(AH),n)
+VStore=zeros(eltype(AH),n,basissize)
+KStore=kstore(n,"gmres")
+MPGA=MPGArray(AH, AL, VStore, KStore, res, true)
+return MPGA
+end
+
+function Xmpglu!(MPGA::MPGArray)
+AL=MPGA.AL
+AH=MPGA.AH
+VStore=MPGA.VStore
+KStore=MPGA.KStore
+res=MPGA.residual
+TL=eltype(AL)
+(TL == Float16) ? ALF = hlu!(AL) : ALF = lu!(AL)
+MPF=MPGEFact(AH, AL, ALF, VStore, KStore, res, true)
+return MPF
+end
diff --git a/test/DetailsTest/mplu_test.jl b/test/DetailsTest/mplu_test.jl
index 2d3c790b..85b9eeaf 100644
--- a/test/DetailsTest/mplu_test.jl
+++ b/test/DetailsTest/mplu_test.jl
@@ -29,7 +29,7 @@ function mpglu_test()
 AD=rand(10,10); MPD=MPArray(AD); MPF1=mpglu!(MPD); MPF2=mpglu(AD);
 eq64=test_eq(MPF1,MPF2)
 eq64 || println("mpglu t1 fails")
-ADx=rand(10,10); MPDx=MPArray(ADx; TL=Float16);
+ADx=rand(10,10); MPDx=MPGArray(ADx; TL=Float16);
 MPF1x=mpglu!(MPDx); MPF2x=mpglu(ADx; TL=Float16);
 eq64x=test_eq(MPF1x,MPF2x)
 eq64x || println("mpglu t2 fails")
@@ -47,6 +47,7 @@ eqok=true
 for nf in fieldnames(MPLFact)
 gx=getfield(MF1,nf); hx =getfield(MF2,nf)
 eqok= ((gx==hx) && eqok)
+eqok || println(nf)
 end
 return eqok
 end