ctkelley · ctkelley · Jul 24, 2024 · Jul 24, 2024 · Jul 24, 2024 · Jul 24, 2024
diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
@@ -0,0 +1,23 @@
+on: [push]
+
+jobs:
+  paper:
+    runs-on: ubuntu-latest
+    name: Paper Draft
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Build draft PDF
+        uses: openjournals/openjournals-draft-action@master
+        with:
+          journal: joss
+          # This should be the path to the paper within your repo.
+          paper-path: Joss_Paper/paper.md
+      - name: Upload
+        uses: actions/upload-artifact@v4
+        with:
+          name: paper
+          # This is the output path where Pandoc will write the compiled
+          # PDF. Note, this should be the same directory as the input
+          # paper.md
+          path: Joss_Paper
diff --git a/Joss_Paper/paper.bib b/Joss_Paper/paper.bib
@@ -0,0 +1,185 @@
+@article{amestoy:2024,
+author = {Amestoy, Patrick and Buttari, Alfredo and Higham, Nicholas J. and L’Excellent, Jean-Yves and Mary, Theo and Vieubl\'{e}, Bastien},
+title = {Five-Precision GMRES-Based Iterative Refinement},
+journal = {SIAM Journal on Matrix Analysis and Applications},
+volume = {45},
+number = {1},
+pages = {529-552},
+year = {2024},
+doi = {10.1137/23M1549079}
+}
+
+@article{CarsonHigham:2017,
+author = {E. Carson and N. J. Higham},
+title = {A new analysis of iterative refinement and its application of
+accurate solution of ill-conditioned sparse linear systems},
+journal = {SIAM Journal on Scientific Computing},
+volume=39,
+number=6,
+pages={A2834-A2856}, 
+year=2017,
+doi={10.1137/17M1122918}
+}
+
+@article{CarsonHigham:2018,
+author = {E. Carson and N. J. Higham},
+title = {Accelerating the Solution of Linear Systems by Iterative Refinement in Three Precisions},
+journal = {SIAM Journal on Scientific Computing},
+volume = {40},
+number = {2},
+pages = {A817-A847},
+year = {2018},
+doi = {10.1137/17M1140819}
+}
+
+@article{dongarra:1983,
+author="J.J.Dongarra and C.B.Moler and J.H.Wilkinson",
+title="Improving the accuracy of computed eigenvalues and eigenvectors",
+journal="SIAM Journal on Numerical Analysis",
+volume=20,
+pages="23--45",
+year=1983,
+doi={10.1137/0720002}
+}
+
+@techreport{Wilkinson:48,
+author="J. H. Wilkinson",
+title="Progress Report on the Automatic Computing Engine",
+number="MA/17/1024",
+institution="Mathematics Division, Department of Scientific and Industrial Research, National Physical Laboratory, Teddington, UK",
+year=1948,
+url="http://www.alanturing.net/turing_archive/archive/l/l10/l10.php"
+}
+
+
+@book{higham:1996,
+  author = "N. J. Higham",
+  title = "Accuracy and Stability of Numerical Algorithms",
+  publisher = "Society for Industrial and Applied Mathematics",
+  address = "Philadelphia, PA, USA",
+  year = 1996,
+  pages = "xxviii+688",
+  doi = {10.1137/1.9780898718027},
+  URL = "http://www.ma.man.ac.uk/~higham/asna.html",
+  isbn = "0-89871-355-2"
+        }
+
+
+@article{kelley:2022a,
+author="C. T. Kelley",
+title="Newton's Method in Mixed Precision",
+year=2022,
+volume=64,
+pages="191--211",
+doi="10.1137/20M1342902", 
+journal="SIAM Review"
+}
+
+@book{kelley:2022b,
+author="C. T. Kelley",
+title="{Solving Nonlinear Equations with Iterative Methods:
+Solvers and Examples in Julia}",
+publisher="SIAM, Philadelphia",
+address="Philadelphia",
+year=2022,
+ISBN ="978-1-61197-726-4",
+eISBN = "978-1-61197-727-1",
+doi="10.1137/1.9781611977271",
+series="Fundamentals of Algorithms",
+number=20
+}
+
+@misc{kelley:2022c,
+title="{SIAMFANLEquations.jl}",
+author="C. T. Kelley",
+year=2022,
+note="Julia Package",
+doi="10.5281/zenodo.4284807",
+url="https://github.com/ctkelley/SIAMFANLEquations.jl",
+howpublished="https://github.com/ctkelley/SIAMFANLEquations.jl"
+}
+
+
+@misc{kelley:2023,
+      title={Newton's Method in Three Precisions},
+      author={C. T. Kelley},
+      year={2023},
+      eprint={2307.16051},
+      archivePrefix={arXiv},
+      primaryClass={math.NA},
+      doi = {10.48550/arXiv.2307.16051},
+      note="To appear in Pacific Journal of Optimization"
+}
+
+@misc{kelley:2024a,
+title="{MultiPrecisionArrays.jl}",
+author="C. T. Kelley",
+year=2024,
+note="Julia Package",
+doi="10.5281/zenodo.7521427",
+url="https://github.com/ctkelley/MultiPrecisionArrays.jl",
+howpublished="https://github.com/ctkelley/MultiPrecisionArrays.jl"
+}
+
+@misc{kelley:2024b,
+author="C. T. Kelley",
+title="Using {MultiPrecisionArrays.jl}: {I}terative Refinement in {J}ulia",
+year=2024,
+archivePrefix={arXiv},
+primaryClass={math.NA},
+doi = {10.48550/arXiv.2311.14616},
+eprint={2311.14616}
+}
+
+@article{Juliasirev:17,
+title="Julia: A Fresh Approach to Numerical Computing",
+author="J. Bezanson and A. Edelman and S. Karpinski and V. B. Shah",
+year=2017,
+journal="SIAM Review",
+volume=59, 
+pages="65--98",
+doi={10.1137/141000671}
+} 
+
+@article{demmel:06,
+title="Error Bounds from Extra-Precise Iterative Refinement",
+author="James Demmel and Yozo Hida and William Kahan",
+year=2006,
+journal="ACM Trans. Math. Soft.",
+pages="325--351",
+doi={10.1145/1141885.1141894},
+} 
+
+@article{VanderVorst:1992,
+author="H. A. {van der Vorst}",
+title="{Bi-CGSTAB}: A fast and smoothly converging variant to
+{Bi-CG} for the solution of nonsymmetric systems", 
+journal = {SIAM J. Sci. Stat. Comp.},
+year=1992, 
+pages="631--644",
+volume=13,
+doi={10.1137/0913035}
+}
+
+@ARTICLE{saad:1986,
+   author = {Y. Saad and M.H. Schultz},
+   title = {{GMRES} a generalized minimal residual algorithm for solving
+            nonsymmetric linear systems},
+   journal = {SIAM J. Sci. Stat. Comp.},
+   year = 1986,
+   pages = {856--869},
+   doi = {10.1137/0907058},
+   volume = 7   }
+
+}  
+
+@misc{kelley:2024c,
+author="C. T. Kelley",
+title="Interprecision transfers in iterative refinement",
+year = 2024,
+archivePrefix={arXiv},
+primaryClass={math.NA},
+eprint={2407.00827},
+note="submitted for publication" 
+}
+
diff --git a/Joss_Paper/paper.md b/Joss_Paper/paper.md
@@ -0,0 +1,179 @@
+---
+title: 'MultiPrecisionArrays.jl: A Julia package for iterative refinement'
+tags:
+  - Julia
+  - mathematics
+  - numerical analysis
+  - linear equations
+authors:
+  - name: C. T. Kelley
+    orcid: 0000-0003-2791-0648
+    affiliation: 1
+affiliations:
+  - name: North Carolina State University, Raleigh NC, USA
+    index: 1
+date: 15 April 2024
+bibliography: paper.bib
+---
+
+# Summary
+
+[MultiPrecisionArrays.jl](https://github.com/ctkelley/MultiPrecisionArrays.jl),
+[@kelley:2024b] provides data structures and
+solvers for several variations of iterative refinement (IR). IR can speed up an LU matrix factorization for solving linear systems of equations by
+factoring a low precision copy of the matrix and using that 
+low precision factorization in an iteration to solve the system.
+For example, if high precision is double and low precision is single,
+then the factorization time is cut in half.
+The additional storage cost is the low precision copy, 
+so IR is at time vs storage trade off. IR has a long history
+and a good account of the classical theory is in [@higham:1996].
+
+# Statement of need
+
+The solution of linear systems of equations is a ubiquitous task
+in computational science and engineering. A common method for dense
+systems is Gaussian elimination done via an LU factorization,
+[@higham:1996].  Iterative refinement is a way to reduce the factorization
+time at the cost of additional storage. 
+[MultiPrecisionArrays.jl](https://github.com/ctkelley/MultiPrecisionArrays.jl)
+enables IR with a simple interface in Julia [@Juliasirev:17] with an
+IR factorization object that one uses in the same way as the one for LU.
+The package offers several variants of IR, both classical
+[@Wilkinson:48; @higham:1996], and some from the recent literature
+[@CarsonHigham:2017; @amestoy:2024].
+
+# Algorithm
+
+This package will make solving dense systems of linear equations faster by using the LU factorization and IR. While other factorizations can be used in IR, the
+package is limited to LU for now. A very generic description of this 
+for solving a linear system $A x = b$ in a high (working) precision is
+
+__IR(A, b)__
+
+- $x = 0$
+
+- $r = b$
+
+- Factor $A = LU$ in a lower precision
+
+- While $\| r \|$ is too large
+
+  - $d = (LU)^{-1} r$
+
+  - $x = x + d$
+
+  - $r = b - Ax$
+
+- end
+
+- end
+
+
+In Julia, a code to do this would solve the linear system $A x = b$ 
+in the working precision, say double,
+by using a factorization in a lower (factorization) 
+precision, say single, within a residual correction iteration. 
+This means that one would need to allocate storage for a copy of $A$ 
+in the factorization precision and factor that copy. 
+
+The multiprecision factorization ```mplu``` makes the low precision copy
+of the matrix, factors that copy, and allocates some storage for the 
+iteration. The original matrix and the low precision factorization
+are stored in a factorization object that you can use with ```\```.
+
+IR is a perfect example of a storage/time trade off.
+To solve a linear system $A x = b$ in $R^N$ with IR,
+one incurs the storage penalty of making a low
+precision copy of $A$ and reaps the benefit of only having to
+factor the low precision copy.
+
+# Installation
+
+The standard way to install a package is to type ```import.Pkg; Pkg.add("MultiPrecisionArrays")``` at the Julia prompt. One can run the unit tests with ```Pkg.test("MultiPrecisionArrays")```.
+After installation, type ```using MultiPrecisionArrays``` when you want to use the functions in the package.
+
+There are only two direct dependencies outside of the Julia standard libraries.
+The factorization in half precision (Float16) uses 
+[OhMyThreads.jl](https://github.com/JuliaFolds2/OhMyThreads.jl). The
+GMRES and Bi-CGSTAB solvers for Krylov-IR methods are taken from
+[SIAMFANL.jl](https://github.com/ctkelley/SIAMFANLEquations.jl)
+[@kelley:2022c].
+
+# A Few Subtleties
+
+Within the algorithm one has to determine what the line
+$d = (LU)^{-1} r$ means. Does one cast $r$ into the lower precision before 
+the solve or not? If one casts $r$ into the lower precision, then the solve
+is done entirely in the factorization precision. If, however, $r$ remains
+in the working precision, then the LU factors are promoted to the working
+precision on the fly. This makes little difference if TW is double and 
+TF is single and there is a modest performance benefit to downcasting $r$ into
+single. Therefore that is the default behavior in that case. 
+If TF is half precision, ```Float16```, then it is best to do the interprecision
+transfers on the fly and if 
+one is using one of the Krylov-IR algorithms
+[@amestoy:2024] then one must do the interprecision transfers on the fly
+and not downcast $r$.
+
+There are two half precision (16 bit) formats. Julia has native support for IEEE 16 bit floats (Float16). A second format (BFloat16) has a larger exponent field and a smaller significand (mantissa), thereby trading precision for range. In fact, the exponent field in BFloat is the same size (8 bits) as that for single precision (Float32). The significand, however, is only 8 bits. Compare this to the size of the exponent fields for Float16 (11 bits) and single (24 bits). The size of the significand means that you can get in real trouble with half precision in either format and that IR is more likely to fail to converge. GMRES-IR
+can mitigate the convergence problems [@amestoy:2024] by using the 
+low-precision solve as a preconditioner. We support both GMRES 
+[@saad:1986] and BiCGSTAB [@VanderVorst:1992] as solvers for
+Krylov-IR methods. One should also know that LAPACK and the BLAS do not yet
+support half precision arrays, so working in Float16 will be slower than
+using Float64. 
+
+The classic algorithm from [@Wilkinson:48] and its recent extension
+[@CarsonHigham:2017] evaluate the residual in a higher precision
+that the working precision. This can give improved accuracy for
+ill-conditioned problems at a cost of the interprecision transfers
+in the residual computation. This needs to be implemented with some
+care and [@demmel:06] has an excellent account of the details.
+
+__MultiPrecisionArrays.jl__ provides
+infrastructure to manage these things and we refer the reader to
+[@kelley:2024b] for the details.
+
+# Projects using __MultiPrecisionArrays.jl__.
+
+This package was motivated by
+the use of low-precision factorizations in Newton's method
+[@kelley:2022a; @kelley:2022b] and the interface between 
+a preliminary version of this
+package and the solvers from [@kelley:2022b; @kelley:2022c] was reported in
+[@kelley:2023].  That paper used a three 
+precision form of IR (TF=half, TW=single, nonlinear residual
+computed in double) and required direct use of multiprecision
+constructors that we do not export in __MultiPrecisionArrays.jl__.
+We will fully support the application to nonlinear solvers in
+a future version. We give a detailed account of interprecision
+transfers in [@kelley:2024c] and use 
+__MultiPrecisionArrays.jl__ to generate the table in that paper.
+
+
+# Other Julia Packages for IR
+
+The package
+[IterativeRefinement.jl](https://github.com/RalphAS/IterativeRefinement.jl)
+is an implementation of the IR method from
+[@dongarra:1983]. It has not been updated in four years.
+
+The unregistered
+package [Itref.jl](https://github.com/bvieuble/Itref.jl) implements
+IR and the GMRES-IR method from [@amestoy:2024] and was used to obtain
+the numerical results in that paper. It does not provide the
+data structures for preallocation that we do and does not seem to have
+been updated lately.
+
+
+# Acknowledgements
+
+This work was partially supported by
+Department of Energy grant DE-NA003967. 
+Any opinions, findings, and conclusions or recommendations 
+expressed in this material are those of the author and 
+do not necessarily reflect the views of the United States Department of Energy.
+
+
+# References
diff --git a/Joss_Paper/paper.pdf b/Joss_Paper/paper.pdf