diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index cddc0af50..a53d926d8 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -3,9 +3,13 @@ on:
pull_request:
branches:
- main
+ paths-ignore:
+ - 'docs/**'
push:
branches:
- main
+ paths-ignore:
+ - 'docs/**'
jobs:
test:
runs-on: ubuntu-latest
diff --git a/benchmarks/applelu.jl b/benchmarks/applelu.jl
index 58ef00b2c..c346ff0c9 100644
--- a/benchmarks/applelu.jl
+++ b/benchmarks/applelu.jl
@@ -18,7 +18,13 @@ function luflop(m, n = m; innerflop = 2)
end
end
-algs = [LUFactorization(), GenericLUFactorization(), RFLUFactorization(), AppleAccelerateLUFactorization(), MetalLUFactorization()]
+algs = [
+ LUFactorization(),
+ GenericLUFactorization(),
+ RFLUFactorization(),
+ AppleAccelerateLUFactorization(),
+ MetalLUFactorization(),
+]
res = [Float32[] for i in 1:length(algs)]
ns = 4:8:500
@@ -28,10 +34,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
- global u0= rand(rng, Float32, n)
-
+ global u0 = rand(rng, Float32, n)
+
for j in 1:length(algs)
- bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
+ bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
+ copy(b);
+ u0 = copy(u0),
+ alias_A = true,
+ alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
@@ -41,11 +51,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)
-p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
+p = plot(ns,
+ res[1];
+ ylabel = "GFLOPs",
+ xlabel = "N",
+ title = "GFLOPs for NxN LU Factorization",
+ label = string(Symbol(parameterless_type(algs[1]))),
+ legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p
savefig("metallubench.png")
-savefig("metallubench.pdf")
\ No newline at end of file
+savefig("metallubench.pdf")
diff --git a/benchmarks/cudalu.jl b/benchmarks/cudalu.jl
index b0186f2ec..c1b89fc1c 100644
--- a/benchmarks/cudalu.jl
+++ b/benchmarks/cudalu.jl
@@ -28,10 +28,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
- global u0= rand(rng, Float32, n)
-
+ global u0 = rand(rng, Float32, n)
+
for j in 1:length(algs)
- bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
+ bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
+ copy(b);
+ u0 = copy(u0),
+ alias_A = true,
+ alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
@@ -41,11 +45,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)
-p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
+p = plot(ns,
+ res[1];
+ ylabel = "GFLOPs",
+ xlabel = "N",
+ title = "GFLOPs for NxN LU Factorization",
+ label = string(Symbol(parameterless_type(algs[1]))),
+ legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p
savefig("cudaoffloadlubench.png")
-savefig("cudaoffloadlubench.pdf")
\ No newline at end of file
+savefig("cudaoffloadlubench.pdf")
diff --git a/benchmarks/lu.jl b/benchmarks/lu.jl
index c4f9a5da1..7ab624b45 100644
--- a/benchmarks/lu.jl
+++ b/benchmarks/lu.jl
@@ -18,7 +18,14 @@ function luflop(m, n = m; innerflop = 2)
end
end
-algs = [LUFactorization(), GenericLUFactorization(), RFLUFactorization(), MKLLUFactorization(), FastLUFactorization(), SimpleLUFactorization()]
+algs = [
+ LUFactorization(),
+ GenericLUFactorization(),
+ RFLUFactorization(),
+ MKLLUFactorization(),
+ FastLUFactorization(),
+ SimpleLUFactorization(),
+]
res = [Float64[] for i in 1:length(algs)]
ns = 4:8:500
@@ -28,10 +35,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, n, n)
global b = rand(rng, n)
- global u0= rand(rng, n)
-
+ global u0 = rand(rng, n)
+
for j in 1:length(algs)
- bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
+ bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
+ copy(b);
+ u0 = copy(u0),
+ alias_A = true,
+ alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
@@ -41,11 +52,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)
-p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
+p = plot(ns,
+ res[1];
+ ylabel = "GFLOPs",
+ xlabel = "N",
+ title = "GFLOPs for NxN LU Factorization",
+ label = string(Symbol(parameterless_type(algs[1]))),
+ legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p
savefig("lubench.png")
-savefig("lubench.pdf")
\ No newline at end of file
+savefig("lubench.pdf")
diff --git a/benchmarks/metallu.jl b/benchmarks/metallu.jl
index a9614e7a4..cc7416871 100644
--- a/benchmarks/metallu.jl
+++ b/benchmarks/metallu.jl
@@ -28,10 +28,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
- global u0= rand(rng, Float32, n)
-
+ global u0 = rand(rng, Float32, n)
+
for j in 1:length(algs)
- bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
+ bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
+ copy(b);
+ u0 = copy(u0),
+ alias_A = true,
+ alias_b = true))
GC.gc()
push!(res[j], luflop(n) / bt / 1e9)
end
@@ -42,11 +46,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)
-p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
+p = plot(ns,
+ res[1];
+ ylabel = "GFLOPs",
+ xlabel = "N",
+ title = "GFLOPs for NxN LU Factorization",
+ label = string(Symbol(parameterless_type(algs[1]))),
+ legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p
savefig("metal_large_lubench.png")
-savefig("metal_large_lubench.pdf")
\ No newline at end of file
+savefig("metal_large_lubench.pdf")
diff --git a/docs/Project.toml b/docs/Project.toml
index b32ced196..05f63912d 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,5 +3,5 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
[compat]
-Documenter = "0.27"
+Documenter = "1"
LinearSolve = "1, 2"
diff --git a/docs/make.jl b/docs/make.jl
index 677921189..faee67f08 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,14 +12,7 @@ makedocs(sitename = "LinearSolve.jl",
authors = "Chris Rackauckas",
modules = [LinearSolve, LinearSolve.SciMLBase],
clean = true, doctest = false, linkcheck = true,
- strict = [
- :doctest,
- :linkcheck,
- :parse_error,
- :example_block,
- # Other available options are
- # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block
- ],
+ warnonly = [:docs_block, :missing_docs],
format = Documenter.HTML(assets = ["assets/favicon.ico"],
canonical = "https://docs.sciml.ai/LinearSolve/stable/"),
pages = pages)
diff --git a/docs/src/index.md b/docs/src/index.md
index 0574f7066..c2f9c9dfa 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -84,32 +84,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide
```
-```@raw html
-You can also download the
-manifest file and the
-project file.
+link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+ "/assets/Manifest.toml"
+link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+ "/assets/Project.toml"
+Markdown.parse("""You can also download the
+[manifest]($link_manifest)
+file and the
+[project]($link_project)
+file.
+""")
```
diff --git a/docs/src/solvers/solvers.md b/docs/src/solvers/solvers.md
index fe8a8b6e4..76ad95fc2 100644
--- a/docs/src/solvers/solvers.md
+++ b/docs/src/solvers/solvers.md
@@ -14,15 +14,15 @@ but one may need to change this to receive more performance or precision. If
more precision is necessary, `QRFactorization()` and `SVDFactorization()` are
the best choices, with SVD being the slowest but most precise.
-For efficiency, `RFLUFactorization` is the fastest for dense LU-factorizations until around
+For efficiency, `RFLUFactorization` is the fastest for dense LU-factorizations until around
150x150 matrices, though this can be dependent on the exact details of the hardware. After this
point, `MKLLUFactorization` is usually faster on most hardware. Note that on Mac computers,
`AppleAccelerateLUFactorization` is generally the fastest. `LUFactorization` will
-use your base system BLAS which can be fast or slow depending on the hardware configuration.
+use your base system BLAS, which can be fast or slow depending on the hardware configuration.
`SimpleLUFactorization` will be fast only on very small matrices but can cut down on compile times.
For very large dense factorizations, offloading to the GPU can be preferred. Metal.jl can be used
-on Mac hardware to offload, and has a cutoff point of being faster at around size 20,000 x 20,000
+on Mac hardware to offload, and it becomes the faster choice at around size 20,000 x 20,000
matrices (and only supports Float32). `CudaOffloadFactorization` can be more efficient at a
much smaller cutoff, possibly around size 1,000 x 1,000 matrices, though this is highly dependent
on the chosen GPU hardware. `CudaOffloadFactorization` requires a CUDA-compatible NVIDIA GPU.
@@ -31,9 +31,9 @@ CUDA offload supports Float64 but most consumer GPU hardware will be much faster
this is only recommended for Float32 matrices.
!!! note
-
- Performance details for dense LU-factorizations can be highly dependent on the hardware configuration.
- For details see [this issue](https://github.com/SciML/LinearSolve.jl/issues/357).
+
+ Performance details for dense LU-factorizations can be highly dependent on the hardware configuration.
+    For details, see [this issue](https://github.com/SciML/LinearSolve.jl/issues/357).
If one is looking to best optimize their system, we suggest running the performance
tuning benchmark.
@@ -65,7 +65,7 @@ The interface is detailed [here](@ref custom).
### Lazy SciMLOperators
If the linear operator is given as a lazy non-concrete operator, such as a `FunctionOperator`,
-then using a Krylov method is preferred in order to not concretize the matrix.
+then using a Krylov method is preferred in order to avoid concretizing the matrix.
Krylov.jl generally outperforms IterativeSolvers.jl and KrylovKit.jl, and is compatible
with CPUs and GPUs, and thus is the generally preferred form for Krylov methods. The
choice of Krylov method should be the one most constrained to the type of operator one
@@ -73,11 +73,11 @@ has, for example if positive definite then `Krylov_CG()`, but if no good propert
use `Krylov_GMRES()`.
!!! tip
-
+
If your materialized operator is a uniform block diagonal matrix, then you can use
`SimpleGMRES(; blocksize = )` to further improve performance.
This often shows up in neural networks, where the Jacobian with respect to the inputs (almost always)
-    is a Uniform Block Diagonal matrix of Block Size = size of the input divided by the
+    is a uniform block diagonal matrix whose block size equals the size of the input divided by the
batch size.
## Full List of Methods
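The guidance above maps directly onto the `solve(prob, alg)` interface. A short sketch under those size assumptions (the block diagonal case mirrors the `SimpleGMRES(; blocksize = 2)` tests touched later in this PR):

```julia
using LinearSolve, BlockDiagonals, LinearAlgebra

# Dense system below the ~150x150 crossover, where RFLUFactorization shines.
A = rand(100, 100) + 10I   # the shift keeps the example well-conditioned
b = rand(100)
sol = solve(LinearProblem(A, b), RFLUFactorization())

# Uniform block diagonal structure: pass the block size to SimpleGMRES.
Abd = Array(BlockDiagonal([rand(2, 2) + 2I for _ in 1:4]))
bbd = rand(8)
solbd = solve(LinearProblem(Abd, bbd), SimpleGMRES(; blocksize = 2))
```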
diff --git a/ext/LinearSolveBlockDiagonalsExt.jl b/ext/LinearSolveBlockDiagonalsExt.jl
index 1e9b053eb..4b1827200 100644
--- a/ext/LinearSolveBlockDiagonalsExt.jl
+++ b/ext/LinearSolveBlockDiagonalsExt.jl
@@ -4,7 +4,7 @@ using LinearSolve, BlockDiagonals
function LinearSolve.init_cacheval(alg::SimpleGMRES{false}, A::BlockDiagonal, b, args...;
kwargs...)
- @assert ndims(A) == 2 "ndims(A) == $(ndims(A)). `A` must have ndims == 2."
+ @assert ndims(A)==2 "ndims(A) == $(ndims(A)). `A` must have ndims == 2."
# We need to perform this check even when `zeroinit == true`, since the type of the
# cache is dependent on whether we are able to use the specialized dispatch.
bsizes = blocksizes(A)
diff --git a/ext/LinearSolveKernelAbstractionsExt.jl b/ext/LinearSolveKernelAbstractionsExt.jl
index ba620382f..52a0a10e0 100644
--- a/ext/LinearSolveKernelAbstractionsExt.jl
+++ b/ext/LinearSolveKernelAbstractionsExt.jl
@@ -9,7 +9,7 @@ using GPUArraysCore
function LinearSolve._fast_sym_givens!(c, s, R, nr::Int, inner_iter::Int, bsize::Int, Hbis)
backend = get_backend(Hbis)
kernel! = __fast_sym_givens_kernel!(backend)
- kernel!(c[inner_iter], s[inner_iter], R[nr + inner_iter], Hbis; ndrange=bsize)
+ kernel!(c[inner_iter], s[inner_iter], R[nr + inner_iter], Hbis; ndrange = bsize)
return c, s, R
end
diff --git a/ext/LinearSolveMKLExt.jl b/ext/LinearSolveMKLExt.jl
index 1ed917f58..a094abe7e 100644
--- a/ext/LinearSolveMKLExt.jl
+++ b/ext/LinearSolveMKLExt.jl
@@ -2,49 +2,60 @@ module LinearSolveMKLExt
using MKL_jll
using LinearAlgebra: BlasInt, LU
-using LinearAlgebra.LAPACK: require_one_based_indexing, chkfinite, chkstride1,
- @blasfunc, chkargsok
+using LinearAlgebra.LAPACK: require_one_based_indexing,
+ chkfinite, chkstride1,
+ @blasfunc, chkargsok
using LinearAlgebra
const usemkl = MKL_jll.is_available()
using LinearSolve
using LinearSolve: ArrayInterface, MKLLUFactorization, @get_cacheval, LinearCache, SciMLBase
-function getrf!(A::AbstractMatrix{<:Float64}; ipiv = similar(A, BlasInt, min(size(A,1),size(A,2))), info = Ref{BlasInt}(), check = false)
+function getrf!(A::AbstractMatrix{<:Float64};
+ ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+ info = Ref{BlasInt}(),
+ check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
- lda = max(1,stride(A, 2))
+ lda = max(1, stride(A, 2))
if isempty(ipiv)
- ipiv = similar(A, BlasInt, min(size(A,1),size(A,2)))
+ ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(dgetrf_), MKL_jll.libmkl_rt), Cvoid,
- (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
+ (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
- m, n, A, lda, ipiv, info)
+ m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end
-function getrf!(A::AbstractMatrix{<:Float32}; ipiv = similar(A, BlasInt, min(size(A,1),size(A,2))), info = Ref{BlasInt}(), check = false)
+function getrf!(A::AbstractMatrix{<:Float32};
+ ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+ info = Ref{BlasInt}(),
+ check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
- lda = max(1,stride(A, 2))
+ lda = max(1, stride(A, 2))
if isempty(ipiv)
- ipiv = similar(A, BlasInt, min(size(A,1),size(A,2)))
+ ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(sgetrf_), MKL_jll.libmkl_rt), Cvoid,
- (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
+ (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
- m, n, A, lda, ipiv, info)
+ m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end
-function getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float64}, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{<:Float64}; info = Ref{BlasInt}())
+function getrs!(trans::AbstractChar,
+ A::AbstractMatrix{<:Float64},
+ ipiv::AbstractVector{BlasInt},
+ B::AbstractVecOrMat{<:Float64};
+ info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
@@ -57,14 +68,19 @@ function getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float64}, ipiv::Abstrac
end
nrhs = size(B, 2)
ccall(("dgetrs_", MKL_jll.libmkl_rt), Cvoid,
- (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
- Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
- trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
+ (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
+ Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+ trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+ 1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end
-function getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float32}, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{<:Float32}; info = Ref{BlasInt}())
+function getrs!(trans::AbstractChar,
+ A::AbstractMatrix{<:Float32},
+ ipiv::AbstractVector{BlasInt},
+ B::AbstractVecOrMat{<:Float32};
+ info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
@@ -77,9 +93,10 @@ function getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float32}, ipiv::Abstrac
end
nrhs = size(B, 2)
ccall(("sgetrs_", MKL_jll.libmkl_rt), Cvoid,
- (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt},
- Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
- trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
+ (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt},
+ Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+ trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+ 1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end
@@ -125,4 +142,4 @@ function SciMLBase.solve!(cache::LinearCache, alg::MKLLUFactorization;
=#
end
-end
\ No newline at end of file
+end
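For orientation, the wrappers above pair the same way their LAPACK counterparts do: `getrf!` computes the in-place LU factorization and pivot vector, and `getrs!` reuses them for each right-hand side. A minimal sketch of that pairing, assuming MKL is available and run inside the extension module where these internal functions are in scope (end users would normally just call `solve(prob, MKLLUFactorization())`):

```julia
A = rand(50, 50)
b = rand(50)

F, ipiv, code, _ = getrf!(copy(A))   # in-place LU; `code` carries the LAPACK info flag
x = getrs!('N', F, ipiv, copy(b))    # forward/back substitution with the stored factors
@assert A * x ≈ b
```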
diff --git a/ext/LinearSolveMetalExt.jl b/ext/LinearSolveMetalExt.jl
index 4ee07dc39..2e963f514 100644
--- a/ext/LinearSolveMetalExt.jl
+++ b/ext/LinearSolveMetalExt.jl
@@ -28,4 +28,4 @@ function SciMLBase.solve!(cache::LinearCache, alg::MetalLUFactorization;
SciMLBase.build_linear_solution(alg, y, nothing, cache)
end
-end
\ No newline at end of file
+end
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
index 644c86288..7e96f0203 100644
--- a/src/LinearSolve.jl
+++ b/src/LinearSolve.jl
@@ -28,8 +28,9 @@ PrecompileTools.@recompile_invalidations begin
import InteractiveUtils
using LinearAlgebra: BlasInt, LU
- using LinearAlgebra.LAPACK: require_one_based_indexing, chkfinite, chkstride1,
- @blasfunc, chkargsok
+ using LinearAlgebra.LAPACK: require_one_based_indexing,
+ chkfinite, chkstride1,
+ @blasfunc, chkargsok
import GPUArraysCore
import Preferences
diff --git a/src/appleaccelerate.jl b/src/appleaccelerate.jl
index 0b7ea7cae..f1310ec16 100644
--- a/src/appleaccelerate.jl
+++ b/src/appleaccelerate.jl
@@ -2,7 +2,7 @@ using LinearAlgebra
using Libdl
# For now, only use BLAS from Accelerate (that is to say, vecLib)
-global const libacc = "/System/Library/Frameworks/Accelerate.framework/Accelerate"
+const global libacc = "/System/Library/Frameworks/Accelerate.framework/Accelerate"
"""
```julia
@@ -26,43 +26,53 @@ function appleaccelerate_isavailable()
return true
end
-function aa_getrf!(A::AbstractMatrix{<:Float64}; ipiv = similar(A, Cint, min(size(A,1),size(A,2))), info = Ref{Cint}(), check = false)
+function aa_getrf!(A::AbstractMatrix{<:Float64};
+ ipiv = similar(A, Cint, min(size(A, 1), size(A, 2))),
+ info = Ref{Cint}(),
+ check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
- lda = max(1,stride(A, 2))
+ lda = max(1, stride(A, 2))
if isempty(ipiv)
- ipiv = similar(A, Cint, min(size(A,1),size(A,2)))
+ ipiv = similar(A, Cint, min(size(A, 1), size(A, 2)))
end
ccall(("dgetrf_", libacc), Cvoid,
- (Ref{Cint}, Ref{Cint}, Ptr{Float64},
+ (Ref{Cint}, Ref{Cint}, Ptr{Float64},
Ref{Cint}, Ptr{Cint}, Ptr{Cint}),
- m, n, A, lda, ipiv, info)
+ m, n, A, lda, ipiv, info)
info[] < 0 && throw(ArgumentError("Invalid arguments sent to LAPACK dgetrf_"))
A, ipiv, BlasInt(info[]), info #Error code is stored in LU factorization type
end
-function aa_getrf!(A::AbstractMatrix{<:Float32}; ipiv = similar(A, Cint, min(size(A,1),size(A,2))), info = Ref{Cint}(), check = false)
+function aa_getrf!(A::AbstractMatrix{<:Float32};
+ ipiv = similar(A, Cint, min(size(A, 1), size(A, 2))),
+ info = Ref{Cint}(),
+ check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
- lda = max(1,stride(A, 2))
+ lda = max(1, stride(A, 2))
if isempty(ipiv)
- ipiv = similar(A, Cint, min(size(A,1),size(A,2)))
+ ipiv = similar(A, Cint, min(size(A, 1), size(A, 2)))
end
ccall(("sgetrf_", libacc), Cvoid,
- (Ref{Cint}, Ref{Cint}, Ptr{Float32},
+ (Ref{Cint}, Ref{Cint}, Ptr{Float32},
Ref{Cint}, Ptr{Cint}, Ptr{Cint}),
- m, n, A, lda, ipiv, info)
+ m, n, A, lda, ipiv, info)
info[] < 0 && throw(ArgumentError("Invalid arguments sent to LAPACK sgetrf_"))
A, ipiv, BlasInt(info[]), info #Error code is stored in LU factorization type
end
-function aa_getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float64}, ipiv::AbstractVector{Cint}, B::AbstractVecOrMat{<:Float64}; info = Ref{Cint}())
+function aa_getrs!(trans::AbstractChar,
+ A::AbstractMatrix{<:Float64},
+ ipiv::AbstractVector{Cint},
+ B::AbstractVecOrMat{<:Float64};
+ info = Ref{Cint}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
@@ -75,14 +85,19 @@ function aa_getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float64}, ipiv::Abst
end
nrhs = size(B, 2)
ccall(("dgetrs_", libacc), Cvoid,
- (Ref{UInt8}, Ref{Cint}, Ref{Cint}, Ptr{Float64}, Ref{Cint},
- Ptr{Cint}, Ptr{Float64}, Ref{Cint}, Ptr{Cint}, Clong),
- trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
+ (Ref{UInt8}, Ref{Cint}, Ref{Cint}, Ptr{Float64}, Ref{Cint},
+ Ptr{Cint}, Ptr{Float64}, Ref{Cint}, Ptr{Cint}, Clong),
+ trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+ 1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end
-function aa_getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float32}, ipiv::AbstractVector{Cint}, B::AbstractVecOrMat{<:Float32}; info = Ref{Cint}())
+function aa_getrs!(trans::AbstractChar,
+ A::AbstractMatrix{<:Float32},
+ ipiv::AbstractVector{Cint},
+ B::AbstractVecOrMat{<:Float32};
+ info = Ref{Cint}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
@@ -95,9 +110,10 @@ function aa_getrs!(trans::AbstractChar, A::AbstractMatrix{<:Float32}, ipiv::Abst
end
nrhs = size(B, 2)
ccall(("sgetrs_", libacc), Cvoid,
- (Ref{UInt8}, Ref{Cint}, Ref{Cint}, Ptr{Float32}, Ref{Cint},
- Ptr{Cint}, Ptr{Float32}, Ref{Cint}, Ptr{Cint}, Clong),
- trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
+ (Ref{UInt8}, Ref{Cint}, Ref{Cint}, Ptr{Float32}, Ref{Cint},
+ Ptr{Cint}, Ptr{Float32}, Ref{Cint}, Ptr{Cint}, Clong),
+ trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+ 1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end
@@ -109,7 +125,7 @@ function LinearSolve.init_cacheval(alg::AppleAccelerateLUFactorization, A, b, u,
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::OperatorAssumptions)
luinst = ArrayInterface.lu_instance(convert(AbstractMatrix, A))
- LU(luinst.factors,similar(A, Cint, 0), luinst.info), Ref{Cint}()
+ LU(luinst.factors, similar(A, Cint, 0), luinst.info), Ref{Cint}()
end
function SciMLBase.solve!(cache::LinearCache, alg::AppleAccelerateLUFactorization;
diff --git a/src/extension_algs.jl b/src/extension_algs.jl
index 8d3e3134e..dfd6aa5b9 100644
--- a/src/extension_algs.jl
+++ b/src/extension_algs.jl
@@ -357,4 +357,4 @@ MetalLUFactorization()
A wrapper over Apple's Metal GPU library. It calls Metal directly, pre-allocating workspace
to avoid allocations, and automatically offloads to the GPU.
"""
-struct MetalLUFactorization <: AbstractFactorization end
\ No newline at end of file
+struct MetalLUFactorization <: AbstractFactorization end
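Because `MetalLUFactorization` lives behind a package extension, it only becomes usable once Metal.jl is loaded. A hedged sketch, assuming Apple hardware with Metal.jl installed (Float32 only, per the solver docs above):

```julia
using LinearSolve
using Metal   # loading Metal.jl activates LinearSolveMetalExt

A = rand(Float32, 4096, 4096)
b = rand(Float32, 4096)
sol = solve(LinearProblem(A, b), MetalLUFactorization())
```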
diff --git a/test/basictests.jl b/test/basictests.jl
index 42f283173..1cf3b2f49 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -426,7 +426,6 @@ end
@testset "DirectLdiv!" begin
function get_operator(A, u; add_inverse = true)
-
function f(u, p, t)
println("using FunctionOperator OOP mul")
A * u
@@ -479,7 +478,9 @@ end
end # testset
# https://github.com/SciML/LinearSolve.jl/issues/347
-A = rand(4, 4); b = rand(4); u0 = zeros(4);
+A = rand(4, 4);
+b = rand(4);
+u0 = zeros(4);
lp = LinearProblem(A, b; u0 = view(u0, :));
truesol = solve(lp, LUFactorization())
krylovsol = solve(lp, KrylovJL_GMRES())
@@ -505,7 +506,7 @@ using BlockDiagonals
prob1 = LinearProblem(Array(A), b, x1)
prob2 = LinearProblem(Array(A), b, x2)
- test_interface(SimpleGMRES(; blocksize=2), prob1, prob2)
+ test_interface(SimpleGMRES(; blocksize = 2), prob1, prob2)
- @test solve(prob1, SimpleGMRES(; blocksize=2)).u ≈ solve(prob2, SimpleGMRES()).u
+ @test solve(prob1, SimpleGMRES(; blocksize = 2)).u ≈ solve(prob2, SimpleGMRES()).u
end
diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl
index 042576300..7402275eb 100644
--- a/test/gpu/cuda.jl
+++ b/test/gpu/cuda.jl
@@ -67,7 +67,7 @@ using BlockDiagonals
prob1 = LinearProblem(A, b, x1)
prob2 = LinearProblem(A, b, x2)
- test_interface(SimpleGMRES(; blocksize=2), prob1, prob2)
+ test_interface(SimpleGMRES(; blocksize = 2), prob1, prob2)
- @test solve(prob1, SimpleGMRES(; blocksize=2)).u ≈ solve(prob2, SimpleGMRES()).u
+ @test solve(prob1, SimpleGMRES(; blocksize = 2)).u ≈ solve(prob2, SimpleGMRES()).u
end
diff --git a/test/resolve.jl b/test/resolve.jl
index d49d5b1b6..ca5147b7e 100644
--- a/test/resolve.jl
+++ b/test/resolve.jl
@@ -2,9 +2,14 @@ using LinearSolve, LinearAlgebra, SparseArrays, InteractiveUtils, Test
for alg in subtypes(LinearSolve.AbstractFactorization)
@show alg
- if !(alg in [DiagonalFactorization, CudaOffloadFactorization, AppleAccelerateLUFactorization, MetalLUFactorization]) &&
- (!(alg == AppleAccelerateLUFactorization) || LinearSolve.appleaccelerate_isavailable())
-
+ if !(alg in [
+ DiagonalFactorization,
+ CudaOffloadFactorization,
+ AppleAccelerateLUFactorization,
+ MetalLUFactorization,
+ ]) &&
+ (!(alg == AppleAccelerateLUFactorization) ||
+ LinearSolve.appleaccelerate_isavailable())
A = [1.0 2.0; 3.0 4.0]
alg in [KLUFactorization, UMFPACKFactorization, SparspakFactorization] &&
(A = sparse(A))