From 7073d71458e8d88c01908c951a17361c395e0985 Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Mon, 19 Nov 2018 14:06:37 +0100 Subject: [PATCH 1/5] added sprandn methods with Type --- stdlib/SparseArrays/src/sparsematrix.jl | 4 +++- stdlib/SparseArrays/src/sparsevector.jl | 2 ++ stdlib/SparseArrays/test/sparse.jl | 7 +++++++ stdlib/SparseArrays/test/sparsevector.jl | 5 +++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 02c975168c6ec..7fac079ac97d6 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1471,7 +1471,7 @@ sprand(r::AbstractRNG, ::Type{Bool}, m::Integer, n::Integer, density::AbstractFl sprand(::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where {T} = sprand(GLOBAL_RNG, T, m, n, density) """ - sprandn([rng], m[,n],p::AbstractFloat) + sprandn([rng],[,Type],m[,n],p::AbstractFloat) Create a random sparse vector of length `m` or sparse matrix of size `m` by `n` with the specified (independent) probability `p` of any entry being nonzero, @@ -1488,6 +1488,8 @@ julia> sprandn(2, 2, 0.75) """ sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) = sprand(r,m,n,density,randn,Float64) sprandn(m::Integer, n::Integer, density::AbstractFloat) = sprandn(GLOBAL_RNG,m,n,density) +sprandn(r::AbstractRNG, ::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where T = sprand(r,m,n,density,(r,i) -> randn(r,T,i), T) +sprandn(::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where T = sprandn(GLOBAL_RNG,T,m,n,density) LinearAlgebra.fillstored!(S::SparseMatrixCSC, x) = (fill!(nzvalview(S), x); S) diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl index e7f0e1a2db26a..ce9fbbc35e947 100644 --- a/stdlib/SparseArrays/src/sparsevector.jl +++ b/stdlib/SparseArrays/src/sparsevector.jl @@ -506,6 +506,8 @@ sprand(::Type{T}, n::Integer, p::AbstractFloat) where {T} = sprand(GLOBAL_RNG, T sprandn(n::Integer, p::AbstractFloat) = sprand(GLOBAL_RNG, n, p, randn) sprandn(r::AbstractRNG, n::Integer, p::AbstractFloat) = sprand(r, n, p, randn) +sprandn(::Type{T}, n::Integer, p::AbstractFloat) where T = sprand(GLOBAL_RNG, n, p, (r, i) -> randn(r, T, i)) +sprandn(r::AbstractRNG, ::Type{T}, n::Integer, p::AbstractFloat) where T = sprand(r, n, p, (r, i) -> randn(r, T, i)) ## Indexing into Matrices can return SparseVectors diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl index c8e5ebd8b5acb..9eb2069d3984c 100644 --- a/stdlib/SparseArrays/test/sparse.jl +++ b/stdlib/SparseArrays/test/sparse.jl @@ -2335,4 +2335,11 @@ end @test m2.module == SparseArrays end +@testset "sprandn with type $T" for T in (Float64, Float32, Float16, ComplexF64, ComplexF32, ComplexF16) + @test sprandn(T, 5, 5, 0.5) isa AbstractSparseMatrix{T} +end +@testset "sprandn with invalid type $T" for T in (AbstractFloat, BigFloat, Complex) + @test_throws MethodError sprandn(T, 5, 5, 0.5) +end + end # module diff --git a/stdlib/SparseArrays/test/sparsevector.jl b/stdlib/SparseArrays/test/sparsevector.jl index 6061b21a6721c..d8ada3963e2c5 100644 --- a/stdlib/SparseArrays/test/sparsevector.jl +++ b/stdlib/SparseArrays/test/sparsevector.jl @@ -154,6 +154,11 @@ end end end + let xr = sprandn(ComplexF64, 1000, 0.9) + @test isa(xr, SparseVector{ComplexF64,Int}) + @test length(xr) == 1000 + end + let xr = sprand(Bool, 1000, 0.9) @test isa(xr, SparseVector{Bool,Int}) @test length(xr) == 1000 From 2bad969cfeba6c336594127cd9e8af74e66f829e Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Wed, 12 Dec 2018 19:27:12 +0100 Subject: [PATCH 2/5] improved speed and space for spmatmul --- stdlib/SparseArrays/src/linalg.jl | 84 ++++++++++++++++++------------ stdlib/SparseArrays/test/sparse.jl | 3 +- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl index 4350a2d922fc9..be8a8a90bb0c1 100644 --- a/stdlib/SparseArrays/src/linalg.jl +++ b/stdlib/SparseArrays/src/linalg.jl @@ -147,61 +147,79 @@ end *(A::Adjoint{<:Any,<:SparseMatrixCSC{Tv,Ti}}, B::Adjoint{<:Any,<:SparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = spmatmul(copy(A), copy(B)) *(A::Transpose{<:Any,<:SparseMatrixCSC{Tv,Ti}}, B::Transpose{<:Any,<:SparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = spmatmul(copy(A), copy(B)) -function spmatmul(A::SparseMatrixCSC{Tv,Ti}, B::SparseMatrixCSC{Tv,Ti}; - sortindices::Symbol = :sortcols) where {Tv,Ti} +# Gustavsen's matrix multiplication algorithm revisited. +# The result rowval vector is already sorted by construction. +# The auxiliary Vector{Ti} xb is replaced by a BitArray of same length. +# Besides SparseMatrixCSC also SparseVector is accepted as B. +# The optional argument controlling a sorting algorithm is obsolete. +function spmatmul(A::SparseMatrixCSC{Tv,Ti}, + B::Union{<:SparseMatrixCSC{Tv,Ti},<:SparseVector{Tv,Ti}}) where {Tv,Ti,N} + mA, nA = size(A) - mB, nB = size(B) - nA==mB || throw(DimensionMismatch()) - - colptrA = A.colptr; rowvalA = A.rowval; nzvalA = A.nzval - colptrB = B.colptr; rowvalB = B.rowval; nzvalB = B.nzval - # TODO: Need better estimation of result space - nnzC = min(mA*nB, length(nzvalA) + length(nzvalB)) - colptrC = Vector{Ti}(undef, nB+1) + nB = size(B, 2) + nA == size(B, 1) || throw(DimensionMismatch()) + + rowvalA = rowvals(A); nzvalA = nonzeros(A) + rowvalB = rowvals(B); nzvalB = nonzeros(B) + nnzC = estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) + if B isa SparseMatrixCSC; colptrC = Vector{Ti}(undef, nB+1) end rowvalC = Vector{Ti}(undef, nnzC) nzvalC = Vector{Tv}(undef, nnzC) @inbounds begin ip = 1 - xb = zeros(Ti, mA) - x = zeros(Tv, mA) + x = Vector{Tv}(undef, mA) + xb = BitArray(undef, mA) for i in 1:nB + fill!(xb, false) if ip + mA - 1 > nnzC - resize!(rowvalC, nnzC + max(nnzC,mA)) - resize!(nzvalC, nnzC + max(nnzC,mA)) - nnzC = length(nzvalC) + nnzC += max(mA, nnzC>>2) + resize!(rowvalC, nnzC) + resize!(nzvalC, nnzC) end - colptrC[i] = ip - for jp in colptrB[i]:(colptrB[i+1] - 1) + if B isa SparseMatrixCSC; colptrC[i] = ip end + for jp in nzrange(B, i) nzB = nzvalB[jp] j = rowvalB[jp] - for kp in colptrA[j]:(colptrA[j+1] - 1) + for kp in nzrange(A, j) nzC = nzvalA[kp] * nzB k = rowvalA[kp] - if xb[k] != i - rowvalC[ip] = k - ip += 1 - xb[k] = i - x[k] = nzC - else + if xb[k] x[k] += nzC + else + x[k] = nzC + xb[k] = true end end end - for vp in colptrC[i]:(ip - 1) - nzvalC[vp] = x[rowvalC[vp]] + for k in findall(xb) + nzvalC[ip] = x[k] + rowvalC[ip] = k + ip += 1 end end - colptrC[nB+1] = ip + if B isa SparseMatrixCSC; colptrC[nB+1] = ip end end - deleteat!(rowvalC, colptrC[end]:length(rowvalC)) - deleteat!(nzvalC, colptrC[end]:length(nzvalC)) + ip -= 1 + resize!(rowvalC, ip) + resize!(nzvalC, ip) + + # This modification of Gustavson's algorithm has sorted row indices. + # The not needed sort makes a major performance improvement and saves space. + if B isa SparseMatrixCSC + SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC) + else + SparseVector(mA, rowvalC, nzvalC) + end +end - # The Gustavson algorithm does not guarantee the product to have sorted row indices. - Cunsorted = SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC) - C = SparseArrays.sortSparseMatrixCSC!(Cunsorted, sortindices=sortindices) - return C +# For randomly distributed nonzeros of the factors this is a rather good estimation +# for the number of nonzeros of the product. +# For heavily structured matrices the value tends to over-estimation. +function estimate_mulsize(m::Integer, nnzA::Integer, n::Integer, nnzB::Integer, k::Integer) + p = (nnzA / (m * n)) * (nnzB / (n * k)) + isnan(p) ? 0 : Int(ceil(-expm1(log1p(-p) * n) * m * k)) # is (1 - (1 - p)^n) * m * k end # Frobenius dot/inner product: trace(A'B) diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl index 4328d59592ce9..040fde198a62e 100644 --- a/stdlib/SparseArrays/test/sparse.jl +++ b/stdlib/SparseArrays/test/sparse.jl @@ -318,8 +318,7 @@ end a = sprand(10, 5, 0.7) b = sprand(5, 15, 0.3) @test maximum(abs.(a*b - Array(a)*Array(b))) < 100*eps() - @test maximum(abs.(SparseArrays.spmatmul(a,b,sortindices=:sortcols) - Array(a)*Array(b))) < 100*eps() - @test maximum(abs.(SparseArrays.spmatmul(a,b,sortindices=:doubletranspose) - Array(a)*Array(b))) < 100*eps() + @test maximum(abs.(SparseArrays.spmatmul(a,b) - Array(a)*Array(b))) < 100*eps() f = Diagonal(rand(5)) @test Array(a*f) == Array(a)*f @test Array(f*b) == f*Array(b) From 7a49ff0b5624f6c5ba6932cf397c34f17a3feb82 Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Wed, 12 Dec 2018 21:50:38 +0100 Subject: [PATCH 3/5] reduced number of allocations --- stdlib/SparseArrays/src/linalg.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl index be8a8a90bb0c1..3f4c0c1409d5d 100644 --- a/stdlib/SparseArrays/src/linalg.jl +++ b/stdlib/SparseArrays/src/linalg.jl @@ -149,7 +149,7 @@ end # Gustavsen's matrix multiplication algorithm revisited. # The result rowval vector is already sorted by construction. -# The auxiliary Vector{Ti} xb is replaced by a BitArray of same length. +# The auxiliary Vector{Ti} xb is replaced by a Vector{Bool} of same length. # Besides SparseMatrixCSC also SparseVector is accepted as B. # The optional argument controlling a sorting algorithm is obsolete. function spmatmul(A::SparseMatrixCSC{Tv,Ti}, @@ -169,7 +169,7 @@ function spmatmul(A::SparseMatrixCSC{Tv,Ti}, @inbounds begin ip = 1 x = Vector{Tv}(undef, mA) - xb = BitArray(undef, mA) + xb = Vector{Bool}(undef, mA) for i in 1:nB fill!(xb, false) if ip + mA - 1 > nnzC @@ -192,10 +192,12 @@ function spmatmul(A::SparseMatrixCSC{Tv,Ti}, end end end - for k in findall(xb) - nzvalC[ip] = x[k] - rowvalC[ip] = k - ip += 1 + for k in 1:mA + if xb[k] + nzvalC[ip] = x[k] + rowvalC[ip] = k + ip += 1 + end end end if B isa SparseMatrixCSC; colptrC[nB+1] = ip end From 526b33fefe8f4e94b41bd951907cbdff8b15a9f8 Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Wed, 12 Dec 2018 22:11:55 +0100 Subject: [PATCH 4/5] increase initial allocation size --- stdlib/SparseArrays/src/linalg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl index 3f4c0c1409d5d..2c0b381ba0455 100644 --- a/stdlib/SparseArrays/src/linalg.jl +++ b/stdlib/SparseArrays/src/linalg.jl @@ -161,7 +161,7 @@ function spmatmul(A::SparseMatrixCSC{Tv,Ti}, rowvalA = rowvals(A); nzvalA = nonzeros(A) rowvalB = rowvals(B); nzvalB = nonzeros(B) - nnzC = estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) + nnzC = estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) * 11 ÷ 10 # 10% more if B isa SparseMatrixCSC; colptrC = Vector{Ti}(undef, nB+1) end rowvalC = Vector{Ti}(undef, nnzC) nzvalC = Vector{Tv}(undef, nnzC) From 855959124dbafadafdcd324771ec06831bc79a1a Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Sun, 16 Dec 2018 18:52:31 +0100 Subject: [PATCH 5/5] general performance improvements of spmatmul --- stdlib/SparseArrays/src/linalg.jl | 85 +++++++++++++++++++------------ 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl index 2c0b381ba0455..d8446d705d465 100644 --- a/stdlib/SparseArrays/src/linalg.jl +++ b/stdlib/SparseArrays/src/linalg.jl @@ -150,34 +150,35 @@ end # Gustavsen's matrix multiplication algorithm revisited. # The result rowval vector is already sorted by construction. # The auxiliary Vector{Ti} xb is replaced by a Vector{Bool} of same length. -# Besides SparseMatrixCSC also SparseVector is accepted as B. # The optional argument controlling a sorting algorithm is obsolete. -function spmatmul(A::SparseMatrixCSC{Tv,Ti}, - B::Union{<:SparseMatrixCSC{Tv,Ti},<:SparseVector{Tv,Ti}}) where {Tv,Ti,N} - +# depending on expected execution speed the sorting of the result column is +# done by a quicksort of the row indices or by a full scan of the dense result vector. +# The last is faster, if more than ≈ 1/32 of the result column is nonzero. +# TODO: extend to SparseMatrixCSCUnion to allow for SubArrays (view(X, :, r)). +function spmatmul(A::SparseMatrixCSC{Tv,Ti}, B::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} mA, nA = size(A) nB = size(B, 2) nA == size(B, 1) || throw(DimensionMismatch()) rowvalA = rowvals(A); nzvalA = nonzeros(A) rowvalB = rowvals(B); nzvalB = nonzeros(B) - nnzC = estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) * 11 ÷ 10 # 10% more - if B isa SparseMatrixCSC; colptrC = Vector{Ti}(undef, nB+1) end + nnzC = max(estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) * 11 ÷ 10, mA) + colptrC = Vector{Ti}(undef, nB+1) rowvalC = Vector{Ti}(undef, nnzC) nzvalC = Vector{Tv}(undef, nnzC) + nzpercol = nnzC ÷ max(nB, 1) @inbounds begin ip = 1 - x = Vector{Tv}(undef, mA) - xb = Vector{Bool}(undef, mA) + xb = fill(false, mA) for i in 1:nB - fill!(xb, false) if ip + mA - 1 > nnzC nnzC += max(mA, nnzC>>2) resize!(rowvalC, nnzC) resize!(nzvalC, nnzC) end - if B isa SparseMatrixCSC; colptrC[i] = ip end + colptrC[i] = ip0 = ip + k0 = ip - 1 for jp in nzrange(B, i) nzB = nzvalB[jp] j = rowvalB[jp] @@ -185,45 +186,65 @@ function spmatmul(A::SparseMatrixCSC{Tv,Ti}, nzC = nzvalA[kp] * nzB k = rowvalA[kp] if xb[k] - x[k] += nzC + nzvalC[k+k0] += nzC else - x[k] = nzC + nzvalC[k+k0] = nzC xb[k] = true + rowvalC[ip] = k + ip += 1 end end end - for k in 1:mA - if xb[k] - nzvalC[ip] = x[k] - rowvalC[ip] = k - ip += 1 + if ip > ip0 + if prefer_sort(ip-k0, mA) + # in-place sort of indices. Effort: O(nnz*ln(nnz)). + sort!(rowvalC, ip0, ip-1, QuickSort, Base.Order.Forward) + for vp = ip0:ip-1 + k = rowvalC[vp] + xb[k] = false + nzvalC[vp] = nzvalC[k+k0] + end + else + # scan result vector (effort O(mA)) + for k = 1:mA + if xb[k] + xb[k] = false + rowvalC[ip0] = k + nzvalC[ip0] = nzvalC[k+k0] + ip0 += 1 + end + end end end end - if B isa SparseMatrixCSC; colptrC[nB+1] = ip end + colptrC[nB+1] = ip end - ip -= 1 - resize!(rowvalC, ip) - resize!(nzvalC, ip) + resize!(rowvalC, ip - 1) + resize!(nzvalC, ip - 1) - # This modification of Gustavson's algorithm has sorted row indices. - # The not needed sort makes a major performance improvement and saves space. - if B isa SparseMatrixCSC - SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC) - else - SparseVector(mA, rowvalC, nzvalC) - end + # This modification of Gustavson algorithm has sorted row indices + C = SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC) + return C end -# For randomly distributed nonzeros of the factors this is a rather good estimation -# for the number of nonzeros of the product. -# For heavily structured matrices the value tends to over-estimation. +# estimated number of non-zeros in matrix product +# it is assumed, that the non-zero indices are distributed independently and uniformly +# in both matrices. Over-estimation is possible if that is not the case. function estimate_mulsize(m::Integer, nnzA::Integer, n::Integer, nnzB::Integer, k::Integer) p = (nnzA / (m * n)) * (nnzB / (n * k)) - isnan(p) ? 0 : Int(ceil(-expm1(log1p(-p) * n) * m * k)) # is (1 - (1 - p)^n) * m * k + p >= 1 ? m*k : p > 0 ? Int(ceil(-expm1(log1p(-p) * n)*m*k)) : 0 # (1-(1-p)^n)*m*k end +# determine if sort! shall be used or the whole column be scanned +# based on empirical data on i7-3610QM CPU +# measuring runtimes of the scanning and sorting loops of the algorithm. +# The parameters 6 and 3 might be modified for different architectures. +prefer_sort(nz::Integer, m::Integer) = m > 6 && 3 * ilog2(nz) * nz < m + +# minimal number of bits required to represent integer; ilog2(n) >= log2(n) +ilog2(n::Integer) = sizeof(n)<<3 - leading_zeros(n) + # Frobenius dot/inner product: trace(A'B) function dot(A::SparseMatrixCSC{T1,S1},B::SparseMatrixCSC{T2,S2}) where {T1,T2,S1,S2} m, n = size(A)