
stricter buffer sizes in SparseMatrixCSC (#40523)
Make length(A.nzval)==nnz(A) and add strict buffer checking (#30662)

* Add sizehint!(::SparseMatrixCSC, args...).
* Fix illegal SparseMatrixCSC construction in cholmod and linalg.
* Remove tests targeting now-illegal buffers.
* Fix invalid buffer creation in kron and more.
* Use widelength in sizehint! to cope with large matrices on 32-bit systems.
abraunst authored Apr 24, 2021
1 parent 3b91a15 commit 248c02f
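To make the headline change concrete: after this commit a `SparseMatrixCSC` is expected to keep its row-index and value buffers exactly as long as its stored-entry count. The following is a minimal sketch using only the public SparseArrays API; the matrix and values are illustrative, not taken from the commit:

```julia
using SparseArrays

# A 3×3 sparse matrix with 4 stored entries.
A = sparse([1, 2, 3, 1], [1, 2, 3, 3], [1.0, 2.0, 3.0, 4.0])

# The stricter invariant: the internal rowval/nzval buffers are exactly
# as long as the number of stored entries, i.e. length(A.nzval) == nnz(A).
@assert length(nonzeros(A)) == nnz(A) == 4
@assert length(rowvals(A)) == nnz(A)
```

Construction paths that previously left oversized trailing buffers (in cholmod, linalg, and kron) are reworked in the diff below to respect this invariant.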
Showing 9 changed files with 215 additions and 277 deletions.
1 change: 1 addition & 0 deletions NEWS.md
@@ -113,6 +113,7 @@ Standard library changes

#### SparseArrays

* new `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).

#### Dates

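A hedged usage sketch of the new `sizehint!` method mentioned in the NEWS entry above: it reserves capacity in the matrix's internal buffers ahead of incremental insertion without changing `nnz`. The sizes below are arbitrary, and how much the hint actually helps is an implementation detail:

```julia
using SparseArrays

n = 1_000
S = spzeros(Float64, n, n)

# Reserve room for an expected number of stored entries up front, so the
# internal buffers need fewer reallocations while S is filled in.
sizehint!(S, n)

for i in 1:n
    S[i, i] = 1.0
end

@assert nnz(S) == n
```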
64 changes: 32 additions & 32 deletions stdlib/SparseArrays/src/higherorderfns.jl
@@ -8,7 +8,7 @@ import Base: map, map!, broadcast, copy, copyto!, _extrema_dims, _extrema_itr

using Base: front, tail, to_shape
using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange,
AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange, spzeros,
SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros, rowvals, getcolptr, widelength
using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
using LinearAlgebra
@@ -132,12 +132,17 @@ function trimstorage!(A::SparseVecOrMat, maxstored)
resize!(storedvals(A), maxstored)
return maxstored
end

function expandstorage!(A::SparseVecOrMat, maxstored)
length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)
if length(storedinds(A)) < maxstored
resize!(storedinds(A), maxstored)
resize!(storedvals(A), maxstored)
end
return maxstored
end

_checkbuffers(S::SparseMatrixCSC) = (@assert length(getcolptr(S)) == size(S, 2) + 1 && getcolptr(S)[end] - 1 == length(rowvals(S)) == length(nonzeros(S)); S)
_checkbuffers(S::SparseVector) = (@assert length(storedvals(S)) == length(storedinds(S)); S)

# (2) map[!] entry points
map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
@@ -181,7 +186,7 @@ copy(bc::SpBroadcasted1) = _noshapecheck_map(bc.f, bc.args[1])
storedvals(C)[1] = fofnoargs
broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
end
return C
return _checkbuffers(C)
end

function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
@@ -224,22 +229,17 @@ _maxnnzfrom(shape::NTuple{2}, A::AbstractSparseMatrixCSC) = nnz(A) * div(shape[1
@inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
@inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
@inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseVector(shape..., storedinds, storedvals)
end
@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
pointers = ones(indextype, shape[2] + 1)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
@inline function _allocres(shape::Union{NTuple{1},NTuple{2}}, indextype, entrytype, maxnnz)
X = spzeros(entrytype, indextype, shape)
resize!(storedinds(X), maxnnz)
resize!(storedvals(X), maxnnz)
return X
end

# (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
"Stores only the nonzero entries of `map(f, Array(A))` in `C`."
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
Ck = 1
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
@@ -255,7 +255,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
"""
Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
@@ -274,7 +274,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
end
# NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
# nonsequential access of storedvals(C) does not appear to improve performance.
return C
return _checkbuffers(C)
end
# helper functions for these methods and some of those below
@inline _densecoloffsets(A::SparseVector) = 0
@@ -297,7 +297,7 @@ end

# (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
Ck = 1
@@ -336,7 +336,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVe
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
# Build dense matrix structure in C, expanding storage if necessary
@@ -368,13 +368,13 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
end
end
return C
return _checkbuffers(C)
end


# (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
rowsentinel = numrows(C) + 1
Ck = 1
stopks = _colstartind_all(1, As)
@@ -398,7 +398,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N})
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
# Build dense matrix structure in C, expanding storage if necessary
@@ -421,7 +421,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{Spars
activerow = min(rows...)
end
end
return C
return _checkbuffers(C)
end

# helper methods for map/map! methods just above
@@ -462,7 +462,7 @@ end
# (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
# C and A cannot have the same shape, as we directed that case to map in broadcast's
# entry point; here we need efficiently handle only heterogeneous C-A combinations where
# one or both of C and A has at least one singleton dimension.
@@ -509,7 +509,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
# For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -540,14 +540,14 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
end
end
end
return C
return _checkbuffers(C)
end


# (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
# C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
@@ -711,7 +711,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
# For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -810,7 +810,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
end
end
end
return C
return _checkbuffers(C)
end
_finishempty!(C::SparseVector) = C
_finishempty!(C::AbstractSparseMatrixCSC) = (fill!(getcolptr(C), 1); C)
@@ -861,7 +861,7 @@ end
# (9) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = length(nonzeros(C))
expandsverts = _expandsvert_all(C, As)
expandshorzs = _expandshorz_all(C, As)
rowsentinel = numrows(C) + 1
@@ -909,7 +909,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
isempty(C) && return _finishempty!(C)
@@ -950,7 +950,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg
end
end
end
return C
return _checkbuffers(C)
end

# helper method for broadcast/broadcast! methods just above
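The recurring edits in this file replace `return C` with `return _checkbuffers(C)` and compute the available space from `length(nonzeros(C))`, so every map/broadcast kernel now hands back a result whose buffers match its stored entries, with `_checkbuffers` asserting exactly that. Since `_checkbuffers` is internal, the sketch below only checks the observable effect through the public API, using arbitrary random inputs:

```julia
using SparseArrays

A = sprand(10, 10, 0.2)
B = sprand(10, 10, 0.2)

# Broadcasting over sparse arguments goes through the kernels patched above;
# the result comes back with buffers trimmed to its stored entries.
C = A .+ B
@assert length(nonzeros(C)) == nnz(C)
@assert length(rowvals(C)) == nnz(C)

# The same expectation holds for map over a single sparse matrix.
D = map(x -> 2x, A)
@assert length(nonzeros(D)) == nnz(D)
```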
49 changes: 18 additions & 31 deletions stdlib/SparseArrays/src/linalg.jl
@@ -935,16 +935,15 @@ function triu(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
end
rowval = Vector{Ti}(undef, nnz)
nzval = Vector{Tv}(undef, nnz)
A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
for col = max(k+1,1) : n
c1 = getcolptr(S)[col]
for c2 in nzrange(A, col)
rowvals(A)[c2] = rowvals(S)[c1]
nonzeros(A)[c2] = nonzeros(S)[c1]
for c2 in colptr[col]:colptr[col+1]-1
rowval[c2] = rowvals(S)[c1]
nzval[c2] = nonzeros(S)[c1]
c1 += 1
end
end
A
SparseMatrixCSC(m, n, colptr, rowval, nzval)
end

function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
Expand All @@ -965,17 +964,16 @@ function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
end
rowval = Vector{Ti}(undef, nnz)
nzval = Vector{Tv}(undef, nnz)
A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
for col = 1 : min(n, m+k)
c1 = getcolptr(S)[col+1]-1
l2 = getcolptr(A)[col+1]-1
for c2 = 0 : l2 - getcolptr(A)[col]
rowvals(A)[l2 - c2] = rowvals(S)[c1]
nonzeros(A)[l2 - c2] = nonzeros(S)[c1]
l2 = colptr[col+1]-1
for c2 = 0 : l2 - colptr[col]
rowval[l2 - c2] = rowvals(S)[c1]
nzval[l2 - c2] = nonzeros(S)[c1]
c1 -= 1
end
end
A
SparseMatrixCSC(m, n, colptr, rowval, nzval)
end

## diff
@@ -1340,19 +1338,16 @@ end

## kron
@inline function kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
nnzC = nnz(A)*nnz(B)
mA, nA = size(A); mB, nB = size(B)
mC, nC = mA*mB, nA*nB

rowvalC = rowvals(C)
nzvalC = nonzeros(C)
colptrC = getcolptr(C)

@boundscheck begin
length(colptrC) == nC+1 || throw(DimensionMismatch("expect C to be preallocated with $(nC+1) colptrs "))
length(rowvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) rowvals"))
length(nzvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) nzvals"))
end
nnzC = nnz(A)*nnz(B)
resize!(nzvalC, nnzC)
resize!(rowvalC, nnzC)

col = 1
@inbounds for j = 1:nA
@@ -1381,16 +1376,13 @@ end
end

@inline function kron!(z::SparseVector, x::SparseVector, y::SparseVector)
nnzx = nnz(x); nnzy = nnz(y); nnzz = nnz(z);
nnzx = nnz(x); nnzy = nnz(y);
nzind = nonzeroinds(z)
nzval = nonzeros(z)

@boundscheck begin
nnzval = length(nzval); nnzind = length(nzind)
nnzz = nnzx*nnzy
nnzval == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
nnzind == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
end
nnzz = nnzx*nnzy
resize!(nzind, nnzz)
resize!(nzval, nnzz)

@inbounds for i = 1:nnzx, j = 1:nnzy
this_ind = (i-1)*nnzy+j
Expand All @@ -1402,17 +1394,12 @@ end

# sparse matrix ⊗ sparse matrix
function kron(A::AbstractSparseMatrixCSC{T1,S1}, B::AbstractSparseMatrixCSC{T2,S2}) where {T1,S1,T2,S2}
nnzC = nnz(A)*nnz(B)
mA, nA = size(A); mB, nB = size(B)
mC, nC = mA*mB, nA*nB
Tv = typeof(one(T1)*one(T2))
Ti = promote_type(S1,S2)
colptrC = Vector{Ti}(undef, nC+1)
rowvalC = Vector{Ti}(undef, nnzC)
nzvalC = Vector{Tv}(undef, nnzC)
colptrC[1] = 1
# skip sparse_check
C = SparseMatrixCSC{Tv, Ti}(mC, nC, colptrC, rowvalC, nzvalC)
C = spzeros(Tv, Ti, mC, nC)
sizehint!(C, nnz(A)*nnz(B))
return @inbounds kron!(C, A, B)
end

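In the kron path above, the result is now allocated with `spzeros` plus `sizehint!`, and the in-place `kron!` resizes the destination's row-index and value buffers to `nnz(A)*nnz(B)` itself instead of requiring a destination preallocated to that exact size. A small sketch of the observable outcome with arbitrary random operands; the in-place `kron!` is left out since its public visibility varies across Julia versions:

```julia
using SparseArrays
using LinearAlgebra   # provides kron for sparse matrices

A = sprand(20, 20, 0.1)
B = sprand(5, 5, 0.3)

# kron allocates its result via spzeros + sizehint! and fills it in place,
# so the output buffers end up exactly sized to the stored entries.
K = kron(A, B)
@assert nnz(K) == nnz(A) * nnz(B)
@assert length(nonzeros(K)) == nnz(K)
@assert size(K) == (size(A, 1) * size(B, 1), size(A, 2) * size(B, 2))
```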
(diffs for the remaining six changed files are not shown)

4 comments on commit 248c02f

@vtjnash
Member

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Collaborator

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @christopher-dG

@nanosoldier
Collaborator

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Collaborator

Your package evaluation job has completed - possible new issues were detected. A full report can be found here. cc @maleadt
