Skip to content

Commit

Permalink
Argument checks for SparseMatrixCSC constructors (#31724)
Browse files Browse the repository at this point in the history
* reconstruct PR #31118

* reconstruct PR 31118 2

* Check arguments of SparseMatrixCSC #31024 #31435

* fix SuiteSparse test

* added NEWS, fixed tests

* loosen restrictions - resize to useful length

* cleaned up NEWS, revert minor change

* add non-checking and checking constructor - improve check performance
  • Loading branch information
KlausC authored and ViralBShah committed Jun 26, 2019
1 parent d6b6cb5 commit b32c1b8
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 40 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ Standard library changes

#### SparseArrays

* `SparseMatrixCSC(m,n,colptr,rowval,nzval)` perform consistency checks for arguments:
`colptr` must be properly populated and lengths of `colptr`, `rowval`, and `nzval`
must be compatible with `m`, `n`, and `eltype(colptr)`.
* `sparse(I, J, V, m, n)` verifies lengths of `I`, `J`, `V` are equal and compatible with
`eltype(I)` and `m`, `n`.

#### Dates

Expand Down
2 changes: 1 addition & 1 deletion stdlib/SparseArrays/src/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ _maxnnzfrom(shape::NTuple{2}, A::SparseMatrixCSC) = nnz(A) * div(shape[1], A.m)
return SparseVector(shape..., storedinds, storedvals)
end
@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
pointers = Vector{indextype}(undef, shape[2] + 1)
pointers = ones(indextype, shape[2] + 1)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
Expand Down
131 changes: 96 additions & 35 deletions stdlib/SparseArrays/src/sparsematrix.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Compressed sparse columns data structure
# Assumes that no zeros are stored in the data structure
# No assumptions about stored zeros in the data structure
# Assumes that row values in rowval for each column are sorted
# issorted(rowval[colptr[i]:(colptr[i+1]-1)]) == true
# Assumes that 1 <= colptr[i] <= colptr[i+1] for i in 1..n
# Assumes that nnz <= length(rowval) < typemax(Ti)
# Assumes that 0 <= length(nzval) < typemax(Ti)

"""
SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
Expand All @@ -20,8 +23,8 @@ struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
rowval::Vector{Ti} # Row indices of stored values
nzval::Vector{Tv} # Stored values, typically nonzeros

function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti}, rowval::Vector{Ti},
nzval::Vector{Tv}) where {Tv,Ti<:Integer}
function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti},
rowval::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
@noinline throwsz(str, lbl, k) =
throw(ArgumentError("number of $str ($lbl) must be ≥ 0, got $k"))
m < 0 && throwsz("rows", 'm', m)
Expand All @@ -32,9 +35,42 @@ end
function SparseMatrixCSC(m::Integer, n::Integer, colptr::Vector, rowval::Vector, nzval::Vector)
Tv = eltype(nzval)
Ti = promote_type(eltype(colptr), eltype(rowval))
sparse_check_Ti(m, n, Ti)
sparse_check(n, colptr, rowval, nzval)
# silently shorten rowval and nzval to usable index positions.
maxlen = abs(widemul(m, n))
isbitstype(Ti) && (maxlen = min(maxlen, typemax(Ti) - 1))
length(rowval) > maxlen && resize!(rowval, maxlen)
length(nzval) > maxlen && resize!(nzval, maxlen)
SparseMatrixCSC{Tv,Ti}(m, n, colptr, rowval, nzval)
end

function sparse_check_Ti(m::Integer, n::Integer, Ti::Type)
@noinline throwTi(str, lbl, k) =
throw(ArgumentError("$str ($lbl = $k) does not fit in Ti = $(Ti)"))
0 m && (!isbitstype(Ti) || m typemax(Ti)) || throwTi("number of rows", "m", m)
0 n && (!isbitstype(Ti) || n typemax(Ti)) || throwTi("number of columns", "n", n)
end

function sparse_check(n::Integer, colptr::Vector{Ti}, rowval, nzval) where Ti
sparse_check_length("colptr", colptr, n+1, String) # don't check upper bound
ckp = Ti(1)
ckp == colptr[1] || throw(ArgumentError("$ckp == colptr[1] != 1"))
@inbounds for k = 2:n+1
ck = colptr[k]
ckp <= ck || throw(ArgumentError("$ckp == colptr[$(k-1)] > colptr[$k] == $ck"))
ckp = ck
end
sparse_check_length("rowval", rowval, ckp-1, Ti)
sparse_check_length("nzval", nzval, 0, Ti) # we allow empty nzval !!!
end
function sparse_check_length(rowstr, rowval, minlen, Ti)
len = length(rowval)
len >= minlen || throw(ArgumentError("$len == length($rowstr) < $minlen"))
!isbitstype(Ti) || len < typemax(Ti) ||
throw(ArgumentError("$len == length($rowstr) >= $(typemax(Ti))"))
end

size(S::SparseMatrixCSC) = (S.m, S.n)

# Define an alias for views of a SparseMatrixCSC which include all rows and a unit range of the columns.
Expand Down Expand Up @@ -497,7 +533,10 @@ function sparse(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{
"length(I) (=$(length(I))) == length(J) (= $(length(J))) == length(V) (= ",
"$(length(V)))")))
end

Tj = Ti
while isbitstype(Tj) && coolen >= typemax(Tj)
Tj = widen(Tj)

This comment has been minimized.

Copy link
@KristofferC

KristofferC Aug 26, 2019

Member

This made Tj be unknown to the compiler, causing the regression reported in #32985

end
if m == 0 || n == 0 || coolen == 0
if coolen != 0
if n == 0
Expand All @@ -509,13 +548,13 @@ function sparse(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{
SparseMatrixCSC(m, n, fill(one(Ti), n+1), Vector{Ti}(), Vector{Tv}())
else
# Allocate storage for CSR form
csrrowptr = Vector{Ti}(undef, m+1)
csrrowptr = Vector{Tj}(undef, m+1)
csrcolval = Vector{Ti}(undef, coolen)
csrnzval = Vector{Tv}(undef, coolen)

# Allocate storage for the CSC form's column pointers and a necessary workspace
csccolptr = Vector{Ti}(undef, n+1)
klasttouch = Vector{Ti}(undef, n)
klasttouch = Vector{Tj}(undef, n)

# Allocate empty arrays for the CSC form's row and nonzero value arrays
# The parent method called below automagically resizes these arrays
Expand Down Expand Up @@ -580,25 +619,28 @@ transposition," ACM TOMS 4(3), 250-269 (1978) inspired this method's use of a pa
counting sorts.
"""
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}

require_one_based_indexing(I, J, V)
sparse_check_Ti(m, n, Ti)
sparse_check_length("I", I, 0, Tj)
# Compute the CSR form's row counts and store them shifted forward by one in csrrowptr
fill!(csrrowptr, Ti(0))
fill!(csrrowptr, Tj(0))
coolen = length(I)
min(length(J), length(V)) >= coolen || throw(ArgumentError("J and V need length >= length(I) = $coolen"))
@inbounds for k in 1:coolen
Ik = I[k]
if 1 > Ik || m < Ik
throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
end
csrrowptr[Ik+1] += Ti(1)
csrrowptr[Ik+1] += Tj(1)
end

# Compute the CSR form's rowptrs and store them shifted forward by one in csrrowptr
countsum = Ti(1)
csrrowptr[1] = Ti(1)
countsum = Tj(1)
csrrowptr[1] = Tj(1)
@inbounds for i in 2:(m+1)
overwritten = csrrowptr[i]
csrrowptr[i] = countsum
Expand All @@ -613,7 +655,8 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
end
csrk = csrrowptr[Ik+1]
csrrowptr[Ik+1] = csrk + Ti(1)
@assert csrk >= Tj(1) "index into csrcolval exceeds typemax(Ti)"
csrrowptr[Ik+1] = csrk + Tj(1)
csrcolval[csrk] = Jk
csrnzval[csrk] = V[k]
end
Expand All @@ -626,21 +669,21 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
# Minimizing extraneous communication and nonlocality of reference, primarily by using
# only a single auxiliary array in this step, is the key to this method's performance.
fill!(csccolptr, Ti(0))
fill!(klasttouch, Ti(0))
writek = Ti(1)
fill!(klasttouch, Tj(0))
writek = Tj(1)
newcsrrowptri = Ti(1)
origcsrrowptri = Ti(1)
origcsrrowptri = Tj(1)
origcsrrowptrip1 = csrrowptr[2]
@inbounds for i in 1:m
for readk in origcsrrowptri:(origcsrrowptrip1-Ti(1))
for readk in origcsrrowptri:(origcsrrowptrip1-Tj(1))
j = csrcolval[readk]
if klasttouch[j] < newcsrrowptri
klasttouch[j] = writek
if writek != readk
csrcolval[writek] = j
csrnzval[writek] = csrnzval[readk]
end
writek += Ti(1)
writek += Tj(1)
csccolptr[j+1] += Ti(1)
else
klt = klasttouch[j]
Expand All @@ -654,23 +697,24 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
end

# Compute the CSC form's colptrs and store them shifted forward by one in csccolptr
countsum = Ti(1)
countsum = Tj(1)
csccolptr[1] = Ti(1)
@inbounds for j in 2:(n+1)
overwritten = csccolptr[j]
csccolptr[j] = countsum
countsum += overwritten
countsum <= typemax(Ti) || throw(ArgumentError("more than typemax(Ti)-1 == $(typemax(Ti)-1) entries"))
end

# Now knowing the CSC form's entry count, resize cscrowval and cscnzval if necessary
cscnnz = countsum - Ti(1)
cscnnz = countsum - Tj(1)
length(cscrowval) < cscnnz && resize!(cscrowval, cscnnz)
length(cscnzval) < cscnnz && resize!(cscnzval, cscnnz)

# Finally counting-sort the row and nonzero values from the CSR form into cscrowval and
# cscnzval. Tracking write positions in csccolptr corrects the column pointers.
@inbounds for i in 1:m
for csrk in csrrowptr[i]:(csrrowptr[i+1]-Ti(1))
for csrk in csrrowptr[i]:(csrrowptr[i+1]-Tj(1))
j = csrcolval[csrk]
x = csrnzval[csrk]
csck = csccolptr[j+1]
Expand All @@ -683,16 +727,16 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
SparseMatrixCSC(m, n, csccolptr, cscrowval, cscnzval)
end
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}) where {Tv,Ti<:Integer,Tj<:Integer}
sparse!(I, J, V, m, n, combine, klasttouch,
csrrowptr, csrcolval, csrnzval,
csccolptr, Vector{Ti}(), Vector{Tv}())
end
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}
sparse!(I, J, V, m, n, combine, klasttouch,
csrrowptr, csrcolval, csrnzval,
Vector{Ti}(undef, n+1), Vector{Ti}(), Vector{Tv}())
Expand Down Expand Up @@ -826,7 +870,7 @@ adjoint!(X::SparseMatrixCSC{Tv,Ti}, A::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = f

function ftranspose(A::SparseMatrixCSC{Tv,Ti}, f::Function) where {Tv,Ti}
X = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m+1),
ones(Ti, A.m+1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
halfperm!(X, A, 1:A.n, f)
Expand Down Expand Up @@ -1045,7 +1089,7 @@ function permute!(X::SparseMatrixCSC{Tv,Ti}, A::SparseMatrixCSC{Tv,Ti},
_checkargs_sourcecompatdest_permute!(A, X)
_checkargs_sourcecompatperms_permute!(A, p, q)
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
_checkargs_permutationsvalid_permute!(p, C.colptr, q, X.colptr)
Expand All @@ -1064,7 +1108,7 @@ function permute!(A::SparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
q::AbstractVector{<:Integer}) where {Tv,Ti}
_checkargs_sourcecompatperms_permute!(A, p, q)
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
workcolptr = Vector{Ti}(undef, A.n + 1)
Expand Down Expand Up @@ -1135,11 +1179,11 @@ function permute(A::SparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
q::AbstractVector{<:Integer}) where {Tv,Ti}
_checkargs_sourcecompatperms_permute!(A, p, q)
X = SparseMatrixCSC(A.m, A.n,
Vector{Ti}(undef, A.n + 1),
ones(Ti, A.n + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
_checkargs_permutationsvalid_permute!(p, C.colptr, q, X.colptr)
Expand Down Expand Up @@ -2331,15 +2375,32 @@ function _setindex_scalar!(A::SparseMatrixCSC{Tv,Ti}, _v, _i::Integer, _j::Integ
# Column j does not contain entry A[i,j]. If v is nonzero, insert entry A[i,j] = v
# and return. If to the contrary v is zero, then simply return.
if !iszero(v)
insert!(A.rowval, searchk, i)
insert!(A.nzval, searchk, v)
nz = A.colptr[A.n+1]
# throw exception before state is partially modified
!isbitstype(Ti) || nz < typemax(Ti) ||
throw(ArgumentError("nnz(A) going to exceed typemax(Ti) = $(typemax(Ti))"))

# if nnz(A) < length(rowval/nzval): no need to grow rowval and preserve values
_insert!(A.rowval, searchk, i, nz)
_insert!(A.nzval, searchk, v, nz)
@simd for m in (j + 1):(A.n + 1)
@inbounds A.colptr[m] += 1
@inbounds A.colptr[m] += Ti(1)
end
end
return A
end

# insert item at position pos, shifting only from pos+1 to nz
function _insert!(v::Vector, pos::Integer, item, nz::Integer)
if nz > length(v)
insert!(v, pos, item)
else # nz < length(v)
Base.unsafe_copyto!(v, pos+1, v, pos, nz - pos)
v[pos] = item
v
end
end

function Base.fill!(V::SubArray{Tv, <:Any, <:SparseMatrixCSC, Tuple{Vararg{Union{Integer, AbstractVector{<:Integer}},2}}}, x) where Tv
A = V.parent
I, J = V.indices
Expand Down
56 changes: 56 additions & 0 deletions stdlib/SparseArrays/test/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2597,6 +2597,7 @@ end
A = SparseMatrixCSC(Complex{BigInt}[1+im 2+2im]')'[1:1, 2:2]
# ...ensure it does! If necessary, the test needs to be updated to use
# another mechanism to create a suitable A.
resize!(A.nzval, 2)
@assert length(A.nzval) > nnz(A)
@test -A == fill(-2-2im, 1, 1)
@test conj(A) == fill(2-2im, 1, 1)
Expand All @@ -2617,4 +2618,59 @@ end
@test sum(x1, dims=2) == sum(x2, dims=2)
end

@testset "Ti cannot store all potential values #31024" begin
# m * n >= typemax(Ti) but nnz < typemax(Ti)
A = SparseMatrixCSC(12, 12, fill(Int8(1),13), Int8[], Int[])
@test size(A) == (12,12) && nnz(A) == 0
I1 = [Int8(i) for i in 1:20 for _ in 1:20]
J1 = [Int8(i) for _ in 1:20 for i in 1:20]
# m * n >= typemax(Ti) and nnz >= typemax(Ti)
@test_throws ArgumentError sparse(I1, J1, ones(length(I1)))
I1 = Int8.(rand(1:10, 500))
J1 = Int8.(rand(1:10, 500))
V1 = ones(500)
# m * n < typemax(Ti) and length(I) >= typemax(Ti) - combining values
@test sparse(I1, J1, V1, 10, 10) !== nothing
# m * n >= typemax(Ti) and length(I) >= typemax(Ti)
@test sparse(I1, J1, V1, 12, 13) !== nothing
I1 = Int8.(rand(1:10, 126))
J1 = Int8.(rand(1:10, 126))
V1 = ones(126)
# m * n >= typemax(Ti) and length(I) < typemax(Ti)
@test sparse(I1, J1, V1, 100, 100) !== nothing
end

@testset "Typecheck too strict #31435" begin
A = SparseMatrixCSC{Int,Int8}(70, 2, fill(Int8(1), 3), Int8[], Int[])
A[5:67,1:2] .= ones(Int, 63, 2)
@test nnz(A) == 126
# nnz >= typemax
@test_throws ArgumentError A[2,1] = 42
# colptr short
@test_throws ArgumentError SparseMatrixCSC(1, 1, Int[], Int[], Float64[])
# colptr[1] must be 1
@test_throws ArgumentError SparseMatrixCSC(10, 3, [0,1,1,1], Int[], Float64[])
# colptr not ascending
@test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,1,2], Int[], Float64[])
# rowwal (and nzval) short
@test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,2,4], [1,2], Float64[])
# nzval short
@test SparseMatrixCSC(10, 3, [1,2,2,4], [1,2,3], Float64[]) !== nothing
# length(rowval) >= typemax
@test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3])
@test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3]) != 0
# length(nzval) >= typemax
@test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], Int8[1], fill(7, 127))
@test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], Int8[1], fill(7, 127)) != 0

# length(I) >= typemax
@test_throws ArgumentError sparse(UInt8.(1:255), fill(UInt8(1), 255), fill(1, 255))
# m > typemax
@test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 256, 1)
# n > typemax
@test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 256)
# n, m maximal
@test sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 255) !== nothing
end

end # module
6 changes: 2 additions & 4 deletions stdlib/SuiteSparse/test/cholmod.jl
Original file line number Diff line number Diff line change
Expand Up @@ -764,10 +764,8 @@ end
end

@testset "Check inputs to Sparse. Related to #20024" for A_ in (
SparseMatrixCSC(2, 2, [1, 2], CHOLMOD.SuiteSparse_long[], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[], Float64[1.0]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1], Float64[1.0]))
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[1.0]))
@test_throws ArgumentError CHOLMOD.Sparse(size(A_)..., A_.colptr .- 1, A_.rowval .- 1, A_.nzval)
@test_throws ArgumentError CHOLMOD.Sparse(A_)
end
Expand Down

0 comments on commit b32c1b8

Please sign in to comment.