diff --git a/lib/cusparse/CUSPARSE.jl b/lib/cusparse/CUSPARSE.jl index 6f8825d498..494bf446bb 100644 --- a/lib/cusparse/CUSPARSE.jl +++ b/lib/cusparse/CUSPARSE.jl @@ -8,6 +8,13 @@ using ..CUDA: libcusparse, unsafe_free!, @retry_reclaim using CEnum +using LinearAlgebra +using LinearAlgebra: HermOrSym + +using Adapt + +using SparseArrays + const SparseChar = Char # core library diff --git a/lib/cusparse/array.jl b/lib/cusparse/array.jl index cd94187421..f9bca78170 100644 --- a/lib/cusparse/array.jl +++ b/lib/cusparse/array.jl @@ -5,16 +5,8 @@ export CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixBSR, CuSparseMatrix, AbstractCuSparseMatrix, CuSparseVector -import Base: length, size, ndims, eltype, similar, pointer, stride, - copy, convert, reinterpret, show, summary, copyto!, getindex, get!, fill!, collect - -using LinearAlgebra -import LinearAlgebra: BlasFloat, Hermitian, HermOrSym, issymmetric, Transpose, Adjoint, - ishermitian, istriu, istril, Symmetric, UpperTriangular, LowerTriangular - -using SparseArrays -import SparseArrays: sparse, SparseMatrixCSC, nnz, nonzeros, nonzeroinds, - _spgetindex +using LinearAlgebra: BlasFloat +using SparseArrays: nonzeroinds abstract type AbstractCuSparseArray{Tv, N} <: AbstractSparseArray{Tv, Cint, N} end const AbstractCuSparseVector{Tv} = AbstractCuSparseArray{Tv,1} @@ -26,14 +18,15 @@ mutable struct CuSparseVector{Tv} <: AbstractCuSparseVector{Tv} dims::NTuple{2,Int} nnz::Cint - function CuSparseVector{Tv}(iPtr::CuVector{Cint}, nzVal::CuVector{Tv}, dims::Int, nnz::Cint) where Tv - new(iPtr,nzVal,(dims,1),nnz) + function CuSparseVector{Tv}(iPtr::CuVector{<:Integer}, nzVal::CuVector, + dims::Integer) where Tv + new(iPtr, nzVal, (dims,1), length(nzVal)) end end function CUDA.unsafe_free!(xs::CuSparseVector) - unsafe_free!(xs.iPtr) - unsafe_free!(xs.nzVal) + unsafe_free!(nonzeroinds(xs)) + unsafe_free!(nonzeros(xs)) return end @@ -44,15 +37,16 @@ mutable struct CuSparseMatrixCSC{Tv} <: AbstractCuSparseMatrix{Tv} dims::NTuple{2,Int} nnz::Cint - function CuSparseMatrixCSC{Tv}(colPtr::CuVector{Cint}, rowVal::CuVector{Cint}, nzVal::CuVector{Tv}, dims::NTuple{2,Int}, nnz::Cint) where Tv - new(colPtr,rowVal,nzVal,dims,nnz) + function CuSparseMatrixCSC{Tv}(colPtr::CuVector{<:Integer}, rowVal::CuVector{<:Integer}, + nzVal::CuVector, dims::NTuple{2,<:Integer}) where Tv + new(colPtr, rowVal, nzVal, dims, length(nzVal)) end end -function CuSparseMatrixCSC!(xs::CuSparseVector) +function CUDA.unsafe_free!(xs::CuSparseMatrixCSC) unsafe_free!(xs.colPtr) - unsafe_free!(xs.rowVal) - unsafe_free!(xs.nzVal) + unsafe_free!(rowvals(xs)) + unsafe_free!(nonzeros(xs)) return end @@ -70,15 +64,16 @@ mutable struct CuSparseMatrixCSR{Tv} <: AbstractCuSparseMatrix{Tv} dims::NTuple{2,Int} nnz::Cint - function CuSparseMatrixCSR{Tv}(rowPtr::CuVector{Cint}, colVal::CuVector{Cint}, nzVal::CuVector{Tv}, dims::NTuple{2,Int}, nnz::Cint) where Tv - new(rowPtr,colVal,nzVal,dims,nnz) + function CuSparseMatrixCSR{Tv}(rowPtr::CuVector{<:Integer}, colVal::CuVector{<:Integer}, + nzVal::CuVector, dims::NTuple{2,<:Integer}) where Tv + new(rowPtr, colVal, nzVal, dims, length(nzVal)) end end -function CuSparseMatrixCSR!(xs::CuSparseVector) +function CUDA.unsafe_free!(xs::CuSparseMatrixCSR) unsafe_free!(xs.rowPtr) unsafe_free!(xs.colVal) - unsafe_free!(xs.nzVal) + unsafe_free!(nonzeros(xs)) return end @@ -96,42 +91,60 @@ mutable struct CuSparseMatrixBSR{Tv} <: AbstractCuSparseMatrix{Tv} dir::SparseChar nnz::Cint - function CuSparseMatrixBSR{Tv}(rowPtr::CuVector{Cint}, colVal::CuVector{Cint}, nzVal::CuVector{Tv}, dims::NTuple{2,Int},blockDim::Cint, dir::SparseChar, nnz::Cint) where Tv - new(rowPtr,colVal,nzVal,dims,blockDim,dir,nnz) + function CuSparseMatrixBSR{Tv}(rowPtr::CuVector{<:Integer}, colVal::CuVector{<:Integer}, + nzVal::CuVector, dims::NTuple{2,<:Integer}, + blockDim::Integer, dir::SparseChar, nnz::Integer) where Tv + new(rowPtr, colVal, nzVal, dims, blockDim, dir, nnz) end end -function CuSparseMatrixBSR!(xs::CuSparseVector) +function CUDA.unsafe_free!(xs::CuSparseMatrixBSR) unsafe_free!(xs.rowPtr) unsafe_free!(xs.colVal) - unsafe_free!(xs.nzVal) + unsafe_free!(nonzeros(xs)) return end -""" -Utility union type of [`CuSparseMatrixCSC`](@ref), [`CuSparseMatrixCSR`](@ref), -and `Hermitian` and `Symmetric` versions of these two containers. A function accepting -this type can make use of performance improvements by only indexing one triangle of the -matrix if it is guaranteed to be hermitian/symmetric. -""" -const CompressedSparse{T} = Union{CuSparseMatrixCSC{T},CuSparseMatrixCSR{T},HermOrSym{T,CuSparseMatrixCSC{T}},HermOrSym{T,CuSparseMatrixCSR{T}}} - """ Utility union type of [`CuSparseMatrixCSC`](@ref), [`CuSparseMatrixCSR`](@ref), and [`CuSparseMatrixBSR`](@ref). """ const CuSparseMatrix{T} = Union{CuSparseMatrixCSC{T},CuSparseMatrixCSR{T}, CuSparseMatrixBSR{T}} -Hermitian{T}(Mat::CuSparseMatrix{T}) where T = Hermitian{T,typeof(Mat)}(Mat,'U') -length(g::CuSparseVector) = prod(g.dims) -size(g::CuSparseVector) = g.dims -ndims(g::CuSparseVector) = 1 -length(g::CuSparseMatrix) = prod(g.dims) -size(g::CuSparseMatrix) = g.dims -ndims(g::CuSparseMatrix) = 2 +## convenience constructors + +CuSparseVector(iPtr::CuArray{<:Integer}, nzVal::CuArray{T}, dims::Int) where {T} = + CuSparseVector{T}(iPtr, nzVal, dims) + +CuSparseMatrixCSC(colPtr::CuArray{<:Integer}, rowVal::CuArray{<:Integer}, + nzVal::CuArray{T}, dims::NTuple{2,Int}) where {T} = + CuSparseMatrixCSC{T}(colPtr, rowVal, nzVal, dims) + +CuSparseMatrixCSR(rowPtr::CuArray, colVal::CuArray, nzVal::CuArray{T}, dims::NTuple{2,Int}) where T = + CuSparseMatrixCSR{T}(rowPtr, colVal, nzVal, dims) + +CuSparseMatrixBSR(rowPtr::CuArray, colVal::CuArray, nzVal::CuArray{T}, blockDim, dir, nnz, + dims::NTuple{2,Int}) where T = + CuSparseMatrixBSR{T}(rowPtr, colVal, nzVal, dims, blockDim, dir, nnz) + +Base.similar(Vec::CuSparseVector) = CuSparseVector(copy(nonzeroinds(Vec)), similar(nonzeros(Vec)), Vec.dims[1]) +Base.similar(Mat::CuSparseMatrixCSC) = CuSparseMatrixCSC(copy(Mat.colPtr), copy(rowvals(Mat)), similar(nonzeros(Mat)), Mat.dims) +Base.similar(Mat::CuSparseMatrixCSR) = CuSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), Mat.dims) +Base.similar(Mat::CuSparseMatrixBSR) = CuSparseMatrixBSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), Mat.blockDim, Mat.dir, nnz(Mat), Mat.dims) -function size(g::CuSparseVector, d::Integer) + +## array interface + +Base.length(g::CuSparseVector) = prod(g.dims) +Base.size(g::CuSparseVector) = g.dims +Base.ndims(g::CuSparseVector) = 1 + +Base.length(g::CuSparseMatrix) = prod(g.dims) +Base.size(g::CuSparseMatrix) = g.dims +Base.ndims(g::CuSparseMatrix) = 2 + +function Base.size(g::CuSparseVector, d::Integer) if d == 1 return g.dims[d] elseif d > 1 @@ -141,7 +154,7 @@ function size(g::CuSparseVector, d::Integer) end end -function size(g::CuSparseMatrix, d::Integer) +function Base.size(g::CuSparseMatrix, d::Integer) if d in [1, 2] return g.dims[d] elseif d > 1 @@ -151,53 +164,60 @@ function size(g::CuSparseMatrix, d::Integer) end end -nnz(g::AbstractCuSparseArray) = g.nnz -nonzeros(g::AbstractCuSparseArray) = g.nzVal +Base.eltype(g::CuSparseMatrix{T}) where T = T + + +## sparse array interface -nonzeroinds(g::AbstractCuSparseVector) = g.iPtr +SparseArrays.nnz(g::AbstractCuSparseArray) = g.nnz +SparseArrays.nonzeros(g::AbstractCuSparseArray) = g.nzVal -issymmetric(M::Union{CuSparseMatrixCSC,CuSparseMatrixCSR}) = false -ishermitian(M::Union{CuSparseMatrixCSC,CuSparseMatrixCSR}) = false -issymmetric(M::Symmetric{CuSparseMatrixCSC}) = true -ishermitian(M::Hermitian{CuSparseMatrixCSC}) = true +SparseArrays.nonzeroinds(g::AbstractCuSparseVector) = g.iPtr + +SparseArrays.rowvals(g::CuSparseMatrixCSC) = g.rowVal + +LinearAlgebra.issymmetric(M::Union{CuSparseMatrixCSC,CuSparseMatrixCSR}) = false +LinearAlgebra.ishermitian(M::Union{CuSparseMatrixCSC,CuSparseMatrixCSR}) = false +LinearAlgebra.issymmetric(M::Symmetric{CuSparseMatrixCSC}) = true +LinearAlgebra.ishermitian(M::Hermitian{CuSparseMatrixCSC}) = true + +LinearAlgebra.istriu(M::UpperTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = true +LinearAlgebra.istril(M::UpperTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = false +LinearAlgebra.istriu(M::LowerTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = false +LinearAlgebra.istril(M::LowerTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = true + +Hermitian{T}(Mat::CuSparseMatrix{T}) where T = Hermitian{T,typeof(Mat)}(Mat,'U') -istriu(M::UpperTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = true -istril(M::UpperTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = false -istriu(M::LowerTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = false -istril(M::LowerTriangular{T,S}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix} = true -eltype(g::CuSparseMatrix{T}) where T = T -# getindex (mostly adapted from stdlib/SparseArrays) +## indexing -# Translations -getindex(A::AbstractCuSparseVector, ::Colon) = copy(A) -getindex(A::AbstractCuSparseMatrix, ::Colon, ::Colon) = copy(A) -getindex(A::AbstractCuSparseMatrix, i, ::Colon) = getindex(A, i, 1:size(A, 2)) -getindex(A::AbstractCuSparseMatrix, ::Colon, i) = getindex(A, 1:size(A, 1), i) -getindex(A::AbstractCuSparseMatrix, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2]) +# translations +Base.getindex(A::AbstractCuSparseVector, ::Colon) = copy(A) +Base.getindex(A::AbstractCuSparseMatrix, ::Colon, ::Colon) = copy(A) +Base.getindex(A::AbstractCuSparseMatrix, i, ::Colon) = getindex(A, i, 1:size(A, 2)) +Base.getindex(A::AbstractCuSparseMatrix, ::Colon, i) = getindex(A, 1:size(A, 1), i) +Base.getindex(A::AbstractCuSparseMatrix, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2]) -# Column slices -function getindex(x::CuSparseMatrixCSC, ::Colon, j::Integer) +# column slices +function Base.getindex(x::CuSparseMatrixCSC, ::Colon, j::Integer) checkbounds(x, :, j) r1 = convert(Int, x.colPtr[j]) r2 = convert(Int, x.colPtr[j+1]) - 1 - CuSparseVector(x.rowVal[r1:r2], x.nzVal[r1:r2], size(x, 1)) + CuSparseVector(rowvals(x)[r1:r2], nonzeros(x)[r1:r2], size(x, 1)) end -function getindex(x::CuSparseMatrixCSR, i::Integer, ::Colon) +function Base.getindex(x::CuSparseMatrixCSR, i::Integer, ::Colon) checkbounds(x, :, i) c1 = convert(Int, x.rowPtr[i]) c2 = convert(Int, x.rowPtr[i+1]) - 1 - CuSparseVector(x.colVal[c1:c2], x.nzVal[c1:c2], size(x, 2)) + CuSparseVector(x.colVal[c1:c2], nonzeros(x)[c1:c2], size(x, 2)) end -# Row slices -# TODO optimize -getindex(A::CuSparseMatrixCSC, i::Integer, ::Colon) = CuSparseVector(sparse(A[i, 1:end])) -# TODO optimize -getindex(A::CuSparseMatrixCSR, ::Colon, j::Integer) = CuSparseVector(sparse(A[1:end, j])) +# row slices +Base.getindex(A::CuSparseMatrixCSC, i::Integer, ::Colon) = CuSparseVector(sparse(A[i, 1:end])) # TODO: optimize +Base.getindex(A::CuSparseMatrixCSR, ::Colon, j::Integer) = CuSparseVector(sparse(A[1:end, j])) # TODO: optimize -function getindex(A::CuSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T +function Base.getindex(A::CuSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T m, n = size(A) if !(1 <= i0 <= m && 1 <= i1 <= n) throw(BoundsError()) @@ -205,11 +225,11 @@ function getindex(A::CuSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T r1 = Int(A.colPtr[i1]) r2 = Int(A.colPtr[i1+1]-1) (r1 > r2) && return zero(T) - r1 = searchsortedfirst(A.rowVal, i0, r1, r2, Base.Order.Forward) - ((r1 > r2) || (A.rowVal[r1] != i0)) ? zero(T) : A.nzVal[r1] + r1 = searchsortedfirst(rowvals(A), i0, r1, r2, Base.Order.Forward) + ((r1 > r2) || (rowvals(A)[r1] != i0)) ? zero(T) : nonzeros(A)[r1] end -function getindex(A::CuSparseMatrixCSR{T}, i0::Integer, i1::Integer) where T +function Base.getindex(A::CuSparseMatrixCSR{T}, i0::Integer, i1::Integer) where T m, n = size(A) if !(1 <= i0 <= m && 1 <= i1 <= n) throw(BoundsError()) @@ -218,108 +238,140 @@ function getindex(A::CuSparseMatrixCSR{T}, i0::Integer, i1::Integer) where T c2 = Int(A.rowPtr[i0+1]-1) (c1 > c2) && return zero(T) c1 = searchsortedfirst(A.colVal, i1, c1, c2, Base.Order.Forward) - ((c1 > c2) || (A.colVal[c1] != i1)) ? zero(T) : A.nzVal[c1] + ((c1 > c2) || (A.colVal[c1] != i1)) ? zero(T) : nonzeros(A)[c1] end -# Called for indexing into `CuSparseVector`s -function _spgetindex(m::Integer, nzind::CuVector{Ti}, nzval::CuVector{Tv}, - i::Integer) where {Tv,Ti} +function SparseArrays._spgetindex(m::Integer, nzind::CuVector{Ti}, nzval::CuVector{Tv}, + i::Integer) where {Tv,Ti} ii = searchsortedfirst(nzind, convert(Ti, i)) (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) end -function collect(Vec::CuSparseVector) - SparseVector(Vec.dims[1], collect(Vec.iPtr), collect(Vec.nzVal)) -end - -function collect(Mat::CuSparseMatrixCSC) - SparseMatrixCSC(Mat.dims[1], Mat.dims[2], collect(Mat.colPtr), collect(Mat.rowVal), collect(Mat.nzVal)) -end -function collect(Mat::CuSparseMatrixCSR) - rowPtr = collect(Mat.rowPtr) - colVal = collect(Mat.colVal) - nzVal = collect(Mat.nzVal) - #construct Is - I = similar(colVal) - counter = 1 - for row = 1 : size(Mat)[1], k = rowPtr[row] : (rowPtr[row+1]-1) - I[counter] = row - counter += 1 - end - return sparse(I,colVal,nzVal,Mat.dims[1],Mat.dims[2]) -end - -summary(g::CuSparseMatrix) = string(g) -summary(g::CuSparseVector) = string(g) - -CuSparseVector(iPtr::Vector{Ti}, nzVal::Vector{T}, dims::Int) where {T<:BlasFloat, Ti<:Integer} = CuSparseVector{T}(CuArray(convert(Vector{Cint},iPtr)), CuArray(nzVal), dims, convert(Cint,length(nzVal))) -CuSparseVector(iPtr::CuArray{Ti}, nzVal::CuArray{T}, dims::Int) where {T<:BlasFloat, Ti<:Integer} = CuSparseVector{T}(iPtr, nzVal, dims, convert(Cint,length(nzVal))) - -CuSparseMatrixCSC(colPtr::Vector{Ti}, rowVal::Vector{Ti}, nzVal::Vector{T}, dims::NTuple{2,Int}) where {T<:BlasFloat,Ti<:Integer} = CuSparseMatrixCSC{T}(CuArray(convert(Vector{Cint},colPtr)), CuArray(convert(Vector{Cint},rowVal)), CuArray(nzVal), dims, convert(Cint,length(nzVal))) -CuSparseMatrixCSC(colPtr::CuArray{Ti}, rowVal::CuArray{Ti}, nzVal::CuArray{T}, dims::NTuple{2,Int}) where {T<:BlasFloat,Ti<:Integer} = CuSparseMatrixCSC{T}(colPtr, rowVal, nzVal, dims, convert(Cint,length(nzVal))) -CuSparseMatrixCSC(colPtr::CuArray{Ti}, rowVal::CuArray{Ti}, nzVal::CuArray{T}, nnz, dims::NTuple{2,Int}) where {T<:BlasFloat,Ti<:Integer} = CuSparseMatrixCSC{T}(colPtr, rowVal, nzVal, dims, nnz) - -CuSparseMatrixCSR(rowPtr::CuArray, colVal::CuArray, nzVal::CuArray{T}, dims::NTuple{2,Int}) where T = CuSparseMatrixCSR{T}(rowPtr, colVal, nzVal, dims, convert(Cint,length(nzVal))) -CuSparseMatrixCSR(rowPtr::CuArray, colVal::CuArray, nzVal::CuArray{T}, nnz, dims::NTuple{2,Int}) where T = CuSparseMatrixCSR{T}(rowPtr, colVal, nzVal, dims, nnz) - -CuSparseMatrixBSR(rowPtr::CuArray, colVal::CuArray, nzVal::CuArray{T}, blockDim, dir, nnz, dims::NTuple{2,Int}) where T = CuSparseMatrixBSR{T}(rowPtr, colVal, nzVal, dims, blockDim, dir, nnz) - -CuSparseVector(Vec::SparseVector) = CuSparseVector(Vec.nzind, Vec.nzval, size(Vec)[1]) -CuSparseMatrixCSC(Vec::SparseVector) = CuSparseMatrixCSC([1], Vec.nzind, Vec.nzval, size(Vec)) -CuSparseVector(Mat::SparseMatrixCSC) = size(Mat,2) == 1 ? CuSparseVector(Mat.rowval, Mat.nzval, size(Mat)[1]) : throw(ArgumentError("The input argument must have a single column")) -CuSparseMatrixCSC(Mat::SparseMatrixCSC) = CuSparseMatrixCSC(Mat.colptr, Mat.rowval, Mat.nzval, size(Mat)) -CuSparseMatrixCSR(Mat::SparseMatrixCSC) = switch2csr(CuSparseMatrixCSC(Mat)) -similar(Vec::CuSparseVector) = CuSparseVector(copy(Vec.iPtr), similar(Vec.nzVal), Vec.dims[1]) -similar(Mat::CuSparseMatrixCSC) = CuSparseMatrixCSC(copy(Mat.colPtr), copy(Mat.rowVal), similar(Mat.nzVal), Mat.nnz, Mat.dims) -similar(Mat::CuSparseMatrixCSR) = CuSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(Mat.nzVal), Mat.nnz, Mat.dims) -similar(Mat::CuSparseMatrixBSR) = CuSparseMatrixBSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(Mat.nzVal), Mat.blockDim, Mat.dir, Mat.nnz, Mat.dims) - -function copyto!(dst::CuSparseVector, src::CuSparseVector) +## interop with sparse CPU arrays + +# cpu to gpu +# NOTE: we eagerly convert the indices to Cint here to avoid additional conversion later on +CuSparseVector{T}(Vec::SparseVector) where {T} = + CuSparseVector(CuVector{Cint}(Vec.nzind), CuVector{T}(Vec.nzval), length(Vec)) +CuSparseVector{T}(Mat::SparseMatrixCSC) where {T} = + size(Mat,2) == 1 ? + CuSparseVector(CuVector{Cint}(Mat.rowval), CuVector{T}(Mat.nzval), size(Mat)[1]) : + throw(ArgumentError("The input argument must have a single column")) +CuSparseMatrixCSC{T}(Vec::SparseVector) where {T} = + CuSparseMatrixCSC{T}(CuVector{Cint}([1]), CuVector{Cint}(Vec.nzind), + CuVector{T}(Vec.nzval), size(Vec)) +CuSparseMatrixCSC{T}(Mat::SparseMatrixCSC) where {T} = + CuSparseMatrixCSC{T}(CuVector{Cint}(Mat.colptr), CuVector{Cint}(Mat.rowval), + CuVector{T}(Mat.nzval), size(Mat)) +CuSparseMatrixCSR{T}(Mat::SparseMatrixCSC) where {T} = CuSparseMatrixCSR(CuSparseMatrixCSC{T}(Mat)) +CuSparseMatrixBSR{T}(Mat::SparseMatrixCSC, blockdim) where {T} = CuSparseMatrixBSR(CuSparseMatrixCSR{T}(Mat), blockdim) + +# untyped variants +CuSparseVector(x::AbstractSparseArray{T}) where {T} = CuSparseVector{T}(x) +CuSparseMatrixCSC(x::AbstractSparseArray{T}) where {T} = CuSparseMatrixCSC{T}(x) +CuSparseMatrixCSR(x::AbstractSparseArray{T}) where {T} = CuSparseMatrixCSR{T}(x) +CuSparseMatrixBSR(x::AbstractSparseArray{T}, blockdim) where {T} = CuSparseMatrixBSR{T}(x, blockdim) + +# gpu to cpu +SparseVector(x::CuSparseVector) = SparseVector(length(x), Array(nonzeroinds(x)), Array(nonzeros(x))) +SparseMatrixCSC(x::CuSparseMatrixCSC) = SparseMatrixCSC(size(x)..., Array(x.colPtr), Array(rowvals(x)), Array(nonzeros(x))) +SparseMatrixCSC(x::CuSparseMatrixCSR) = SparseMatrixCSC(CuSparseMatrixCSC(x)) # no direct conversion + +# collect to Array +Base.collect(x::CuSparseVector) = collect(SparseVector(x)) +Base.collect(x::CuSparseMatrixCSC) = collect(SparseMatrixCSC(x)) +Base.collect(x::CuSparseMatrixCSR) = collect(SparseMatrixCSC(x)) +Base.collect(x::CuSparseMatrixBSR) = collect(CuSparseMatrixCSR(x)) # no direct conversion + +Adapt.adapt_storage(::Type{CuArray}, xs::SparseVector) = CuSparseVector(xs) +Adapt.adapt_storage(::Type{CuArray}, xs::SparseMatrixCSC) = CuSparseMatrixCSC(xs) +Adapt.adapt_storage(::Type{CuArray{T}}, xs::SparseVector) where {T} = CuSparseVector{T}(xs) +Adapt.adapt_storage(::Type{CuArray{T}}, xs::SparseMatrixCSC) where {T} = CuSparseMatrixCSC{T}(xs) + +Adapt.adapt_storage(::CUDA.Float32Adaptor, xs::AbstractSparseArray) = + adapt(CuArray, xs) +Adapt.adapt_storage(::CUDA.Float32Adaptor, xs::AbstractSparseArray{<:AbstractFloat}) = + adapt(CuArray{Float32}, xs) + +Adapt.adapt_storage(::Type{Array}, xs::CuSparseVector) = SparseVector(xs) +Adapt.adapt_storage(::Type{Array}, xs::CuSparseMatrixCSC) = SparseMatrixCSC(xs) + + +## copying between sparse GPU arrays + +function Base.copyto!(dst::CuSparseVector, src::CuSparseVector) if dst.dims != src.dims throw(ArgumentError("Inconsistent Sparse Vector size")) end - copyto!(dst.iPtr, src.iPtr) - copyto!(dst.nzVal, src.nzVal) - dst.nnz = src.nnz + copyto!(nonzeroinds(dst), nonzeroinds(src)) + copyto!(nonzeros(dst), nonzeros(src)) + nnz(dst) = nnz(src) dst end -function copyto!(dst::CuSparseMatrixCSC, src::CuSparseMatrixCSC) +function Base.copyto!(dst::CuSparseMatrixCSC, src::CuSparseMatrixCSC) if dst.dims != src.dims throw(ArgumentError("Inconsistent Sparse Matrix size")) end copyto!(dst.colPtr, src.colPtr) - copyto!(dst.rowVal, src.rowVal) - copyto!(dst.nzVal, src.nzVal) - dst.nnz = src.nnz + copyto!(rowvals(dst), rowvals(src)) + copyto!(nonzeros(dst), nonzeros(src)) + nnz(dst) = nnz(src) dst end -function copyto!(dst::CuSparseMatrixCSR, src::CuSparseMatrixCSR) +function Base.copyto!(dst::CuSparseMatrixCSR, src::CuSparseMatrixCSR) if dst.dims != src.dims throw(ArgumentError("Inconsistent Sparse Matrix size")) end copyto!(dst.rowPtr, src.rowPtr) copyto!(dst.colVal, src.colVal) - copyto!(dst.nzVal, src.nzVal) - dst.nnz = src.nnz + copyto!(nonzeros(dst), nonzeros(src)) + nnz(dst) = nnz(src) dst end -function copyto!(dst::CuSparseMatrixBSR, src::CuSparseMatrixBSR) +function Base.copyto!(dst::CuSparseMatrixBSR, src::CuSparseMatrixBSR) if dst.dims != src.dims throw(ArgumentError("Inconsistent Sparse Matrix size")) end copyto!(dst.rowPtr, src.rowPtr) copyto!(dst.colVal, src.colVal) - copyto!(dst.nzVal, src.nzVal) + copyto!(nonzeros(dst), nonzeros(src)) dst.dir = src.dir - dst.nnz = src.nnz + nnz(dst) = nnz(src) dst end -copy(Vec::CuSparseVector) = copyto!(similar(Vec),Vec) -copy(Mat::CuSparseMatrixCSC) = copyto!(similar(Mat),Mat) -copy(Mat::CuSparseMatrixCSR) = copyto!(similar(Mat),Mat) -copy(Mat::CuSparseMatrixBSR) = copyto!(similar(Mat),Mat) +Base.copy(Vec::CuSparseVector) = copyto!(similar(Vec),Vec) +Base.copy(Mat::CuSparseMatrixCSC) = copyto!(similar(Mat),Mat) +Base.copy(Mat::CuSparseMatrixCSR) = copyto!(similar(Mat),Mat) +Base.copy(Mat::CuSparseMatrixBSR) = copyto!(similar(Mat),Mat) + + +# input/output + +Base.show(io::IOContext, x::CuSparseVector) = + show(io, SparseVector(x)) + +Base.show(io::IOContext, x::CuSparseMatrixCSC) = + show(io, SparseMatrixCSC(x)) + +Base.show(io::IOContext, x::CuSparseMatrixCSR) = + show(io, SparseMatrixCSC(x)) + +Base.show(io::IOContext, x::CuSparseMatrixBSR) = + show(io, CuSparseMatrixCSR(x)) + +Base.show(io::IO, S::AbstractCuSparseMatrix) = Base.show(convert(IOContext, io), S) +function Base.show(io::IO, ::MIME"text/plain", S::AbstractCuSparseMatrix) + xnnz = nnz(S) + m, n = size(S) + print(io, m, "×", n, " ", typeof(S), " with ", xnnz, " stored ", + xnnz == 1 ? "entry" : "entries") + if !(m == 0 || n == 0) + print(io, ":") + show(IOContext(io, :typeinfo => eltype(S)), S) + end +end diff --git a/lib/cusparse/conversions.jl b/lib/cusparse/conversions.jl index 34853a131a..9d40195757 100644 --- a/lib/cusparse/conversions.jl +++ b/lib/cusparse/conversions.jl @@ -1,100 +1,98 @@ # conversion routines between different sparse and dense storage formats -export switch2csr, switch2csc, switch2bsr +SparseArrays.sparse(::CuArray, args...) = error("CUSPARSE supports multiple sparse formats, use specific constructors instead (e.g. CuSparseMatrixCSC)") + + +## CSR to CSC + +# by flipping rows and columns, we can use that to get CSC to CSR too for (fname,elty) in ((:cusparseScsr2csc, :Float32), (:cusparseDcsr2csc, :Float64), (:cusparseCcsr2csc, :ComplexF32), (:cusparseZcsr2csc, :ComplexF64)) @eval begin - function switch2csc(csr::CuSparseMatrixCSR{$elty},inda::SparseChar='O') + function CuSparseMatrixCSC{$elty}(csr::CuSparseMatrixCSR{$elty}; inda::SparseChar='O') m,n = csr.dims colPtr = CUDA.zeros(Cint, n+1) - rowVal = CUDA.zeros(Cint, csr.nnz) - nzVal = CUDA.zeros($elty, csr.nnz) + rowVal = CUDA.zeros(Cint, nnz(csr)) + nzVal = CUDA.zeros($elty, nnz(csr)) if version() >= v"10.2" # TODO: algorithm configuratibility? @workspace size=@argout( - cusparseCsr2cscEx2_bufferSize(handle(), m, n, csr.nnz, csr.nzVal, + cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr), csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal, $elty, CUSPARSE_ACTION_NUMERIC, inda, CUSPARSE_CSR2CSC_ALG1, out(Ref{Csize_t}(1))) )[] buffer->begin - cusparseCsr2cscEx2(handle(), m, n, csr.nnz, csr.nzVal, + cusparseCsr2cscEx2(handle(), m, n, nnz(csr), nonzeros(csr), csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal, $elty, CUSPARSE_ACTION_NUMERIC, inda, CUSPARSE_CSR2CSC_ALG1, buffer) end else - $fname(handle(), m, n, csr.nnz, csr.nzVal, + $fname(handle(), m, n, nnz(csr), nonzeros(csr), csr.rowPtr, csr.colVal, nzVal, rowVal, colPtr, CUSPARSE_ACTION_NUMERIC, inda) end - csc = CuSparseMatrixCSC(colPtr,rowVal,nzVal,csr.nnz,csr.dims) - csc + CuSparseMatrixCSC(colPtr,rowVal,nzVal,csr.dims) end - function switch2csr(csc::CuSparseMatrixCSC{$elty},inda::SparseChar='O') + + function CuSparseMatrixCSR{$elty}(csc::CuSparseMatrixCSC{$elty}; inda::SparseChar='O') m,n = csc.dims rowPtr = CUDA.zeros(Cint,m+1) - colVal = CUDA.zeros(Cint,csc.nnz) - nzVal = CUDA.zeros($elty,csc.nnz) + colVal = CUDA.zeros(Cint,nnz(csc)) + nzVal = CUDA.zeros($elty,nnz(csc)) if version() >= v"10.2" # TODO: algorithm configuratibility? @workspace size=@argout( - cusparseCsr2cscEx2_bufferSize(handle(), n, m, csc.nnz, csc.nzVal, - csc.colPtr, csc.rowVal, nzVal, rowPtr, colVal, + cusparseCsr2cscEx2_bufferSize(handle(), n, m, nnz(csc), nonzeros(csc), + csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal, $elty, CUSPARSE_ACTION_NUMERIC, inda, CUSPARSE_CSR2CSC_ALG1, out(Ref{Csize_t}(1))) )[] buffer->begin - cusparseCsr2cscEx2(handle(), n, m, csc.nnz, csc.nzVal, - csc.colPtr, csc.rowVal, nzVal, rowPtr, colVal, + cusparseCsr2cscEx2(handle(), n, m, nnz(csc), nonzeros(csc), + csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal, $elty, CUSPARSE_ACTION_NUMERIC, inda, CUSPARSE_CSR2CSC_ALG1, buffer) end else - $fname(handle(), n, m, csc.nnz, csc.nzVal, - csc.colPtr, csc.rowVal, nzVal, colVal, + $fname(handle(), n, m, nnz(csc), nonzeros(csc), + csc.colPtr, rowvals(csc), nzVal, colVal, rowPtr, CUSPARSE_ACTION_NUMERIC, inda) end - csr = CuSparseMatrixCSR(rowPtr,colVal,nzVal,csc.nnz,csc.dims) - csr + CuSparseMatrixCSR(rowPtr,colVal,nzVal,csc.dims) end end end + +## CSR to BSR and vice-versa + for (fname,elty) in ((:cusparseScsr2bsr, :Float32), (:cusparseDcsr2bsr, :Float64), (:cusparseCcsr2bsr, :ComplexF32), (:cusparseZcsr2bsr, :ComplexF64)) @eval begin - function switch2bsr(csr::CuSparseMatrixCSR{$elty}, - blockDim::Cint, - dir::SparseChar='R', - inda::SparseChar='O', - indc::SparseChar='O') + function CuSparseMatrixBSR{$elty}(csr::CuSparseMatrixCSR{$elty}, blockDim::Integer; + dir::SparseChar='R', inda::SparseChar='O', + indc::SparseChar='O') m,n = csr.dims - nnz = Ref{Cint}(1) + nnz_ref = Ref{Cint}(1) mb = div((m + blockDim - 1),blockDim) nb = div((n + blockDim - 1),blockDim) bsrRowPtr = CUDA.zeros(Cint,mb + 1) cudesca = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, inda) cudescc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, indc) cusparseXcsr2bsrNnz(handle(), dir, m, n, cudesca, csr.rowPtr, - csr.colVal, blockDim, cudescc, bsrRowPtr, nnz) - bsrNzVal = CUDA.zeros($elty, nnz[] * blockDim * blockDim ) - bsrColInd = CUDA.zeros(Cint, nnz[]) + csr.colVal, blockDim, cudescc, bsrRowPtr, nnz_ref) + bsrNzVal = CUDA.zeros($elty, nnz_ref[] * blockDim * blockDim ) + bsrColInd = CUDA.zeros(Cint, nnz_ref[]) $fname(handle(), dir, m, n, - cudesca, csr.nzVal, csr.rowPtr, csr.colVal, + cudesca, nonzeros(csr), csr.rowPtr, csr.colVal, blockDim, cudescc, bsrNzVal, bsrRowPtr, bsrColInd) - CuSparseMatrixBSR{$elty}(bsrRowPtr, bsrColInd, bsrNzVal, csr.dims, blockDim, dir, nnz[]) - end - function switch2bsr(csc::CuSparseMatrixCSC{$elty}, - blockDim::Cint, - dir::SparseChar='R', - inda::SparseChar='O', - indc::SparseChar='O') - switch2bsr(switch2csr(csc),blockDim,dir,inda,indc) + CuSparseMatrixBSR{$elty}(bsrRowPtr, bsrColInd, bsrNzVal, csr.dims, blockDim, dir, nnz_ref[]) end end end @@ -104,58 +102,52 @@ for (fname,elty) in ((:cusparseSbsr2csr, :Float32), (:cusparseCbsr2csr, :ComplexF32), (:cusparseZbsr2csr, :ComplexF64)) @eval begin - function switch2csr(bsr::CuSparseMatrixBSR{$elty}, - inda::SparseChar='O', - indc::SparseChar='O') + function CuSparseMatrixCSR{$elty}(bsr::CuSparseMatrixBSR{$elty}; + inda::SparseChar='O', indc::SparseChar='O') m,n = bsr.dims mb = div(m,bsr.blockDim) nb = div(n,bsr.blockDim) - nnz = bsr.nnz * bsr.blockDim * bsr.blockDim + nnzVal = nnz(bsr) * bsr.blockDim * bsr.blockDim cudesca = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, inda) cudescc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, indc) csrRowPtr = CUDA.zeros(Cint, m + 1) - csrColInd = CUDA.zeros(Cint, nnz) - csrNzVal = CUDA.zeros($elty, nnz) + csrColInd = CUDA.zeros(Cint, nnzVal) + csrNzVal = CUDA.zeros($elty, nnzVal) $fname(handle(), bsr.dir, mb, nb, - cudesca, bsr.nzVal, bsr.rowPtr, bsr.colVal, + cudesca, nonzeros(bsr), bsr.rowPtr, bsr.colVal, bsr.blockDim, cudescc, csrNzVal, csrRowPtr, csrColInd) - CuSparseMatrixCSR(csrRowPtr, csrColInd, csrNzVal, convert(Cint,nnz), bsr.dims) - end - function switch2csc(bsr::CuSparseMatrixBSR{$elty}, - inda::SparseChar='O', - indc::SparseChar='O') - switch2csc(switch2csr(bsr,inda,indc)) + CuSparseMatrixCSR(csrRowPtr, csrColInd, csrNzVal, bsr.dims) end end end + +## sparse to dense, and vice-versa + for (cname,rname,elty) in ((:cusparseScsc2dense, :cusparseScsr2dense, :Float32), (:cusparseDcsc2dense, :cusparseDcsr2dense, :Float64), (:cusparseCcsc2dense, :cusparseCcsr2dense, :ComplexF32), (:cusparseZcsc2dense, :cusparseZcsr2dense, :ComplexF64)) @eval begin - function Base.Array(csr::CuSparseMatrixCSR{$elty},ind::SparseChar='O') + function CUDA.CuMatrix{$elty}(csr::CuSparseMatrixCSR{$elty}; ind::SparseChar='O') m,n = csr.dims denseA = CUDA.zeros($elty,m,n) lda = max(1,stride(denseA,2)) cudesc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, ind) - $rname(handle(), m, n, cudesc, csr.nzVal, + $rname(handle(), m, n, cudesc, nonzeros(csr), csr.rowPtr, csr.colVal, denseA, lda) denseA end - function Base.Array(csc::CuSparseMatrixCSC{$elty},ind::SparseChar='O') + function CUDA.CuMatrix{$elty}(csc::CuSparseMatrixCSC{$elty}; ind::SparseChar='O') m,n = csc.dims denseA = CUDA.zeros($elty,m,n) lda = max(1,stride(denseA,2)) cudesc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, ind) - $cname(handle(), m, n, cudesc, csc.nzVal, - csc.rowVal, csc.colPtr, denseA, lda) + $cname(handle(), m, n, cudesc, nonzeros(csc), + rowvals(csc), csc.colPtr, denseA, lda) denseA end - function Base.Array(bsr::CuSparseMatrixBSR{$elty},ind::SparseChar='O') - Array(switch2csr(bsr,ind)) - end end end @@ -164,60 +156,54 @@ for (nname,cname,rname,elty) in ((:cusparseSnnz, :cusparseSdense2csc, :cusparseS (:cusparseCnnz, :cusparseCdense2csc, :cusparseCdense2csr, :ComplexF32), (:cusparseZnnz, :cusparseZdense2csc, :cusparseZdense2csr, :ComplexF64)) @eval begin - function sparse(A::CuMatrix{$elty},fmt::SparseChar='R',ind::SparseChar='O') - dir = 'R' - if fmt == 'C' - dir = fmt - end + function CuSparseMatrixCSR(A::CuMatrix{$elty}; ind::SparseChar='O') m,n = size(A) - lda = max(1,stride(A,2)) - cudesc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, ind) - nnzRowCol = CUDA.zeros(Cint, fmt == 'R' ? m : n) + lda = max(1, stride(A,2)) + cudesc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, + CUSPARSE_FILL_MODE_LOWER, + CUSPARSE_DIAG_TYPE_NON_UNIT, ind) + nnzRowCol = CUDA.zeros(Cint, m) nnzTotal = Ref{Cint}(1) $nname(handle(), - dir, m, n, cudesc, A, lda, nnzRowCol, + 'R', m, n, cudesc, A, lda, nnzRowCol, nnzTotal) nzVal = CUDA.zeros($elty,nnzTotal[]) - if(fmt == 'R') - rowPtr = CUDA.zeros(Cint,m+1) - colInd = CUDA.zeros(Cint,nnzTotal[]) - $rname(handle(), m, n, cudesc, A, - lda, nnzRowCol, nzVal, rowPtr, colInd) - return CuSparseMatrixCSR(rowPtr,colInd,nzVal,nnzTotal[],size(A)) - end - if(fmt == 'C') - colPtr = CUDA.zeros(Cint,n+1) - rowInd = CUDA.zeros(Cint,nnzTotal[]) - $cname(handle(), m, n, cudesc, A, - lda, nnzRowCol, nzVal, rowInd, colPtr) - return CuSparseMatrixCSC(colPtr,rowInd,nzVal,nnzTotal[],size(A)) - end - if(fmt == 'B') - return switch2bsr(sparse(A,'R',ind),convert(Cint,gcd(m,n))) - end - end - end -end - -""" - switch2csr(csr::CuSparseMatrixCSR, inda::SparseChar='O') -Convert a `CuSparseMatrixCSR` to the compressed sparse column format. -""" -switch2csc(csr::CuSparseMatrixCSR, inda::SparseChar='O') + rowPtr = CUDA.zeros(Cint,m+1) + colInd = CUDA.zeros(Cint,nnzTotal[]) + $rname(handle(), m, n, cudesc, A, + lda, nnzRowCol, nzVal, rowPtr, colInd) + return CuSparseMatrixCSR(rowPtr,colInd,nzVal,size(A)) + end -""" - switch2csr(csc::CuSparseMatrixCSC, inda::SparseChar='O') + function CuSparseMatrixCSC(A::CuMatrix{$elty}; ind::SparseChar='O') + m,n = size(A) + lda = max(1, stride(A,2)) + cudesc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, + CUSPARSE_FILL_MODE_LOWER, + CUSPARSE_DIAG_TYPE_NON_UNIT, ind) + nnzRowCol = CUDA.zeros(Cint, n) + nnzTotal = Ref{Cint}(1) + $nname(handle(), + 'C', m, n, cudesc, A, lda, nnzRowCol, + nnzTotal) + nzVal = CUDA.zeros($elty,nnzTotal[]) -Convert a `CuSparseMatrixCSC` to the compressed sparse row format. -""" -switch2csr(csc::CuSparseMatrixCSC, inda::SparseChar='O') + colPtr = CUDA.zeros(Cint,n+1) + rowInd = CUDA.zeros(Cint,nnzTotal[]) + $cname(handle(), m, n, cudesc, A, + lda, nnzRowCol, nzVal, rowInd, colPtr) + return CuSparseMatrixCSC(colPtr,rowInd,nzVal,size(A)) + end + end +end -""" - switch2bsr(csr::CuSparseMatrixCSR, blockDim::Cint, dir::SparseChar='R', inda::SparseChar='O', indc::SparseChar='O') +function CUDA.CuMatrix{T}(bsr::CuSparseMatrixBSR{T}; inda::SparseChar='O', + indc::SparseChar='O') where {T} + CuMatrix{T}(CuSparseMatrixCSR{T}(bsr; inda, indc)) +end -Convert a `CuSparseMatrixCSR` to the compressed block sparse row format. `blockDim` sets the -block dimension of the compressed sparse blocks and `indc` determines whether the new matrix -will be one- or zero-indexed. -""" -switch2bsr(csr::CuSparseMatrixCSR, blockDim::Cint, dir::SparseChar='R', inda::SparseChar='O', indc::SparseChar='O') +function CuSparseMatrixBSR(A::CuMatrix; ind::SparseChar='O') + m,n = size(A) # TODO: always let the user choose, or provide defaults for other methods too + CuSparseMatrixBSR(CuSparseMatrixCSR(A; ind), gcd(m,n)) +end diff --git a/lib/cusparse/generic.jl b/lib/cusparse/generic.jl index 21b2c0fa45..7ac6b24272 100644 --- a/lib/cusparse/generic.jl +++ b/lib/cusparse/generic.jl @@ -5,10 +5,10 @@ mutable struct CuDenseVectorDescriptor handle::cusparseDnVecDescr_t - function CuDenseVectorDescriptor(v::CuVector{T}) where {T} - vec_ref = Ref{cusparseDnVecDescr_t}() - cusparseCreateDnVec(vec_ref, length(v), v, T) - obj = new(vec_ref[]) + function CuDenseVectorDescriptor(x::CuVector) + desc_ref = Ref{cusparseDnVecDescr_t}() + cusparseCreateDnVec(desc_ref, length(x), x, eltype(x)) + obj = new(desc_ref[]) finalizer(cusparseDestroyDnVec, obj) obj end @@ -17,61 +17,181 @@ end Base.unsafe_convert(::Type{cusparseDnVecDescr_t}, desc::CuDenseVectorDescriptor) = desc.handle +## dense matrix descriptor + +mutable struct CuDenseMatrixDescriptor + handle::cusparseDnMatDescr_t + + function CuDenseMatrixDescriptor(x::CuMatrix) + desc_ref = Ref{cusparseDnMatDescr_t}() + cusparseCreateDnMat(desc_ref, size(x)..., stride(x,2), x, eltype(x), CUSPARSE_ORDER_COL) + obj = new(desc_ref[]) + finalizer(cusparseDestroyDnMat, obj) + obj + end +end + +Base.unsafe_convert(::Type{cusparseDnMatDescr_t}, desc::CuDenseMatrixDescriptor) = desc.handle + + ## sparse matrix descriptor mutable struct CuSparseMatrixDescriptor handle::cusparseSpMatDescr_t -end -function CuSparseMatrixDescriptor(A::CuSparseMatrixCSR{T}) where {T} - desc_ref = Ref{cusparseSpMatDescr_t}() - cusparseCreateCsr( - desc_ref, - A.dims..., length(A.nzVal), - A.rowPtr, A.colVal, A.nzVal, - eltype(A.rowPtr), eltype(A.colVal), 'O', eltype(A.nzVal) - ) - obj = CuSparseMatrixDescriptor(desc_ref[]) - finalizer(cusparseDestroySpMat, obj) - return obj + function CuSparseMatrixDescriptor(A::CuSparseMatrixCSR) + desc_ref = Ref{cusparseSpMatDescr_t}() + cusparseCreateCsr( + desc_ref, + A.dims..., length(nonzeros(A)), + A.rowPtr, A.colVal, nonzeros(A), + eltype(A.rowPtr), eltype(A.colVal), 'O', eltype(nonzeros(A)) + ) + obj = new(desc_ref[]) + finalizer(cusparseDestroySpMat, obj) + return obj + end + + function CuSparseMatrixDescriptor(A::CuSparseMatrixCSC) + desc_ref = Ref{cusparseSpMatDescr_t}() + cusparseCreateCsr( + desc_ref, + reverse(A.dims)..., length(nonzeros(A)), + A.colPtr, rowvals(A), nonzeros(A), + eltype(A.colPtr), eltype(rowvals(A)), 'O', eltype(nonzeros(A)) + ) + obj = new(desc_ref[]) + finalizer(cusparseDestroySpMat, obj) + return obj + end end Base.unsafe_convert(::Type{cusparseSpMatDescr_t}, desc::CuSparseMatrixDescriptor) = desc.handle -## SpMV - -function mv!( - transa::SparseChar, - alpha::T, - A::CuSparseMatrixCSR{T}, - X::CuVector{T}, - beta::T, - Y::CuVector{T}, - index::SparseChar -) where {T} +## API functions +function mv!(transa::SparseChar, alpha::Number, A::Union{CuSparseMatrixBSR{T},CuSparseMatrixCSR{T}}, X::CuVector{T}, + beta::Number, Y::CuVector{T}, index::SparseChar) where {T} m,n = size(A) if transa == 'N' chkmvdims(X,n,Y,m) + elseif transa == 'T' || transa == 'C' + chkmvdims(X,m,Y,n) + end + + descA = CuSparseMatrixDescriptor(A) + descX = CuDenseVectorDescriptor(X) + descY = CuDenseVectorDescriptor(Y) + + @workspace size=@argout( + cusparseSpMV_bufferSize(handle(), transa, Ref{T}(alpha), descA, descX, Ref{T}(beta), + descY, T, CUSPARSE_MV_ALG_DEFAULT, out(Ref{Csize_t}())) + )[] buffer->begin + cusparseSpMV(handle(), transa, Ref{T}(alpha), descA, descX, Ref{T}(beta), + descY, T, CUSPARSE_MV_ALG_DEFAULT, buffer) + end + Y +end + +function mv!(transa::SparseChar, alpha::Number, A::CuSparseMatrixCSC{T}, X::CuVector{T}, + beta::Number, Y::CuVector{T}, index::SparseChar) where {T} + ctransa = 'N' + if transa == 'N' + ctransa = 'T' end - if transa == 'T' || transa == 'C' + # TODO: conjugate transpose? + + n,m = size(A) + + if ctransa == 'N' + chkmvdims(X,n,Y,m) + elseif ctransa == 'T' || ctransa == 'C' chkmvdims(X,m,Y,n) end - cusparseSpMV( - handle(), - transa, - [alpha], - CuSparseMatrixDescriptor(A), - CuDenseVectorDescriptor(X), - [beta], - CuDenseVectorDescriptor(Y), - T, - CUSPARSE_MV_ALG_DEFAULT, - CU_NULL - ) + descA = CuSparseMatrixDescriptor(A) + descX = CuDenseVectorDescriptor(X) + descY = CuDenseVectorDescriptor(Y) - Y + @workspace size=@argout( + cusparseSpMV_bufferSize(handle(), ctransa, Ref{T}(alpha), descA, descX, Ref{T}(beta), + descY, T, CUSPARSE_MV_ALG_DEFAULT, out(Ref{Csize_t}())) + )[] buffer->begin + cusparseSpMV(handle(), ctransa, Ref{T}(alpha), descA, descX, Ref{T}(beta), + descY, T, CUSPARSE_MV_ALG_DEFAULT, buffer) + end + + return Y +end + +function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseMatrixCSR{T}, + B::CuMatrix{T}, beta::Number, C::CuMatrix{T}, index::SparseChar) where {T} + m,k = size(A) + n = size(C)[2] + + if transa == 'N' && transb == 'N' + chkmmdims(B,C,k,n,m,n) + elseif transa == 'N' && transb != 'N' + chkmmdims(B,C,n,k,m,n) + elseif transa != 'N' && transb == 'N' + chkmmdims(B,C,m,n,k,n) + elseif transa != 'N' && transb != 'N' + chkmmdims(B,C,n,m,k,n) + end + + descA = CuSparseMatrixDescriptor(A) + descB = CuDenseMatrixDescriptor(B) + descC = CuDenseMatrixDescriptor(C) + + @workspace size=@argout( + cusparseSpMM_bufferSize( + handle(), transa, transb, Ref{T}(alpha), descA, descB, Ref{T}(beta), + descC, T, CUSPARSE_MM_ALG_DEFAULT, out(Ref{Csize_t}())) + )[] buffer->begin + cusparseSpMM( + handle(), transa, transb, Ref{T}(alpha), descA, descB, Ref{T}(beta), + descC, T, CUSPARSE_MM_ALG_DEFAULT, buffer) + end + + return C +end + +function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseMatrixCSC{T}, + B::CuMatrix{T}, beta::Number, C::CuMatrix{T}, index::SparseChar) where {T} + ctransa = 'N' + if transa == 'N' + ctransa = 'T' + end + # TODO: conjugate transpose? + + k,m = size(A) + n = size(C)[2] + + if ctransa == 'N' && transb == 'N' + chkmmdims(B,C,k,n,m,n) + elseif ctransa == 'N' && transb != 'N' + chkmmdims(B,C,n,k,m,n) + elseif ctransa != 'N' && transb == 'N' + chkmmdims(B,C,m,n,k,n) + elseif ctransa != 'N' && transb != 'N' + chkmmdims(B,C,n,m,k,n) + end + + descA = CuSparseMatrixDescriptor(A) + descB = CuDenseMatrixDescriptor(B) + descC = CuDenseMatrixDescriptor(C) + + @workspace size=@argout( + cusparseSpMM_bufferSize( + handle(), ctransa, transb, Ref{T}(alpha), descA, descB, Ref{T}(beta), + descC, T, CUSPARSE_MM_ALG_DEFAULT, out(Ref{Csize_t}())) + )[] buffer->begin + cusparseSpMM( + handle(), ctransa, transb, Ref{T}(alpha), descA, descB, Ref{T}(beta), + descC, T, CUSPARSE_MM_ALG_DEFAULT, buffer) + end + + return C end diff --git a/lib/cusparse/interfaces.jl b/lib/cusparse/interfaces.jl index 9fb850ab35..239dacacb8 100644 --- a/lib/cusparse/interfaces.jl +++ b/lib/cusparse/interfaces.jl @@ -3,29 +3,43 @@ using LinearAlgebra using LinearAlgebra: BlasFloat -Base.:(\)(A::Union{UpperTriangular{T, S},LowerTriangular{T, S}}, B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('N',A,B,'O') +function mv_wrapper(transa::SparseChar, alpha::Number, A::CuSparseMatrix{T}, X::CuVector{T}, + beta::Number, Y::CuVector{T}) where {T} + mv!(transa, alpha, A, X, beta, Y, 'O') +end + +LinearAlgebra.mul!(C::CuVector{T},A::CuSparseMatrix,B::CuVector,alpha::Number,beta::Number) where {T} = mv_wrapper('N',alpha,A,B,beta,C) +LinearAlgebra.mul!(C::CuVector{T},transA::Transpose{<:Any,<:CuSparseMatrix},B::CuVector,alpha::Number,beta::Number) where {T} = mv_wrapper('T',alpha,parent(transA),B,beta,C) +LinearAlgebra.mul!(C::CuVector{T},adjA::Adjoint{<:Any,<:CuSparseMatrix},B::CuVector,alpha::Number,beta::Number) where {T} = mv_wrapper('C',alpha,parent(adjA),B,beta,C) +LinearAlgebra.mul!(C::CuVector{T},A::HermOrSym{T,<:CuSparseMatrix{T}},B::CuVector{T},alpha::Number,beta::Number) where T = mv_wrapper('N',alpha,A,B,beta,C) +LinearAlgebra.mul!(C::CuVector{T},transA::Transpose{<:Any, <:HermOrSym{T,<:CuSparseMatrix{T}}},B::CuVector{T},alpha::Number,beta::Number) where {T} = mv_wrapper('T',alpha,parent(transA),B,beta,C) +LinearAlgebra.mul!(C::CuVector{T},adjA::Adjoint{<:Any, <:HermOrSym{T,<:CuSparseMatrix{T}}},B::CuVector{T},alpha::Number,beta::Number) where {T} = mv_wrapper('C',alpha,parent(adjA),B,beta,C) + +function mm_wrapper(transa::SparseChar, transb::SparseChar, alpha::Number, + A::CuSparseMatrix{T}, B::CuMatrix{T}, beta::Number, C::CuMatrix{T}) where {T} + if version() < v"10.3.1" && A isa CuSparseMatrixCSR + # generic mm! doesn't work on CUDA 10.1 with CSC matrices + return mm2!(transa, transb, alpha, A, B, beta, C, 'O') + end + mm!(transa, transb, alpha, A, B, beta, C, 'O') +end + +LinearAlgebra.mul!(C::CuMatrix{T},A::CuSparseMatrix{T},B::CuMatrix{T},alpha::Number,beta::Number) where {T} = mm_wrapper('N','N',alpha,A,B,beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},A::CuSparseMatrix{T},transB::Transpose{<:Any, <:CuMatrix{T}},alpha::Number,beta::Number) where {T} = mm_wrapper('N','T',alpha,A,parent(transB),beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:CuSparseMatrix{T}},B::CuMatrix{T},alpha::Number,beta::Number) where {T} = mm_wrapper('T','N',alpha,parent(transA),B,beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:CuSparseMatrix{T}},transB::Transpose{<:Any, <:CuMatrix{T}},alpha::Number,beta::Number) where {T} = mm_wrapper('T','T',alpha,parent(transA),parent(transB),beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},adjA::Adjoint{<:Any, <:CuSparseMatrix{T}},B::CuMatrix{T},alpha::Number,beta::Number) where {T} = mm_wrapper('C','N',alpha,parent(adjA),B,beta,C) + +LinearAlgebra.mul!(C::CuMatrix{T},A::HermOrSym{<:Number, <:CuSparseMatrix},B::CuMatrix,alpha::Number,beta::Number) where {T} = mm_wrapper('N',alpha,A,B,beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:HermOrSym{<:Number, <:CuSparseMatrix}},B::CuMatrix,alpha::Number,beta::Number) where {T} = mm_wrapper('T',alpha,parent(transA),B,beta,C) +LinearAlgebra.mul!(C::CuMatrix{T},adjA::Adjoint{<:Any, <:HermOrSym{<:Number, <:CuSparseMatrix}},B::CuMatrix,alpha::Number,beta::Number) where {T} = mm_wrapper('C',alpha,parent(adjA),B,beta,C) + +Base.:(\)(A::Union{UpperTriangular{T, S},LowerTriangular{T, S}}, B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('N',A,B,'O') Base.:(\)(transA::Transpose{T, UpperTriangular{T, S}}, B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('T',parent(transA),B,'O') Base.:(\)(transA::Transpose{T, LowerTriangular{T, S}}, B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('T',parent(transA),B,'O') Base.:(\)(adjA::Adjoint{T, UpperTriangular{T, S}},B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('C',parent(adjA),B,'O') Base.:(\)(adjA::Adjoint{T, LowerTriangular{T, S}},B::CuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('C',parent(adjA),B,'O') -LinearAlgebra.mul!(C::CuVector{T},A::CuSparseMatrix,B::CuVector) where {T} = mv!('N',one(T),A,B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuVector{T},transA::Transpose{<:Any,<:CuSparseMatrix},B::CuVector) where {T} = mv!('T',one(T),parent(transA),B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuVector{T},adjA::Adjoint{<:Any,<:CuSparseMatrix},B::CuVector) where {T} = mv!('C',one(T),parent(adjA),B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuVector{T},A::HermOrSym{T,<:CuSparseMatrix{T}},B::CuVector{T}) where T = mv!('N',one(T),A,B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuVector{T},transA::Transpose{<:Any, <:HermOrSym{T,<:CuSparseMatrix{T}}},B::CuVector{T}) where {T} = mv!('T',one(T),parent(transA),B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuVector{T},adjA::Adjoint{<:Any, <:HermOrSym{T,<:CuSparseMatrix{T}}},B::CuVector{T}) where {T} = mv!('C',one(T),parent(adjA),B,zero(T),C,'O') - -LinearAlgebra.mul!(C::CuMatrix{T},A::CuSparseMatrix{T},B::CuMatrix{T}) where {T} = mm2!('N','N',one(T),A,B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},A::CuSparseMatrix{T},transB::Transpose{<:Any, <:CuMatrix{T}}) where {T} = mm2!('N','T',one(T),A,parent(transB),zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:CuSparseMatrix{T}},B::CuMatrix{T}) where {T} = mm2!('T','N',one(T),parent(transA),B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:CuSparseMatrix{T}},transB::Transpose{<:Any, <:CuMatrix{T}}) where {T} = mm2!('T','T',one(T),parent(transA),parent(transB),zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},adjA::Adjoint{<:Any, <:CuSparseMatrix{T}},B::CuMatrix{T}) where {T} = mm2!('C','N',one(T),parent(adjA),B,zero(T),C,'O') - -LinearAlgebra.mul!(C::CuMatrix{T},A::HermOrSym{<:Number, <:CuSparseMatrix},B::CuMatrix) where {T} = mm!('N',one(T),A,B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},transA::Transpose{<:Any, <:HermOrSym{<:Number, <:CuSparseMatrix}},B::CuMatrix) where {T} = mm!('T',one(T),parent(transA),B,zero(T),C,'O') -LinearAlgebra.mul!(C::CuMatrix{T},adjA::Adjoint{<:Any, <:HermOrSym{<:Number, <:CuSparseMatrix}},B::CuMatrix) where {T} = mm!('C',one(T),parent(adjA),B,zero(T),C,'O') - Base.:(\)(A::Union{UpperTriangular{T, S},LowerTriangular{T, S}}, B::CuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('N',A,B,'O') Base.:(\)(transA::Transpose{T, UpperTriangular{T, S}},B::CuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O') Base.:(\)(transA::Transpose{T, LowerTriangular{T, S}},B::CuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O') diff --git a/lib/cusparse/level1.jl b/lib/cusparse/level1.jl index 01d9173338..31cba6b0f3 100644 --- a/lib/cusparse/level1.jl +++ b/lib/cusparse/level1.jl @@ -14,14 +14,14 @@ for (fname,elty) in ((:cusparseSaxpyi, :Float32), (:cusparseCaxpyi, :ComplexF32), (:cusparseZaxpyi, :ComplexF64)) @eval begin - function axpyi!(alpha::$elty, + function axpyi!(alpha::Number, X::CuSparseVector{$elty}, Y::CuVector{$elty}, index::SparseChar) - $fname(handle(), X.nnz, [alpha], X.nzVal, X.iPtr, Y, index) + $fname(handle(), nnz(X), Ref{$elty}(alpha), nonzeros(X), nonzeroinds(X), Y, index) Y end - function axpyi(alpha::$elty, + function axpyi(alpha::Number, X::CuSparseVector{$elty}, Y::CuVector{$elty}, index::SparseChar) @@ -49,7 +49,7 @@ for (fname,elty) in ((:cusparseSgthr, :Float32), function gthr!(X::CuSparseVector{$elty}, Y::CuVector{$elty}, index::SparseChar) - $fname(handle(), X.nnz, Y, X.nzVal, X.iPtr, index) + $fname(handle(), nnz(X), Y, nonzeros(X), nonzeroinds(X), index) X end function gthr(X::CuSparseVector{$elty}, @@ -74,7 +74,7 @@ for (fname,elty) in ((:cusparseSgthrz, :Float32), function gthrz!(X::CuSparseVector{$elty}, Y::CuVector{$elty}, index::SparseChar) - $fname(handle(), X.nnz, Y, X.nzVal, X.iPtr, index) + $fname(handle(), nnz(X), Y, nonzeros(X), nonzeroinds(X), index) X,Y end function gthrz(X::CuSparseVector{$elty}, @@ -96,16 +96,16 @@ for (fname,elty) in ((:cusparseSroti, :Float32), @eval begin function roti!(X::CuSparseVector{$elty}, Y::CuVector{$elty}, - c::$elty, - s::$elty, + c::Number, + s::Number, index::SparseChar) - $fname(handle(), X.nnz, X.nzVal, X.iPtr, Y, [c], [s], index) + $fname(handle(), nnz(X), nonzeros(X), nonzeroinds(X), Y, Ref{$elty}(c), Ref{$elty}(s), index) X,Y end function roti(X::CuSparseVector{$elty}, Y::CuVector{$elty}, - c::$elty, - s::$elty, + c::Number, + s::Number, index::SparseChar) roti!(copy(X),copy(Y),c,s,index) end @@ -126,7 +126,7 @@ for (fname,elty) in ((:cusparseSsctr, :Float32), function sctr!(X::CuSparseVector{$elty}, Y::CuVector{$elty}, index::SparseChar) - $fname(handle(), X.nnz, X.nzVal, X.iPtr, Y, index) + $fname(handle(), nnz(X), nonzeros(X), nonzeroinds(X), Y, index) Y end function sctr(X::CuSparseVector{$elty}, diff --git a/lib/cusparse/level2.jl b/lib/cusparse/level2.jl index 286b4f636f..fb3b860b66 100644 --- a/lib/cusparse/level2.jl +++ b/lib/cusparse/level2.jl @@ -19,10 +19,10 @@ for (fname,elty) in ((:cusparseSbsrmv, :Float32), (:cusparseZbsrmv, :ComplexF64)) @eval begin function mv!(transa::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixBSR{$elty}, X::CuVector{$elty}, - beta::$elty, + beta::Number, Y::CuVector{$elty}, index::SparseChar) desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, index) @@ -36,8 +36,8 @@ for (fname,elty) in ((:cusparseSbsrmv, :Float32), chkmvdims(X,m,Y,n) end $fname(handle(), A.dir, transa, mb, nb, - A.nnz, [alpha], desc, A.nzVal, A.rowPtr, - A.colVal, A.blockDim, X, [beta], Y) + nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, + A.colVal, A.blockDim, X, Ref{$elty}(beta), Y) Y end end @@ -59,7 +59,7 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsv2_bufferSize, :cusparseSbsrsv2_ @eval begin function sv2!(transa::SparseChar, uplo::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixBSR{$elty}, X::CuVector{$elty}, index::SparseChar) @@ -76,20 +76,20 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsv2_bufferSize, :cusparseSbsrsv2_ info = bsrsv2Info_t[0] cusparseCreateBsrsv2Info(info) @workspace size=@argout( - $bname(handle(), A.dir, transa, mb, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, A.blockDim, + $bname(handle(), A.dir, transa, mb, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), A.dir, transa, mb, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, A.blockDim, + $aname(handle(), A.dir, transa, mb, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXbsrsv2_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), A.dir, transa, mb, A.nnz, - [alpha], desc, A.nzVal, A.rowPtr, A.colVal, + $sname(handle(), A.dir, transa, mb, nnz(A), + Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], X, X, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end @@ -103,7 +103,7 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64) @eval begin function sv2(transa::SparseChar, uplo::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrix{$elty}, X::CuVector{$elty}, index::SparseChar) @@ -117,7 +117,7 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64) sv2!(transa,uplo,one($elty),A,copy(X),index) end function sv2(transa::SparseChar, - alpha::$elty, + alpha::Number, A::AbstractTriangular, X::CuVector{$elty}, index::SparseChar) @@ -148,7 +148,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ @eval begin function sv2!(transa::SparseChar, uplo::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSR{$elty}, X::CuVector{$elty}, index::SparseChar) @@ -164,12 +164,12 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ info = csrsv2Info_t[0] cusparseCreateCsrsv2Info(info) @workspace size=@argout( - $bname(handle(), transa, m, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, info[1], + $bname(handle(), transa, m, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), transa, m, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, info[1], + $aname(handle(), transa, m, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsrsv2_zeroPivot(handle(), info[1], posit) @@ -177,7 +177,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end $sname(handle(), transa, m, - A.nnz, [alpha], desc, A.nzVal, A.rowPtr, + nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, info[1], X, X, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end @@ -195,7 +195,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ @eval begin function sv2!(transa::SparseChar, uplo::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSC{$elty}, X::CuVector{$elty}, index::SparseChar) @@ -219,12 +219,12 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ info = csrsv2Info_t[0] cusparseCreateCsrsv2Info(info) @workspace size=@argout( - $bname(handle(), ctransa, m, A.nnz, - desc, A.nzVal, A.colPtr, A.rowVal, info[1], + $bname(handle(), ctransa, m, nnz(A), + desc, nonzeros(A), A.colPtr, rowvals(A), info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), ctransa, m, A.nnz, - desc, A.nzVal, A.colPtr, A.rowVal, info[1], + $aname(handle(), ctransa, m, nnz(A), + desc, nonzeros(A), A.colPtr, rowvals(A), info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsrsv2_zeroPivot(handle(), info[1], posit) @@ -232,8 +232,8 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end $sname(handle(), ctransa, m, - A.nnz, [alpha], desc, A.nzVal, A.colPtr, - A.rowVal, info[1], X, X, + nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.colPtr, + rowvals(A), info[1], X, X, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyCsrsv2Info(info[1]) diff --git a/lib/cusparse/level3.jl b/lib/cusparse/level3.jl index a42d2abb5d..c1fa5237e2 100644 --- a/lib/cusparse/level3.jl +++ b/lib/cusparse/level3.jl @@ -1,30 +1,20 @@ # sparse linear algebra functions that perform operations between sparse and (usually tall) # dense matrices -export mm2!, mm2 - -""" - mm2!(transa::SparseChar, transb::SparseChar, alpha::BlasFloat, A::CuSparseMatrix, B::CuMatrix, beta::BlasFloat, C::CuMatrix, index::SparseChar) - -Multiply the sparse matrix `A` by the dense matrix `B`, filling in dense matrix `C`. -`C = alpha*op(A)*op(B) + beta*C`. `op(A)` can be nothing (`transa = N`), transpose -(`transa = T`), or conjugate transpose (`transa = C`), and similarly for `op(B)` and -`transb`. -""" -mm2!(transa::SparseChar, transb::SparseChar, alpha::BlasFloat, A::CuSparseMatrix, B::CuMatrix, beta::BlasFloat, C::CuMatrix, index::SparseChar) +# bsrmm for (fname,elty) in ((:cusparseSbsrmm, :Float32), (:cusparseDbsrmm, :Float64), (:cusparseCbsrmm, :ComplexF32), (:cusparseZbsrmm, :ComplexF64)) @eval begin - function mm2!(transa::SparseChar, - transb::SparseChar, - alpha::$elty, - A::CuSparseMatrixBSR{$elty}, - B::CuMatrix{$elty}, - beta::$elty, - C::CuMatrix{$elty}, - index::SparseChar) + function mm!(transa::SparseChar, + transb::SparseChar, + alpha::Number, + A::CuSparseMatrixBSR{$elty}, + B::CuMatrix{$elty}, + beta::Number, + C::CuMatrix{$elty}, + index::SparseChar) desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, index) m,k = A.dims mb = div(m,A.blockDim) @@ -42,9 +32,9 @@ for (fname,elty) in ((:cusparseSbsrmm, :Float32), ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) $fname(handle(), A.dir, - transa, transb, mb, n, kb, A.nnz, - [alpha], desc, A.nzVal,A.rowPtr, A.colVal, - A.blockDim, B, ldb, [beta], C, ldc) + transa, transb, mb, n, kb, nnz(A), + Ref{$elty}(alpha), desc, nonzeros(A),A.rowPtr, A.colVal, + A.blockDim, B, ldb, Ref{$elty}(beta), C, ldc) C end end @@ -57,10 +47,10 @@ for (fname,elty) in ((:cusparseScsrmm2, :Float32), @eval begin function mm2!(transa::SparseChar, transb::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSR{$elty}, B::CuMatrix{$elty}, - beta::$elty, + beta::Number, C::CuMatrix{$elty}, index::SparseChar) desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_LOWER, CUSPARSE_DIAG_TYPE_NON_UNIT, index) @@ -78,16 +68,16 @@ for (fname,elty) in ((:cusparseScsrmm2, :Float32), ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) $fname(handle(), - transa, transb, m, n, k, A.nnz, [alpha], desc, - A.nzVal, A.rowPtr, A.colVal, B, ldb, [beta], C, ldc) + transa, transb, m, n, k, nnz(A), Ref{$elty}(alpha), desc, + nonzeros(A), A.rowPtr, A.colVal, B, ldb, Ref{$elty}(beta), C, ldc) C end function mm2!(transa::SparseChar, transb::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSC{$elty}, B::CuMatrix{$elty}, - beta::$elty, + beta::Number, C::CuMatrix{$elty}, index::SparseChar) ctransa = 'N' @@ -109,64 +99,13 @@ for (fname,elty) in ((:cusparseScsrmm2, :Float32), ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) $fname(handle(), - ctransa, transb, m, n, k, A.nnz, [alpha], desc, - A.nzVal, A.colPtr, A.rowVal, B, ldb, [beta], C, ldc) + ctransa, transb, m, n, k, nnz(A), Ref{$elty}(alpha), desc, + nonzeros(A), A.colPtr, rowvals(A), B, ldb, Ref{$elty}(beta), C, ldc) C end end end -for elty in (:Float32,:Float64,:ComplexF32,:ComplexF64) - @eval begin - function mm2(transa::SparseChar, - transb::SparseChar, - alpha::$elty, - A::Union{CuSparseMatrixCSR{$elty},CuSparseMatrixCSC{$elty},CuSparseMatrixBSR{$elty}}, - B::CuMatrix{$elty}, - beta::$elty, - C::CuMatrix{$elty}, - index::SparseChar) - mm2!(transa,transb,alpha,A,B,beta,copy(C),index) - end - function mm2(transa::SparseChar, - transb::SparseChar, - A::Union{CuSparseMatrixCSR{$elty},CuSparseMatrixCSC{$elty},CuSparseMatrixBSR{$elty}}, - B::CuMatrix{$elty}, - beta::$elty, - C::CuMatrix{$elty}, - index::SparseChar) - mm2(transa,transb,one($elty),A,B,beta,C,index) - end - function mm2(transa::SparseChar, - transb::SparseChar, - A::Union{CuSparseMatrixCSR{$elty},CuSparseMatrixCSC{$elty},CuSparseMatrixBSR{$elty}}, - B::CuMatrix{$elty}, - C::CuMatrix{$elty}, - index::SparseChar) - mm2(transa,transb,one($elty),A,B,one($elty),C,index) - end - function mm2(transa::SparseChar, - transb::SparseChar, - alpha::$elty, - A::Union{CuSparseMatrixCSR{$elty},CuSparseMatrixCSC{$elty},CuSparseMatrixBSR{$elty}}, - B::CuMatrix{$elty}, - index::SparseChar) - m = transa == 'N' ? size(A)[1] : size(A)[2] - n = transb == 'N' ? size(B)[2] : size(B)[1] - mm2(transa,transb,alpha,A,B,zero($elty),CUDA.zeros($elty,(m,n)),index) - end - function mm2(transa::SparseChar, - transb::SparseChar, - A::Union{CuSparseMatrixCSR{$elty},CuSparseMatrixCSC{$elty},CuSparseMatrixBSR{$elty}}, - B::CuMatrix{$elty}, - index::SparseChar) - m = transa == 'N' ? size(A)[1] : size(A)[2] - n = transb == 'N' ? size(B)[2] : size(B)[1] - mm2(transa,transb,one($elty),A,B,zero($elty),CUDA.zeros($elty,(m,n)),index) - end - end -end - # bsrsm2 for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_analysis, :cusparseSbsrsm2_solve, :Float32), (:cusparseDbsrsm2_bufferSize, :cusparseDbsrsm2_analysis, :cusparseDbsrsm2_solve, :Float64), @@ -175,7 +114,7 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_ @eval begin function bsrsm2!(transa::SparseChar, transxy::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixBSR{$elty}, X::CuMatrix{$elty}, index::SparseChar) @@ -197,12 +136,12 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_ cusparseCreateBsrsm2Info(info) @workspace size=@argout( $bname(handle(), A.dir, transa, transxy, - mb, nX, A.nnz, desc, A.nzVal, A.rowPtr, + mb, nX, nnz(A), desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[], out(Ref{Cint}(1))) )[] buffer->begin $aname(handle(), A.dir, transa, transxy, - mb, nX, A.nnz, desc, A.nzVal, A.rowPtr, + mb, nX, nnz(A), desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) @@ -211,7 +150,7 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_ error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end $sname(handle(), A.dir, transa, transxy, mb, - nX, A.nnz, [alpha], desc, A.nzVal, A.rowPtr, + nX, nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[], X, ldx, X, ldx, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end @@ -220,7 +159,7 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsm2_bufferSize, :cusparseSbsrsm2_ end function bsrsm2(transa::SparseChar, transxy::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixBSR{$elty}, X::CuMatrix{$elty}, index::SparseChar) @@ -237,7 +176,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsm2_bufferSizeExt, :cusparseScsrs @eval begin function csrsm2!(transa::SparseChar, transxy::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSR{$elty}, X::CuMatrix{$elty}, index::SparseChar) @@ -259,12 +198,12 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsm2_bufferSizeExt, :cusparseScsrs # use non block algo (0) for now... @workspace size=@argout( $bname(handle(), 0, transa, transxy, - m, nX, A.nnz, [alpha], desc, A.nzVal, A.rowPtr, + m, nX, nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, X, ldx, info[], CUSPARSE_SOLVE_POLICY_USE_LEVEL, out(Ref{UInt64}(1))) )[] buffer->begin $aname(handle(), 0, transa, transxy, - m, nX, A.nnz, [alpha], desc, A.nzVal, A.rowPtr, + m, nX, nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, X, ldx, info[], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) @@ -273,7 +212,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsm2_bufferSizeExt, :cusparseScsrs error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end $sname(handle(), 0, transa, transxy, m, - nX, A.nnz, [alpha], desc, A.nzVal, A.rowPtr, + nX, nnz(A), Ref{$elty}(alpha), desc, nonzeros(A), A.rowPtr, A.colVal, X, ldx, info[], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end @@ -282,7 +221,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsm2_bufferSizeExt, :cusparseScsrs end function csrsm2(transa::SparseChar, transxy::SparseChar, - alpha::$elty, + alpha::Number, A::CuSparseMatrixCSR{$elty}, X::CuMatrix{$elty}, index::SparseChar) diff --git a/lib/cusparse/libcusparse_old.jl b/lib/cusparse/libcusparse_old.jl index 9244608181..f806cb3a4d 100644 --- a/lib/cusparse/libcusparse_old.jl +++ b/lib/cusparse/libcusparse_old.jl @@ -50,3 +50,55 @@ end cscSortedVal, cscSortedRowInd, cscSortedColPtr, copyValues, idxBase) end +@checked function cusparseScsrmm2(handle, transA, transB, m, n, k, nnz, alpha, descrA, + csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, + ldb, beta, C, ldc) + initialize_api() + @runtime_ccall((:cusparseScsrmm2, libcusparse()), cusparseStatus_t, + (cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, Cint, + Cint, Cint, Cint, Ptr{Cfloat}, cusparseMatDescr_t, CuPtr{Cfloat}, + CuPtr{Cint}, CuPtr{Cint}, CuPtr{Cfloat}, Cint, Ptr{Cfloat}, + CuPtr{Cfloat}, Cint), + handle, transA, transB, m, n, k, nnz, alpha, descrA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc) +end + +@checked function cusparseDcsrmm2(handle, transA, transB, m, n, k, nnz, alpha, descrA, + csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, + ldb, beta, C, ldc) + initialize_api() + @runtime_ccall((:cusparseDcsrmm2, libcusparse()), cusparseStatus_t, + (cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, Cint, + Cint, Cint, Cint, Ptr{Cdouble}, cusparseMatDescr_t, CuPtr{Cdouble}, + CuPtr{Cint}, CuPtr{Cint}, CuPtr{Cdouble}, Cint, Ptr{Cdouble}, + CuPtr{Cdouble}, Cint), + handle, transA, transB, m, n, k, nnz, alpha, descrA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc) +end + +@checked function cusparseCcsrmm2(handle, transA, transB, m, n, k, nnz, alpha, descrA, + csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, + ldb, beta, C, ldc) + initialize_api() + @runtime_ccall((:cusparseCcsrmm2, libcusparse()), cusparseStatus_t, + (cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, Cint, + Cint, Cint, Cint, Ptr{cuComplex}, cusparseMatDescr_t, CuPtr{cuComplex}, + CuPtr{Cint}, CuPtr{Cint}, CuPtr{cuComplex}, Cint, Ptr{cuComplex}, + CuPtr{cuComplex}, Cint), + handle, transA, transB, m, n, k, nnz, alpha, descrA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc) +end + +@checked function cusparseZcsrmm2(handle, transA, transB, m, n, k, nnz, alpha, descrA, + csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, + ldb, beta, C, ldc) + initialize_api() + @runtime_ccall((:cusparseZcsrmm2, libcusparse()), cusparseStatus_t, + (cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, Cint, + Cint, Cint, Cint, Ptr{cuDoubleComplex}, cusparseMatDescr_t, + CuPtr{cuDoubleComplex}, CuPtr{Cint}, CuPtr{Cint}, + CuPtr{cuDoubleComplex}, Cint, Ptr{cuDoubleComplex}, + CuPtr{cuDoubleComplex}, Cint), + handle, transA, transB, m, n, k, nnz, alpha, descrA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc) +end diff --git a/lib/cusparse/preconditioners.jl b/lib/cusparse/preconditioners.jl index 69af743db0..2c7a86e4bc 100644 --- a/lib/cusparse/preconditioners.jl +++ b/lib/cusparse/preconditioners.jl @@ -24,20 +24,20 @@ for (bname,aname,sname,elty) in ((:cusparseScsric02_bufferSize, :cusparseScsric0 info = csric02Info_t[0] cusparseCreateCsric02Info(info) @workspace size=@argout( - $bname(handle(), m, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, info[1], + $bname(handle(), m, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), m, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, info[1], + $aname(handle(), m, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsric02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), m, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, info[1], + $sname(handle(), m, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyCsric02Info(info[1]) @@ -62,20 +62,20 @@ for (bname,aname,sname,elty) in ((:cusparseScsric02_bufferSize, :cusparseScsric0 info = csric02Info_t[0] cusparseCreateCsric02Info(info) @workspace size=@argout( - $bname(handle(), m, A.nnz, desc, - A.nzVal, A.colPtr, A.rowVal, info[1], + $bname(handle(), m, nnz(A), desc, + nonzeros(A), A.colPtr, rowvals(A), info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), m, A.nnz, desc, - A.nzVal, A.colPtr, A.rowVal, info[1], + $aname(handle(), m, nnz(A), desc, + nonzeros(A), A.colPtr, rowvals(A), info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsric02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), m, A.nnz, - desc, A.nzVal, A.colPtr, A.rowVal, info[1], + $sname(handle(), m, nnz(A), + desc, nonzeros(A), A.colPtr, rowvals(A), info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyCsric02Info(info[1]) @@ -106,20 +106,20 @@ for (bname,aname,sname,elty) in ((:cusparseScsrilu02_bufferSize, :cusparseScsril info = csrilu02Info_t[0] cusparseCreateCsrilu02Info(info) @workspace size=@argout( - $bname(handle(), m, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, info[1], + $bname(handle(), m, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), m, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, info[1], + $aname(handle(), m, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsrilu02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), m, A.nnz, - desc, A.nzVal, A.rowPtr, A.colVal, info[1], + $sname(handle(), m, nnz(A), + desc, nonzeros(A), A.rowPtr, A.colVal, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyCsrilu02Info(info[1]) @@ -144,20 +144,20 @@ for (bname,aname,sname,elty) in ((:cusparseScsrilu02_bufferSize, :cusparseScsril info = csrilu02Info_t[0] cusparseCreateCsrilu02Info(info) @workspace size=@argout( - $bname(handle(), m, A.nnz, desc, - A.nzVal, A.colPtr, A.rowVal, info[1], + $bname(handle(), m, nnz(A), desc, + nonzeros(A), A.colPtr, rowvals(A), info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), m, A.nnz, desc, - A.nzVal, A.colPtr, A.rowVal, info[1], + $aname(handle(), m, nnz(A), desc, + nonzeros(A), A.colPtr, rowvals(A), info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXcsrilu02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), m, A.nnz, - desc, A.nzVal, A.colPtr, A.rowVal, info[1], + $sname(handle(), m, nnz(A), + desc, nonzeros(A), A.colPtr, rowvals(A), info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyCsrilu02Info(info[1]) @@ -183,20 +183,20 @@ for (bname,aname,sname,elty) in ((:cusparseSbsric02_bufferSize, :cusparseSbsric0 info = bsric02Info_t[0] cusparseCreateBsric02Info(info) @workspace size=@argout( - $bname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $bname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $aname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXbsric02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $sname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyBsric02Info(info[1]) @@ -222,20 +222,20 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrilu02_bufferSize, :cusparseSbsril info = bsrilu02Info_t[0] cusparseCreateBsrilu02Info(info) @workspace size=@argout( - $bname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $bname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], out(Ref{Cint}(1))) )[] buffer->begin - $aname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $aname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) posit = Ref{Cint}(1) cusparseXbsrilu02_zeroPivot(handle(), info[1], posit) if posit[] >= 0 error("Structural/numerical zero in A at ($(posit[]),$(posit[])))") end - $sname(handle(), A.dir, mb, A.nnz, desc, - A.nzVal, A.rowPtr, A.colVal, A.blockDim, info[1], + $sname(handle(), A.dir, mb, nnz(A), desc, + nonzeros(A), A.rowPtr, A.colVal, A.blockDim, info[1], CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer) end cusparseDestroyBsrilu02Info(info[1]) diff --git a/test/cusparse.jl b/test/cusparse.jl index 843ea90adc..b4baed742b 100644 --- a/test/cusparse.jl +++ b/test/cusparse.jl @@ -2,6 +2,7 @@ using CUDA.CUSPARSE using LinearAlgebra using SparseArrays +using SparseArrays: nonzeroinds @test CUSPARSE.version() isa VersionNumber @@ -29,7 +30,7 @@ blockdim = 5 @test_throws BoundsError d_x[end + 1] @test nnz(d_x) == nnz(x) @test Array(nonzeros(d_x)) == nonzeros(x) - @test Array(SparseArrays.nonzeroinds(d_x)) == SparseArrays.nonzeroinds(x) + @test Array(nonzeroinds(d_x)) == nonzeroinds(x) @test nnz(d_x) == length(nonzeros(d_x)) x = sprand(m,n,0.2) d_x = CuSparseMatrixCSC(x) @@ -69,11 +70,11 @@ blockdim = 5 y = sprand(k,n,0.2) d_y = CuSparseMatrixCSC(y) @test_throws ArgumentError copyto!(d_y,d_x) - d_y = CUSPARSE.switch2csr(d_y) - d_x = CUSPARSE.switch2csr(d_x) + d_y = CuSparseMatrixCSR(d_y) + d_x = CuSparseMatrixCSR(d_x) @test_throws ArgumentError copyto!(d_y,d_x) - d_y = CUSPARSE.switch2bsr(d_y,convert(Cint,blockdim)) - d_x = CUSPARSE.switch2bsr(d_x,convert(Cint,blockdim)) + d_y = CuSparseMatrixBSR(d_y, blockdim) + d_x = CuSparseMatrixBSR(d_x, blockdim) @test_throws ArgumentError copyto!(d_y,d_x) x = sprand(m,0.2) d_x = CuSparseVector(x) @@ -98,97 +99,84 @@ blockdim = 5 @test istril(d_x) end -@testset "conversion" begin +@testset "construction" begin @testset for elty in [Float32, Float64, ComplexF32, ComplexF64] - @testset "make_csc" begin + @testset "CSC" begin x = sprand(elty,m,n, 0.2) d_x = CuSparseMatrixCSC(x) - h_x = collect(d_x) - @test h_x == x - @test eltype(d_x) == elty + @test collect(d_x) == collect(x) end - @testset "make_csr" begin + @testset "CSR" begin x = sprand(elty,m,n, 0.2) - d_xc = CuSparseMatrixCSC(x) d_x = CuSparseMatrixCSR(x) - h_x = collect(d_x) - @test h_x == x + @test collect(d_x) == collect(x) end - @testset "convert_r2c" begin + @testset "BSR" begin x = sprand(elty,m,n, 0.2) - d_x = CuSparseMatrixCSR(x) - d_x = CUSPARSE.switch2csc(d_x) - h_x = collect(d_x) - @test h_x.rowval == x.rowval - @test h_x.nzval ≈ x.nzval + d_x = CuSparseMatrixBSR(x, blockdim) + @test collect(d_x) == collect(x) end + end +end - @testset "convert_r2b" begin +@testset "conversion" begin + @testset for elty in [Float32, Float64, ComplexF32, ComplexF64] + @testset "CSC(::CSR)" begin x = sprand(elty,m,n, 0.2) d_x = CuSparseMatrixCSR(x) - d_x = CUSPARSE.switch2bsr(d_x,convert(Cint,blockdim)) - d_x = CUSPARSE.switch2csr(d_x) - h_x = collect(d_x) - @test h_x ≈ x + d_x = CuSparseMatrixCSC(d_x) + @test collect(d_x) == collect(x) end - @testset "convert_c2b" begin + @testset "BSR(::CSR)" begin x = sprand(elty,m,n, 0.2) - d_x = CuSparseMatrixCSC(x) - d_x = CUSPARSE.switch2bsr(d_x,convert(Cint,blockdim)) - d_x = CUSPARSE.switch2csc(d_x) - h_x = collect(d_x) - @test h_x ≈ x + d_x = CuSparseMatrixCSR(x) + d_x = CuSparseMatrixBSR(d_x, blockdim) + @test collect(d_x) == collect(x) end - @testset "convert_d2b" begin + @testset "BSR(::Dense)" begin x = rand(elty,m,n) d_x = CuArray(x) - d_x = CUSPARSE.sparse(d_x,'B') - d_y = Array(d_x) - h_x = collect(d_y) - @test h_x ≈ x + d_x = CuSparseMatrixBSR(d_x) + @test collect(d_x) ≈ x end - @testset "convert_c2r" begin + @testset "CSR(CSC)" begin x = sprand(elty,m,n, 0.2) d_x = CuSparseMatrixCSC(x) - d_x = CUSPARSE.switch2csr(d_x) - h_x = collect(d_x) - @test h_x.rowval == x.rowval - @test h_x.nzval ≈ x.nzval + d_x = CuSparseMatrixCSR(d_x) + @test collect(d_x) == collect(x) end - @testset "convert_r2d" begin + @testset "Dense(::CSR)" begin x = sprand(elty,m,n, 0.2) d_x = CuSparseMatrixCSR(x) - d_x = Array(d_x) h_x = collect(d_x) @test h_x ≈ Array(x) end - @testset "convert_c2d" begin + @testset "Dense(CSC)" begin x = sprand(elty,m,n, 0.2) d_x = CuSparseMatrixCSC(x) - d_x = Array(d_x) h_x = collect(d_x) @test h_x ≈ Array(x) end - @testset "convert_d2c" begin + @testset "CSC(::Dense)" begin x = rand(elty,m,n) d_x = CuArray(x) - d_x = CUSPARSE.sparse(d_x,'C') + d_x = CuSparseMatrixCSC(d_x) h_x = collect(d_x) @test h_x ≈ sparse(x) end - @testset "convert_d2r" begin + @testset "CSR(::Dense)" begin x = rand(elty,m,n) d_x = CuArray(x) - d_x = CUSPARSE.sparse(d_x) + d_x = CuSparseMatrixCSR(d_x) h_x = collect(d_x) @test h_x ≈ sparse(x) end @@ -203,27 +191,27 @@ end @testset "bsric02!" begin d_A = CuSparseMatrixCSR(sparse(tril(A))) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_A = CUSPARSE.ic02!(d_A,'O') - h_A = collect(CUSPARSE.switch2csr(d_A)) + h_A = SparseMatrixCSC(CuSparseMatrixCSR(d_A)) Ac = sparse(Array(cholesky(Hermitian(A)))) h_A = transpose(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) d_A = CuSparseMatrixCSR(sparse(tril(rand(elty,m,n)))) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.ic02!(d_A,'O') end @testset "bsric02" begin d_A = CuSparseMatrixCSR(sparse(tril(A))) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_B = CUSPARSE.ic02(d_A,'O') - h_A = collect(CUSPARSE.switch2csr(d_B)) + h_A = SparseMatrixCSC(CuSparseMatrixCSR(d_B)) Ac = sparse(Array(cholesky(Hermitian(A)))) h_A = transpose(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) end end end @@ -235,29 +223,29 @@ end A += m * Diagonal{elty}(I, m) @testset "bsrilu02!" begin d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_A = CUSPARSE.ilu02!(d_A,'O') - h_A = collect(CUSPARSE.switch2csr(d_A)) + h_A = SparseMatrixCSC(CuSparseMatrixCSR(d_A)) Alu = lu(Array(A), Val(false)) Ac = sparse(Alu.L*Alu.U) h_A = adjoint(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) d_A = CuSparseMatrixCSR(sparse(rand(elty,m,n))) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.ilu02!(d_A,'O') end @testset "bsrilu02" begin d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_B = CUSPARSE.ilu02(d_A,'O') - h_A = collect(CUSPARSE.switch2csr(d_B)) + h_A = SparseMatrixCSC(CuSparseMatrixCSR(d_B)) Alu = lu(Array(A),Val(false)) Ac = sparse(Alu.L*Alu.U) h_A = adjoint(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) end end end @@ -271,7 +259,7 @@ end alpha = rand(elty) d_X = CuArray(X) d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_X = CUSPARSE.bsrsm2!('N','N',alpha,d_A,d_X,'O') h_Y = collect(d_X) Y = A\(alpha * X) @@ -281,7 +269,7 @@ end @test_throws DimensionMismatch CUSPARSE.bsrsm2!('N','T',alpha,d_A,d_X,'O') A = sparse(rand(elty,m,n)) d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.bsrsm2!('N','N',alpha,d_A,d_X,'O') end @@ -292,14 +280,14 @@ end alpha = rand(elty) d_X = CuArray(X) d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_Y = CUSPARSE.bsrsm2('N','N',alpha,d_A,d_X,'O') h_Y = collect(d_Y) Y = A\(alpha * X) @test Y ≈ h_Y A = sparse(rand(elty,m,n)) d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.bsrsm2('N','N',alpha,d_A,d_X,'O') end end @@ -313,7 +301,7 @@ end alpha = rand(elty) d_X = CuArray(X) d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_X = CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') h_Y = collect(d_X) Y = A\(alpha * X) @@ -322,7 +310,7 @@ end @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') A = sparse(rand(elty,m,n)) d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') end @@ -334,7 +322,7 @@ end alpha = rand(elty) d_X = CuArray(X) d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') h_Y = collect(d_Y) Y = A\(alpha * X) @@ -359,7 +347,7 @@ end @test h_Y ≈ transpose(Al)\X A = sparse(rand(elty,m,n)) d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A, convert(Cint,5)) + d_A = CuSparseMatrixBSR(d_A, blockdim) @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') end end @@ -411,12 +399,12 @@ end A += m * Diagonal{elty}(I, m) d_A = CuSparseMatrixCSR(sparse(A)) d_B = CUSPARSE.ilu02(d_A,'O') - h_A = collect(d_B) + h_A = SparseMatrixCSC(d_B) Alu = lu(Array(A),Val(false)) Ac = sparse(Alu.L*Alu.U) h_A = adjoint(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) end @testset "csc" begin A = rand(elty,m,m) @@ -424,12 +412,12 @@ end A += m * Diagonal{elty}(I, m) d_A = CuSparseMatrixCSC(sparse(A)) d_B = CUSPARSE.ilu02(d_A,'O') - h_A = collect(d_B) + h_A = SparseMatrixCSC(d_B) Alu = lu(Array(A),Val(false)) Ac = sparse(Alu.L*Alu.U) h_A = adjoint(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) end end end @@ -442,11 +430,11 @@ end A += m * Diagonal{elty}(I, m) d_A = CuSparseMatrixCSR(sparse(tril(A))) d_B = CUSPARSE.ic02(d_A, 'O') - h_A = collect(d_B) + h_A = SparseMatrixCSC(d_B) Ac = sparse(Array(cholesky(Hermitian(A)))) h_A = transpose(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) A = rand(elty,m,n) d_A = CuSparseMatrixCSR(sparse(tril(A))) @test_throws DimensionMismatch CUSPARSE.ic02(d_A, 'O') @@ -457,11 +445,11 @@ end A += m * Diagonal{elty}(I, m) d_A = CuSparseMatrixCSC(sparse(tril(A))) d_B = CUSPARSE.ic02(d_A, 'O') - h_A = collect(d_B) + h_A = SparseMatrixCSC(d_B) Ac = sparse(Array(cholesky(Hermitian(A)))) h_A = transpose(h_A) * h_A - @test h_A.rowval ≈ Ac.rowval - @test reduce(&, isfinite.(h_A.nzval)) + @test rowvals(h_A) ≈ rowvals(Ac) + @test reduce(&, isfinite.(nonzeros(h_A))) A = rand(elty,m,n) d_A = CuSparseMatrixCSC(sparse(tril(A))) @test_throws DimensionMismatch CUSPARSE.ic02(d_A, 'O') @@ -567,7 +555,7 @@ end d_y = CUSPARSE.axpyi!(alpha,d_x,d_y,'O') #compare h_y = collect(d_y) - y[x.nzind] += alpha * x.nzval + y[nonzeroinds(x)] += alpha * nonzeros(x) @test h_y ≈ y end @@ -581,13 +569,13 @@ end #compare h_z = collect(d_z) z = copy(y) - z[x.nzind] += alpha * x.nzval + z[nonzeroinds(x)] += alpha * nonzeros(x) @test h_z ≈ z d_z = CUSPARSE.axpyi(d_x,d_y,'O') #compare h_z = collect(d_z) z = copy(y) - z[x.nzind] += x.nzval + z[nonzeroinds(x)] += nonzeros(x) @test h_z ≈ z end end @@ -602,7 +590,7 @@ end d_y = CuArray(y) d_y = CUSPARSE.gthr!(d_x,d_y,'O') h_x = collect(d_x) - @test h_x ≈ SparseVector(m,x.nzind,y[x.nzind]) + @test h_x ≈ SparseVector(m,nonzeroinds(x),y[nonzeroinds(x)]) end @testset "gthr" begin @@ -610,7 +598,7 @@ end d_y = CuArray(y) d_z = CUSPARSE.gthr(d_x,d_y,'O') h_z = collect(d_z) - @test h_z ≈ SparseVector(m,x.nzind,y[x.nzind]) + @test h_z ≈ SparseVector(m,nonzeroinds(x),y[nonzeroinds(x)]) end @testset "gthrz!" begin @@ -619,8 +607,8 @@ end d_x,d_y = CUSPARSE.gthrz!(d_x,d_y,'O') h_x = collect(d_x) h_y = collect(d_y) - @test h_x ≈ SparseVector(m,x.nzind,y[x.nzind]) - #y[x.nzind] = zero(elty) + @test h_x ≈ SparseVector(m,nonzeroinds(x),y[nonzeroinds(x)]) + #y[nonzeroinds(x)] = zero(elty) #@test h_y ≈ y end @@ -630,104 +618,72 @@ end d_z,d_w = CUSPARSE.gthrz(d_x,d_y,'O') h_w = collect(d_w) h_z = collect(d_z) - @test h_z ≈ SparseVector(m,x.nzind,y[x.nzind]) - #y[x.nzind] = zero(elty) + @test h_z ≈ SparseVector(m,nonzeroinds(x),y[nonzeroinds(x)]) + #y[nonzeroinds(x)] = zero(elty) #@test h_w ≈ y end end end -@testset "bsrmm2" begin +@testset "mv!" begin @testset for elty in [Float32,Float64,ComplexF32,ComplexF64] - A = sparse(rand(elty,m,k)) - B = rand(elty,k,n) - C = rand(elty,m,n) + A = sparse(rand(elty,m,n)) + x = rand(elty,n) + y = rand(elty,m) alpha = rand(elty) beta = rand(elty) - d_B = CuArray(B) - d_C = CuArray(C) - d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A,convert(Cint,blockdim)) - @test_throws DimensionMismatch CUSPARSE.mm2('N','T',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2('T','N',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2('T','T',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2('N','N',alpha,d_A,d_B,beta,d_B,'O') - d_D = CUSPARSE.mm2('N','N',alpha,d_A,d_B,beta,d_C,'O') - h_D = collect(d_D) - D = alpha * A * B + beta * C - @test D ≈ h_D - d_D = CUSPARSE.mm2('N','N',d_A,d_B,beta,d_C,'O') - h_D = collect(d_D) - D = A * B + beta * C - @test D ≈ h_D - d_D = CUSPARSE.mm2('N','N',d_A,d_B,d_C,'O') - h_D = collect(d_D) - D = A * B + C - @test D ≈ h_D - d_D = CUSPARSE.mm2('N','N',alpha,d_A,d_B,'O') - h_D = collect(d_D) - D = alpha * A * B - @test D ≈ h_D - d_D = CUSPARSE.mm2('N','N',d_A,d_B,'O') - h_D = collect(d_D) - D = A * B - @test D ≈ h_D + @testset "$(typeof(d_A))" for d_A in [CuSparseMatrixCSR(A), + CuSparseMatrixCSC(A), + CuSparseMatrixBSR(A, blockdim)] + d_x = CuArray(x) + d_y = CuArray(y) + @test_throws DimensionMismatch CUSPARSE.mv!('T',alpha,d_A,d_x,beta,d_y,'O') + @test_throws DimensionMismatch CUSPARSE.mv!('N',alpha,d_A,d_y,beta,d_x,'O') + CUSPARSE.mv!('N',alpha,d_A,d_x,beta,d_y,'O') + h_z = collect(d_y) + z = alpha * A * x + beta * y + @test z ≈ h_z + mul!(d_y, d_A, d_x) + h_y = collect(d_y) + z = A * x + @test z ≈ h_y + if d_A isa CuSparseMatrixCSR + @test d_y' * (d_A * d_x) ≈ (d_y' * d_A) * d_x + end + end end end -@testset "bsrmm2!" begin + +@testset "mm!" begin @testset for elty in [Float32,Float64,ComplexF32,ComplexF64] A = sparse(rand(elty,m,k)) B = rand(elty,k,n) C = rand(elty,m,n) alpha = rand(elty) beta = rand(elty) - d_B = CuArray(B) - d_C = CuArray(C) - d_A = CuSparseMatrixCSR(A) - d_A = CUSPARSE.switch2bsr(d_A,convert(Cint,blockdim)) - @test_throws DimensionMismatch CUSPARSE.mm2!('N','T',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2!('T','N',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2!('T','T',alpha,d_A,d_B,beta,d_C,'O') - @test_throws DimensionMismatch CUSPARSE.mm2!('N','N',alpha,d_A,d_B,beta,d_B,'O') - CUSPARSE.mm2!('N','N',alpha,d_A,d_B,beta,d_C,'O') - h_D = collect(d_C) - D = alpha * A * B + beta * C - @test D ≈ h_D - d_C = CuArray(C) - mul!(d_C, d_A, d_B) - h_C = collect(d_C) - D = A * B - @test D ≈ h_C - end -end -@testset "mv!" begin - for elty in [Float32,Float64,ComplexF32,ComplexF64] - A = sparse(rand(elty,m,n)) - x = rand(elty,n) - y = rand(elty,m) - alpha = rand(elty) - beta = rand(elty) - @testset "mv!" begin - @testset "$mattype" for (mattype, d_A) in [ - ("csr", CuSparseMatrixCSR(A)), - ("bsr", CUSPARSE.switch2bsr(CuSparseMatrixCSR(A),convert(Cint,blockdim))) - ] - d_x = CuArray(x) - d_y = CuArray(y) - @test_throws DimensionMismatch CUSPARSE.mv!('T',alpha,d_A,d_x,beta,d_y,'O') - @test_throws DimensionMismatch CUSPARSE.mv!('N',alpha,d_A,d_y,beta,d_x,'O') - CUSPARSE.mv!('N',alpha,d_A,d_x,beta,d_y,'O') - h_z = collect(d_y) - z = alpha * A * x + beta * y - @test z ≈ h_z - mul!(d_y, d_A, d_x) - h_y = collect(d_y) - z = A * x - @test z ≈ h_y - if mattype=="csr" - @test d_y' * (d_A * d_x) ≈ (d_y' * d_A) * d_x - end + @testset "$(typeof(d_A))" for d_A in [CuSparseMatrixCSR(A), + CuSparseMatrixCSC(A), + CuSparseMatrixBSR(A, blockdim)] + d_B = CuArray(B) + d_C = CuArray(C) + mm! = if CUSPARSE.version() < v"10.3.1" && d_A isa CuSparseMatrixCSR + CUSPARSE.mm2! + else + CUSPARSE.mm! end + @test_throws DimensionMismatch mm!('N','T',alpha,d_A,d_B,beta,d_C,'O') + @test_throws DimensionMismatch mm!('T','N',alpha,d_A,d_B,beta,d_C,'O') + @test_throws DimensionMismatch mm!('T','T',alpha,d_A,d_B,beta,d_C,'O') + @test_throws DimensionMismatch mm!('N','N',alpha,d_A,d_B,beta,d_B,'O') + mm!('N','N',alpha,d_A,d_B,beta,d_C,'O') + h_D = collect(d_C) + D = alpha * A * B + beta * C + @test D ≈ h_D + d_C = CuArray(C) + mul!(d_C, d_A, d_B) + h_C = collect(d_C) + D = A * B + @test D ≈ h_C end end end @@ -741,7 +697,7 @@ end d_y = CuArray(y) d_y = CUSPARSE.sctr!(d_x,d_y,'O') h_y = collect(d_y) - y[x.nzind] += x.nzval + y[nonzeroinds(x)] += nonzeros(x) @test h_y ≈ y end y = zeros(elty,m) @@ -751,7 +707,7 @@ end d_y = CUSPARSE.sctr(d_x,'O') h_y = collect(d_y) y = zeros(elty,m) - y[x.nzind] += x.nzval + y[nonzeroinds(x)] += nonzeros(x) @test h_y ≈ y end end @@ -770,8 +726,8 @@ end h_y = collect(d_y) z = copy(x) w = copy(y) - y[x.nzind] = cos(angle)*w[z.nzind] - sin(angle)*z.nzval - @test h_x ≈ SparseVector(m,x.nzind,cos(angle)*z.nzval + sin(angle)*w[z.nzind]) + y[nonzeroinds(x)] = cos(angle)*w[nonzeroinds(z)] - sin(angle)*nonzeros(z) + @test h_x ≈ SparseVector(m,nonzeroinds(x),cos(angle)*nonzeros(z) + sin(angle)*w[nonzeroinds(z)]) @test h_y ≈ y end @@ -784,8 +740,8 @@ end h_z = collect(d_z) z = copy(x) w = copy(y) - w[z.nzind] = cos(angle)*y[x.nzind] - sin(angle)*x.nzval - @test h_z ≈ SparseVector(m,z.nzind, cos(angle)*x.nzval + sin(angle)*y[x.nzind]) + w[nonzeroinds(z)] = cos(angle)*y[nonzeroinds(x)] - sin(angle)*nonzeros(x) + @test h_z ≈ SparseVector(m,nonzeroinds(z), cos(angle)*nonzeros(x) + sin(angle)*y[nonzeroinds(x)]) @test h_w ≈ w end end