diff --git a/Project.toml b/Project.toml index 672e75c..00937f2 100644 --- a/Project.toml +++ b/Project.toml @@ -17,8 +17,8 @@ Adapt = "3, 4" AMDGPU = "0.3.7, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1" CUDA = "3.12, 4, 5" Metal = "1" -julia = "1.9" # Minimum required Julia version (supporting extensions and weak dependencies) StaticArrays = "1" +julia = "1.9" # Minimum required Julia version (supporting extensions and weak dependencies) [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/CellArray.jl b/src/CellArray.jl index 0bae3cb..630bd6b 100644 --- a/src/CellArray.jl +++ b/src/CellArray.jl @@ -3,8 +3,10 @@ using StaticArrays, Adapt ## Constants -const _N = 3 -const Cell = Union{Number, SArray, FieldArray} +const _N = 3 +const B0 = 0 +const Cell = Union{Number, SArray, FieldArray} +const ArrayCell = Union{SArray, FieldArray} ## Types and constructors @@ -45,7 +47,7 @@ struct CellArray{T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N} where {T_elem}} < CellArray{T,N,B,T_array}(data, dims) end - function CellArray{T,N,B,T_array}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType} + function CellArray{T,N,B,T_array}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} check_T(T) if (T_elem != eltype(T)) @IncoherentArgumentError("T_elem must match eltype(T).") end celldims = size(T) # Note: size must be defined for type T (as it is e.g. 
for StaticArrays) @@ -54,35 +56,35 @@ struct CellArray{T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N} where {T_elem}} < CellArray{T,N,B,T_array}(data, dims) end - function CellArray{T,N,B,T_array}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType} + function CellArray{T,N,B,T_array}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} CellArray{T,N,B,T_array}(T_array, undef, dims) end - function CellArray{T,N,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType} + function CellArray{T,N,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} CellArray{T,N,B,T_array}(T_array, undef, dims) end - function CellArray{T,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType} + function CellArray{T,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} CellArray{T,N,B}(T_array, undef, dims) end - function CellArray{T,N,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll} + function CellArray{T,N,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims) end - function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll} + function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} 
CellArray{T,N,B}(T_arraykind, undef, dims) end - function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::Int...) where {T<:Cell,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll} + function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} CellArray{T,B}(T_arraykind, undef, dims) end - function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll} - CellArray{T,0}(T_arraykind, undef, dims) + function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,T_arraykind<:AbstractArray} + CellArray{T,B0}(T_arraykind, undef, dims) end - function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::Int...) where {T<:Cell,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll} + function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N,T_arraykind<:AbstractArray} CellArray{T}(T_arraykind, undef, dims) end end @@ -107,9 +109,15 @@ See also: [`CellArray`](@ref), [`CuCellArray`](@ref), [`ROCCellArray`](@ref) const CPUCellArray{T,N,B,T_elem} = CellArray{T,N,B,Array{T_elem,_N}} CPUCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); CPUCellArray{T,N,B,eltype(T)}(undef, dims)) -CPUCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = CPUCellArray{T,B}(undef, dims) -CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,0}(undef, dims) -CPUCellArray{T}(::UndefInitializer, dims::Int...) 
where {T<:Cell} = CPUCellArray{T}(undef, dims) +CPUCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,B,N} = CPUCellArray{T,B}(undef, dims) +CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,B0}(undef, dims) +CPUCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N} = CPUCellArray{T}(undef, dims) + +CPUCellArray(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = CellArray{T,N,B}(Array(A.data), A.dims) + +# TODO: to be added for all kinds of CellArrays: +# CPUCellArray(A::AbstractArray{T,N}, B::Integer) where {T<:Cell,N} = ( C=CPUCellArray{T,B}(undef, N); C.=A; C ) +# CPUCellArray(A::AbstractArray{T,N}) where {T<:Cell,N} = CPUCellArray(A, B0) """ @@ -120,7 +128,7 @@ Define the following type alias and constructors in the caller module: ******************************************************************************** CuCellArray{T<:Cell,N,B,T_elem} <: AbstractArray{T,N} where Cell <: Union{Number, SArray, FieldArray} -`N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `CuArray` of element type `T_elem`: alias for `CellArray{T,N,B,CuArray{T_elem,CellArrays._N}}`. +`N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `CuArray` of element type `T_elem`: alias for `CellArray{T,N,B,CuArray{T_elem,CellArrays._N,CUDA.DeviceMemory}}`. 
-------------------------------------------------------------------------------- @@ -141,12 +149,17 @@ macro define_CuCellArray() esc(define_CuCellArray()) end function define_CuCellArray() quote - const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}} + const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N,CUDA.DeviceMemory}} + + CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = CuCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @cuda launch=false f(A) end; A ) + CuCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = CuCellArray{T,B}(undef, dims) + CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = CuCellArray{T,CellArrays.B0}(undef, dims) + CuCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = CuCellArray{T}(undef, dims) - CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); CuCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims)) - CuCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = CuCellArray{T,B}(undef, dims) - CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = CuCellArray{T,0}(undef, dims) - CuCellArray{T}(::UndefInitializer, dims::Int...) 
where {T<:CellArrays.Cell} = CuCellArray{T}(undef, dims) + CuCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = (A = CellArrays.CellArray{T,N,B}(CUDA.CuArray(A.data), A.dims); f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @cuda launch=false f(A) end; A) + + Base.show(io::IO, A::CuCellArray) = Base.show(io, CellArrays.CPUCellArray(A)) + Base.show(io::IO, ::MIME"text/plain", A::CuCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element CuCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:"); Base.print_array(io, CellArrays.CPUCellArray(A)) ) end end @@ -179,12 +192,20 @@ macro define_ROCCellArray() esc(define_ROCCellArray()) end function define_ROCCellArray() quote - const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} + const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} # TODO: `,AMDGPU.Runtime.Mem.HIPBuffer` should be added as a third type parameter here later. At the moment it has no impact (and adding it would require adaptation of the unit tests). 
+ const ROCDeviceCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCDeviceArray{T_elem,CellArrays._N,AMDGPU.Runtime.Mem.HIPBuffer}} + + ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = ROCCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); A ) # TODO: Once reshape is implemented in AMDGPU, the workaround can be applied as well: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @roc launch=false f(A) end; A ) + ROCCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = ROCCellArray{T,B}(undef, dims) + ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = ROCCellArray{T,CellArrays.B0}(undef, dims) + ROCCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = ROCCellArray{T}(undef, dims) + + ROCCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = ( A = CellArrays.CellArray{T,N,B}(AMDGPU.ROCArray(A.data), A.dims); A ) # TODO: Once reshape is implemented in AMDGPU, the workaround can be applied as well: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @roc launch=false f(A) end; A ) - ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); ROCCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims)) - ROCCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = ROCCellArray{T,B}(undef, dims) - ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = ROCCellArray{T,0}(undef, dims) - ROCCellArray{T}(::UndefInitializer, dims::Int...) 
where {T<:CellArrays.Cell} = ROCCellArray{T}(undef, dims) + Base.show(io::IO, A::ROCCellArray) = Base.show(io, CellArrays.CPUCellArray(A)) + Base.show(io::IO, ::MIME"text/plain", A::ROCCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element ROCCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:"); Base.print_array(io, CellArrays.CPUCellArray(A)) ) + + @inline Base.getproperty(A::ROCDeviceCellArray{T,N,B,T_elem}, fieldname::Symbol) where {T<:CellArrays.FieldArray,N,B,T_elem} = ( (fieldname===:dims || fieldname===:data) ? getproperty(A, Val(fieldname)) : CellArrays.@ArgumentError("Field access by name is not yet supported for ROCDeviceCellArray.") ) end end @@ -218,27 +239,35 @@ macro define_MtlCellArray() esc(define_MtlCellArray()) end function define_MtlCellArray() quote const MtlCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,Metal.MtlArray{T_elem,CellArrays._N}} + const MtlDeviceCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,Metal.MtlDeviceArray{T_elem,CellArrays._N}} + + MtlCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = MtlCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); A) #workaround: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @metal launch=false f(A) end; A ) + MtlCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = MtlCellArray{T,B}(undef, dims) + MtlCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = MtlCellArray{T,CellArrays.B0}(undef, dims) + MtlCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = MtlCellArray{T}(undef, dims) + + MtlCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = ( A = CellArrays.CellArray{T,N,B}(Metal.MtlArray(A.data), A.dims); A) #workaround: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @metal launch=false f(A) end; A ) - 
MtlCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); MtlCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims)) - MtlCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = MtlCellArray{T,B}(undef, dims) - MtlCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = MtlCellArray{T,0}(undef, dims) - MtlCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = MtlCellArray{T}(undef, dims) + Base.show(io::IO, A::MtlCellArray) = Base.show(io, CellArrays.CPUCellArray(A)) + Base.show(io::IO, ::MIME"text/plain", A::MtlCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element MtlCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:"); Base.print_array(io, CellArrays.CPUCellArray(A)) ) + + @inline Base.getproperty(A::MtlDeviceCellArray{T,N,B,T_elem}, fieldname::Symbol) where {T<:CellArrays.FieldArray,N,B,T_elem} = ( (fieldname===:dims || fieldname===:data) ? getproperty(A, Val(fieldname)) : CellArrays.@ArgumentError("Field access by name is not yet supported for MtlDeviceCellArray.") ) end end ## AbstractArray methods -@inline Base.IndexStyle(::Type{<:CellArray}) = IndexLinear() -@inline Base.size(T::Type{<:Number}, args...) = (1,) -@inline Base.size(A::CellArray) = A.dims -@inline Base.length(T::Type{<:Number}, args...) = 1 +@inline Base.IndexStyle(::Type{<:CellArray}) = IndexLinear() +@inline Base.size(::Type{<:Number}, args...) 
= (1,) +@inline Base.size(A::CellArray) = A.dims +@inline Base.length(::Type{<:Number}) = 1 @inline function Base.similar(A::CellArray{T0,N0,B,T_array0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_array0,T<:Cell,N} check_T(T) - T_arraykind = Base.typename(T_array0).wrapper # Note: an alternative would be: T_array = typeof(similar(A.data, eltype(T), dims.*0)); CellArray{T,N,B}(T_array, dims) - CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims) + T_array = typeof(similar(A.data, eltype(T), ntuple(i -> 0, _N))) # Note: an alternative would have been in the past (this misses however the CUDA.DeviceMemory argument if T_arraykind is CuArray): T_arraykind = Base.typename(T_array0).wrapper; CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims) + CellArray{T,N,B}(T_array, undef, dims) end @@ -248,7 +277,7 @@ end return A end -@inline function Base.fill!(A::CellArray{T,N,B,T_array}, X) where {T<:Union{SArray,FieldArray},N,B,T_array} +@inline function Base.fill!(A::CellArray{T,N,B,T_array}, X) where {T<:ArrayCell,N,B,T_array} cell = convert(T, X) for j=1:length(T) A.data[:, j, :] .= cell[j] @@ -266,11 +295,11 @@ end return end -@inline function Base.getindex(A::CellArray{T,N,B,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,B,T_array} +@inline function Base.getindex(A::CellArray{T,N,B,T_array}, i::Int) where {T<:ArrayCell,N,B,T_array} T(getindex(A.data, Base._to_linear_index(A.data::T_array, (i-1)%B+1, j, (i-1)÷B+1)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used. 
end -@inline function Base.setindex!(A::CellArray{T,N,B,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,B,T_array} +@inline function Base.setindex!(A::CellArray{T,N,B,T_array}, X::T, i::Int) where {T<:ArrayCell,N,B,T_array} for j=1:length(T) A.data[Base._to_linear_index(A.data::T_array, (i-1)%B+1, j, (i-1)÷B+1)] = X[j] end @@ -281,11 +310,11 @@ end @inline Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:Number,N,T_array<:AbstractArray{T,_N}} = T(A.data[i]) @inline Base.setindex!(A::CellArray{T,N,0,T_array}, x::Number, i::Int) where {T<:Number,N,T_array} = (A.data[i] = x; return) -@inline function Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,T_array} +@inline function Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:ArrayCell,N,T_array} T(getindex(A.data, Base._to_linear_index(A.data::T_array, i, j, 1)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used. end -@inline function Base.setindex!(A::CellArray{T,N,0,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_array} +@inline function Base.setindex!(A::CellArray{T,N,0,T_array}, X::T, i::Int) where {T<:ArrayCell,N,T_array} for j=1:length(T) A.data[Base._to_linear_index(A.data::T_array, i, j, 1)] = X[j] end @@ -296,34 +325,46 @@ end @inline Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:Number,N,T_array<:AbstractArray{T,_N}} = T(A.data[i]) @inline Base.setindex!(A::CellArray{T,N,1,T_array}, x::Number, i::Int) where {T<:Number,N,T_array} = (A.data[i] = x; return) -@inline function Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,T_array} +@inline function Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:ArrayCell,N,T_array} T(getindex(A.data, Base._to_linear_index(A.data::T_array, 1, j, i)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used. 
end -@inline function Base.setindex!(A::CellArray{T,N,1,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_array} +@inline function Base.setindex!(A::CellArray{T,N,1,T_array}, X::T, i::Int) where {T<:ArrayCell,N,T_array} for j=1:length(T) A.data[Base._to_linear_index(A.data::T_array, 1, j, i)] = X[j] end return end -@inline function Base.getindex(A::CPUCellArray{T,N,1,T_elem}, i::Int) where {T<:Union{SArray,FieldArray},N,T_elem} +@inline function Base.getindex(A::CPUCellArray{T,N,1,T_elem}, i::Int) where {T<:ArrayCell,N,T_elem} getindex(reinterpret(reshape, T, view(A.data::Array{T_elem,_N},1,:,:)), i) # NOTE: reinterpret is not implemented for CUDA device arrays, i.e. for usage in kernels end -@inline function Base.setindex!(A::CPUCellArray{T,N,1,T_elem}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_elem} +@inline function Base.setindex!(A::CPUCellArray{T,N,1,T_elem}, X::T, i::Int) where {T<:ArrayCell,N,T_elem} setindex!(reinterpret(reshape, T, view(A.data::Array{T_elem,_N},1,:,:)), X ,i) # NOTE: reinterpret is not implemented for CUDA device arrays, i.e. 
for usage in kernels return end +## Array operation overloading + +Base.:(==)(A::CellArray, B::CellArray) = (size(A) == size(B)) && all(A.data .== B.data) # NOTE: sizes are compared first so that CellArrays of different shape never compare equal, even if their storage matches. For some reason the following does not work robustly: A.data == B.data +# TODO: Comparison "<" does not work for integers: +# Base.:(<)(A::CellArray, B::CellArray) = all(A.data .< B.data) + + ## CellArray properties -@inline Base.getproperty(A::CellArray{T,N,B,T_array}, s::Symbol) where {T<:FieldArray,N,B,T_array} = _getproperty(A, Val(s)) -@inline _getproperty(A::CellArray{T,N,B,T_array}, s::Val) where {T<:FieldArray,N,B,T_array} = _getfield(A, s) -@inline _getfield(A::CellArray{T,N,B,T_array}, ::Val{:data}) where {T<:FieldArray,N,B,T_array} = getfield(A, :data) -@inline _getfield(A::CellArray{T,N,B,T_array}, ::Val{:dims}) where {T<:FieldArray,N,B,T_array} = getfield(A, :dims) -@inline _getfield(A::CellArray{T,N,B,T_array}, s::Val) where {T<:FieldArray,N,B,T_array} = field(A, s) +@inline Base.getproperty(A::CellArray{T,N,B,T_array}, fieldname::Symbol) where {T<:FieldArray,N,B,T_array} = getproperty(A, Val(fieldname)) + +@inline Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{:data}) where {T<:FieldArray{N2,T2,D},N,B,T_array} where {N2,T2,D} = getfield(A, :data) +@inline Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{:dims}) where {T<:FieldArray{N2,T2,D},N,B,T_array} where {N2,T2,D} = getfield(A, :dims) + +@inline @generated function Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{fieldname}) where {T<:FieldArray{N2,T2,D},N,B,T_array,fieldname} where {N2,T2,D} + names = SArray{N2}(fieldnames(T)) + indices = Tuple(findfirst(x->x===fieldname, names)) + return :(field(A, $(indices))) +end ## API functions @@ -338,12 +379,22 @@ Return a tuple containing the dimensions of `A` or return only a specific dimens @inline cellsize(A::AbstractArray, dim::Int) = cellsize(A)[dim] +""" + celllength(A) + +Return the cell length of CellArray `A`. 
+""" +@inline celllength(A::AbstractArray) = length(eltype(A)) + + """ blocklength(A) Return the blocklength of CellArray `A`. """ -@inline blocklength(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = (B == 0) ? prod(A.dims) : B +@inline blocklength(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = prod(A.dims) +@inline blocklength(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = 1 +@inline blocklength(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = B """ @@ -353,35 +404,62 @@ Return the blocklength of CellArray `A`. Return an array view of the field of CellArray `A` designated with `indices` or `fieldname` (modifying the view will modify `A`). The view's dimensionality and size are equal to `A`'s. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`. ## Arguments -- `indices::Int|NTuple{N,Int}`: the `indices` that designate the field in accordance with `A`'s cell type. +- `indices::Int|NTuple{N,Int}`: the `indices` that designate the field in accordance with `A`'s cell type (flat indexing is supported for multi dimensional cells). - `fieldname::Symbol`: the `fieldname` that designates the field in accordance with `A`'s cell type. """ -@inline field(A::CellArray{T,N,0,T_array}, index::Int) where {T,N,T_array} = view(plain(A), Base.OneTo.(size(A))..., index) -@inline field(A::CellArray{T,N,0,T_array}, indices::NTuple{M,Int}) where {T_elem,M,T<:AbstractArray{T_elem,M},N, T_array} = view(plain(A), Base.OneTo.(size(A))..., indices...) -@inline field(A::CellArray{T,N,1,T_array}, index::Int) where {T,N,T_array} = view(plain(A), index, Base.OneTo.(size(A))...) -@inline field(A::CellArray{T,N,1,T_array}, indices::NTuple{M,Int}) where {T_elem,M,T<:AbstractArray{T_elem,M},N, T_array} = view(plain(A), indices..., Base.OneTo.(size(A))...) 
-@inline field(A::CellArray{T,N,B,T_array}, indices::Union{Int,NTuple{M,Int}}) where {T_elem,M,T<:AbstractArray{T_elem,M},N,B,T_array} = @ArgumentError("the operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") -@inline field(A::CellArray, indices::Int...) = field(A, indices) -@inline field(A::CellArray{T,N,B,T_array}, fieldname::Symbol) where {T<:FieldArray,N,B,T_array} = field(A, Val(fieldname)) - -@inline @generated function field(A::CellArray{T,N,B,T_array}, ::Val{fieldname}) where {T<:FieldArray{N2,T2,D},N,B,T_array,fieldname} where {N2,T2,D} - names = SArray{N2}(fieldnames(T)) - indices = Tuple(findfirst(x->x===fieldname, names)) - return :(field(A, $(indices...))) -end +@inline field(A::CellArray{T,N,0,T_array}, index::Tuple{Int}) where {T<:ArrayCell,N, T_array} = reshape(view(plain_flat(A), :, index...), size(A)) +@inline field(A::CellArray{T,N,0,T_array}, indices::NTuple{M,Int}) where {T<:ArrayCell,N, T_array,M} = reshape(view(plain_arrayflat(A), :, indices...), size(A)) +@inline field(A::CellArray{T,N,1,T_array}, index::Tuple{Int}) where {T<:ArrayCell,N, T_array} = reshape(view(plain_flat(A), index..., :), size(A)) +@inline field(A::CellArray{T,N,1,T_array}, indices::NTuple{M,Int}) where {T<:ArrayCell,N, T_array,M} = reshape(view(plain_arrayflat(A), indices..., :), size(A)) +@inline field(A::CellArray{T,N,B,T_array}, indices::Union{Tuple{Int},NTuple{M,Int}}) where {T<:ArrayCell,N,B,T_array,M} = @ArgumentError("the operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") +@inline field(A::CellArray{T}, indices::NTuple{M,Int}) where {T<:Number,M} = A +@inline field(A::CellArray{T,N,B,T_array}, fieldname::Symbol) where {T<:FieldArray,N,B,T_array} = getproperty(A, fieldname) +@inline field(A::CellArray, indices::Vararg{Int, N}) where {N} = field(A, indices) ## Helper functions +# NOTE: the following function could be provided in public API: # """ # plain(A) # # Return a plain `N`-dimensional array view of CellArray 
`A` (modifying the view will modify `A`), where `N` is the sum of the dimensionalities of `A` and the cell type of `A`. The view's dimensions are `(size(A)..., cellsize(A)...)` if parameter `B` of `A` is `0`, and `(cellsize(A)..., size(A)...)` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`. # # """ -@inline plain(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = reshape(A.data, (size(A)..., cellsize(A)...)) -@inline plain(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = reshape(A.data, (cellsize(A)..., size(A)...)) -@inline plain(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") +# @inline plain(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = reshape(A.data, (size(A)..., cellsize(A)...)) +# @inline plain(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = reshape(A.data, (cellsize(A)..., size(A)...)) +# @inline plain(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") + +# """ +# plain_arrayflat(A) +# +# Return a plain `N`-dimensional array view of CellArray `A` with flat array indexing (modifying the view will modify `A`), where `N` is the sum of the length of `A` and the dimensionalities of the cell type of `A`. The view's dimensions are `(length(A), cellsize(A)...)` if parameter `B` of `A` is `0`, and `(cellsize(A)..., length(A))` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`. 
+# +# """ +@inline plain_arrayflat(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = reshape(A.data, (length(A), cellsize(A)...)) +@inline plain_arrayflat(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = reshape(A.data, (cellsize(A)..., length(A))) +@inline plain_arrayflat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") + +# NOTE: the following function could be provided in public API: +# """ +# plain_cellflat(A) +# +# Return a plain `N`-dimensional array view of CellArray `A` with flat cell indexing (modifying the view will modify `A`), where `N` is the sum of the dimensionalities of `A` and the length of the cell type of `A`. The view's dimensions are `(size(A)..., celllength(A))` if parameter `B` of `A` is `0`, and `(celllength(A), size(A)...)` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`. +# +# """ +# @inline plain_cellflat(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = reshape(A.data, (size(A)..., celllength(A))) +# @inline plain_cellflat(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = reshape(A.data, (celllength(A), size(A)...)) +# @inline plain_cellflat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") + +# """ +# plain_flat(A) +# +# Return a plain `N`-dimensional array view of CellArray `A` with flat array and cell indexing (modifying the view will modify `A`), where `N` is the sum of the length of `A` and the length of the cell type of `A`. The view's dimensions are `(length(A), celllength(A))` if parameter `B` of `A` is `0`, and `(celllength(A), length(A))` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`. 
+# +# """ +@inline plain_flat(A::CellArray{T,N,0,T_array}) where {T,N, T_array} = reshape(A.data, (length(A), celllength(A))) +@inline plain_flat(A::CellArray{T,N,1,T_array}) where {T,N, T_array} = reshape(A.data, (celllength(A), length(A))) +@inline plain_flat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.") function check_T(::Type{T}) where {T} diff --git a/src/CellArrays.jl b/src/CellArrays.jl index a4eca6c..6478d9e 100644 --- a/src/CellArrays.jl +++ b/src/CellArrays.jl @@ -32,5 +32,5 @@ using .Exceptions include("CellArray.jl") ## Exports (need to be after include of submodules if re-exports from them) -export CellArray, CPUCellArray, @define_CuCellArray, @define_ROCCellArray, @define_MtlCellArray, cellsize, blocklength, field +export CellArray, CPUCellArray, @define_CuCellArray, @define_ROCCellArray, @define_MtlCellArray, cellsize, celllength, blocklength, field end diff --git a/test/test_CellArray.jl b/test/test_CellArray.jl index 569d4a6..8654885 100644 --- a/test/test_CellArray.jl +++ b/test/test_CellArray.jl @@ -67,7 +67,7 @@ mutable struct MyMutableFieldArray{T} <: FieldArray{Tuple{2}, T, 1} end @testset "$(basename(@__FILE__))" begin - @testset "1. CellArray allocation ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) + @testset "1. 
CellArray allocation ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) @testset "Number cells" begin dims = (2,3) A = CellArray{Float}(undef, dims) @@ -86,10 +86,6 @@ end @test eltype(B) == Int32 @test eltype(C) == Float @test eltype(D) == Int32 - @test typeof(A) == CellArrays.CellArray{Float, length(dims), 0, Array{eltype(A.data),_N}} - @test typeof(B) == CellArrays.CellArray{Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} - @test typeof(C) == CellArrays.CellArray{Float, length(dims), 1, Array{eltype(C.data),_N}} - @test typeof(D) == CellArrays.CellArray{Int32, length(dims), 4, Array{eltype(D.data),_N}} @test length(A.data) == prod(dims) @test length(B.data) == prod(dims) @test length(C.data) == prod(dims) @@ -98,12 +94,23 @@ end @test B.dims == dims @test C.dims == dims @test D.dims == dims + if array_type == "CUDA" + @test typeof(A) == CellArrays.CellArray{Float, length(dims), 0, CuArray{eltype(A.data),_N, CUDA.DeviceMemory}} + @test typeof(B) == CellArrays.CellArray{Int32, length(dims), prod(dims), CuArray{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA. 
+ @test typeof(C) == CellArrays.CellArray{Float, length(dims), 1, CuArray{eltype(C.data),_N, CUDA.DeviceMemory}} + @test typeof(D) == CellArrays.CellArray{Int32, length(dims), 4, CuArray{eltype(D.data),_N, CUDA.DeviceMemory}} + else + @test typeof(A) == CellArrays.CellArray{Float, length(dims), 0, Array{eltype(A.data),_N}} + @test typeof(B) == CellArrays.CellArray{Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} + @test typeof(C) == CellArrays.CellArray{Float, length(dims), 1, Array{eltype(C.data),_N}} + @test typeof(D) == CellArrays.CellArray{Int32, length(dims), 4, Array{eltype(D.data),_N}} + end end; @testset "SArray cells" begin - dims = (2,3) - celldims = (3,4) - T_Float = SMatrix{celldims..., Float, prod(celldims)} - T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} + dims = (2,3) + celldims = (3,4) + T_Float = SMatrix{celldims..., Float, prod(celldims)} + T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} A = CellArray{T_Float}(undef, dims) B = CellArrays.CellArray{T_Int32,prod(dims)}(Array, undef, dims...) 
C = CellArray{T_Float,1}(undef, dims) @@ -120,10 +127,6 @@ end @test eltype(B) == T_Int32 @test eltype(C) == T_Float @test eltype(D) == T_Int32 - @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}} - @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} - @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}} - @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}} @test length(A.data) == prod(dims)*prod(celldims) @test length(B.data) == prod(dims)*prod(celldims) @test length(C.data) == prod(dims)*prod(celldims) @@ -132,11 +135,22 @@ end @test B.dims == dims @test C.dims == dims @test D.dims == dims + if array_type == "CUDA" + @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N, CUDA.DeviceMemory}} + @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA. + @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N, CUDA.DeviceMemory}} + @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N, CUDA.DeviceMemory}} + else + @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}} + @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} + @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}} + @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}} + end end; @testset "FieldArray cells" begin dims = (2,3) celldims = size(MyFieldArray) - T_Float = MyFieldArray{Float} + T_Float = MyFieldArray{Float} T_Int32 = MyFieldArray{Int32} A = CellArray{T_Float}(undef, dims) B = CellArrays.CellArray{T_Int32,prod(dims)}(Array, undef, dims...) 
@@ -154,10 +168,6 @@ end @test eltype(B) == T_Int32 @test eltype(C) == T_Float @test eltype(D) == T_Int32 - @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}} - @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} - @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}} - @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}} @test length(A.data) == prod(dims)*prod(celldims) @test length(B.data) == prod(dims)*prod(celldims) @test length(C.data) == prod(dims)*prod(celldims) @@ -166,15 +176,26 @@ end @test B.dims == dims @test C.dims == dims @test D.dims == dims + if array_type == "CUDA" + @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N, CUDA.DeviceMemory}} + @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA. + @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N, CUDA.DeviceMemory}} + @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N, CUDA.DeviceMemory}} + else + @test typeof(A) == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}} + @test typeof(B) == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} + @test typeof(C) == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}} + @test typeof(D) == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}} + end end; end; - @testset "2. 
functions ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) - dims = (2,3) - celldims = (3,4) # Needs to be compatible for matrix multiplication! - T_Float = SMatrix{celldims..., Float, prod(celldims)} - T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} + @testset "2. functions ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) + dims = (2,3) + celldims = (3,4) # Needs to be compatible for matrix multiplication! + T_Float = SMatrix{celldims..., Float, prod(celldims)} + T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} T2_Float = MyFieldArray{Float} - T2_Int32 = MyFieldArray{Int32} + T2_Int32 = MyFieldArray{Int32} A = CellArray{Float}(undef, dims) B = CellArrays.CellArray{Int32,prod(dims)}(Array, undef, dims) C = CellArray{T_Float}(undef, dims) @@ -194,25 +215,61 @@ end @test size(H) == dims end; @testset "similar" begin - @test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N}} - @test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N}} - @test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N}} - @test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N}} 
- @test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N}} - @test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N}} - @test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N}} - @test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} - @test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N}} - @test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N}} - @test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N}} + if array_type == "CUDA" + @test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, 
length(dims), blocklength(G), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + @test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} + elseif array_type == "AMDGPU" + @test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(E, T_Int32)) == 
CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + @test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}} + else + @test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N}} + 
@test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N}} + @test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N}} + @test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N}} + @test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N}} + @test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N}} + @test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N}} + @test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, 0, Array{eltype(T_Int32),_N}} + @test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N}} + @test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N}} + @test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N}} + end end; @testset "fill!" 
begin - allowscalar() do + allowscalar(true) # "allowscalar do" is not defined for AMDGPU fill!(A, 9); @test all(Base.Array(A.data) .== 9.0) fill!(B, 9.0); @test all(Base.Array(B.data) .== 9) fill!(C, (1:length(eltype(C)))); @test all(C .== (T_Float(1:length(eltype(C))) for i=1:dims[1], j=1:dims[2])) @@ -221,10 +278,20 @@ end fill!(F, (1:length(eltype(F)))); @test all(F .== (T2_Int32(1:length(eltype(F))) for i=1:dims[1], j=1:dims[2])) fill!(G, (1:length(eltype(G)))); @test all(G .== (T_Float(1:length(eltype(G))) for i=1:dims[1], j=1:dims[2])) fill!(H, (1:length(eltype(H)))); @test all(H .== (T_Int32(1:length(eltype(H))) for i=1:dims[1], j=1:dims[2])) + allowscalar(false) + end + @testset "constructors" begin + @test isa(CPUCellArray(A), CPUCellArray) + if array_type == "CUDA" + @test isa(CuCellArray(CPUCellArray(A)), CuCellArray) + # elseif array_type == "AMDGPU" + # @test isa(ROCCellArray(CPUCellArray(A)), ROCCellArray) # TODO: for some reason this results in scalar indexing + elseif array_type == "Metal" + @test isa(MtlCellArray(CPUCellArray(A)), MtlCellArray) end end @testset "getindex / setindex! (array programming)" begin - allowscalar() do + allowscalar(true) # "allowscalar do" is not defined for AMDGPU A.data.=0; B.data.=0; C.data.=0; D.data.=0; E.data.=0; F.data.=0; G.data.=0; H.data.=0; A[2,2:3] .= 9 B[2,2:3] .= 9.0 @@ -242,7 +309,7 @@ end @test all(F[2,2:3] .== (T2_Int32(1:length(T2_Int32)), T2_Int32(1:length(T2_Int32)))) @test all(G[2,2:3] .== (T_Float(1:length(T_Float)), T_Float(1:length(T_Float)))) @test all(H[2,2:3] .== (T_Int32(1:length(T_Int32)), T_Int32(1:length(T_Int32)))) - end + allowscalar(false) end; @testset "getindex / setindex! (GPU kernel programming)" begin celldims2 = (4,4) # Needs to be compatible for matrix multiplication! 
@@ -332,21 +399,90 @@ end @test size(field(E, :yyxx)) == dims @test size(field(E, :yyyy)) == dims end; - @testset "field property" begin + @testset "field property (host side access)" begin @test E.xxxx == field(E, :xxxx) @test E.yxxx == field(E, :yxxx) @test E.xyxx == field(E, :xyxx) @test E.yyxx == field(E, :yyxx) @test E.yyyy == field(E, :yyyy) end; + @testset "field property (device side access)" begin + if array_type == "CUDA" + function add2D_CUDA_properties!(A, B) + ix = (CUDA.blockIdx().x-1) * CUDA.blockDim().x + CUDA.threadIdx().x + iy = (CUDA.blockIdx().y-1) * CUDA.blockDim().y + CUDA.threadIdx().y + A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy]; + A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy]; + return + end + E.data.=1; @cuda blocks=size(E) add2D_CUDA_properties!(E, E); CUDA.synchronize(); @test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11) + # elseif array_type == "AMDGPU" # TODO: activate once supported + # function add2D_AMDGPU_properties!(A, B) + # ix = (AMDGPU.blockIdx().x-1) * AMDGPU.blockDim().x + AMDGPU.threadIdx().x + # iy = (AMDGPU.blockIdx().y-1) * AMDGPU.blockDim().y + AMDGPU.threadIdx().y + # A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy]; + # A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy]; + # return + # end + # E.data.=1; @roc gridsize=size(E) add2D_AMDGPU_properties!(E, E); AMDGPU.synchronize(); @test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11) + elseif array_type == "Metal" + function add2D_Metal_properties!(A, B) + ix = (Metal.threadgroup_position_in_grid_3d().x-1) * Metal.threads_per_threadgroup_3d().x + Metal.thread_position_in_threadgroup_3d().x + iy = (Metal.threadgroup_position_in_grid_3d().y-1) * Metal.threads_per_threadgroup_3d().y + Metal.thread_position_in_threadgroup_3d().y + A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy]; + A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy]; + return + end + E.data.=1; @metal groups=size(E) add2D_Metal_properties!(E, E); Metal.synchronize(); 
@test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11) + end + end + @testset "comparisons (array programming)" begin + A.data.=9; B.data.=9; C.data.=9; D.data.=9; E.data.=9; F.data.=9; G.data.=9; H.data.=9; + A2=similar(A); B2=similar(B); C2=similar(C); D2=similar(D); E2=similar(E); F2=similar(F); G2=similar(G); H2=similar(H); + @test A !== A2 && !(A == A2) + @test B !== B2 && !(B == B2) + @test C !== C2 && !(C == C2) + @test D !== D2 && !(D == D2) + @test E !== E2 && !(E == E2) + @test F !== F2 && !(F == F2) + @test G !== G2 && !(G == G2) + @test H !== H2 && !(H == H2) + # Comparison does not work for integers: + # @test A !== A2 && A2 < A + # # @test B !== B2 && B2 < B + # @test C !== C2 && C2 < C + # # @test D !== D2 && D2 < D + # @test E !== E2 && E2 < E + # # @test F !== F2 && F2 < F + # @test G !== G2 && G2 < G + # # @test H !== H2 && H2 < H + A3=deepcopy(A); B3=deepcopy(B); C3=deepcopy(C); D3=deepcopy(D); E3=deepcopy(E); F3=deepcopy(F); G3=deepcopy(G); H3=deepcopy(H); + @test A !== A3 && A == A3 + @test B !== B3 && B == B3 + @test C !== C3 && C == C3 + @test D !== D3 && D == D3 + @test E !== E3 && E == E3 + @test F !== F3 && F == F3 + @test G !== G3 && G == G3 + @test H !== H3 && H == H3 + # Comparison does not work for integers: + # @test A !== A3 && !(A3 < A) + # # @test B !== B3 && !(B3 < B) + # @test C !== C3 && !(C3 < C) + # # @test D !== D3 && !(D3 < D) + # @test E !== E3 && !(E3 < E) + # # @test F !== F3 && !(F3 < F) + # @test G !== G3 && !(G3 < G) + # # @test H !== H3 && !(H3 < H) + end; end; - @testset "3. Exceptions ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) - dims = (2,3) - celldims = (3,4) + @testset "3. 
Exceptions ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) + dims = (2,3) + celldims = (3,4) T_Float = SMatrix{celldims..., Float, prod(celldims)} - T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} + T_Int32 = SMatrix{celldims..., Int32, prod(celldims)} T2_Float = MyFieldArray{Float} - T2_Int32 = MyFieldArray{Int32} + T2_Int32 = MyFieldArray{Int32} A = CellArray{Float}(undef, dims) B = CellArrays.CellArray{Int32,prod(dims)}(Array, undef, dims) C = CellArray{T_Float}(undef, dims)