diff --git a/Project.toml b/Project.toml
index 672e75c..00937f2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,8 +17,8 @@ Adapt = "3, 4"
 AMDGPU = "0.3.7, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1"
 CUDA = "3.12, 4, 5"
 Metal = "1"
-julia = "1.9" # Minimum required Julia version (supporting extensions and weak dependencies)
 StaticArrays = "1"
+julia = "1.9" # Minimum required Julia version (supporting extensions and weak dependencies)
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/src/CellArray.jl b/src/CellArray.jl
index 0bae3cb..630bd6b 100644
--- a/src/CellArray.jl
+++ b/src/CellArray.jl
@@ -3,8 +3,10 @@ using StaticArrays, Adapt
 
 ## Constants
 
-const _N = 3
-const Cell = Union{Number, SArray, FieldArray}
+const _N        = 3
+const B0        = 0
+const Cell      = Union{Number, SArray, FieldArray}
+const ArrayCell = Union{SArray, FieldArray}
 
 
 ## Types and constructors
@@ -45,7 +47,7 @@ struct CellArray{T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N} where {T_elem}} <
         CellArray{T,N,B,T_array}(data, dims)
     end
 
-    function CellArray{T,N,B,T_array}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType}
+    function CellArray{T,N,B,T_array}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem}
         check_T(T)
         if (T_elem != eltype(T)) @IncoherentArgumentError("T_elem must match eltype(T).") end
         celldims = size(T)  # Note: size must be defined for type T (as it is e.g. for StaticArrays)
@@ -54,35 +56,35 @@ struct CellArray{T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N} where {T_elem}} <
         CellArray{T,N,B,T_array}(data, dims)
     end
 
-    function CellArray{T,N,B,T_array}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType}
+    function CellArray{T,N,B,T_array}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem}
         CellArray{T,N,B,T_array}(T_array, undef, dims)
     end
 
-    function CellArray{T,N,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType}
+    function CellArray{T,N,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem}
         CellArray{T,N,B,T_array}(T_array, undef, dims)
     end
 
-    function CellArray{T,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem} #where {Type{T_array}<:DataType}
+    function CellArray{T,B}(::Type{T_array}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_array<:AbstractArray{T_elem,_N}} where {T_elem}
         CellArray{T,N,B}(T_array, undef, dims)
     end
 
-    function CellArray{T,N,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll}
+    function CellArray{T,N,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray}
         CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims)
     end
 
-    function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll}
+    function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B,T_arraykind<:AbstractArray}
         CellArray{T,N,B}(T_arraykind, undef, dims)
     end
 
-    function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::Int...) where {T<:Cell,B,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll}
+    function CellArray{T,B}(::Type{T_arraykind}, ::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N,B,T_arraykind<:AbstractArray}
         CellArray{T,B}(T_arraykind, undef, dims)
     end
 
-    function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll}
-        CellArray{T,0}(T_arraykind, undef, dims)
+    function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,T_arraykind<:AbstractArray}
+        CellArray{T,B0}(T_arraykind, undef, dims)
     end
 
-    function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::Int...) where {T<:Cell,T_arraykind<:AbstractArray} #where {Type{T_arraykind}<:UnionAll}
+    function CellArray{T}(::Type{T_arraykind}, ::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N,T_arraykind<:AbstractArray}
         CellArray{T}(T_arraykind, undef, dims)
     end
 end
@@ -107,9 +109,15 @@ See also: [`CellArray`](@ref), [`CuCellArray`](@ref), [`ROCCellArray`](@ref)
 const CPUCellArray{T,N,B,T_elem} = CellArray{T,N,B,Array{T_elem,_N}}
 
 CPUCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); CPUCellArray{T,N,B,eltype(T)}(undef, dims))
-CPUCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = CPUCellArray{T,B}(undef, dims)
-CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,0}(undef, dims)
-CPUCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = CPUCellArray{T}(undef, dims)
+CPUCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,B,N} = CPUCellArray{T,B}(undef, dims)
+CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,B0}(undef, dims)
+CPUCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:Cell,N} = CPUCellArray{T}(undef, dims)
+
+CPUCellArray(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = CellArray{T,N,B}(Array(A.data), A.dims)
+
+# TODO: to be added for all kinds of CellArrays:
+# CPUCellArray(A::AbstractArray{T,N}, B::Integer) where {T<:Cell,N} = ( C=CPUCellArray{T,B}(undef, N); C.=A; C )
+# CPUCellArray(A::AbstractArray{T,N}) where {T<:Cell,N} = CPUCellArray(A, B0)
 
 
 """
@@ -120,7 +128,7 @@ Define the following type alias and constructors in the caller module:
 ********************************************************************************
     CuCellArray{T<:Cell,N,B,T_elem} <: AbstractArray{T,N} where Cell <: Union{Number, SArray, FieldArray}
 
-`N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `CuArray` of element type `T_elem`: alias for `CellArray{T,N,B,CuArray{T_elem,CellArrays._N}}`.
+`N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `CuArray` of element type `T_elem`: alias for `CellArray{T,N,B,CuArray{T_elem,CellArrays._N,CUDA.DeviceMemory}}`.
 
 --------------------------------------------------------------------------------
 
@@ -141,12 +149,17 @@ macro define_CuCellArray() esc(define_CuCellArray()) end
 
 function define_CuCellArray()
     quote
-        const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}}
+        const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N,CUDA.DeviceMemory}}
+
+        CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = CuCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @cuda launch=false f(A) end; A ) # NOTE: for B in (0,1), the kernel f (which calls plain_flat and plain_arrayflat on A) is passed to @cuda with launch=false, i.e., it is not launched.
+        CuCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = CuCellArray{T,B}(undef, dims)
+        CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = CuCellArray{T,CellArrays.B0}(undef, dims)
+        CuCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = CuCellArray{T}(undef, dims)
 
-        CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); CuCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims))
-        CuCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = CuCellArray{T,B}(undef, dims)
-        CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = CuCellArray{T,0}(undef, dims)
-        CuCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = CuCellArray{T}(undef, dims)
+        CuCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = (A = CellArrays.CellArray{T,N,B}(CUDA.CuArray(A.data), A.dims); f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @cuda launch=false f(A) end; A)
+
+        Base.show(io::IO, A::CuCellArray) = Base.show(io, CellArrays.CPUCellArray(A))
+        Base.show(io::IO, ::MIME"text/plain", A::CuCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element CuCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:");  Base.print_array(io, CellArrays.CPUCellArray(A)) )
     end
 end
 
@@ -179,12 +192,20 @@ macro define_ROCCellArray() esc(define_ROCCellArray()) end
 
 function define_ROCCellArray()
     quote
-        const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}}
+        const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} # TODO: `,AMDGPU.Runtime.Mem.HIPBuffer` should be added here later as an additional type parameter. At the moment it has no impact (and it would require adaptation of the unit tests).
+        const ROCDeviceCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCDeviceArray{T_elem,CellArrays._N,AMDGPU.Runtime.Mem.HIPBuffer}}
+        
+        ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = ROCCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); A ) # TODO: Once reshape is implemented in AMDGPU, the workaround can be applied as well: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @roc launch=false f(A) end; A )
+        ROCCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = ROCCellArray{T,B}(undef, dims)
+        ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = ROCCellArray{T,CellArrays.B0}(undef, dims)
+        ROCCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = ROCCellArray{T}(undef, dims)
+
+        ROCCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = ( A = CellArrays.CellArray{T,N,B}(AMDGPU.ROCArray(A.data), A.dims); A ) # TODO: Once reshape is implemented in AMDGPU, the workaround can be applied as well: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @roc launch=false f(A) end; A )
 
-        ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); ROCCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims))
-        ROCCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = ROCCellArray{T,B}(undef, dims)
-        ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = ROCCellArray{T,0}(undef, dims)
-        ROCCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = ROCCellArray{T}(undef, dims)
+        Base.show(io::IO, A::ROCCellArray) = Base.show(io, CellArrays.CPUCellArray(A))
+        Base.show(io::IO, ::MIME"text/plain", A::ROCCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element ROCCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:");  Base.print_array(io, CellArrays.CPUCellArray(A)) )
+
+        @inline Base.getproperty(A::ROCDeviceCellArray{T,N,B,T_elem}, fieldname::Symbol) where {T<:CellArrays.FieldArray,N,B,T_elem} = ( (fieldname===:dims || fieldname===:data) ? getproperty(A, Val(fieldname)) : CellArrays.@ArgumentError("Field access by name is not yet supported for ROCDeviceCellArray.") )
     end
 end
 
@@ -218,27 +239,35 @@ macro define_MtlCellArray() esc(define_MtlCellArray()) end
 function define_MtlCellArray()
     quote
         const MtlCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,Metal.MtlArray{T_elem,CellArrays._N}}
+        const MtlDeviceCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,Metal.MtlDeviceArray{T_elem,CellArrays._N}}
+
+        MtlCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = ( CellArrays.check_T(T); A = MtlCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims); A) #workaround: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @metal launch=false f(A) end; A )
+        MtlCellArray{T,B}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N,B} = MtlCellArray{T,B}(undef, dims)
+        MtlCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = MtlCellArray{T,CellArrays.B0}(undef, dims)
+        MtlCellArray{T}(::UndefInitializer, dims::Vararg{Int, N}) where {T<:CellArrays.Cell,N} = MtlCellArray{T}(undef, dims)
+
+        MtlCellArray(A::CellArrays.CellArray{T,N,B,T_array}) where {T,N,B,T_array} = ( A = CellArrays.CellArray{T,N,B}(Metal.MtlArray(A.data), A.dims); A) #workaround: f(A)=(CellArrays.plain_flat(A); CellArrays.plain_arrayflat(A); return); if (B in (0,1)) @metal launch=false f(A) end; A )
 
-        MtlCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); MtlCellArray{T,N,B,CellArrays.eltype(T)}(undef, dims))
-        MtlCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = MtlCellArray{T,B}(undef, dims)
-        MtlCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = MtlCellArray{T,0}(undef, dims)
-        MtlCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = MtlCellArray{T}(undef, dims)
+        Base.show(io::IO, A::MtlCellArray) = Base.show(io, CellArrays.CPUCellArray(A))
+        Base.show(io::IO, ::MIME"text/plain", A::MtlCellArray{T,N,B}) where {T,N,B} = ( println(io, "$(length(A))-element MtlCellArray{$T, $N, $B, $(CellArrays.eltype(T))}:");  Base.print_array(io, CellArrays.CPUCellArray(A)) )
+
+        @inline Base.getproperty(A::MtlDeviceCellArray{T,N,B,T_elem}, fieldname::Symbol) where {T<:CellArrays.FieldArray,N,B,T_elem} = ( (fieldname===:dims || fieldname===:data) ? getproperty(A, Val(fieldname)) : CellArrays.@ArgumentError("Field access by name is not yet supported for MtlDeviceCellArray.") ) # NOTE: only :dims and :data are forwarded; any other name is rejected via @ArgumentError.
     end
 end
 
 
 ## AbstractArray methods
 
-@inline Base.IndexStyle(::Type{<:CellArray})    = IndexLinear()
-@inline Base.size(T::Type{<:Number}, args...)   = (1,)
-@inline Base.size(A::CellArray)                 = A.dims
-@inline Base.length(T::Type{<:Number}, args...) = 1
+@inline Base.IndexStyle(::Type{<:CellArray})  = IndexLinear()
+@inline Base.size(::Type{<:Number}, args...)  = (1,)
+@inline Base.size(A::CellArray)               = A.dims
+@inline Base.length(::Type{<:Number})         = 1
 
 
 @inline function Base.similar(A::CellArray{T0,N0,B,T_array0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_array0,T<:Cell,N}
     check_T(T)
-    T_arraykind = Base.typename(T_array0).wrapper  # Note: an alternative would be: T_array = typeof(similar(A.data, eltype(T), dims.*0)); CellArray{T,N,B}(T_array, dims)
-    CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims)
+    T_array = typeof(similar(A.data, eltype(T), ntuple(i -> 0, _N))) # Note: an alternative would have been in the past (this misses however the CUDA.DeviceMemory argument if T_arraykind is CuArray): T_arraykind = Base.typename(T_array0).wrapper; CellArray{T,N,B}(T_arraykind{eltype(T),_N}, undef, dims)
+    CellArray{T,N,B}(T_array, undef, dims)
 end
 
 
@@ -248,7 +277,7 @@ end
     return A
 end
 
-@inline function Base.fill!(A::CellArray{T,N,B,T_array}, X) where {T<:Union{SArray,FieldArray},N,B,T_array}
+@inline function Base.fill!(A::CellArray{T,N,B,T_array}, X) where {T<:ArrayCell,N,B,T_array}
     cell = convert(T, X)
     for j=1:length(T)
         A.data[:, j, :] .= cell[j]
@@ -266,11 +295,11 @@ end
     return
 end
 
-@inline function Base.getindex(A::CellArray{T,N,B,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,B,T_array}
+@inline function Base.getindex(A::CellArray{T,N,B,T_array}, i::Int) where {T<:ArrayCell,N,B,T_array}
     T(getindex(A.data, Base._to_linear_index(A.data::T_array, (i-1)%B+1, j, (i-1)÷B+1)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used.
 end
 
-@inline function Base.setindex!(A::CellArray{T,N,B,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,B,T_array}
+@inline function Base.setindex!(A::CellArray{T,N,B,T_array}, X::T, i::Int) where {T<:ArrayCell,N,B,T_array}
     for j=1:length(T)
         A.data[Base._to_linear_index(A.data::T_array, (i-1)%B+1, j, (i-1)÷B+1)] = X[j]
     end
@@ -281,11 +310,11 @@ end
 @inline Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:Number,N,T_array<:AbstractArray{T,_N}} = T(A.data[i])
 @inline Base.setindex!(A::CellArray{T,N,0,T_array}, x::Number, i::Int) where {T<:Number,N,T_array}          = (A.data[i] = x; return)
 
-@inline function Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,T_array}
+@inline function Base.getindex(A::CellArray{T,N,0,T_array}, i::Int) where {T<:ArrayCell,N,T_array}
     T(getindex(A.data, Base._to_linear_index(A.data::T_array, i, j, 1)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used.
 end
 
-@inline function Base.setindex!(A::CellArray{T,N,0,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_array}
+@inline function Base.setindex!(A::CellArray{T,N,0,T_array}, X::T, i::Int) where {T<:ArrayCell,N,T_array}
     for j=1:length(T)
         A.data[Base._to_linear_index(A.data::T_array, i, j, 1)] = X[j]
     end
@@ -296,34 +325,46 @@ end
 @inline Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:Number,N,T_array<:AbstractArray{T,_N}} = T(A.data[i])
 @inline Base.setindex!(A::CellArray{T,N,1,T_array}, x::Number, i::Int) where {T<:Number,N,T_array}          = (A.data[i] = x; return)
 
-@inline function Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:Union{SArray,FieldArray},N,T_array}
+@inline function Base.getindex(A::CellArray{T,N,1,T_array}, i::Int) where {T<:ArrayCell,N,T_array}
     T(getindex(A.data, Base._to_linear_index(A.data::T_array, 1, j, i)) for j=1:length(T)) # NOTE:The same fails on GPU if convert is used.
 end
 
-@inline function Base.setindex!(A::CellArray{T,N,1,T_array}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_array}
+@inline function Base.setindex!(A::CellArray{T,N,1,T_array}, X::T, i::Int) where {T<:ArrayCell,N,T_array}
     for j=1:length(T)
         A.data[Base._to_linear_index(A.data::T_array, 1, j, i)] = X[j]
     end
     return
 end
 
-@inline function Base.getindex(A::CPUCellArray{T,N,1,T_elem}, i::Int) where {T<:Union{SArray,FieldArray},N,T_elem}
+@inline function Base.getindex(A::CPUCellArray{T,N,1,T_elem}, i::Int) where {T<:ArrayCell,N,T_elem}
     getindex(reinterpret(reshape, T, view(A.data::Array{T_elem,_N},1,:,:)), i)  # NOTE: reinterpret is not implemented for CUDA device arrays, i.e. for usage in kernels
 end
 
-@inline function Base.setindex!(A::CPUCellArray{T,N,1,T_elem}, X::T, i::Int) where {T<:Union{SArray,FieldArray},N,T_elem}
+@inline function Base.setindex!(A::CPUCellArray{T,N,1,T_elem}, X::T, i::Int) where {T<:ArrayCell,N,T_elem}
     setindex!(reinterpret(reshape, T, view(A.data::Array{T_elem,_N},1,:,:)), X ,i)   # NOTE: reinterpret is not implemented for CUDA device arrays, i.e. for usage in kernels
     return
 end
 
 
+## Array operation overloading
+
+Base.:(==)(A::CellArray, B::CellArray) = (size(A) == size(B)) && all(A.data .== B.data) # NOTE: the sizes are compared first, as differently shaped CellArrays can have identically shaped data; for some reason the following does not work robustly: A.data == B.data
+# TODO: Comparison "<" does not work for integers:
+# Base.:(<)(A::CellArray, B::CellArray)  = all(A.data .< B.data)
+
+
 ## CellArray properties
 
-@inline Base.getproperty(A::CellArray{T,N,B,T_array}, s::Symbol) where {T<:FieldArray,N,B,T_array} = _getproperty(A, Val(s))
-@inline _getproperty(A::CellArray{T,N,B,T_array}, s::Val) where {T<:FieldArray,N,B,T_array}        = _getfield(A, s)
-@inline _getfield(A::CellArray{T,N,B,T_array}, ::Val{:data}) where {T<:FieldArray,N,B,T_array}     = getfield(A, :data)
-@inline _getfield(A::CellArray{T,N,B,T_array}, ::Val{:dims}) where {T<:FieldArray,N,B,T_array}     = getfield(A, :dims)
-@inline _getfield(A::CellArray{T,N,B,T_array}, s::Val) where {T<:FieldArray,N,B,T_array}           = field(A, s)
+@inline Base.getproperty(A::CellArray{T,N,B,T_array}, fieldname::Symbol) where {T<:FieldArray,N,B,T_array} = getproperty(A, Val(fieldname))
+
+@inline Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{:data}) where {T<:FieldArray{N2,T2,D},N,B,T_array} where {N2,T2,D} = getfield(A, :data)
+@inline Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{:dims}) where {T<:FieldArray{N2,T2,D},N,B,T_array} where {N2,T2,D} = getfield(A, :dims)
+
+@inline @generated function Base.getproperty(A::CellArray{T,N,B,T_array}, ::Val{fieldname}) where {T<:FieldArray{N2,T2,D},N,B,T_array,fieldname} where {N2,T2,D}
+    cellfields = SArray{N2}(fieldnames(T))                       # the field names of the cell type T, stored in an SArray of size N2
+    position   = Tuple(findfirst(==(fieldname), cellfields))     # index tuple of the first field name identical to `fieldname`
+    return :(field(A, $position))
+end
 
 
 ## API functions
@@ -338,12 +379,22 @@ Return a tuple containing the dimensions of `A` or return only a specific dimens
 @inline cellsize(A::AbstractArray, dim::Int) = cellsize(A)[dim]
 
 
+"""
+    celllength(A)
+
+Return the cell length of CellArray `A`, i.e., the number of elements of its cell type (`length(eltype(A))`).
+"""
+@inline celllength(A::AbstractArray) = length(eltype(A))
+
+
 """
     blocklength(A)
 
 Return the blocklength of CellArray `A`.
 """
-@inline blocklength(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = (B == 0) ? prod(A.dims) : B
+@inline blocklength(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = prod(A.dims) # B == 0: the blocklength is the total number of cells of `A`.
+@inline blocklength(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = 1            # B == 1: dedicated method returning the literal 1.
+@inline blocklength(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = B            # any other B: the blocklength is the type parameter itself.
 
 
 """
@@ -353,35 +404,62 @@ Return the blocklength of CellArray `A`.
 Return an array view of the field of CellArray `A` designated with `indices` or `fieldname` (modifying the view will modify `A`). The view's dimensionality and size are equal to `A`'s. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.
 
 ## Arguments
-- `indices::Int|NTuple{N,Int}`: the `indices` that designate the field in accordance with `A`'s cell type.
+- `indices::Int|NTuple{N,Int}`: the `indices` that designate the field in accordance with `A`'s cell type (flat indexing is supported for multi-dimensional cells).
 - `fieldname::Symbol`: the `fieldname` that designates the field in accordance with `A`'s cell type.
 """
-@inline field(A::CellArray{T,N,0,T_array}, index::Int)                        where {T,N,T_array}                                     = view(plain(A), Base.OneTo.(size(A))..., index)
-@inline field(A::CellArray{T,N,0,T_array}, indices::NTuple{M,Int})            where {T_elem,M,T<:AbstractArray{T_elem,M},N,  T_array} = view(plain(A), Base.OneTo.(size(A))..., indices...)
-@inline field(A::CellArray{T,N,1,T_array}, index::Int)                        where {T,N,T_array}                                     = view(plain(A), index,      Base.OneTo.(size(A))...)
-@inline field(A::CellArray{T,N,1,T_array}, indices::NTuple{M,Int})            where {T_elem,M,T<:AbstractArray{T_elem,M},N,  T_array} = view(plain(A), indices..., Base.OneTo.(size(A))...)
-@inline field(A::CellArray{T,N,B,T_array}, indices::Union{Int,NTuple{M,Int}}) where {T_elem,M,T<:AbstractArray{T_elem,M},N,B,T_array} = @ArgumentError("the operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
-@inline field(A::CellArray, indices::Int...)                                                                                          = field(A, indices)
-@inline field(A::CellArray{T,N,B,T_array}, fieldname::Symbol)                 where {T<:FieldArray,N,B,T_array}                       = field(A, Val(fieldname))
-
-@inline @generated function field(A::CellArray{T,N,B,T_array}, ::Val{fieldname}) where {T<:FieldArray{N2,T2,D},N,B,T_array,fieldname} where {N2,T2,D}
-    names   = SArray{N2}(fieldnames(T))
-    indices = Tuple(findfirst(x->x===fieldname, names))
-    return :(field(A, $(indices...)))
-end
+@inline field(A::CellArray{T,N,0,T_array}, index::Tuple{Int})                        where {T<:ArrayCell,N,  T_array}   = reshape(view(plain_flat(A),               :,   index...), size(A))
+@inline field(A::CellArray{T,N,0,T_array}, indices::NTuple{M,Int})                   where {T<:ArrayCell,N,  T_array,M} = reshape(view(plain_arrayflat(A),          :, indices...), size(A))
+@inline field(A::CellArray{T,N,1,T_array}, index::Tuple{Int})                        where {T<:ArrayCell,N,  T_array}   = reshape(view(plain_flat(A),        index...,          :), size(A))
+@inline field(A::CellArray{T,N,1,T_array}, indices::NTuple{M,Int})                   where {T<:ArrayCell,N,  T_array,M} = reshape(view(plain_arrayflat(A), indices...,          :), size(A))
+@inline field(A::CellArray{T,N,B,T_array}, indices::Union{Tuple{Int},NTuple{M,Int}}) where {T<:ArrayCell,N,B,T_array,M} = @ArgumentError("the operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
+@inline field(A::CellArray{T}, indices::NTuple{M,Int})                               where {T<:Number,M}                = A
+@inline field(A::CellArray{T,N,B,T_array}, fieldname::Symbol)                        where {T<:FieldArray,N,B,T_array}  = getproperty(A, fieldname)
+@inline field(A::CellArray, indices::Vararg{Int, N})                                 where {N}                          = field(A, indices)
 
 
 ## Helper functions
 
+# NOTE: the following function could be provided in public API:
 # """
 #     plain(A)
 #
 # Return a plain `N`-dimensional array view of CellArray `A` (modifying the view will modify `A`), where `N` is the sum of the dimensionalities of `A` and the cell type of `A`. The view's dimensions are `(size(A)..., cellsize(A)...)` if parameter `B` of `A` is `0`, and `(cellsize(A)..., size(A)...)` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.
 #
 # """
-@inline plain(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = reshape(A.data, (size(A)..., cellsize(A)...))
-@inline plain(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = reshape(A.data, (cellsize(A)..., size(A)...))
-@inline plain(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
+# @inline plain(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = reshape(A.data, (size(A)..., cellsize(A)...))
+# @inline plain(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = reshape(A.data, (cellsize(A)..., size(A)...))
+# @inline plain(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
+
+# """
+#     plain_arrayflat(A)
+#
+# Return a plain `N`-dimensional array view of CellArray `A` with flat array indexing (modifying the view will modify `A`), where `N` is one plus the dimensionality of the cell type of `A`. The view's dimensions are `(length(A), cellsize(A)...)` if parameter `B` of `A` is `0`, and `(cellsize(A)..., length(A))` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.
+#
+# """
+@inline plain_arrayflat(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = reshape(A.data, (length(A), cellsize(A)...))
+@inline plain_arrayflat(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = reshape(A.data, (cellsize(A)..., length(A)))
+@inline plain_arrayflat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
+
+# NOTE: the following function could be provided in public API:
+# """
+#     plain_cellflat(A)
+#
+# Return a plain `N`-dimensional array view of CellArray `A` with flat cell indexing (modifying the view will modify `A`), where `N` is the dimensionality of `A` plus one. The view's dimensions are `(size(A)..., celllength(A))` if parameter `B` of `A` is `0`, and `(celllength(A), size(A)...)` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.
+#
+# """
+# @inline plain_cellflat(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = reshape(A.data, (size(A)..., celllength(A)))
+# @inline plain_cellflat(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = reshape(A.data, (celllength(A), size(A)...))
+# @inline plain_cellflat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
+
+# """
+#     plain_flat(A)
+#
+# Return a plain two-dimensional array view of CellArray `A` with flat array and cell indexing (modifying the view will modify `A`). The view's dimensions are `(length(A), celllength(A))` if parameter `B` of `A` is `0`, and `(celllength(A), length(A))` if parameter `B` of `A` is `1`. The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.
+#
+# """
+@inline plain_flat(A::CellArray{T,N,0,T_array}) where {T,N,  T_array} = reshape(A.data, (length(A), celllength(A)))
+@inline plain_flat(A::CellArray{T,N,1,T_array}) where {T,N,  T_array} = reshape(A.data, (celllength(A), length(A)))
+@inline plain_flat(A::CellArray{T,N,B,T_array}) where {T,N,B,T_array} = @ArgumentError("The operation is not supported if parameter `B` of `A` is neither `0` nor `1`.")
 
 
 function check_T(::Type{T}) where {T}
diff --git a/src/CellArrays.jl b/src/CellArrays.jl
index a4eca6c..6478d9e 100644
--- a/src/CellArrays.jl
+++ b/src/CellArrays.jl
@@ -32,5 +32,5 @@ using .Exceptions
 include("CellArray.jl")
 
 ## Exports (need to be after include of submodules if re-exports from them)
-export CellArray, CPUCellArray, @define_CuCellArray, @define_ROCCellArray, @define_MtlCellArray, cellsize, blocklength, field
+export CellArray, CPUCellArray, @define_CuCellArray, @define_ROCCellArray, @define_MtlCellArray, cellsize, celllength, blocklength, field
 end
diff --git a/test/test_CellArray.jl b/test/test_CellArray.jl
index 569d4a6..8654885 100644
--- a/test/test_CellArray.jl
+++ b/test/test_CellArray.jl
@@ -67,7 +67,7 @@ mutable struct MyMutableFieldArray{T} <: FieldArray{Tuple{2}, T, 1}
 end
 
 @testset "$(basename(@__FILE__))" begin
-    @testset "1. CellArray allocation ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
+    @testset "1. CellArray allocation ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
         @testset "Number cells" begin
 			dims = (2,3)
 			A = CellArray{Float}(undef, dims)
@@ -86,10 +86,6 @@ end
 			@test eltype(B)      == Int32
 			@test eltype(C)      == Float
 			@test eltype(D)      == Int32
-			@test typeof(A)      == CellArrays.CellArray{Float, length(dims), 0, Array{eltype(A.data),_N}}
-			@test typeof(B)      == CellArrays.CellArray{Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
-			@test typeof(C)      == CellArrays.CellArray{Float, length(dims), 1, Array{eltype(C.data),_N}}
-			@test typeof(D)      == CellArrays.CellArray{Int32, length(dims), 4, Array{eltype(D.data),_N}}
 			@test length(A.data) == prod(dims)
 			@test length(B.data) == prod(dims)
 			@test length(C.data) == prod(dims)
@@ -98,12 +94,23 @@ end
 			@test B.dims         == dims
 			@test C.dims         == dims
 			@test D.dims         == dims
+			if array_type == "CUDA"
+				@test typeof(A)      == CellArrays.CellArray{Float, length(dims), 0, CuArray{eltype(A.data),_N, CUDA.DeviceMemory}}
+				@test typeof(B)      == CellArrays.CellArray{Int32, length(dims), prod(dims), CuArray{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA.
+				@test typeof(C)      == CellArrays.CellArray{Float, length(dims), 1, CuArray{eltype(C.data),_N, CUDA.DeviceMemory}}
+				@test typeof(D)      == CellArrays.CellArray{Int32, length(dims), 4, CuArray{eltype(D.data),_N, CUDA.DeviceMemory}}
+			else
+				@test typeof(A)      == CellArrays.CellArray{Float, length(dims), 0, Array{eltype(A.data),_N}}
+				@test typeof(B)      == CellArrays.CellArray{Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
+				@test typeof(C)      == CellArrays.CellArray{Float, length(dims), 1, Array{eltype(C.data),_N}}
+				@test typeof(D)      == CellArrays.CellArray{Int32, length(dims), 4, Array{eltype(D.data),_N}}
+			end
         end;
 		@testset "SArray cells" begin
-			dims      = (2,3)
-			celldims  = (3,4)
-			T_Float = SMatrix{celldims..., Float, prod(celldims)}
-			T_Int32   = SMatrix{celldims...,   Int32, prod(celldims)}
+			dims     = (2,3)
+			celldims = (3,4)
+			T_Float  = SMatrix{celldims..., Float, prod(celldims)}
+			T_Int32  = SMatrix{celldims...,   Int32, prod(celldims)}
 			A = CellArray{T_Float}(undef, dims)
 			B = CellArrays.CellArray{T_Int32,prod(dims)}(Array, undef, dims...)
 			C = CellArray{T_Float,1}(undef, dims)
@@ -120,10 +127,6 @@ end
 			@test eltype(B)      == T_Int32
 			@test eltype(C)      == T_Float
 			@test eltype(D)      == T_Int32
-			@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}}
-			@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
-			@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}}
-			@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}}
 			@test length(A.data) == prod(dims)*prod(celldims)
 			@test length(B.data) == prod(dims)*prod(celldims)
 			@test length(C.data) == prod(dims)*prod(celldims)
@@ -132,11 +135,22 @@ end
 			@test B.dims         == dims
 			@test C.dims         == dims
 			@test D.dims         == dims
+			if array_type == "CUDA"
+				@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N, CUDA.DeviceMemory}}
+				@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA.
+				@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N, CUDA.DeviceMemory}}
+				@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N, CUDA.DeviceMemory}}
+			else
+				@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}}
+				@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
+				@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}}
+				@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}}
+			end
         end;
 		@testset "FieldArray cells" begin
 			dims      = (2,3)
 			celldims  = size(MyFieldArray)
-			T_Float = MyFieldArray{Float}
+			T_Float   = MyFieldArray{Float}
 			T_Int32   = MyFieldArray{Int32}
 			A = CellArray{T_Float}(undef, dims)
 			B = CellArrays.CellArray{T_Int32,prod(dims)}(Array, undef, dims...)
@@ -154,10 +168,6 @@ end
 			@test eltype(B)      == T_Int32
 			@test eltype(C)      == T_Float
 			@test eltype(D)      == T_Int32
-			@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}}
-			@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
-			@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}}
-			@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}}
 			@test length(A.data) == prod(dims)*prod(celldims)
 			@test length(B.data) == prod(dims)*prod(celldims)
 			@test length(C.data) == prod(dims)*prod(celldims)
@@ -166,15 +176,26 @@ end
 			@test B.dims         == dims
 			@test C.dims         == dims
 			@test D.dims         == dims
+			if array_type == "CUDA"
+				@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N, CUDA.DeviceMemory}}
+				@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}} # NOTE: the general constructor used for B is not yet specialized for CUDA.
+				@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N, CUDA.DeviceMemory}}
+				@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N, CUDA.DeviceMemory}}
+			else
+				@test typeof(A)      == CellArrays.CellArray{T_Float, length(dims), 0, Array{eltype(A.data),_N}}
+				@test typeof(B)      == CellArrays.CellArray{T_Int32, length(dims), prod(dims), Array{eltype(B.data),_N}}
+				@test typeof(C)      == CellArrays.CellArray{T_Float, length(dims), 1, Array{eltype(C.data),_N}}
+				@test typeof(D)      == CellArrays.CellArray{T_Int32, length(dims), 4, Array{eltype(D.data),_N}}
+			end
         end;
     end;
-	@testset "2. functions ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
-		dims      = (2,3)
-		celldims  = (3,4) # Needs to be compatible for matrix multiplication!
-		T_Float = SMatrix{celldims..., Float, prod(celldims)}
-		T_Int32   = SMatrix{celldims...,   Int32, prod(celldims)}
+	@testset "2. functions ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
+		dims     = (2,3)
+		celldims = (3,4) # Needs to be compatible for matrix multiplication!
+		T_Float  = SMatrix{celldims..., Float, prod(celldims)}
+		T_Int32  = SMatrix{celldims...,   Int32, prod(celldims)}
 		T2_Float = MyFieldArray{Float}
-		T2_Int32   = MyFieldArray{Int32}
+		T2_Int32 = MyFieldArray{Int32}
 		A = CellArray{Float}(undef, dims)
 		B = CellArrays.CellArray{Int32,prod(dims)}(Array, undef, dims)
 		C = CellArray{T_Float}(undef, dims)
@@ -194,25 +215,61 @@ end
 			@test size(H) == dims
         end;
 		@testset "similar" begin
-			@test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
-			@test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N}}
-			@test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N}}
+			if array_type == "CUDA" # NOTE: `Array` is the loop variable bound by the enclosing `for ... in zip(...)`, not necessarily Base.Array; the CUDA expectations carry the extra CUDA.DeviceMemory type parameter
+				@test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} # same dims: N and blocklength of the source are expected to be kept (0 for A, C, E)
+				@test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}} # explicit dims (1,2): the result is expected to have N == 2
+				@test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+				@test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N, CUDA.DeviceMemory}}
+			elseif array_type == "AMDGPU" # same expectations as above, with AMDGPU.Runtime.Mem.HIPBuffer as the extra type parameter
+				@test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+				@test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N, AMDGPU.Runtime.Mem.HIPBuffer}}
+			else # all remaining array types: the expected storage type has no extra type parameter
+				@test typeof(similar(A, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(B, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(B), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(C, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(D, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(D), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(E, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims),              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(F, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(F), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(G, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(G), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(H, T_Int32)) == CellArrays.CellArray{T_Int32, length(dims), blocklength(H), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(A, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(B, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(B), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(C, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(D, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(D), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(E, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2,              0, Array{eltype(T_Int32),_N}}
+				@test typeof(similar(F, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(F), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(G, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(G), Array{eltype(T_Int32),_N}}
+				@test typeof(similar(H, T_Int32, (1,2))) == CellArrays.CellArray{T_Int32, 2, blocklength(H), Array{eltype(T_Int32),_N}}
+			end
         end;
 		@testset "fill!" begin
-			allowscalar() do
+			allowscalar(true) # "allowscalar do" is not defined for AMDGPU
 				fill!(A, 9);   @test all(Base.Array(A.data) .== 9.0)
 				fill!(B, 9.0); @test all(Base.Array(B.data) .== 9)
 				fill!(C, (1:length(eltype(C)))); @test all(C .== (T_Float(1:length(eltype(C)))  for i=1:dims[1], j=1:dims[2]))
@@ -221,10 +278,20 @@ end
 				fill!(F, (1:length(eltype(F)))); @test all(F .== (T2_Int32(1:length(eltype(F)))   for i=1:dims[1], j=1:dims[2]))
 				fill!(G, (1:length(eltype(G)))); @test all(G .== (T_Float(1:length(eltype(G)))  for i=1:dims[1], j=1:dims[2]))
 				fill!(H, (1:length(eltype(H)))); @test all(H .== (T_Int32(1:length(eltype(H)))    for i=1:dims[1], j=1:dims[2]))
+			allowscalar(false)
+		end
+		@testset "constructors" begin # converting constructors: CellArray -> CPUCellArray, then CPUCellArray -> backend-specific CellArray
+			@test isa(CPUCellArray(A), CPUCellArray) # runs for every array_type of the enclosing loop
+			if array_type == "CUDA"
+				@test isa(CuCellArray(CPUCellArray(A)), CuCellArray) # round trip through the CPU representation
+			# elseif array_type == "AMDGPU"
+			# 	@test isa(ROCCellArray(CPUCellArray(A)), ROCCellArray)    # TODO: for some reason this results in scalar indexing
+			elseif array_type == "Metal"
+				@test isa(MtlCellArray(CPUCellArray(A)), MtlCellArray) # round trip through the CPU representation
 			end
 		end
 		@testset "getindex / setindex! (array programming)" begin
-			allowscalar() do
+			allowscalar(true) # "allowscalar do" is not defined for AMDGPU
 				A.data.=0; B.data.=0; C.data.=0; D.data.=0; E.data.=0; F.data.=0; G.data.=0; H.data.=0;
 				A[2,2:3] .= 9
 				B[2,2:3] .= 9.0
@@ -242,7 +309,7 @@ end
 				@test all(F[2,2:3] .== (T2_Int32(1:length(T2_Int32)), T2_Int32(1:length(T2_Int32))))
 				@test all(G[2,2:3] .== (T_Float(1:length(T_Float)), T_Float(1:length(T_Float))))
 				@test all(H[2,2:3] .== (T_Int32(1:length(T_Int32)), T_Int32(1:length(T_Int32))))
-			end
+			allowscalar(false)
         end;
 		@testset "getindex / setindex! (GPU kernel programming)" begin
 			celldims2 = (4,4) # Needs to be compatible for matrix multiplication!
@@ -332,21 +399,90 @@ end
 			@test size(field(E, :yyxx))	== dims
 			@test size(field(E, :yyyy))	== dims
 		end;
-		@testset "field property" begin
+		@testset "field property (host side access)" begin # property syntax `E.<name>` must give the same result as `field(E, :<name>)`
 			@test E.xxxx == field(E, :xxxx)
 			@test E.yxxx == field(E, :yxxx)
 			@test E.xyxx == field(E, :xyxx)
 			@test E.yyxx == field(E, :yyxx)
 			@test E.yyyy == field(E, :yyyy)
 		end;
+		@testset "field property (device side access)" begin # property syntax (A.yxxx, A.yyyy) used inside GPU kernels; no test runs for array types without a branch below
+			if array_type == "CUDA" # launch below passes E as both A and B: all entries start at 1, so the two touched fields must become 1 + 10*1 = 11
+				function add2D_CUDA_properties!(A, B) # kernel: for cell (ix,iy), A.<field> += 10*B.<field> for the fields yxxx and yyyy
+				    ix = (CUDA.blockIdx().x-1) * CUDA.blockDim().x + CUDA.threadIdx().x # 1-based global index in x
+				    iy = (CUDA.blockIdx().y-1) * CUDA.blockDim().y + CUDA.threadIdx().y # 1-based global index in y
+				    A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy];
+					A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy];
+				    return
+				end
+				E.data.=1;  @cuda blocks=size(E) add2D_CUDA_properties!(E, E); CUDA.synchronize();  @test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11) # one block per cell; NOTE(review): no `threads` given, presumably one thread per block - confirm
+			# elseif array_type == "AMDGPU"  # TODO: activate once supported
+			# 	function add2D_AMDGPU_properties!(A, B)
+			# 		ix = (AMDGPU.blockIdx().x-1) * AMDGPU.blockDim().x + AMDGPU.threadIdx().x
+			# 	    iy = (AMDGPU.blockIdx().y-1) * AMDGPU.blockDim().y + AMDGPU.threadIdx().y
+			# 	    A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy];
+			# 		A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy];
+			# 	    return
+			# 	end
+			# 	E.data.=1;  @roc gridsize=size(E) add2D_AMDGPU_properties!(E, E); AMDGPU.synchronize();  @test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11)
+			elseif array_type == "Metal" # same kernel and expectation as the CUDA branch, written with Metal index intrinsics
+				function add2D_Metal_properties!(A, B) # kernel: for cell (ix,iy), A.<field> += 10*B.<field> for the fields yxxx and yyyy
+					ix = (Metal.threadgroup_position_in_grid_3d().x-1) * Metal.threads_per_threadgroup_3d().x + Metal.thread_position_in_threadgroup_3d().x
+				    iy = (Metal.threadgroup_position_in_grid_3d().y-1) * Metal.threads_per_threadgroup_3d().y + Metal.thread_position_in_threadgroup_3d().y
+				    A.yxxx[ix,iy] = A.yxxx[ix,iy] + 10*B.yxxx[ix,iy];
+					A.yyyy[ix,iy] = A.yyyy[ix,iy] + 10*B.yyyy[ix,iy];
+				    return
+				end
+				E.data.=1;  @metal groups=size(E) add2D_Metal_properties!(E, E); Metal.synchronize();  @test all(Base.Array(E.yxxx) .== 11) && all(Base.Array(E.yyyy) .== 11) # one threadgroup per cell
+			end
+		end
+		@testset "comparisons (array programming)" begin # `==` must compare contents (not identity) for all cell array variants A..H
+			A.data.=9; B.data.=9; C.data.=9; D.data.=9; E.data.=9; F.data.=9; G.data.=9; H.data.=9; # reference content: every entry is 9
+			A2=similar(A); B2=similar(B); C2=similar(C); D2=similar(D); E2=similar(E); F2=similar(F); G2=similar(G); H2=similar(H); A2.data.=0; B2.data.=0; C2.data.=0; D2.data.=0; E2.data.=0; F2.data.=0; G2.data.=0; H2.data.=0; # overwrite with 0: whatever `similar` leaves in memory (presumably uninitialized) is not guaranteed to differ from 9
+			@test A !== A2 && !(A == A2)
+			@test B !== B2 && !(B == B2)
+			@test C !== C2 && !(C == C2)
+			@test D !== D2 && !(D == D2)
+			@test E !== E2 && !(E == E2)
+			@test F !== F2 && !(F == F2)
+			@test G !== G2 && !(G == G2)
+			@test H !== H2 && !(H == H2)
+			# Comparison does not work for integers:
+			# @test A !== A2 && A2 < A
+			# # @test B !== B2 && B2 < B
+			# @test C !== C2 && C2 < C
+			# # @test D !== D2 && D2 < D
+			# @test E !== E2 && E2 < E
+			# # @test F !== F2 && F2 < F
+			# @test G !== G2 && G2 < G
+			# # @test H !== H2 && H2 < H
+			A3=deepcopy(A); B3=deepcopy(B); C3=deepcopy(C); D3=deepcopy(D); E3=deepcopy(E); F3=deepcopy(F); G3=deepcopy(G); H3=deepcopy(H); # deep copies: distinct objects that must compare equal
+			@test A !== A3 && A == A3
+			@test B !== B3 && B == B3
+			@test C !== C3 && C == C3
+			@test D !== D3 && D == D3
+			@test E !== E3 && E == E3
+			@test F !== F3 && F == F3
+			@test G !== G3 && G == G3
+			@test H !== H3 && H == H3
+			# Comparison does not work for integers:
+			# @test A !== A3 && !(A3 < A)
+			# # @test B !== B3 && !(B3 < B)
+			# @test C !== C3 && !(C3 < C)
+			# # @test D !== D3 && !(D3 < D)
+			# @test E !== E3 && !(E3 < E)
+			# # @test F !== F3 && !(F3 < F)
+			# @test G !== G3 && !(G3 < G)
+			# # @test H !== H3 && !(H3 < H)
+		end;
     end;
-	@testset "3. Exceptions ($array_type arrays) (precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
-		dims       = (2,3)
-		celldims   = (3,4)
+	@testset "3. Exceptions ($array_type arrays; precision: $(nameof(Float)))" for (array_type, Array, CellArray, allowscalar, Float) in zip(array_types, ArrayConstructors, CellArrayConstructors, allowscalar_functions, precision_types) 
+		dims     = (2,3)
+		celldims = (3,4)
 		T_Float  = SMatrix{celldims..., Float, prod(celldims)}
-		T_Int32    = SMatrix{celldims...,   Int32, prod(celldims)}
+		T_Int32  = SMatrix{celldims...,   Int32, prod(celldims)}
 		T2_Float = MyFieldArray{Float}
-		T2_Int32   = MyFieldArray{Int32}
+		T2_Int32 = MyFieldArray{Int32}
 		A = CellArray{Float}(undef, dims)
 		B = CellArrays.CellArray{Int32,prod(dims)}(Array, undef, dims)
 		C = CellArray{T_Float}(undef, dims)