Skip to content


Update to GPUArrays 9.0. (#358)
Browse files Browse the repository at this point in the history
* Switch to GPUArrays buffer management.
* Support mixed type constructors.
  • Loading branch information
maleadt authored Sep 1, 2023
1 parent c9e1caf commit 3a63f96
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 118 deletions.
6 changes: 3 additions & 3 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.8.5"
manifest_format = "2.0"
project_hash = "7a3272ed59a2ad154cfe72c74eda180b1c208c6b"
project_hash = "c2f50cf42770de6ef32bf0ade955b592c49b8da5"

deps = ["LinearAlgebra", "Requires"]
Expand Down Expand Up @@ -79,9 +79,9 @@ uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"

deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
git-tree-sha1 = "2e57b4a4f9cc15e85a24d603256fe08e527f48d1"
git-tree-sha1 = "8ad8f375ae365aa1eb2f42e2565a40b55a4b69a8"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "8.8.1"
version = "9.0.0"

deps = ["Adapt"]
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36"
Adapt = "2.0, 3.0"
CEnum = "0.4"
ExprTools = "0.1"
GPUArrays = "8.4"
GPUArrays = "9"
GPUCompiler = "0.23"
KernelAbstractions = "0.9.1"
LLVM = "6"
Expand Down
194 changes: 86 additions & 108 deletions src/array.jl
Original file line number Diff line number Diff line change
@@ -1,36 +1,54 @@
export oneArray, oneVector, oneMatrix, oneVecOrMat

## array storage

# array storage is shared by arrays that refer to the same data, while keeping track of
# the number of outstanding references

struct ArrayStorage{B}
## array type

# the refcount also encodes the state of the array:
# < 0: unmanaged
# = 0: freed
# > 0: referenced
function hasfieldcount(@nospecialize(dt))
return false
return true

# Convenience constructor: wraps the plain `state` integer in a `Threads.Atomic{Int}`
# before forwarding to the parameterized `ArrayStorage{B}` constructor.
function ArrayStorage(buf::B, state::Int) where {B}
    counter = Threads.Atomic{Int}(state)
    return ArrayStorage{B}(buf, counter)
end

# Report whether type `X` occurs anywhere inside type `T`: as `T` itself, as a member
# of a `Union`, or (recursively) as one of the field types of `T`.
#
# Returns `true` on the first match and `false` when no branch finds `X`.
function contains_eltype(T, X)
    if T === X
        return true
    elseif T isa Union
        # look through every member of the union
        for U in Base.uniontypes(T)
            contains_eltype(U, X) && return true
        end
    elseif hasfieldcount(T)
        # only types for which `hasfieldcount` holds are inspected field by field
        for U in fieldtypes(T)
            contains_eltype(U, X) && return true
        end
    end
    return false
end

## array type
function check_eltype(T)
Base.allocatedinline(T) || error("oneArray only supports element types that are stored inline")
Base.isbitsunion(T) && error("oneArray does not yet support isbits-union arrays")
if oneL0.module_properties(device()).fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 !=
contains_eltype(T, Float16) && error("Float16 is not supported on this device")
if oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 !=
contains_eltype(T, Float64) && error("Float64 is not supported on this device")

mutable struct oneArray{T,N,B} <: AbstractGPUArray{T,N}

maxsize::Int # maximum data size; excluding any selector bytes
offset::Int # offset of the data in the buffer, in number of elements

function oneArray{T,N,B}(::UndefInitializer, dims::Dims{N}) where {T,N,B}
Base.allocatedinline(T) || error("oneArray only supports element types that are stored inline")
maxsize = prod(dims) * sizeof(T)
bufsize = if Base.isbitsunion(T)
# type tag array past the data
Expand All @@ -42,36 +60,22 @@ mutable struct oneArray{T,N,B} <: AbstractGPUArray{T,N}
ctx = context()
dev = device()
buf = allocate(B, ctx, dev, bufsize, Base.datatype_alignment(T))
storage = ArrayStorage(buf, 1)
obj = new{T,N,B}(storage, maxsize, 0, dims)
data = DataRef(buf) do buf
obj = new{T,N,B}(data, maxsize, 0, dims)
finalizer(unsafe_free!, obj)

function oneArray{T,N}(storage::ArrayStorage{B}, dims::Dims{N};
function oneArray{T,N}(data::DataRef{B}, dims::Dims{N};
maxsize::Int=prod(dims) * sizeof(T), offset::Int=0) where {T,N,B}
Base.allocatedinline(T) || error("oneArray only supports element types that are stored inline")
return new{T,N,B}(storage, maxsize, offset, dims)
obj = new{T,N,B}(copy(data), maxsize, offset, dims)
finalizer(unsafe_free!, obj)

function unsafe_free!(xs::oneArray)
# this call should only have an effect once, because both the user and the GC can call it
if === nothing
elseif[] < 0
throw(ArgumentError("Cannot free an unmanaged buffer."))

refcount = Threads.atomic_add!(, -1)
if refcount == 1

# this array object is now dead, so replace its storage by a dummy one = nothing

unsafe_free!(a::oneArray) = GPUArrays.unsafe_free!(

## alias detection
Expand All @@ -86,6 +90,7 @@ function Base.mightalias(A::oneArray, B::oneArray)
return first(rA) <= first(rB) < last(rA) || first(rB) <= first(rA) < last(rB)

## convenience constructors

const oneVector{T} = oneArray{T,1}
Expand All @@ -96,17 +101,23 @@ const oneVecOrMat{T} = Union{oneVector{T},oneMatrix{T}}
# Buffer type omitted: forward to the three-parameter constructor using
# `oneL0.DeviceBuffer` as the buffer type.
function oneArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N}
    return oneArray{T,N,oneL0.DeviceBuffer}(undef, dims)
end

# type and dimensionality specified, accepting dims as series of Ints
oneArray{T,N,B}(::UndefInitializer, dims::Integer...) where {T,N,B} =
# buffer, type and dimensionality specified
# Dimensions given as a tuple of arbitrary `Integer`s: convert them to `Int` first.
function oneArray{T,N,B}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N,B}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N,B}(undef, intdims)
end

# Dimensions given as `N` separate `Integer` arguments: convert them to `Int` first.
function oneArray{T,N,B}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N,B}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N,B}(undef, intdims)
end
oneArray{T,N}(::UndefInitializer, dims::Integer...) where {T,N} =

# type and dimensionality specified
# Dimensions given as a tuple of arbitrary `Integer`s: convert them to `Int` first.
function oneArray{T,N}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N}(undef, intdims)
end

# Dimensions given as `N` separate `Integer` arguments: convert them to `Int` first.
function oneArray{T,N}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N}(undef, intdims)
end

# type but not dimensionality specified
# Dimensionality omitted: take `N` from the length of the `dims` tuple.
function oneArray{T}(::UndefInitializer, dims::Dims{N}) where {T,N}
    return oneArray{T,N}(undef, dims)
end
# Dimensions given as a series of `Integer` arguments: pack them into a tuple of `Int`s
# and forward to the tuple-based method.
function oneArray{T}(::UndefInitializer, dims::Integer...) where {T}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T}(undef, intdims)
end
# only type specified
# Tuple of arbitrary `Integer`s: `N` comes from the tuple length, values become `Int`.
function oneArray{T}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N}(undef, intdims)
end

# `N` separate `Integer` arguments: `N` comes from their count, values become `Int`.
function oneArray{T}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N}
    intdims = convert(Tuple{Vararg{Int}}, dims)
    return oneArray{T,N}(undef, intdims)
end

# empty vector constructor
# Zero-argument form: construct a one-dimensional array of length 0.
function oneArray{T,1,B}() where {T,B}
    return oneArray{T,1,B}(undef, 0)
end
Expand Down Expand Up @@ -150,13 +161,11 @@ Base.size(x::oneArray) = x.dims
# Size in bytes: the per-element size multiplied by the number of elements.
function Base.sizeof(x::oneArray)
    return Base.elsize(x) * length(x)
end

function context(A::oneArray) === nothing && throw(UndefRefError())
return oneL0.context(
return oneL0.context([])

function device(A::oneArray) === nothing && throw(UndefRefError())
return oneL0.device(
return oneL0.device([])

Expand All @@ -166,22 +175,22 @@ export oneDenseArray, oneDenseVector, oneDenseMatrix, oneDenseVecOrMat,
oneStridedArray, oneStridedVector, oneStridedMatrix, oneStridedVecOrMat,
oneWrappedArray, oneWrappedVector, oneWrappedMatrix, oneWrappedVecOrMat

oneContiguousSubArray{T,N,A<:oneArray} = Base.FastContiguousSubArray{T,N,A}

# dense arrays: stored contiguously in memory
const oneDenseReinterpretArray{T,N,A<:Union{oneArray,oneContiguousSubArray}} = Base.ReinterpretArray{T,N,S,A} where S
const oneDenseReshapedArray{T,N,A<:Union{oneArray,oneContiguousSubArray,oneDenseReinterpretArray}} = Base.ReshapedArray{T,N,A}
const DenseSuboneArray{T,N,A<:Union{oneArray,oneDenseReshapedArray,oneDenseReinterpretArray}} = Base.FastContiguousSubArray{T,N,A}
const oneDenseArray{T,N} = Union{oneArray{T,N}, DenseSuboneArray{T,N}, oneDenseReshapedArray{T,N}, oneDenseReinterpretArray{T,N}}
# all common dense wrappers are currently represented as oneArray objects.
# this simplifies common use cases, and greatly improves load time.
const oneDenseArray{T,N} = oneArray{T,N}
const oneDenseVector{T} = oneDenseArray{T,1}
const oneDenseMatrix{T} = oneDenseArray{T,2}
const oneDenseVecOrMat{T} = Union{oneDenseVector{T}, oneDenseMatrix{T}}
# XXX: these dummy aliases (oneDenseArray=oneArray) break alias printing, as
# `Base.print_without_params` only handles the case of a single alias.

# strided arrays
const oneStridedSubArray{T,N,A<:Union{oneArray,oneDenseReshapedArray,oneDenseReinterpretArray},
I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange,
Base.AbstractCartesianIndex}}}} = SubArray{T,N,A,I}
const oneStridedArray{T,N} = Union{oneArray{T,N}, oneStridedSubArray{T,N}, oneDenseReshapedArray{T,N}, oneDenseReinterpretArray{T,N}}
const oneStridedSubArray{T,N,I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange,
Base.AbstractCartesianIndex}}}} =
const oneStridedArray{T,N} = Union{oneArray{T,N}, oneStridedSubArray{T,N}}
const oneStridedVector{T} = oneStridedArray{T,1}
const oneStridedMatrix{T} = oneStridedArray{T,2}
const oneStridedVecOrMat{T} = Union{oneStridedVector{T}, oneStridedMatrix{T}}
Expand All @@ -191,7 +200,7 @@ Base.pointer(x::oneStridedArray{T}) where {T} = Base.unsafe_convert(ZePtr{T}, x)
Base.unsafe_convert(ZePtr{T}, x) + Base._memory_offset(x, i)

# wrapped arrays: can be used in kernels
# anything that's (secretly) backed by a oneArray
const oneWrappedArray{T,N} = Union{oneArray{T,N}, WrappedArray{T,N,oneArray,oneArray{T,N}}}
const oneWrappedVector{T} = oneWrappedArray{T,1}
const oneWrappedMatrix{T} = oneWrappedArray{T,2}
Expand Down Expand Up @@ -237,7 +246,7 @@ Base.convert(::Type{T}, x::T) where T <: oneArray = x
# Requesting a host `Ptr{T}` for a `oneArray` always fails with an `ArgumentError`.
function Base.unsafe_convert(::Type{Ptr{T}}, x::oneArray{T}) where {T}
    msg = "cannot take the host address of a $(typeof(x))"
    throw(ArgumentError(msg))
end
Base.unsafe_convert(::Type{ZePtr{T}}, x::oneArray{T}) where {T} =
convert(ZePtr{T}, + x.offset*Base.elsize(x)
convert(ZePtr{T},[]) + x.offset*Base.elsize(x)

## interop with GPU arrays
Expand All @@ -255,7 +264,7 @@ Adapt.adapt_storage(::KernelAdaptor, xs::oneArray{T,N}) where {T,N} =

# Pointer to the `i`-th (1-based, default 1) type-tag byte of a host `Array`, computed
# from the address returned by the runtime's `jl_array_typetagdata`.
function typetagdata(a::Array, i=1)
    tags = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a)
    return tags + i - 1
end
typetagdata(a::oneArray, i=1) =
convert(ZePtr{UInt8}, + a.maxsize + a.offset + i - 1
convert(ZePtr{UInt8},[]) + a.maxsize + a.offset + i - 1

function Base.copyto!(dest::oneArray{T}, doffs::Integer, src::Array{T}, soffs::Integer,
n::Integer) where T
Expand Down Expand Up @@ -305,7 +314,7 @@ function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
if Base.isbitsunion(T)
# copy selector bytes
error("Not implemented")
error("oneArray does not yet support isbits-union arrays")
return dest
Expand All @@ -315,7 +324,7 @@ function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
if Base.isbitsunion(T)
# copy selector bytes
error("Not implemented")
error("oneArray does not yet support isbits-union arrays")

# copies to the host are synchronizing
Expand All @@ -329,7 +338,7 @@ function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
if Base.isbitsunion(T)
# copy selector bytes
error("Not implemented")
error("oneArray does not yet support isbits-union arrays")
return dest
Expand Down Expand Up @@ -364,37 +373,24 @@ function Base.fill!(A::oneDenseArray{T}, val) where T

## derived arrays

function GPUArrays.derive(::Type{T}, N::Int, a::oneArray, dims::Dims, offset::Int) where {T}
offset = (a.offset * Base.elsize(a)) ÷ sizeof(T) + offset
oneArray{T,N}(, dims; a.maxsize, offset)

## views

# A view reports the device of the array it was taken from.
function device(a::SubArray)
    return device(parent(a))
end

# A view reports the context of the array it was taken from.
function context(a::SubArray)
    return context(parent(a))
end

# we don't really want an array, so don't call `adapt(Array, ...)`,
# but just want oneArray indices to get downloaded back to the CPU.
# this makes sure we preserve array-like containers, like Base.Slice.
# Marker type selecting the adaptor below; it carries no data.
struct BackToCPU end

# Under the `BackToCPU` adaptor a `oneArray` is converted to a host `Array`.
function Adapt.adapt_storage(::BackToCPU, xs::oneArray)
    return convert(Array, xs)
end

# Build a view of `A` from the indices `I`.
#
# The bounds check runs against indices adapted with `BackToCPU()`; the view itself is
# built from the indices adapted to `oneArray`.
@inline function Base.view(A::oneArray, I::Vararg{Any,N}) where {N}
    J = to_indices(A, I)
    @boundscheck begin
        # Base's boundscheck accesses the indices, so make sure they reside on the CPU.
        # this is expensive, but it's a bounds check after all.
        J_cpu = map(j->adapt(BackToCPU(), j), J)
        checkbounds(A, J_cpu...)
    end
    J_gpu = map(j->adapt(oneArray, j), J)
    return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J_gpu...)),
                            J_gpu...)
end

# pointer conversions
## contiguous
# Device pointer for a view indexed only by `Base.RangeIndex` values: the parent's
# pointer advanced by the memory offset of the view's first index in each dimension.
function Base.unsafe_convert(::Type{ZePtr{T}},
                             V::SubArray{T,N,P,<:Tuple{Vararg{Base.RangeIndex}}}
                            ) where {T,N,P}
    return Base.unsafe_convert(ZePtr{T}, parent(V)) +
           Base._memory_offset(V.parent, map(first, V.indices)...)
end

## reshaped
function Base.unsafe_convert(::Type{ZePtr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{Base.RangeIndex,Base.ReshapedUnitRange}}}}) where {T,N,P}
return Base.unsafe_convert(ZePtr{T}, parent(V)) +
Expand All @@ -410,24 +406,6 @@ Base.unsafe_convert(::Type{ZePtr{T}}, A::PermutedDimsArray) where {T} =
Base.unsafe_convert(ZePtr{T}, parent(A))

## reshape

# A reshaped array reports the device of its parent.
function device(a::Base.ReshapedArray)
    return device(parent(a))
end

# A reshaped array reports the context of its parent.
function context(a::Base.ReshapedArray)
    return context(parent(a))
end

# A reshaped array uses the same device pointer as its parent.
function Base.unsafe_convert(::Type{ZePtr{T}}, a::Base.ReshapedArray{T}) where {T}
    return Base.unsafe_convert(ZePtr{T}, parent(a))
end

## reinterpret

# A reinterpreted array reports the device of its parent.
function device(a::Base.ReinterpretArray)
    return device(parent(a))
end

# A reinterpreted array reports the context of its parent.
function context(a::Base.ReinterpretArray)
    return context(parent(a))
end

# Take the parent's pointer with the parent element type `S`, then re-type it as
# `ZePtr{T}`.
function Base.unsafe_convert(::Type{ZePtr{T}},
                             a::Base.ReinterpretArray{T,N,S} where N) where {T,S}
    parent_ptr = Base.unsafe_convert(ZePtr{S}, parent(a))
    return ZePtr{T}(parent_ptr)
end

## unsafe_wrap

Expand Down
2 changes: 1 addition & 1 deletion src/device/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ Base.IndexStyle(::Type{<:oneDeviceArray}) = Base.IndexLinear()

# Linear indexing with any `Integer` index: delegate to `arrayref`.
Base.@propagate_inbounds function Base.getindex(A::oneDeviceArray{T},
                                                i1::Integer) where {T}
    return arrayref(A, i1)
end
Base.@propagate_inbounds Base.setindex!(A::oneDeviceArray{T}, x, i1::Int) where {T} =
# Linear assignment with any `Integer` index: convert `x` to the element type `T`,
# then delegate to `arrayset`.
Base.@propagate_inbounds function Base.setindex!(A::oneDeviceArray{T}, x,
                                                 i1::Integer) where {T}
    val = convert(T, x)::T
    return arrayset(A, val, i1)
end

# preserve the specific integer type when indexing device arrays,
Expand Down
10 changes: 5 additions & 5 deletions test/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ import Adapt
@testset "constructors" begin
xs = oneArray{Int}(undef, 2, 3)
@test collect(oneArray([1 2; 3 4])) == [1 2; 3 4]
@test testf(vec, rand(5,3))
@test testf(vec, rand(Float32, 5,3))
@test Base.elsize(xs) == sizeof(Int)
@test oneArray{Int, 2}(xs) === xs

@test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs)
@test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs)

@test collect(oneAPI.zeros(2, 2)) == zeros(2, 2)
@test collect(oneAPI.ones(2, 2)) == ones(2, 2)
@test collect(oneAPI.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2)
@test collect(oneAPI.ones(Float32, 2, 2)) == ones(Float32, 2, 2)

@test collect(oneAPI.fill(0, 2, 2)) == zeros(2, 2)
@test collect(oneAPI.fill(1, 2, 2)) == ones(2, 2)
@test collect(oneAPI.fill(0, 2, 2)) == zeros(Int, 2, 2)
@test collect(oneAPI.fill(1, 2, 2)) == ones(Int, 2, 2)

@testset "adapt" begin
Expand Down

0 comments on commit 3a63f96

Please sign in to comment.