Skip to content

Commit

Permalink
Add support for unified arrays.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Jun 30, 2021
1 parent f821951 commit 4feaea3
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 45 deletions.
66 changes: 32 additions & 34 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export CuArray, CuVector, CuMatrix, CuVecOrMat, cu
# the number of outstanding references

struct ArrayStorage
buffer::Mem.DeviceBuffer
buffer::Union{Mem.DeviceBuffer, Mem.UnifiedBuffer}

ctx::CuContext

Expand All @@ -18,8 +18,7 @@ struct ArrayStorage
refcount::Threads.Atomic{Int}
end

ArrayStorage(buf::Mem.DeviceBuffer, ctx::CuContext, state::Int) =
ArrayStorage(buf, ctx, Threads.Atomic{Int}(state))
# Convenience constructor: wraps a plain integer reference count in the atomic
# counter that is stored in the `refcount` field.
function ArrayStorage(buf, ctx, state::Int)
    refcount = Threads.Atomic{Int}(state)
    return ArrayStorage(buf, ctx, refcount)
end


## array type
Expand All @@ -32,7 +31,7 @@ mutable struct CuArray{T,N} <: AbstractGPUArray{T,N}

dims::Dims{N}

function CuArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N}
function CuArray{T,N}(::UndefInitializer, dims::Dims{N}; unified::Bool=false) where {T,N}
Base.allocatedinline(T) || error("CuArray only supports element types that are stored inline")
maxsize = prod(dims) * sizeof(T)
bufsize = if Base.isbitsunion(T)
Expand All @@ -41,7 +40,8 @@ mutable struct CuArray{T,N} <: AbstractGPUArray{T,N}
else
maxsize
end
storage = ArrayStorage(alloc(bufsize), context(), 1)
buf = alloc(bufsize; unified)
storage = ArrayStorage(buf, context(), 1)
obj = new{T,N}(storage, maxsize, 0, dims)
finalizer(unsafe_finalize!, obj)
end
Expand Down Expand Up @@ -121,21 +121,23 @@ CuMatrix{T} = CuArray{T,2}
CuVecOrMat{T} = Union{CuVector{T},CuMatrix{T}}

# type and dimensionality specified, accepting dims as series of Ints
CuArray{T,N}(::UndefInitializer, dims::Integer...) where {T,N} = CuArray{T,N}(undef, dims)
# forwards the collected dims as a tuple, passing keyword arguments through unchanged
function CuArray{T,N}(::UndefInitializer, dims::Integer...; kwargs...) where {T,N}
    return CuArray{T,N}(undef, dims; kwargs...)
end

# type but not dimensionality specified
CuArray{T}(::UndefInitializer, dims::Dims{N}) where {T,N} = CuArray{T,N}(undef, dims)
CuArray{T}(::UndefInitializer, dims::Integer...) where {T} =
CuArray{T}(undef, convert(Tuple{Vararg{Int}}, dims))
# dims given as a tuple: the dimensionality `N` is taken from the tuple type
function CuArray{T}(::UndefInitializer, dims::Dims{N}; kwargs...) where {T,N}
    return CuArray{T,N}(undef, dims; kwargs...)
end
# dims given as a series of integers: convert them to a tuple of `Int`s, then
# forward to the `CuArray{T}` constructor together with the keyword arguments
function CuArray{T}(::UndefInitializer, dims::Integer...; kwargs...) where {T}
    int_dims = convert(Tuple{Vararg{Int}}, dims)
    return CuArray{T}(undef, int_dims; kwargs...)
end

# empty vector constructor
CuArray{T,1}() where {T} = CuArray{T,1}(undef, 0)
# no positional arguments: construct a vector of length 0, passing keyword
# arguments through to the `undef` constructor
function CuArray{T,1}(; kwargs...) where {T}
    return CuArray{T,1}(undef, 0; kwargs...)
end

# do-block constructors
for (ctor, tvars) in (:CuArray => (), :(CuArray{T}) => (:T,), :(CuArray{T,N}) => (:T, :N))
@eval begin
function $ctor(f::Function, args...) where {$(tvars...)}
xs = $ctor(args...)
function $ctor(f::Function, args...; kwargs...) where {$(tvars...)}
xs = $ctor(args...; kwargs...)
try
f(xs)
finally
Expand Down Expand Up @@ -186,22 +188,8 @@ function Base.unsafe_wrap(::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,
error("Could not identify the buffer type; are you passing a valid CUDA pointer to unsafe_wrap?")
end

storage = ArrayStorage(buf, ctx, -1)
# TODO: make this array normally managed too (deal in pool.jl with different buffer types)
xs = CuArray{T, length(dims)}(storage, dims)
if own
finalizer(xs) do obj
@context! skip_destroyed=true ctx begin
if buf isa Mem.DeviceBuffer
# see comments in unsafe_free! for notes on the use of CuDefaultStream
Mem.free(buf; stream=CuDefaultStream())
else
Mem.free(buf)
end
end
end
end
return xs
storage = ArrayStorage(buf, ctx, own ? 1 : -1)
CuArray{T, length(dims)}(storage, dims)
end

function Base.unsafe_wrap(Atype::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,1}}},
Expand Down Expand Up @@ -262,19 +250,29 @@ AnyCuVecOrMat{T} = Union{AnyCuVector{T}, AnyCuMatrix{T}}

## interop with other arrays

@inline function CuArray{T,N}(xs::AbstractArray{<:Any,N}) where {T,N}
A = CuArray{T,N}(undef, size(xs))
@inline function CuArray{T,N}(xs::AbstractArray{<:Any,N}; kwargs...) where {T,N}
A = CuArray{T,N}(undef, size(xs); kwargs...)
copyto!(A, convert(Array{T}, xs))
return A
end

# underspecified constructors
CuArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = CuArray{T,N}(xs)
(::Type{CuArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = CuArray{S,N}(x)
CuArray(A::AbstractArray{T,N}) where {T,N} = CuArray{T,N}(A)
CuArray{T}(xs::AbstractArray{S,N}; kwargs...) where {T,N,S} = CuArray{T,N}(xs; kwargs...)
(::Type{CuArray{T,N} where T})(x::AbstractArray{S,N}; kwargs...) where {S,N} =
CuArray{S,N}(x; kwargs...)
CuArray(A::AbstractArray{T,N}; kwargs...) where {T,N} = CuArray{T,N}(A; kwargs...)

# idempotency
CuArray{T,N}(xs::CuArray{T,N}) where {T,N} = xs
# Returns `xs` itself when its backing buffer is already of the requested kind
# (a `Mem.UnifiedBuffer` for `unified=true`, a `Mem.DeviceBuffer` otherwise).
# In all other cases a new array is allocated with the requested `unified`
# setting and the contents of `xs` are copied into it.
function CuArray{T,N}(xs::CuArray{T,N}; unified::Bool=false) where {T,N}
    buf = xs.storage.buffer
    already_matches = unified ? (buf isa Mem.UnifiedBuffer) : (buf isa Mem.DeviceBuffer)
    already_matches && return xs

    converted = CuArray{T,N}(undef, size(xs); unified)
    copyto!(converted, xs)
    return converted
end


## conversions
Expand Down
26 changes: 15 additions & 11 deletions src/pool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,19 @@ end
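Allocate a number of bytes `sz` from the memory pool. Returns a buffer object; may throw
an [`OutOfGPUMemoryError`](@ref) if the allocation request cannot be satisfied.

If `unified` is `true`, a unified memory buffer is allocated directly with `Mem.alloc`
instead of going through the pool. Requests for 0 bytes always return an empty device
buffer.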
"""
@inline @timeit_ci function alloc(sz; stream::Union{Nothing,CuStream}=nothing)
@inline @timeit_ci function alloc(sz; unified::Bool=false, stream::Union{Nothing,CuStream}=nothing)
# 0-byte allocations shouldn't hit the pool
sz == 0 && return Mem.DeviceBuffer(CU_NULL, 0)

state = active_state()

buf = nothing
gctime = 0.0 # using Base.@timed/gc_num is too expensive
time = Base.@elapsed begin
time = Base.@elapsed if unified
# TODO: integrate this with the non-unified code path (e.g. we want to retry & gc too)
# TODO: add a memory type argument to `alloc`?
buf = Mem.alloc(Mem.Unified, sz)
else
state = active_state()

buf = nothing
if stream_ordered(state.device)
# mark the pool as active
pool_mark(state.device)
Expand Down Expand Up @@ -234,8 +238,8 @@ an [`OutOfGPUMemoryError`](@ref) if the allocation request cannot be satisfied.
buf === nothing || break
end
end
buf === nothing && throw(OutOfGPUMemoryError(sz))
end
buf === nothing && throw(OutOfGPUMemoryError(sz))

alloc_stats.alloc_count += 1
alloc_stats.alloc_bytes += sz
Expand All @@ -250,20 +254,20 @@ end
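Releases a buffer `buf` to the memory pool. Unified buffers are not returned to the pool;
they are freed directly with `Mem.free`. Buffers of size 0 are ignored.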
"""
@inline @timeit_ci function free(buf::Mem.DeviceBuffer;
@inline @timeit_ci function free(buf::Mem.AbstractBuffer;
stream::Union{Nothing,CuStream}=nothing)
# XXX: have @timeit use the root timer, since we may be called from a finalizer

# 0-byte allocations shouldn't hit the pool
sizeof(buf) == 0 && return

state = active_state()

# this function is typically called from a finalizer, where we can't switch tasks,
# so perform our own error handling.
try

time = Base.@elapsed begin
time = Base.@elapsed if buf isa Mem.UnifiedBuffer
Mem.free(buf)
else
state = active_state()
if stream_ordered(state.device)
# mark the pool as active
pool_mark(state.device)
Expand Down
34 changes: 34 additions & 0 deletions test/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -595,5 +595,39 @@ end
expected = sum(a, dims=2)
actual = sum(c, dims=2)
@test expected == Array(actual)
end

@testset "unified memory" begin
    # remember the active device so a different one can be selected further down
    home_dev = device()

    # arrays allocated without the keyword are not managed
    let
        plain = CuArray{Int}(undef, 1)
        @test !is_managed(pointer(plain))
    end

    # arrays allocated with `unified=true` are managed and can be written and read back
    let
        shared = CuArray{Int}(undef, 1; unified=true)
        @test is_managed(pointer(shared))
        shared .= 0
        @test Array(shared) == [0]

        # when more than one device is present, also update and read the array
        # while another device is active, and check the result afterwards
        if length(devices()) > 1
            all_devs = collect(devices())
            candidates = filter(!isequal(home_dev), all_devs)
            device!(first(candidates)) do
                shared .+= 1
                @test Array(shared) == [1]
            end
            @test Array(shared) == [1]
        end
    end

    # constructing from an existing array with `unified=true` gives a managed
    # array holding the same values
    let
        src = CUDA.rand(1)
        @test !is_managed(pointer(src))

        managed = CuArray(src; unified=true)
        @test is_managed(pointer(managed))

        @test Array(src) == Array(managed)
    end
end

0 comments on commit 4feaea3

Please sign in to comment.