allow get_device("Metal") and informative error messages (#2319)
* Update Project.toml

* Update Project.toml

* cl/ext

* cleanup

* remove some AMDGPU.functional() guards

* cleanup

* ordinal -> id

* fix tests

* cleanup buildkite

* user facing error
CarloLucibello authored Aug 28, 2023
1 parent b887018 commit f532045
Showing 14 changed files with 159 additions and 154 deletions.
21 changes: 5 additions & 16 deletions .buildkite/pipeline.yml
@@ -1,18 +1,5 @@
steps:
# - label: "GPU integration with julia v1.9"
# plugins:
# - JuliaCI/julia#v1:
# # Drop default "registries" directory, so it is not persisted from execution to execution
# # Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12
# persist_depot_dirs: packages,artifacts,compiled
# version: "1.9"
# - JuliaCI/julia-test#v1: ~
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "GPU integration with julia v1"
- label: "CUDA GPU with julia v1"
plugins:
- JuliaCI/julia#v1:
version: "1"
@@ -24,6 +11,7 @@ steps:
cuda: "*"
env:
JULIA_CUDA_USE_BINARYBUILDER: "true"
FLUX_TEST_CUDA: "true"
FLUX_TEST_CPU: "false"
timeout_in_minutes: 60

@@ -36,6 +24,7 @@ steps:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "Metal with julia {{matrix.julia}}"
plugins:
- JuliaCI/julia#v1:
@@ -57,7 +46,7 @@ steps:
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
FLUX_TEST_METAL: 'true'
FLUX_TEST_METAL: "true"
FLUX_TEST_CPU: "false"
matrix:
setup:
@@ -84,7 +73,7 @@ steps:
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
FLUX_TEST_AMDGPU: true
FLUX_TEST_AMDGPU: "true"
FLUX_TEST_CPU: "false"
JULIA_NUM_THREADS: 4
env:
4 changes: 2 additions & 2 deletions docs/src/gpu.md
@@ -327,7 +327,7 @@ CUDA.DeviceIterator() for 3 devices:
```

Then, let's select the device with ordinal `0`:
Then, let's select the device with id `0`:

```julia-repl
julia> device0 = Flux.get_device("CUDA", 0) # the currently supported values for backend are "CUDA" and "AMD"
@@ -354,7 +354,7 @@ CuDevice(0): GeForce RTX 2080 Ti
```

Next, we'll get a handle to the device with ordinal `1`, and move `dense_model` to that device:
Next, we'll get a handle to the device with id `1`, and move `dense_model` to that device:

```julia-repl
julia> device1 = Flux.get_device("CUDA", 1)
26 changes: 13 additions & 13 deletions ext/FluxAMDGPUExt/functor.jl
@@ -1,6 +1,6 @@
# Convert Float64 to Float32, but preserve Float16.
function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
if to.ordinal === nothing
if to.id === nothing
if (typeof(x) <: AbstractArray{Float16, N} where N)
N = length(size(x))
return isbits(x) ? x : ROCArray{Float16, N}(x)
@@ -12,10 +12,10 @@ function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
end
end

old_ordinal = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ordinals start from 0
old_id = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ids start from 0

if !(x isa ROCArray)
AMDGPU.device!(AMDGPU.devices()[to.ordinal + 1]) # adding 1 because ordinals start from 0
AMDGPU.device!(AMDGPU.devices()[to.id + 1]) # adding 1 because ids start from 0
if (typeof(x) <: AbstractArray{Float16, N} where N)
N = length(size(x))
x_new = isbits(x) ? x : ROCArray{Float16, N}(x)
@@ -25,14 +25,14 @@ function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
else
x_new = isbits(x) ? x : ROCArray(x)
end
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return x_new
elseif AMDGPU.device_id(AMDGPU.device(x)) == to.ordinal
elseif AMDGPU.device_id(AMDGPU.device(x)) == to.id
return x
else
AMDGPU.device!(AMDGPU.devices()[to.ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[to.id + 1])
x_new = copy(x)
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return x_new
end
end
@@ -76,10 +76,10 @@ Flux._isleaf(::AMD_CONV) = true
_exclude(x) = Flux._isleaf(x)
_exclude(::CPU_CONV) = true

function _amd(ordinal::Union{Nothing, Int}, x)
function _amd(id::Union{Nothing, Int}, x)
check_use_amdgpu()
USE_AMDGPU[] || return x
fmap(x -> Adapt.adapt(FluxAMDAdaptor(ordinal), x), x; exclude=_exclude)
fmap(x -> Adapt.adapt(FluxAMDAdaptor(id), x), x; exclude=_exclude)
end

# CPU -> GPU
@@ -106,10 +106,10 @@ function Adapt.adapt_structure(to::FluxCPUAdaptor, m::AMD_CONV)
Adapt.adapt(to, m.bias), m.stride, m.pad, m.dilation, m.groups)
end

function Flux.get_device(::Val{:AMD}, ordinal::Int) # ordinal should start from 0
old_ordinal = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ordinals start from 0
AMDGPU.device!(AMDGPU.devices()[ordinal + 1]) # adding 1 because ordinals start from 0
function Flux.get_device(::Val{:AMD}, id::Int) # id should start from 0
old_id = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ids start from 0
AMDGPU.device!(AMDGPU.devices()[id + 1]) # adding 1 because ids start from 0
device = Flux.FluxAMDDevice(AMDGPU.device())
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return device
end
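
For context, here is a minimal usage sketch of the AMD path above, assuming a functional AMDGPU.jl setup with at least one device (model and variable names are illustrative):

```julia
using Flux, AMDGPU   # loading AMDGPU activates the FluxAMDGPUExt extension

device0 = Flux.get_device("AMD", 0)   # Flux device ids start at 0
model   = Dense(2 => 3) |> device0    # parameters become ROCArrays on that device

# AMDGPU.jl numbers devices from 1, hence the `id + 1` offsets in the extension code
AMDGPU.device_id(AMDGPU.device(model.weight)) == 1
```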
28 changes: 15 additions & 13 deletions ext/FluxCUDAExt/functor.jl
@@ -1,24 +1,26 @@
adapt_storage(to::FluxCUDAAdaptor, x) = CUDA.cu(x)

function adapt_storage(to::FluxCUDAAdaptor, x::AbstractArray)
to.ordinal === nothing && return CUDA.cu(x)
to.id === nothing && return CUDA.cu(x)

# remember current device
old_ordinal = CUDA.device().handle
old_id = CUDA.device().handle

if !(x isa CuArray)
CUDA.device!(to.ordinal)
CUDA.device!(to.id)
x_new = CUDA.cu(x)
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return x_new
elseif CUDA.device(x).handle == to.ordinal
elseif CUDA.device(x).handle == to.id
return x
else
CUDA.device!(to.ordinal)
CUDA.device!(to.id)
x_new = copy(x)
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return x_new
end
end

adapt_storage(to::FluxCUDAAdaptor, x::Zygote.FillArrays.AbstractFill) = CUDA.cu(collect(x))
adapt_storage(to::FluxCUDAAdaptor, x::Random.TaskLocalRNG) = CUDA.default_rng()
adapt_storage(to::FluxCUDAAdaptor, x::CUDA.RNG) = x
@@ -44,16 +46,16 @@ ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AnyCuArray) =
ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AbstractArray) =
adapt(a, x), Δ -> (NoTangent(), NoTangent(), adapt(FluxCPUAdaptor(), unthunk(Δ)))

function _cuda(ordinal::Union{Nothing, Int}, x)
function _cuda(id::Union{Nothing, Int}, x)
check_use_cuda()
USE_CUDA[] || return x
fmap(x -> Adapt.adapt(FluxCUDAAdaptor(ordinal), x), x; exclude=Flux._isleaf)
fmap(x -> Adapt.adapt(FluxCUDAAdaptor(id), x), x; exclude=Flux._isleaf)
end

function Flux.get_device(::Val{:CUDA}, ordinal::Int)
old_ordinal = CUDA.device().handle
CUDA.device!(ordinal)
function Flux.get_device(::Val{:CUDA}, id::Int)
old_id = CUDA.device().handle
CUDA.device!(id)
device = Flux.FluxCUDADevice(CUDA.device())
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return device
end
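
A similar sketch for the CUDA path, assuming CUDA.jl is loaded and at least two devices are visible (ids are illustrative). Note that `get_device` only switches to the requested device temporarily and then restores the previously active one:

```julia
using Flux, CUDA   # loading CUDA activates the FluxCUDAExt extension

device1 = Flux.get_device("CUDA", 1)   # handle to the GPU with id 1
model   = Dense(2 => 3) |> device1     # parameters become CuArrays on device 1

CUDA.device(model.weight).handle == 1  # CUDA device handles match the Flux id directly
```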
6 changes: 6 additions & 0 deletions ext/FluxMetalExt/functor.jl
@@ -32,3 +32,9 @@ function _metal(x)
USE_METAL[] || return x
fmap(x -> Adapt.adapt(FluxMetalAdaptor(), x), x; exclude=_isleaf)
end

function Flux.get_device(::Val{:Metal}, id::Int)
@assert id == 0 "Metal backend only supports one device at the moment"
return Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]
end
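
Since the Metal backend currently exposes a single device, a minimal sketch of the new entry point, assuming Metal.jl is loaded and functional, is simply:

```julia
using Flux, Metal   # loading Metal activates the FluxMetalExt extension

metal_device = Flux.get_device("Metal")    # same as Flux.get_device("Metal", 0)
x = randn(Float32, 5, 5) |> metal_device   # data is moved to an MtlArray

# Flux.get_device("Metal", 1)   # would trip the assertion above: only one device is supported
```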

23 changes: 14 additions & 9 deletions src/functor.jl
@@ -333,14 +333,14 @@ trainable(c::Cholesky) = ()
# CUDA extension. ########

Base.@kwdef struct FluxCUDAAdaptor
ordinal::Union{Nothing, Int} = nothing
id::Union{Nothing, Int} = nothing
end

const CUDA_LOADED = Ref{Bool}(false)

function gpu(to::FluxCUDAAdaptor, x)
if CUDA_LOADED[]
return _cuda(to.ordinal, x)
return _cuda(to.id, x)
else
@info """
The CUDA functionality is being called but
@@ -356,14 +356,14 @@ function _cuda end
# AMDGPU extension. ########

Base.@kwdef struct FluxAMDAdaptor
ordinal::Union{Nothing, Int} = nothing
id::Union{Nothing, Int} = nothing
end

const AMDGPU_LOADED = Ref{Bool}(false)

function gpu(to::FluxAMDAdaptor, x)
if AMDGPU_LOADED[]
return _amd(to.ordinal, x)
return _amd(to.id, x)
else
@info """
The AMDGPU functionality is being called but
@@ -650,10 +650,10 @@ function get_device(; verbose=false)::AbstractDevice
end

"""
Flux.get_device(backend::String, ordinal::Int = 0)::Flux.AbstractDevice
Flux.get_device(backend::String, idx::Int = 0)::Flux.AbstractDevice
Get a device object for a backend specified by the string `backend` and `ordinal`. The currently supported values
of `backend` are `"CUDA"`, `"AMD"` and `"CPU"`. `ordinal` must be an integer value between `0` and the number of available devices.
Get a device object for a backend specified by the string `backend` and `idx`. The currently supported values
of `backend` are `"CUDA"`, `"AMD"` and `"CPU"`. `idx` must be an integer value between `0` and the number of available devices.
# Examples
@@ -683,10 +683,15 @@ julia> cpu_device = Flux.get_device("CPU")
```
"""
function get_device(backend::String, ordinal::Int = 0)
function get_device(backend::String, idx::Int = 0)
if backend == "CPU"
return FluxCPUDevice()
else
return get_device(Val(Symbol(backend)), ordinal)
return get_device(Val(Symbol(backend)), idx)
end
end

# Fallback
function get_device(::Val{D}, idx) where D
error("Unsupported backend: $(D). Try importing the corresponding package.")
end
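
This fallback produces the new user-facing error message. A sketch of what a user would see when requesting a backend whose package has not been imported (REPL output abridged, stacktrace omitted):

```julia-repl
julia> using Flux   # no CUDA.jl, AMDGPU.jl or Metal.jl loaded

julia> Flux.get_device("CUDA", 0)
ERROR: Unsupported backend: CUDA. Try importing the corresponding package.
```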
68 changes: 31 additions & 37 deletions test/ext_amdgpu/get_devices.jl
@@ -3,47 +3,41 @@ amd_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]
# should pass, whether or not AMDGPU is functional
@test typeof(amd_device) <: Flux.FluxAMDDevice

if AMDGPU.functional()
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
else
@test typeof(amd_device.deviceID) <: Nothing
end
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice

# testing get_device
dense_model = Dense(2 => 3) # initially lives on CPU
weight = copy(dense_model.weight) # store the weight
bias = copy(dense_model.bias) # store the bias
if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
amd_device = Flux.get_device()

@test typeof(amd_device) <: Flux.FluxAMDDevice
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
@test Flux._get_device_name(amd_device) in Flux.supported_devices()

# correctness of data transfer
x = randn(5, 5)
cx = x |> amd_device
@test cx isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(amd_device.deviceID)

# moving models to specific AMD devices
for ordinal in 0:(length(AMDGPU.devices()) - 1)
current_amd_device = Flux.get_device("AMD", ordinal)
@test typeof(current_amd_device.deviceID) <: AMDGPU.HIPDevice
@test AMDGPU.device_id(current_amd_device.deviceID) == ordinal + 1

global dense_model = dense_model |> current_amd_device
@test dense_model.weight isa AMDGPU.ROCArray
@test dense_model.bias isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(dense_model.weight)) == ordinal + 1
@test AMDGPU.device_id(AMDGPU.device(dense_model.bias)) == ordinal + 1
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end
# finally move to CPU, and see if things work
cpu_device = Flux.get_device("CPU")
dense_model = cpu_device(dense_model)
@test dense_model.weight isa Matrix
@test dense_model.bias isa Vector

amd_device = Flux.get_device()

@test typeof(amd_device) <: Flux.FluxAMDDevice
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
@test Flux._get_device_name(amd_device) in Flux.supported_devices()

# correctness of data transfer
x = randn(5, 5)
cx = x |> amd_device
@test cx isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(amd_device.deviceID)

# moving models to specific AMD devices
for id in 0:(length(AMDGPU.devices()) - 1)
current_amd_device = Flux.get_device("AMD", id)
@test typeof(current_amd_device.deviceID) <: AMDGPU.HIPDevice
@test AMDGPU.device_id(current_amd_device.deviceID) == id + 1

global dense_model = dense_model |> current_amd_device
@test dense_model.weight isa AMDGPU.ROCArray
@test dense_model.bias isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(dense_model.weight)) == id + 1
@test AMDGPU.device_id(AMDGPU.device(dense_model.bias)) == id + 1
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end
# finally move to CPU, and see if things work
cpu_device = Flux.get_device("CPU")
dense_model = cpu_device(dense_model)
@test dense_model.weight isa Matrix
@test dense_model.bias isa Vector
4 changes: 4 additions & 0 deletions test/ext_amdgpu/runtests.jl
@@ -5,6 +5,10 @@ AMDGPU.allowscalar(false)
include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end

@testset "Basic" begin
include("basic.jl")
end