allow get_device("Metal") and informative error messages (#2319)
* Update Project.toml

* Update Project.toml

* cl/ext

* cleanup

* remove some AMDGPU.functional() guards

* cleanup

* ordinal -> id

* fix tests

* cleanup buildkite

* user facing error
CarloLucibello authored Aug 28, 2023
1 parent b887018 commit f532045
Showing 14 changed files with 159 additions and 154 deletions.
21 changes: 5 additions & 16 deletions .buildkite/pipeline.yml
@@ -1,18 +1,5 @@
steps:
# - label: "GPU integration with julia v1.9"
# plugins:
# - JuliaCI/julia#v1:
# # Drop default "registries" directory, so it is not persisted from execution to execution
# # Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12
# persist_depot_dirs: packages,artifacts,compiled
# version: "1.9"
# - JuliaCI/julia-test#v1: ~
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "GPU integration with julia v1"
- label: "CUDA GPU with julia v1"
plugins:
- JuliaCI/julia#v1:
version: "1"
@@ -24,6 +11,7 @@ steps:
cuda: "*"
env:
JULIA_CUDA_USE_BINARYBUILDER: "true"
FLUX_TEST_CUDA: "true"
FLUX_TEST_CPU: "false"
timeout_in_minutes: 60

@@ -36,6 +24,7 @@ steps:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "Metal with julia {{matrix.julia}}"
plugins:
- JuliaCI/julia#v1:
@@ -57,7 +46,7 @@ steps:
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
FLUX_TEST_METAL: 'true'
FLUX_TEST_METAL: "true"
FLUX_TEST_CPU: "false"
matrix:
setup:
@@ -84,7 +73,7 @@ steps:
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
FLUX_TEST_AMDGPU: true
FLUX_TEST_AMDGPU: "true"
FLUX_TEST_CPU: "false"
JULIA_NUM_THREADS: 4
env:
4 changes: 2 additions & 2 deletions docs/src/gpu.md
@@ -327,7 +327,7 @@ CUDA.DeviceIterator() for 3 devices:
```

Then, let's select the device with ordinal `0`:
Then, let's select the device with id `0`:

```julia-repl
julia> device0 = Flux.get_device("CUDA", 0) # the currently supported values for backend are "CUDA" and "AMD"
@@ -354,7 +354,7 @@ CuDevice(0): GeForce RTX 2080 Ti
```

Next, we'll get a handle to the device with ordinal `1`, and move `dense_model` to that device:
Next, we'll get a handle to the device with id `1`, and move `dense_model` to that device:

```julia-repl
julia> device1 = Flux.get_device("CUDA", 1)
26 changes: 13 additions & 13 deletions ext/FluxAMDGPUExt/functor.jl
@@ -1,6 +1,6 @@
# Convert Float64 to Float32, but preserve Float16.
function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
if to.ordinal === nothing
if to.id === nothing
if (typeof(x) <: AbstractArray{Float16, N} where N)
N = length(size(x))
return isbits(x) ? x : ROCArray{Float16, N}(x)
@@ -12,10 +12,10 @@ function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
end
end

old_ordinal = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ordinals start from 0
old_id = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ids start from 0

if !(x isa ROCArray)
AMDGPU.device!(AMDGPU.devices()[to.ordinal + 1]) # adding 1 because ordinals start from 0
AMDGPU.device!(AMDGPU.devices()[to.id + 1]) # adding 1 because ids start from 0
if (typeof(x) <: AbstractArray{Float16, N} where N)
N = length(size(x))
x_new = isbits(x) ? x : ROCArray{Float16, N}(x)
@@ -25,14 +25,14 @@ function adapt_storage(to::FluxAMDAdaptor, x::AbstractArray)
else
x_new = isbits(x) ? x : ROCArray(x)
end
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return x_new
elseif AMDGPU.device_id(AMDGPU.device(x)) == to.ordinal
elseif AMDGPU.device_id(AMDGPU.device(x)) == to.id
return x
else
AMDGPU.device!(AMDGPU.devices()[to.ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[to.id + 1])
x_new = copy(x)
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return x_new
end
end
@@ -76,10 +76,10 @@ Flux._isleaf(::AMD_CONV) = true
_exclude(x) = Flux._isleaf(x)
_exclude(::CPU_CONV) = true

function _amd(ordinal::Union{Nothing, Int}, x)
function _amd(id::Union{Nothing, Int}, x)
check_use_amdgpu()
USE_AMDGPU[] || return x
fmap(x -> Adapt.adapt(FluxAMDAdaptor(ordinal), x), x; exclude=_exclude)
fmap(x -> Adapt.adapt(FluxAMDAdaptor(id), x), x; exclude=_exclude)
end

# CPU -> GPU
@@ -106,10 +106,10 @@ function Adapt.adapt_structure(to::FluxCPUAdaptor, m::AMD_CONV)
Adapt.adapt(to, m.bias), m.stride, m.pad, m.dilation, m.groups)
end

function Flux.get_device(::Val{:AMD}, ordinal::Int) # ordinal should start from 0
old_ordinal = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ordinals start from 0
AMDGPU.device!(AMDGPU.devices()[ordinal + 1]) # adding 1 because ordinals start from 0
function Flux.get_device(::Val{:AMD}, id::Int) # id should start from 0
old_id = AMDGPU.device_id(AMDGPU.device()) - 1 # subtracting 1 because ids start from 0
AMDGPU.device!(AMDGPU.devices()[id + 1]) # adding 1 because ids start from 0
device = Flux.FluxAMDDevice(AMDGPU.device())
AMDGPU.device!(AMDGPU.devices()[old_ordinal + 1])
AMDGPU.device!(AMDGPU.devices()[old_id + 1])
return device
end
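
For context, here is a minimal usage sketch of the AMD path above, assuming a functional AMDGPU.jl setup with at least one device (model and variable names are illustrative):

```julia
using Flux, AMDGPU   # loading AMDGPU activates the FluxAMDGPUExt extension

device0 = Flux.get_device("AMD", 0)   # Flux device ids start at 0
model   = Dense(2 => 3) |> device0    # parameters become ROCArrays on that device

# AMDGPU.jl numbers devices from 1, hence the `id + 1` offsets in the extension code
AMDGPU.device_id(AMDGPU.device(model.weight)) == 1
```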
28 changes: 15 additions & 13 deletions ext/FluxCUDAExt/functor.jl
@@ -1,24 +1,26 @@
adapt_storage(to::FluxCUDAAdaptor, x) = CUDA.cu(x)

function adapt_storage(to::FluxCUDAAdaptor, x::AbstractArray)
to.ordinal === nothing && return CUDA.cu(x)
to.id === nothing && return CUDA.cu(x)

# remember current device
old_ordinal = CUDA.device().handle
old_id = CUDA.device().handle

if !(x isa CuArray)
CUDA.device!(to.ordinal)
CUDA.device!(to.id)
x_new = CUDA.cu(x)
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return x_new
elseif CUDA.device(x).handle == to.ordinal
elseif CUDA.device(x).handle == to.id
return x
else
CUDA.device!(to.ordinal)
CUDA.device!(to.id)
x_new = copy(x)
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return x_new
end
end

adapt_storage(to::FluxCUDAAdaptor, x::Zygote.FillArrays.AbstractFill) = CUDA.cu(collect(x))
adapt_storage(to::FluxCUDAAdaptor, x::Random.TaskLocalRNG) = CUDA.default_rng()
adapt_storage(to::FluxCUDAAdaptor, x::CUDA.RNG) = x
@@ -44,16 +46,16 @@ ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AnyCuArray) =
ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AbstractArray) =
adapt(a, x), Δ -> (NoTangent(), NoTangent(), adapt(FluxCPUAdaptor(), unthunk(Δ)))

function _cuda(ordinal::Union{Nothing, Int}, x)
function _cuda(id::Union{Nothing, Int}, x)
check_use_cuda()
USE_CUDA[] || return x
fmap(x -> Adapt.adapt(FluxCUDAAdaptor(ordinal), x), x; exclude=Flux._isleaf)
fmap(x -> Adapt.adapt(FluxCUDAAdaptor(id), x), x; exclude=Flux._isleaf)
end

function Flux.get_device(::Val{:CUDA}, ordinal::Int)
old_ordinal = CUDA.device().handle
CUDA.device!(ordinal)
function Flux.get_device(::Val{:CUDA}, id::Int)
old_id = CUDA.device().handle
CUDA.device!(id)
device = Flux.FluxCUDADevice(CUDA.device())
CUDA.device!(old_ordinal)
CUDA.device!(old_id)
return device
end
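
A similar sketch for the CUDA path, assuming CUDA.jl is loaded and at least two devices are visible (ids are illustrative). Note that `get_device` only switches to the requested device temporarily and then restores the previously active one:

```julia
using Flux, CUDA   # loading CUDA activates the FluxCUDAExt extension

device1 = Flux.get_device("CUDA", 1)   # handle to the GPU with id 1
model   = Dense(2 => 3) |> device1     # parameters become CuArrays on device 1

CUDA.device(model.weight).handle == 1  # CUDA device handles match the Flux id directly
```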
6 changes: 6 additions & 0 deletions ext/FluxMetalExt/functor.jl
@@ -32,3 +32,9 @@ function _metal(x)
USE_METAL[] || return x
fmap(x -> Adapt.adapt(FluxMetalAdaptor(), x), x; exclude=_isleaf)
end

function Flux.get_device(::Val{:Metal}, id::Int)
@assert id == 0 "Metal backend only supports one device at the moment"
return Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]
end
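
Since the Metal backend currently exposes a single device, a minimal sketch of the new entry point, assuming Metal.jl is loaded and functional, is simply:

```julia
using Flux, Metal   # loading Metal activates the FluxMetalExt extension

metal_device = Flux.get_device("Metal")    # same as Flux.get_device("Metal", 0)
x = randn(Float32, 5, 5) |> metal_device   # data is moved to an MtlArray

# Flux.get_device("Metal", 1)   # would trip the assertion above: only one device is supported
```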

23 changes: 14 additions & 9 deletions src/functor.jl
@@ -333,14 +333,14 @@ trainable(c::Cholesky) = ()
# CUDA extension. ########

Base.@kwdef struct FluxCUDAAdaptor
ordinal::Union{Nothing, Int} = nothing
id::Union{Nothing, Int} = nothing
end

const CUDA_LOADED = Ref{Bool}(false)

function gpu(to::FluxCUDAAdaptor, x)
if CUDA_LOADED[]
return _cuda(to.ordinal, x)
return _cuda(to.id, x)
else
@info """
The CUDA functionality is being called but
@@ -356,14 +356,14 @@ function _cuda end
# AMDGPU extension. ########

Base.@kwdef struct FluxAMDAdaptor
ordinal::Union{Nothing, Int} = nothing
id::Union{Nothing, Int} = nothing
end

const AMDGPU_LOADED = Ref{Bool}(false)

function gpu(to::FluxAMDAdaptor, x)
if AMDGPU_LOADED[]
return _amd(to.ordinal, x)
return _amd(to.id, x)
else
@info """
The AMDGPU functionality is being called but
@@ -650,10 +650,10 @@ function get_device(; verbose=false)::AbstractDevice
end

"""
Flux.get_device(backend::String, ordinal::Int = 0)::Flux.AbstractDevice
Flux.get_device(backend::String, idx::Int = 0)::Flux.AbstractDevice
Get a device object for a backend specified by the string `backend` and `ordinal`. The currently supported values
of `backend` are `"CUDA"`, `"AMD"` and `"CPU"`. `ordinal` must be an integer value between `0` and the number of available devices.
Get a device object for a backend specified by the string `backend` and `idx`. The currently supported values
of `backend` are `"CUDA"`, `"AMD"` and `"CPU"`. `idx` must be an integer value between `0` and the number of available devices.
# Examples
@@ -683,10 +683,15 @@ julia> cpu_device = Flux.get_device("CPU")
```
"""
function get_device(backend::String, ordinal::Int = 0)
function get_device(backend::String, idx::Int = 0)
if backend == "CPU"
return FluxCPUDevice()
else
return get_device(Val(Symbol(backend)), ordinal)
return get_device(Val(Symbol(backend)), idx)
end
end

# Fallback
function get_device(::Val{D}, idx) where D
error("Unsupported backend: $(D). Try importing the corresponding package.")
end
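
This fallback produces the new user-facing error message. A sketch of what a user would see when requesting a backend whose package has not been imported (REPL output abridged, stacktrace omitted):

```julia-repl
julia> using Flux   # no CUDA.jl, AMDGPU.jl or Metal.jl loaded

julia> Flux.get_device("CUDA", 0)
ERROR: Unsupported backend: CUDA. Try importing the corresponding package.
```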
68 changes: 31 additions & 37 deletions test/ext_amdgpu/get_devices.jl
@@ -3,47 +3,41 @@ amd_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]
# should pass, whether or not AMDGPU is functional
@test typeof(amd_device) <: Flux.FluxAMDDevice

if AMDGPU.functional()
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
else
@test typeof(amd_device.deviceID) <: Nothing
end
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice

# testing get_device
dense_model = Dense(2 => 3) # initially lives on CPU
weight = copy(dense_model.weight) # store the weight
bias = copy(dense_model.bias) # store the bias
if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
amd_device = Flux.get_device()

@test typeof(amd_device) <: Flux.FluxAMDDevice
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
@test Flux._get_device_name(amd_device) in Flux.supported_devices()

# correctness of data transfer
x = randn(5, 5)
cx = x |> amd_device
@test cx isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(amd_device.deviceID)

# moving models to specific AMD devices
for ordinal in 0:(length(AMDGPU.devices()) - 1)
current_amd_device = Flux.get_device("AMD", ordinal)
@test typeof(current_amd_device.deviceID) <: AMDGPU.HIPDevice
@test AMDGPU.device_id(current_amd_device.deviceID) == ordinal + 1

global dense_model = dense_model |> current_amd_device
@test dense_model.weight isa AMDGPU.ROCArray
@test dense_model.bias isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(dense_model.weight)) == ordinal + 1
@test AMDGPU.device_id(AMDGPU.device(dense_model.bias)) == ordinal + 1
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end
# finally move to CPU, and see if things work
cpu_device = Flux.get_device("CPU")
dense_model = cpu_device(dense_model)
@test dense_model.weight isa Matrix
@test dense_model.bias isa Vector

amd_device = Flux.get_device()

@test typeof(amd_device) <: Flux.FluxAMDDevice
@test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
@test Flux._get_device_name(amd_device) in Flux.supported_devices()

# correctness of data transfer
x = randn(5, 5)
cx = x |> amd_device
@test cx isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(amd_device.deviceID)

# moving models to specific AMD devices
for id in 0:(length(AMDGPU.devices()) - 1)
current_amd_device = Flux.get_device("AMD", id)
@test typeof(current_amd_device.deviceID) <: AMDGPU.HIPDevice
@test AMDGPU.device_id(current_amd_device.deviceID) == id + 1

global dense_model = dense_model |> current_amd_device
@test dense_model.weight isa AMDGPU.ROCArray
@test dense_model.bias isa AMDGPU.ROCArray
@test AMDGPU.device_id(AMDGPU.device(dense_model.weight)) == id + 1
@test AMDGPU.device_id(AMDGPU.device(dense_model.bias)) == id + 1
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end
# finally move to CPU, and see if things work
cpu_device = Flux.get_device("CPU")
dense_model = cpu_device(dense_model)
@test dense_model.weight isa Matrix
@test dense_model.bias isa Vector
4 changes: 4 additions & 0 deletions test/ext_amdgpu/runtests.jl
@@ -5,6 +5,10 @@ AMDGPU.allowscalar(false)
include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end

@testset "Basic" begin
include("basic.jl")
end