From 5d4f2a24ea42a973f4b0233cce427875790c3275 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Sat, 22 Jul 2023 17:30:32 +0530
Subject: [PATCH 01/22] Adding structs for cpu and gpu devices.

---
 src/functor.jl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/functor.jl b/src/functor.jl
index 0c254bb1e1..4cb924964d 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -153,6 +153,27 @@ ChainRulesCore.rrule(::typeof(adapt), a::FluxCPUAdaptor, x::AbstractArray) =
 
 # CPU/GPU movement conveniences
 
+abstract type AbstractDevice <: Function end
+
+struct FluxCPUDevice <: AbstractDevice end
+
+Base.@kwdef struct FluxCUDADevice <: AbstractDevice
+    name::String = "CUDA"
+end
+
+Base.@kwdef struct FluxAMDDevice <: AbstractDevice
+    name::String = "AMD"
+end
+
+Base.@kwdef struct FluxMetalDevice <: AbstractDevice
+    name::String = "Metal"
+end
+
+(::FluxCPUDevice)(x) = cpu(x)
+(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
+(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
+(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
+
 """
     cpu(m)
 

From 70044fb53dfacd67add517cefe03455e8904ddf2 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Sat, 22 Jul 2023 20:47:54 +0530
Subject: [PATCH 02/22] Adding implementation of `Flux.get_device()`, which
 returns the most appropriate GPU backend (or CPU, if nothing is available).

---
 src/Flux.jl    |  1 +
 src/functor.jl | 99 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/src/Flux.jl b/src/Flux.jl
index d522b91e78..633867aabb 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -5,6 +5,7 @@ using Preferences
 using LinearAlgebra, Statistics, Random  # standard lib
 using MacroTools, Reexport, ProgressLogging, SpecialFunctions
 using MacroTools: @forward
+import Base: PkgId, UUID
 
 @reexport using NNlib
 using MLUtils
diff --git a/src/functor.jl b/src/functor.jl
index 4cb924964d..529820bedf 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -153,27 +153,6 @@ ChainRulesCore.rrule(::typeof(adapt), a::FluxCPUAdaptor, x::AbstractArray) =
 
 # CPU/GPU movement conveniences
 
-abstract type AbstractDevice <: Function end
-
-struct FluxCPUDevice <: AbstractDevice end
-
-Base.@kwdef struct FluxCUDADevice <: AbstractDevice
-    name::String = "CUDA"
-end
-
-Base.@kwdef struct FluxAMDDevice <: AbstractDevice
-    name::String = "AMD"
-end
-
-Base.@kwdef struct FluxMetalDevice <: AbstractDevice
-    name::String = "Metal"
-end
-
-(::FluxCPUDevice)(x) = cpu(x)
-(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
-(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
-(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
-
 """
     cpu(m)
 
@@ -465,3 +444,81 @@ function gpu(d::MLUtils.DataLoader)
     d.rng,
   )
 end
+
+abstract type AbstractDevice <: Function end
+
+Base.@kwdef struct FluxCPUDevice <: AbstractDevice
+    name::String = "CPU"
+end
+
+Base.@kwdef struct FluxCUDADevice <: AbstractDevice
+    name::String = "CUDA"
+    pkgid::PkgId = PkgId(UUID("052768ef-5323-5732-b1bb-66c8b64840ba"), "CUDA")
+end
+
+Base.@kwdef struct FluxAMDDevice <: AbstractDevice
+    name::String = "AMD"
+    pkgid::PkgId = PkgId(UUID("21141c5a-9bdb-4563-92ae-f87d6854732e"), "AMDGPU")
+end
+
+Base.@kwdef struct FluxMetalDevice <: AbstractDevice
+    name::String = "Metal"
+    pkgid::PkgId = PkgId(UUID("dde4c033-4e86-420c-a63e-0dd931031962"), "Metal")
+end
+
+(::FluxCPUDevice)(x) = cpu(x)
+(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
+(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
+(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
+
+function _get_device_name(t::T) where {T <: AbstractDevice}
+    return hasfield(T, :name) ? t.name : ""
+end
+
+const DEVICES = (FluxCUDADevice(), FluxAMDDevice(), FluxMetalDevice(), FluxCPUDevice())
+supported_devices() = map(_get_device_name, DEVICES)
+
+function get_device()::AbstractDevice
+    backend = @load_preference("gpu_backend", nothing)
+    if backend !== nothing
+        allowed_backends = supported_devices()
+        idx = findfirst(isequal(backend), allowed_backends)
+        if backend ∉ allowed_backends
+            @warn """
+            `gpu_backend` preference is set to $backend, which is not allowed.
+            Defaulting to automatic device selection.
+            """ maxlog=1
+        else
+            @info "Using backend set in preferences: $backend."
+            device = DEVICES[idx]
+
+            if _get_device_name(device) !== "CPU" && !haskey(Base.loaded_modules, device.pkgid)
+                @warn """
+                Trying to use backend $(_get_device_name(device)) but package $(device.pkgid) is not loaded.
+                Please load the package and call this function again to respect the preferences backend.
+                """ maxlog=1
+            else
+                if _get_device_name(device) == "CPU" || getproperty(Base.loaded_modules[device.pkgid], :functional)()
+                    @info "Using backend: $(_get_device_name(device))"
+                    return device
+                else
+                    @warn "Backend: $(_get_device_name(device)) from the set preferences is not functional. Defaulting to automatic device selection." maxlog=1
+                end
+            end
+        end
+    end
+
+    @info "Running automatic device selection..."
+    for device in DEVICES
+        if _get_device_name(device) == "CPU" || haskey(Base.loaded_modules, device.pkgid)
+            @debug "Trying backend: $(_get_device_name(device))."
+            if _get_device_name(device) == "CPU" || getproperty(Base.loaded_modules[device.pkgid], :functional)()
+                @debug "Using backend: $(_get_device_name(device))."
+                return device
+            end
+            @debug "Backend: $(_get_device_name(device)) is not functional."
+        else
+            @debug "Trigger package for backend ($(_get_device_name(device))): $((device.pkgid)) not loaded."
+        end
+    end
+end

From 0dc56296c51cae28c1a89062387c14dc3380bd6e Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Sun, 23 Jul 2023 11:44:06 +0530
Subject: [PATCH 03/22] Adding docstrings for the new device types, and the
 `get_device` function.

---
 src/functor.jl | 103 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/src/functor.jl b/src/functor.jl
index 529820bedf..0e3884c383 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -445,22 +445,47 @@ function gpu(d::MLUtils.DataLoader)
     )
 end
 
+"""
+    Flux.AbstractDevice <: Function
+
+An abstract type representing `device` objects for different GPU backends. The currently supported backends are `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"`; the `"CPU"` backend is the fallback case when no GPU is available.
+"""
 abstract type AbstractDevice <: Function end
 
+"""
+    Flux.FluxCPUDevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"CPU"` backend for Flux. This is the fallback case when no GPU is available to Flux.
+"""
 Base.@kwdef struct FluxCPUDevice <: AbstractDevice
     name::String = "CPU"
 end
 
+"""
+    Flux.FluxCUDADevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"CUDA"` backend for Flux.
+"""
 Base.@kwdef struct FluxCUDADevice <: AbstractDevice
     name::String = "CUDA"
     pkgid::PkgId = PkgId(UUID("052768ef-5323-5732-b1bb-66c8b64840ba"), "CUDA")
 end
 
+"""
+    Flux.FluxAMDDevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"AMD"` backend for Flux.
+"""
 Base.@kwdef struct FluxAMDDevice <: AbstractDevice
     name::String = "AMD"
     pkgid::PkgId = PkgId(UUID("21141c5a-9bdb-4563-92ae-f87d6854732e"), "AMDGPU")
 end
 
+"""
+    Flux.FluxMetalDevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"Metal"` backend for Flux.
+"""
 Base.@kwdef struct FluxMetalDevice <: AbstractDevice
     name::String = "Metal"
     pkgid::PkgId = PkgId(UUID("dde4c033-4e86-420c-a63e-0dd931031962"), "Metal")
@@ -475,9 +500,87 @@ function _get_device_name(t::T) where {T <: AbstractDevice}
     return hasfield(T, :name) ? t.name : ""
 end
 
+# below order is important
 const DEVICES = (FluxCUDADevice(), FluxAMDDevice(), FluxMetalDevice(), FluxCPUDevice())
+
+"""
+    Flux.supported_devices()
+
+Get all supported backends for Flux, in order of preference.
+
+# Example
+
+```jldoctest
+julia> using Flux;
+
+julia> Flux.supported_devices()
+("CUDA", "AMD", "Metal", "CPU")
+```
+"""
 supported_devices() = map(_get_device_name, DEVICES)
 
+"""
+    Flux.get_device()::AbstractDevice
+
+Returns a `device` object for the most appropriate backend for the current Julia session.
+
+First, the function checks whether a backend preference has been set via the `gpu_backend!` function. If so, then an attempt is made to load this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is loaded. Otherwise, an appropriate backend is chosen.
+
+If there is no preference, then each of the `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"` backends in the given order, this function checks whether the given backend has been loaded via the corresponding trigger package, and whether the backend is functional. If so, the `device` corresponding to the backend is returned. If no GPU backend is available, a `Flux.FluxCPUDevice` is returned.
+
+# Examples
+For the example given below, the backend preference was set to `"AMD"` via the [`gpu_backend!`](@ref) function.
+
+```jldoctest
+julia> using Flux;
+
+julia> model = Dense(2 => 3)
+Dense(2 => 3)  # 9 parameters
+
+julia> device = Flux.get_device() # this will just load the CPU device
+[ Info: Using backend set in preferences: AMD.
+┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded.
+│ Please load the package and call this function again to respect the preferences backend.
+└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496
+[ Info: Running automatic device selection...
+(::Flux.FluxCPUDevice) (generic function with 1 method)
+
+julia> model = model |> device
+Dense(2 => 3)  # 9 parameters
+
+julia> model.weight
+3×2 Matrix{Float32}:
+ -0.304362  -0.700477
+ -0.861201   0.67825
+ -0.176017   0.234188
+```
+
+Here is the same example, but using `"CUDA"`:
+
+```jldoctest
+julia> using Flux, CUDA;
+
+julia> model = Dense(2 => 3)
+Dense(2 => 3)  # 9 parameters
+
+julia> device = Flux.get_device()
+[ Info: Using backend set in preferences: AMD.
+┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded.
+│ Please load the package and call this function again to respect the preferences backend.
+└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496
+[ Info: Running automatic device selection...
+(::Flux.FluxCUDADevice) (generic function with 1 method)
+
+julia> model = model |> device
+Dense(2 => 3)  # 9 parameters
+
+julia> model.weight
+3×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
+  0.820013   0.527131
+ -0.915589   0.549048
+  0.290744  -0.0592499
+```
+"""
 function get_device()::AbstractDevice
     backend = @load_preference("gpu_backend", nothing)
     if backend !== nothing

From a3f925732db479643e06376319b1f7be0308d343 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Sun, 23 Jul 2023 12:59:19 +0530
Subject: [PATCH 04/22] Adding `CPU` to the list of supported backends. Made
 corresponding changes in `gpu(x)`. Adding more details in docstring of
 `get_device`.

---
 src/functor.jl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/functor.jl b/src/functor.jl
index 0e3884c383..dc67347ffa 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -187,7 +187,7 @@ _isbitsarray(x) = false
 _isleaf(::AbstractRNG) = true
 _isleaf(x) = _isbitsarray(x) || Functors.isleaf(x)
 
-const GPU_BACKENDS = ("CUDA", "AMD", "Metal")
+const GPU_BACKENDS = ("CUDA", "AMD", "Metal", "CPU")
 const GPU_BACKEND = @load_preference("gpu_backend", "CUDA")
 
 function gpu_backend!(backend::String)
@@ -249,6 +249,8 @@ function gpu(x)
     gpu(FluxAMDAdaptor(), x)
   elseif GPU_BACKEND == "Metal"
     gpu(FluxMetalAdaptor(), x)
+  elseif GPU_BACKEND == "CPU"
+    cpu(x)
   else
     error("""
     Unsupported GPU backend: $GPU_BACKEND.
@@ -524,9 +526,9 @@ supported_devices() = map(_get_device_name, DEVICES)
 
 Returns a `device` object for the most appropriate backend for the current Julia session.
 
-First, the function checks whether a backend preference has been set via the `gpu_backend!` function. If so, then an attempt is made to load this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is loaded. Otherwise, an appropriate backend is chosen.
+First, the function checks whether a backend preference has been set via the `gpu_backend!` function. If so, an attempt is made to load this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is loaded. Otherwise, the backend is chosen automatically. To update the backend preference, use [gpu_backend!](@ref).
 
-If there is no preference, then each of the `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"` backends in the given order, this function checks whether the given backend has been loaded via the corresponding trigger package, and whether the backend is functional. If so, the `device` corresponding to the backend is returned. If no GPU backend is available, a `Flux.FluxCPUDevice` is returned.
+If there is no preference, then for each of the `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"` backends in the given order, this function checks whether the given backend has been loaded via the corresponding trigger package, and whether the backend is functional. If so, the `device` corresponding to the backend is returned. If no GPU backend is available, a `Flux.FluxCPUDevice` is returned.
 
 # Examples
 For the example given below, the backend preference was set to `"AMD"` via the [`gpu_backend!`](@ref) function.

From 18938dee4c827af506d82fd23c3eb8eca49d89d4 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Sun, 23 Jul 2023 23:05:24 +0530
Subject: [PATCH 05/22] Using `julia-repl` instead of `jldoctest`, and `@info`
 instead of `@debug`.
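Motivation for the logging change: `@debug` messages are dropped by the default
logger, so the selection progress in `get_device` was invisible in an ordinary
session, while `@info` always prints. For reference, here is a minimal,
Flux-independent sketch of the two standard ways to surface `@debug` output
(only the `Logging` standard library and the documented `JULIA_DEBUG` variable
are used; the module name "Flux" is just an example):

```julia
using Logging

# Option 1: opt in per module via the environment variable.
ENV["JULIA_DEBUG"] = "Flux"

# Option 2: install a logger whose minimum level includes Debug.
with_logger(ConsoleLogger(stderr, Logging.Debug)) do
    @debug "Trying backend: CUDA."  # printed here; dropped under the default logger
end
```

Switching the selection loop to `@info` avoids asking users to do either of these.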
---
 src/functor.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/functor.jl b/src/functor.jl
index dc67347ffa..84099210c5 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -533,7 +533,7 @@ If there is no preference, then for each of the `"CUDA"`, `"AMD"`, `"Metal"` and
 # Examples
 For the example given below, the backend preference was set to `"AMD"` via the [`gpu_backend!`](@ref) function.
 
-```jldoctest
+```julia-repl
 julia> using Flux;
 
 julia> model = Dense(2 => 3)
@@ -559,7 +559,7 @@ julia> model.weight
 
 Here is the same example, but using `"CUDA"`:
 
-```jldoctest
+```julia-repl
 julia> using Flux, CUDA;
 
 julia> model = Dense(2 => 3)
@@ -616,14 +616,14 @@ function get_device()::AbstractDevice
     @info "Running automatic device selection..."
     for device in DEVICES
         if _get_device_name(device) == "CPU" || haskey(Base.loaded_modules, device.pkgid)
-            @debug "Trying backend: $(_get_device_name(device))."
+            @info "Trying backend: $(_get_device_name(device))."
             if _get_device_name(device) == "CPU" || getproperty(Base.loaded_modules[device.pkgid], :functional)()
                 @debug "Using backend: $(_get_device_name(device))."
                 return device
             end
-            @debug "Backend: $(_get_device_name(device)) is not functional."
+            @info "Backend: $(_get_device_name(device)) is not functional."
         else
-            @debug "Trigger package for backend ($(_get_device_name(device))): $((device.pkgid)) not loaded."
+            @info "Trigger package for backend ($(_get_device_name(device))): $((device.pkgid)) not loaded."
         end
     end
 end

From bf134ad79cd4309531e8ea89693fef755b8f3d56 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 24 Jul 2023 01:46:49 +0530
Subject: [PATCH 06/22] Adding `DataLoader` functionality to device objects.

---
 src/functor.jl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/functor.jl b/src/functor.jl
index 84099210c5..a6dfeb7506 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -498,6 +498,25 @@ end
 (::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
 (::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
 
+# Applying device to DataLoader
+function _apply_to_dataloader(device::T, d::MLUtils.DataLoader) where {T <: AbstractDevice}
+    MLUtils.DataLoader(MLUtils.mapobs(device, d.data),
+        d.batchsize,
+        d.buffer,
+        d.partial,
+        d.shuffle,
+        d.parallel,
+        d.collate,
+        d.rng,
+    )
+end
+
+(device::FluxCPUDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
+(device::FluxCUDADevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
+(device::FluxAMDDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
+(device::FluxMetalDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
+

From f8fc22c8485038668254f0b4acb46ee65ead4274 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 25 Jul 2023 14:12:43 +0530
Subject: [PATCH 07/22] Removing pkgids and defining new functions to check
 whether backend is available and functional.
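The mechanism adopted here: the parent package owns conservative fallback
methods, and each package extension overrides them for its device type once the
corresponding GPU package (the "trigger" package) is loaded, so the availability
check becomes plain multiple dispatch instead of `Base.loaded_modules` lookups
keyed by `PkgId`. A self-contained sketch of the pattern (`MyGPUDevice` and
these one-line methods are illustrative, not the exact Flux definitions):

```julia
# Owned by the parent package: with no extension loaded, every device
# reports as unavailable and non-functional.
abstract type AbstractDevice <: Function end
struct MyGPUDevice <: AbstractDevice end

isavailable(::AbstractDevice) = false
isfunctional(::AbstractDevice) = false

# Added by a package extension once the GPU package is imported; the real
# extensions delegate to the backend, e.g. CUDA.functional().
isavailable(::MyGPUDevice) = true
isfunctional(::MyGPUDevice) = true

@assert isavailable(MyGPUDevice())  # the more specific method wins by dispatch
```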
---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl |  3 +++
 ext/FluxCUDAExt/FluxCUDAExt.jl     |  3 +++
 ext/FluxMetalExt/FluxMetalExt.jl   |  3 +++
 src/functor.jl                     | 24 +++++++++++++-----------
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index 0fbd8a04a4..b3daac55d6 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -17,6 +17,9 @@ const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat
 # Set to boolean on the first call to check_use_amdgpu
 const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)
 
+Flux.isavailable(device::Flux.FluxAMDGPUDevice) = true
+Flux.isfunctional(device::Flux.FluxAMDGPUDevice) = AMDGPU.functional()
+
 function check_use_amdgpu()
     if !isnothing(USE_AMDGPU[])
         return
diff --git a/ext/FluxCUDAExt/FluxCUDAExt.jl b/ext/FluxCUDAExt/FluxCUDAExt.jl
index 9f0dae1aa9..a9317b4f71 100644
--- a/ext/FluxCUDAExt/FluxCUDAExt.jl
+++ b/ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -14,6 +14,9 @@ import Adapt: adapt_storage
 
 const USE_CUDA = Ref{Union{Nothing, Bool}}(nothing)
 
+Flux.isavailable(device::Flux.FluxCUDADevice) = true
+Flux.isfunctional(device::Flux.FluxCUDADevice) = CUDA.functional()
+
 function check_use_cuda()
     if !isnothing(USE_CUDA[])
         return
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index 27316c3b16..c4f7b51ceb 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -12,6 +12,9 @@ using Zygote
 
 const USE_METAL = Ref{Union{Nothing, Bool}}(nothing)
 
+Flux.isavailable(device::Flux.FluxMetalDevice) = true
+Flux.isfunctional(devuce::Flux.FluxMetalDevice) = Metal.functional()
+
 function check_use_metal()
     isnothing(USE_METAL[]) || return
 
diff --git a/src/functor.jl b/src/functor.jl
index a6dfeb7506..4e7370a23e 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -470,7 +470,6 @@ A type representing `device` objects for the `"CUDA"` backend for Flux.
 """
 Base.@kwdef struct FluxCUDADevice <: AbstractDevice
     name::String = "CUDA"
-    pkgid::PkgId = PkgId(UUID("052768ef-5323-5732-b1bb-66c8b64840ba"), "CUDA")
 end
 
 """
@@ -480,7 +479,6 @@ A type representing `device` objects for the `"AMD"` backend for Flux.
 """
 Base.@kwdef struct FluxAMDDevice <: AbstractDevice
     name::String = "AMD"
-    pkgid::PkgId = PkgId(UUID("21141c5a-9bdb-4563-92ae-f87d6854732e"), "AMDGPU")
 end
 
 """
@@ -490,7 +488,6 @@ A type representing `device` objects for the `"Metal"` backend for Flux.
 """
 Base.@kwdef struct FluxMetalDevice <: AbstractDevice
     name::String = "Metal"
-    pkgid::PkgId = PkgId(UUID("dde4c033-4e86-420c-a63e-0dd931031962"), "Metal")
 end
 
 (::FluxCPUDevice)(x) = cpu(x)
@@ -515,12 +512,17 @@ end
 (device::FluxCUDADevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
 (device::FluxAMDDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
 (device::FluxMetalDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
-
-
 function _get_device_name(t::T) where {T <: AbstractDevice}
     return hasfield(T, :name) ? t.name : ""
 end
 
+## check device availability; more definitions in corresponding extensions
+isavailable(device::AbstractDevice) = false
+isfunctional(device::AbstractDevice) = false
+
+isavailable(device::FluxCPUDevice) = true
+isfunctional(device::FluxCPUDevice) = true
+
 # below order is important
 const DEVICES = (FluxCUDADevice(), FluxAMDDevice(), FluxMetalDevice(), FluxCPUDevice())
 
@@ -618,13 +620,13 @@ function get_device()::AbstractDevice
             @info "Using backend set in preferences: $backend."
             device = DEVICES[idx]
 
-            if _get_device_name(device) !== "CPU" && !haskey(Base.loaded_modules, device.pkgid)
+            if !isavailable(device)
                 @warn """
-                Trying to use backend $(_get_device_name(device)) but package $(device.pkgid) is not loaded.
+                Trying to use backend $(_get_device_name(device)) but its trigger package is not loaded.
                 Please load the package and call this function again to respect the preferences backend.
                 """ maxlog=1
             else
-                if _get_device_name(device) == "CPU" || getproperty(Base.loaded_modules[device.pkgid], :functional)()
+                if isfunctional(device)
                     @info "Using backend: $(_get_device_name(device))"
                     return device
                 else
@@ -634,15 +636,15 @@ function get_device()::AbstractDevice
 
     @info "Running automatic device selection..."
     for device in DEVICES
-        if _get_device_name(device) == "CPU" || haskey(Base.loaded_modules, device.pkgid)
+        if isavailable(device)
             @info "Trying backend: $(_get_device_name(device))."
-            if _get_device_name(device) == "CPU" || getproperty(Base.loaded_modules[device.pkgid], :functional)()
+            if isfunctional(device)
                 @debug "Using backend: $(_get_device_name(device))."
                 return device
             end
             @info "Backend: $(_get_device_name(device)) is not functional."
         else
-            @info "Trigger package for backend ($(_get_device_name(device))): $((device.pkgid)) not loaded."
+            @info "Trigger package for backend ($(_get_device_name(device))) is not loaded."
         end
     end
 end

From 3cd1d890246ebf363ddc500aded01c9f4f599f5f Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 25 Jul 2023 14:55:57 +0530
Subject: [PATCH 08/22] Correcting typographical errors, and removing useless
 imports.

---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl | 4 ++--
 ext/FluxMetalExt/FluxMetalExt.jl   | 2 +-
 src/Flux.jl                        | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index b3daac55d6..e767ff4602 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -17,8 +17,8 @@ const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat
 # Set to boolean on the first call to check_use_amdgpu
 const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)
 
-Flux.isavailable(device::Flux.FluxAMDGPUDevice) = true
-Flux.isfunctional(device::Flux.FluxAMDGPUDevice) = AMDGPU.functional()
+Flux.isavailable(device::Flux.FluxAMDDevice) = true
+Flux.isfunctional(device::Flux.FluxAMDDevice) = AMDGPU.functional()
 
 function check_use_amdgpu()
     if !isnothing(USE_AMDGPU[])
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index c4f7b51ceb..a1abb9dd71 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -13,7 +13,7 @@ using Zygote
 const USE_METAL = Ref{Union{Nothing, Bool}}(nothing)
 
 Flux.isavailable(device::Flux.FluxMetalDevice) = true
-Flux.isfunctional(devuce::Flux.FluxMetalDevice) = Metal.functional()
+Flux.isfunctional(device::Flux.FluxMetalDevice) = Metal.functional()
 
 function check_use_metal()
     isnothing(USE_METAL[]) || return
diff --git a/src/Flux.jl b/src/Flux.jl
index 633867aabb..d522b91e78 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -5,7 +5,6 @@ using Preferences
 using LinearAlgebra, Statistics, Random  # standard lib
 using MacroTools, Reexport, ProgressLogging, SpecialFunctions
 using MacroTools: @forward
-import Base: PkgId, UUID
 
 @reexport using NNlib
 using MLUtils

From f7f21e19c49f5003a0106a9dffabf57671380f09 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Thu, 27 Jul 2023 12:50:46 +0530
Subject: [PATCH 09/22] Adding `deviceID` to each device struct, and moving
 struct definitions to package extensions.

---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl |  16 +++-
 ext/FluxCUDAExt/FluxCUDAExt.jl     |  18 ++++-
 ext/FluxMetalExt/FluxMetalExt.jl   |  16 +++-
 src/functor.jl                     | 118 ++++++++++++-----------------
 4 files changed, 93 insertions(+), 75 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index e767ff4602..991f3a21dd 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -17,8 +17,19 @@ const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat
 # Set to boolean on the first call to check_use_amdgpu
 const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)
 
-Flux.isavailable(device::Flux.FluxAMDDevice) = true
-Flux.isfunctional(device::Flux.FluxAMDDevice) = AMDGPU.functional()
+"""
+    FluxAMDGPUExt.FluxAMDDevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"AMD"` backend for Flux.
+"""
+Base.@kwdef struct FluxAMDDevice <: Flux.AbstractDevice
+    deviceID::AMDGPU.HIPDevice
+end
+
+(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
+Flux.isavailable(::FluxAMDDevice) = true
+Flux.isfunctional(::FluxAMDDevice) = AMDGPU.functional()
+Flux._get_device_name(::FluxAMDDevice) = "AMD"
 
 function check_use_amdgpu()
     if !isnothing(USE_AMDGPU[])
@@ -47,6 +58,7 @@ include("conv.jl")
 
 function __init__()
     Flux.AMDGPU_LOADED[] = true
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]] = FluxAMDDevice(AMDGPU.device())
 end
 
 # TODO
diff --git a/ext/FluxCUDAExt/FluxCUDAExt.jl b/ext/FluxCUDAExt/FluxCUDAExt.jl
index a9317b4f71..eda667d1fc 100644
--- a/ext/FluxCUDAExt/FluxCUDAExt.jl
+++ b/ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -14,8 +14,19 @@ import Adapt: adapt_storage
 
 const USE_CUDA = Ref{Union{Nothing, Bool}}(nothing)
 
-Flux.isavailable(device::Flux.FluxCUDADevice) = true
-Flux.isfunctional(device::Flux.FluxCUDADevice) = CUDA.functional()
+"""
+    FluxCUDAExt.FluxCUDADevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"CUDA"` backend for Flux.
+"""
+Base.@kwdef struct FluxCUDADevice <: Flux.AbstractDevice
+    deviceID::CUDA.CuDevice
+end
+
+(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
+Flux.isavailable(::FluxCUDADevice) = true
+Flux.isfunctional(::FluxCUDADevice) = CUDA.functional()
+Flux._get_device_name(::FluxCUDADevice) = "CUDA"
 
 function check_use_cuda()
     if !isnothing(USE_CUDA[])
@@ -39,6 +50,9 @@ include("functor.jl")
 
 function __init__()
     Flux.CUDA_LOADED[] = true
 
+    ## add device to available devices
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]] = FluxCUDADevice(CUDA.device())
+
     try
         Base.require(Main, :cuDNN)
     catch
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index a1abb9dd71..bd85e35fba 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -12,8 +12,19 @@ using Zygote
 
 const USE_METAL = Ref{Union{Nothing, Bool}}(nothing)
 
-Flux.isavailable(device::Flux.FluxMetalDevice) = true
-Flux.isfunctional(device::Flux.FluxMetalDevice) = Metal.functional()
+"""
+    FluxMetalExt.FluxMetalDevice <: Flux.AbstractDevice
+
+A type representing `device` objects for the `"Metal"` backend for Flux.
+"""
+Base.@kwdef struct FluxMetalDevice <: Flux.AbstractDevice
+    deviceID::MTLDevice
+end
+
+(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
+Flux.isavailable(::FluxMetalDevice) = true
+Flux.isfunctional(::FluxMetalDevice) = Metal.functional()
+Flux._get_device_name(::FluxMetalDevice) = "Metal"
 
 function check_use_metal()
     isnothing(USE_METAL[]) || return
@@ -33,6 +44,7 @@ include("functor.jl")
 
 function __init__()
     Flux.METAL_LOADED[] = true
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]] = FluxMetalDevice(Metal.current_device())
 end
 
 end
diff --git a/src/functor.jl b/src/functor.jl
index 4e7370a23e..c62fcb9367 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -187,7 +187,16 @@ _isbitsarray(x) = false
 _isleaf(::AbstractRNG) = true
 _isleaf(x) = _isbitsarray(x) || Functors.isleaf(x)
 
-const GPU_BACKENDS = ("CUDA", "AMD", "Metal", "CPU")
+const GPU_BACKEND_ORDER = sort(
+    Dict(
+        "CUDA" => 1,
+        "AMD" => 2,
+        "Metal" => 3,
+        "CPU" => 4,
+    ),
+    byvalue = true
+)
+const GPU_BACKENDS = tuple(collect(keys(GPU_BACKEND_ORDER))...)
 const GPU_BACKEND = @load_preference("gpu_backend", "CUDA")
 
 function gpu_backend!(backend::String)
@@ -447,56 +456,16 @@ function gpu(d::MLUtils.DataLoader)
     )
 end
 
+# Defining device interfaces.
 """
     Flux.AbstractDevice <: Function
 
-An abstract type representing `device` objects for different GPU backends. The currently supported backends are `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"`; the `"CPU"` backend is the fallback case when no GPU is available.
+An abstract type representing `device` objects for different GPU backends. The currently supported backends are `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"`; the `"CPU"` backend is the fallback case when no GPU is available. GPU extensions of Flux define subtypes of this type.
+
 """
 abstract type AbstractDevice <: Function end
 
-"""
-    Flux.FluxCPUDevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"CPU"` backend for Flux. This is the fallback case when no GPU is available to Flux.
-"""
-Base.@kwdef struct FluxCPUDevice <: AbstractDevice
-    name::String = "CPU"
-end
-
-"""
-    Flux.FluxCUDADevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"CUDA"` backend for Flux.
-"""
-Base.@kwdef struct FluxCUDADevice <: AbstractDevice
-    name::String = "CUDA"
-end
-
-"""
-    Flux.FluxAMDDevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"AMD"` backend for Flux.
-"""
-Base.@kwdef struct FluxAMDDevice <: AbstractDevice
-    name::String = "AMD"
-end
-
-"""
-    Flux.FluxMetalDevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"Metal"` backend for Flux.
-"""
-Base.@kwdef struct FluxMetalDevice <: AbstractDevice
-    name::String = "Metal"
-end
-
-(::FluxCPUDevice)(x) = cpu(x)
-(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
-(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
-(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
-
-# Applying device to DataLoader
-function _apply_to_dataloader(device::T, d::MLUtils.DataLoader) where {T <: AbstractDevice}
+function (device::AbstractDevice)(d::MLUtils.DataLoader)
     MLUtils.DataLoader(MLUtils.mapobs(device, d.data),
         d.batchsize,
         d.buffer,
@@ -508,23 +477,32 @@ function _apply_to_dataloader(device::T, d::MLUtils.DataLoader) where {T <: Abst
     )
 end
 
-(device::FluxCPUDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
-(device::FluxCUDADevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
-(device::FluxAMDDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
-(device::FluxMetalDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
-function _get_device_name(t::T) where {T <: AbstractDevice}
-    return hasfield(T, :name) ? t.name : ""
-end
+
+function _get_device_name(::T)::String where {T <: AbstractDevice} end
 
 ## check device availability; more definitions in corresponding extensions
-isavailable(device::AbstractDevice) = false
-isfunctional(device::AbstractDevice) = false
+isavailable(::Nothing) = false
+isfunctional(::Nothing) = false
+
+isavailable(::AbstractDevice) = false
+isfunctional(::AbstractDevice) = false
 
-isavailable(device::FluxCPUDevice) = true
-isfunctional(device::FluxCPUDevice) = true
+"""
+    Flux.FluxCPUDevice <: Flux.AbstractDevice
 
-# below order is important
-const DEVICES = (FluxCUDADevice(), FluxAMDDevice(), FluxMetalDevice(), FluxCPUDevice())
+A type representing `device` objects for the `"CPU"` backend for Flux. This is the fallback case when no GPU is available to Flux.
+"""
+Base.@kwdef struct FluxCPUDevice <: AbstractDevice end
+
+(::FluxCPUDevice)(x) = cpu(x)
+isavailable(::FluxCPUDevice) = true
+isfunctional(::FluxCPUDevice) = true
+_get_device_name(::FluxCPUDevice) = "CPU"
+
+## device list. order is important
+const DEVICES = Ref{Vector{Union{Nothing, AbstractDevice}}}(Vector{Union{Nothing, AbstractDevice}}(nothing, length(GPU_BACKENDS)))
+DEVICES[][GPU_BACKEND_ORDER["CPU"]] = FluxCPUDevice()
+
+## get device
 
 """
     Flux.supported_devices()
@@ -540,7 +518,7 @@ julia> Flux.supported_devices()
 ("CUDA", "AMD", "Metal", "CPU")
 ```
 """
-supported_devices() = map(_get_device_name, DEVICES)
+supported_devices() = GPU_BACKENDS
 
 """
     Flux.get_device()::AbstractDevice
@@ -606,6 +584,7 @@ julia> model.weight
 """
 function get_device()::AbstractDevice
     backend = @load_preference("gpu_backend", nothing)
+
     if backend !== nothing
         allowed_backends = supported_devices()
         idx = findfirst(isequal(backend), allowed_backends)
@@ -616,35 +595,36 @@ function get_device()::AbstractDevice
             """ maxlog=1
         else
             @info "Using backend set in preferences: $backend."
-            device = DEVICES[idx]
+            device = DEVICES[][idx]
 
             if !isavailable(device)
                 @warn """
-                Trying to use backend $(_get_device_name(device)) but its trigger package is not loaded.
+                Trying to use backend: $backend but its trigger package is not loaded.
                 Please load the package and call this function again to respect the preferences backend.
-                """ maxlog=1
+                """
             else
                 if isfunctional(device)
-                    @info "Using backend: $(_get_device_name(device))"
+                    @info "Using backend: $backend"
                     return device
                 else
-                    @warn "Backend: $(_get_device_name(device)) from the set preferences is not functional. Defaulting to automatic device selection." maxlog=1
+                    @warn "Backend: $backend from the set preferences is not functional. Defaulting to automatic device selection."
                 end
             end
         end
     end
 
     @info "Running automatic device selection..."
-    for device in DEVICES
+    for backend in GPU_BACKENDS
+        device = DEVICES[][GPU_BACKEND_ORDER[backend]]
         if isavailable(device)
-            @info "Trying backend: $(_get_device_name(device))."
+            @info "Trying backend: $backend."
             if isfunctional(device)
-                @debug "Using backend: $(_get_device_name(device))."
+                @info "Using backend: $backend."
                 return device
             end
-            @info "Backend: $(_get_device_name(device)) is not functional."
+            @info "Backend: $backend is not functional."
         else
-            @info "Trigger package for backend ($(_get_device_name(device))) is not loaded."
+            @info "Trigger package for backend: $backend is not loaded."
         end
     end
 end

From d22aaf59a2bcc81fd5d67264141354d3e23dcd9d Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 03:45:38 +0530
Subject: [PATCH 10/22] Adding tutorial for using device objects in manual.

---
 docs/src/gpu.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 659b586880..5677d545a8 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -231,3 +231,71 @@ $ export CUDA_VISIBLE_DEVICES='0,1'
 
 More information for conditional use of GPUs in CUDA.jl can be found in its [documentation](https://cuda.juliagpu.org/stable/installation/conditional/#Conditional-use), and information about the specific use of the variable is described in the [Nvidia CUDA blog post](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/).
 
+## Using device objects
+
+As a more convenient syntax, Flux allows the usage of GPU `device` objects which can be used to easily transfer models to GPUs (defaulting to the CPU if no GPU backend is available). This syntax has a few advantages, including automatic selection of the GPU backend and type stability of data movement. To do this, the [`Flux.get_device`](@ref) function can be used.
+
+`Flux.get_device` first checks for a GPU preference, and if possible returns a device for the preferred backend. For instance, consider the following example, where we load the [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) package to use an NVIDIA GPU (`"CUDA"` is the default preference):
+
+```julia-repl
+julia> using Flux, CUDA;
+
+julia> device = Flux.get_device() # returns handle to an NVIDIA GPU
+[ Info: Using backend set in preferences: CUDA.
+[ Info: Using backend: CUDA
+(::FluxCUDAExt.FluxCUDADevice) (generic function with 1 method)
+
+julia> device.deviceID # check the id of the GPU
+CuDevice(0): NVIDIA GeForce GTX 1650
+
+julia> model = Dense(2 => 3);
+
+julia> model.weight # the model initially lives in CPU memory
+3×2 Matrix{Float32}:
+ -0.984794  -0.904345
+  0.720379  -0.486398
+  0.851011  -0.586942
+
+julia> model = model |> device # transfer model to the GPU
+Dense(2 => 3)  # 9 parameters
+
+julia> model.weight
+3×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
+ -0.984794  -0.904345
+  0.720379  -0.486398
+  0.851011  -0.586942
+
+```
+
+The device preference can also be set via the [`gpu_backend!`](@ref) function. For instance, below we first set our device preference to `"CPU"`:
+
+```julia-repl
+julia> using Flux; Flux.gpu_backend!("CPU")
+┌ Info: New GPU backend set: CPU.
+└ Restart your Julia session for this change to take effect!
+```
+
+Then, after restarting the Julia session, `Flux.get_device` returns a handle to the `"CPU"`:
+
+```julia-repl
+julia> using Flux, CUDA;  # even if CUDA is loaded, we'll still get a CPU device
+
+julia> device = Flux.get_device() # get a CPU device
+[ Info: Using backend set in preferences: CPU.
+[ Info: Using backend: CPU
+(::Flux.FluxCPUDevice) (generic function with 1 method)
+
+julia> model = Dense(2 => 3);
+
+julia> model = model |> device
+Dense(2 => 3)  # 9 parameters
+
+julia> model.weight # no change; model still lives on CPU
+3×2 Matrix{Float32}:
+ -0.942968   0.856258
+  0.440009   0.714106
+ -0.419192  -0.471838
+```
+Clearly, this means that the same code will work for any GPU backend and the CPU.
+
+If the preference backend isn't available or isn't functional, then [`Flux.get_device`](@ref) looks for a CUDA, AMD or Metal backend, and returns a corresponding device (if the backend is available and functional). Otherwise, a CPU device is returned. For detailed information about how the backend is selected, check the documentation for [`Flux.get_device`](@ref).
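A note on the manual section added above: because a `device` object is callable, the `DataLoader` support introduced in patch 06 composes with it, so batches can be moved to the selected backend as they are iterated. A minimal sketch (the array shapes and batch size here are illustrative, not from the manual):

```julia
using Flux

device = Flux.get_device()    # falls back to the CPU device if no GPU package is loaded
model  = Dense(2 => 3) |> device

X = rand(Float32, 2, 100)
Y = rand(Float32, 3, 100)
loader = device(Flux.DataLoader((X, Y), batchsize=10))  # batches are delivered on `device`

for (x, y) in loader
    @assert size(model(x)) == size(y)  # model and data now live on the same device
end
```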
From 03faa963e84c7492381d29aefb6c6878b30b8beb Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 12:42:30 +0530
Subject: [PATCH 11/22] Adding docstring for `get_device` in manual, and
 renaming internal functions.

---
 docs/src/gpu.md                    |  4 ++++
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl |  4 ++--
 ext/FluxCUDAExt/FluxCUDAExt.jl     |  4 ++--
 ext/FluxMetalExt/FluxMetalExt.jl   |  4 ++--
 src/functor.jl                     | 20 ++++++++++----------
 5 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 5677d545a8..8b344166ad 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -299,3 +299,7 @@ julia> model.weight # no change; model still lives on CPU
 Clearly, this means that the same code will work for any GPU backend and the CPU.
 
 If the preference backend isn't available or isn't functional, then [`Flux.get_device`](@ref) looks for a CUDA, AMD or Metal backend, and returns a corresponding device (if the backend is available and functional). Otherwise, a CPU device is returned. For detailed information about how the backend is selected, check the documentation for [`Flux.get_device`](@ref).
+
+```@docs
+Flux.get_device
+```
diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index 991f3a21dd..6a26308fe1 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -27,8 +27,8 @@ end
 
 (::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
-Flux.isavailable(::FluxAMDDevice) = true
-Flux.isfunctional(::FluxAMDDevice) = AMDGPU.functional()
+Flux._isavailable(::FluxAMDDevice) = true
+Flux._isfunctional(::FluxAMDDevice) = AMDGPU.functional()
 Flux._get_device_name(::FluxAMDDevice) = "AMD"
 
 function check_use_amdgpu()
diff --git a/ext/FluxCUDAExt/FluxCUDAExt.jl b/ext/FluxCUDAExt/FluxCUDAExt.jl
index eda667d1fc..dbaf4ca5f6 100644
--- a/ext/FluxCUDAExt/FluxCUDAExt.jl
+++ b/ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -24,8 +24,8 @@ end
 
 (::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
-Flux.isavailable(::FluxCUDADevice) = true
-Flux.isfunctional(::FluxCUDADevice) = CUDA.functional()
+Flux._isavailable(::FluxCUDADevice) = true
+Flux._isfunctional(::FluxCUDADevice) = CUDA.functional()
 Flux._get_device_name(::FluxCUDADevice) = "CUDA"
 
 function check_use_cuda()
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index bd85e35fba..4b404ec21a 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -22,8 +22,8 @@ end
 
 (::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
-Flux.isavailable(::FluxMetalDevice) = true
-Flux.isfunctional(::FluxMetalDevice) = Metal.functional()
+Flux._isavailable(::FluxMetalDevice) = true
+Flux._isfunctional(::FluxMetalDevice) = Metal.functional()
 Flux._get_device_name(::FluxMetalDevice) = "Metal"
 
 function check_use_metal()
diff --git a/src/functor.jl b/src/functor.jl
index c62fcb9367..4568ee0d42 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -480,11 +480,11 @@ end
 function _get_device_name(::T)::String where {T <: AbstractDevice} end
 
 ## check device availability; more definitions in corresponding extensions
-isavailable(::Nothing) = false
-isfunctional(::Nothing) = false
+_isavailable(::Nothing) = false
+_isfunctional(::Nothing) = false
 
-isavailable(::AbstractDevice) = false
-isfunctional(::AbstractDevice) = false
+_isavailable(::AbstractDevice) = false
+_isfunctional(::AbstractDevice) = false
 
 """
     Flux.FluxCPUDevice <: Flux.AbstractDevice
@@ -494,8 +494,8 @@ A type representing `device` objects for the `"CPU"` backend for Flux. This is t
 Base.@kwdef struct FluxCPUDevice <: AbstractDevice end
 
 (::FluxCPUDevice)(x) = cpu(x)
-isavailable(::FluxCPUDevice) = true
-isfunctional(::FluxCPUDevice) = true
+_isavailable(::FluxCPUDevice) = true
+_isfunctional(::FluxCPUDevice) = true
 _get_device_name(::FluxCPUDevice) = "CPU"
 
@@ -597,13 +597,13 @@ function get_device()::AbstractDevice
             @info "Using backend set in preferences: $backend."
             device = DEVICES[][idx]
 
-            if !isavailable(device)
+            if !_isavailable(device)
                 @warn """
                 Trying to use backend: $backend but its trigger package is not loaded.
                 Please load the package and call this function again to respect the preferences backend.
                 """
             else
-                if isfunctional(device)
+                if _isfunctional(device)
                     @info "Using backend: $backend"
                     return device
                 else
@@ -616,9 +616,9 @@ function get_device()::AbstractDevice
     @info "Running automatic device selection..."
     for backend in GPU_BACKENDS
         device = DEVICES[][GPU_BACKEND_ORDER[backend]]
-        if isavailable(device)
+        if _isavailable(device)
             @info "Trying backend: $backend."
-            if isfunctional(device)
+            if _isfunctional(device)
                 @info "Using backend: $backend."
                 return device
             end

From e1ad3e786887cc8a00c4b0e8a6fb82326e23a833 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 12:58:23 +0530
Subject: [PATCH 12/22] Minor change in docs.

---
 docs/src/gpu.md | 2 +-
 src/functor.jl  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 8b344166ad..b4f5a6a372 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -267,7 +267,7 @@ julia> model.weight
 ```
 
-The device preference can also be set via the [`gpu_backend!`](@ref) function. For instance, below we first set our device preference to `"CPU"`:
+The device preference can also be set via the [`Flux.gpu_backend!`](@ref) function. For instance, below we first set our device preference to `"CPU"`:
 
diff --git a/src/functor.jl b/src/functor.jl
index 4568ee0d42..9cba443400 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -525,7 +525,7 @@ supported_devices() = GPU_BACKENDS
 
 Returns a `device` object for the most appropriate backend for the current Julia session.
 
-First, the function checks whether a backend preference has been set via the `gpu_backend!` function. If so, an attempt is made to load this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is loaded. Otherwise, the backend is chosen automatically. To update the backend preference, use [gpu_backend!](@ref).
+First, the function checks whether a backend preference has been set via the [`Flux.gpu_backend!`](@ref) function. If so, an attempt is made to load this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is loaded. Otherwise, the backend is chosen automatically. To update the backend preference, use [`Flux.gpu_backend!`](@ref).
 

From 179bbeac81c56f8ce01b2d937c56de320a237b6a Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 18:55:53 +0530
Subject: [PATCH 13/22] Removing structs from package extensions as it is bad
 practice.

---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl | 17 +++-----------
 ext/FluxCUDAExt/FluxCUDAExt.jl     | 17 +++-----------
 ext/FluxMetalExt/FluxMetalExt.jl   | 17 +++-----------
 src/functor.jl                     | 36 ++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index 6a26308fe1..3c987ec550 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -17,19 +17,8 @@ const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat
 # Set to boolean on the first call to check_use_amdgpu
 const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)
 
-"""
-    FluxAMDGPUExt.FluxAMDDevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"AMD"` backend for Flux.
-"""
-Base.@kwdef struct FluxAMDDevice <: Flux.AbstractDevice
-    deviceID::AMDGPU.HIPDevice
-end
-
-(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
-Flux._isavailable(::FluxAMDDevice) = true
-Flux._isfunctional(::FluxAMDDevice) = AMDGPU.functional()
-Flux._get_device_name(::FluxAMDDevice) = "AMD"
+Flux._isavailable(::Flux.FluxAMDDevice) = true
+Flux._isfunctional(::Flux.FluxAMDDevice) = AMDGPU.functional()
 
 function check_use_amdgpu()
     if !isnothing(USE_AMDGPU[])
@@ -47,7 +36,7 @@ include("conv.jl")
 
 function __init__()
     Flux.AMDGPU_LOADED[] = true
-    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]] = FluxAMDDevice(AMDGPU.device())
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]] = Flux.FluxAMDDevice(AMDGPU.device())
 end
 
 # TODO
diff --git a/ext/FluxCUDAExt/FluxCUDAExt.jl b/ext/FluxCUDAExt/FluxCUDAExt.jl
index dbaf4ca5f6..bc7fbc5b83 100644
--- a/ext/FluxCUDAExt/FluxCUDAExt.jl
+++ b/ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -14,19 +14,8 @@ import Adapt: adapt_storage
 
 const USE_CUDA = Ref{Union{Nothing, Bool}}(nothing)
 
-"""
-    FluxCUDAExt.FluxCUDADevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"CUDA"` backend for Flux.
-"""
-Base.@kwdef struct FluxCUDADevice <: Flux.AbstractDevice
-    deviceID::CUDA.CuDevice
-end
-
-(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
-Flux._isavailable(::FluxCUDADevice) = true
-Flux._isfunctional(::FluxCUDADevice) = CUDA.functional()
-Flux._get_device_name(::FluxCUDADevice) = "CUDA"
+Flux._isavailable(::Flux.FluxCUDADevice) = true
+Flux._isfunctional(::Flux.FluxCUDADevice) = CUDA.functional()
 
 function check_use_cuda()
     if !isnothing(USE_CUDA[])
@@ -40,7 +29,7 @@ function __init__()
     Flux.CUDA_LOADED[] = true
 
     ## add device to available devices
-    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]] = FluxCUDADevice(CUDA.device())
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]] = Flux.FluxCUDADevice(CUDA.device())
 
     try
         Base.require(Main, :cuDNN)
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index 4b404ec21a..bc4a578fb6 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -12,19 +12,8 @@ using Zygote
 
 const USE_METAL = Ref{Union{Nothing, Bool}}(nothing)
 
-"""
-    FluxMetalExt.FluxMetalDevice <: Flux.AbstractDevice
-
-A type representing `device` objects for the `"Metal"` backend for Flux.
-"""
-Base.@kwdef struct FluxMetalDevice <: Flux.AbstractDevice
-    deviceID::MTLDevice
-end
-
-(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
-Flux._isavailable(::FluxMetalDevice) = true
-Flux._isfunctional(::FluxMetalDevice) = Metal.functional()
-Flux._get_device_name(::FluxMetalDevice) = "Metal"
+Flux._isavailable(::Flux.FluxMetalDevice) = true
+Flux._isfunctional(::Flux.FluxMetalDevice) = Metal.functional()
 
 function check_use_metal()
     isnothing(USE_METAL[]) || return
@@ -44,7 +33,7 @@ include("functor.jl")
 
 function __init__()
     Flux.METAL_LOADED[] = true
-    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]] = FluxMetalDevice(Metal.current_device())
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]] = Flux.FluxMetalDevice(Metal.current_device())
 end
 
 end
diff --git a/src/functor.jl b/src/functor.jl
index 9cba443400..72f548c08f 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -498,6 +498,42 @@ _isavailable(::FluxCPUDevice) = true
 _isfunctional(::FluxCPUDevice) = true
 _get_device_name(::FluxCPUDevice) = "CPU"
 
+"""
+    FluxCUDADevice <: AbstractDevice
+
+A type representing `device` objects for the `"CUDA"` backend for Flux.
+"""
+Base.@kwdef struct FluxCUDADevice <: AbstractDevice
+    deviceID
+end
+
+(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
+_get_device_name(::FluxCUDADevice) = "CUDA"
+
+"""
+    FluxAMDDevice <: AbstractDevice
+
+A type representing `device` objects for the `"AMD"` backend for Flux.
+"""
+Base.@kwdef struct FluxAMDDevice <: AbstractDevice
+    deviceID
+end
+
+(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
+_get_device_name(::FluxAMDDevice) = "AMD"
+
+"""
+    FluxMetalDevice <: AbstractDevice
+
+A type representing `device` objects for the `"Metal"` backend for Flux.
+"""
+Base.@kwdef struct FluxMetalDevice <: AbstractDevice
+    deviceID
+end
+
+(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)
+_get_device_name(::FluxMetalDevice) = "Metal"
+
 ## device list. order is important
 const DEVICES = Ref{Vector{Union{Nothing, AbstractDevice}}}(Vector{Union{Nothing, AbstractDevice}}(nothing, length(GPU_BACKENDS)))
 DEVICES[][GPU_BACKEND_ORDER["CPU"]] = FluxCPUDevice()

From bb67ad6399b6aec647f588587c0b52cbec98c560 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 19:14:17 +0530
Subject: [PATCH 14/22] Adding more docstrings in manual.

---
 docs/src/gpu.md | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index b4f5a6a372..966144869f 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -298,8 +298,32 @@ julia> model.weight # no change; model still lives on CPU
 ```
 Clearly, this means that the same code will work for any GPU backend and the CPU.
 
-If the preference backend isn't available or isn't functional, then [`Flux.get_device`](@ref) looks for a CUDA, AMD or Metal backend, and returns a corresponding device (if the backend is available and functional). Otherwise, a CPU device is returned. For detailed information about how the backend is selected, check the documentation for [`Flux.get_device`](@ref).
+If the preference backend isn't available or isn't functional, then [`Flux.get_device`](@ref) looks for a CUDA, AMD or Metal backend, and returns a corresponding device (if the backend is available and functional). Otherwise, a CPU device is returned. In the below example, the GPU preference is `"CUDA"`:
+
+```julia-repl
+julia> using Flux; # preference is CUDA, but CUDA.jl not loaded
+
+julia> device = Flux.get_device() # this will resort to automatic device selection
+[ Info: Using backend set in preferences: CUDA.
+┌ Warning: Trying to use backend: CUDA but its trigger package is not loaded.
+│ Please load the package and call this function again to respect the preferences backend.
+└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:637
+[ Info: Running automatic device selection...
+[ Info: Trigger package for backend: CUDA is not loaded.
+[ Info: Trigger package for backend: AMD is not loaded.
+[ Info: Trigger package for backend: Metal is not loaded.
+[ Info: Trying backend: CPU.
+[ Info: Using backend: CPU.
+(::Flux.FluxCPUDevice) (generic function with 1 method)
+```
+For detailed information about how the backend is selected, check the documentation for [`Flux.get_device`](@ref).
 
 ```@docs
+Flux.AbstractDevice
+Flux.FluxCPUDevice
+Flux.FluxCUDADevice
+Flux.FluxAMDDevice
+Flux.FluxMetalDevice
+Flux.supported_devices
 Flux.get_device
 ```

From 7be17003d18fc5812226d07aa2481691822f59d7 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Mon, 31 Jul 2023 22:02:50 +0530
Subject: [PATCH 15/22] Removing redundant log messages.
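The one warning that must survive (a `gpu_backend` preference set to a
disallowed value) already uses the logging `maxlog` keyword, so repeated
`get_device` calls cannot spam it. A quick illustration of that
standard-library behaviour, independent of Flux (the message text is just an
example):

```julia
for _ in 1:5
    @warn "`gpu_backend` preference is set to a disallowed value." maxlog=1
end
# The warning is emitted once; the other four iterations are suppressed.
```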
--- docs/src/gpu.md | 9 +-------- src/functor.jl | 18 ++++++------------ 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/docs/src/gpu.md b/docs/src/gpu.md index 966144869f..48f3e0a0bd 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -242,8 +242,7 @@ julia> using Flux, CUDA; julia> device = Flux.get_device() # returns handle to an NVIDIA GPU [ Info: Using backend set in preferences: CUDA. -[ Info: Using backend: CUDA -(::FluxCUDAExt.FluxCUDADevice) (generic function with 1 method) +(::Flux.FluxCUDADevice) (generic function with 1 method) julia> device.deviceID # check the id of the GPU CuDevice(0): NVIDIA GeForce GTX 1650 @@ -282,7 +281,6 @@ julia> using Flux, CUDA; # even if CUDA is loaded, we'll still get a CPU devi julia> device = Flux.get_device() # get a CPU device [ Info: Using backend set in preferences: CPU. -[ Info: Using backend: CPU (::Flux.FluxCPUDevice) (generic function with 1 method) julia> model = Dense(2 => 3); @@ -308,11 +306,6 @@ julia> device = Flux.get_device() # this will resort to automatic device s ┌ Warning: Trying to use backend: CUDA but it's trigger package is not loaded. │ Please load the package and call this function again to respect the preferences backend. └ @ Flux ~/fluxml/Flux.jl/src/functor.jl:637 -[ Info: Running automatic device selection... -[ Info: Trigger package for backend: CUDA is not loaded. -[ Info: Trigger package for backend: AMD is not loaded. -[ Info: Trigger package for backend: Metal is not loaded. -[ Info: Trying backend: CPU. [ Info: Using backend: CPU. (::Flux.FluxCPUDevice) (generic function with 1 method) ``` diff --git a/src/functor.jl b/src/functor.jl index 72f548c08f..8222e2d853 100644 --- a/src/functor.jl +++ b/src/functor.jl @@ -576,10 +576,10 @@ Dense(2 => 3) # 9 parameters julia> device = Flux.get_device() # this will just load the CPU device [ Info: Using backend set in preferences: AMD. -┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded. +┌ Warning: Trying to use backend: AMD but it's trigger package is not loaded. │ Please load the package and call this function again to respect the preferences backend. -└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496 -[ Info: Running automatic device selection... +└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:638 +[ Info: Using backend: CPU. (::Flux.FluxCPUDevice) (generic function with 1 method) julia> model = model |> device @@ -602,10 +602,10 @@ Dense(2 => 3) # 9 parameters julia> device = Flux.get_device() [ Info: Using backend set in preferences: AMD. -┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded. +┌ Warning: Trying to use backend: AMD but it's trigger package is not loaded. │ Please load the package and call this function again to respect the preferences backend. -└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496 -[ Info: Running automatic device selection... +└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:637 +[ Info: Using backend: CUDA. (::Flux.FluxCUDADevice) (generic function with 1 method) julia> model = model |> device @@ -640,7 +640,6 @@ function get_device()::AbstractDevice """ else if _isfunctional(device) - @info "Using backend: $backend" return device else @warn "Backend: $backend from the set preferences is not functional. Defaulting to autmatic device selection." @@ -649,18 +648,13 @@ function get_device()::AbstractDevice end end - @info "Running automatic device selection..." 
for backend in GPU_BACKENDS device = DEVICES[][GPU_BACKEND_ORDER[backend]] if _isavailable(device) - @info "Trying backend: $backend." if _isfunctional(device) @info "Using backend: $backend." return device end - @info "Backend: $backend is not functional." - else - @info "Trigger package for backend: $backend is not loaded." end end end From 7558d29ce93eb2d97eb80aa5ce3e5ab1b1fcd618 Mon Sep 17 00:00:00 2001 From: Siddhant Chaudhary Date: Mon, 31 Jul 2023 22:22:14 +0530 Subject: [PATCH 16/22] Adding kwarg to `get_device` for verbose output. --- docs/src/gpu.md | 6 +++--- src/functor.jl | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/docs/src/gpu.md b/docs/src/gpu.md index 48f3e0a0bd..70708c444a 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -240,7 +240,7 @@ As a more convenient syntax, Flux allows the usage of GPU `device` objects which ```julia-repl julia> using Flux, CUDA; -julia> device = Flux.get_device() # returns handle to an NVIDIA GPU +julia> device = Flux.get_device(; verbose=true) # returns handle to an NVIDIA GPU [ Info: Using backend set in preferences: CUDA. (::Flux.FluxCUDADevice) (generic function with 1 method) @@ -279,7 +279,7 @@ Then, after restarting the Julia session, `Flux.get_device` returns a handle to ```julia-repl julia> using Flux, CUDA; # even if CUDA is loaded, we'll still get a CPU device -julia> device = Flux.get_device() # get a CPU device +julia> device = Flux.get_device(; verbose=true) # get a CPU device [ Info: Using backend set in preferences: CPU. (::Flux.FluxCPUDevice) (generic function with 1 method) @@ -301,7 +301,7 @@ If the preference backend isn't available or isn't functional, then [`Flux.get_d ```julia-repl julia> using Flux; # preference is CUDA, but CUDA.jl not loaded -julia> device = Flux.get_device() # this will resort to automatic device selection +julia> device = Flux.get_device(; verbose=true) # this will resort to automatic device selection [ Info: Using backend set in preferences: CUDA. ┌ Warning: Trying to use backend: CUDA but it's trigger package is not loaded. │ Please load the package and call this function again to respect the preferences backend. diff --git a/src/functor.jl b/src/functor.jl index 8222e2d853..d39fb619af 100644 --- a/src/functor.jl +++ b/src/functor.jl @@ -557,7 +557,7 @@ julia> Flux.supported_devices() supported_devices() = GPU_BACKENDS """ - Flux.get_device()::AbstractDevice + Flux.get_device(; verbose=false)::AbstractDevice Returns a `device` object for the most appropriate backend for the current Julia session. @@ -565,6 +565,8 @@ First, the function checks whether a backend preference has been set via the [`F If there is no preference, then for each of the `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"` backends in the given order, this function checks whether the given backend has been loaded via the corresponding trigger package, and whether the backend is functional. If so, the `device` corresponding to the backend is returned. If no GPU backend is available, a `Flux.FluxCPUDevice` is returned. +If `verbose` is set to `true`, then the function prints informative log messages. + # Examples For the example given below, the backend preference was set to `"AMD"` via the [`gpu_backend!`](@ref) function. 
@@ -574,7 +576,7 @@ julia> using Flux;
 julia> model = Dense(2 => 3)
 Dense(2 => 3) # 9 parameters
 
-julia> device = Flux.get_device() # this will just load the CPU device
+julia> device = Flux.get_device(; verbose=true) # this will just load the CPU device
 [ Info: Using backend set in preferences: AMD.
 ┌ Warning: Trying to use backend: AMD but it's trigger package is not loaded.
 │ Please load the package and call this function again to respect the preferences backend.
@@ -600,7 +602,7 @@ Dense(2 => 3) # 9 parameters
 julia> using Flux, CUDA;
 
 julia> model = Dense(2 => 3)
 Dense(2 => 3) # 9 parameters
 
-julia> device = Flux.get_device()
+julia> device = Flux.get_device(; verbose=true)
 [ Info: Using backend set in preferences: AMD.
 ┌ Warning: Trying to use backend: AMD but it's trigger package is not loaded.
 │ Please load the package and call this function again to respect the preferences backend.
@@ -618,7 +620,7 @@ julia> model.weight
  0.290744   -0.0592499
 ```
 """
-function get_device()::AbstractDevice
+function get_device(; verbose=false)::AbstractDevice
     backend = @load_preference("gpu_backend", nothing)
 
     if backend !== nothing
@@ -630,7 +632,7 @@ function get_device()::AbstractDevice
             Defaulting to automatic device selection.
             """ maxlog=1
         else
-            @info "Using backend set in preferences: $backend."
+            verbose && @info "Using backend set in preferences: $backend."
             device = DEVICES[][idx]
 
             if !_isavailable(device)
@@ -652,7 +654,7 @@ function get_device()::AbstractDevice
         device = DEVICES[][GPU_BACKEND_ORDER[backend]]
         if _isavailable(device)
             if _isfunctional(device)
-                @info "Using backend: $backend."
+                verbose && @info "Using backend: $backend."
                 return device
             end
         end

From 95e3bc3b91d198a2a164b6fef9f28758892bcbfc Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 1 Aug 2023 00:57:37 +0530
Subject: [PATCH 17/22] Setting `deviceID` to `nothing` if GPU is not functional.

---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl | 2 +-
 ext/FluxCUDAExt/FluxCUDAExt.jl | 2 +-
 ext/FluxMetalExt/FluxMetalExt.jl | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index 3c987ec550..57a2bb96b8 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -47,7 +47,7 @@ include("conv.jl")
 
 function __init__()
     Flux.AMDGPU_LOADED[] = true
-    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["AMD"]] = Flux.FluxAMDDevice(AMDGPU.device())
+    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["AMD"]] = AMDGPU.functional() ? Flux.FluxAMDDevice(AMDGPU.device()) : Flux.FluxAMDDevice(nothing)
 end
 
 # TODO
diff --git a/ext/FluxCUDAExt/FluxCUDAExt.jl b/ext/FluxCUDAExt/FluxCUDAExt.jl
index bc7fbc5b83..ad80cf8a58 100644
--- a/ext/FluxCUDAExt/FluxCUDAExt.jl
+++ b/ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -40,7 +40,7 @@ function __init__()
     Flux.CUDA_LOADED[] = true
 
     ## add device to available devices
-    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]] = Flux.FluxCUDADevice(CUDA.device())
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]] = CUDA.functional() ? Flux.FluxCUDADevice(CUDA.device()) : Flux.FluxCUDADevice(nothing)
 
     try
         Base.require(Main, :cuDNN)
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index bc4a578fb6..504ad01522 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -33,7 +33,7 @@ include("functor.jl")
 
 function __init__()
     Flux.METAL_LOADED[] = true
-    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["Meta"]] = Flux.FluxMetalDevice(Metal.current_device())
+    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["Meta"]] = Metal.functional() ? Flux.FluxMetalDevice(Metal.current_device()) : Flux.FluxMetalDevice(nothing)
 end
 
 end

From 40b1fe23a3463e7d62476f79794fab760934e2fb Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 1 Aug 2023 00:58:21 +0530
Subject: [PATCH 18/22] Adding basic tests for device objects.

---
 test/functors.jl | 10 ++++++++++
 test/runtests.jl | 32 +++++++++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/test/functors.jl b/test/functors.jl
index 9919567b56..0308aa8526 100644
--- a/test/functors.jl
+++ b/test/functors.jl
@@ -2,3 +2,13 @@ x = rand(Float32, 10, 10)
 if !(Flux.CUDA_LOADED[] || Flux.AMD_LOADED[] || Flux.METAL_LOADED[])
   @test x === gpu(x)
 end
+
+@test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]) <: Nothing
+@test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]) <: Nothing
+@test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]) <: Nothing
+@test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CPU"]]) <: Flux.FluxCPUDevice
+
+device = Flux.get_device()
+@test typeof(device) <: Flux.FluxCPUDevice
+@test device(x) == x
+@test Flux._get_device_name(device) in Flux.supported_devices()
diff --git a/test/runtests.jl b/test/runtests.jl
index 90bafb67ba..fb657500e9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,8 +5,6 @@ using Test
 using Random, Statistics, LinearAlgebra
 using IterTools: ncycle
 using Zygote
-using CUDA
-using cuDNN
 
 # ENV["FLUX_TEST_AMDGPU"] = "true"
 ENV["FLUX_TEST_CUDA"] = "true"
@@ -61,14 +59,24 @@ Random.seed!(0)
   end
 
   if get(ENV, "FLUX_TEST_CUDA", "false") == "true"
-    using CUDA
+    using CUDA, cuDNN
     Flux.gpu_backend!("CUDA")
+
+    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]) <: Flux.FluxCUDADevice
+    device = Flux.get_device()
+
     @testset "CUDA" begin
       if CUDA.functional()
+        @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]].deviceID) <: CUDA.CuDevice
+        @test typeof(device) <: Flux.FluxCUDADevice
+        @test typeof(device.deviceID) <: CUDA.CuDevice
+        @test Flux._get_device_name(device) in Flux.supported_device()
+
         @info "Testing CUDA Support"
         include("ext_cuda/runtests.jl")
       else
         @warn "CUDA.jl package is not functional. Skipping CUDA tests."
+        @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]].deviceID) <: Nothing
       end
     end
   else
@@ -79,12 +87,21 @@ Random.seed!(0)
     using AMDGPU
     Flux.gpu_backend!("AMD")
+
+    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]) <: Flux.FluxAMDDevice
+    device = Flux.get_device()
+
     if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
+      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]].deviceID) <: AMDGPU.HIPDevice
+      @test typeof(device) <: Flux.FluxAMDDevice
+      @test typeof(device.deviceID) <: AMDGPU.HIPDevice
+      @test Flux._get_device_name(device) in Flux.supported_device()
+
       @testset "AMDGPU" begin
         include("ext_amdgpu/runtests.jl")
       end
     else
      @info "AMDGPU.jl package is not functional. Skipping AMDGPU tests."
+      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]].deviceID) <: nothing
    end
  else
    @info "Skipping AMDGPU tests, set FLUX_TEST_AMDGPU=true to run them."
@@ -94,12 +111,21 @@ Random.seed!(0)
     using Metal
     Flux.gpu_backend!("Metal")
+
+    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]) <: Flux.FluxAMDDevice
+    device = Flux.get_device()
+
     if Metal.functional()
+      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]].deviceID) <: Metal.MTLDevice
+      @test typeof(device) <: Flux.FluxMetalDevice
+      @test typeof(device.deviceID) <: Metal.MTLDevice
+      @test Flux._get_device_name(device) in Flux.supported_device()
+
       @testset "Metal" begin
         include("ext_metal/runtests.jl")
       end
     else
       @info "Metal.jl package is not functional. Skipping Metal tests."
+      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]].deviceID) <: Nothing
     end
   else
     @info "Skipping Metal tests, set FLUX_TEST_METAL=true to run them."

From df70154bc6074e5d673eccc8b02b723f7cdbe050 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 1 Aug 2023 01:50:44 +0530
Subject: [PATCH 19/22] Fixing minor errors in package extensions and tests.

---
 ext/FluxAMDGPUExt/FluxAMDGPUExt.jl | 2 +-
 ext/FluxMetalExt/FluxMetalExt.jl | 2 +-
 test/runtests.jl | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
index 57a2bb96b8..f41984ec38 100644
--- a/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
+++ b/ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -47,7 +47,7 @@ include("conv.jl")
 
 function __init__()
     Flux.AMDGPU_LOADED[] = true
-    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["AMD"]] = AMDGPU.functional() ? Flux.FluxAMDDevice(AMDGPU.device()) : Flux.FluxAMDDevice(nothing)
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]] = AMDGPU.functional() ? Flux.FluxAMDDevice(AMDGPU.device()) : Flux.FluxAMDDevice(nothing)
 end
 
 # TODO
diff --git a/ext/FluxMetalExt/FluxMetalExt.jl b/ext/FluxMetalExt/FluxMetalExt.jl
index 504ad01522..bca48fe279 100644
--- a/ext/FluxMetalExt/FluxMetalExt.jl
+++ b/ext/FluxMetalExt/FluxMetalExt.jl
@@ -33,7 +33,7 @@ include("functor.jl")
 
 function __init__()
     Flux.METAL_LOADED[] = true
-    Flux.DEVICES[Flux.GPU_BACKEND_ORDER["Meta"]] = Metal.functional() ? Flux.FluxMetalDevice(Metal.current_device()) : Flux.FluxMetalDevice(nothing)
+    Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]] = Metal.functional() ? Flux.FluxMetalDevice(Metal.current_device()) : Flux.FluxMetalDevice(nothing)
 end
 
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index fb657500e9..328c64e816 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -70,7 +70,7 @@ Random.seed!(0)
         @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]].deviceID) <: CUDA.CuDevice
         @test typeof(device) <: Flux.FluxCUDADevice
         @test typeof(device.deviceID) <: CUDA.CuDevice
-        @test Flux._get_device_name(device) in Flux.supported_device()
+        @test Flux._get_device_name(device) in Flux.supported_devices()
 
         @info "Testing CUDA Support"
         include("ext_cuda/runtests.jl")
@@ -94,7 +94,7 @@ Random.seed!(0)
       @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]].deviceID) <: AMDGPU.HIPDevice
       @test typeof(device) <: Flux.FluxAMDDevice
       @test typeof(device.deviceID) <: AMDGPU.HIPDevice
-      @test Flux._get_device_name(device) in Flux.supported_device()
+      @test Flux._get_device_name(device) in Flux.supported_devices()
 
       @testset "AMDGPU" begin
         include("ext_amdgpu/runtests.jl")
@@ -111,14 +111,14 @@ Random.seed!(0)
     using Metal
     Flux.gpu_backend!("Metal")
 
-    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]) <: Flux.FluxAMDDevice
+    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]) <: Flux.FluxMetalDevice
     device = Flux.get_device()
 
     if Metal.functional()
       @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]].deviceID) <: Metal.MTLDevice
       @test typeof(device) <: Flux.FluxMetalDevice
       @test typeof(device.deviceID) <: Metal.MTLDevice
-      @test Flux._get_device_name(device) in Flux.supported_device()
+      @test Flux._get_device_name(device) in Flux.supported_devices()
 
       @testset "Metal" begin
         include("ext_metal/runtests.jl")

From 650a273c3209d84d9e20a5f75028e795504673a2 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Tue, 1 Aug 2023 02:13:21 +0530
Subject: [PATCH 20/22] Minor fix in tests + docs.

---
 src/functor.jl | 2 +-
 test/functors.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/functor.jl b/src/functor.jl
index d39fb619af..8296e6bd98 100644
--- a/src/functor.jl
+++ b/src/functor.jl
@@ -644,7 +644,7 @@ function get_device(; verbose=false)::AbstractDevice
             if _isfunctional(device)
                 return device
             else
-                @warn "Backend: $backend from the set preferences is not functional. Defaulting to autmatic device selection."
+                @warn "Backend: $backend from the set preferences is not functional. Defaulting to automatic device selection."
             end
         end
diff --git a/test/functors.jl b/test/functors.jl
index 0308aa8526..b2c6c37f8d 100644
--- a/test/functors.jl
+++ b/test/functors.jl
@@ -1,5 +1,5 @@
 x = rand(Float32, 10, 10)
-if !(Flux.CUDA_LOADED[] || Flux.AMD_LOADED[] || Flux.METAL_LOADED[])
+if !(Flux.CUDA_LOADED[] || Flux.AMDGPU_LOADED[] || Flux.METAL_LOADED[])
   @test x === gpu(x)
 end

From 1495e0405908b82357e7c777ab0cf67ec6bed5d5 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Wed, 2 Aug 2023 16:42:25 +0530
Subject: [PATCH 21/22] Moving device tests to extensions, and adding a basic data transfer test.
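
The device tests now live next to the tests of the corresponding package
extension, and each backend gets a check that data actually lands on the
selected device. Below is a minimal sketch of that data transfer check for
the CUDA case, distilled from the new test files (assuming a functional
CUDA device; the AMDGPU and Metal variants compare `device_id` and
`registryID` respectively, instead of `handle`):

    using Flux, CUDA  # loading CUDA registers the device in Flux.DEVICES

    device = Flux.get_device()  # here, a Flux.FluxCUDADevice
    x = randn(5, 5)
    cx = x |> device            # device objects are callable, so they compose with |>

    @assert cx isa CUDA.CuArray
    # the array must live on the exact GPU held by the device object
    @assert CUDA.device(cx).handle == device.deviceID.handle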
---
 test/ext_amdgpu/device_selection.jl | 10 ++++++++++
 test/ext_amdgpu/get_device.jl | 11 +++++++++++
 test/ext_amdgpu/runtests.jl | 4 ++++
 test/ext_cuda/device_selection.jl | 10 ++++++++++
 test/ext_cuda/get_device.jl | 11 +++++++++++
 test/ext_cuda/runtests.jl | 3 +++
 test/ext_metal/device_selection.jl | 10 ++++++++++
 test/ext_metal/get_device.jl | 11 +++++++++++
 test/ext_metal/runtests.jl | 4 ++++
 test/runtests.jl | 29 ++++-------------------------
 10 files changed, 78 insertions(+), 25 deletions(-)
 create mode 100644 test/ext_amdgpu/device_selection.jl
 create mode 100644 test/ext_amdgpu/get_device.jl
 create mode 100644 test/ext_cuda/device_selection.jl
 create mode 100644 test/ext_cuda/get_device.jl
 create mode 100644 test/ext_metal/device_selection.jl
 create mode 100644 test/ext_metal/get_device.jl

diff --git a/test/ext_amdgpu/device_selection.jl b/test/ext_amdgpu/device_selection.jl
new file mode 100644
index 0000000000..107700de26
--- /dev/null
+++ b/test/ext_amdgpu/device_selection.jl
@@ -0,0 +1,10 @@
+amd_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]
+
+# should pass, whether or not AMDGPU is functional
+@test typeof(amd_device) <: Flux.FluxAMDDevice
+
+if AMDGPU.functional()
+    @test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
+else
+    @test typeof(amd_device.deviceID) <: Nothing
+end
diff --git a/test/ext_amdgpu/get_device.jl b/test/ext_amdgpu/get_device.jl
new file mode 100644
index 0000000000..72b555f134
--- /dev/null
+++ b/test/ext_amdgpu/get_device.jl
@@ -0,0 +1,11 @@
+device = Flux.get_device()
+
+@test typeof(device) <: Flux.FluxAMDDevice
+@test typeof(device.deviceID) <: AMDGPU.HIPDevice
+@test Flux._get_device_name(device) in Flux.supported_devices()
+
+# correctness of data transfer
+x = randn(5, 5)
+cx = x |> device
+@test cx isa AMDGPU.ROCArray
+@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(device.deviceID)
diff --git a/test/ext_amdgpu/runtests.jl b/test/ext_amdgpu/runtests.jl
index ec4f04663f..985123c603 100644
--- a/test/ext_amdgpu/runtests.jl
+++ b/test/ext_amdgpu/runtests.jl
@@ -8,3 +8,7 @@ include("test_utils.jl")
 @testset "Basic" begin
     include("basic.jl")
 end
+
+@testset "get_device" begin
+    include("get_device.jl")
+end
diff --git a/test/ext_cuda/device_selection.jl b/test/ext_cuda/device_selection.jl
new file mode 100644
index 0000000000..14444b8e5a
--- /dev/null
+++ b/test/ext_cuda/device_selection.jl
@@ -0,0 +1,10 @@
+cuda_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]
+
+# should pass, whether or not CUDA is functional
+@test typeof(cuda_device) <: Flux.FluxCUDADevice
+
+if CUDA.functional()
+    @test typeof(cuda_device.deviceID) <: CUDA.CuDevice
+else
+    @test typeof(cuda_device.deviceID) <: Nothing
+end
diff --git a/test/ext_cuda/get_device.jl b/test/ext_cuda/get_device.jl
new file mode 100644
index 0000000000..262511823d
--- /dev/null
+++ b/test/ext_cuda/get_device.jl
@@ -0,0 +1,11 @@
+device = Flux.get_device()
+
+@test typeof(device) <: Flux.FluxCUDADevice
+@test typeof(device.deviceID) <: CUDA.CuDevice
+@test Flux._get_device_name(device) in Flux.supported_devices()
+
+# correctness of data transfer
+x = randn(5, 5)
+cx = x |> device
+@test cx isa CUDA.CuArray
+@test CUDA.device(cx).handle == device.deviceID.handle
diff --git a/test/ext_cuda/runtests.jl b/test/ext_cuda/runtests.jl
index 65dc51dbb0..586eef6f57 100644
--- a/test/ext_cuda/runtests.jl
+++ b/test/ext_cuda/runtests.jl
@@ -28,3 +28,6 @@ end
 @testset "ctc" begin
     include("ctc.jl")
 end
+@testset "get_device" begin
+    include("get_device.jl")
+end
diff --git a/test/ext_metal/device_selection.jl b/test/ext_metal/device_selection.jl
new file mode 100644
index 0000000000..44a0a19282
--- /dev/null
+++ b/test/ext_metal/device_selection.jl
@@ -0,0 +1,10 @@
+metal_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]
+
+# should pass, whether or not Metal is functional
+@test typeof(metal_device) <: Flux.FluxMetalDevice
+
+if Metal.functional()
+    @test typeof(metal_device.deviceID) <: Metal.MTLDevice
+else
+    @test typeof(metal_device.deviceID) <: Nothing
+end
diff --git a/test/ext_metal/get_device.jl b/test/ext_metal/get_device.jl
new file mode 100644
index 0000000000..bf2a52eee2
--- /dev/null
+++ b/test/ext_metal/get_device.jl
@@ -0,0 +1,11 @@
+device = Flux.get_device()
+
+@test typeof(device) <: Flux.FluxMetalDevice
+@test typeof(device.deviceID) <: Metal.MTLDevice
+@test Flux._get_device_name(device) in Flux.supported_devices()
+
+# correctness of data transfer
+x = randn(5, 5)
+cx = x |> device
+@test cx isa Metal.MtlArray
+@test Metal.device(cx).registryID == device.deviceID.registryID
diff --git a/test/ext_metal/runtests.jl b/test/ext_metal/runtests.jl
index e6ca64508b..e9e07053d8 100644
--- a/test/ext_metal/runtests.jl
+++ b/test/ext_metal/runtests.jl
@@ -11,3 +11,7 @@ include("test_utils.jl")
 @testset "Basic" begin
     include("basic.jl")
 end
+
+@testset "get_device" begin
+    include("get_device.jl")
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 328c64e816..342f00a06d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -62,21 +62,14 @@ Random.seed!(0)
     using CUDA, cuDNN
     Flux.gpu_backend!("CUDA")
 
-    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]) <: Flux.FluxCUDADevice
-    device = Flux.get_device()
-
     @testset "CUDA" begin
-      if CUDA.functional()
-        @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]].deviceID) <: CUDA.CuDevice
-        @test typeof(device) <: Flux.FluxCUDADevice
-        @test typeof(device.deviceID) <: CUDA.CuDevice
-        @test Flux._get_device_name(device) in Flux.supported_devices()
+      include("ext_cuda/device_selection.jl")
 
+      if CUDA.functional()
         @info "Testing CUDA Support"
         include("ext_cuda/runtests.jl")
       else
         @warn "CUDA.jl package is not functional. Skipping CUDA tests."
-        @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]].deviceID) <: Nothing
       end
     end
   else
@@ -87,21 +80,14 @@ Random.seed!(0)
     using AMDGPU
     Flux.gpu_backend!("AMD")
 
-    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]) <: Flux.FluxAMDDevice
-    device = Flux.get_device()
+    include("ext_amdgpu/device_selection.jl")
 
     if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
-      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]].deviceID) <: AMDGPU.HIPDevice
-      @test typeof(device) <: Flux.FluxAMDDevice
-      @test typeof(device.deviceID) <: AMDGPU.HIPDevice
-      @test Flux._get_device_name(device) in Flux.supported_devices()
-
      @testset "AMDGPU" begin
        include("ext_amdgpu/runtests.jl")
      end
    else
      @info "AMDGPU.jl package is not functional. Skipping AMDGPU tests."
-      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]].deviceID) <: nothing
    end
  else
    @info "Skipping AMDGPU tests, set FLUX_TEST_AMDGPU=true to run them."
@@ -111,21 +97,14 @@ Random.seed!(0)
     using Metal
     Flux.gpu_backend!("Metal")
 
-    @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]) <: Flux.FluxMetalDevice
-    device = Flux.get_device()
+    include("ext_metal/device_selection.jl")
 
     if Metal.functional()
-      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]].deviceID) <: Metal.MTLDevice
-      @test typeof(device) <: Flux.FluxMetalDevice
-      @test typeof(device.deviceID) <: Metal.MTLDevice
-      @test Flux._get_device_name(device) in Flux.supported_devices()
-
       @testset "Metal" begin
         include("ext_metal/runtests.jl")
       end
     else
       @info "Metal.jl package is not functional. Skipping Metal tests."
-      @test typeof(Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]].deviceID) <: Nothing
     end
   else
     @info "Skipping Metal tests, set FLUX_TEST_METAL=true to run them."

From b07985bf30dc952580777f3c7cafb9ec4ac4b5e6 Mon Sep 17 00:00:00 2001
From: Siddhant Chaudhary
Date: Wed, 2 Aug 2023 23:37:18 +0530
Subject: [PATCH 22/22] Moving all device tests into a single file per extension.

---
 test/ext_amdgpu/device_selection.jl | 10 ----------
 test/ext_amdgpu/get_device.jl | 11 -----------
 test/ext_amdgpu/get_devices.jl | 24 ++++++++++++++++++++++++
 test/ext_amdgpu/runtests.jl | 4 ----
 test/ext_cuda/device_selection.jl | 10 ----------
 test/ext_cuda/get_device.jl | 11 -----------
 test/ext_cuda/get_devices.jl | 25 +++++++++++++++++++++++++
 test/ext_cuda/runtests.jl | 3 ---
 test/ext_metal/device_selection.jl | 10 ----------
 test/ext_metal/get_device.jl | 11 -----------
 test/ext_metal/get_devices.jl | 25 +++++++++++++++++++++++++
 test/ext_metal/runtests.jl | 4 ----
 test/runtests.jl | 6 +++---
 13 files changed, 77 insertions(+), 77 deletions(-)
 delete mode 100644 test/ext_amdgpu/device_selection.jl
 delete mode 100644 test/ext_amdgpu/get_device.jl
 create mode 100644 test/ext_amdgpu/get_devices.jl
 delete mode 100644 test/ext_cuda/device_selection.jl
 delete mode 100644 test/ext_cuda/get_device.jl
 create mode 100644 test/ext_cuda/get_devices.jl
 delete mode 100644 test/ext_metal/device_selection.jl
 delete mode 100644 test/ext_metal/get_device.jl
 create mode 100644 test/ext_metal/get_devices.jl

diff --git a/test/ext_amdgpu/device_selection.jl b/test/ext_amdgpu/device_selection.jl
deleted file mode 100644
index 107700de26..0000000000
--- a/test/ext_amdgpu/device_selection.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-amd_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]
-
-# should pass, whether or not AMDGPU is functional
-@test typeof(amd_device) <: Flux.FluxAMDDevice
-
-if AMDGPU.functional()
-    @test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
-else
-    @test typeof(amd_device.deviceID) <: Nothing
-end
diff --git a/test/ext_amdgpu/get_device.jl b/test/ext_amdgpu/get_device.jl
deleted file mode 100644
index 72b555f134..0000000000
--- a/test/ext_amdgpu/get_device.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-device = Flux.get_device()
-
-@test typeof(device) <: Flux.FluxAMDDevice
-@test typeof(device.deviceID) <: AMDGPU.HIPDevice
-@test Flux._get_device_name(device) in Flux.supported_devices()
-
-# correctness of data transfer
-x = randn(5, 5)
-cx = x |> device
-@test cx isa AMDGPU.ROCArray
-@test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(device.deviceID)
diff --git a/test/ext_amdgpu/get_devices.jl b/test/ext_amdgpu/get_devices.jl
new file mode 100644
index 0000000000..7691241f38
--- /dev/null
+++ b/test/ext_amdgpu/get_devices.jl
@@ -0,0 +1,24 @@
+amd_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["AMD"]]
+
+# should pass, whether or not AMDGPU is functional
+@test typeof(amd_device) <: Flux.FluxAMDDevice
+
+if AMDGPU.functional()
+    @test typeof(amd_device.deviceID) <: AMDGPU.HIPDevice
+else
+    @test typeof(amd_device.deviceID) <: Nothing
+end
+
+if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
+    device = Flux.get_device()
+
+    @test typeof(device) <: Flux.FluxAMDDevice
+    @test typeof(device.deviceID) <: AMDGPU.HIPDevice
+    @test Flux._get_device_name(device) in Flux.supported_devices()
+
+    # correctness of data transfer
+    x = randn(5, 5)
+    cx = x |> device
+    @test cx isa AMDGPU.ROCArray
+    @test AMDGPU.device_id(AMDGPU.device(cx)) == AMDGPU.device_id(device.deviceID)
+end
diff --git a/test/ext_amdgpu/runtests.jl b/test/ext_amdgpu/runtests.jl
index 985123c603..ec4f04663f 100644
--- a/test/ext_amdgpu/runtests.jl
+++ b/test/ext_amdgpu/runtests.jl
@@ -8,7 +8,3 @@ include("test_utils.jl")
 @testset "Basic" begin
     include("basic.jl")
 end
-
-@testset "get_device" begin
-    include("get_device.jl")
-end
diff --git a/test/ext_cuda/device_selection.jl b/test/ext_cuda/device_selection.jl
deleted file mode 100644
index 14444b8e5a..0000000000
--- a/test/ext_cuda/device_selection.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-cuda_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]
-
-# should pass, whether or not CUDA is functional
-@test typeof(cuda_device) <: Flux.FluxCUDADevice
-
-if CUDA.functional()
-    @test typeof(cuda_device.deviceID) <: CUDA.CuDevice
-else
-    @test typeof(cuda_device.deviceID) <: Nothing
-end
diff --git a/test/ext_cuda/get_device.jl b/test/ext_cuda/get_device.jl
deleted file mode 100644
index 262511823d..0000000000
--- a/test/ext_cuda/get_device.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-device = Flux.get_device()
-
-@test typeof(device) <: Flux.FluxCUDADevice
-@test typeof(device.deviceID) <: CUDA.CuDevice
-@test Flux._get_device_name(device) in Flux.supported_devices()
-
-# correctness of data transfer
-x = randn(5, 5)
-cx = x |> device
-@test cx isa CUDA.CuArray
-@test CUDA.device(cx).handle == device.deviceID.handle
diff --git a/test/ext_cuda/get_devices.jl b/test/ext_cuda/get_devices.jl
new file mode 100644
index 0000000000..e1f4c7d8a8
--- /dev/null
+++ b/test/ext_cuda/get_devices.jl
@@ -0,0 +1,25 @@
+cuda_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["CUDA"]]
+
+# should pass, whether or not CUDA is functional
+@test typeof(cuda_device) <: Flux.FluxCUDADevice
+
+if CUDA.functional()
+    @test typeof(cuda_device.deviceID) <: CUDA.CuDevice
+else
+    @test typeof(cuda_device.deviceID) <: Nothing
+end
+
+# testing get_device
+if CUDA.functional()
+    device = Flux.get_device()
+
+    @test typeof(device) <: Flux.FluxCUDADevice
+    @test typeof(device.deviceID) <: CUDA.CuDevice
+    @test Flux._get_device_name(device) in Flux.supported_devices()
+
+    # correctness of data transfer
+    x = randn(5, 5)
+    cx = x |> device
+    @test cx isa CUDA.CuArray
+    @test CUDA.device(cx).handle == device.deviceID.handle
+end
diff --git a/test/ext_cuda/runtests.jl b/test/ext_cuda/runtests.jl
index 586eef6f57..65dc51dbb0 100644
--- a/test/ext_cuda/runtests.jl
+++ b/test/ext_cuda/runtests.jl
@@ -28,6 +28,3 @@ end
 @testset "ctc" begin
     include("ctc.jl")
 end
-@testset "get_device" begin
-    include("get_device.jl")
-end
diff --git a/test/ext_metal/device_selection.jl b/test/ext_metal/device_selection.jl
deleted file mode 100644
index 44a0a19282..0000000000
--- a/test/ext_metal/device_selection.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-metal_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]
-
-# should pass, whether or not Metal is functional
-@test typeof(metal_device) <: Flux.FluxMetalDevice
-
-if Metal.functional()
-    @test typeof(metal_device.deviceID) <: Metal.MTLDevice
-else
-    @test typeof(metal_device.deviceID) <: Nothing
-end
diff --git a/test/ext_metal/get_device.jl b/test/ext_metal/get_device.jl
deleted file mode 100644
index bf2a52eee2..0000000000
--- a/test/ext_metal/get_device.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-device = Flux.get_device()
-
-@test typeof(device) <: Flux.FluxMetalDevice
-@test typeof(device.deviceID) <: Metal.MTLDevice
-@test Flux._get_device_name(device) in Flux.supported_devices()
-
-# correctness of data transfer
-x = randn(5, 5)
-cx = x |> device
-@test cx isa Metal.MtlArray
-@test Metal.device(cx).registryID == device.deviceID.registryID
diff --git a/test/ext_metal/get_devices.jl b/test/ext_metal/get_devices.jl
new file mode 100644
index 0000000000..83786e9834
--- /dev/null
+++ b/test/ext_metal/get_devices.jl
@@ -0,0 +1,25 @@
+metal_device = Flux.DEVICES[][Flux.GPU_BACKEND_ORDER["Metal"]]
+
+# should pass, whether or not Metal is functional
+@test typeof(metal_device) <: Flux.FluxMetalDevice
+
+if Metal.functional()
+    @test typeof(metal_device.deviceID) <: Metal.MTLDevice
+else
+    @test typeof(metal_device.deviceID) <: Nothing
+end
+
+# testing get_device
+if Metal.functional()
+    device = Flux.get_device()
+
+    @test typeof(device) <: Flux.FluxMetalDevice
+    @test typeof(device.deviceID) <: Metal.MTLDevice
+    @test Flux._get_device_name(device) in Flux.supported_devices()
+
+    # correctness of data transfer
+    x = randn(5, 5)
+    cx = x |> device
+    @test cx isa Metal.MtlArray
+    @test Metal.device(cx).registryID == device.deviceID.registryID
+end
diff --git a/test/ext_metal/runtests.jl b/test/ext_metal/runtests.jl
index e9e07053d8..e6ca64508b 100644
--- a/test/ext_metal/runtests.jl
+++ b/test/ext_metal/runtests.jl
@@ -11,7 +11,3 @@ include("test_utils.jl")
 @testset "Basic" begin
     include("basic.jl")
 end
-
-@testset "get_device" begin
-    include("get_device.jl")
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 342f00a06d..539586f4c7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -63,7 +63,7 @@ Random.seed!(0)
     Flux.gpu_backend!("CUDA")
 
     @testset "CUDA" begin
-      include("ext_cuda/device_selection.jl")
+      include("ext_cuda/get_devices.jl")
 
       if CUDA.functional()
         @info "Testing CUDA Support"
@@ -80,7 +80,7 @@ Random.seed!(0)
     using AMDGPU
     Flux.gpu_backend!("AMD")
 
-    include("ext_amdgpu/device_selection.jl")
+    include("ext_amdgpu/get_devices.jl")
 
     if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
       @testset "AMDGPU" begin
@@ -97,7 +97,7 @@ Random.seed!(0)
     using Metal
     Flux.gpu_backend!("Metal")
 
-    include("ext_metal/device_selection.jl")
+    include("ext_metal/get_devices.jl")
 
     if Metal.functional()
       @testset "Metal" begin