
Adding device objects for selecting GPU backends (and defaulting to CPU if none exists). #2297

Merged · 22 commits · Aug 4, 2023

Changes from 8 commits

Commits
5d4f2a2
Adding structs for cpu and gpu devices.
codetalker7 Jul 22, 2023
70044fb
Adding implementation of `Flux.get_device()`, which returns the most
codetalker7 Jul 22, 2023
0dc5629
Adding docstrings for the new device types, and the `get_device` func…
codetalker7 Jul 23, 2023
a3f9257
Adding `CPU` to the list of supported backends. Made corresponding
codetalker7 Jul 23, 2023
18938de
Using `julia-repl` instead of `jldoctest`, and `@info` instead of `@w…
codetalker7 Jul 23, 2023
bf134ad
Adding `DataLoader` functionality to device objects.
codetalker7 Jul 23, 2023
f8fc22c
Removing pkgids and defining new functions to check whether backend is
codetalker7 Jul 25, 2023
3cd1d89
Correcting typographical errors, and removing useless imports.
codetalker7 Jul 25, 2023
f7f21e1
Adding `deviceID` to each device struct, and moving struct definitions
codetalker7 Jul 27, 2023
d22aaf5
Adding tutorial for using device objects in manual.
codetalker7 Jul 30, 2023
03faa96
Adding docstring for `get_device` in manual, and renaming internal
codetalker7 Jul 31, 2023
e1ad3e7
Minor change in docs.
codetalker7 Jul 31, 2023
179bbea
Removing structs from package extensions as it is bad practice.
codetalker7 Jul 31, 2023
bb67ad6
Adding more docstrings in manual.
codetalker7 Jul 31, 2023
7be1700
Removing redundant log messages.
codetalker7 Jul 31, 2023
7558d29
Adding kwarg to `get_device` for verbose output.
codetalker7 Jul 31, 2023
95e3bc3
Setting `deviceID` to `nothing` if GPU is not functional.
codetalker7 Jul 31, 2023
40b1fe2
Adding basic tests for device objects.
codetalker7 Jul 31, 2023
df70154
Fixing minor errors in package extensions and tests.
codetalker7 Jul 31, 2023
650a273
Minor fix in tests + docs.
codetalker7 Jul 31, 2023
1495e04
Moving device tests to extensions, and adding a basic data transfer
codetalker7 Aug 2, 2023
b07985b
Moving all device tests in single file per extension.
codetalker7 Aug 2, 2023
3 changes: 3 additions & 0 deletions ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -17,6 +17,9 @@ const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat
# Set to boolean on the first call to check_use_amdgpu
const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)

Flux.isavailable(device::Flux.FluxAMDDevice) = true
Flux.isfunctional(device::Flux.FluxAMDDevice) = AMDGPU.functional()

function check_use_amdgpu()
if !isnothing(USE_AMDGPU[])
return
3 changes: 3 additions & 0 deletions ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -14,6 +14,9 @@ import Adapt: adapt_storage

const USE_CUDA = Ref{Union{Nothing, Bool}}(nothing)

Flux.isavailable(device::Flux.FluxCUDADevice) = true
Flux.isfunctional(device::Flux.FluxCUDADevice) = CUDA.functional()

function check_use_cuda()
if !isnothing(USE_CUDA[])
return
3 changes: 3 additions & 0 deletions ext/FluxMetalExt/FluxMetalExt.jl
@@ -12,6 +12,9 @@ using Zygote

const USE_METAL = Ref{Union{Nothing, Bool}}(nothing)

Flux.isavailable(device::Flux.FluxMetalDevice) = true
Flux.isfunctional(device::Flux.FluxMetalDevice) = Metal.functional()

function check_use_metal()
isnothing(USE_METAL[]) || return

206 changes: 205 additions & 1 deletion src/functor.jl
@@ -187,7 +187,7 @@ _isbitsarray(x) = false
_isleaf(::AbstractRNG) = true
_isleaf(x) = _isbitsarray(x) || Functors.isleaf(x)

const GPU_BACKENDS = ("CUDA", "AMD", "Metal")
const GPU_BACKENDS = ("CUDA", "AMD", "Metal", "CPU")
const GPU_BACKEND = @load_preference("gpu_backend", "CUDA")

function gpu_backend!(backend::String)
@@ -249,6 +249,8 @@ function gpu(x)
gpu(FluxAMDAdaptor(), x)
elseif GPU_BACKEND == "Metal"
gpu(FluxMetalAdaptor(), x)
elseif GPU_BACKEND == "CPU"
cpu(x)
else
error("""
Unsupported GPU backend: $GPU_BACKEND.
@@ -444,3 +446,205 @@ function gpu(d::MLUtils.DataLoader)
d.rng,
)
end

"""
Flux.AbstractDevice <: Function

An abstract type representing `device` objects for different GPU backends. The currently supported backends are `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"`; the `"CPU"` backend is the fallback case when no GPU is available.
"""
abstract type AbstractDevice <: Function end

"""
Flux.FluxCPUDevice <: Flux.AbstractDevice

A type representing `device` objects for the `"CPU"` backend for Flux. This is the fallback case when no GPU is available to Flux.
"""
Base.@kwdef struct FluxCPUDevice <: AbstractDevice
name::String = "CPU"
end

"""
Flux.FluxCUDADevice <: Flux.AbstractDevice

A type representing `device` objects for the `"CUDA"` backend for Flux.
"""
Base.@kwdef struct FluxCUDADevice <: AbstractDevice
name::String = "CUDA"
end

"""
Flux.FluxAMDDevice <: Flux.AbstractDevice

A type representing `device` objects for the `"AMD"` backend for Flux.
"""
Base.@kwdef struct FluxAMDDevice <: AbstractDevice
name::String = "AMD"
end

"""
Flux.FluxMetalDevice <: Flux.AbstractDevice

A type representing `device` objects for the `"Metal"` backend for Flux.
"""
Base.@kwdef struct FluxMetalDevice <: AbstractDevice
name::String = "Metal"

Member:
Can we use dispatch to get these fixed names and use the fields to instead store info about the actual device? e.g. ordinal number or wrapping the actual device type(s) from each GPU package.

Contributor Author:
Yes, I'll try to add this to the structs.

Contributor Author:
Hi @ToucheSir. I've added a deviceID to each device struct, whose type is the device type from the corresponding GPU package. Since neither KernelAbstractions nor GPUArrays has a type hierarchy for device objects, I've moved the struct definitions to the package extensions. The device types are CUDA.CuDevice, AMDGPU.HIPDevice and Metal.MTLDevice respectively.

One disadvantage of this approach: from what I understand, Flux leaves the work of managing devices to the GPU packages. So, if the user chooses to switch devices using functions from the GPU package, then our device object will also have to be updated (which currently isn't the case). But if users of Flux don't care about which device is allocated to them, I think this works fine.

What do you think about this?
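
For concreteness, a hypothetical REPL sketch of the `deviceID` field described above (it lands in later commits of this PR, not in the 8-commit snapshot shown here; output assumes a functional CUDA device):

```julia-repl
julia> using Flux, CUDA;

julia> device = Flux.get_device();   # picks the CUDA device since CUDA.jl is loaded and functional

julia> device.deviceID               # the underlying CUDA.CuDevice wrapped by the Flux device object
CuDevice(0): NVIDIA GeForce GTX 1650
```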

Member:
In my mind, the whole point of calling this a device instead of a backend is that we'd allow users to choose which device they want their model to be transferred onto. If that's not feasible because of limitations in the way GPU packages must be used, I'd rather just call these backends instead. Others might have differing opinions on this, however, cc @CarloLucibello from earlier.

Contributor Author:
> In my mind, the whole point of calling this a device instead of a backend is that we'd allow users to choose which device they want their model to be transferred onto. If that's not feasible because of limitations in the way GPU packages must be used, I'd rather just call these backends instead. Others might have differing opinions on this, however, cc @CarloLucibello from earlier.

Yes, I agree. Also, if a user wants to have finer control over which device they want to use, isn't it better for them to just rely on CUDA.jl for example?

If not, I think it won't be hard to add a device selection capability within Flux as well. But ultimately, we will be calling functions from GPU packages, which the user can just call themselves.

Contributor Author:
Sure. I'm fine with either; also, if we are to implement an interface for handling multiple devices, wouldn't it be a good idea to first discuss the overall API we want, and the specific implementation details we need (asking because I'm not completely aware of what all I'll have to implement to handle multiple devices)?

For instance, when we are talking about "multiple devices", do we mean giving the user the ability to use just one device, but with the option to choose which one? Or do we mean using multiple devices simultaneously to train models? For the latter, I was going through DaggerFlux.jl and it seems considerably more involved. The first idea seems easier to implement.

Member:
Somewhere in the middle I think. Training on multiple GPUs is out of scope for this PR (we have other efforts looking into that), but allowing users to transfer models to any active GPU without calling device! beforehand every time would be great for ergonomics.

Contributor Author:
> Somewhere in the middle I think. Training on multiple GPUs is out of scope for this PR (we have other efforts looking into that), but allowing users to transfer models to any active GPU without calling device! beforehand every time would be great for ergonomics.

Sure, I think this shouldn't be too hard to implement. I have one idea for this.

Device methods

We will have the following methods:

function get_device()
    # this will be what we have right now
    # this returns an `AbstractDevice` whose deviceID
    # is the device with which the GPU package has been
    # initialized automatically
end

function get_device(backend::Type{<:KA.GPU}, ordinal::UInt)
    # this will return an `AbstractDevice` from the given backend whose deviceID
    # is the device with the given ordinal number. These methods will be defined
    # in the corresponding package extensions.
end

With these functions, users can then specify the backend + ordinal of the GPU device which they want to work with.
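
For illustration, a hypothetical sketch of how the proposed two-argument method might be used (none of this exists in the current PR; `CUDABackend` stands for the `KernelAbstractions.GPU` subtype provided by CUDA.jl):

```julia
using Flux, CUDA

# Hypothetical: request a device from a specific backend and ordinal
# via the proposed get_device(backend, ordinal) method.
device = Flux.get_device(CUDABackend, UInt(1))   # e.g. the second CUDA device

# Transfer a model onto that device, as with the existing device objects.
m = Dense(2 => 3) |> device
```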

Model transfer between devices

Next, suppose we have a model m which is bound to an AbstractDevice, say device1, which has a backend1::Type{<:KA.GPU} and an ordinal1::UInt. Suppose device2 is another device object with backend2::Type{<:KA.GPU} and ordinal2::UInt.

Then, a call to device2(m) will do the following: if backend1 == backend2 and ordinal1 == ordinal2, then nothing happens and m is returned. Otherwise, device1 is "freed" of m (we'll have to do some device memory management here) and m is bound to device2.

In the above, the tricky part is how to identify the GPU backend + ordinal which m is bound to, and how to free the memory taken by m on the device. For simple models like Dense, I can do the following:

# suppose the backend is CUDA
julia> using Flux, CUDA;

julia> m = Dense(2 => 3) |> gpu;

julia> CUDA.device(m.weight)    # this gives me the device to which m is bound
CuDevice(0): NVIDIA GeForce GTX 1650

julia> CUDA.unsafe_free!(m.weight) ;   # just an idea, but something similar

Now clearly, I can't do something similar if m is a complex model. So we'll probably have to add some property to models which stores the device backend + ordinal to which they are bound.

Regarding freeing the GPU device memory: for CUDA, for example, we can probably use the CUDA.unsafe_free! method. But it might be unsafe for a reason.

How does this idea sound, @ToucheSir @CarloLucibello? Any pointers/suggestions on how to track which device a model is bound to and how to do the memory management?

Member:
If the actual data movement adaptors (e.g. FluxCUDAAdaptor) receive the device ID as an argument, then you only need to apply your detect + free logic at the level of individual parameters. fmap will take care of mapping the logic over a complex model.

In the simple case we are talking about, every parameter in the model should be bound to the same device. In general, model parallelism means that a model could be across multiple devices.
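
A minimal sketch of that shape (purely illustrative; it assumes a hypothetical adaptor type carrying the target ordinal, which the `FluxCUDAAdaptor` in this PR does not have, and it omits the detect-and-free step discussed above):

```julia
using Flux, CUDA
using Functors: fmap

# Hypothetical adaptor that carries the target CUDA device ordinal.
struct OrdinalCUDAAdaptor
    ordinal::Int
end

# Per-parameter logic: copy an array onto the requested device by
# temporarily switching the active device for the allocation.
function move_param(to::OrdinalCUDAAdaptor, x::AbstractArray)
    old = CUDA.device()
    try
        CUDA.device!(to.ordinal)
        return CUDA.cu(x)
    finally
        CUDA.device!(old)
    end
end
move_param(::OrdinalCUDAAdaptor, x) = x   # leave non-array leaves untouched

# fmap applies the per-parameter logic across an arbitrarily nested model.
to_cuda_device(m, ordinal) = fmap(x -> move_param(OrdinalCUDAAdaptor(ordinal), x), m)
```

The freeing of memory on the old device would slot into move_param; the sketch above only handles the copy.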

Member:
Yes, the only thing we need to worry about is "can I move this array to the device the user asked for?", which sounds simple but might be tricky in practice if the GPU packages don't provide a way to do that directly. I hope there's a relatively straightforward way for most of them, but if not we can save that for future work and/or bug upstream to add it in for us :)

end

(::FluxCPUDevice)(x) = cpu(x)
(::FluxCUDADevice)(x) = gpu(FluxCUDAAdaptor(), x)
(::FluxAMDDevice)(x) = gpu(FluxAMDAdaptor(), x)
(::FluxMetalDevice)(x) = gpu(FluxMetalAdaptor(), x)

# Applying device to DataLoader
function _apply_to_dataloader(device::T, d::MLUtils.DataLoader) where {T <: AbstractDevice}
MLUtils.DataLoader(MLUtils.mapobs(device, d.data),
d.batchsize,
d.buffer,
d.partial,
d.shuffle,
d.parallel,
d.collate,
d.rng,
)
end

(device::FluxCPUDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
(device::FluxCUDADevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
(device::FluxAMDDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
(device::FluxMetalDevice)(d::MLUtils.DataLoader) = _apply_to_dataloader(device, d)
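
# Illustrative usage (not part of this diff): with a GPU backend loaded and functional,
# a DataLoader can be moved wholesale, e.g.
#     device = Flux.get_device()
#     gpu_loader = device(DataLoader((X, y), batchsize=16))
# Data is then transferred lazily via `MLUtils.mapobs` as the loader is iterated.
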
function _get_device_name(t::T) where {T <: AbstractDevice}
return hasfield(T, :name) ? t.name : ""
end

## check device availability; more definitions in corresponding extensions
isavailable(device::AbstractDevice) = false
isfunctional(device::AbstractDevice) = false

isavailable(device::FluxCPUDevice) = true
isfunctional(device::FluxCPUDevice) = true

# below order is important
const DEVICES = (FluxCUDADevice(), FluxAMDDevice(), FluxMetalDevice(), FluxCPUDevice())

"""
Flux.supported_devices()

Get all supported backends for Flux, in order of preference.

# Example

```jldoctest
julia> using Flux;

julia> Flux.supported_devices()
("CUDA", "AMD", "Metal", "CPU")
```
"""
supported_devices() = map(_get_device_name, DEVICES)

"""
Flux.get_device()::AbstractDevice

Returns a `device` object for the most appropriate backend for the current Julia session.

First, the function checks whether a backend preference has been set via the [`gpu_backend!`](@ref) function. If so, an attempt is made to use this backend. If the corresponding trigger package has been loaded and the backend is functional, a `device` corresponding to the given backend is returned. Otherwise, the backend is chosen automatically. To update the backend preference, use [`gpu_backend!`](@ref).

If there is no preference, then for each of the `"CUDA"`, `"AMD"`, `"Metal"` and `"CPU"` backends in the given order, this function checks whether the given backend has been loaded via the corresponding trigger package, and whether the backend is functional. If so, the `device` corresponding to the backend is returned. If no GPU backend is available, a `Flux.FluxCPUDevice` is returned.

# Examples
For the example given below, the backend preference was set to `"AMD"` via the [`gpu_backend!`](@ref) function.

```julia-repl
julia> using Flux;

julia> model = Dense(2 => 3)
Dense(2 => 3) # 9 parameters

julia> device = Flux.get_device() # this will just load the CPU device
[ Info: Using backend set in preferences: AMD.
┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded.
│ Please load the package and call this function again to respect the preferences backend.
└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496
[ Info: Running automatic device selection...
(::Flux.FluxCPUDevice) (generic function with 1 method)

julia> model = model |> device
Dense(2 => 3) # 9 parameters

julia> model.weight
3×2 Matrix{Float32}:
-0.304362 -0.700477
-0.861201 0.67825
-0.176017 0.234188
```

Here is the same example, but using `"CUDA"`:

```julia-repl
julia> using Flux, CUDA;

julia> model = Dense(2 => 3)
Dense(2 => 3) # 9 parameters

julia> device = Flux.get_device()
[ Info: Using backend set in preferences: AMD.
┌ Warning: Trying to use backend AMD but package AMDGPU [21141c5a-9bdb-4563-92ae-f87d6854732e] is not loaded.
│ Please load the package and call this function again to respect the preferences backend.
└ @ Flux ~/fluxml/Flux.jl/src/functor.jl:496
[ Info: Running automatic device selection...
(::Flux.FluxCUDADevice) (generic function with 1 method)

julia> model = model |> device
Dense(2 => 3) # 9 parameters

julia> model.weight
3×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
0.820013 0.527131
-0.915589 0.549048
0.290744 -0.0592499
```
"""
function get_device()::AbstractDevice
backend = @load_preference("gpu_backend", nothing)
if backend !== nothing
allowed_backends = supported_devices()
idx = findfirst(isequal(backend), allowed_backends)
if backend ∉ allowed_backends
@warn """
`gpu_backend` preference is set to $backend, which is not allowed.
Defaulting to automatic device selection.
""" maxlog=1
else
@info "Using backend set in preferences: $backend."
device = DEVICES[idx]

if !isavailable(device)
@warn """
Trying to use backend $(_get_device_name(device)), but its trigger package is not loaded.
Please load the package and call this function again to respect the preferences backend.
""" maxlog=1
else
if isfunctional(device)
@info "Using backend: $(_get_device_name(device))"
return device
else
@warn "Backend: $(_get_device_name(device)) from the set preferences is not functional. Defaulting to autmatic device selection." maxlog=1
end
end
end
end

@info "Running automatic device selection..."
for device in DEVICES
if isavailable(device)
@info "Trying backend: $(_get_device_name(device))."
if isfunctional(device)
@debug "Using backend: $(_get_device_name(device))."
return device
end
@info "Backend: $(_get_device_name(device)) is not functional."
else
@info "Trigger package for backend ($(_get_device_name(device))) is not loaded."
end
end
end