Commit

simplify test machinery (#2498)
* simplify test machinery
CarloLucibello authored Oct 13, 2024
1 parent 09a16ee commit 35b893a
Showing 19 changed files with 153 additions and 273 deletions.
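
The recurring change in this commit is the replacement of the old gpu_gradtest/gpu_autodiff_test machinery with calls to a test_gradients helper. As a hedged sketch of how that helper is invoked, using only keyword names that appear in the hunks below and assuming the helper is brought into scope by Flux's test utilities (it is not defined in this commit):

using Flux
# Hypothetical usage mirroring the calls in this diff; `test_gradients` is
# assumed to come from Flux's test utilities and is not part of this commit.
m = Dense(10 => 5, tanh)
x = rand(Float32, 10, 4)
test_gradients(m, x;
               test_gpu = true,              # also run forward/backward on the GPU and compare
               compare_finite_diff = false,  # skip the finite-difference cross-check
               test_grad_f = false)          # here, only check the gradient w.r.t. the input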
1 change: 1 addition & 0 deletions .gitignore
@@ -10,3 +10,4 @@ Manifest.toml
LocalPreferences.toml
.DS_Store
docs/mymodel.bson
prova.jl
2 changes: 1 addition & 1 deletion src/distributed/public_api.jl
@@ -132,7 +132,7 @@ Backend Agnostic API to perform an allreduce operation on the given buffer `send
workers.
"""
function allreduce!(backend::AbstractFluxDistributedBackend, sendrecvbuf, op::F) where {F}
return __allreduce!(backend, sendrecvbuf, op, get_device())
return __allreduce!(backend, sendrecvbuf, op, gpu_device())
end

function allreduce!(
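
For context on the hunk above, get_device() is replaced by gpu_device(). A minimal sketch of the accessor, assuming the MLDataDevices-style behaviour that Flux re-exports:

using Flux
# gpu_device() returns a callable device object for the first functional GPU
# backend; calling that object on arrays or models moves them to the device.
dev = gpu_device()
x_on_device = dev(rand(Float32, 4, 4))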
2 changes: 1 addition & 1 deletion test/Project.toml
@@ -21,4 +21,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
[compat]
FiniteDifferences = "0.12"
Tracker = "0.2.33"
Enzyme = "0.12.4"
Enzyme = "0.13"
41 changes: 21 additions & 20 deletions test/ext_amdgpu/basic.jl
@@ -19,26 +19,27 @@ end
end

@testset "Chain of Dense layers" begin
m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax) |> f32
m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax)
x = rand(Float32, 10, 10)
gpu_autodiff_test(m, x)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false)
end

@testset "Convolution" begin
for conv_type in (Conv, ConvTranspose), nd in 1:3
m = conv_type(tuple(fill(2, nd)...), 3 => 4) |> f32
m = conv_type(tuple(fill(2, nd)...), 3 => 4)
x = rand(Float32, fill(10, nd)..., 3, 5)

md, xd = Flux.gpu.((m, x))
y = m(x)
# Ensure outputs are the same.
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
@test collect(md(xd)) ≈ y atol=1f-3

# Gradients are flipped as well.
md, xd = Flux.gpu.((m, x))
gs = gradient(m -> sum(m(x)), m)
gsd = gradient(m -> sum(m(xd)), md)
gs = gradient(m -> sum(m(x)), m)[1]
gsd = gradient(m -> sum(m(xd)), md)[1]

dims = ntuple(i -> i, ndims(m.weight) - 2)
@test reverse(gs[1].weight; dims) ≈ Array(gsd[1].weight) atol=1f-2
@test reverse(gs.weight; dims) ≈ Array(gsd.weight) atol=1f-2

# Movement back to CPU flips weights back.
mh = Flux.cpu(md)
@@ -52,10 +53,10 @@ end
x = rand(Float32, fill(10, nd)..., 3, 5) |> gpu

pad = ntuple(i -> i, nd)
m = conv_type(kernel, 3 => 4, pad=pad) |> f32 |> gpu
m = conv_type(kernel, 3 => 4, pad=pad) |> gpu

expanded_pad = ntuple(i -> pad[(i - 1) ÷ 2 + 1], 2 * nd)
m_expanded = conv_type(kernel, 3 => 4, pad=expanded_pad) |> f32 |> gpu
m_expanded = conv_type(kernel, 3 => 4, pad=expanded_pad) |> gpu

@test size(m(x)) == size(m_expanded(x))
end
@@ -74,25 +75,25 @@ end
end

@testset "Chain(Conv)" begin
m = Chain(Conv((3, 3), 3 => 3)) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
m = Chain(Conv((3, 3), 3 => 3))
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false, test_grad_f=false)

md = m |> gpu |> cpu
@test md[1].weight ≈ m[1].weight atol=1f-3

m = Chain(ConvTranspose((3, 3), 3 => 3)) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
m = Chain(ConvTranspose((3, 3), 3 => 3))
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false, test_grad_f=false)

md = m |> gpu |> cpu
@test md[1].weight ≈ m[1].weight atol=1f-3
end

@testset "Cross-correlation" begin
m = CrossCor((2, 2), 3 => 4) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3)
m = CrossCor((2, 2), 3 => 4)
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false)
end

@testset "Restructure" begin
@@ -132,7 +133,7 @@ end
bn = BatchNorm(3, σ)
for nd in 1:3
x = rand(Float32, fill(2, nd - 1)..., 3, 4)
gpu_autodiff_test(bn, x; atol=1f-3, allow_nothing=true)
test_gradients(bn, x; test_gpu=true, compare_finite_diff=false)
end
end

4 changes: 2 additions & 2 deletions test/ext_amdgpu/get_devices.jl
@@ -17,9 +17,9 @@ x = randn(Float32, 5, 5)
cx = x |> amdgpu_device
@test cx isa AMDGPU.ROCArray

# moving models to specific NVIDIA devices
# moving models to specific AMDGPU devices
for id in 0:(length(AMDGPU.devices()) - 1)
current_amdgpu_device = Flux.get_device("AMDGPU", id)
current_amdgpu_device = gpu_device(id+1)

global dense_model = dense_model |> current_amdgpu_device
@test dense_model.weight isa AMDGPU.ROCArray
3 changes: 0 additions & 3 deletions test/ext_amdgpu/runtests.jl
@@ -2,9 +2,6 @@
@assert AMDGPU.functional()
AMDGPU.allowscalar(false)

include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end
15 changes: 0 additions & 15 deletions test/ext_amdgpu/test_utils.jl

This file was deleted.

9 changes: 6 additions & 3 deletions test/ext_cuda/get_devices.jl
@@ -8,9 +8,6 @@ dense_model = Dense(2 => 3) # initially lives on CPU
weight = copy(dense_model.weight) # store the weight
bias = copy(dense_model.bias) # store the bias

cuda_device = Flux.get_device()

@test typeof(cuda_device) <: Flux.CUDADevice

# correctness of data transfer
x = randn(5, 5)
@@ -30,6 +27,12 @@ for id in 0:(length(CUDA.devices()) - 1)
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end

# gpu_device remembers the last device selected
# Therefore, we need to reset it to the current cuda device
@test gpu_device().device.handle == length(CUDA.devices()) - 1
gpu_device(CUDA.device().handle + 1)

# finally move to CPU, and see if things work
cdev = cpu_device()
dense_model = cdev(dense_model)
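
The gpu_device(CUDA.device().handle + 1) call above leans on an indexing convention; a short sketch of that assumption:

using CUDA, Flux
# Assumed convention: CUDA device handles are 0-based, while gpu_device
# selects devices by 1-based ordinal, hence the `+ 1` in the hunk above.
handle = CUDA.device().handle      # e.g. 0 for the first GPU
dev = gpu_device(handle + 1)       # re-select that same device by ordinal
model = Dense(2 => 3) |> dev       # its parameters now live on that GPU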
126 changes: 32 additions & 94 deletions test/ext_cuda/layers.jl
@@ -10,73 +10,23 @@
@test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end

# TODO: These layers get into scalar indexing issues.
const BROKEN_LAYERS = Union{}

const ACTIVATIONS = [identity, relu, tanh,
sigmoid, exp, softplus,
elu, selu]
const ACTIVATIONS = [identity, tanh]

function gpu_gradtest(name::String, layers::Vector, x_cpu = nothing, args...; test_cpu = true, test_mode = false)
isnothing(x_cpu) && error("Missing input to test the layers against.")
function gpu_gradtest(name::String, layers::Vector, x_cpu, args...;
test_mode=false, test_grad_x=true,
atol=1e-4, rtol=1e-4)
@testset "$name GPU grad tests" begin
for layer in layers
@testset "$layer Layer GPU grad test" begin

# compute output and grad of parameters
l_cpu = layer(args...)
l_gpu = l_cpu |> gpu
if test_mode
testmode!(l_cpu)
testmode!(l_gpu)
end

ps_cpu = Flux.params(l_cpu)
y_cpu, back_cpu = pullback(() -> sum(l_cpu(x_cpu)), ps_cpu)
gs_cpu = back_cpu(1f0)

x_gpu = gpu(x_cpu)
ps_gpu = Flux.params(l_gpu)

if typeof(l_gpu) <: BROKEN_LAYERS
@test_broken gradient(() -> sum(l_gpu(x_gpu)), ps_gpu) isa Flux.Zygote.Grads
else
y_gpu, back_gpu = pullback(() -> sum(l_gpu(x_gpu)), ps_gpu)
gs_gpu = back_gpu(1f0) # TODO many layers error out when backprop int 1, should fix

# compute grad of input
xg_cpu = gradient(x -> sum(l_cpu(x)), x_cpu)[1]
xg_gpu = gradient(x -> sum(l_gpu(x)), x_gpu)[1]

# test
if test_cpu
if layer === GroupedConvTranspose
@test y_gpu ≈ y_cpu rtol=1f-2 atol=1f-3
else
@test y_gpu ≈ y_cpu rtol=1f-3 atol=1f-3
end
if isnothing(xg_cpu)
@test isnothing(xg_gpu)
else
if layer === GroupedConvTranspose
@test Array(xg_gpu) ≈ xg_cpu rtol = 2f-2 atol = 1f-3
else
@test Array(xg_gpu) ≈ xg_cpu rtol = 1f-3 atol = 1f-3
end
end
end
@test gs_gpu isa Flux.Zygote.Grads
for (p_cpu, p_gpu) in zip(ps_cpu, ps_gpu)
if isnothing(gs_cpu[p_cpu])
@test isnothing(gs_gpu[p_gpu])
else
@test gs_gpu[p_gpu] isa CuArray
if test_cpu
@test Array(gs_gpu[p_gpu]) ≈ gs_cpu[p_cpu] rtol=1f-3 atol=1f-3
end
end
end
end
test_gradients(l_cpu, x_cpu; test_gpu=true, compare_finite_diff=false, test_grad_x, atol, rtol)
end
end
end
@@ -97,23 +47,24 @@ for act in ACTIVATIONS
ConvTranspose, ConvTransposeNoBias,
CrossCor, CrossCorNoBias,
DepthwiseConv, DepthwiseConvNoBias]
gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1=>3, act, test_cpu = false)
gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1=>3, act)

groupedconv = [GroupedConv, GroupedConvTranspose]
gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act, test_cpu = true)
gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act)

batch_norm = [BatchNorm, BatchNormNoTrackStats]
gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, test_cpu = false) #TODO fix errors
gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true)
gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, atol=1e-3)
gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, atol=1e-3)

batch_norm = [BatchNormNoTrackStats]
gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true, test_mode = true)
gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act,
test_mode=true, atol=1e-3)

instancenorm = [InstanceNorm]
gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act, test_cpu = false)
gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act)

groupnorm = [GroupNorm]
gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act, test_cpu = false)
gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act)
end

r = rand(Float32, 28, 28, 1, 1)
@@ -122,13 +73,13 @@ pooling_layers = [MaxPool, MeanPool]
gpu_gradtest("Pooling", pooling_layers, r, (2,2))

adaptive_pooling_layers = [AdaptiveMaxPool, AdaptiveMeanPool]
gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7), test_cpu = false)
gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7))

dropout_layers = [Dropout, AlphaDropout]
gpu_gradtest("Dropout", dropout_layers, r, 0.5f0; test_cpu = false) # dropout is not deterministic
gpu_gradtest("Dropout", dropout_layers, r, 1e-6) # dropout is not deterministic

layer_norm = [LayerNorm]
gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28, test_cpu = false) #TODO fix errors
gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28)
gpu_gradtest("LayerNorm 2", layer_norm, rand(Float32, 5,4), 5)

upsample = [x -> Upsample(scale=x)]
Expand All @@ -140,32 +91,27 @@ gpu_gradtest("PixelShuffle 2d", pixelshuffle, rand(Float32, 3, 4, 18, 3), 3)
gpu_gradtest("PixelShuffle 1d", pixelshuffle, rand(Float32, 3, 18, 3), 3)

embedding = [Flux.Embedding]
gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2)
gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2)
gpu_gradtest("Embedding integer index", embedding, 1, 5, 2)
gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2)
gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2)
gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2)
gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2)
gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding integer index", embedding, 1, 5, 2, test_grad_x=false)
gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2, test_grad_x=false)

@testset "function layers" begin
x = rand(Float32, 3,3)
gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=1)), x)
gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=2)), x)
gpu_autodiff_test(x -> sum(Flux.normalise(x)), x)
x = rand(Float32, 3, 3)
test_gradients(x -> sum(Flux.normalise(x; dims=1)), x, test_gpu=true, compare_finite_diff=false)
test_gradients(x -> sum(Flux.normalise(x; dims=2)), x, test_gpu=true, compare_finite_diff=false)
test_gradients(x -> sum(Flux.normalise(x)), x, test_gpu=true, compare_finite_diff=false)
end

@testset "Zeros mapped for $cl" for cl in (Conv, ConvTranspose, CrossCor, DepthwiseConv)
l = cl((2,2), 1=>3, bias = false) |> gpu
ip = zeros(Float32, 28,28,1,1) |> gpu
if typeof(l) <: BROKEN_LAYERS
@test_broken sum(l(ip)) ≈ 0.f0
@test_broken gradient(() -> sum(l(ip)), Flux.params(l)) isa Flux.Zygote.Grads
else
@test sum(l(ip)) ≈ 0.f0
gs = gradient(() -> sum(l(ip)), Flux.params(l))
@test l.bias ∉ gs.params
end
@test sum(l(ip)) ≈ 0.f0
gs = gradient(() -> sum(l(ip)), Flux.params(l))
@test l.bias ∉ gs.params
end

@testset "Dense without bias" begin
Expand Down Expand Up @@ -366,14 +312,6 @@ end
@test Array(y_gpu) ≈ y_cpu atol=1e-4
@test Array(α_gpu) ≈ α_cpu atol=1e-4

gm_cpu, gx_cpu = gradient(mha_cpu, x_cpu) do mha, x
y, α = mha(x)
return sum(y.^2) + sum(α.^2)
end
gm_gpu, gx_gpu = gradient(mha_gpu, x_gpu) do mha, x
y, α = mha(x)
return sum(y.^2) + sum(α.^2)
end
check_grad(gm_gpu, gm_cpu)
check_grad(gx_gpu, gx_cpu)
test_gradients(mha_cpu, x_cpu, loss = o -> sum(o[1].^2) + sum(o[2].^2),
test_gpu=true, compare_finite_diff=false)
end
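
The deleted block above compared CPU and GPU gradients by hand for MultiHeadAttention, which returns a tuple (y, α). The new call folds that into test_gradients through its `loss` keyword; roughly, a sketch of what that reduction does (not the helper's actual implementation):

# Sketch only: reduce the (y, α) output to a scalar so Zygote can
# differentiate it; test_gradients is assumed to apply the same reduction
# to the CPU and GPU copies before comparing the resulting gradients.
loss_fn = o -> sum(abs2, o[1]) + sum(abs2, o[2])
g_cpu = gradient(m -> loss_fn(m(x_cpu)), mha_cpu)[1]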
7 changes: 4 additions & 3 deletions test/ext_cuda/losses.jl
@@ -27,11 +27,12 @@ y = [1 0 0 0 1
@test focal_loss(x, y) ≈ focal_loss(gpu(x), gpu(y))

@testset "GPU: $loss" for loss in ALL_LOSSES
x = rand(Float32, 3,4)
y = rand(Float32, 3,4)
# let's stay far from the boundaries to avoid problems with finite differences gradients
x = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
y = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
@test loss(x, y) ≈ loss(gpu(x), gpu(y))

gpu_autodiff_test(loss, x, y)
test_gradients(loss, x, y, test_gpu=true, test_grad_f=false, compare_finite_diff=false)

# Float16 tests
@test loss(f16(x), f16(y)) ≈ loss(gpu(f16(x)), gpu(f16(y)))
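
The new comment about staying away from the boundaries refers to losses that are ill-conditioned near 0 or 1; a small illustration of the issue, with values chosen only for demonstration:

using Flux
# Illustration only: crossentropy involves log(ŷ), so inputs at or near the
# boundary of [0, 1] make finite-difference gradient checks unreliable,
# since a central-difference step can leave the valid domain entirely.
ŷ = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)   # safely inside (0, 1)
y = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
Flux.crossentropy(ŷ, y)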
4 changes: 0 additions & 4 deletions test/ext_cuda/runtests.jl
@@ -7,13 +7,9 @@ using Random, LinearAlgebra, Statistics
@assert CUDA.functional()
CUDA.allowscalar(false)

# include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end

@testset "cuda" begin
include("cuda.jl")
end
