From e490c1e82a68f2f5237a5dade3b3776ce16b76bd Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Thu, 29 Aug 2024 16:07:46 -0400
Subject: [PATCH] test: more detailed layernorm testing

---
 test/normalization/layernorm_tests.jl | 74 +++++++++++++++++----------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/test/normalization/layernorm_tests.jl b/test/normalization/layernorm_tests.jl
index 344cc67f..0713de5d 100644
--- a/test/normalization/layernorm_tests.jl
+++ b/test/normalization/layernorm_tests.jl
@@ -14,41 +14,48 @@ function setup_layernorm(gen_f, aType, T, x_size, affine_shape)
 end
 
 function run_layernorm_testing(gen_f, aType, T, x_size, affine_shape, act, ongpu, mode)
-    dims = Colon()
-    epsilon = LuxLib.Utils.default_epsilon(T)
-    _f = (args...) -> layernorm(args..., act, dims, epsilon)
+    @testset for dims in (Colon(), nothing)
+        if dims === nothing
+            affine_shape === nothing && continue
+            length(x_size) ≤ length(affine_shape) && continue
+        end
 
-    x, scale, bias = setup_layernorm(gen_f, aType, T, x_size, affine_shape)
+        epsilon = LuxLib.Utils.default_epsilon(T)
+        _f = (args...) -> layernorm(args..., act, dims, epsilon)
 
-    @test @inferred(layernorm(x, scale, bias, act, dims, epsilon)) isa Any
-    @jet layernorm(x, scale, bias, act, dims, epsilon)
+        x, scale, bias = setup_layernorm(gen_f, aType, T, x_size, affine_shape)
 
-    y = _f(x, scale, bias)
+        @test @inferred(layernorm(x, scale, bias, act, dims, epsilon)) isa Any
+        @jet layernorm(x, scale, bias, act, dims, epsilon)
 
-    @test y isa aType{T, length(x_size)}
-    @test size(y) == x_size
+        y = _f(x, scale, bias)
 
-    if affine_shape === nothing && act === identity
-        @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3)
-        @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1)
-    end
+        @test y isa aType{T, length(x_size)}
+        @test size(y) == x_size
 
-    fp16 = T == Float16
-    atol = fp16 ? 1.0f-2 : 1.0f-3
-    rtol = fp16 ? 1.0f-2 : 1.0f-3
+        if affine_shape === nothing && act === identity
+            @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3)
+            @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1)
+        end
 
-    soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
-    if affine_shape !== nothing
-        __f = (args...) -> sum(_f(args...))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
-    else
-        __f = x -> sum(_f(x, scale, bias))
-        test_gradients(__f, x; atol, rtol, soft_fail)
-    end
+        fp16 = T == Float16
+        atol = fp16 ? 1.0f-2 : 1.0f-3
+        rtol = fp16 ? 1.0f-2 : 1.0f-3
+
+        soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
+        if affine_shape !== nothing
+            __f = (args...) -> sum(_f(args...))
+            test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        else
+            __f = x -> sum(_f(x, scale, bias))
+            test_gradients(__f, x; atol, rtol, soft_fail)
+        end
 
-    if anonact !== act
-        lfn = (x, sc, b, act, dim, ϵ) -> sum(layernorm(x, sc, b, act, dim, ϵ))
-        @test @inferred(Zygote.gradient(lfn, x, scale, bias, act, dims, epsilon)) isa Any
+        if anonact !== act
+            lfn = (x, sc, b, act, dim, ϵ) -> sum(layernorm(x, sc, b, act, dim, ϵ))
+            @test @inferred(Zygote.gradient(lfn, x, scale, bias, act, dims, epsilon)) isa
+                  Any
+        end
     end
 end
 
@@ -115,3 +122,16 @@ end
     end
 end
 end
+
+@testitem "Layer Norm: Error Checks" tags=[:layer_norm] setup=[SharedTestSetup] begin
+    @testset "$mode" for (mode, aType, ongpu) in MODES
+        x = rand(2, 3) |> aType
+
+        @test_throws ArgumentError layernorm(x, nothing, nothing, identity, nothing, 1e-5)
+
+        sc = rand(2, 1) |> aType
+        b = rand(2, 1) |> aType
+
+        @test_throws AssertionError layernorm(x, sc, b, identity, nothing, 1e-5)
+    end
+end