From e490c1e82a68f2f5237a5dade3b3776ce16b76bd Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Thu, 29 Aug 2024 16:07:46 -0400
Subject: [PATCH] test: more detailed layernorm testing

---
 test/normalization/layernorm_tests.jl | 74 +++++++++++++++++----------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/test/normalization/layernorm_tests.jl b/test/normalization/layernorm_tests.jl
index 344cc67f..0713de5d 100644
--- a/test/normalization/layernorm_tests.jl
+++ b/test/normalization/layernorm_tests.jl
@@ -14,41 +14,48 @@ function setup_layernorm(gen_f, aType, T, x_size, affine_shape)
 end
 
 function run_layernorm_testing(gen_f, aType, T, x_size, affine_shape, act, ongpu, mode)
-    dims = Colon()
-    epsilon = LuxLib.Utils.default_epsilon(T)
-    _f = (args...) -> layernorm(args..., act, dims, epsilon)
+    @testset for dims in (Colon(), nothing)
+        if dims === nothing
+            affine_shape === nothing && continue
+            length(x_size) ≤ length(affine_shape) && continue
+        end
 
-    x, scale, bias = setup_layernorm(gen_f, aType, T, x_size, affine_shape)
+        epsilon = LuxLib.Utils.default_epsilon(T)
+        _f = (args...) -> layernorm(args..., act, dims, epsilon)
 
-    @test @inferred(layernorm(x, scale, bias, act, dims, epsilon)) isa Any
-    @jet layernorm(x, scale, bias, act, dims, epsilon)
+        x, scale, bias = setup_layernorm(gen_f, aType, T, x_size, affine_shape)
 
-    y = _f(x, scale, bias)
+        @test @inferred(layernorm(x, scale, bias, act, dims, epsilon)) isa Any
+        @jet layernorm(x, scale, bias, act, dims, epsilon)
 
-    @test y isa aType{T, length(x_size)}
-    @test size(y) == x_size
+        y = _f(x, scale, bias)
 
-    if affine_shape === nothing && act === identity
-        @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3)
-        @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1)
-    end
+        @test y isa aType{T, length(x_size)}
+        @test size(y) == x_size
 
-    fp16 = T == Float16
-    atol = fp16 ? 1.0f-2 : 1.0f-3
-    rtol = fp16 ? 1.0f-2 : 1.0f-3
+        if affine_shape === nothing && act === identity
+            @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3)
+            @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1)
+        end
 
-    soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
-    if affine_shape !== nothing
-        __f = (args...) -> sum(_f(args...))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
-    else
-        __f = x -> sum(_f(x, scale, bias))
-        test_gradients(__f, x; atol, rtol, soft_fail)
-    end
+        fp16 = T == Float16
+        atol = fp16 ? 1.0f-2 : 1.0f-3
+        rtol = fp16 ? 1.0f-2 : 1.0f-3
+
+        soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
+        if affine_shape !== nothing
+            __f = (args...) -> sum(_f(args...))
+            test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        else
+            __f = x -> sum(_f(x, scale, bias))
+            test_gradients(__f, x; atol, rtol, soft_fail)
+        end
 
-    if anonact !== act
-        lfn = (x, sc, b, act, dim, ϵ) -> sum(layernorm(x, sc, b, act, dim, ϵ))
-        @test @inferred(Zygote.gradient(lfn, x, scale, bias, act, dims, epsilon)) isa Any
+        if anonact !== act
+            lfn = (x, sc, b, act, dim, ϵ) -> sum(layernorm(x, sc, b, act, dim, ϵ))
+            @test @inferred(Zygote.gradient(lfn, x, scale, bias, act, dims, epsilon)) isa
+                  Any
+        end
     end
 end
 
@@ -115,3 +122,16 @@ end
     end
 end
 end
+
+@testitem "Layer Norm: Error Checks" tags=[:layer_norm] setup=[SharedTestSetup] begin
+    @testset "$mode" for (mode, aType, ongpu) in MODES
+        x = rand(2, 3) |> aType
+
+        @test_throws ArgumentError layernorm(x, nothing, nothing, identity, nothing, 1e-5)
+
+        sc = rand(2, 1) |> aType
+        b = rand(2, 1) |> aType
+
+        @test_throws AssertionError layernorm(x, sc, b, identity, nothing, 1e-5)
+    end
+end