Some fast paths + type fixes #2137

Closed · wants to merge 5 commits
Changes from 1 commit
6 changes: 6 additions & 0 deletions src/layers/basic.jl
@@ -172,6 +172,9 @@ function (a::Dense)(x::AbstractVecOrMat)
return σ.(a.weight * x .+ a.bias)
end

(a::Dense{typeof(identity), <:AbstractMatrix, Bool})(x::AbstractVecOrMat) =
a.weight * x # fast path, no broadcast

(a::Dense)(x::AbstractArray) =
reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...)

@@ -246,6 +249,9 @@ function (a::Scale)(x::AbstractArray)
σ.(a.scale .* x .+ a.bias)
end

(a::Scale{typeof(identity), <:AbstractArray, Bool})(x::AbstractArray) =
a.scale .* x

function Base.show(io::IO, l::Scale)
print(io, "Scale(", join(size(l.scale), ", "))
l.σ == identity || print(io, ", ", l.σ)
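For context, a minimal sketch of when the Dense fast path above is selected, assuming Flux's convention that bias=false is stored as the Bool value false; this example is illustrative only and is not part of the diff. The Scale method above relies on the same dispatch trick for Scale layers built with bias=false.

using Flux

d = Dense(4 => 2; bias=false)   # activation defaults to identity, bias is stored as `false`
x = randn(Float32, 4, 8)

# With this change, the call below should dispatch to the added
# Dense{typeof(identity), <:AbstractMatrix, Bool} method and simply
# return d.weight * x, with no broadcast over bias/activation.
y = d(x)
@assert size(y) == (2, 8)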
12 changes: 12 additions & 0 deletions src/layers/conv.jl
@@ -199,6 +199,10 @@ function (c::Conv)(x::AbstractArray)
cdims = conv_dims(c, x)
σ.(conv(x, c.weight, cdims) .+ conv_reshape_bias(c))
end
function (c::Conv{<:Any,<:Any,typeof(identity),<:AbstractArray,Bool})(x::AbstractArray)
cdims = conv_dims(c, x)
conv(x, c.weight, cdims) # fast path, no broadcast
end

_channels_in(l::Conv) = size(l.weight, ndims(l.weight)-1) * l.groups
_channels_out(l::Conv) = size(l.weight, ndims(l.weight))
@@ -332,6 +336,10 @@ function (c::ConvTranspose)(x::AbstractArray)
cdims = conv_transpose_dims(c, x)
σ.(∇conv_data(x, c.weight, cdims) .+ conv_reshape_bias(c))
end
function (c::ConvTranspose{<:Any,<:Any,typeof(identity),<:AbstractArray,Bool})(x::AbstractArray)
cdims = conv_transpose_dims(c, x)
∇conv_data(x, c.weight, cdims) # fast path, no broadcast
end

function Base.show(io::IO, l::ConvTranspose)
print(io, "ConvTranspose(", size(l.weight)[1:ndims(l.weight)-2])
@@ -470,6 +478,10 @@ function (c::CrossCor)(x::AbstractArray)
cdims = crosscor_dims(c, x)
σ.(crosscor(x, c.weight, cdims) .+ conv_reshape_bias(c))
end
function (c::CrossCor{<:Any,<:Any,typeof(identity),<:AbstractArray,Bool})(x::AbstractArray)
cdims = crosscor_dims(c, x)
crosscor(x, c.weight, cdims) # fast path, no broadcast
end

function Base.show(io::IO, l::CrossCor)
print(io, "CrossCor(", size(l.weight)[1:ndims(l.weight)-2])
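Likewise, a quick sanity check of the convolutional fast paths above, assuming a Conv built with the default identity activation and bias=false (illustrative only, not from the diff); @which can be used at the REPL to confirm which method is hit.

using Flux

c = Conv((3, 3), 3 => 8; bias=false)   # identity activation, bias stored as `false`
x = randn(Float32, 32, 32, 3, 4)

# Should hit the added Conv{<:Any,<:Any,typeof(identity),<:AbstractArray,Bool} method,
# i.e. conv(x, c.weight, cdims) with no activation/bias broadcast.
y = c(x)
@assert size(y) == (30, 30, 8, 4)      # 3×3 kernel, no padding, stride 1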
18 changes: 9 additions & 9 deletions src/layers/normalise.jl
@@ -210,7 +210,7 @@ true
```
"""
struct LayerNorm{F,D,T,N}
λ::F
λ::F # this field is not used
diag::D
ϵ::T
size::NTuple{N,Int}
@@ -254,16 +254,16 @@ function _norm_layer_forward(
end
end

o = _norm_layer_forward(x, μ, σ², l.ϵ)
hasaffine(l) || return l.λ.(o)

γ = reshape(l.γ, affine_shape)
β = reshape(l.β, affine_shape)
return l.λ.(γ .* o .+ β)
s = (inv∘sqrt).(σ² .+ l.ϵ) # faster to un-fuse this, smaller... ideally mean_var(x, ε)?
Member:

Since it's unfused by Zygote anyhow, might as well do that here.

Member Author:

For just the forward pass, it was still faster to un-fuse this, to do inv & sqrt N times not N^3.

Member:

Isn't that what your comment is saying? I might be misunderstanding: does "un-fuse" here refer to extracting s as its own variable, or to writing s = inv.(sqrt.(σ² .+ l.ϵ)) instead of s = (inv∘sqrt).(σ² .+ l.ϵ)?

Member Author:

Yes, maybe we are agreeing. The comment was meant to answer "why make s at all", since without it things got slower. (inv∘sqrt) is probably premature optimisation.
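A standalone illustration of the trade-off being discussed, with made-up sizes and independent of Flux: precomputing s evaluates inv and sqrt once per entry of σ², while the fully fused broadcast re-evaluates sqrt for every element of x, since Julia fuses all the dotted calls into one kernel.

using Statistics

x  = randn(Float32, 28, 28, 64, 32)
μ  = mean(x; dims=(1, 2, 4))
σ² = var(x; dims=(1, 2, 4), corrected=false)
ϵ  = 1f-5

s  = (inv ∘ sqrt).(σ² .+ ϵ)      # un-fused: inv & sqrt run length(σ²) times
y1 = s .* (x .- μ)

y2 = (x .- μ) ./ sqrt.(σ² .+ ϵ)  # fused: sqrt runs length(x) times

@assert y1 ≈ y2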

if hasaffine(l)
γ = reshape(l.γ, affine_shape) # ideally reshape on construction, store Scale?
Member:

The issue with packing the affine params/activation in a Scale is that batchnorm functions in 3rd-party backends (notably cuDNN) expect them to be passed in alongside all the other params. Thus the NNlib-level API has to be batchnorm(x, ..., γ, β), so the Scale only exists as a container to hold the affine params.

Member Author:

I see. We should probably still make these arrays the size required on construction, and make them even if they won't be used, instead of this:

https://github.com/FluxML/NNlibCUDA.jl/blob/master/src/cudnn/batchnorm.jl#L21

Member Author:

Does Flux ever call that NNlib code?

Member (@ToucheSir), Dec 21, 2022:

It's the only remaining CUDA.jl-reliant functionality left in this repo aside from the Functors stuff: https://github.com/FluxML/Flux.jl/blob/master/src/cuda/cudnn.jl. Absolute kludge as you can see, which is why these routines should be moved to NNlib sooner than later.

Member Author:

Oh right, I forgot about that file. But I remember seeing it when trying to remove CUDA... agree that NNlib is the right place.

Member:

Did a quick blame of the NNlibCUDA line above and came up with FluxML/NNlibCUDA.jl#36. I don't recall why the arrays are allocated instead of just set as CU_NULL before the call. The cuDNN docs don't mention that bias and scale params can be null, so maybe that's why. If it turns out they can be and it's just not documented, though, we should revisit this.
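A rough sketch of the "size them on construction" idea from this thread: always materialise γ and β so a backend such as cuDNN can be handed real arrays even when affine is off. The MyNorm name and layout below are invented for illustration and are not Flux or NNlibCUDA code.

struct MyNorm{A<:AbstractVector}
    γ::A
    β::A
    affine::Bool
end

function MyNorm(ch::Integer; affine::Bool = true)
    γ = ones(Float32, ch)    # identity scale when affine == false
    β = zeros(Float32, ch)   # zero shift when affine == false
    return MyNorm(γ, β, affine)
end

MyNorm(8; affine = false).γ == ones(Float32, 8)   # arrays exist regardless of `affine`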

β = reshape(l.β, affine_shape)
return l.λ.(γ .* s .* (x .- μ) .+ β)
else
return l.λ.(s .* (x .- μ))
end
end

@inline _norm_layer_forward(x, μ, σ², ϵ) = (x .- μ) ./ sqrt.(σ² .+ ϵ)

function _track_stats!(
bn, x::AbstractArray{T, N}, μ, σ², reduce_dims,
) where {T, N}