Add implementation of Lion optimiser
mashu committed Feb 16, 2023
1 parent e2254b4 commit 358f601
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions src/rules.jl
@@ -217,6 +217,35 @@ function apply!(o::Adam, state, x, dx)
return (mt, vt, βt .* β), dx′
end

"""
Lion(η = 0.001, β::Tuple = (0.9, 0.999))
[Lion](https://arxiv.org/abs/2302.06675) optimiser.
# Parameters
- Learning rate (`η`): Amount by which gradients are discounted before updating
the weights.
- Decay of momentums (`β::Tuple`): Exponential decay for the first (β1) and the
second (β2) momentum estimate.
"""
struct Lion <: AbstractRule
  eta::Float64
  beta::Tuple{Float64,Float64}
end
Lion(η = 1f-3, β = (9f-1, 9.99f-1)) = Lion(η, β)

# Base.sign(0) == 0 already, so no custom sign definition is needed here.
init(o::Lion, x::AbstractArray) = (zero(x), zero(x), o.beta)

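# Lion update rule, following the paper linked above:
#   c_t = β1 * m_{t-1} + (1 - β1) * g_t      (interpolated direction)
#   θ_t = θ_{t-1} - η * sign(c_t)            (fixed-magnitude step, returned as dx′)
#   m_t = β2 * m_{t-1} + (1 - β2) * g_t      (momentum update)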
function apply!(o::Lion, state, x, dx)
  η, β = o.eta, o.beta
  mt, vt, βt = state
  # Interpolate between the momentum and the gradient, then step by the sign.
  dx′ = @lazy η * sign(β[1] * mt + (1 - β[1]) * dx)
  # Update the momentum with the second decay rate.
  @.. mt = β[2] * mt + (1 - β[2]) * dx
  return (mt, vt, βt .* β), dx′
end

"""
    RAdam(η = 1f-3, β = (9f-1, 9.99f-1), ϵ = eps(typeof(η)))

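For context, here is a minimal usage sketch of the new rule through Optimisers.jl's `setup`/`update` interface. It is not part of this commit, and the toy model and gradients are made up for illustration:

using Optimisers

# Toy "model": Optimisers.jl walks any nested structure of arrays.
model = (W = randn(Float32, 3, 3), b = zeros(Float32, 3))

# Build the optimiser state tree for Lion with its default hyperparameters.
state = Optimisers.setup(Lion(), model)

# Stand-in gradients with the same structure as the model.
grads = (W = ones(Float32, 3, 3), b = ones(Float32, 3))

# One step: returns the updated state tree and model.
state, model = Optimisers.update(state, model, grads)

Because every coordinate moves by exactly ±η under the sign update, the Lion paper suggests pairing it with a learning rate several times smaller than one would typically use with Adam.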