Add implementation of Lion optimiser
mashu committed Feb 16, 2023
1 parent e2254b4 commit 358f601
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions src/rules.jl
@@ -217,6 +217,35 @@ function apply!(o::Adam, state, x, dx)
return (mt, vt, βt .* β), dx′
end

"""
Lion(η = 0.001, β::Tuple = (0.9, 0.999))
[Lion](https://arxiv.org/abs/2302.06675) optimiser.
# Parameters
- Learning rate (`η`): Amount by which gradients are discounted before updating
the weights.
- Decay of momentums (`β::Tuple`): Exponential decay for the first (β1) and the
second (β2) momentum estimate.
"""
struct Lion <: AbstractRule
  eta::Float64
  beta::Tuple{Float64,Float64}
end
Lion(η = 1f-3, β = (9f-1, 9.99f-1)) = Lion(η, β)

# Base.sign(0) == 0 already, so no custom sign definition is needed here.
init(o::Lion, x::AbstractArray) = (zero(x), zero(x), o.beta)

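# Lion update rule, following the paper linked above:
#   c_t = β1 * m_{t-1} + (1 - β1) * g_t      (interpolated direction)
#   θ_t = θ_{t-1} - η * sign(c_t)            (fixed-magnitude step, returned as dx′)
#   m_t = β2 * m_{t-1} + (1 - β2) * g_t      (momentum update)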
function apply!(o::Lion, state, x, dx)
  η, β = o.eta, o.beta
  mt, vt, βt = state
  # Interpolate between the momentum and the gradient, then step by the sign.
  dx′ = @lazy η * sign(β[1] * mt + (1 - β[1]) * dx)
  # Update the momentum with the second decay rate.
  @.. mt = β[2] * mt + (1 - β[2]) * dx
  return (mt, vt, βt .* β), dx′
end

"""
    RAdam(η = 1f-3, β = (9f-1, 9.99f-1), ϵ = eps(typeof(η)))

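For context, here is a minimal usage sketch of the new rule through Optimisers.jl's `setup`/`update` interface. It is not part of this commit, and the toy model and gradients are made up for illustration:

using Optimisers

# Toy "model": Optimisers.jl walks any nested structure of arrays.
model = (W = randn(Float32, 3, 3), b = zeros(Float32, 3))

# Build the optimiser state tree for Lion with its default hyperparameters.
state = Optimisers.setup(Lion(), model)

# Stand-in gradients with the same structure as the model.
grads = (W = ones(Float32, 3, 3), b = ones(Float32, 3))

# One step: returns the updated state tree and model.
state, model = Optimisers.update(state, model, grads)

Because every coordinate moves by exactly ±η under the sign update, the Lion paper suggests pairing it with a learning rate several times smaller than one would typically use with Adam.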