Remove greek-letter keyword arguments #2139

Merged · 5 commits · Apr 29, 2023
4 changes: 2 additions & 2 deletions src/Flux.jl
@@ -70,11 +70,11 @@ include("loading.jl")
include("outputsize.jl")
export @autosize

include("deprecations.jl")

include("losses/Losses.jl")
using .Losses

include("deprecations.jl")

include("cuda/cuda.jl")

end # module
11 changes: 11 additions & 0 deletions src/deprecations.jl
@@ -203,6 +203,17 @@ function trainmode!(m, active::Bool)
testmode!(m, !active)
end

# Greek-letter keywords deprecated in Flux 0.13
# Arguments (old => new, :function, "β" => "beta")
function _greek_ascii_depwarn(βbeta::Pair, func = :loss, names = "" => "")
Base.depwarn("""function $func no longer accepts greek-letter keyword $(names.first)
please use ascii $(names.second) instead""", func)
βbeta.first
end
_greek_ascii_depwarn(βbeta::Pair{Nothing}, _...) = βbeta.second

ChainRulesCore.@non_differentiable _greek_ascii_depwarn(::Any...)


# v0.14 deprecations

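For context, a minimal sketch (not part of the diff) of how a constructor is expected to thread the old and new keywords through this helper. The name `toy_scale` and its default are invented for illustration, and `_greek_ascii_depwarn` is assumed to be in scope as defined above:

# Illustration only; not part of this PR.
function toy_scale(x; eps::Real=1f-5, ϵ=nothing)
    ε = _greek_ascii_depwarn(ϵ => eps, :toy_scale, "ϵ" => "eps")
    # ϵ === nothing → the Pair{Nothing} method fires: no warning, ε == eps
    # ϵ isa Real    → the generic method fires: a depwarn is printed, ε == ϵ
    return x ./ (1 + ε)
end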
41 changes: 24 additions & 17 deletions src/layers/normalise.jl
@@ -152,11 +152,12 @@ testmode!(m::AlphaDropout, mode=true) =
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

"""
LayerNorm(size..., λ=identity; affine=true, ϵ=1fe-5)
LayerNorm(size..., λ=identity; affine=true, eps=1f-5)

A [normalisation layer](https://arxiv.org/abs/1607.06450) designed to be
used with recurrent hidden states.
The argument `size` should be an integer or a tuple of integers.

In the forward pass, the layer normalises the mean and standard
deviation of the input, then applies the elementwise activation `λ`.
The input is normalised along the first `length(size)` dimensions
@@ -190,9 +191,10 @@ struct LayerNorm{F,D,T,N}
affine::Bool
end

function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, ϵ::Real=1f-5)
function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5, ϵ=nothing)
ε = _greek_ascii_depwarn(ϵ => eps, :LayerNorm, "ϵ" => "eps")
diag = affine ? Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
return LayerNorm(λ, diag, ϵ, size, affine)
return LayerNorm(λ, diag, ε, size, affine)
end
LayerNorm(size::Integer...; kw...) = LayerNorm(Int.(size); kw...)
LayerNorm(size_act...; kw...) = LayerNorm(Int.(size_act[1:end-1]), size_act[end]; kw...)
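As a usage-level illustration (not from the diff): on this branch both spellings construct the same layer, but only the ascii one is warning-free. `Base.depwarn` is silent by default, so run Julia with `--depwarn=yes` to see the message. The field access below assumes the struct field keeps its greek name `ϵ`, as the review comment further down indicates.

using Flux

ln_ascii = LayerNorm(8; eps=1f-4)   # new ascii keyword, no warning
ln_greek = LayerNorm(8; ϵ=1f-4)     # old greek keyword still works, but goes through _greek_ascii_depwarn
ln_ascii.ϵ == ln_greek.ϵ == 1f-4    # both end up storing the same value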
@@ -269,7 +271,7 @@ ChainRulesCore.@non_differentiable _track_stats!(::Any...)
BatchNorm(channels::Integer, λ=identity;
initβ=zeros32, initγ=ones32,
affine=true, track_stats=true, active=nothing,
ϵ=1f-5, momentum= 0.1f0)
eps=1f-5, momentum= 0.1f0)

[Batch Normalization](https://arxiv.org/abs/1502.03167) layer.
`channels` should be the size of the channel dimension in your data (see below).
@@ -321,16 +323,18 @@ end

function BatchNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
Member Author:
This PR does not remove initβ, initγ. IMO they should be replaced with a method where you pass in a Scale layer (a hypothetical sketch of that idea follows this hunk), but perhaps better as part of an overhaul of norm layers.

Getting rid of greek-letter field names may also be a good idea. The norm layers are the worst offenders.

Member:

We can tackle this as part of a bigger rework of norm layer internals to save on some churn.

affine=true, track_stats=true, active::Union{Bool,Nothing}=nothing,
ϵ=1f-5, momentum=0.1f0)
affine::Bool=true, track_stats::Bool=true, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :BatchNorm, "ϵ" => "eps")

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return BatchNorm(λ, β, γ,
μ, σ², ϵ, momentum,
μ, σ², ε, momentum,
affine, track_stats,
active, chs)
end
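The review comment above suggests replacing `initβ`/`initγ` with a method that accepts a pre-built `Scale` layer. Purely as an illustration of that idea (nothing like this exists in Flux or in this PR; the name, the assumed `Scale` field layout, and the positional field order are all assumptions), one possible shape would be:

# Hypothetical only; not part of this PR or of Flux.
# Assumes Flux.Scale stores (scale, bias, σ) and that the Scale is affine (bias is an array).
function batchnorm_from_scale(scale::Flux.Scale; track_stats::Bool = true,
                              eps::Real = 1f-5, momentum::Real = 0.1f0)
    chs = length(scale.bias)
    μ  = track_stats ? Flux.zeros32(chs) : nothing
    σ² = track_stats ? Flux.ones32(chs)  : nothing
    # Positional order mirrors the constructor call in the hunk above:
    # BatchNorm(λ, β, γ, μ, σ², ε, momentum, affine, track_stats, active, chs)
    return Flux.BatchNorm(scale.σ, scale.bias, scale.scale, μ, σ², eps, momentum,
                          true, track_stats, nothing, chs)
end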
@@ -361,7 +365,7 @@ end
InstanceNorm(channels::Integer, λ=identity;
initβ=zeros32, initγ=ones32,
affine=false, track_stats=false,
ϵ=1f-5, momentum=0.1f0)
eps=1f-5, momentum=0.1f0)

[Instance Normalization](https://arxiv.org/abs/1607.08022) layer.
`channels` should be the size of the channel dimension in your data (see below).
@@ -411,16 +415,18 @@ end

function InstanceNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine=false, track_stats=false, active::Union{Bool,Nothing}=nothing,
ϵ=1f-5, momentum=0.1f0)
affine::Bool=false, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return InstanceNorm(λ, β, γ,
μ, σ², ϵ, momentum,
μ, σ², ε, momentum,
affine, track_stats,
active, chs)
end
@@ -450,7 +456,7 @@ end
GroupNorm(channels::Integer, G::Integer, λ=identity;
initβ=zeros32, initγ=ones32,
affine=true, track_stats=false,
ϵ=1f-5, momentum=0.1f0)
eps=1f-5, momentum=0.1f0)

[Group Normalization](https://arxiv.org/abs/1803.08494) layer.

@@ -508,12 +514,13 @@ trainable(gn::GroupNorm) = hasaffine(gn) ? (β = gn.β, γ = gn.γ) : (;)

function GroupNorm(chs::Int, G::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine=true, track_stats=false, active::Union{Bool,Nothing}=nothing,
ϵ=1f-5, momentum=0.1f0)
affine::Bool=true, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

if track_stats
if track_stats
Base.depwarn("`track_stats=true` will be removed from GroupNorm in Flux 0.14. The default value is `track_stats=false`, which will work as before.", :GroupNorm)
end
end
ε = _greek_ascii_depwarn(ϵ => eps, :GroupNorm, "ϵ" => "eps")

chs % G == 0 || error("The number of groups ($(G)) must divide the number of channels ($chs)")

@@ -525,7 +532,7 @@ end
return GroupNorm(G, λ,
β, γ,
μ, σ²,
ϵ, momentum,
ε, momentum,
affine, track_stats,
active, chs)
end
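InstanceNorm and GroupNorm follow the same keyword pattern. A quick illustrative check (not from the diff; remember depwarns only print with `--depwarn=yes`):

InstanceNorm(4; eps=1f-4)   # ascii keyword, constructs silently
GroupNorm(8, 2; ϵ=1f-4)     # greek keyword still accepted, warns: "function GroupNorm no longer accepts greek-letter keyword ϵ ..."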
2 changes: 1 addition & 1 deletion src/losses/Losses.jl
@@ -4,7 +4,7 @@ using Statistics
using Zygote
using Zygote: @adjoint
using ChainRulesCore
using ..Flux: ofeltype, epseltype
using ..Flux: ofeltype, epseltype, _greek_ascii_depwarn
using CUDA
using NNlib: logsoftmax, logσ, ctc_loss, ctc_alpha, ∇ctc_loss
import Base.Broadcast: broadcasted
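With `_greek_ascii_depwarn` imported here, individual loss functions can deprecate their greek keywords the same way. A sketch with an invented loss (`my_weighted_mse`, its `alpha`/`α` pair, and the default value are all made up for illustration):

# Sketch only; assumes it lives inside Flux.Losses, where `mean` (from Statistics)
# and `_greek_ascii_depwarn` are already available per the hunk above.
function my_weighted_mse(ŷ, y; agg = mean, alpha::Real = 1, α = nothing)
    a = _greek_ascii_depwarn(α => alpha, :my_weighted_mse, "α" => "alpha")
    return agg(a .* (ŷ .- y) .^ 2)
end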