diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index 34300ca840..ad5a99b737 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -68,7 +68,6 @@ These layers don't affect the structure of the network but may improve training
 ```@docs
 Flux.normalise
 BatchNorm
-Flux.dropout
 Dropout
 AlphaDropout
 LayerNorm
diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 9b31813f89..f543db0c09 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -67,32 +67,24 @@ Custom RNGs are only supported on the CPU.
 Does nothing to the input once [`Flux.testmode!`](@ref) is `true`.
 
 # Examples
-```julia
-julia> m = Chain(Dense(2 => 2), Dropout(1))
-Chain(
-  Dense(2 => 2),                        # 6 parameters
-  Dropout(1),
-)
+```jldoctest
+julia> m = Chain(Dense(1 => 1), Dropout(1));
+
+julia> Flux.trainmode!(m);
 
-julia> Flux.trainmode!(m); # activating the layer without actually training it
+julia> y = m([1]);
 
-julia> m([1, 2]) # drops neurons with a probability of 1
-2-element Vector{Float32}:
- -0.0
- -0.0
+julia> count(i->(i == 0), y) == m[2].p # number of zeros == 1
+true
 
-julia> m = Chain(Dense(2 => 2), Dropout(0.5))
-Chain(
-  Dense(2 => 2),                        # 6 parameters
-  Dropout(0.5),
-)
+julia> m = Chain(Dense(1 => 1), Dropout(0.5));
 
 julia> Flux.trainmode!(m);
 
-julia> m([1, 2]) # drops neurons with a probability of 0.5
-2-element Vector{Float32}:
- -4.537827
- -0.0
+julia> y = m([1]);
+
+julia> m[2].p - 0.5 <= count(i->(i == 0), y) <= m[2].p + 0.5 # number of zeros can be 0 or 1
+true
 ```
 """
 mutable struct Dropout{F,D,R<:AbstractRNG}
@@ -136,7 +128,9 @@ remain the same as before.
 Does nothing to the input once [`testmode!`](@ref) is true.
 
 # Examples
-```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+"
+```jldoctest
+julia> using Statistics
+
 julia> x = randn(20,1);
 
 julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5));
@@ -145,17 +139,8 @@ julia> Flux.trainmode!(m);
 
 julia> y = m(x);
 
-julia> Flux.std(x)
-1.097500619939126
-
-julia> Flux.std(y) # maintains the standard deviation of the input
-1.1504012188827453
-
-julia> Flux.mean(x) # maintains the mean of the input
--0.3217018554158738
-
-julia> Flux.mean(y)
--0.2526866470385106
+julia> isapprox(std(x), std(y), rtol=0.6)
+true
 ```
 """
 mutable struct AlphaDropout{F,R<:AbstractRNG}
@@ -208,24 +193,20 @@ using the [`Scale`](@ref) layer.
 See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref).
 
 # Examples
-```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+"
+```jldoctest
+julia> using Statistics
+
 julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images
 
 julia> m = LayerNorm(3);
 
 julia> y = m(xs);
 
-julia> Flux.std(xs[:, :, :, 1])
-0.28713812337208383
-
-julia> Flux.std(y[:, :, :, 1]) # normalises each image (or all channels in an image)
-1.018993632693022
+julia> isapprox(std(y[:, :, :, 1]), 1, atol=0.1) && std(xs[:, :, :, 1]) != std(y[:, :, :, 1])
+true
 
-julia> Flux.std(xs[:, :, :, 2])
-0.22540260537916373
-
-julia> Flux.std(y[:, :, :, 2]) # normalises each image (or all channels in an image)
-1.018965249873791
+julia> isapprox(std(y[:, :, :, 2]), 1, atol=0.1) && std(xs[:, :, :, 2]) != std(y[:, :, :, 2])
+true
 ```
 """
 struct LayerNorm{F,D,T,N}
@@ -329,17 +310,16 @@ Use [`testmode!`](@ref) during inference.
 
 # Examples
 ```julia
-julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images
+julia> using Statistics
 
-julia> Flux.std(xs)
-2.6822461565718467
+julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images
 
 julia> m = BatchNorm(3);
 
-julia> Flux.trainmode!(m); # activating the layer without actually training it
+julia> Flux.trainmode!(m);
 
-julia> Flux.std(m(xs)) # normalises the complete batch
-1.0093209961092855
+julia> isapprox(std(m(xs)), 1, atol=0.1) && std(xs) != std(m(xs))
+true
 ```
 """
 mutable struct BatchNorm{F,V,N,W}
@@ -419,24 +399,20 @@ that will be used to renormalize the input in test phase.
 in previous Flux versions (< v0.12).
 
 # Examples
-```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+"
+```jldoctest
+julia> using Statistics
+
 julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images
 
 julia> m = InstanceNorm(3);
 
 julia> y = m(xs);
 
-julia> Flux.std(xs[:, :, 1, 1]) # original standard deviation of the first channel of image 1
-0.2989802650787384
+julia> isapprox(std(y[:, :, 1, 1]), 1, atol=0.1) && std(xs[:, :, 1, 1]) != std(y[:, :, 1, 1])
+true
 
-julia> Flux.std(y[:, :, 1, 1]) # each channel of the batch is normalised
-1.0606027381538408
-
-julia> Flux.std(xs[:, :, 2, 2]) # original standard deviation of the second channel of image 2
-0.28662705400461197
-
-julia> Flux.std(y[:, :, 2, 2]) # each channel of the batch is normalised
-1.06058729821187
+julia> isapprox(std(y[:, :, 2, 2]), 1, atol=0.1) && std(xs[:, :, 2, 2]) != std(y[:, :, 2, 2])
+true
 ```
 """
 mutable struct InstanceNorm{F,V,N,W}
@@ -517,24 +493,20 @@ If `track_stats=true`, accumulates mean and var statistics in training phase
 that will be used to renormalize the input in test phase.
 
 # Examples
-```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+"
+```jldoctest
+julia> using Statistics
+
 julia> xs = rand(3, 3, 4, 2); # a batch of 2 3X3X4 images
 
 julia> m = GroupNorm(4, 2);
 
 julia> y = m(xs);
 
-julia> Flux.std(xs[:, :, 1:2, 1]) # original standard deviation of the first 2 channels of image 1
-0.307588490584917
-
-julia> Flux.std(y[:, :, 1:2, 1]) # normalises channels in groups of 2 (as specified)
-1.0289339365431291
-
-julia> Flux.std(xs[:, :, 3:4, 2]) # original standard deviation of the last 2 channels of image 2
-0.3111566100804274
+julia> isapprox(std(y[:, :, 1:2, 1]), 1, atol=0.1) && std(xs[:, :, 1:2, 1]) != std(y[:, :, 1:2, 1])
+true
 
-julia> Flux.std(y[:, :, 3:4, 2]) # normalises channels in groups of 2 (as specified)
-1.0289352493058574
+julia> isapprox(std(y[:, :, 3:4, 2]), 1, atol=0.1) && std(xs[:, :, 3:4, 2]) != std(y[:, :, 3:4, 2])
+true
 ```
 """
 mutable struct GroupNorm{F,V,N,W}
diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl
index 5059449f38..929f5b2d71 100644
--- a/src/layers/recurrent.jl
+++ b/src/layers/recurrent.jl
@@ -79,22 +79,6 @@ julia> rnn(3)
 julia> rnn.state
 5
-
-julia> rnn(1:10) # apply to a sequence
-10-element Vector{Int64}:
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
- 10
-
-julia> rnn.state
-60
 ```
 
 Folding over a 3d Array of dimensions `(features, batch, time)` is also supported:
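
A note on the pattern these hunks adopt: each rewritten doctest now ends in a boolean that is `true` by construction, instead of printing RNG-dependent numbers that previously needed a `filter` regex. The same style extends to behaviour the surrounding docstrings only state in prose. Below is a minimal sketch of that idea, not part of this patch; it assumes Flux v0.13, and the `(out, batch, time)` output shape of the 3d fold is an assumption based on the `(features, batch, time)` convention named in the truncated hunk above:

```julia
using Flux

# The Dropout docstring states the layer "does nothing to the input" once
# testmode! is set; that claim is checkable with no RNG-dependent output.
m = Dropout(0.5)
Flux.testmode!(m)
x = rand(Float32, 10)
@assert m(x) == x  # identity in test mode

# The RNN docstring's 3d folding can be checked the same way, via a
# deterministic shape test rather than printed values (assumed shape:
# input (features, batch, time) -> output (out, batch, time)).
rnn = RNN(2 => 3)
y = rnn(rand(Float32, 2, 4, 5))
@assert size(y) == (3, 4, 5)
```

Either check could later be folded into the corresponding docstrings as further `jldoctest` blocks without reintroducing output filters.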