From 8b189d08dfcad5b1f91f2e6d3ecad53c71d2324d Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 00:24:32 +0530 Subject: [PATCH 01/15] Add doctests in `upsample.jl` --- src/layers/upsample.jl | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl index 9deb413bb7..3649f0f01e 100644 --- a/src/layers/upsample.jl +++ b/src/layers/upsample.jl @@ -75,9 +75,41 @@ end """ PixelShuffle(r::Int) -Pixel shuffling layer with upscale factor `r`. +Pixel shuffling layer with upscale factor `r`. Usually used for generating higher +resolution images while upscaling them. See [`NNlib.pixel_shuffle`](@ref). + +# Examples +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +julia> p = PixelShuffle(2); + +julia> xs = rand(2, 2, 4, 1) # an image with 4 channels having 2X2 pixels in each channel +2×2×4×1 Array{Float64, 4}: +[:, :, 1, 1] = + 0.826452 0.0519244 + 0.0686387 0.438346 + +[:, :, 2, 1] = + 0.343179 0.445101 + 0.543927 0.740905 + +[:, :, 3, 1] = + 0.105997 0.422996 + 0.32957 0.167205 + +[:, :, 4, 1] = + 0.825737 0.98609 + 0.757365 0.294784 + +julia> p(xs) # an image with only 1 channel with 4X4 pixels in the single channel +4×4×1×1 Array{Float64, 4}: +[:, :, 1, 1] = + 0.826452 0.105997 0.0519244 0.422996 + 0.343179 0.825737 0.445101 0.98609 + 0.0686387 0.32957 0.438346 0.167205 + 0.543927 0.757365 0.740905 0.294784 +``` """ struct PixelShuffle r::Int From 2a0ed9bd902475ccae3250954dcdb814fdddeea2 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 00:26:37 +0530 Subject: [PATCH 02/15] Add doctests in `recurrent.jl` --- docs/src/models/layers.md | 1 + src/layers/recurrent.jl | 135 ++++++++++++++++++++++++++++++++------ 2 files changed, 117 insertions(+), 19 deletions(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 81fbb60a2d..34300ca840 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -42,6 +42,7 @@ Much like the core layers above, but can be used to process sequence data (as we RNN LSTM GRU +GRUv3 Flux.Recur Flux.reset! ``` diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 7c98f2394f..3ef902d3e5 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -63,28 +63,97 @@ in the background. 
`cell` should be a model of the form: For example, here's a recurrent network that keeps a running total of its inputs: -```julia -accum(h, x) = (h + x, x) -rnn = Flux.Recur(accum, 0) -rnn(2) # 2 -rnn(3) # 3 -rnn.state # 5 -rnn.(1:10) # apply to a sequence -rnn.state # 60 +# Examples +```jldoctest +julia> accum(h, x) = (h + x, x) +accum (generic function with 1 method) + +julia> rnn = Flux.Recur(accum, 0) +Recur(accum) + +julia> rnn(2) +2 + +julia> rnn(3) +3 + +julia> rnn.state +5 + +julia> rnn.(1:10) # apply to a sequence +10-element Vector{Int64}: + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + +julia> rnn.state +60 ``` Folding over a 3d Array of dimensions `(features, batch, time)` is also supported: -```julia -accum(h, x) = (h .+ x, x) -rnn = Flux.Recur(accum, zeros(Int, 1, 1)) -rnn([2]) # 2 -rnn([3]) # 3 -rnn.state # 5 -rnn(reshape(1:10, 1, 1, :)) # apply to a sequence of (features, batch, time) -rnn.state # 60 -``` +```jldoctest +julia> accum(h, x) = (h .+ x, x) +accum (generic function with 1 method) + +julia> rnn = Flux.Recur(accum, zeros(Int, 1, 1)) +Recur(accum) + +julia> rnn([2]) +1-element Vector{Int64}: + 2 + +julia> rnn([3]) +1-element Vector{Int64}: + 3 + +julia> rnn.state +1×1 Matrix{Int64}: + 5 + +julia> rnn(reshape(1:10, 1, 1, :)) # apply to a sequence of (features, batch, time) +1×1×10 Array{Int64, 3}: +[:, :, 1] = + 1 + +[:, :, 2] = + 2 + +[:, :, 3] = + 3 + +[:, :, 4] = + 4 +[:, :, 5] = + 5 + +[:, :, 6] = + 6 + +[:, :, 7] = + 7 + +[:, :, 8] = + 8 + +[:, :, 9] = + 9 + +[:, :, 10] = + 10 + +julia> rnn.state +1×1 Matrix{Int64}: + 60 +``` """ mutable struct Recur{T,S} cell::T @@ -107,8 +176,36 @@ Base.show(io::IO, m::Recur) = print(io, "Recur(", m.cell, ")") Reset the hidden state of a recurrent layer back to its original value. Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to: -```julia -rnn.state = hidden(rnn.cell) + + rnn.state = hidden(rnn.cell) + +# Examples +```jldoctest +julia> r = RNN(3 => 5); + +julia> r.state +5×1 Matrix{Float32}: + 0.0 + 0.0 + 0.0 + 0.0 + 0.0 + +julia> r(rand(Float32, 3)); r.state +5×1 Matrix{Float32}: + -0.32719195 + -0.45280662 + -0.50386846 + -0.14782222 + 0.23584609 + +julia> Flux.reset!(r) +5×1 Matrix{Float32}: + 0.0 + 0.0 + 0.0 + 0.0 + 0.0 ``` """ reset!(m::Recur) = (m.state = m.cell.state0) From 4b9e2fba2dac131568375d110f1efc5ca949d118 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 13:58:04 +0530 Subject: [PATCH 03/15] Add doctests in `normalise.jl` --- src/layers/normalise.jl | 138 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 131 insertions(+), 7 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 4c696d916d..da85bc7d61 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -55,7 +55,7 @@ ChainRulesCore.@non_differentiable dropout_mask(::Any, ::Any, ::Any) """ Dropout(p; dims=:, rng = rng_from_array()) -Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input. +Dropout layer. In the forward pass, applies the [`Flux.dropout`](@ref) function on the input. To apply dropout along certain dimension(s), specify the `dims` keyword. e.g. `Dropout(p; dims = 3)` will randomly zero out entire channels on WHCN input @@ -65,6 +65,35 @@ Specify `rng` to use a custom RNG instead of the default. Custom RNGs are only supported on the CPU. Does nothing to the input once [`Flux.testmode!`](@ref) is `true`. 
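As a quick sketch of the `dims` behaviour described above (illustrative only, not part of this patch's hunks; the input size and probability are made up), `dims = 3` keeps or zeroes whole channels of a WHCN input:

```julia
using Flux

m = Dropout(0.4; dims = 3)      # the random mask varies only along the channel dimension
Flux.trainmode!(m)              # enable dropout outside of a training loop

x = ones(Float32, 2, 2, 3, 1)   # WHCN input with 3 channels
y = m(x)

# Each channel comes out either entirely zeroed or entirely kept (and rescaled by 1/(1 - p)),
# so no channel is ever partially dropped.
all(c -> all(iszero, y[:, :, c, 1]) || all(!iszero, y[:, :, c, 1]), 1:3)  # true
```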
+ +# Examples +```jldoctest +julia> m = Chain(Dense(2 => 2), Dropout(1)) +Chain( + Dense(2 => 2), # 6 parameters + Dropout(1), +) + +julia> Flux.trainmode!(m); # activating the layer without actually training it + +julia> m([1, 2]) # drops neurons with a probability of 1 +2-element Vector{Float32}: + -0.0 + -0.0 + +julia> m = Chain(Dense(2 => 2), Dropout(0.5)) +Chain( + Dense(2 => 2), # 6 parameters + Dropout(0.5), +) + +julia> Flux.trainmode!(m); # activating the layer without actually training it + +julia> m([1, 2]) # drops neurons with a probability of 0.5 +2-element Vector{Float32}: + -4.537827 + -0.0 +``` """ mutable struct Dropout{F,D,R<:AbstractRNG} p::F @@ -105,6 +134,33 @@ The AlphaDropout layer ensures that mean and variance of activations remain the same as before. Does nothing to the input once [`testmode!`](@ref) is true. + +# Examples +```jldoctest +julia> x = randn(20,1); + +julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5)) +Chain( + Dense(20 => 10, selu), # 210 parameters + AlphaDropout{Float64, Random.TaskLocalRNG}(0.5, nothing, Random.TaskLocalRNG()), +) + +julia> Flux.trainmode!(m); + +julia> y = m(x); + +julia> Flux.std(x) +1.097500619939126 + +julia> Flux.std(y) # maintains the standard deviation of the input +1.1504012188827453 + +julia> Flux.mean(x) # maintains the mean of the input +-0.3217018554158738 + +julia> Flux.mean(y) +-0.2526866470385106 +``` """ mutable struct AlphaDropout{F,R<:AbstractRNG} p::F @@ -154,6 +210,27 @@ If `affine=true`, it also applies a learnable shift and rescaling using the [`Scale`](@ref) layer. See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref). + +# Examples +```jldoctest +julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images + +julia> m = LayerNorm(3); + +julia> y = m(xs); + +julia> Flux.std(xs[:, :, :, 1]) +0.28713812337208383 + +julia> Flux.std(y[:, :, :, 1]) # normalises each image (or all channels in an image) +1.018993632693022 + +julia> Flux.std(xs[:, :, :, 2]) +0.22540260537916373 + +julia> Flux.std(y[:, :, :, 2]) # normalises each image (or all channels in an image) +1.018965249873791 +``` """ struct LayerNorm{F,D,T,N} λ::F @@ -256,12 +333,17 @@ Use [`testmode!`](@ref) during inference. # Examples ```julia -m = Chain( - Dense(28^2 => 64), - BatchNorm(64, relu), - Dense(64 => 10), - BatchNorm(10), - softmax) +julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images + +julia> Flux.std(xs) +2.6822461565718467 + +julia> m = BatchNorm(3); + +julia> Flux.trainmode!(m); # activating the layer without actually training it + +julia> Flux.std(m(xs)) # normalises the complete batch +1.0093209961092855 ``` """ mutable struct BatchNorm{F,V,N,W} @@ -339,6 +421,27 @@ that will be used to renormalize the input in test phase. **Warning**: the defaults for `affine` and `track_stats` used to be `true` in previous Flux versions (< v0.12). 
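A small sketch of how the keywords in that warning look in practice (not taken from this patch, and the parameter counts assume the current defaults `affine = false`, `track_stats = false`):

```julia
using Flux

# With the current defaults the layer has no trainable parameters.
length(Flux.params(InstanceNorm(3)))                      # 0

# Passing the keywords explicitly restores the pre-v0.12 behaviour:
# a learnable shift β and scale γ, plus running statistics for test time.
m = InstanceNorm(3; affine = true, track_stats = true)
length(Flux.params(m))                                    # 2 (β and γ)
```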
+ +# Examples +```jldoctest +julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images + +julia> m = InstanceNorm(3); + +julia> y = m(xs); + +julia> Flux.std(xs[:, :, 1, 1]) # original standard deviation of the first channel of image 1 +0.2989802650787384 + +julia> Flux.std(y[:, :, 1, 1]) # each channel of the batch is normalised +1.0606027381538408 + +julia> Flux.std(xs[:, :, 2, 2]) # original standard deviation of the second channel of image 2 +0.28662705400461197 + +julia> Flux.std(y[:, :, 2, 2]) # each channel of the batch is normalised +1.06058729821187 +``` """ mutable struct InstanceNorm{F,V,N,W} λ::F # activation function @@ -416,6 +519,27 @@ through to learnable per-channel bias `β` and scale `γ` parameters. If `track_stats=true`, accumulates mean and var statistics in training phase that will be used to renormalize the input in test phase. + +# Examples +```jldoctest +julia> xs = rand(3, 3, 4, 2); # a batch of 2 3X3X4 images + +julia> m = GroupNorm(4, 2); + +julia> y = m(xs); + +julia> Flux.std(xs[:, :, 1:2, 1]) # original standard deviation of the first 2 channels of image 1 +0.307588490584917 + +julia> Flux.std(y[:, :, 1:2, 1]) # normalises channels in groups of 2 (as specified) +1.0289339365431291 + +julia> Flux.std(xs[:, :, 3:4, 2]) # original standard deviation of the last 2 channels of image 2 +0.3111566100804274 + +julia> Flux.std(y[:, :, 3:4, 2]) # normalises channels in groups of 2 (as specified) +1.0289352493058574 +``` """ mutable struct GroupNorm{F,V,N,W} G::Int # number of groups From 69e996ababf6e4a8951b768bdc0a2ac1b84e64b0 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 14:05:15 +0530 Subject: [PATCH 04/15] Typos --- src/layers/normalise.jl | 2 +- src/layers/upsample.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index da85bc7d61..fecd5a3732 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -87,7 +87,7 @@ Chain( Dropout(0.5), ) -julia> Flux.trainmode!(m); # activating the layer without actually training it +julia> Flux.trainmode!(m); julia> m([1, 2]) # drops neurons with a probability of 0.5 2-element Vector{Float32}: diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl index 3649f0f01e..47bf84b49c 100644 --- a/src/layers/upsample.jl +++ b/src/layers/upsample.jl @@ -102,7 +102,7 @@ julia> xs = rand(2, 2, 4, 1) # an image with 4 channels having 2X2 pixels in ea 0.825737 0.98609 0.757365 0.294784 -julia> p(xs) # an image with only 1 channel with 4X4 pixels in the single channel +julia> p(xs) # upsampled image with only 1 channel 4×4×1×1 Array{Float64, 4}: [:, :, 1, 1] = 0.826452 0.105997 0.0519244 0.422996 From ef69936663b0d79f54f2f24ba5166b915a0850d6 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 15:23:03 +0530 Subject: [PATCH 05/15] Remove redundant randomness, add docfilters, and make them stricter for Dropout layer --- src/layers/normalise.jl | 16 ++++++---------- src/layers/recurrent.jl | 30 +++++++++++------------------- src/layers/upsample.jl | 31 +++++-------------------------- 3 files changed, 22 insertions(+), 55 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index fecd5a3732..e6caca7122 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -67,7 +67,7 @@ Custom RNGs are only supported on the CPU. Does nothing to the input once [`Flux.testmode!`](@ref) is `true`. # Examples -```jldoctest +```jldoctest; filter = r"[+-]?(?:(?:[0-9])(?:\\.\\d+)?)|(?:1)(?:\\.0+)?" 
julia> m = Chain(Dense(2 => 2), Dropout(1)) Chain( Dense(2 => 2), # 6 parameters @@ -136,14 +136,10 @@ remain the same as before. Does nothing to the input once [`testmode!`](@ref) is true. # Examples -```jldoctest +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" julia> x = randn(20,1); -julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5)) -Chain( - Dense(20 => 10, selu), # 210 parameters - AlphaDropout{Float64, Random.TaskLocalRNG}(0.5, nothing, Random.TaskLocalRNG()), -) +julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5)); julia> Flux.trainmode!(m); @@ -212,7 +208,7 @@ using the [`Scale`](@ref) layer. See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref). # Examples -```jldoctest +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images julia> m = LayerNorm(3); @@ -423,7 +419,7 @@ that will be used to renormalize the input in test phase. in previous Flux versions (< v0.12). # Examples -```jldoctest +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images julia> m = InstanceNorm(3); @@ -521,7 +517,7 @@ If `track_stats=true`, accumulates mean and var statistics in training phase that will be used to renormalize the input in test phase. # Examples -```jldoctest +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" julia> xs = rand(3, 3, 4, 2); # a batch of 2 3X3X4 images julia> m = GroupNorm(4, 2); diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 3ef902d3e5..64726aaa40 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -180,31 +180,23 @@ Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to: rnn.state = hidden(rnn.cell) # Examples -```jldoctest -julia> r = RNN(3 => 5); +```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +julia> r = RNN(1 => 1); + +julia> a = Vector{Float32}([1]) +1-element Vector{Float32}: + 1.0 julia> r.state -5×1 Matrix{Float32}: - 0.0 - 0.0 - 0.0 - 0.0 +1×1 Matrix{Float32}: 0.0 -julia> r(rand(Float32, 3)); r.state -5×1 Matrix{Float32}: - -0.32719195 - -0.45280662 - -0.50386846 - -0.14782222 - 0.23584609 +julia> r(a); r.state +1×1 Matrix{Float32}: + 0.61431444 julia> Flux.reset!(r) -5×1 Matrix{Float32}: - 0.0 - 0.0 - 0.0 - 0.0 +1×1 Matrix{Float32}: 0.0 ``` """ diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl index 47bf84b49c..662e056adc 100644 --- a/src/layers/upsample.jl +++ b/src/layers/upsample.jl @@ -81,34 +81,13 @@ resolution images while upscaling them. See [`NNlib.pixel_shuffle`](@ref). 
# Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +```jldoctest julia> p = PixelShuffle(2); -julia> xs = rand(2, 2, 4, 1) # an image with 4 channels having 2X2 pixels in each channel -2×2×4×1 Array{Float64, 4}: -[:, :, 1, 1] = - 0.826452 0.0519244 - 0.0686387 0.438346 - -[:, :, 2, 1] = - 0.343179 0.445101 - 0.543927 0.740905 - -[:, :, 3, 1] = - 0.105997 0.422996 - 0.32957 0.167205 - -[:, :, 4, 1] = - 0.825737 0.98609 - 0.757365 0.294784 - -julia> p(xs) # upsampled image with only 1 channel -4×4×1×1 Array{Float64, 4}: -[:, :, 1, 1] = - 0.826452 0.105997 0.0519244 0.422996 - 0.343179 0.825737 0.445101 0.98609 - 0.0686387 0.32957 0.438346 0.167205 - 0.543927 0.757365 0.740905 0.294784 +julia> xs = rand(2, 2, 4, 1); # an image with 4 channels having 2X2 pixels in each channel + +julia> p(xs) |> size # upsampled image with only 1 channel +(4, 4, 1, 1) ``` """ struct PixelShuffle From e7ad7f8478d3de7f86c5e50fdcc03f7f1634905b Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 16:08:15 +0530 Subject: [PATCH 06/15] Remove doctest from `Dropout` layer --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index e6caca7122..9b31813f89 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -67,7 +67,7 @@ Custom RNGs are only supported on the CPU. Does nothing to the input once [`Flux.testmode!`](@ref) is `true`. # Examples -```jldoctest; filter = r"[+-]?(?:(?:[0-9])(?:\\.\\d+)?)|(?:1)(?:\\.0+)?" +```julia julia> m = Chain(Dense(2 => 2), Dropout(1)) Chain( Dense(2 => 2), # 6 parameters From 142918e6a3326091651fae4d2dd42ff9e8ec9c73 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sat, 11 Jun 2022 23:33:59 +0530 Subject: [PATCH 07/15] Update src/layers/recurrent.jl Co-authored-by: Brian Chen --- src/layers/recurrent.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 64726aaa40..c1d8ec057c 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -183,7 +183,7 @@ Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to: ```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" julia> r = RNN(1 => 1); -julia> a = Vector{Float32}([1]) +julia> a = ones(Float32, 1) 1-element Vector{Float32}: 1.0 From a24d7592dcf1553040fdf1aec6d4d15946834acf Mon Sep 17 00:00:00 2001 From: Saransh Date: Sun, 12 Jun 2022 00:53:17 +0530 Subject: [PATCH 08/15] Update docstrings of `Recur` and `PixelShuffle` --- src/layers/recurrent.jl | 9 +++++-- src/layers/upsample.jl | 55 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index c1d8ec057c..5059449f38 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -80,7 +80,7 @@ julia> rnn(3) julia> rnn.state 5 -julia> rnn.(1:10) # apply to a sequence +julia> rnn(1:10) # apply to a sequence 10-element Vector{Int64}: 1 2 @@ -118,7 +118,12 @@ julia> rnn.state 1×1 Matrix{Int64}: 5 -julia> rnn(reshape(1:10, 1, 1, :)) # apply to a sequence of (features, batch, time) +julia> vec = rnn(reshape(1:10, 1, 1, :)); # apply to a sequence of (features, batch, time) + +julia> size(vec) +(1, 1, 10) + +julia> vec 1×1×10 Array{Int64, 3}: [:, :, 1] = 1 diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl index 662e056adc..c71a9acc8d 100644 --- a/src/layers/upsample.jl +++ b/src/layers/upsample.jl @@ -84,10 +84,57 @@ See [`NNlib.pixel_shuffle`](@ref). 
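For reference, a rough sketch of the size rule the example below relies on (illustrative only, not part of this hunk): `PixelShuffle(r)` trades `r^2` channels for an `r`-fold larger spatial grid and simply forwards to `NNlib.pixel_shuffle`.

```julia
using Flux

r = 2
x = rand(Float32, 3, 5, 4, 1)            # (W, H, C*r^2, N) with C = 1

p = PixelShuffle(r)
size(p(x))                               # (6, 10, 1, 1), i.e. (W*r, H*r, C, N)

# The layer is a thin wrapper, so this should agree with the underlying function.
p(x) == Flux.NNlib.pixel_shuffle(x, r)   # true
```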
```jldoctest julia> p = PixelShuffle(2); -julia> xs = rand(2, 2, 4, 1); # an image with 4 channels having 2X2 pixels in each channel - -julia> p(xs) |> size # upsampled image with only 1 channel -(4, 4, 1, 1) +julia> xs = [2row + col + channel/10 for row in 1:2, col in 1:2, channel in 1:4, n in 1:1] +2×2×4×1 Array{Float64, 4}: +[:, :, 1, 1] = + 3.1 4.1 + 5.1 6.1 + +[:, :, 2, 1] = + 3.2 4.2 + 5.2 6.2 + +[:, :, 3, 1] = + 3.3 4.3 + 5.3 6.3 + +[:, :, 4, 1] = + 3.4 4.4 + 5.4 6.4 + +julia> p(xs) +4×4×1×1 Array{Float64, 4}: +[:, :, 1, 1] = + 3.1 3.3 4.1 4.3 + 3.2 3.4 4.2 4.4 + 5.1 5.3 6.1 6.3 + 5.2 5.4 6.2 6.4 + +julia> xs = [3row + col + channel/10 for row in 1:2, col in 1:3, channel in 1:4, n in 1:1] +2×3×4×1 Array{Float64, 4}: +[:, :, 1, 1] = + 4.1 5.1 6.1 + 7.1 8.1 9.1 + +[:, :, 2, 1] = + 4.2 5.2 6.2 + 7.2 8.2 9.2 + +[:, :, 3, 1] = + 4.3 5.3 6.3 + 7.3 8.3 9.3 + +[:, :, 4, 1] = + 4.4 5.4 6.4 + 7.4 8.4 9.4 + +julia> p(xs) +4×6×1×1 Array{Float64, 4}: +[:, :, 1, 1] = + 4.1 4.3 5.1 5.3 6.1 6.3 + 4.2 4.4 5.2 5.4 6.2 6.4 + 7.1 7.3 8.1 8.3 9.1 9.3 + 7.2 7.4 8.2 8.4 9.2 9.4 ``` """ struct PixelShuffle From 10a2b52a73d7369e05b15d2e40d3c62714c90418 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sun, 12 Jun 2022 02:04:08 +0530 Subject: [PATCH 09/15] Clean the doctests of `normalise.jl` --- docs/src/models/layers.md | 1 - src/layers/normalise.jl | 114 ++++++++++++++------------------------ src/layers/recurrent.jl | 16 ------ 3 files changed, 43 insertions(+), 88 deletions(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 34300ca840..ad5a99b737 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -68,7 +68,6 @@ These layers don't affect the structure of the network but may improve training ```@docs Flux.normalise BatchNorm -Flux.dropout Dropout AlphaDropout LayerNorm diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 9b31813f89..f543db0c09 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -67,32 +67,24 @@ Custom RNGs are only supported on the CPU. Does nothing to the input once [`Flux.testmode!`](@ref) is `true`. # Examples -```julia -julia> m = Chain(Dense(2 => 2), Dropout(1)) -Chain( - Dense(2 => 2), # 6 parameters - Dropout(1), -) +```jldoctest +julia> m = Chain(Dense(1 => 1), Dropout(1)); + +julia> Flux.trainmode!(m); -julia> Flux.trainmode!(m); # activating the layer without actually training it +julia> y = m([1]); -julia> m([1, 2]) # drops neurons with a probability of 1 -2-element Vector{Float32}: - -0.0 - -0.0 +julia> count(i->(i == 0), y) == m[2].p # number of zeros == 1 +true -julia> m = Chain(Dense(2 => 2), Dropout(0.5)) -Chain( - Dense(2 => 2), # 6 parameters - Dropout(0.5), -) +julia> m = Chain(Dense(1 => 1), Dropout(0.5)); julia> Flux.trainmode!(m); -julia> m([1, 2]) # drops neurons with a probability of 0.5 -2-element Vector{Float32}: - -4.537827 - -0.0 +julia> y = m([1]); + +julia> m[2].p - 0.5 <= count(i->(i == 0), y) <= m[2].p + 0.5 # number of zeros can be 0 or 1 +true ``` """ mutable struct Dropout{F,D,R<:AbstractRNG} @@ -136,7 +128,9 @@ remain the same as before. Does nothing to the input once [`testmode!`](@ref) is true. 
# Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +```jldoctest +julia> using Statistics + julia> x = randn(20,1); julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5)); @@ -145,17 +139,8 @@ julia> Flux.trainmode!(m); julia> y = m(x); -julia> Flux.std(x) -1.097500619939126 - -julia> Flux.std(y) # maintains the standard deviation of the input -1.1504012188827453 - -julia> Flux.mean(x) # maintains the mean of the input --0.3217018554158738 - -julia> Flux.mean(y) --0.2526866470385106 +julia> isapprox(std(x), std(y), rtol=0.6) +true ``` """ mutable struct AlphaDropout{F,R<:AbstractRNG} @@ -208,24 +193,20 @@ using the [`Scale`](@ref) layer. See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref). # Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +```jldoctest +julia> using Statistics + julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images julia> m = LayerNorm(3); julia> y = m(xs); -julia> Flux.std(xs[:, :, :, 1]) -0.28713812337208383 - -julia> Flux.std(y[:, :, :, 1]) # normalises each image (or all channels in an image) -1.018993632693022 +julia> isapprox(std(y[:, :, :, 1]), 1, atol=0.1) && std(xs[:, :, :, 1]) != std(y[:, :, :, 1]) +true -julia> Flux.std(xs[:, :, :, 2]) -0.22540260537916373 - -julia> Flux.std(y[:, :, :, 2]) # normalises each image (or all channels in an image) -1.018965249873791 +julia> isapprox(std(y[:, :, :, 2]), 1, atol=0.1) && std(xs[:, :, :, 2]) != std(y[:, :, :, 2]) +true ``` """ struct LayerNorm{F,D,T,N} @@ -329,17 +310,16 @@ Use [`testmode!`](@ref) during inference. # Examples ```julia -julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images +julia> using Statistics -julia> Flux.std(xs) -2.6822461565718467 +julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images julia> m = BatchNorm(3); -julia> Flux.trainmode!(m); # activating the layer without actually training it +julia> Flux.trainmode!(m); -julia> Flux.std(m(xs)) # normalises the complete batch -1.0093209961092855 +julia> isapprox(std(m(xs)), 1, atol=0.1) && std(xs) != std(m(xs)) +true ``` """ mutable struct BatchNorm{F,V,N,W} @@ -419,24 +399,20 @@ that will be used to renormalize the input in test phase. in previous Flux versions (< v0.12). # Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +```jldoctest +julia> using Statistics + julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images julia> m = InstanceNorm(3); julia> y = m(xs); -julia> Flux.std(xs[:, :, 1, 1]) # original standard deviation of the first channel of image 1 -0.2989802650787384 +julia> isapprox(std(y[:, :, 1, 1]), 1, atol=0.1) && std(xs[:, :, 1, 1]) != std(y[:, :, 1, 1]) +true -julia> Flux.std(y[:, :, 1, 1]) # each channel of the batch is normalised -1.0606027381538408 - -julia> Flux.std(xs[:, :, 2, 2]) # original standard deviation of the second channel of image 2 -0.28662705400461197 - -julia> Flux.std(y[:, :, 2, 2]) # each channel of the batch is normalised -1.06058729821187 +julia> isapprox(std(y[:, :, 2, 2]), 1, atol=0.1) && std(xs[:, :, 2, 2]) != std(y[:, :, 2, 2]) +true ``` """ mutable struct InstanceNorm{F,V,N,W} @@ -517,24 +493,20 @@ If `track_stats=true`, accumulates mean and var statistics in training phase that will be used to renormalize the input in test phase. 
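As an aside (a sketch, not from this patch), the two extremes of `G` are a handy way to see what the grouping does: one group per channel normalises each channel on its own, much like `InstanceNorm`, while a single group normalises all channels of a sample together.

```julia
using Flux, Statistics

xs = rand(Float32, 6, 6, 4, 2)

# G equal to the number of channels: statistics are per channel and per sample.
std(GroupNorm(4, 4)(xs)[:, :, 1, 1])   # roughly 1 for every single channel

# G equal to 1: one mean/variance over all channels of each sample.
std(GroupNorm(4, 1)(xs)[:, :, :, 1])   # roughly 1 over the whole sample
```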
# Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" +```jldoctest +julia> using Statistics + julia> xs = rand(3, 3, 4, 2); # a batch of 2 3X3X4 images julia> m = GroupNorm(4, 2); julia> y = m(xs); -julia> Flux.std(xs[:, :, 1:2, 1]) # original standard deviation of the first 2 channels of image 1 -0.307588490584917 - -julia> Flux.std(y[:, :, 1:2, 1]) # normalises channels in groups of 2 (as specified) -1.0289339365431291 - -julia> Flux.std(xs[:, :, 3:4, 2]) # original standard deviation of the last 2 channels of image 2 -0.3111566100804274 +julia> isapprox(std(y[:, :, 1:2, 1]), 1, atol=0.1) && std(xs[:, :, 1:2, 1]) != std(y[:, :, 1:2, 1]) +true -julia> Flux.std(y[:, :, 3:4, 2]) # normalises channels in groups of 2 (as specified) -1.0289352493058574 +julia> isapprox(std(y[:, :, 3:4, 2]), 1, atol=0.1) && std(xs[:, :, 3:4, 2]) != std(y[:, :, 3:4, 2]) +true ``` """ mutable struct GroupNorm{F,V,N,W} diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 5059449f38..929f5b2d71 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -79,22 +79,6 @@ julia> rnn(3) julia> rnn.state 5 - -julia> rnn(1:10) # apply to a sequence -10-element Vector{Int64}: - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - -julia> rnn.state -60 ``` Folding over a 3d Array of dimensions `(features, batch, time)` is also supported: From 430f7a09575e593935e76feace27a74a23b58817 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sun, 12 Jun 2022 13:01:56 +0530 Subject: [PATCH 10/15] Update src/layers/normalise.jl Co-authored-by: Brian Chen --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index f543db0c09..ee5a3d78f3 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -74,7 +74,7 @@ julia> Flux.trainmode!(m); julia> y = m([1]); -julia> count(i->(i == 0), y) == m[2].p # number of zeros == 1 +julia> y == [0] true julia> m = Chain(Dense(1 => 1), Dropout(0.5)); From 4beb2a2135b507d12c652d40829f293f6fa4af7a Mon Sep 17 00:00:00 2001 From: Saransh Date: Sun, 12 Jun 2022 13:45:15 +0530 Subject: [PATCH 11/15] Clean the doctests further --- src/layers/normalise.jl | 8 ++++---- src/layers/recurrent.jl | 45 ++++++++++++----------------------------- 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index ee5a3d78f3..7ac31ade8b 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -77,13 +77,13 @@ julia> y = m([1]); julia> y == [0] true -julia> m = Chain(Dense(1 => 1), Dropout(0.5)); +julia> m = Chain(Dense(1000 => 1000), Dropout(0.5)); julia> Flux.trainmode!(m); -julia> y = m([1]); +julia> y = m(ones(1000)); -julia> m[2].p - 0.5 <= count(i->(i == 0), y) <= m[2].p + 0.5 # number of zeros can be 0 or 1 +julia> isapprox(count(==(0), y) / length(y), 0.5, atol=0.1) true ``` """ @@ -139,7 +139,7 @@ julia> Flux.trainmode!(m); julia> y = m(x); -julia> isapprox(std(x), std(y), rtol=0.6) +julia> isapprox(std(x), std(y), atol=0.6) true ``` """ diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 929f5b2d71..baf6190f32 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -102,41 +102,22 @@ julia> rnn.state 1×1 Matrix{Int64}: 5 -julia> vec = rnn(reshape(1:10, 1, 1, :)); # apply to a sequence of (features, batch, time) +julia> out = rnn(reshape(1:10, 1, 1, :)); # apply to a sequence of (features, batch, time) -julia> size(vec) +julia> out |> size (1, 1, 10) -julia> vec -1×1×10 Array{Int64, 3}: -[:, :, 1] = - 
1 - -[:, :, 2] = - 2 - -[:, :, 3] = - 3 - -[:, :, 4] = - 4 - -[:, :, 5] = - 5 - -[:, :, 6] = - 6 - -[:, :, 7] = - 7 - -[:, :, 8] = - 8 - -[:, :, 9] = - 9 - -[:, :, 10] = +julia> vec(out) +10-element Vector{Int64}: + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 10 julia> rnn.state From 004b369059cadc11727fd442242230af8b73a44a Mon Sep 17 00:00:00 2001 From: Saransh Date: Wed, 15 Jun 2022 01:31:40 +0530 Subject: [PATCH 12/15] Fix the shape of images in comments --- src/layers/normalise.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 7ac31ade8b..c3594b6950 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -196,7 +196,7 @@ See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [ ```jldoctest julia> using Statistics -julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images +julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels julia> m = LayerNorm(3); @@ -312,7 +312,7 @@ Use [`testmode!`](@ref) during inference. ```julia julia> using Statistics -julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images +julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels julia> m = BatchNorm(3); @@ -402,7 +402,7 @@ in previous Flux versions (< v0.12). ```jldoctest julia> using Statistics -julia> xs = rand(3, 3, 3, 2); # a batch of 2 3X3X3 images +julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels julia> m = InstanceNorm(3); @@ -496,7 +496,7 @@ that will be used to renormalize the input in test phase. ```jldoctest julia> using Statistics -julia> xs = rand(3, 3, 4, 2); # a batch of 2 3X3X4 images +julia> xs = rand(3, 3, 4, 2); # a batch of 2 images, each having 4 channels julia> m = GroupNorm(4, 2); From ce0e64cf985db4a62329842f74bf0c822d5be2ac Mon Sep 17 00:00:00 2001 From: Saransh Date: Wed, 15 Jun 2022 19:57:58 +0530 Subject: [PATCH 13/15] Use more data points for the failing doctest --- src/layers/normalise.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index c3594b6950..762521703b 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -131,15 +131,15 @@ Does nothing to the input once [`testmode!`](@ref) is true. 
```jldoctest julia> using Statistics -julia> x = randn(20,1); +julia> x = randn(1000,1); -julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5)); +julia> m = Chain(Dense(1000 => 1000, selu), AlphaDropout(0.2)); julia> Flux.trainmode!(m); julia> y = m(x); -julia> isapprox(std(x), std(y), atol=0.6) +julia> isapprox(std(x), std(y), atol=0.2) true ``` """ From 565cf2452d107fd1b968b6d083a682767a26a8a7 Mon Sep 17 00:00:00 2001 From: Saransh Date: Fri, 24 Jun 2022 17:37:24 +0530 Subject: [PATCH 14/15] Use the dims kwarg --- src/layers/normalise.jl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 762521703b..ef145ad102 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -202,10 +202,7 @@ julia> m = LayerNorm(3); julia> y = m(xs); -julia> isapprox(std(y[:, :, :, 1]), 1, atol=0.1) && std(xs[:, :, :, 1]) != std(y[:, :, :, 1]) -true - -julia> isapprox(std(y[:, :, :, 2]), 1, atol=0.1) && std(xs[:, :, :, 2]) != std(y[:, :, :, 2]) +julia> isapprox(std(y, dims=1:3), ones(1, 1, 1, 2), atol=0.1) && std(y, dims=1:3) != std(xs, dims=1:3) true ``` """ @@ -408,10 +405,7 @@ julia> m = InstanceNorm(3); julia> y = m(xs); -julia> isapprox(std(y[:, :, 1, 1]), 1, atol=0.1) && std(xs[:, :, 1, 1]) != std(y[:, :, 1, 1]) -true - -julia> isapprox(std(y[:, :, 2, 2]), 1, atol=0.1) && std(xs[:, :, 2, 2]) != std(y[:, :, 2, 2]) +julia> isapprox(std(y, dims=1:2), ones(1, 1, 3, 2), atol=0.2) && std(y, dims=1:2) != std(xs, dims=1:2) true ``` """ From 2ab42cdc1378b9b73bc5fd219451b80534740e46 Mon Sep 17 00:00:00 2001 From: Saransh Date: Sun, 26 Jun 2022 19:22:40 +0530 Subject: [PATCH 15/15] Update the doctests of `Flux.reset!` --- src/layers/recurrent.jl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index baf6190f32..760933bb96 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -150,23 +150,29 @@ Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to: rnn.state = hidden(rnn.cell) # Examples -```jldoctest; filter = r"[+-]?([0-9]*[.])?[0-9]+" -julia> r = RNN(1 => 1); +```jldoctest +julia> r = Flux.RNNCell(relu, ones(1,1), zeros(1,1), ones(1,1), zeros(1,1)); # users should use the RNN wrapper struct instead + +julia> y = Flux.Recur(r, ones(1,1)); -julia> a = ones(Float32, 1) -1-element Vector{Float32}: +julia> y.state +1×1 Matrix{Float64}: 1.0 -julia> r.state -1×1 Matrix{Float32}: - 0.0 +julia> y(ones(1,1)) # relu(1*1 + 1) +1×1 Matrix{Float64}: + 2.0 -julia> r(a); r.state -1×1 Matrix{Float32}: - 0.61431444 +julia> y.state +1×1 Matrix{Float64}: + 2.0 + +julia> Flux.reset!(y) +1×1 Matrix{Float64}: + 0.0 -julia> Flux.reset!(r) -1×1 Matrix{Float32}: +julia> y.state +1×1 Matrix{Float64}: 0.0 ``` """
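To try the examples added across these patches locally, the docstring doctests can be run with Documenter. A rough sketch follows; the `DocTestSetup` call mirrors the usual Documenter convention and is an assumption here, not something taken from the patches:

```julia
using Documenter, Flux

# Make `using Flux` implicit in every jldoctest block before running them.
DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)

# Check only the docstring doctests, without building the manual pages.
doctest(Flux; manual = false)
```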