From 4ce53a3741759d01364a87623cdd7603eb3436a6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 15 Jun 2021 15:34:08 +0530 Subject: [PATCH 1/9] add rng to dropout --- src/layers/normalise.jl | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index dbd67240c3..b4d5954b5e 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -28,26 +28,26 @@ automatically managed using the [`Dropout`](@ref) layer instead of the The [`Dropout`](@ref) layer is what you should use in most scenarios. """ -function dropout(x, p; dims=:, active::Bool=true) +function dropout(rng::AbstractRNG, x, p; dims=:, active::Bool=true) active || return x - y = dropout_mask(x, p, dims=dims) + y = dropout_mask(rng, x, p, dims=dims) return x .* y end -@adjoint function dropout(x, p; dims=:, active::Bool=true) +@adjoint function dropout(rng, x, p; dims=:, active::Bool=true) active || return x, Δ -> (Δ, nothing) - y = dropout_mask(x, p, dims=dims) - return x .* y, Δ -> (Δ .* y, nothing) + y = dropout_mask(rng, x, p, dims=dims) + return x .* y, Δ -> (nothing, Δ .* y, nothing) end -function dropout_mask(x, p; dims=:) - y = rand!(similar(x, _dropout_shape(x, dims))) +function dropout_mask(rng::AbstractRNG, x, p; dims=:) + y = rand!(rng, similar(x, _dropout_shape(x, dims))) y .= _dropout_kernel.(y, p, 1 - p) return y end """ - Dropout(p; dims=:) + Dropout([rng = GLOBAL_RNG], p; dims=:) Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input. @@ -60,20 +60,26 @@ Does nothing to the input once [`Flux.testmode!`](@ref) is `true`. mutable struct Dropout{F,D} p::F dims::D + rng::AbstractRNG active::Union{Bool, Nothing} end function Dropout(p; dims=:) @assert 0 ≤ p ≤ 1 - Dropout(p, dims, nothing) + Dropout(Random.GLOBAL_RNG, p; dims) +end + +function Dropout(rng, p; dims = :) + @assert 0 ≤ p ≤ 1 + Dropout(p, dims, rng, nothing) end function (a::Dropout)(x) _isactive(a) || return x - return dropout(x, a.p; dims=a.dims, active=true) + return dropout(a.rng, x, a.p; dims=a.dims, active=true) end -testmode!(m::Dropout, mode=true) = +testmode!(m::Dropout, mode = true) = (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) function Base.show(io::IO, d::Dropout) From c63ded693169084855007e8c134acd2d607e5436 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 15 Jun 2021 15:38:57 +0530 Subject: [PATCH 2/9] add rng to kernel call --- src/layers/normalise.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index b4d5954b5e..f54e3deed6 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -1,3 +1,5 @@ +using Random: GLOBAL_RNG + istraining() = false @adjoint istraining() = true, _ -> nothing @@ -10,7 +12,7 @@ _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(s _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) """ - dropout(x, p; dims=:, active=true) + dropout([rng = GLOBAL_RNG], x, p; dims=:, active=true) The dropout function. If `active` is `true`, for each input, either sets that input to `0` (with probability @@ -28,15 +30,19 @@ automatically managed using the [`Dropout`](@ref) layer instead of the The [`Dropout`](@ref) layer is what you should use in most scenarios. 
""" -function dropout(rng::AbstractRNG, x, p; dims=:, active::Bool=true) +function dropout(rng::AbstractRNG, x, p; dims = :, active::Bool = true) active || return x y = dropout_mask(rng, x, p, dims=dims) return x .* y end -@adjoint function dropout(rng, x, p; dims=:, active::Bool=true) +function dropout(x, p; dims = :, active::Bool = true) + dropout(GLOBAL_RNG, x, p, dims = dims, active = active) +end + +@adjoint function dropout(rng, x, p; dims = :, active::Bool = true) active || return x, Δ -> (Δ, nothing) - y = dropout_mask(rng, x, p, dims=dims) + y = dropout_mask(rng, x, p, dims = dims) return x .* y, Δ -> (nothing, Δ .* y, nothing) end @@ -66,7 +72,7 @@ end function Dropout(p; dims=:) @assert 0 ≤ p ≤ 1 - Dropout(Random.GLOBAL_RNG, p; dims) + Dropout(GLOBAL_RNG, p; dims) end function Dropout(rng, p; dims = :) From 1121e7ccca72e83d4dc7e9e86714c58c0c8593c1 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 16 Jun 2021 11:34:45 +0530 Subject: [PATCH 3/9] actually disallow bad kernels --- test/runtests.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index a40433d0f1..6d98f45787 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,7 +25,6 @@ end @testset "Losses" begin include("losses.jl") include("ctc.jl") - if Flux.use_cuda[] include("ctc-gpu.jl") end end @testset "Layers" begin @@ -44,6 +43,9 @@ end @testset "CUDA" begin if Flux.use_cuda[] + using CUDA + CUDA.allowscalar(false) + include("ctc-gpu.jl") include("cuda/runtests.jl") else @warn "CUDA unavailable, not testing GPU support" From 0399c37077975a3e80abd8617101680b8da23d44 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 16 Jun 2021 11:37:11 +0530 Subject: [PATCH 4/9] replace global_rng with default_rng --- src/layers/normalise.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index f54e3deed6..8ebff1f0c5 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -1,4 +1,4 @@ -using Random: GLOBAL_RNG +using Random: default_rng, GLOBAL_RNG istraining() = false @@ -12,7 +12,7 @@ _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(s _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) """ - dropout([rng = GLOBAL_RNG], x, p; dims=:, active=true) + dropout([rng = default_rng()], x, p; dims=:, active=true) The dropout function. If `active` is `true`, for each input, either sets that input to `0` (with probability @@ -37,7 +37,7 @@ function dropout(rng::AbstractRNG, x, p; dims = :, active::Bool = true) end function dropout(x, p; dims = :, active::Bool = true) - dropout(GLOBAL_RNG, x, p, dims = dims, active = active) + dropout(default_rng(), x, p, dims = dims, active = active) end @adjoint function dropout(rng, x, p; dims = :, active::Bool = true) @@ -53,7 +53,7 @@ function dropout_mask(rng::AbstractRNG, x, p; dims=:) end """ - Dropout([rng = GLOBAL_RNG], p; dims=:) + Dropout([rng = default_rng()], p; dims=:) Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input. 
@@ -72,7 +72,7 @@ end function Dropout(p; dims=:) @assert 0 ≤ p ≤ 1 - Dropout(GLOBAL_RNG, p; dims) + Dropout(default_rng(), p; dims) end function Dropout(rng, p; dims = :) From d9f49272af79f901d18e43fa5dd2905fa5bfb8ac Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 16 Jun 2021 13:12:45 +0530 Subject: [PATCH 5/9] Update src/layers/normalise.jl Co-authored-by: Carlo Lucibello --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 8ebff1f0c5..0aabe9cc29 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -1,4 +1,4 @@ -using Random: default_rng, GLOBAL_RNG +using Random: default_rng istraining() = false From 3a2fcea99731aa18c0e84eef84a8b220c308efb8 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 16 Jun 2021 19:49:03 +0530 Subject: [PATCH 6/9] add manual cuda dispatch --- src/layers/normalise.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 8ebff1f0c5..5fed096f35 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -40,6 +40,12 @@ function dropout(x, p; dims = :, active::Bool = true) dropout(default_rng(), x, p, dims = dims, active = active) end +# CUDA currently needs a manual dispatch to avoid +# calling a non-GPU RNG with a CuArray +function dropout(x::CUDA.CuArray, p; dims = :, active::Bool = true) + dropout(CUDA.CURAND.default_rng(), x, p, dims = dims, active = active) +end + @adjoint function dropout(rng, x, p; dims = :, active::Bool = true) active || return x, Δ -> (Δ, nothing) y = dropout_mask(rng, x, p, dims = dims) @@ -70,8 +76,7 @@ mutable struct Dropout{F,D} active::Union{Bool, Nothing} end -function Dropout(p; dims=:) - @assert 0 ≤ p ≤ 1 +function Dropout(p; dims = :) Dropout(default_rng(), p; dims) end @@ -82,7 +87,7 @@ end function (a::Dropout)(x) _isactive(a) || return x - return dropout(a.rng, x, a.p; dims=a.dims, active=true) + return dropout(x, a.p; dims = a.dims, active = true) end testmode!(m::Dropout, mode = true) = From 29964901e7d0e090dc0814cd6d77e0fe62ea185f Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 16 Jun 2021 21:21:26 +0530 Subject: [PATCH 7/9] dont ignore rng --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 240f3e52ad..7c3b9fba82 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -87,7 +87,7 @@ end function (a::Dropout)(x) _isactive(a) || return x - return dropout(x, a.p; dims = a.dims, active = true) + return dropout(a.rng, x, a.p; dims = a.dims, active = true) end testmode!(m::Dropout, mode = true) = From 0cded38e176642e696d7ca4cb7ccb8751614e883 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 17 Jun 2021 17:30:15 +0530 Subject: [PATCH 8/9] fix adjoint --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 7c3b9fba82..1fa9cb22ec 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -47,7 +47,7 @@ function dropout(x::CUDA.CuArray, p; dims = :, active::Bool = true) end @adjoint function dropout(rng, x, p; dims = :, active::Bool = true) - active || return x, Δ -> (Δ, nothing) + active || return x, Δ -> (nothing, Δ, nothing) y = dropout_mask(rng, x, p, dims = dims) return x .* y, Δ -> (nothing, Δ .* y, nothing) end From efc7de3d526b558880e8ce5cf7bbfc0255421067 Mon Sep 17 00:00:00 2001 
From: Dhairya Gandhi Date: Wed, 23 Jun 2021 18:56:03 +0530 Subject: [PATCH 9/9] run doctests on latest julia --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 6d98f45787..e3d4173b5b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -52,7 +52,7 @@ end end end -@static if VERSION == v"1.5" +@static if VERSION >= v"1.5" using Documenter @testset "Docs" begin DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive=true)
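
A minimal usage sketch of the API after this series (not part of the patches themselves): it assumes the `Dropout(rng, p; dims)` constructor and the `dropout(rng, x, p; dims, active)` methods introduced above, plus stock Random/Flux utilities (`MersenneTwister`, `trainmode!`, `gradient`) that these commits do not touch.

    using Flux, Random

    x = rand(Float32, 10, 4)

    # Layer form: the RNG is stored in the layer; omitting it falls back to
    # Random.default_rng(), as in the one-argument constructor above.
    d = Dropout(MersenneTwister(0), 0.5; dims = :)
    Flux.trainmode!(d)        # force the layer active outside of a gradient call
    y1 = d(x)

    # Functional form: the RNG is the first positional argument.
    y2 = Flux.dropout(MersenneTwister(0), x, 0.5; dims = :, active = true)
    y1 == y2                  # same seed, so both calls should draw the same mask

    # The adjoint defined in the patches only propagates a gradient to x;
    # the rng and p slots receive `nothing`.
    rng = MersenneTwister(0)
    g = Flux.gradient(x -> sum(Flux.dropout(rng, x, 0.5; active = true)), x)[1]

On GPU inputs the surface API stays the same, but patch 6 dispatches `dropout(x::CUDA.CuArray, p; ...)` to `CUDA.CURAND.default_rng()`, so a CPU RNG is never asked to fill a `CuArray` mask.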