From 428a2d78c9205f52a00a0867e8674fe5247b7963 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Sat, 30 Jan 2021 14:30:16 +0530 Subject: [PATCH 01/13] Implementation of Focal Loss --- src/losses/Losses.jl | 3 ++- src/losses/functions.jl | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/losses/Losses.jl b/src/losses/Losses.jl index e781affaf6..f87f487dfc 100644 --- a/src/losses/Losses.jl +++ b/src/losses/Losses.jl @@ -18,7 +18,8 @@ export mse, mae, msle, dice_coeff_loss, poisson_loss, hinge_loss, squared_hinge_loss, - ctc_loss + ctc_loss, + focal_loss include("utils.jl") include("functions.jl") diff --git a/src/losses/functions.jl b/src/losses/functions.jl index d7fd666b63..2a70fb149c 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -429,6 +429,23 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) 1 - num / den end +""" + focal_loss(yhat, y; dims=1, agg=mean, gamma=2.0, eps = eps(eltype(yhat)) + + 𝛄: modulating factor. + +Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) +Extremely useful for classification when you have highly imbalanced classes. It down-weights +well-classified examples and focuses on hard examples. Loss is much high for misclassified points as compared to well-classified points. Used in single-shot detectors. +""" +function focal_loss(ŷ, y; dims=1, agg=mean, 𝛄=2.0, eps = eps(eltype(ŷ))) + ŷ = ŷ .+ eps + p_t = [y==1 ? ŷ : 1-ŷ for (ŷ, y) in zip(ŷ, y)] + ce = -log.(p_t) + weight = (1 .- p_t) .^ 𝛄 + loss = weight .* ce + agg(sum(loss, dims=dims)) +end ```@meta DocTestFilters = nothing From 3c05c80ca4d6a4413cf1b5700fb754dac9d4cb35 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Sat, 30 Jan 2021 15:11:17 +0530 Subject: [PATCH 02/13] Added tests for focal_loss --- test/losses.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/losses.jl b/test/losses.jl index 6f7a5c8407..8a3157d7a3 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -174,3 +174,16 @@ end end end end + +y = [0 1 1] +ŷ = [0.1 0.7 0.9] +y1 = [1 0 + 0 0 + 0 1] +ŷ1 = [0.6 0.3 + 0.3 0.1 + 0.1 0.6] +@testset "focal_loss" begin + @test Flux.focal_loss(ŷ, y) ≈ 0.011402651755880793 + @test Flux.focal_loss(ŷ1, y1) ≈ 0.11488644991362265 +end From 00a785d3ab163fb5922ea4f4ae92b8cf4cb4d171 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Sun, 31 Jan 2021 00:18:37 +0530 Subject: [PATCH 03/13] Changes done --- src/losses/functions.jl | 27 ++++++++++++++++++-------- test/losses.jl | 42 ++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 2a70fb149c..2d0a33d54b 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,23 +430,34 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - focal_loss(yhat, y; dims=1, agg=mean, gamma=2.0, eps = eps(eltype(yhat)) + binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) - 𝛄: modulating factor. + γ: modulating factor. -Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) -Extremely useful for classification when you have highly imbalanced classes. It down-weights -well-classified examples and focuses on hard examples. Loss is much high for misclassified points as compared to well-classified points. Used in single-shot detectors. """ -function focal_loss(ŷ, y; dims=1, agg=mean, 𝛄=2.0, eps = eps(eltype(ŷ))) +function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) ŷ = ŷ .+ eps - p_t = [y==1 ? ŷ : 1-ŷ for (ŷ, y) in zip(ŷ, y)] + p_t = y .*ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) - weight = (1 .- p_t) .^ 𝛄 + weight = (1 .- p_t) .^ γ loss = weight .* ce agg(sum(loss, dims=dims)) end +""" + categorical_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) + Softmax version of Focal Loss + γ: modulating factor. + +""" +function categorical_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) + ŷ = softmax(ŷ; dims=dims) + ŷ = ŷ .+ eps + ce = -y .* log.(ŷ) + weight = (1 .- ŷ) .^ γ + loss = weight .* ce + agg(sum(loss, dims=dims)) +end ```@meta DocTestFilters = nothing ``` diff --git a/test/losses.jl b/test/losses.jl index 8a3157d7a3..2723c073b0 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -175,15 +175,35 @@ end end end -y = [0 1 1] -ŷ = [0.1 0.7 0.9] -y1 = [1 0 - 0 0 - 0 1] -ŷ1 = [0.6 0.3 - 0.3 0.1 - 0.1 0.6] -@testset "focal_loss" begin - @test Flux.focal_loss(ŷ, y) ≈ 0.011402651755880793 - @test Flux.focal_loss(ŷ1, y1) ≈ 0.11488644991362265 +@testset "binary_focal_loss" begin + y = [0 1 1] + ŷ = [0.1 0.7 0.9] + y1 = [1 0 + 0 0 + 0 1] + ŷ1 = [0.6 0.3 + 0.3 0.1 + 0.1 0.6] + @test Flux.binary_focal_loss(ŷ, y) ≈ 0.011402651755880793 + @test Flux.binary_focal_loss(ŷ1, y1) ≈ 0.11488644991362265 +end + +@testset "categorical_focal_loss" begin + y = [1 0 0 1 0 1 1 + 0 1 0 0 1 0 0 + 0 0 1 0 0 0 0] + + ŷ = [-9.0 -6.0 -3.0 0.0 2.0 5.0 8.0 + -8.0 -5.0 -2.0 0.0 3.0 6.0 9.0 + -7.0 -4.0 -1.0 1.0 4.0 7.0 7.5] + y1 = [1 0 + 0 0 + 0 1] + ŷ1 = [5.0 1.0 + 2.3 2.0 + 3.8 9.0] + @test Flux.categorical_focal_loss(ŷ, y) ≈ 1.0668209889165343 + @test Flux.categorical_focal_loss(ŷ1, y1) ≈ 0.011366240888043638 end + + From b87a6d282848a3a67662c2e828c89b3a9f9cbc29 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Sun, 31 Jan 2021 00:48:23 +0530 Subject: [PATCH 04/13] oops! forgot toadd these --- src/losses/Losses.jl | 3 ++- test/losses.jl | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/losses/Losses.jl b/src/losses/Losses.jl index f87f487dfc..39a09e84c9 100644 --- a/src/losses/Losses.jl +++ b/src/losses/Losses.jl @@ -19,7 +19,8 @@ export mse, mae, msle, poisson_loss, hinge_loss, squared_hinge_loss, ctc_loss, - focal_loss + binary_focal_loss, + categorical_focal_loss include("utils.jl") include("functions.jl") diff --git a/test/losses.jl b/test/losses.jl index 2723c073b0..5466a669bf 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -205,5 +205,3 @@ end @test Flux.categorical_focal_loss(ŷ, y) ≈ 1.0668209889165343 @test Flux.categorical_focal_loss(ŷ1, y1) ≈ 0.011366240888043638 end - - From 36c2f2c91798a212aae505f51dc25fc8244bc6c2 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Sun, 31 Jan 2021 23:21:30 +0530 Subject: [PATCH 05/13] Revised focal_loss --- src/losses/Losses.jl | 3 +-- src/losses/functions.jl | 38 ++++++++++++++++++++++---------------- test/losses.jl | 35 +++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/losses/Losses.jl b/src/losses/Losses.jl index 39a09e84c9..bf944f9231 100644 --- a/src/losses/Losses.jl +++ b/src/losses/Losses.jl @@ -19,8 +19,7 @@ export mse, mae, msle, poisson_loss, hinge_loss, squared_hinge_loss, ctc_loss, - binary_focal_loss, - categorical_focal_loss + binary_focal_loss, focal_loss include("utils.jl") include("functions.jl") diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 2d0a33d54b..3ff2701470 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,33 +430,39 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) + binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, ϵ=epseltype(ŷ)) - γ: modulating factor. +Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) +Can be used in classification tasks in the presence of highly imbalanced classes. +It down-weights well-classified examples and focuses on hard examples. + +γ(default=2.0) is a number called modulating factor. +For γ=0, the loss is mathematically equivalent to binarycrossentropy(ŷ, y) + +See also: ['focal_loss'](@ref) """ -function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) - ŷ = ŷ .+ eps - p_t = y .*ŷ + (1 .- y) .* (1 .- ŷ) +function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) + ŷ = ŷ .+ ϵ + p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) weight = (1 .- p_t) .^ γ loss = weight .* ce - agg(sum(loss, dims=dims)) + agg(loss) end """ - categorical_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) - Softmax version of Focal Loss - γ: modulating factor. + focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, ϵ=epseltype(ŷ)) + +Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) +For γ=0, the loss is mathematically equivalent to crossentropy(ŷ, y) + +See also: [`binary_focal_loss`](@ref) """ -function categorical_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, eps = epseltype(ŷ)) - ŷ = softmax(ŷ; dims=dims) - ŷ = ŷ .+ eps - ce = -y .* log.(ŷ) - weight = (1 .- ŷ) .^ γ - loss = weight .* ce - agg(sum(loss, dims=dims)) +function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) + ŷ = ŷ .+ ϵ + agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=1)) end ```@meta DocTestFilters = nothing diff --git a/test/losses.jl b/test/losses.jl index 5466a669bf..8ae4510c02 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -176,32 +176,35 @@ end end @testset "binary_focal_loss" begin - y = [0 1 1] - ŷ = [0.1 0.7 0.9] + y = [0 1 0 + 1 0 1] + yhat = [0.268941 0.5 0.268941 + 0.731059 0.5 0.731059] + y1 = [1 0 - 0 0 0 1] ŷ1 = [0.6 0.3 - 0.3 0.1 - 0.1 0.6] - @test Flux.binary_focal_loss(ŷ, y) ≈ 0.011402651755880793 - @test Flux.binary_focal_loss(ŷ1, y1) ≈ 0.11488644991362265 + 0.4 0.7] + @test Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 + @test Flux.binary_focal_loss(ŷ1, y1) ≈ 0.05691642237852222 + @test Flux.binary_focal_loss(ŷ, y; γ=0.0) == Flux.binarycrossentropy(ŷ, y) end -@testset "categorical_focal_loss" begin - y = [1 0 0 1 0 1 1 - 0 1 0 0 1 0 0 - 0 0 1 0 0 0 0] +@testset "focal_loss" begin + y = [1 0 0 0 1 + 0 1 0 1 0 + 0 0 1 0 0] - ŷ = [-9.0 -6.0 -3.0 0.0 2.0 5.0 8.0 - -8.0 -5.0 -2.0 0.0 3.0 6.0 9.0 - -7.0 -4.0 -1.0 1.0 4.0 7.0 7.5] + ŷ = [0.0900306 0.0900306 0.0900306 0.0900306 0.0900306 + 0.244728 0.244728 0.244728 0.244728 0.244728 + 0.665241 0.665241 0.665241 0.665241 0.665241] y1 = [1 0 0 0 0 1] ŷ1 = [5.0 1.0 2.3 2.0 3.8 9.0] - @test Flux.categorical_focal_loss(ŷ, y) ≈ 1.0668209889165343 - @test Flux.categorical_focal_loss(ŷ1, y1) ≈ 0.011366240888043638 + @test Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 + @test Flux.focal_loss(ŷ1, y1) ≈ 0.011366240888043638 + @test Flux.focal_loss(ŷ, y; γ=0.0) == Flux.crossentropy(ŷ, y) end From feeb6d7c725eb0f120a43630430faab19efd6f5a Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Mon, 1 Feb 2021 13:56:10 +0530 Subject: [PATCH 06/13] Refactored code and docstring --- src/losses/functions.jl | 22 ++++++++++++++-------- test/losses.jl | 10 +++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 3ff2701470..f9b6f012c9 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,16 +430,17 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, ϵ=epseltype(ŷ)) + binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) -Can be used in classification tasks in the presence of highly imbalanced classes. +which can be used in classification tasks with highly imbalanced classes. It down-weights well-classified examples and focuses on hard examples. +The input, 'ŷ', is expected to be unnormalized. -γ(default=2.0) is a number called modulating factor. -For γ=0, the loss is mathematically equivalent to binarycrossentropy(ŷ, y) +The modulating factor, `γ`, controls the down-weighting strength. +For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref). -See also: ['focal_loss'](@ref) +See also: [`Losses.focal_loss`](@ref) for multi-class setting """ function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) @@ -452,12 +453,17 @@ function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=e end """ - focal_loss(ŷ, y; dims=1, agg=mean, γ=2.0, ϵ=epseltype(ŷ)) + focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) -For γ=0, the loss is mathematically equivalent to crossentropy(ŷ, y) +which can be used in classification tasks with highly imbalanced classes. +It down-weights well-classified examples and focuses on hard examples. +The input, `ŷ`, is expected to be unnormalized. + +The modulating factor, `γ`, controls the down-weighting strength. +For `γ == 0`, the loss is mathematically equivalent to [`Losses.crossentropy`](@ref). -See also: [`binary_focal_loss`](@ref) +See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) diff --git a/test/losses.jl b/test/losses.jl index 8ae4510c02..4983d63679 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -178,7 +178,7 @@ end @testset "binary_focal_loss" begin y = [0 1 0 1 0 1] - yhat = [0.268941 0.5 0.268941 + ŷ = [0.268941 0.5 0.268941 0.731059 0.5 0.731059] y1 = [1 0 @@ -201,10 +201,10 @@ end y1 = [1 0 0 0 0 1] - ŷ1 = [5.0 1.0 - 2.3 2.0 - 3.8 9.0] + ŷ1 = [0.4 0.2 + 0.5 0.5 + 0.1 0.3] @test Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 - @test Flux.focal_loss(ŷ1, y1) ≈ 0.011366240888043638 + @test Flux.focal_loss(ŷ1, y1) ≈ 0.45990566879720157 @test Flux.focal_loss(ŷ, y; γ=0.0) == Flux.crossentropy(ŷ, y) end From 07c33eb73d7527fddfc94138208c6ec59f2b0f64 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Tue, 2 Feb 2021 20:56:03 +0530 Subject: [PATCH 07/13] Changes made --- src/losses/functions.jl | 8 ++++---- test/losses.jl | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index f9b6f012c9..460a256e7a 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -435,7 +435,7 @@ end Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) which can be used in classification tasks with highly imbalanced classes. It down-weights well-classified examples and focuses on hard examples. -The input, 'ŷ', is expected to be unnormalized. +The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). The modulating factor, `γ`, controls the down-weighting strength. For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref). @@ -443,7 +443,7 @@ For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentr See also: [`Losses.focal_loss`](@ref) for multi-class setting """ -function binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) +function binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) @@ -458,7 +458,7 @@ end Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) which can be used in classification tasks with highly imbalanced classes. It down-weights well-classified examples and focuses on hard examples. -The input, `ŷ`, is expected to be unnormalized. +The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). The modulating factor, `γ`, controls the down-weighting strength. For `γ == 0`, the loss is mathematically equivalent to [`Losses.crossentropy`](@ref). @@ -468,7 +468,7 @@ See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ - agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=1)) + agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims)) end ```@meta DocTestFilters = nothing diff --git a/test/losses.jl b/test/losses.jl index 4983d63679..7b44ee1586 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -187,7 +187,7 @@ end 0.4 0.7] @test Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 @test Flux.binary_focal_loss(ŷ1, y1) ≈ 0.05691642237852222 - @test Flux.binary_focal_loss(ŷ, y; γ=0.0) == Flux.binarycrossentropy(ŷ, y) + @test Flux.binary_focal_loss(ŷ, y; γ=0.0) ≈ Flux.binarycrossentropy(ŷ, y) end @testset "focal_loss" begin @@ -206,5 +206,5 @@ end 0.1 0.3] @test Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 @test Flux.focal_loss(ŷ1, y1) ≈ 0.45990566879720157 - @test Flux.focal_loss(ŷ, y; γ=0.0) == Flux.crossentropy(ŷ, y) + @test Flux.focal_loss(ŷ, y; γ=0.0) ≈ Flux.crossentropy(ŷ, y) end From 8ccff262d49595e4bee281e591f911a217c26e84 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Tue, 2 Feb 2021 21:04:13 +0530 Subject: [PATCH 08/13] docstring mistake solved! --- src/losses/functions.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 460a256e7a..5f023459f3 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,7 +430,7 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - binary_focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) + binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) which can be used in classification tasks with highly imbalanced classes. From e6342a46a997518adcf9d4689935abfb83f7aad4 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Fri, 5 Feb 2021 15:19:26 +0530 Subject: [PATCH 09/13] Added GPU tests and requested changes made --- src/losses/functions.jl | 11 ++++------- test/cuda/losses.jl | 13 ++++++++++++- test/losses.jl | 10 ++++------ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 5f023459f3..210e40c9c1 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,20 +430,17 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) + binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) -which can be used in classification tasks with highly imbalanced classes. -It down-weights well-classified examples and focuses on hard examples. The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). -The modulating factor, `γ`, controls the down-weighting strength. For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref). See also: [`Losses.focal_loss`](@ref) for multi-class setting """ -function binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) +function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) @@ -453,7 +450,7 @@ function binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype end """ - focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) + focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) which can be used in classification tasks with highly imbalanced classes. @@ -466,7 +463,7 @@ For `γ == 0`, the loss is mathematically equivalent to [`Losses.crossentropy`]( See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ -function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) +function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims)) end diff --git a/test/cuda/losses.jl b/test/cuda/losses.jl index 0913b0eb6a..a0f7f47d80 100644 --- a/test/cuda/losses.jl +++ b/test/cuda/losses.jl @@ -1,4 +1,4 @@ -using Flux.Losses: crossentropy, binarycrossentropy, logitbinarycrossentropy +using Flux.Losses: crossentropy, binarycrossentropy, logitbinarycrossentropy, binary_focal_loss, focal_loss @testset "Losses" begin @@ -14,6 +14,17 @@ y = [1, 1, 0.] @test binarycrossentropy(σ.(x), y) ≈ binarycrossentropy(gpu(σ.(x)), gpu(y)) @test logitbinarycrossentropy(x, y) ≈ logitbinarycrossentropy(gpu(x), gpu(y)) +x = [0.268941 0.5 0.268941 + 0.731059 0.5 0.731059] +y = [0 1 0 + 1 0 1] +@test binary_focal_loss(x, y) ≈ binary_focal_loss(gpu(x), gpu(y)) + +x = softmax(reshape(-7:7, 3, 5) .* 1f0) +y = [1 0 0 0 1 + 0 1 0 1 0 + 0 0 1 0 0] +@test focal_loss(x, y) ≈ focal_loss(gpu(x), gpu(y)) @testset "GPU grad tests" begin x = rand(Float32, 3,3) diff --git a/test/losses.jl b/test/losses.jl index 7b44ee1586..9abc03abb8 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -13,7 +13,8 @@ const ALL_LOSSES = [Flux.Losses.mse, Flux.Losses.mae, Flux.Losses.msle, Flux.Losses.tversky_loss, Flux.Losses.dice_coeff_loss, Flux.Losses.poisson_loss, - Flux.Losses.hinge_loss, Flux.Losses.squared_hinge_loss] + Flux.Losses.hinge_loss, Flux.Losses.squared_hinge_loss, + Flux.Losses.binary_focal_loss, Flux.Losses.focal_loss] @testset "xlogx & xlogy" begin @@ -179,7 +180,7 @@ end y = [0 1 0 1 0 1] ŷ = [0.268941 0.5 0.268941 - 0.731059 0.5 0.731059] + 0.731059 0.5 0.731059] y1 = [1 0 0 1] @@ -194,10 +195,7 @@ end y = [1 0 0 0 1 0 1 0 1 0 0 0 1 0 0] - - ŷ = [0.0900306 0.0900306 0.0900306 0.0900306 0.0900306 - 0.244728 0.244728 0.244728 0.244728 0.244728 - 0.665241 0.665241 0.665241 0.665241 0.665241] + ŷ = softmax(reshape(-7:7, 3, 5) .* 1f0) y1 = [1 0 0 0 0 1] From 63747a6f97daf0791cad9c02fcce819923ac7fe3 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Fri, 5 Feb 2021 19:13:42 +0530 Subject: [PATCH 10/13] Added Doctest and fixed docstring mistake --- src/losses/functions.jl | 43 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 210e40c9c1..8687147a2f 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -430,13 +430,31 @@ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) end """ - binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) + binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=eps(ŷ)) Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref). +# Example +```jldoctest +julia> y = [0 1 0 + 1 0 1] +2×3 Array{Int64,2}: + 0 1 0 + 1 0 1 + +julia> ŷ = [0.268941 0.5 0.268941 + 0.731059 0.5 0.731059] +2×3 Array{Float64,2}: + 0.268941 0.5 0.268941 + 0.731059 0.5 0.731059 + +julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 +true +``` + See also: [`Losses.focal_loss`](@ref) for multi-class setting """ @@ -450,7 +468,7 @@ function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) end """ - focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) + focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=eps(ŷ)) Return the [focal_loss](https://arxiv.org/pdf/1708.02002.pdf) which can be used in classification tasks with highly imbalanced classes. @@ -460,6 +478,26 @@ The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). The modulating factor, `γ`, controls the down-weighting strength. For `γ == 0`, the loss is mathematically equivalent to [`Losses.crossentropy`](@ref). +# Example +```jldoctest +julia> y = [1 0 0 0 1 + 0 1 0 1 0 + 0 0 1 0 0] +3×5 Array{Int64,2}: + 1 0 0 0 1 + 0 1 0 1 0 + 0 0 1 0 0 + +julia> ŷ = softmax(reshape(-7:7, 3, 5) .* 1f0) +3×5 Array{Float32,2}: + 0.0900306 0.0900306 0.0900306 0.0900306 0.0900306 + 0.244728 0.244728 0.244728 0.244728 0.244728 + 0.665241 0.665241 0.665241 0.665241 0.665241 + +julia> Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 +true +``` + See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ @@ -470,3 +508,4 @@ end ```@meta DocTestFilters = nothing ``` + From 5ce5481fcf40d91e6954f3f14db32d28bd94494e Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Fri, 5 Feb 2021 19:16:32 +0530 Subject: [PATCH 11/13] Done! --- src/losses/functions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 8687147a2f..154291b7be 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -458,7 +458,7 @@ true See also: [`Losses.focal_loss`](@ref) for multi-class setting """ -function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) +function binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) @@ -501,7 +501,7 @@ true See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ -function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) +function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims)) end From 19876931299e99ed98df3839a7da19bd1c46c995 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami Date: Fri, 5 Feb 2021 19:41:12 +0530 Subject: [PATCH 12/13] Added entry in NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index db7853995e..8001bf7c6e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ## v0.12.0 +* Added [Focal Loss function](https://github.com/FluxML/Flux.jl/pull/1489) to Losses module * The Dense layer now supports inputs with [multiple batch dimensions](https://github.com/FluxML/Flux.jl/pull/1405). * Dense and Conv layers no longer perform [implicit type conversion](https://github.com/FluxML/Flux.jl/pull/1394). * Excise datasets in favour of other providers in the julia ecosystem. From 63e4d98c95770b3b0051dd7b791f4f74a5984365 Mon Sep 17 00:00:00 2001 From: Shikhar Goswami <44720861+shikhargoswami@users.noreply.github.com> Date: Fri, 5 Feb 2021 22:20:34 +0530 Subject: [PATCH 13/13] Applied the suggestions Co-authored-by: Carlo Lucibello --- src/losses/functions.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 154291b7be..20d45b7a80 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -458,7 +458,7 @@ true See also: [`Losses.focal_loss`](@ref) for multi-class setting """ -function binary_focal_loss(ŷ, y; agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) +function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ) ce = -log.(p_t) @@ -501,11 +501,10 @@ true See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels """ -function focal_loss(ŷ, y; dims=1, agg=mean, γ=ofeltype(ŷ, 2.0), ϵ=epseltype(ŷ)) +function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) ŷ = ŷ .+ ϵ agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims)) end ```@meta DocTestFilters = nothing ``` -