From d1815336f605172b2f348c537316b811d8253902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 7 Jun 2021 16:32:01 +0100 Subject: [PATCH 1/3] Add squared L2 norms of some discrete distributions --- Project.toml | 2 +- src/pdfnorm.jl | 15 +++++++++++++++ test/pdfnorm.jl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c5b7c37af6..6a811cf869 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.2" +version = "0.25.3" [deps] FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" diff --git a/src/pdfnorm.jl b/src/pdfnorm.jl index 28abcabfd2..9855759ebe 100644 --- a/src/pdfnorm.jl +++ b/src/pdfnorm.jl @@ -11,6 +11,8 @@ where ``S`` is the support of ``f(x)``. """ pdfsquaredL2norm +pdfsquaredL2norm(d::Bernoulli) = @evalpoly d.p 1 -2 2 + function pdfsquaredL2norm(d::Beta) α, β = params(d) z = beta(2 * α - 1, 2 * β - 1) / beta(α, β) ^ 2 @@ -18,6 +20,8 @@ function pdfsquaredL2norm(d::Beta) return α > 0.5 && β > 0.5 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::Categorical) = dot(probs(d), probs(d)) + pdfsquaredL2norm(d::Cauchy) = inv2π / d.σ function pdfsquaredL2norm(d::Chi) @@ -34,6 +38,8 @@ function pdfsquaredL2norm(d::Chisq) return ν > 1 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::DiscreteUniform) = 1 / (d.b - d.a + 1) + pdfsquaredL2norm(d::Exponential) = 1 / (2 * d.θ) function pdfsquaredL2norm(d::Gamma) @@ -43,8 +49,17 @@ function pdfsquaredL2norm(d::Gamma) return α > 0.5 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::Geometric) = d.p ^ 2 / (2 * d.p - d.p ^ 2) + pdfsquaredL2norm(d::Logistic) = 1 / (6 * d.θ) pdfsquaredL2norm(d::Normal) = inv(sqrt4π * d.σ) +# The identity is obvious if you look at the definition of the modified Bessel function of +# first kind I_0. Starting from the L2-norm of the Poisson distribution this can be proven +# by observing this is the Laguerre exponential l−e of x², which is related to said Bessel +# function, see (preprint: +# ). +pdfsquaredL2norm(d::Poisson) = besseli(0, 2 * d.λ) * exp(-2 * d.λ) + pdfsquaredL2norm(d::Uniform) = 1 / (d.b - d.a) diff --git a/test/pdfnorm.jl b/test/pdfnorm.jl index bf9334910e..d8d5c9c7e8 100644 --- a/test/pdfnorm.jl +++ b/test/pdfnorm.jl @@ -4,6 +4,13 @@ using Test, Distributions, SpecialFunctions # Test error on a non implemented norm. @test_throws MethodError pdfsquaredL2norm(Gumbel()) + @testset "Bernoulli" begin + @test pdfsquaredL2norm(Bernoulli(0.5)) ≈ 0.5 + # The norm is the same for complementary probabilities + @test pdfsquaredL2norm(Bernoulli(0)) == pdfsquaredL2norm(Bernoulli(1)) ≈ 1 + @test pdfsquaredL2norm(Bernoulli(0.25)) == pdfsquaredL2norm(Bernoulli(0.75)) ≈ 0.625 + end + @testset "Beta" begin @test pdfsquaredL2norm(Beta(1, 1)) ≈ 1 @test pdfsquaredL2norm(Beta(2, 2)) ≈ 6 / 5 @@ -11,6 +18,14 @@ using Test, Distributions, SpecialFunctions @test pdfsquaredL2norm(Beta(1, 0.25)) ≈ Inf end + @testset "Categorical" begin + for n in (1, 2, 5, 10) + @test pdfsquaredL2norm(Categorical(collect(1 / n for _ in 1:n))) ≈ 1 / n + end + @test pdfsquaredL2norm(Categorical([0.25, 0.75])) ≈ 0.625 + @test pdfsquaredL2norm(Categorical([1 / 6, 1 / 3, 1 / 2])) ≈ 7 / 18 + end + @testset "Cauchy" begin @test pdfsquaredL2norm(Cauchy(0, 1)) ≈ 1 / (2 * π) @test pdfsquaredL2norm(Cauchy(0, 2)) ≈ 1 / (4 * π) @@ -28,6 +43,11 @@ using Test, Distributions, SpecialFunctions @test pdfsquaredL2norm(Chisq(1)) ≈ Inf end + @testset "DiscreteUniform" begin + @test pdfsquaredL2norm(DiscreteUniform(-1, 1)) ≈ 1 / 3 + @test pdfsquaredL2norm(DiscreteUniform(1, 2)) ≈ 1 / 2 + end + @testset "Exponential" begin @test pdfsquaredL2norm(Exponential(1)) ≈ 1 / 2 @test pdfsquaredL2norm(Exponential(2)) ≈ 1 / 4 @@ -41,6 +61,14 @@ using Test, Distributions, SpecialFunctions @test pdfsquaredL2norm(Gamma(0.5, 1)) ≈ Inf end + @testset "Geometric" begin + @test pdfsquaredL2norm(Geometric(0.20)) ≈ 1 / 9 + @test pdfsquaredL2norm(Geometric(0.25)) ≈ 1 / 7 + @test pdfsquaredL2norm(Geometric(0.50)) ≈ 1 / 3 + @test pdfsquaredL2norm(Geometric(0.75)) ≈ 3 / 5 + @test pdfsquaredL2norm(Geometric(0.80)) ≈ 2 / 3 + end + @testset "Logistic" begin @test pdfsquaredL2norm(Logistic(0, 1)) ≈ 1 / 6 @test pdfsquaredL2norm(Logistic(0, 2)) ≈ 1 / 12 @@ -56,6 +84,12 @@ using Test, Distributions, SpecialFunctions @test pdfsquaredL2norm(Normal(100, 1)) == pdfsquaredL2norm(Normal(-100, 1)) == pdfsquaredL2norm(Normal(0, 1)) end + @testset "Poisson" begin + @test pdfsquaredL2norm(Poisson(0)) ≈ 1 + @test pdfsquaredL2norm(Poisson(1)) ≈ besseli(0, 2) * exp(-2) + @test pdfsquaredL2norm(Poisson(pi)) ≈ besseli(0, 2 * pi) * exp(-2) + end + @testset "Uniform" begin @test pdfsquaredL2norm(Uniform(-1, 1)) ≈ 1 / 2 @test pdfsquaredL2norm(Uniform(1, 2)) ≈ 1 From 76417464fe29ab2d539168d080a9ba04a714959e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Tue, 8 Jun 2021 12:48:52 +0100 Subject: [PATCH 2/3] Compute L2 norms of PDFs numerically in tests --- test/pdfnorm.jl | 90 ++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/test/pdfnorm.jl b/test/pdfnorm.jl index d8d5c9c7e8..9b72ab1c5a 100644 --- a/test/pdfnorm.jl +++ b/test/pdfnorm.jl @@ -1,97 +1,119 @@ using Test, Distributions, SpecialFunctions +using QuadGK + +# `numeric_norm` is a helper function to compute numerically the squared L2 +# norms of the distributions. These methods aren't very robust because can't +# deal with divergent norms, or discrete distributions with infinite support. +numeric_norm(d::ContinuousUnivariateDistribution) = + quadgk(x -> pdf(d, x) ^ 2, support(d).lb, support(d).ub)[1] + +function numeric_norm(d::DiscreteUnivariateDistribution) + # When the distribution has infinite support, sum up to an arbitrary large + # value. + upper = isfinite(maximum(d)) ? round(Int, maximum(d)) : 100 + return sum(pdf(d, k) ^ 2 for k in round(Int, minimum(d)):upper) +end @testset "pdf L2 norm" begin # Test error on a non implemented norm. @test_throws MethodError pdfsquaredL2norm(Gumbel()) @testset "Bernoulli" begin - @test pdfsquaredL2norm(Bernoulli(0.5)) ≈ 0.5 + for d in (Bernoulli(0.5), Bernoulli(0), Bernoulli(0.25)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end # The norm is the same for complementary probabilities - @test pdfsquaredL2norm(Bernoulli(0)) == pdfsquaredL2norm(Bernoulli(1)) ≈ 1 - @test pdfsquaredL2norm(Bernoulli(0.25)) == pdfsquaredL2norm(Bernoulli(0.75)) ≈ 0.625 + @test pdfsquaredL2norm(Bernoulli(0)) == pdfsquaredL2norm(Bernoulli(1)) + @test pdfsquaredL2norm(Bernoulli(0.25)) == pdfsquaredL2norm(Bernoulli(0.75)) end @testset "Beta" begin - @test pdfsquaredL2norm(Beta(1, 1)) ≈ 1 - @test pdfsquaredL2norm(Beta(2, 2)) ≈ 6 / 5 + for d in (Beta(1, 1), Beta(2, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Beta(0.25, 1)) ≈ Inf @test pdfsquaredL2norm(Beta(1, 0.25)) ≈ Inf end @testset "Categorical" begin for n in (1, 2, 5, 10) - @test pdfsquaredL2norm(Categorical(collect(1 / n for _ in 1:n))) ≈ 1 / n + d = Categorical(collect(1 / n for _ in 1:n)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + for d in (Categorical([0.25, 0.75]), Categorical([1 / 6, 1 / 3, 1 / 2])) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) end - @test pdfsquaredL2norm(Categorical([0.25, 0.75])) ≈ 0.625 - @test pdfsquaredL2norm(Categorical([1 / 6, 1 / 3, 1 / 2])) ≈ 7 / 18 end @testset "Cauchy" begin - @test pdfsquaredL2norm(Cauchy(0, 1)) ≈ 1 / (2 * π) - @test pdfsquaredL2norm(Cauchy(0, 2)) ≈ 1 / (4 * π) + for d in (Cauchy(0, 1), Cauchy(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end # The norm doesn't depend on the mean @test pdfsquaredL2norm(Cauchy(100, 1)) == pdfsquaredL2norm(Cauchy(-100, 1)) == pdfsquaredL2norm(Cauchy(0, 1)) end @testset "Chi" begin - @test pdfsquaredL2norm(Chi(2)) ≈ gamma(3 / 2) / 2 + @test pdfsquaredL2norm(Chi(2)) ≈ numeric_norm(Chi(2)) @test pdfsquaredL2norm(Chi(0.25)) ≈ Inf end @testset "Chisq" begin - @test pdfsquaredL2norm(Chisq(2)) ≈ 1 / 4 + @test pdfsquaredL2norm(Chisq(2)) ≈ numeric_norm(Chisq(2)) @test pdfsquaredL2norm(Chisq(1)) ≈ Inf end @testset "DiscreteUniform" begin - @test pdfsquaredL2norm(DiscreteUniform(-1, 1)) ≈ 1 / 3 - @test pdfsquaredL2norm(DiscreteUniform(1, 2)) ≈ 1 / 2 + for d in (DiscreteUniform(-1, 1), DiscreteUniform(1, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end @testset "Exponential" begin - @test pdfsquaredL2norm(Exponential(1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Exponential(2)) ≈ 1 / 4 + for d in (Exponential(1), Exponential(2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end @testset "Gamma" begin - @test pdfsquaredL2norm(Gamma(1, 1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Gamma(1, 2)) ≈ 1 / 4 - @test pdfsquaredL2norm(Gamma(2, 2)) ≈ 1 / 8 - @test pdfsquaredL2norm(Gamma(1, 0.25)) ≈ 2 + for d in (Gamma(1, 1), Gamma(1, 2), Gamma(2, 2), Gamma(1, 0.25)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Gamma(0.5, 1)) ≈ Inf end @testset "Geometric" begin - @test pdfsquaredL2norm(Geometric(0.20)) ≈ 1 / 9 - @test pdfsquaredL2norm(Geometric(0.25)) ≈ 1 / 7 - @test pdfsquaredL2norm(Geometric(0.50)) ≈ 1 / 3 - @test pdfsquaredL2norm(Geometric(0.75)) ≈ 3 / 5 - @test pdfsquaredL2norm(Geometric(0.80)) ≈ 2 / 3 + for d in (Geometric(0.20), Geometric(0.25), Geometric(0.50), Geometric(0.75), Geometric(0.80)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end @testset "Logistic" begin - @test pdfsquaredL2norm(Logistic(0, 1)) ≈ 1 / 6 - @test pdfsquaredL2norm(Logistic(0, 2)) ≈ 1 / 12 + for d in (Logistic(0, 1), Logistic(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end # The norm doesn't depend on the mean @test pdfsquaredL2norm(Logistic(100, 1)) == pdfsquaredL2norm(Logistic(-100, 1)) == pdfsquaredL2norm(Logistic(0, 1)) end @testset "Normal" begin - @test pdfsquaredL2norm(Normal(0, 1)) ≈ 1 / (2 * sqrt(π)) - @test pdfsquaredL2norm(Normal(0, 2)) ≈ 1 / (4 * sqrt(π)) + for d in (Normal(0, 1), Normal(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Normal(1, 0)) ≈ Inf # The norm doesn't depend on the mean @test pdfsquaredL2norm(Normal(100, 1)) == pdfsquaredL2norm(Normal(-100, 1)) == pdfsquaredL2norm(Normal(0, 1)) end @testset "Poisson" begin - @test pdfsquaredL2norm(Poisson(0)) ≈ 1 - @test pdfsquaredL2norm(Poisson(1)) ≈ besseli(0, 2) * exp(-2) - @test pdfsquaredL2norm(Poisson(pi)) ≈ besseli(0, 2 * pi) * exp(-2) + for d in (Poisson(0), Poisson(1), Poisson(pi)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end @testset "Uniform" begin - @test pdfsquaredL2norm(Uniform(-1, 1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Uniform(1, 2)) ≈ 1 + for d in (Uniform(-1, 1), Uniform(1, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end end From 91a4ffa76694b18cb94d962fb805c62f9d730c38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Tue, 8 Jun 2021 22:26:46 +0100 Subject: [PATCH 3/3] Update src/pdfnorm.jl Co-authored-by: David Widmann --- src/pdfnorm.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdfnorm.jl b/src/pdfnorm.jl index 9855759ebe..1bfba6ee4f 100644 --- a/src/pdfnorm.jl +++ b/src/pdfnorm.jl @@ -20,7 +20,7 @@ function pdfsquaredL2norm(d::Beta) return α > 0.5 && β > 0.5 ? z : oftype(z, Inf) end -pdfsquaredL2norm(d::Categorical) = dot(probs(d), probs(d)) +pdfsquaredL2norm(d::DiscreteNonParametric) = dot(probs(d), probs(d)) pdfsquaredL2norm(d::Cauchy) = inv2π / d.σ