diff --git a/Project.toml b/Project.toml index c5b7c37af6..6a811cf869 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.2" +version = "0.25.3" [deps] FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" diff --git a/src/pdfnorm.jl b/src/pdfnorm.jl index 28abcabfd2..1bfba6ee4f 100644 --- a/src/pdfnorm.jl +++ b/src/pdfnorm.jl @@ -11,6 +11,8 @@ where ``S`` is the support of ``f(x)``. """ pdfsquaredL2norm +pdfsquaredL2norm(d::Bernoulli) = @evalpoly d.p 1 -2 2 + function pdfsquaredL2norm(d::Beta) α, β = params(d) z = beta(2 * α - 1, 2 * β - 1) / beta(α, β) ^ 2 @@ -18,6 +20,8 @@ function pdfsquaredL2norm(d::Beta) return α > 0.5 && β > 0.5 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::DiscreteNonParametric) = dot(probs(d), probs(d)) + pdfsquaredL2norm(d::Cauchy) = inv2π / d.σ function pdfsquaredL2norm(d::Chi) @@ -34,6 +38,8 @@ function pdfsquaredL2norm(d::Chisq) return ν > 1 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::DiscreteUniform) = 1 / (d.b - d.a + 1) + pdfsquaredL2norm(d::Exponential) = 1 / (2 * d.θ) function pdfsquaredL2norm(d::Gamma) @@ -43,8 +49,17 @@ function pdfsquaredL2norm(d::Gamma) return α > 0.5 ? z : oftype(z, Inf) end +pdfsquaredL2norm(d::Geometric) = d.p ^ 2 / (2 * d.p - d.p ^ 2) + pdfsquaredL2norm(d::Logistic) = 1 / (6 * d.θ) pdfsquaredL2norm(d::Normal) = inv(sqrt4π * d.σ) +# The identity is obvious if you look at the definition of the modified Bessel function of +# first kind I_0. Starting from the L2-norm of the Poisson distribution this can be proven +# by observing this is the Laguerre exponential l−e of x², which is related to said Bessel +# function, see (preprint: +# ). +pdfsquaredL2norm(d::Poisson) = besseli(0, 2 * d.λ) * exp(-2 * d.λ) + pdfsquaredL2norm(d::Uniform) = 1 / (d.b - d.a) diff --git a/test/pdfnorm.jl b/test/pdfnorm.jl index bf9334910e..9b72ab1c5a 100644 --- a/test/pdfnorm.jl +++ b/test/pdfnorm.jl @@ -1,63 +1,119 @@ using Test, Distributions, SpecialFunctions +using QuadGK + +# `numeric_norm` is a helper function to compute numerically the squared L2 +# norms of the distributions. These methods aren't very robust because can't +# deal with divergent norms, or discrete distributions with infinite support. +numeric_norm(d::ContinuousUnivariateDistribution) = + quadgk(x -> pdf(d, x) ^ 2, support(d).lb, support(d).ub)[1] + +function numeric_norm(d::DiscreteUnivariateDistribution) + # When the distribution has infinite support, sum up to an arbitrary large + # value. + upper = isfinite(maximum(d)) ? round(Int, maximum(d)) : 100 + return sum(pdf(d, k) ^ 2 for k in round(Int, minimum(d)):upper) +end @testset "pdf L2 norm" begin # Test error on a non implemented norm. @test_throws MethodError pdfsquaredL2norm(Gumbel()) + @testset "Bernoulli" begin + for d in (Bernoulli(0.5), Bernoulli(0), Bernoulli(0.25)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + # The norm is the same for complementary probabilities + @test pdfsquaredL2norm(Bernoulli(0)) == pdfsquaredL2norm(Bernoulli(1)) + @test pdfsquaredL2norm(Bernoulli(0.25)) == pdfsquaredL2norm(Bernoulli(0.75)) + end + @testset "Beta" begin - @test pdfsquaredL2norm(Beta(1, 1)) ≈ 1 - @test pdfsquaredL2norm(Beta(2, 2)) ≈ 6 / 5 + for d in (Beta(1, 1), Beta(2, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Beta(0.25, 1)) ≈ Inf @test pdfsquaredL2norm(Beta(1, 0.25)) ≈ Inf end + @testset "Categorical" begin + for n in (1, 2, 5, 10) + d = Categorical(collect(1 / n for _ in 1:n)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + for d in (Categorical([0.25, 0.75]), Categorical([1 / 6, 1 / 3, 1 / 2])) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + end + @testset "Cauchy" begin - @test pdfsquaredL2norm(Cauchy(0, 1)) ≈ 1 / (2 * π) - @test pdfsquaredL2norm(Cauchy(0, 2)) ≈ 1 / (4 * π) + for d in (Cauchy(0, 1), Cauchy(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end # The norm doesn't depend on the mean @test pdfsquaredL2norm(Cauchy(100, 1)) == pdfsquaredL2norm(Cauchy(-100, 1)) == pdfsquaredL2norm(Cauchy(0, 1)) end @testset "Chi" begin - @test pdfsquaredL2norm(Chi(2)) ≈ gamma(3 / 2) / 2 + @test pdfsquaredL2norm(Chi(2)) ≈ numeric_norm(Chi(2)) @test pdfsquaredL2norm(Chi(0.25)) ≈ Inf end @testset "Chisq" begin - @test pdfsquaredL2norm(Chisq(2)) ≈ 1 / 4 + @test pdfsquaredL2norm(Chisq(2)) ≈ numeric_norm(Chisq(2)) @test pdfsquaredL2norm(Chisq(1)) ≈ Inf end + @testset "DiscreteUniform" begin + for d in (DiscreteUniform(-1, 1), DiscreteUniform(1, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + end + @testset "Exponential" begin - @test pdfsquaredL2norm(Exponential(1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Exponential(2)) ≈ 1 / 4 + for d in (Exponential(1), Exponential(2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end @testset "Gamma" begin - @test pdfsquaredL2norm(Gamma(1, 1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Gamma(1, 2)) ≈ 1 / 4 - @test pdfsquaredL2norm(Gamma(2, 2)) ≈ 1 / 8 - @test pdfsquaredL2norm(Gamma(1, 0.25)) ≈ 2 + for d in (Gamma(1, 1), Gamma(1, 2), Gamma(2, 2), Gamma(1, 0.25)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Gamma(0.5, 1)) ≈ Inf end + @testset "Geometric" begin + for d in (Geometric(0.20), Geometric(0.25), Geometric(0.50), Geometric(0.75), Geometric(0.80)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + end + @testset "Logistic" begin - @test pdfsquaredL2norm(Logistic(0, 1)) ≈ 1 / 6 - @test pdfsquaredL2norm(Logistic(0, 2)) ≈ 1 / 12 + for d in (Logistic(0, 1), Logistic(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end # The norm doesn't depend on the mean @test pdfsquaredL2norm(Logistic(100, 1)) == pdfsquaredL2norm(Logistic(-100, 1)) == pdfsquaredL2norm(Logistic(0, 1)) end @testset "Normal" begin - @test pdfsquaredL2norm(Normal(0, 1)) ≈ 1 / (2 * sqrt(π)) - @test pdfsquaredL2norm(Normal(0, 2)) ≈ 1 / (4 * sqrt(π)) + for d in (Normal(0, 1), Normal(0, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end @test pdfsquaredL2norm(Normal(1, 0)) ≈ Inf # The norm doesn't depend on the mean @test pdfsquaredL2norm(Normal(100, 1)) == pdfsquaredL2norm(Normal(-100, 1)) == pdfsquaredL2norm(Normal(0, 1)) end + @testset "Poisson" begin + for d in (Poisson(0), Poisson(1), Poisson(pi)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end + end + @testset "Uniform" begin - @test pdfsquaredL2norm(Uniform(-1, 1)) ≈ 1 / 2 - @test pdfsquaredL2norm(Uniform(1, 2)) ≈ 1 + for d in (Uniform(-1, 1), Uniform(1, 2)) + @test pdfsquaredL2norm(d) ≈ numeric_norm(d) + end end end