From 56043162b854617c51430883a7dd16d13019a321 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 2 May 2021 09:52:09 +0200 Subject: [PATCH 1/2] Generalize `LocationScale` to discrete distributions (#1286) * Move `locationscale.jl` * Fix `logcdf` and `logccdf` for DiscreteNonParametric * Generalize `LocationScale` to discrete distributions --- src/univariate/continuous/locationscale.jl | 96 ------------- .../discrete/discretenonparametric.jl | 10 ++ src/univariate/locationscale.jl | 132 ++++++++++++++++++ src/univariates.jl | 3 +- test/locationscale.jl | 114 ++++++++++----- 5 files changed, 223 insertions(+), 132 deletions(-) delete mode 100644 src/univariate/continuous/locationscale.jl create mode 100644 src/univariate/locationscale.jl diff --git a/src/univariate/continuous/locationscale.jl b/src/univariate/continuous/locationscale.jl deleted file mode 100644 index 462bd4296..000000000 --- a/src/univariate/continuous/locationscale.jl +++ /dev/null @@ -1,96 +0,0 @@ -""" - LocationScale(μ,σ,ρ) - -A location-scale transformed distribution with location parameter `μ`, -scale parameter `σ`, and given distribution `ρ`. - -```math -f(x) = \\frac{1}{σ} ρ \\! \\left( \\frac{x-μ}{σ} \\right) -``` - -```julia -LocationScale(μ,σ,ρ) # location-scale transformed distribution -params(d) # Get the parameters, i.e. (μ, σ, and the base distribution) -location(d) # Get the location parameter -scale(d) # Get the scale parameter -``` - -External links -[Location-Scale family on Wikipedia](https://en.wikipedia.org/wiki/Location%E2%80%93scale_family) -""" -struct LocationScale{T<:Real, D<:ContinuousUnivariateDistribution} <: ContinuousUnivariateDistribution - μ::T - σ::T - ρ::D - LocationScale{T, D}(μ::T,σ::T,ρ::D) where {T<:Real, D<:ContinuousUnivariateDistribution} = new{T, D}(μ, σ, ρ) -end - -function LocationScale(μ::T,σ::T,ρ::D; check_args=true) where {T<:Real, D<:ContinuousUnivariateDistribution} - check_args && @check_args(LocationScale, σ > zero(σ)) - return LocationScale{T,D}(μ,σ,ρ) -end - -function LocationScale(μ::Integer, σ::Integer, ρ::ContinuousUnivariateDistribution) - return LocationScale(float(μ), float(σ), ρ) -end - -LocationScale(μ::Real, σ::Real, ρ::D) where {D<:ContinuousUnivariateDistribution} = LocationScale(promote(μ,σ)...,ρ) - -minimum(d::LocationScale) = d.μ + d.σ * minimum(d.ρ) -maximum(d::LocationScale) = d.μ + d.σ * maximum(d.ρ) - -LocationScale(μ::Real, σ::Real, d::LocationScale) = LocationScale(μ + d.μ * σ, σ * d.σ, d.ρ) - -#### Conversions - -convert(::Type{LocationScale{T}}, μ::Real, σ::Real, ρ::D) where {T<:Real, D<:ContinuousUnivariateDistribution} = LocationScale(T(μ),T(σ),ρ) -convert(::Type{LocationScale{T}}, d::LocationScale{S}) where {T<:Real, S<:Real} = LocationScale(T(d.μ),T(d.σ),d.ρ, check_args=false) - -#### Parameters - -location(d::LocationScale) = d.μ -scale(d::LocationScale) = d.σ -params(d::LocationScale) = (d.μ,d.σ,d.ρ) -partype(::LocationScale{T}) where {T} = T - -#### Statistics - -mean(d::LocationScale) = d.μ + d.σ * mean(d.ρ) -median(d::LocationScale) = d.μ + d.σ * median(d.ρ) -mode(d::LocationScale) = d.μ + d.σ * mode(d.ρ) -modes(d::LocationScale) = d.μ .+ d.σ .* modes(d.ρ) - -var(d::LocationScale) = d.σ^2 * var(d.ρ) -std(d::LocationScale) = d.σ * std(d.ρ) -skewness(d::LocationScale) = skewness(d.ρ) -kurtosis(d::LocationScale) = kurtosis(d.ρ) - -isplatykurtic(d::LocationScale) = isplatykurtic(d.ρ) -isleptokurtic(d::LocationScale) = isleptokurtic(d.ρ) -ismesokurtic(d::LocationScale) = ismesokurtic(d.ρ) - -entropy(d::LocationScale) = entropy(d.ρ) + 
log(d.σ) -mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) - -#### Evaluation & Sampling - -pdf(d::LocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ -logpdf(d::LocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) -cdf(d::LocationScale,x::Real) = cdf(d.ρ,(x-d.μ)/d.σ) -logcdf(d::LocationScale,x::Real) = logcdf(d.ρ,(x-d.μ)/d.σ) -quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) - -rand(rng::AbstractRNG, d::LocationScale) = d.μ + d.σ * rand(rng, d.ρ) -cf(d::LocationScale, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ) -gradlogpdf(d::LocationScale, x::Real) = gradlogpdf(d.ρ,(x-d.μ)/d.σ) / d.σ - -#### Syntactic sugar for simple transforms of distributions, e.g., d + x, d - x, and so on - -Base.:+(d::ContinuousUnivariateDistribution, x::Real) = LocationScale(x, one(x), d) -Base.:+(x::Real, d::ContinuousUnivariateDistribution) = d + x -Base.:*(x::Real, d::ContinuousUnivariateDistribution) = LocationScale(zero(x), x, d) -Base.:*(d::ContinuousUnivariateDistribution, x::Real) = x * d -Base.:-(d::ContinuousUnivariateDistribution, x::Real) = d + -x -Base.:/(d::ContinuousUnivariateDistribution, x::Real) = 1/x * d -Base.:+(d::ContinuousUnivariateDistribution, x::Int) = d + float(x) -Base.:*(x::Int, d::ContinuousUnivariateDistribution) = float(x) * d diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 530dd302a..940166cd1 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -133,6 +133,16 @@ end ccdf(d::DiscreteNonParametric{T}, x::Integer) where T = _ccdf(d, convert(T, x)) ccdf(d::DiscreteNonParametric{T}, x::Real) where T = _ccdf(d, convert(T, x)) +# fix incorrect defaults +for f in (:cdf, :ccdf) + _f = Symbol(:_, f) + logf = Symbol(:log, f) + @eval begin + $logf(d::DiscreteNonParametric{T}, x::Integer) where T = log($_f(d, convert(T, x))) + $logf(d::DiscreteNonParametric{T}, x::Real) where T = log($_f(d, convert(T, x))) + end +end + function quantile(d::DiscreteNonParametric, q::Real) 0 <= q <= 1 || throw(DomainError()) x = support(d) diff --git a/src/univariate/locationscale.jl b/src/univariate/locationscale.jl new file mode 100644 index 000000000..bb44c5e8d --- /dev/null +++ b/src/univariate/locationscale.jl @@ -0,0 +1,132 @@ +""" + LocationScale(μ,σ,ρ) + +A location-scale transformed distribution with location parameter `μ`, +scale parameter `σ`, and given univariate distribution `ρ`. + +If ``Z`` is a random variable with distribution `ρ`, then the distribution of the random +variable +```math +X = μ + σ Z +``` +is the location-scale transformed distribution with location parameter `μ` and scale +parameter `σ`. + +If `ρ` is a discrete distribution, the probability mass function of +the transformed distribution is given by +```math +P(X = x) = P\\left(Z = \\frac{x-μ}{σ} \\right). +``` +If `ρ` is a continuous distribution, the probability density function of +the transformed distribution is given by +```math +f(x) = \\frac{1}{σ} ρ \\! \\left( \\frac{x-μ}{σ} \\right). +``` + +```julia +LocationScale(μ,σ,ρ) # location-scale transformed distribution +params(d) # Get the parameters, i.e. 
(μ, σ, and the base distribution) +location(d) # Get the location parameter +scale(d) # Get the scale parameter +``` + +External links +[Location-Scale family on Wikipedia](https://en.wikipedia.org/wiki/Location%E2%80%93scale_family) +""" +struct LocationScale{T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} <: UnivariateDistribution{S} + μ::T + σ::T + ρ::D + function LocationScale{T,S,D}(μ::T, σ::T, ρ::D; check_args=true) where {T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} + check_args && @check_args(LocationScale, σ > zero(σ)) + new{T, S, D}(μ, σ, ρ) + end +end + +function LocationScale(μ::T, σ::T, ρ::UnivariateDistribution; check_args=true) where {T<:Real} + _T = promote_type(eltype(ρ), T) + D = typeof(ρ) + S = value_support(D) + return LocationScale{_T,S,D}(_T(μ), _T(σ), ρ; check_args=check_args) +end + +LocationScale(μ::Real, σ::Real, ρ::UnivariateDistribution) = LocationScale(promote(μ, σ)..., ρ) + +# aliases +const ContinuousLocationScale{T<:Real,D<:ContinuousUnivariateDistribution} = LocationScale{T,Continuous,D} +const DiscreteLocationScale{T<:Real,D<:DiscreteUnivariateDistribution} = LocationScale{T,Discrete,D} + +Base.eltype(::Type{<:LocationScale{T}}) where T = T + +minimum(d::LocationScale) = d.μ + d.σ * minimum(d.ρ) +maximum(d::LocationScale) = d.μ + d.σ * maximum(d.ρ) + +LocationScale(μ::Real, σ::Real, d::LocationScale) = LocationScale(μ + d.μ * σ, σ * d.σ, d.ρ) + +#### Conversions + +convert(::Type{LocationScale{T}}, μ::Real, σ::Real, ρ::D) where {T<:Real, D<:UnivariateDistribution} = LocationScale(T(μ),T(σ),ρ) +convert(::Type{LocationScale{T}}, d::LocationScale{S}) where {T<:Real, S<:Real} = LocationScale(T(d.μ),T(d.σ),d.ρ, check_args=false) + +#### Parameters + +location(d::LocationScale) = d.μ +scale(d::LocationScale) = d.σ +params(d::LocationScale) = (d.μ,d.σ,d.ρ) +partype(::LocationScale{T}) where {T} = T + +#### Statistics + +mean(d::LocationScale) = d.μ + d.σ * mean(d.ρ) +median(d::LocationScale) = d.μ + d.σ * median(d.ρ) +mode(d::LocationScale) = d.μ + d.σ * mode(d.ρ) +modes(d::LocationScale) = d.μ .+ d.σ .* modes(d.ρ) + +var(d::LocationScale) = d.σ^2 * var(d.ρ) +std(d::LocationScale) = d.σ * std(d.ρ) +skewness(d::LocationScale) = skewness(d.ρ) +kurtosis(d::LocationScale) = kurtosis(d.ρ) + +isplatykurtic(d::LocationScale) = isplatykurtic(d.ρ) +isleptokurtic(d::LocationScale) = isleptokurtic(d.ρ) +ismesokurtic(d::LocationScale) = ismesokurtic(d.ρ) + +entropy(d::ContinuousLocationScale) = entropy(d.ρ) + log(d.σ) +entropy(d::DiscreteLocationScale) = entropy(d.ρ) + +mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) + +#### Evaluation & Sampling + +pdf(d::ContinuousLocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ +pdf(d::DiscreteLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) + +logpdf(d::ContinuousLocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) +logpdf(d::DiscreteLocationScale, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) + +# additional definitions are required to fix ambiguity errors and incorrect defaults +for f in (:cdf, :ccdf, :logcdf, :logccdf) + _f = Symbol(:_, f) + @eval begin + $f(d::LocationScale, x::Real) = $_f(d, x) + $f(d::DiscreteLocationScale, x::Real) = $_f(d, x) + $f(d::DiscreteLocationScale, x::Integer) = $_f(d, x) + $_f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) + end +end + +quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) + +rand(rng::AbstractRNG, d::LocationScale) = d.μ + d.σ * rand(rng, d.ρ) +cf(d::LocationScale, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ) 
+gradlogpdf(d::ContinuousLocationScale, x::Real) = gradlogpdf(d.ρ,(x-d.μ)/d.σ) / d.σ + +#### Syntactic sugar for simple transforms of distributions, e.g., d + x, d - x, and so on + +Base.:+(d::UnivariateDistribution, x::Real) = LocationScale(x, one(x), d) +Base.:+(x::Real, d::UnivariateDistribution) = d + x +Base.:*(x::Real, d::UnivariateDistribution) = LocationScale(zero(x), x, d) +Base.:*(d::UnivariateDistribution, x::Real) = x * d +Base.:-(d::UnivariateDistribution, x::Real) = d + -x +Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d + diff --git a/src/univariates.jl b/src/univariates.jl index 47809fef6..4191dadce 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -606,7 +606,6 @@ const continuous_distributions = [ "ksonesided", "laplace", "levy", - "locationscale", "logistic", "noncentralbeta", "noncentralchisq", @@ -631,6 +630,8 @@ const continuous_distributions = [ "weibull" ] +include(joinpath("univariate", "locationscale.jl")) + for dname in discrete_distributions include(joinpath("univariate", "discrete", "$(dname).jl")) end diff --git a/test/locationscale.jl b/test/locationscale.jl index bbfe46a9a..f7ca168e1 100644 --- a/test/locationscale.jl +++ b/test/locationscale.jl @@ -1,18 +1,26 @@ - - - -function test_location_scale_normal(μ::Real, σ::Real, μD::Real, σD::Real, - rng::Union{AbstractRNG, Missing} = missing) - ρ = Normal(μD,σD) +function test_location_scale( + rng::Union{AbstractRNG, Missing}, + μ::Real, σ::Real, ρ::UnivariateDistribution, dref::UnivariateDistribution, +) d = LocationScale(μ,σ,ρ) @test params(d) == (μ,σ,ρ) - - d_dict = Dict( # Different ways to construct the LocationScale object - "original" => d, - "sugar" => σ * ρ + μ, - "composed" => μ + ((2 * ρ) * σ - 2) / 2 + 1 - ) - dref = Normal(μ+σ*μD,σ*σD) + @test eltype(d) === eltype(dref) + + # Different ways to construct the LocationScale object + if dref isa DiscreteDistribution + # floating point division introduces numerical errors + # Better: multiply with rational numbers + d_dict = Dict( + "original" => d, + "sugar" => σ * ρ + μ, + ) + else + d_dict = Dict( + "original" => d, + "sugar" => σ * ρ + μ, + "composed" => μ + ((2 * ρ) * σ - 2) / 2 + 1 + ) + end @test d == deepcopy(d) @@ -71,55 +79,91 @@ function test_location_scale_normal(μ::Real, σ::Real, μD::Real, σD::Real, #### Evaluation & Sampling @testset "Evaluation & Sampling" begin - function test_evaluation_and_sampling(d, dref, rng) - insupport(d,0.4) == insupport(dref,0.4) - @test pdf(d,0.1) ≈ pdf(dref,0.1) - @test pdf.(d,2:4) ≈ pdf.(dref,2:4) - @test logpdf(d,0.4) ≈ logpdf(dref,0.4) - @test loglikelihood(d,[0.1,0.2,0.3]) ≈ loglikelihood(dref,[0.1,0.2,0.3]) - @test loglikelihood(d, 0.4) ≈ loglikelihood(dref, 0.4) - @test cdf(d,μ-0.4) ≈ cdf(dref,μ-0.4) - @test logcdf(d,μ-0.4) ≈ logcdf(dref,μ-0.4) - @test ccdf(d,μ-0.4) ≈ ccdf(dref,μ-0.4) atol=1e-15 - @test logccdf(d,μ-0.4) ≈ logccdf(dref,μ-0.4) atol=1e-16 + function test_evaluation_and_sampling(rng, d, dref) + xs = rand(dref, 5) + x = first(xs) + insupport(d, x) == insupport(dref, x) + # might return `false` for discrete distributions + insupport(d, -x) == insupport(dref, -x) + + @test pdf(d, x) ≈ pdf(dref, x) + @test pdf.(d, xs) ≈ pdf.(dref, xs) + @test logpdf(d, x) ≈ logpdf(dref, x) + @test logpdf.(d, xs) ≈ logpdf.(dref, xs) + @test loglikelihood(d, x) ≈ loglikelihood(dref, x) + @test loglikelihood(d, xs) ≈ loglikelihood(dref, xs) + + @test cdf(d, x) ≈ cdf(dref, x) + @test logcdf(d, x) ≈ logcdf(dref, x) + @test ccdf(d, x) ≈ ccdf(dref, x) atol=1e-15 + @test logccdf(d, x) ≈ logccdf(dref, 
x) atol=1e-15 + @test quantile(d,0.1) ≈ quantile(dref,0.1) @test quantile(d,0.5) ≈ quantile(dref,0.5) @test quantile(d,0.9) ≈ quantile(dref,0.9) + @test cquantile(d,0.1) ≈ cquantile(dref,0.1) @test cquantile(d,0.5) ≈ cquantile(dref,0.5) @test cquantile(d,0.9) ≈ cquantile(dref,0.9) + @test invlogcdf(d,log(0.2)) ≈ invlogcdf(dref,log(0.2)) @test invlogcdf(d,log(0.5)) ≈ invlogcdf(dref,log(0.5)) @test invlogcdf(d,log(0.8)) ≈ invlogcdf(dref,log(0.8)) + @test invlogccdf(d,log(0.2)) ≈ invlogccdf(dref,log(0.2)) @test invlogccdf(d,log(0.5)) ≈ invlogccdf(dref,log(0.5)) @test invlogccdf(d,log(0.8)) ≈ invlogccdf(dref,log(0.8)) - r = Array{partype(d)}(undef, 100000) + r = Array{float(eltype(d))}(undef, 100000) if ismissing(rng) rand!(d,r) else rand!(rng,d,r) end - @test mean(r) ≈ mean(dref) atol=0.01 + @test mean(r) ≈ mean(dref) atol=0.02 @test std(r) ≈ std(dref) atol=0.01 @test cf(d, -0.1) ≈ cf(dref,-0.1) - @test gradlogpdf(d, 0.1) ≈ gradlogpdf(dref, 0.1) + + if dref isa ContinuousDistribution + @test gradlogpdf(d, 0.1) ≈ gradlogpdf(dref, 0.1) + end end @testset "$k" for (k,dtest) in d_dict - test_evaluation_and_sampling(dtest, dref, rng) + test_evaluation_and_sampling(rng, dtest, dref) end end +end + +function test_location_scale_normal( + rng::Union{AbstractRNG, Missing}, μ::Real, σ::Real, μD::Real, σD::Real, +) + ρ = Normal(μD, σD) + dref = Normal(μ + σ * μD, σ * σD) + return test_location_scale(rng, μ, σ, ρ, dref) +end +function test_location_scale_discretenonparametric( + rng::Union{AbstractRNG, Missing}, μ::Real, σ::Real, support, probs, +) + ρ = DiscreteNonParametric(support, probs) + dref = DiscreteNonParametric(μ .+ σ .* support, probs) + return test_location_scale(rng, μ, σ, ρ, dref) end @testset "LocationScale" begin rng = MersenneTwister(123) - test_location_scale_normal(0.3,0.2,0.1,0.2) - test_location_scale_normal(-0.3,0.1,-0.1,0.3) - test_location_scale_normal(1.3,0.4,-0.1,0.5) - test_location_scale_normal(0.3,0.2,0.1,0.2,rng) - test_location_scale_normal(-0.3,0.1,-0.1,0.3,rng) - test_location_scale_normal(1.3,0.4,-0.1,0.5,rng) - test_location_scale_normal(ForwardDiff.Dual(0.3),0.2,0.1,0.2, rng) + + for _rng in (missing, rng) + test_location_scale_normal(_rng, 0.3, 0.2, 0.1, 0.2) + test_location_scale_normal(_rng, -0.3, 0.1, -0.1, 0.3) + test_location_scale_normal(_rng, 1.3, 0.4, -0.1, 0.5) + end + test_location_scale_normal(rng, ForwardDiff.Dual(0.3), 0.2, 0.1, 0.2) + + probs = Distributions.pnormalize!(rand(10)) + for _rng in (missing, rng) + test_location_scale_discretenonparametric(_rng, 1//3, 1//2, 1:10, probs) + test_location_scale_discretenonparametric(_rng, -1//4, 1//3, (-10):(-1), probs) + test_location_scale_discretenonparametric(_rng, 6//5, 3//2, 15:24, probs) + end end From 99b2b57faf21e34498ad443d6c572525d95986fc Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 2 May 2021 10:36:36 +0200 Subject: [PATCH 2/2] Improve type inference of `MixtureModel` (#1308) * Improve type inference of `MixtureModel` * Bump version * Remove comment * Bump minor version --- Project.toml | 2 +- src/mixtures/mixturemodel.jl | 74 +++++------------------------------- test/mixture.jl | 46 +++++++++++----------- 3 files changed, 34 insertions(+), 88 deletions(-) diff --git a/Project.toml b/Project.toml index d4e1d9c1d..16927f0dd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.24.18" +version = "0.25.0" [deps] FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" diff --git 
a/src/mixtures/mixturemodel.jl b/src/mixtures/mixturemodel.jl index cc7a8f5d4..93402c14d 100644 --- a/src/mixtures/mixturemodel.jl +++ b/src/mixtures/mixturemodel.jl @@ -19,11 +19,11 @@ A mixture of distributions, parametrized on: * `C` distribution family of the mixture * `CT` the type for probabilities of the prior """ -struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Real} <: AbstractMixtureModel{VF,VS,C} +struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Categorical} <: AbstractMixtureModel{VF,VS,C} components::Vector{C} - prior::Categorical{CT} + prior::CT - function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::Categorical{CT}) where {VF,VS,C,CT} + function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::CT) where {VF,VS,C,CT} length(cs) == ncategories(pri) || error("The number of components does not match the length of prior.") new{VF,VS,C,CT}(cs, pri) @@ -171,16 +171,8 @@ minimum(d::MixtureModel) = minimum([minimum(dci) for dci in d.components]) maximum(d::MixtureModel) = maximum([maximum(dci) for dci in d.components]) function mean(d::UnivariateMixture) - K = ncomponents(d) p = probs(d) - m = 0.0 - for i = 1:K - pi = p[i] - if pi > 0.0 - c = component(d, i) - m += mean(c) * pi - end - end + m = sum(pi * mean(component(d, i)) for (i, pi) in enumerate(p) if !iszero(pi)) return m end @@ -281,28 +273,13 @@ end #### Evaluation function insupport(d::AbstractMixtureModel, x::AbstractVector) - K = ncomponents(d) p = probs(d) - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 && insupport(component(d, i), x) - return true - end - end - return false + return any(insupport(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end function _cdf(d::UnivariateMixture, x::Real) - K = ncomponents(d) p = probs(d) - r = 0.0 - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 - c = component(d, i) - r += pi * cdf(c, x) - end - end + r = sum(pi * cdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) return r end @@ -310,9 +287,8 @@ cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x) cdf(d::UnivariateMixture{Discrete}, x::Integer) = _cdf(d, x) function _mixpdf1(d::AbstractMixtureModel, x) - ps = probs(d) - cs = components(d) - return sum((ps[i] > 0) * (ps[i] * pdf(cs[i], x)) for i in eachindex(ps)) + p = probs(d) + return sum(pi * pdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x) @@ -335,39 +311,9 @@ function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x) end function _mixlogpdf1(d::AbstractMixtureModel, x) - # using the formula below for numerical stability - # - # logpdf(d, x) = log(sum_i pri[i] * pdf(cs[i], x)) - # = log(sum_i pri[i] * exp(logpdf(cs[i], x))) - # = log(sum_i exp(logpri[i] + logpdf(cs[i], x))) - # = m + log(sum_i exp(logpri[i] + logpdf(cs[i], x) - m)) - # - # m is chosen to be the maximum of logpri[i] + logpdf(cs[i], x) - # such that the argument of exp is in a reasonable range - # - - K = ncomponents(d) p = probs(d) - lp = Vector{eltype(p)}(undef, K) - m = -Inf # m <- the maximum of log(p(cs[i], x)) + log(pri[i]) - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 - # lp[i] <- log(p(cs[i], x)) + log(pri[i]) - lp_i = logpdf(component(d, i), x) + log(pi) - lp[i] = lp_i - if lp_i > m - m = lp_i - end - end - end - v = 0.0 - @inbounds for i = 1:K - if p[i] > 0.0 - v += exp(lp[i] - m) - end - end - return m + log(v) + lp = logsumexp(log(pi) + logpdf(component(d, i), x) for (i, pi) in enumerate(p) if 
!iszero(pi)) + return lp end function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x) diff --git a/test/mixture.jl b/test/mixture.jl index b027324c1..3873df72a 100644 --- a/test/mixture.jl +++ b/test/mixture.jl @@ -18,7 +18,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, end K = ncomponents(g) - pr = probs(g) + pr = @inferred(probs(g)) @assert length(pr) == K # mean @@ -26,7 +26,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, for k = 1:K mu += pr[k] * mean(component(g, k)) end - @test mean(g) ≈ mu + @test @inferred(mean(g)) ≈ mu # evaluation of cdf cf = zeros(T, n) @@ -38,7 +38,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, end for i = 1:n - @test cdf(g, X[i]) ≈ cf[i] + @test @inferred(cdf(g, X[i])) ≈ cf[i] end @test cdf.(g, X) ≈ cf @@ -58,16 +58,16 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, mix_lp0 = log.(mix_p0) for i = 1:n - @test pdf(g, X[i]) ≈ mix_p0[i] - @test logpdf(g, X[i]) ≈ mix_lp0[i] - @test componentwise_pdf(g, X[i]) ≈ vec(P0[i,:]) - @test componentwise_logpdf(g, X[i]) ≈ vec(LP0[i,:]) + @test @inferred(pdf(g, X[i])) ≈ mix_p0[i] + @test @inferred(logpdf(g, X[i])) ≈ mix_lp0[i] + @test @inferred(componentwise_pdf(g, X[i])) ≈ vec(P0[i,:]) + @test @inferred(componentwise_logpdf(g, X[i])) ≈ vec(LP0[i,:]) end - @test pdf.(g, X) ≈ mix_p0 - @test logpdf.(g, X) ≈ mix_lp0 - @test componentwise_pdf(g, X) ≈ P0 - @test componentwise_logpdf(g, X) ≈ LP0 + @test @inferred(map(Base.Fix1(pdf, g), X)) ≈ mix_p0 + @test @inferred(map(Base.Fix1(logpdf, g), X)) ≈ mix_lp0 + @test @inferred(componentwise_pdf(g, X)) ≈ P0 + @test @inferred(componentwise_logpdf(g, X)) ≈ LP0 # sampling if (T <: AbstractFloat) @@ -94,7 +94,7 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, end K = ncomponents(g) - pr = probs(g) + pr = @inferred(probs(g)) @assert length(pr) == K # mean @@ -102,7 +102,7 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, for k = 1:K mu .+= pr[k] .* mean(component(g, k)) end - @test mean(g) ≈ mu + @test @inferred(mean(g)) ≈ mu # evaluation P0 = zeros(n, K) @@ -121,20 +121,20 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, for i = 1:n x_i = X[:,i] - @test pdf(g, x_i) ≈ mix_p0[i] - @test logpdf(g, x_i) ≈ mix_lp0[i] - @test componentwise_pdf(g, x_i) ≈ vec(P0[i,:]) - @test componentwise_logpdf(g, x_i) ≈ vec(LP0[i,:]) + @test @inferred(pdf(g, x_i)) ≈ mix_p0[i] + @test @inferred(logpdf(g, x_i)) ≈ mix_lp0[i] + @test @inferred(componentwise_pdf(g, x_i)) ≈ vec(P0[i,:]) + @test @inferred(componentwise_logpdf(g, x_i)) ≈ vec(LP0[i,:]) end #= @show g @show size(X) @show size(mix_p0) =# - @test pdf(g, X) ≈ mix_p0 - @test logpdf(g, X) ≈ mix_lp0 - @test componentwise_pdf(g, X) ≈ P0 - @test componentwise_logpdf(g, X) ≈ LP0 + @test @inferred(pdf(g, X)) ≈ mix_p0 + @test @inferred(logpdf(g, X)) ≈ mix_lp0 + @test @inferred(componentwise_pdf(g, X)) ≈ P0 + @test @inferred(componentwise_logpdf(g, X)) ≈ LP0 # sampling if ismissing(rng) @@ -172,8 +172,8 @@ end "rand(rng, ...)" => MersenneTwister(123)) @testset "Testing UnivariateMixture" begin - g_u = MixtureModel(Normal, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3]) - @test isa(g_u, MixtureModel{Univariate, Continuous, Normal}) + g_u = MixtureModel(Normal{Float64}, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3]) + @test isa(g_u, MixtureModel{Univariate,Continuous,<:Normal}) @test ncomponents(g_u) == 3 test_mixture(g_u, 1000, 10^6, rng) test_params(g_u)
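# ---------------------------------------------------------------------------
# Editor's note, not part of the patches above: a minimal usage sketch of the
# behaviour these two commits introduce, assuming they are applied to
# Distributions.jl. `Poisson`, `Normal`, and `MixtureModel` are existing
# Distributions.jl types; the variable names below are illustrative only.

using Distributions

# Patch 1: `LocationScale` now also wraps discrete base distributions, and the
# arithmetic sugar (`*`, `+`) builds one for any univariate distribution.
d = 2 * Poisson(3) + 1              # LocationScale(1, 2, Poisson(3))
pdf(d, 7) ≈ pdf(Poisson(3), 3)      # P(X = 7) = P(Z = (7 - 1)/2); no 1/σ factor for discrete ρ
mean(d) ≈ 1 + 2 * mean(Poisson(3))  # moments transform affinely

# Patch 2: mixture densities are computed only over the non-zero prior weights;
# `logpdf` reduces to a logsumexp over log(wᵢ) + logpdf(componentᵢ, x).
m = MixtureModel([Normal(0.0, 1.0), Normal(5.0, 2.0)], [0.3, 0.7])
logpdf(m, 1.2) ≈ log(0.3 * pdf(Normal(0.0, 1.0), 1.2) + 0.7 * pdf(Normal(5.0, 2.0), 1.2))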