From 56043162b854617c51430883a7dd16d13019a321 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 2 May 2021 09:52:09 +0200 Subject: [PATCH 1/2] Generalize `LocationScale` to discrete distributions (#1286) * Move `locationscale.jl` * Fix `logcdf` and `logccdf` for DiscreteNonParametric * Generalize `LocationScale` to discrete distributions --- src/univariate/continuous/locationscale.jl | 96 ------------- .../discrete/discretenonparametric.jl | 10 ++ src/univariate/locationscale.jl | 132 ++++++++++++++++++ src/univariates.jl | 3 +- test/locationscale.jl | 114 ++++++++++----- 5 files changed, 223 insertions(+), 132 deletions(-) delete mode 100644 src/univariate/continuous/locationscale.jl create mode 100644 src/univariate/locationscale.jl diff --git a/src/univariate/continuous/locationscale.jl b/src/univariate/continuous/locationscale.jl deleted file mode 100644 index 462bd4296..000000000 --- a/src/univariate/continuous/locationscale.jl +++ /dev/null @@ -1,96 +0,0 @@ -""" - LocationScale(μ,σ,ρ) - -A location-scale transformed distribution with location parameter `μ`, -scale parameter `σ`, and given distribution `ρ`. - -```math -f(x) = \\frac{1}{σ} ρ \\! \\left( \\frac{x-μ}{σ} \\right) -``` - -```julia -LocationScale(μ,σ,ρ) # location-scale transformed distribution -params(d) # Get the parameters, i.e. (μ, σ, and the base distribution) -location(d) # Get the location parameter -scale(d) # Get the scale parameter -``` - -External links -[Location-Scale family on Wikipedia](https://en.wikipedia.org/wiki/Location%E2%80%93scale_family) -""" -struct LocationScale{T<:Real, D<:ContinuousUnivariateDistribution} <: ContinuousUnivariateDistribution - μ::T - σ::T - ρ::D - LocationScale{T, D}(μ::T,σ::T,ρ::D) where {T<:Real, D<:ContinuousUnivariateDistribution} = new{T, D}(μ, σ, ρ) -end - -function LocationScale(μ::T,σ::T,ρ::D; check_args=true) where {T<:Real, D<:ContinuousUnivariateDistribution} - check_args && @check_args(LocationScale, σ > zero(σ)) - return LocationScale{T,D}(μ,σ,ρ) -end - -function LocationScale(μ::Integer, σ::Integer, ρ::ContinuousUnivariateDistribution) - return LocationScale(float(μ), float(σ), ρ) -end - -LocationScale(μ::Real, σ::Real, ρ::D) where {D<:ContinuousUnivariateDistribution} = LocationScale(promote(μ,σ)...,ρ) - -minimum(d::LocationScale) = d.μ + d.σ * minimum(d.ρ) -maximum(d::LocationScale) = d.μ + d.σ * maximum(d.ρ) - -LocationScale(μ::Real, σ::Real, d::LocationScale) = LocationScale(μ + d.μ * σ, σ * d.σ, d.ρ) - -#### Conversions - -convert(::Type{LocationScale{T}}, μ::Real, σ::Real, ρ::D) where {T<:Real, D<:ContinuousUnivariateDistribution} = LocationScale(T(μ),T(σ),ρ) -convert(::Type{LocationScale{T}}, d::LocationScale{S}) where {T<:Real, S<:Real} = LocationScale(T(d.μ),T(d.σ),d.ρ, check_args=false) - -#### Parameters - -location(d::LocationScale) = d.μ -scale(d::LocationScale) = d.σ -params(d::LocationScale) = (d.μ,d.σ,d.ρ) -partype(::LocationScale{T}) where {T} = T - -#### Statistics - -mean(d::LocationScale) = d.μ + d.σ * mean(d.ρ) -median(d::LocationScale) = d.μ + d.σ * median(d.ρ) -mode(d::LocationScale) = d.μ + d.σ * mode(d.ρ) -modes(d::LocationScale) = d.μ .+ d.σ .* modes(d.ρ) - -var(d::LocationScale) = d.σ^2 * var(d.ρ) -std(d::LocationScale) = d.σ * std(d.ρ) -skewness(d::LocationScale) = skewness(d.ρ) -kurtosis(d::LocationScale) = kurtosis(d.ρ) - -isplatykurtic(d::LocationScale) = isplatykurtic(d.ρ) -isleptokurtic(d::LocationScale) = isleptokurtic(d.ρ) -ismesokurtic(d::LocationScale) = ismesokurtic(d.ρ) - -entropy(d::LocationScale) = entropy(d.ρ) + 
log(d.σ) -mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) - -#### Evaluation & Sampling - -pdf(d::LocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ -logpdf(d::LocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) -cdf(d::LocationScale,x::Real) = cdf(d.ρ,(x-d.μ)/d.σ) -logcdf(d::LocationScale,x::Real) = logcdf(d.ρ,(x-d.μ)/d.σ) -quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) - -rand(rng::AbstractRNG, d::LocationScale) = d.μ + d.σ * rand(rng, d.ρ) -cf(d::LocationScale, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ) -gradlogpdf(d::LocationScale, x::Real) = gradlogpdf(d.ρ,(x-d.μ)/d.σ) / d.σ - -#### Syntactic sugar for simple transforms of distributions, e.g., d + x, d - x, and so on - -Base.:+(d::ContinuousUnivariateDistribution, x::Real) = LocationScale(x, one(x), d) -Base.:+(x::Real, d::ContinuousUnivariateDistribution) = d + x -Base.:*(x::Real, d::ContinuousUnivariateDistribution) = LocationScale(zero(x), x, d) -Base.:*(d::ContinuousUnivariateDistribution, x::Real) = x * d -Base.:-(d::ContinuousUnivariateDistribution, x::Real) = d + -x -Base.:/(d::ContinuousUnivariateDistribution, x::Real) = 1/x * d -Base.:+(d::ContinuousUnivariateDistribution, x::Int) = d + float(x) -Base.:*(x::Int, d::ContinuousUnivariateDistribution) = float(x) * d diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 530dd302a..940166cd1 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -133,6 +133,16 @@ end ccdf(d::DiscreteNonParametric{T}, x::Integer) where T = _ccdf(d, convert(T, x)) ccdf(d::DiscreteNonParametric{T}, x::Real) where T = _ccdf(d, convert(T, x)) +# fix incorrect defaults +for f in (:cdf, :ccdf) + _f = Symbol(:_, f) + logf = Symbol(:log, f) + @eval begin + $logf(d::DiscreteNonParametric{T}, x::Integer) where T = log($_f(d, convert(T, x))) + $logf(d::DiscreteNonParametric{T}, x::Real) where T = log($_f(d, convert(T, x))) + end +end + function quantile(d::DiscreteNonParametric, q::Real) 0 <= q <= 1 || throw(DomainError()) x = support(d) diff --git a/src/univariate/locationscale.jl b/src/univariate/locationscale.jl new file mode 100644 index 000000000..bb44c5e8d --- /dev/null +++ b/src/univariate/locationscale.jl @@ -0,0 +1,132 @@ +""" + LocationScale(μ,σ,ρ) + +A location-scale transformed distribution with location parameter `μ`, +scale parameter `σ`, and given univariate distribution `ρ`. + +If ``Z`` is a random variable with distribution `ρ`, then the distribution of the random +variable +```math +X = μ + σ Z +``` +is the location-scale transformed distribution with location parameter `μ` and scale +parameter `σ`. + +If `ρ` is a discrete distribution, the probability mass function of +the transformed distribution is given by +```math +P(X = x) = P\\left(Z = \\frac{x-μ}{σ} \\right). +``` +If `ρ` is a continuous distribution, the probability density function of +the transformed distribution is given by +```math +f(x) = \\frac{1}{σ} ρ \\! \\left( \\frac{x-μ}{σ} \\right). +``` + +```julia +LocationScale(μ,σ,ρ) # location-scale transformed distribution +params(d) # Get the parameters, i.e. 
(μ, σ, and the base distribution) +location(d) # Get the location parameter +scale(d) # Get the scale parameter +``` + +External links +[Location-Scale family on Wikipedia](https://en.wikipedia.org/wiki/Location%E2%80%93scale_family) +""" +struct LocationScale{T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} <: UnivariateDistribution{S} + μ::T + σ::T + ρ::D + function LocationScale{T,S,D}(μ::T, σ::T, ρ::D; check_args=true) where {T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} + check_args && @check_args(LocationScale, σ > zero(σ)) + new{T, S, D}(μ, σ, ρ) + end +end + +function LocationScale(μ::T, σ::T, ρ::UnivariateDistribution; check_args=true) where {T<:Real} + _T = promote_type(eltype(ρ), T) + D = typeof(ρ) + S = value_support(D) + return LocationScale{_T,S,D}(_T(μ), _T(σ), ρ; check_args=check_args) +end + +LocationScale(μ::Real, σ::Real, ρ::UnivariateDistribution) = LocationScale(promote(μ, σ)..., ρ) + +# aliases +const ContinuousLocationScale{T<:Real,D<:ContinuousUnivariateDistribution} = LocationScale{T,Continuous,D} +const DiscreteLocationScale{T<:Real,D<:DiscreteUnivariateDistribution} = LocationScale{T,Discrete,D} + +Base.eltype(::Type{<:LocationScale{T}}) where T = T + +minimum(d::LocationScale) = d.μ + d.σ * minimum(d.ρ) +maximum(d::LocationScale) = d.μ + d.σ * maximum(d.ρ) + +LocationScale(μ::Real, σ::Real, d::LocationScale) = LocationScale(μ + d.μ * σ, σ * d.σ, d.ρ) + +#### Conversions + +convert(::Type{LocationScale{T}}, μ::Real, σ::Real, ρ::D) where {T<:Real, D<:UnivariateDistribution} = LocationScale(T(μ),T(σ),ρ) +convert(::Type{LocationScale{T}}, d::LocationScale{S}) where {T<:Real, S<:Real} = LocationScale(T(d.μ),T(d.σ),d.ρ, check_args=false) + +#### Parameters + +location(d::LocationScale) = d.μ +scale(d::LocationScale) = d.σ +params(d::LocationScale) = (d.μ,d.σ,d.ρ) +partype(::LocationScale{T}) where {T} = T + +#### Statistics + +mean(d::LocationScale) = d.μ + d.σ * mean(d.ρ) +median(d::LocationScale) = d.μ + d.σ * median(d.ρ) +mode(d::LocationScale) = d.μ + d.σ * mode(d.ρ) +modes(d::LocationScale) = d.μ .+ d.σ .* modes(d.ρ) + +var(d::LocationScale) = d.σ^2 * var(d.ρ) +std(d::LocationScale) = d.σ * std(d.ρ) +skewness(d::LocationScale) = skewness(d.ρ) +kurtosis(d::LocationScale) = kurtosis(d.ρ) + +isplatykurtic(d::LocationScale) = isplatykurtic(d.ρ) +isleptokurtic(d::LocationScale) = isleptokurtic(d.ρ) +ismesokurtic(d::LocationScale) = ismesokurtic(d.ρ) + +entropy(d::ContinuousLocationScale) = entropy(d.ρ) + log(d.σ) +entropy(d::DiscreteLocationScale) = entropy(d.ρ) + +mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) + +#### Evaluation & Sampling + +pdf(d::ContinuousLocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ +pdf(d::DiscreteLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) + +logpdf(d::ContinuousLocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) +logpdf(d::DiscreteLocationScale, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) + +# additional definitions are required to fix ambiguity errors and incorrect defaults +for f in (:cdf, :ccdf, :logcdf, :logccdf) + _f = Symbol(:_, f) + @eval begin + $f(d::LocationScale, x::Real) = $_f(d, x) + $f(d::DiscreteLocationScale, x::Real) = $_f(d, x) + $f(d::DiscreteLocationScale, x::Integer) = $_f(d, x) + $_f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) + end +end + +quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) + +rand(rng::AbstractRNG, d::LocationScale) = d.μ + d.σ * rand(rng, d.ρ) +cf(d::LocationScale, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ) 
+gradlogpdf(d::ContinuousLocationScale, x::Real) = gradlogpdf(d.ρ,(x-d.μ)/d.σ) / d.σ + +#### Syntactic sugar for simple transforms of distributions, e.g., d + x, d - x, and so on + +Base.:+(d::UnivariateDistribution, x::Real) = LocationScale(x, one(x), d) +Base.:+(x::Real, d::UnivariateDistribution) = d + x +Base.:*(x::Real, d::UnivariateDistribution) = LocationScale(zero(x), x, d) +Base.:*(d::UnivariateDistribution, x::Real) = x * d +Base.:-(d::UnivariateDistribution, x::Real) = d + -x +Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d + diff --git a/src/univariates.jl b/src/univariates.jl index 47809fef6..4191dadce 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -606,7 +606,6 @@ const continuous_distributions = [ "ksonesided", "laplace", "levy", - "locationscale", "logistic", "noncentralbeta", "noncentralchisq", @@ -631,6 +630,8 @@ const continuous_distributions = [ "weibull" ] +include(joinpath("univariate", "locationscale.jl")) + for dname in discrete_distributions include(joinpath("univariate", "discrete", "$(dname).jl")) end diff --git a/test/locationscale.jl b/test/locationscale.jl index bbfe46a9a..f7ca168e1 100644 --- a/test/locationscale.jl +++ b/test/locationscale.jl @@ -1,18 +1,26 @@ - - - -function test_location_scale_normal(μ::Real, σ::Real, μD::Real, σD::Real, - rng::Union{AbstractRNG, Missing} = missing) - ρ = Normal(μD,σD) +function test_location_scale( + rng::Union{AbstractRNG, Missing}, + μ::Real, σ::Real, ρ::UnivariateDistribution, dref::UnivariateDistribution, +) d = LocationScale(μ,σ,ρ) @test params(d) == (μ,σ,ρ) - - d_dict = Dict( # Different ways to construct the LocationScale object - "original" => d, - "sugar" => σ * ρ + μ, - "composed" => μ + ((2 * ρ) * σ - 2) / 2 + 1 - ) - dref = Normal(μ+σ*μD,σ*σD) + @test eltype(d) === eltype(dref) + + # Different ways to construct the LocationScale object + if dref isa DiscreteDistribution + # floating point division introduces numerical errors + # Better: multiply with rational numbers + d_dict = Dict( + "original" => d, + "sugar" => σ * ρ + μ, + ) + else + d_dict = Dict( + "original" => d, + "sugar" => σ * ρ + μ, + "composed" => μ + ((2 * ρ) * σ - 2) / 2 + 1 + ) + end @test d == deepcopy(d) @@ -71,55 +79,91 @@ function test_location_scale_normal(μ::Real, σ::Real, μD::Real, σD::Real, #### Evaluation & Sampling @testset "Evaluation & Sampling" begin - function test_evaluation_and_sampling(d, dref, rng) - insupport(d,0.4) == insupport(dref,0.4) - @test pdf(d,0.1) ≈ pdf(dref,0.1) - @test pdf.(d,2:4) ≈ pdf.(dref,2:4) - @test logpdf(d,0.4) ≈ logpdf(dref,0.4) - @test loglikelihood(d,[0.1,0.2,0.3]) ≈ loglikelihood(dref,[0.1,0.2,0.3]) - @test loglikelihood(d, 0.4) ≈ loglikelihood(dref, 0.4) - @test cdf(d,μ-0.4) ≈ cdf(dref,μ-0.4) - @test logcdf(d,μ-0.4) ≈ logcdf(dref,μ-0.4) - @test ccdf(d,μ-0.4) ≈ ccdf(dref,μ-0.4) atol=1e-15 - @test logccdf(d,μ-0.4) ≈ logccdf(dref,μ-0.4) atol=1e-16 + function test_evaluation_and_sampling(rng, d, dref) + xs = rand(dref, 5) + x = first(xs) + insupport(d, x) == insupport(dref, x) + # might return `false` for discrete distributions + insupport(d, -x) == insupport(dref, -x) + + @test pdf(d, x) ≈ pdf(dref, x) + @test pdf.(d, xs) ≈ pdf.(dref, xs) + @test logpdf(d, x) ≈ logpdf(dref, x) + @test logpdf.(d, xs) ≈ logpdf.(dref, xs) + @test loglikelihood(d, x) ≈ loglikelihood(dref, x) + @test loglikelihood(d, xs) ≈ loglikelihood(dref, xs) + + @test cdf(d, x) ≈ cdf(dref, x) + @test logcdf(d, x) ≈ logcdf(dref, x) + @test ccdf(d, x) ≈ ccdf(dref, x) atol=1e-15 + @test logccdf(d, x) ≈ logccdf(dref, 
x) atol=1e-15 + @test quantile(d,0.1) ≈ quantile(dref,0.1) @test quantile(d,0.5) ≈ quantile(dref,0.5) @test quantile(d,0.9) ≈ quantile(dref,0.9) + @test cquantile(d,0.1) ≈ cquantile(dref,0.1) @test cquantile(d,0.5) ≈ cquantile(dref,0.5) @test cquantile(d,0.9) ≈ cquantile(dref,0.9) + @test invlogcdf(d,log(0.2)) ≈ invlogcdf(dref,log(0.2)) @test invlogcdf(d,log(0.5)) ≈ invlogcdf(dref,log(0.5)) @test invlogcdf(d,log(0.8)) ≈ invlogcdf(dref,log(0.8)) + @test invlogccdf(d,log(0.2)) ≈ invlogccdf(dref,log(0.2)) @test invlogccdf(d,log(0.5)) ≈ invlogccdf(dref,log(0.5)) @test invlogccdf(d,log(0.8)) ≈ invlogccdf(dref,log(0.8)) - r = Array{partype(d)}(undef, 100000) + r = Array{float(eltype(d))}(undef, 100000) if ismissing(rng) rand!(d,r) else rand!(rng,d,r) end - @test mean(r) ≈ mean(dref) atol=0.01 + @test mean(r) ≈ mean(dref) atol=0.02 @test std(r) ≈ std(dref) atol=0.01 @test cf(d, -0.1) ≈ cf(dref,-0.1) - @test gradlogpdf(d, 0.1) ≈ gradlogpdf(dref, 0.1) + + if dref isa ContinuousDistribution + @test gradlogpdf(d, 0.1) ≈ gradlogpdf(dref, 0.1) + end end @testset "$k" for (k,dtest) in d_dict - test_evaluation_and_sampling(dtest, dref, rng) + test_evaluation_and_sampling(rng, dtest, dref) end end +end + +function test_location_scale_normal( + rng::Union{AbstractRNG, Missing}, μ::Real, σ::Real, μD::Real, σD::Real, +) + ρ = Normal(μD, σD) + dref = Normal(μ + σ * μD, σ * σD) + return test_location_scale(rng, μ, σ, ρ, dref) +end +function test_location_scale_discretenonparametric( + rng::Union{AbstractRNG, Missing}, μ::Real, σ::Real, support, probs, +) + ρ = DiscreteNonParametric(support, probs) + dref = DiscreteNonParametric(μ .+ σ .* support, probs) + return test_location_scale(rng, μ, σ, ρ, dref) end @testset "LocationScale" begin rng = MersenneTwister(123) - test_location_scale_normal(0.3,0.2,0.1,0.2) - test_location_scale_normal(-0.3,0.1,-0.1,0.3) - test_location_scale_normal(1.3,0.4,-0.1,0.5) - test_location_scale_normal(0.3,0.2,0.1,0.2,rng) - test_location_scale_normal(-0.3,0.1,-0.1,0.3,rng) - test_location_scale_normal(1.3,0.4,-0.1,0.5,rng) - test_location_scale_normal(ForwardDiff.Dual(0.3),0.2,0.1,0.2, rng) + + for _rng in (missing, rng) + test_location_scale_normal(_rng, 0.3, 0.2, 0.1, 0.2) + test_location_scale_normal(_rng, -0.3, 0.1, -0.1, 0.3) + test_location_scale_normal(_rng, 1.3, 0.4, -0.1, 0.5) + end + test_location_scale_normal(rng, ForwardDiff.Dual(0.3), 0.2, 0.1, 0.2) + + probs = Distributions.pnormalize!(rand(10)) + for _rng in (missing, rng) + test_location_scale_discretenonparametric(_rng, 1//3, 1//2, 1:10, probs) + test_location_scale_discretenonparametric(_rng, -1//4, 1//3, (-10):(-1), probs) + test_location_scale_discretenonparametric(_rng, 6//5, 3//2, 15:24, probs) + end end From 99b2b57faf21e34498ad443d6c572525d95986fc Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 2 May 2021 10:36:36 +0200 Subject: [PATCH 2/2] Improve type inference of `MixtureModel` (#1308) * Improve type inference of `MixtureModel` * Bump version * Remove comment * Bump minor version --- Project.toml | 2 +- src/mixtures/mixturemodel.jl | 74 +++++------------------------------- test/mixture.jl | 46 +++++++++++----------- 3 files changed, 34 insertions(+), 88 deletions(-) diff --git a/Project.toml b/Project.toml index d4e1d9c1d..16927f0dd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.24.18" +version = "0.25.0" [deps] FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" diff --git 
a/src/mixtures/mixturemodel.jl b/src/mixtures/mixturemodel.jl index cc7a8f5d4..93402c14d 100644 --- a/src/mixtures/mixturemodel.jl +++ b/src/mixtures/mixturemodel.jl @@ -19,11 +19,11 @@ A mixture of distributions, parametrized on: * `C` distribution family of the mixture * `CT` the type for probabilities of the prior """ -struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Real} <: AbstractMixtureModel{VF,VS,C} +struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Categorical} <: AbstractMixtureModel{VF,VS,C} components::Vector{C} - prior::Categorical{CT} + prior::CT - function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::Categorical{CT}) where {VF,VS,C,CT} + function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::CT) where {VF,VS,C,CT} length(cs) == ncategories(pri) || error("The number of components does not match the length of prior.") new{VF,VS,C,CT}(cs, pri) @@ -171,16 +171,8 @@ minimum(d::MixtureModel) = minimum([minimum(dci) for dci in d.components]) maximum(d::MixtureModel) = maximum([maximum(dci) for dci in d.components]) function mean(d::UnivariateMixture) - K = ncomponents(d) p = probs(d) - m = 0.0 - for i = 1:K - pi = p[i] - if pi > 0.0 - c = component(d, i) - m += mean(c) * pi - end - end + m = sum(pi * mean(component(d, i)) for (i, pi) in enumerate(p) if !iszero(pi)) return m end @@ -281,28 +273,13 @@ end #### Evaluation function insupport(d::AbstractMixtureModel, x::AbstractVector) - K = ncomponents(d) p = probs(d) - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 && insupport(component(d, i), x) - return true - end - end - return false + return any(insupport(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end function _cdf(d::UnivariateMixture, x::Real) - K = ncomponents(d) p = probs(d) - r = 0.0 - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 - c = component(d, i) - r += pi * cdf(c, x) - end - end + r = sum(pi * cdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) return r end @@ -310,9 +287,8 @@ cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x) cdf(d::UnivariateMixture{Discrete}, x::Integer) = _cdf(d, x) function _mixpdf1(d::AbstractMixtureModel, x) - ps = probs(d) - cs = components(d) - return sum((ps[i] > 0) * (ps[i] * pdf(cs[i], x)) for i in eachindex(ps)) + p = probs(d) + return sum(pi * pdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x) @@ -335,39 +311,9 @@ function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x) end function _mixlogpdf1(d::AbstractMixtureModel, x) - # using the formula below for numerical stability - # - # logpdf(d, x) = log(sum_i pri[i] * pdf(cs[i], x)) - # = log(sum_i pri[i] * exp(logpdf(cs[i], x))) - # = log(sum_i exp(logpri[i] + logpdf(cs[i], x))) - # = m + log(sum_i exp(logpri[i] + logpdf(cs[i], x) - m)) - # - # m is chosen to be the maximum of logpri[i] + logpdf(cs[i], x) - # such that the argument of exp is in a reasonable range - # - - K = ncomponents(d) p = probs(d) - lp = Vector{eltype(p)}(undef, K) - m = -Inf # m <- the maximum of log(p(cs[i], x)) + log(pri[i]) - @inbounds for i in eachindex(p) - pi = p[i] - if pi > 0.0 - # lp[i] <- log(p(cs[i], x)) + log(pri[i]) - lp_i = logpdf(component(d, i), x) + log(pi) - lp[i] = lp_i - if lp_i > m - m = lp_i - end - end - end - v = 0.0 - @inbounds for i = 1:K - if p[i] > 0.0 - v += exp(lp[i] - m) - end - end - return m + log(v) + lp = logsumexp(log(pi) + logpdf(component(d, i), x) for (i, pi) in enumerate(p) if 
!iszero(pi)) + return lp end function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x) diff --git a/test/mixture.jl b/test/mixture.jl index b027324c1..3873df72a 100644 --- a/test/mixture.jl +++ b/test/mixture.jl @@ -18,7 +18,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, end K = ncomponents(g) - pr = probs(g) + pr = @inferred(probs(g)) @assert length(pr) == K # mean @@ -26,7 +26,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, for k = 1:K mu += pr[k] * mean(component(g, k)) end - @test mean(g) ≈ mu + @test @inferred(mean(g)) ≈ mu # evaluation of cdf cf = zeros(T, n) @@ -38,7 +38,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, end for i = 1:n - @test cdf(g, X[i]) ≈ cf[i] + @test @inferred(cdf(g, X[i])) ≈ cf[i] end @test cdf.(g, X) ≈ cf @@ -58,16 +58,16 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, mix_lp0 = log.(mix_p0) for i = 1:n - @test pdf(g, X[i]) ≈ mix_p0[i] - @test logpdf(g, X[i]) ≈ mix_lp0[i] - @test componentwise_pdf(g, X[i]) ≈ vec(P0[i,:]) - @test componentwise_logpdf(g, X[i]) ≈ vec(LP0[i,:]) + @test @inferred(pdf(g, X[i])) ≈ mix_p0[i] + @test @inferred(logpdf(g, X[i])) ≈ mix_lp0[i] + @test @inferred(componentwise_pdf(g, X[i])) ≈ vec(P0[i,:]) + @test @inferred(componentwise_logpdf(g, X[i])) ≈ vec(LP0[i,:]) end - @test pdf.(g, X) ≈ mix_p0 - @test logpdf.(g, X) ≈ mix_lp0 - @test componentwise_pdf(g, X) ≈ P0 - @test componentwise_logpdf(g, X) ≈ LP0 + @test @inferred(map(Base.Fix1(pdf, g), X)) ≈ mix_p0 + @test @inferred(map(Base.Fix1(logpdf, g), X)) ≈ mix_lp0 + @test @inferred(componentwise_pdf(g, X)) ≈ P0 + @test @inferred(componentwise_logpdf(g, X)) ≈ LP0 # sampling if (T <: AbstractFloat) @@ -94,7 +94,7 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, end K = ncomponents(g) - pr = probs(g) + pr = @inferred(probs(g)) @assert length(pr) == K # mean @@ -102,7 +102,7 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, for k = 1:K mu .+= pr[k] .* mean(component(g, k)) end - @test mean(g) ≈ mu + @test @inferred(mean(g)) ≈ mu # evaluation P0 = zeros(n, K) @@ -121,20 +121,20 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int, for i = 1:n x_i = X[:,i] - @test pdf(g, x_i) ≈ mix_p0[i] - @test logpdf(g, x_i) ≈ mix_lp0[i] - @test componentwise_pdf(g, x_i) ≈ vec(P0[i,:]) - @test componentwise_logpdf(g, x_i) ≈ vec(LP0[i,:]) + @test @inferred(pdf(g, x_i)) ≈ mix_p0[i] + @test @inferred(logpdf(g, x_i)) ≈ mix_lp0[i] + @test @inferred(componentwise_pdf(g, x_i)) ≈ vec(P0[i,:]) + @test @inferred(componentwise_logpdf(g, x_i)) ≈ vec(LP0[i,:]) end #= @show g @show size(X) @show size(mix_p0) =# - @test pdf(g, X) ≈ mix_p0 - @test logpdf(g, X) ≈ mix_lp0 - @test componentwise_pdf(g, X) ≈ P0 - @test componentwise_logpdf(g, X) ≈ LP0 + @test @inferred(pdf(g, X)) ≈ mix_p0 + @test @inferred(logpdf(g, X)) ≈ mix_lp0 + @test @inferred(componentwise_pdf(g, X)) ≈ P0 + @test @inferred(componentwise_logpdf(g, X)) ≈ LP0 # sampling if ismissing(rng) @@ -172,8 +172,8 @@ end "rand(rng, ...)" => MersenneTwister(123)) @testset "Testing UnivariateMixture" begin - g_u = MixtureModel(Normal, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3]) - @test isa(g_u, MixtureModel{Univariate, Continuous, Normal}) + g_u = MixtureModel(Normal{Float64}, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3]) + @test isa(g_u, MixtureModel{Univariate,Continuous,<:Normal}) @test ncomponents(g_u) == 3 test_mixture(g_u, 1000, 10^6, rng) test_params(g_u)
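# ---------------------------------------------------------------------------
# Editor's note, not part of the patches above: a minimal usage sketch of the
# behaviour these two commits introduce, assuming they are applied to
# Distributions.jl. `Poisson`, `Normal`, and `MixtureModel` are existing
# Distributions.jl types; the variable names below are illustrative only.

using Distributions

# Patch 1: `LocationScale` now also wraps discrete base distributions, and the
# arithmetic sugar (`*`, `+`) builds one for any univariate distribution.
d = 2 * Poisson(3) + 1              # LocationScale(1, 2, Poisson(3))
pdf(d, 7) ≈ pdf(Poisson(3), 3)      # P(X = 7) = P(Z = (7 - 1)/2); no 1/σ factor for discrete ρ
mean(d) ≈ 1 + 2 * mean(Poisson(3))  # moments transform affinely

# Patch 2: mixture densities are computed only over the non-zero prior weights;
# `logpdf` reduces to a logsumexp over log(wᵢ) + logpdf(componentᵢ, x).
m = MixtureModel([Normal(0.0, 1.0), Normal(5.0, 2.0)], [0.3, 0.7])
logpdf(m, 1.2) ≈ log(0.3 * pdf(Normal(0.0, 1.0), 1.2) + 0.7 * pdf(Normal(5.0, 2.0), 1.2))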