Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generic support for uni gmm #615

Merged
merged 13 commits into from
Mar 17, 2019
26 changes: 13 additions & 13 deletions src/mixtures/mixturemodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,13 @@ function MixtureModel(components::Vector{C}, prior::Categorical) where C<:Distri
MixtureModel{VF,VS,C}(components, prior)
end

MixtureModel(components::Vector{C}, p::Vector{Float64}) where {C<:Distribution} =
MixtureModel(components::Vector{C}, p::VT) where {C<:Distribution,VT<:AbstractVector{<:Real}} =
MixtureModel(components, Categorical(p))

_construct_component(::Type{C}, arg) where {C<:Distribution} = C(arg)
_construct_component(::Type{C}, args::Tuple) where {C<:Distribution} = C(args...)

function MixtureModel(::Type{C}, params::AbstractArray, p::Vector{Float64}) where C<:Distribution
function MixtureModel(::Type{C}, params::AbstractArray, p::Vector{T}) where {C<:Distribution,T<:Real}
components = C[_construct_component(C, a) for a in params]
MixtureModel(components, p)
end
Expand Down Expand Up @@ -304,7 +304,7 @@ function _cdf(d::UnivariateMixture, x::Real)
return r
end

cdf(d::UnivariateMixture{Continuous}, x::Float64) = _cdf(d, x)
cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x)
cdf(d::UnivariateMixture{Discrete}, x::Int) = _cdf(d, x)


Expand Down Expand Up @@ -359,7 +359,7 @@ function _mixlogpdf1(d::AbstractMixtureModel, x)
p = probs(d)
@assert length(p) == K

lp = Vector{Float64}(undef, K)
lp = Vector{eltype(x)}(undef, K)
m = -Inf # m <- the maximum of log(p(cs[i], x)) + log(pri[i])
for i = 1:K
@inbounds pi = p[i]
Expand All @@ -386,7 +386,7 @@ function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x)
p = probs(d)
@assert length(p) == K
n = length(r)
Lp = Matrix{Float64}(undef, n, K)
Lp = Matrix{eltype(x)}(undef, n, K)
m = fill(-Inf, n)
for i = 1:K
@inbounds pi = p[i]
Expand Down Expand Up @@ -501,15 +501,15 @@ componentwise_logpdf!(r::AbstractVector, d::MultivariateMixture, x::AbstractVect
componentwise_logpdf!(r::AbstractMatrix, d::UnivariateMixture, x::AbstractVector) = _cwise_logpdf!(r, d, x)
componentwise_logpdf!(r::AbstractMatrix, d::MultivariateMixture, x::AbstractMatrix) = _cwise_logpdf!(r, d, x)

componentwise_pdf(d::UnivariateMixture, x::Real) = componentwise_pdf!(Vector{Float64}(undef, ncomponents(d)), d, x)
componentwise_pdf(d::UnivariateMixture, x::AbstractVector) = componentwise_pdf!(Matrix{Float64}(undef, length(x), ncomponents(d)), d, x)
componentwise_pdf(d::MultivariateMixture, x::AbstractVector) = componentwise_pdf!(Vector{Float64}(undef, ncomponents(d)), d, x)
componentwise_pdf(d::MultivariateMixture, x::AbstractMatrix) = componentwise_pdf!(Matrix{Float64}(undef, size(x,2), ncomponents(d)), d, x)
componentwise_pdf(d::UnivariateMixture, x::Real) = componentwise_pdf!(Vector{eltype(x)}(undef, ncomponents(d)), d, x)
componentwise_pdf(d::UnivariateMixture, x::AbstractVector) = componentwise_pdf!(Matrix{eltype(x)}(undef, length(x), ncomponents(d)), d, x)
componentwise_pdf(d::MultivariateMixture, x::AbstractVector) = componentwise_pdf!(Vector{eltype(x)}(undef, ncomponents(d)), d, x)
componentwise_pdf(d::MultivariateMixture, x::AbstractMatrix) = componentwise_pdf!(Matrix{eltype(x)}(undef, size(x,2), ncomponents(d)), d, x)

componentwise_logpdf(d::UnivariateMixture, x::Real) = componentwise_logpdf!(Vector{Float64}(undef, ncomponents(d)), d, x)
componentwise_logpdf(d::UnivariateMixture, x::AbstractVector) = componentwise_logpdf!(Matrix{Float64}(undef, length(x), ncomponents(d)), d, x)
componentwise_logpdf(d::MultivariateMixture, x::AbstractVector) = componentwise_logpdf!(Vector{Float64}(undef, ncomponents(d)), d, x)
componentwise_logpdf(d::MultivariateMixture, x::AbstractMatrix) = componentwise_logpdf!(Matrix{Float64}(undef, size(x,2), ncomponents(d)), d, x)
componentwise_logpdf(d::UnivariateMixture, x::Real) = componentwise_logpdf!(Vector{eltype(x)}(undef, ncomponents(d)), d, x)
componentwise_logpdf(d::UnivariateMixture, x::AbstractVector) = componentwise_logpdf!(Matrix{eltype(x)}(undef, length(x), ncomponents(d)), d, x)
componentwise_logpdf(d::MultivariateMixture, x::AbstractVector) = componentwise_logpdf!(Vector{eltype(x)}(undef, ncomponents(d)), d, x)
componentwise_logpdf(d::MultivariateMixture, x::AbstractMatrix) = componentwise_logpdf!(Matrix{eltype(x)}(undef, size(x,2), ncomponents(d)), d, x)


## Sampling
Expand Down
18 changes: 10 additions & 8 deletions src/mixtures/unigmm.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Univariate Gaussian Mixture Models

struct UnivariateGMM <: UnivariateMixture{Continuous,Normal}
struct UnivariateGMM{VT1<:AbstractVector{<:Real},VT2<:AbstractVector{<:Real}} <: UnivariateMixture{Continuous,Normal}
K::Int
means::Vector{Float64}
stds::Vector{Float64}
means::VT1
stds::VT2
prior::Categorical

function UnivariateGMM(ms::Vector{Float64}, ss::Vector{Float64}, pri::Categorical)
function UnivariateGMM(ms::VT1, ss::VT2, pri::Categorical) where {VT1<:AbstractVector{<:Real},VT2<:AbstractVector{<:Real}}
K = length(ms)
length(ss) == K || throw(DimensionMismatch())
ncategories(pri) == K ||
error("The number of categories in pri should be equal to the number of components.")
new(K, ms, ss, pri)
new{VT1,VT2}(K, ms, ss, pri)
end
end

Expand All @@ -25,14 +25,16 @@ probs(d::UnivariateGMM) = probs(d.prior)

mean(d::UnivariateGMM) = dot(d.means, probs(d))

rand(d::UnivariateGMM) = (k = rand(d.prior); d.means[k] + randn() * d.stds[k])

rand(rng::AbstractRNG, d::UnivariateGMM) =
(k = rand(rng, d.prior); d.means[k] + randn(rng) * d.stds[k])

params(d::UnivariateGMM) = (d.means, d.stds, d.prior)

struct UnivariateGMMSampler <: Sampleable{Univariate,Continuous}
means::Vector{Float64}
stds::Vector{Float64}
struct UnivariateGMMSampler{VT1<:AbstractVector{<:Real},VT2<:AbstractVector{<:Real}} <: Sampleable{Univariate,Continuous}
means::VT1
stds::VT2
psampler::AliasTable
end

Expand Down
141 changes: 74 additions & 67 deletions test/mixture.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
using Distributions, Random
using Test
using ForwardDiff: Dual


# Core testing procedure

function test_mixture(g::UnivariateMixture, n::Int, ns::Int,
rng::Union{AbstractRNG, Missing} = missing)
X = zeros(n)
if g isa UnivariateGMM
T = eltype(g.means)
else
T = eltype(g)
end
X = zeros(T, n)
for i = 1:n
if ismissing(rng)
X[i] = rand(g)
else
X[i] = rand(rng, g)
end
X[i] = rand(g)
end

K = ncomponents(g)
Expand All @@ -27,7 +29,7 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int,
@test mean(g) ≈ mu

# evaluation of cdf
cf = zeros(n)
cf = zeros(T, n)
for k = 1:K
c_k = component(g, k)
for i = 1:n
Expand All @@ -41,8 +43,8 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int,
@test cdf.(g, X) ≈ cf

# evaluation
P0 = zeros(n, K)
LP0 = zeros(n, K)
P0 = zeros(T, n, K)
LP0 = zeros(T, n, K)
for k = 1:K
c_k = component(g, k)
for i = 1:n
Expand All @@ -68,14 +70,16 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int,
@test componentwise_logpdf(g, X) ≈ LP0

# sampling
if ismissing(rng)
Xs = rand(g, ns)
else
Xs = rand(rng, g, ns)
if (T isa AbstractFloat)
if ismissing(rng)
Xs = rand(g, ns)
else
Xs = rand(rng, g, ns)
end
@test isa(Xs, Vector{T})
@test length(Xs) == ns
@test isapprox(mean(Xs), mean(g), atol=0.01)
end
@test isa(Xs, Vector{Float64})
@test length(Xs) == ns
@test isapprox(mean(Xs), mean(g), atol=0.01)
end

function test_mixture(g::MultivariateMixture, n::Int, ns::Int,
Expand Down Expand Up @@ -164,56 +168,59 @@ end
Dict("rand(...)" => missing,
"rand(rng, ...)" => MersenneTwister(123))

@testset "Testing UnivariateMixture" begin
g_u = MixtureModel(Normal, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3])
@test isa(g_u, MixtureModel{Univariate, Continuous, Normal})
@test ncomponents(g_u) == 3
test_mixture(g_u, 1000, 10^6, rng)
test_params(g_u)
@test minimum(g_u) == -Inf
@test maximum(g_u) == Inf
@test extrema(g_u) == (-Inf, Inf)

g_u = MixtureModel([TriangularDist(-1,2,0),TriangularDist(-.5,3,1),TriangularDist(-2,0,-1)])
@test minimum(g_u) ≈ -2.0
@test maximum(g_u) ≈ 3.0
@test extrema(g_u) == (minimum(g_u), maximum(g_u))
@test insupport(g_u, 2.5) == true
@test insupport(g_u, 3.5) == false

g_u = UnivariateGMM([0.0, 2.0, -4.0], [1.0, 1.2, 1.5], Categorical([0.2, 0.5, 0.3]))
@test isa(g_u, UnivariateGMM)
@test ncomponents(g_u) == 3
test_mixture(g_u, 1000, 10^6, rng)
test_params(g_u)
@test minimum(g_u) == -Inf
@test maximum(g_u) == Inf
@test extrema(g_u) == (-Inf, Inf)
end
@testset "Testing UnivariateMixture" begin
g_u = MixtureModel(Normal, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3])
@test isa(g_u, MixtureModel{Univariate, Continuous, Normal})
@test ncomponents(g_u) == 3
test_mixture(g_u, 1000, 10^6, rng)
test_params(g_u)
@test minimum(g_u) == -Inf
@test maximum(g_u) == Inf
@test extrema(g_u) == (-Inf, Inf)

g_u = MixtureModel([TriangularDist(-1,2,0),TriangularDist(-.5,3,1),TriangularDist(-2,0,-1)])
@test minimum(g_u) ≈ -2.0
@test maximum(g_u) ≈ 3.0
@test extrema(g_u) == (minimum(g_u), maximum(g_u))
@test insupport(g_u, 2.5) == true
@test insupport(g_u, 3.5) == false

μ = [0.0, 2.0, -4.0]; σ = [1.0, 1.2, 1.5]; p = [0.2, 0.5, 0.3]
for T = [Float64, Dual]
g_u = UnivariateGMM(map(Dual, μ), map(Dual, σ), Categorical(p))
@test isa(g_u, UnivariateGMM)
@test ncomponents(g_u) == 3
test_mixture(g_u, 1000, 10^6, rng)
test_params(g_u)
@test minimum(g_u) == -Inf
@test maximum(g_u) == Inf
@test extrema(g_u) == (-Inf, Inf)
end
end

@testset "Testing MultivariatevariateMixture" begin
g_m = MixtureModel(
IsoNormal[ MvNormal([0.0, 0.0], 1.0),
MvNormal([0.2, 1.0], 1.0),
MvNormal([-0.5, -3.0], 1.6) ],
[0.2, 0.5, 0.3])
@test isa(g_m, MixtureModel{Multivariate, Continuous, IsoNormal})
@test length(components(g_m)) == 3
@test length(g_m) == 2
@test insupport(g_m, [0.0, 0.0]) == true
test_mixture(g_m, 1000, 10^6, rng)
test_params(g_m)

u1 = Uniform()
u2 = Uniform(1.0, 2.0)
utot =Uniform(0.0, 2.0)
# mixture supposed to be a uniform on [0.0,2.0]
unif_mixt = MixtureModel([u1,u2])
@test var(utot) ≈ var(unif_mixt)
@test mean(utot) ≈ mean(unif_mixt)
for x in -1.0:0.5:2.5
@test cdf(utot,x) ≈ cdf(utot,x)
end
end
@testset "Testing MultivariatevariateMixture" begin
g_m = MixtureModel(
IsoNormal[ MvNormal([0.0, 0.0], 1.0),
MvNormal([0.2, 1.0], 1.0),
MvNormal([-0.5, -3.0], 1.6) ],
[0.2, 0.5, 0.3])
@test isa(g_m, MixtureModel{Multivariate, Continuous, IsoNormal})
@test length(components(g_m)) == 3
@test length(g_m) == 2
@test insupport(g_m, [0.0, 0.0]) == true
test_mixture(g_m, 1000, 10^6, rng)
test_params(g_m)

u1 = Uniform()
u2 = Uniform(1.0, 2.0)
utot =Uniform(0.0, 2.0)

# mixture supposed to be a uniform on [0.0,2.0]
unif_mixt = MixtureModel([u1,u2])
@test var(utot) ≈ var(unif_mixt)
@test mean(utot) ≈ mean(unif_mixt)
for x in -1.0:0.5:2.5
@test cdf(utot,x) ≈ cdf(utot,x)
end
end
end