Skip to content

Commit

Permalink
Merge branch 'master' into dw/sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
devmotion authored May 2, 2021
2 parents 06f3a78 + 99b2b57 commit 95e7274
Show file tree
Hide file tree
Showing 8 changed files with 257 additions and 220 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Distributions"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
authors = ["JuliaStats"]
version = "0.24.19"
version = "0.25.0"

[deps]
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Expand Down
74 changes: 10 additions & 64 deletions src/mixtures/mixturemodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ A mixture of distributions, parametrized on:
* `C` distribution family of the mixture
* `CT` the type for probabilities of the prior
"""
struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Real} <: AbstractMixtureModel{VF,VS,C}
struct MixtureModel{VF<:VariateForm,VS<:ValueSupport,C<:Distribution,CT<:Categorical} <: AbstractMixtureModel{VF,VS,C}
components::Vector{C}
prior::Categorical{CT}
prior::CT

function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::Categorical{CT}) where {VF,VS,C,CT}
function MixtureModel{VF,VS,C}(cs::Vector{C}, pri::CT) where {VF,VS,C,CT}
length(cs) == ncategories(pri) ||
error("The number of components does not match the length of prior.")
new{VF,VS,C,CT}(cs, pri)
Expand Down Expand Up @@ -171,16 +171,8 @@ minimum(d::MixtureModel) = minimum([minimum(dci) for dci in d.components])
maximum(d::MixtureModel) = maximum([maximum(dci) for dci in d.components])

function mean(d::UnivariateMixture)
K = ncomponents(d)
p = probs(d)
m = 0.0
for i = 1:K
pi = p[i]
if pi > 0.0
c = component(d, i)
m += mean(c) * pi
end
end
m = sum(pi * mean(component(d, i)) for (i, pi) in enumerate(p) if !iszero(pi))
return m
end

Expand Down Expand Up @@ -281,38 +273,22 @@ end
#### Evaluation

function insupport(d::AbstractMixtureModel, x::AbstractVector)
K = ncomponents(d)
p = probs(d)
@inbounds for i in eachindex(p)
pi = p[i]
if pi > 0.0 && insupport(component(d, i), x)
return true
end
end
return false
return any(insupport(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi))
end

function _cdf(d::UnivariateMixture, x::Real)
K = ncomponents(d)
p = probs(d)
r = 0.0
@inbounds for i in eachindex(p)
pi = p[i]
if pi > 0.0
c = component(d, i)
r += pi * cdf(c, x)
end
end
r = sum(pi * cdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi))
return r
end

cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x)
cdf(d::UnivariateMixture{Discrete}, x::Integer) = _cdf(d, x)

function _mixpdf1(d::AbstractMixtureModel, x)
ps = probs(d)
cs = components(d)
return sum((ps[i] > 0) * (ps[i] * pdf(cs[i], x)) for i in eachindex(ps))
p = probs(d)
return sum(pi * pdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi))
end

function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x)
Expand All @@ -335,39 +311,9 @@ function _mixpdf!(r::AbstractArray, d::AbstractMixtureModel, x)
end

function _mixlogpdf1(d::AbstractMixtureModel, x)
# using the formula below for numerical stability
#
# logpdf(d, x) = log(sum_i pri[i] * pdf(cs[i], x))
# = log(sum_i pri[i] * exp(logpdf(cs[i], x)))
# = log(sum_i exp(logpri[i] + logpdf(cs[i], x)))
# = m + log(sum_i exp(logpri[i] + logpdf(cs[i], x) - m))
#
# m is chosen to be the maximum of logpri[i] + logpdf(cs[i], x)
# such that the argument of exp is in a reasonable range
#

K = ncomponents(d)
p = probs(d)
lp = Vector{eltype(p)}(undef, K)
m = -Inf # m <- the maximum of log(p(cs[i], x)) + log(pri[i])
@inbounds for i in eachindex(p)
pi = p[i]
if pi > 0.0
# lp[i] <- log(p(cs[i], x)) + log(pri[i])
lp_i = logpdf(component(d, i), x) + log(pi)
lp[i] = lp_i
if lp_i > m
m = lp_i
end
end
end
v = 0.0
@inbounds for i = 1:K
if p[i] > 0.0
v += exp(lp[i] - m)
end
end
return m + log(v)
lp = logsumexp(log(pi) + logpdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi))
return lp
end

function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x)
Expand Down
96 changes: 0 additions & 96 deletions src/univariate/continuous/locationscale.jl

This file was deleted.

10 changes: 10 additions & 0 deletions src/univariate/discrete/discretenonparametric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,16 @@ end
ccdf(d::DiscreteNonParametric{T}, x::Integer) where T = _ccdf(d, convert(T, x))
ccdf(d::DiscreteNonParametric{T}, x::Real) where T = _ccdf(d, convert(T, x))

# fix incorrect defaults
for f in (:cdf, :ccdf)
_f = Symbol(:_, f)
logf = Symbol(:log, f)
@eval begin
$logf(d::DiscreteNonParametric{T}, x::Integer) where T = log($_f(d, convert(T, x)))
$logf(d::DiscreteNonParametric{T}, x::Real) where T = log($_f(d, convert(T, x)))
end
end

function quantile(d::DiscreteNonParametric, q::Real)
0 <= q <= 1 || throw(DomainError())
x = support(d)
Expand Down
132 changes: 132 additions & 0 deletions src/univariate/locationscale.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
LocationScale(μ,σ,ρ)
A location-scale transformed distribution with location parameter `μ`,
scale parameter `σ`, and given univariate distribution `ρ`.
If ``Z`` is a random variable with distribution `ρ`, then the distribution of the random
variable
```math
X = μ + σ Z
```
is the location-scale transformed distribution with location parameter `μ` and scale
parameter `σ`.
If `ρ` is a discrete distribution, the probability mass function of
the transformed distribution is given by
```math
P(X = x) = P\\left(Z = \\frac{x-μ}{σ} \\right).
```
If `ρ` is a continuous distribution, the probability density function of
the transformed distribution is given by
```math
f(x) = \\frac{1}{σ} ρ \\! \\left( \\frac{x-μ}{σ} \\right).
```
```julia
LocationScale(μ,σ,ρ) # location-scale transformed distribution
params(d) # Get the parameters, i.e. (μ, σ, and the base distribution)
location(d) # Get the location parameter
scale(d) # Get the scale parameter
```
External links
[Location-Scale family on Wikipedia](https://en.wikipedia.org/wiki/Location%E2%80%93scale_family)
"""
struct LocationScale{T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} <: UnivariateDistribution{S}
μ::T
σ::T
ρ::D
function LocationScale{T,S,D}::T, σ::T, ρ::D; check_args=true) where {T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}}
check_args && @check_args(LocationScale, σ > zero(σ))
new{T, S, D}(μ, σ, ρ)
end
end

function LocationScale::T, σ::T, ρ::UnivariateDistribution; check_args=true) where {T<:Real}
_T = promote_type(eltype(ρ), T)
D = typeof(ρ)
S = value_support(D)
return LocationScale{_T,S,D}(_T(μ), _T(σ), ρ; check_args=check_args)
end

LocationScale::Real, σ::Real, ρ::UnivariateDistribution) = LocationScale(promote(μ, σ)..., ρ)

# aliases
const ContinuousLocationScale{T<:Real,D<:ContinuousUnivariateDistribution} = LocationScale{T,Continuous,D}
const DiscreteLocationScale{T<:Real,D<:DiscreteUnivariateDistribution} = LocationScale{T,Discrete,D}

Base.eltype(::Type{<:LocationScale{T}}) where T = T

minimum(d::LocationScale) = d.μ + d.σ * minimum(d.ρ)
maximum(d::LocationScale) = d.μ + d.σ * maximum(d.ρ)

LocationScale::Real, σ::Real, d::LocationScale) = LocationScale+ d.μ * σ, σ * d.σ, d.ρ)

#### Conversions

convert(::Type{LocationScale{T}}, μ::Real, σ::Real, ρ::D) where {T<:Real, D<:UnivariateDistribution} = LocationScale(T(μ),T(σ),ρ)
convert(::Type{LocationScale{T}}, d::LocationScale{S}) where {T<:Real, S<:Real} = LocationScale(T(d.μ),T(d.σ),d.ρ, check_args=false)

#### Parameters

location(d::LocationScale) = d.μ
scale(d::LocationScale) = d.σ
params(d::LocationScale) = (d.μ,d.σ,d.ρ)
partype(::LocationScale{T}) where {T} = T

#### Statistics

mean(d::LocationScale) = d.μ + d.σ * mean(d.ρ)
median(d::LocationScale) = d.μ + d.σ * median(d.ρ)
mode(d::LocationScale) = d.μ + d.σ * mode(d.ρ)
modes(d::LocationScale) = d.μ .+ d.σ .* modes(d.ρ)

var(d::LocationScale) = d.σ^2 * var(d.ρ)
std(d::LocationScale) = d.σ * std(d.ρ)
skewness(d::LocationScale) = skewness(d.ρ)
kurtosis(d::LocationScale) = kurtosis(d.ρ)

isplatykurtic(d::LocationScale) = isplatykurtic(d.ρ)
isleptokurtic(d::LocationScale) = isleptokurtic(d.ρ)
ismesokurtic(d::LocationScale) = ismesokurtic(d.ρ)

entropy(d::ContinuousLocationScale) = entropy(d.ρ) + log(d.σ)
entropy(d::DiscreteLocationScale) = entropy(d.ρ)

mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t)

#### Evaluation & Sampling

pdf(d::ContinuousLocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ
pdf(d::DiscreteLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ)

logpdf(d::ContinuousLocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ)
logpdf(d::DiscreteLocationScale, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ)

# additional definitions are required to fix ambiguity errors and incorrect defaults
for f in (:cdf, :ccdf, :logcdf, :logccdf)
_f = Symbol(:_, f)
@eval begin
$f(d::LocationScale, x::Real) = $_f(d, x)
$f(d::DiscreteLocationScale, x::Real) = $_f(d, x)
$f(d::DiscreteLocationScale, x::Integer) = $_f(d, x)
$_f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ)
end
end

quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q)

rand(rng::AbstractRNG, d::LocationScale) = d.μ + d.σ * rand(rng, d.ρ)
cf(d::LocationScale, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ)
gradlogpdf(d::ContinuousLocationScale, x::Real) = gradlogpdf(d.ρ,(x-d.μ)/d.σ) / d.σ

#### Syntactic sugar for simple transforms of distributions, e.g., d + x, d - x, and so on

Base.:+(d::UnivariateDistribution, x::Real) = LocationScale(x, one(x), d)
Base.:+(x::Real, d::UnivariateDistribution) = d + x
Base.:*(x::Real, d::UnivariateDistribution) = LocationScale(zero(x), x, d)
Base.:*(d::UnivariateDistribution, x::Real) = x * d
Base.:-(d::UnivariateDistribution, x::Real) = d + -x
Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d

3 changes: 2 additions & 1 deletion src/univariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,6 @@ const continuous_distributions = [
"ksonesided",
"laplace",
"levy",
"locationscale",
"logistic",
"noncentralbeta",
"noncentralchisq",
Expand All @@ -631,6 +630,8 @@ const continuous_distributions = [
"weibull"
]

include(joinpath("univariate", "locationscale.jl"))

for dname in discrete_distributions
include(joinpath("univariate", "discrete", "$(dname).jl"))
end
Expand Down
Loading

0 comments on commit 95e7274

Please sign in to comment.