Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement Wishart and InverseWishart: allow the use of general pdmats as arguments #304

Merged
merged 3 commits into from
Nov 8, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/deprecates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ end
@Base.deprecate logpmf logpdf
@Base.deprecate logpmf! logpmf!
@Base.deprecate pmf pdf


#### Deprecate on 0.6 (to be removed on 0.7)

@Base.deprecate expected_logdet meanlogdet

146 changes: 64 additions & 82 deletions src/matrix/inversewishart.jl
Original file line number Diff line number Diff line change
@@ -1,103 +1,85 @@
##############################################################################
# Inverse Wishart distribution
#
# Inverse Wishart Distribution
# following Wikipedia parametrization
#
# Parameterized such that E(X) = Psi / (nu - p - 1)
# See the riwish and diwish function of R's MCMCpack
#
##############################################################################

immutable InverseWishart <: ContinuousMatrixDistribution
nu::Float64
Psichol::Cholesky{Float64}
function InverseWishart(n::Real, Pc::Cholesky{Float64})
if n > size(Pc, 1) - 1
new(float64(n), Pc)
else
error("Inverse Wishart parameters must be df > p - 1")
end
end
end

dim(d::InverseWishart) = size(d.Psichol, 1)
size(d::InverseWishart) = size(d.Psichol)
immutable InverseWishart{ST<:AbstractPDMat} <: ContinuousMatrixDistribution
df::Float64 # degree of freedom
Ψ::ST # scale matrix
c0::Float64 # log of normalizing constant
end

show(io::IO, d::InverseWishart) = show_multline(io, d, [(:nu, d.nu), (:Psi, full(d.Psichol))])
#### Constructors

function InverseWishart(nu::Real, Psi::Matrix{Float64})
InverseWishart(float64(nu), cholfact(Psi))
function InverseWishart{ST<:AbstractPDMat}(df::Real, Ψ::ST)
p = dim(Ψ)
df > p - 1 || error("df should be greater than dim - 1.")
InverseWishart{ST}(df, Ψ, _invwishart_c0(df, Ψ))
end

function insupport(IW::InverseWishart, X::Matrix{Float64})
return size(X) == size(IW) && isApproxSymmmetric(X) && hasCholesky(X)
end
# This just checks if X could come from any Inverse-Wishart
function insupport(::Type{InverseWishart}, X::Matrix{Float64})
return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && hasCholesky(X)
end
InverseWishart(df::Real, Ψ::Matrix{Float64}) = InverseWishart(df, PDMat(Ψ))

function mean(IW::InverseWishart)
if IW.nu > size(IW.Psichol, 1) + 1
return 1.0 / (IW.nu - size(IW.Psichol, 1) - 1.0) *
(IW.Psichol[:U]' * IW.Psichol[:U])
else
error("mean only defined for nu > p + 1")
end
InverseWishart(df::Real, Ψ::Cholesky) = InverseWishart(df, PDMat(Ψ))

function _invwishart_c0(df::Float64, Ψ::AbstractPDMat)
h_df = df / 2
p = dim(Ψ)
h_df * (p * logtwo - logdet(Ψ)) + lpgamma(p, h_df)
end

function _logpdf{T<:Real}(IW::InverseWishart, X::DenseMatrix{T})
Xchol = trycholfact(X)
if size(X) == size(IW) && isApproxSymmmetric(X) && isa(Xchol, Cholesky)
p = size(X, 1)
logd::Float64 = IW.nu * p / 2.0 * log(2.0)
logd += lpgamma(p, IW.nu / 2.0)
logd -= IW.nu / 2.0 * logdet(IW.Psichol)
logd = -logd
logd -= 0.5 * (IW.nu + p + 1.0) * logdet(Xchol)
logd -= 0.5 * trace(inv(Xchol) * (IW.Psichol[:U]' * IW.Psichol[:U]))
return logd

#### Properties

insupport(::Type{InverseWishart}, X::Matrix{Float64}) = isposdef(X)
insupport(d::InverseWishart, X::Matrix{Float64}) = size(X) == size(d) && isposdef(X)

dim(d::InverseWishart) = dim(d.Ψ)
size(d::InverseWishart) = (p = dim(d); (p, p))


#### Show

show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, full(d.Ψ))])


#### Statistics

function mean(d::InverseWishart)
df = d.df
p = dim(d)
r = df - (p + 1)
if r > 0.0
return full(d.Ψ) * (1.0 / r)
else
return -Inf
error("mean only defined for df > p + 1")
end
end

# rand(Wishart(nu, Psi^-1))^-1 is an sample from an
# inverse wishart(nu, Psi). there is actually some wacky
# behavior here where inv of the Cholesky returns the
# inverse of the original matrix, in this case we're getting
# Psi^-1 like we want
rand(IW::InverseWishart) = inv(rand(Wishart(IW.nu, inv(IW.Psichol))))

function rand!(IW::InverseWishart, X::Array{Matrix{Float64}})
Psiinv = inv(IW.Psichol)
W = Wishart(IW.nu, Psiinv)
X = rand!(W, X)
for i in 1:length(X)
X[i] = inv(X[i])
end
return X
end
mode(d::InverseWishart) = d.Ψ * inv(d.df + dim(d) + 1.0)

var(IW::InverseWishart) = error("Not yet implemented")

# because X == X' keeps failing due to floating point nonsense
function isApproxSymmmetric(a::Matrix{Float64})
tmp = true
for j in 2:size(a, 1)
for i in 1:(j - 1)
tmp &= abs(a[i, j] - a[j, i]) < 1e-8
end
end
return tmp
#### Evaluation

function _logpdf(d::InverseWishart, X::DenseMatrix{Float64})
p = dim(d)
df = d.df
Xcf = cholfact(X)
# we use the fact: trace(Ψ * inv(X)) = trace(inv(X) * Ψ) = trace(X \ Ψ)
Ψ = full(d.Ψ)
-0.5 * ((df + p + 1) * logdet(Xcf) + trace(Xcf \ Ψ)) - d.c0
end

# because isposdef keeps giving the wrong answer for samples
# from Wishart and InverseWisharts
hasCholesky(a::Matrix{Float64}) = isa(trycholfact(a), Cholesky)
_logpdf{T<:Real}(d::InverseWishart, X::DenseMatrix{T}) = _logpdf(d, float64(X))


#### Sampling

function trycholfact(a::Matrix{Float64})
try cholfact(a)
catch e
return e
rand(d::InverseWishart) = inv(rand(Wishart(d.df, inv(d.Ψ))))

function _rand!{M<:Matrix}(d::InverseWishart, X::AbstractArray{M})
wd = Wishart(d.df, inv(d.Ψ))
for i in 1:length(X)
X[i] = inv(rand(wd))
end
return X
end
155 changes: 84 additions & 71 deletions src/matrix/wishart.jl
Original file line number Diff line number Diff line change
@@ -1,96 +1,109 @@
##############################################################################
# Wishart distribution
#
# Wishart Distribution
# following the Wikipedia parameterization
#
# Parameters nu and S such that E(X) = nu * S
# See the rwish and dwish implementation in R's MCMCPack
# This parametrization differs from Bernardo & Smith p 435
# in this way: (nu, S) = (2.0 * alpha, 0.5 * beta^-1)
#
##############################################################################

immutable Wishart <: ContinuousMatrixDistribution
nu::Float64
Schol::Cholesky{Float64}
function Wishart(n::Real, Sc::Cholesky{Float64})
if n > size(Sc, 1) - 1
new(float64(n), Sc)
else
error("Wishart parameters must be df > p - 1")
end
end

immutable Wishart{ST<:AbstractPDMat} <: ContinuousMatrixDistribution
df::Float64 # degree of freedom
S::ST # the scale matrix
c0::Float64 # the logarithm of normalizing constant in pdf
end

Wishart(nu::Real, S::Matrix{Float64}) = Wishart(nu, cholfact(S))
#### Constructors

show(io::IO, d::Wishart) = show_multline(io, d, [(:nu, d.nu), (:S, full(d.Schol))])
function Wishart{ST<:AbstractPDMat}(df::Real, S::ST)
p = dim(S)
df > p - 1 || error("df should be greater than dim - 1.")
Wishart{ST}(df, S, _wishart_c0(df, S))
end

Wishart(df::Real, S::Matrix{Float64}) = Wishart(df, PDMat(S))

dim(W::Wishart) = size(W.Schol, 1)
size(W::Wishart) = size(W.Schol)
Wishart(df::Real, S::Cholesky) = Wishart(df, PDMat(S))

function insupport(W::Wishart, X::Matrix{Float64})
return size(X) == size(W) && isApproxSymmmetric(X) && hasCholesky(X)
end
# This just checks if X could come from any Wishart
function insupport(::Type{Wishart}, X::Matrix{Float64})
return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && hasCholesky(X)
function _wishart_c0(df::Float64, S::AbstractPDMat)
h_df = df / 2
p = dim(S)
h_df * (logdet(S) + p * logtwo) + lpgamma(p, h_df)
end

mean(w::Wishart) = w.nu * (w.Schol[:U]' * w.Schol[:U])

function expected_logdet(W::Wishart)
logd = 0.
d = dim(W)
#### Properties

for i=1:d
logd += digamma(0.5 * (W.nu + 1 - i))
end
insupport(::Type{Wishart}, X::Matrix{Float64}) = isposdef(X)
insupport(d::Wishart, X::Matrix{Float64}) = size(X) == size(d) && isposdef(X)

logd += d * log(2)
logd += logdet(W.Schol)
dim(d::Wishart) = dim(d.S)
size(d::Wishart) = (p = dim(d); (p, p))

return logd
end

function lognorm(W::Wishart)
d = dim(W)
return (W.nu / 2) * logdet(W.Schol) + (d * W.nu / 2) * log(2) + lpgamma(d, W.nu / 2)
end
#### Show

show(io::IO, d::Wishart) = show_multline(io, d, [(:df, d.df), (:S, full(d.S))])


#### Statistics

mean(d::Wishart) = d.df * full(d.S)

function _logpdf{T<:Real}(W::Wishart, X::DenseMatrix{T})
Xchol = trycholfact(X)
if size(X) == size(W) && isApproxSymmmetric(X) && isa(Xchol, Cholesky)
d = dim(W)
logd = -lognorm(W)
logd += 0.5 * (W.nu - d - 1.0) * logdet(Xchol)
logd -= 0.5 * trace(W.Schol \ X)
return logd
function mode(d::Wishart)
r = d.df - dim(d) - 1.0
if r > 0.0
return full(d.S) * r
else
return -Inf
error("mode is only defined when df > p + 1")
end
end

function rand(w::Wishart)
p = size(w.Schol, 1)
X = zeros(p, p)
for ii in 1:p
X[ii, ii] = sqrt(rand(Chisq(w.nu - ii + 1)))
function meanlogdet(d::Wishart)
p = dim(d)
df = d.df
v = logdet(d.S) + p * logtwo
for i = 1:p
v += digamma(0.5 * (df - (i - 1)))
end
if p > 1
for col in 2:p
for row in 1:(col - 1)
X[row, col] = randn()
end
end
end
Z = X * w.Schol[:U]
return At_mul_B(Z, Z)
return v
end

function entropy(d::Wishart)
p = dim(d)
df = d.df
d.c0 - 0.5 * (df - p - 1) * meanlogdet(d) + 0.5 * df * p
end


#### Evaluation

function _logpdf(d::Wishart, X::DenseMatrix{Float64})
df = d.df
p = dim(d)
Xcf = cholfact(X)
0.5 * ((df - (p + 1)) * logdet(Xcf) - trace(d.S \ X)) - d.c0
end

function entropy(W::Wishart)
d = dim(W)
return lognorm(W) - (W.nu - d - 1) / 2 * expected_logdet(W) + W.nu * d / 2
_logpdf{T<:Real}(d::Wishart, X::DenseMatrix{T}) = _logpdf(d, float64(X))

#### Sampling

function rand(d::Wishart)
Z = unwhiten!(d.S, _wishart_genA(dim(d), d.df))
A_mul_Bt(Z, Z)
end

var(w::Wishart) = error("Not yet implemented")
function _wishart_genA(p::Int, df::Float64)
# Generate the matrix A in the Bartlett decomposition
#
# A is a lower triangular matrix, with
#
# A(i, j) ~ sqrt of Chisq(df - i + 1) when i == j
# ~ Normal() when i > j
#
A = zeros(p, p)
for i = 1:p
@inbounds A[i,i] = sqrt(rand(Chisq(df - i + 1.0)))
end
for j = 1:p-1, i = j+1:p
@inbounds A[i,j] = randn()
end
return A
end
32 changes: 22 additions & 10 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,28 @@ macro checkinvlogcdf(lp,ex)
:($lp <= zero($lp) ? $ex : NaN)
end

# Simple method for integration
function simpson(f::AbstractVector{Float64}, h::Float64)
n = length(f)
isodd(n) || error("The length of the input vector must be odd.")
s = f[1]
t = -1
for i = 2:2:n-1
@inbounds s += (4 * f[i] + 2 * f[i+1])
# because X == X' keeps failing due to floating point nonsense
function isApproxSymmmetric(a::Matrix{Float64})
tmp = true
for j in 2:size(a, 1)
for i in 1:(j - 1)
tmp &= abs(a[i, j] - a[j, i]) < 1e-8
end
end
return tmp
end

# because isposdef keeps giving the wrong answer for samples
# from Wishart and InverseWisharts
hasCholesky(a::Matrix{Float64}) = isa(trycholfact(a), Cholesky)

function trycholfact(a::Matrix{Float64})
try cholfact(a)
catch e
return e
end
s += f[n]
return s * h / 3.0
end




Loading