From c988945826339f6bc7bafbb1044c1dd77991d0c2 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 8 Nov 2014 12:09:28 +0800
Subject: [PATCH 1/3] New implementation of Wishart

---
 src/deprecates.jl            |   6 ++
 src/matrix/inversewishart.jl |  22 ------
 src/matrix/wishart.jl        | 149 ++++++++++++++++++-----------------
 src/utils.jl                 |  26 ++++++
 test/runtests.jl             |   1 -
 test/wishart.jl              |  15 ----
 6 files changed, 108 insertions(+), 111 deletions(-)
 delete mode 100644 test/wishart.jl

diff --git a/src/deprecates.jl b/src/deprecates.jl
index 167666f37..d0a06fb8f 100644
--- a/src/deprecates.jl
+++ b/src/deprecates.jl
@@ -14,3 +14,9 @@ end
 @Base.deprecate logpmf logpdf
 @Base.deprecate logpmf! logpmf!
 @Base.deprecate pmf pdf
+
+
+#### Deprecate on 0.6 (to be removed on 0.7)
+
+@Base.deprecate expected_logdet meanlogdet
+
diff --git a/src/matrix/inversewishart.jl b/src/matrix/inversewishart.jl
index 044a09d9f..69b1536dc 100644
--- a/src/matrix/inversewishart.jl
+++ b/src/matrix/inversewishart.jl
@@ -79,25 +79,3 @@ function rand!(IW::InverseWishart, X::Array{Matrix{Float64}})
 end
 
 var(IW::InverseWishart) = error("Not yet implemented")
-
-# because X == X' keeps failing due to floating point nonsense
-function isApproxSymmmetric(a::Matrix{Float64})
-    tmp = true
-    for j in 2:size(a, 1)
-        for i in 1:(j - 1)
-            tmp &= abs(a[i, j] - a[j, i]) < 1e-8
-        end
-    end
-    return tmp
-end
-
-# because isposdef keeps giving the wrong answer for samples
-# from Wishart and InverseWisharts
-hasCholesky(a::Matrix{Float64}) = isa(trycholfact(a), Cholesky)
-
-function trycholfact(a::Matrix{Float64})
-    try cholfact(a)
-    catch e
-        return e
-    end
-end
diff --git a/src/matrix/wishart.jl b/src/matrix/wishart.jl
index bfd818096..a1d8dd789 100644
--- a/src/matrix/wishart.jl
+++ b/src/matrix/wishart.jl
@@ -1,96 +1,99 @@
-##############################################################################
+# Wishart distribution
 #
-# Wishart Distribution
+#   following the Wikipedia parameterization
 #
-#  Parameters nu and S such that E(X) = nu * S 
-#  See the rwish and dwish implementation in R's MCMCPack
-#  This parametrization differs from Bernardo & Smith p 435
-#  in this way: (nu, S) = (2.0 * alpha, 0.5 * beta^-1) 
-#
-##############################################################################
-
-immutable Wishart <: ContinuousMatrixDistribution
-    nu::Float64
-    Schol::Cholesky{Float64}
-    function Wishart(n::Real, Sc::Cholesky{Float64})
-        if n > size(Sc, 1) - 1
-            new(float64(n), Sc)
-        else
-            error("Wishart parameters must be df > p - 1")
-        end
-    end
+
+immutable Wishart{ST<:AbstractPDMat} <: ContinuousMatrixDistribution
+    df::Float64     # degree of freedom
+    S::ST           # the scale matrix
+    c0::Float64     # the logarithm of normalizing constant in pdf
 end
 
-Wishart(nu::Real, S::Matrix{Float64}) = Wishart(nu, cholfact(S))
+#### Constructors
 
-show(io::IO, d::Wishart) = show_multline(io, d, [(:nu, d.nu), (:S, full(d.Schol))])
+function Wishart{ST<:AbstractPDMat}(df::Real, S::ST)
+    p = dim(S)
+    df > p - 1 || error("df should be greater than dim - 1.")
+    Wishart{ST}(df, S, _wishart_c0(df, S))
+end
 
+Wishart(df::Real, S::Matrix{Float64}) = Wishart(df, PDMat(S))
 
-dim(W::Wishart) = size(W.Schol, 1)
-size(W::Wishart) = size(W.Schol)
+Wishart(df::Real, S::Cholesky) = Wishart(df, PDMat(S))
 
-function insupport(W::Wishart, X::Matrix{Float64})
-    return size(X) == size(W) && isApproxSymmmetric(X) && hasCholesky(X)
-end
-# This just checks if X could come from any Wishart
-function insupport(::Type{Wishart}, X::Matrix{Float64})
-    return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && hasCholesky(X)
+function _wishart_c0(df::Float64, S::AbstractPDMat)
+    h_df = df / 2
+    p = dim(S)
+    h_df * (logdet(S) + p * logtwo) + lpgamma(p, h_df)
 end
 
-mean(w::Wishart) = w.nu * (w.Schol[:U]' * w.Schol[:U])
 
-function expected_logdet(W::Wishart)
-    logd = 0.
-    d = dim(W)
+#### Properties
 
-    for i=1:d
-        logd += digamma(0.5 * (W.nu + 1 - i))
-    end
+insupport(::Type{Wishart}, X::Matrix{Float64}) = isposdef(X)
+insupport(d::Wishart, X::Matrix{Float64}) = size(X) == size(d) && isposdef(X)
 
-    logd += d * log(2)
-    logd += logdet(W.Schol)
+dim(d::Wishart) = dim(d.S)
+size(d::Wishart) = (p = dim(d); (p, p))
 
-    return logd
-end
 
-function lognorm(W::Wishart)
-    d = dim(W)
-    return (W.nu / 2) * logdet(W.Schol) + (d * W.nu / 2) * log(2) + lpgamma(d, W.nu / 2)
-end
+#### Show
+
+show(io::IO, d::Wishart) = show_multline(io, d, [(:df, d.df), (:S, full(d.S))])
+
+
+#### Statistics
+
+mean(d::Wishart) = d.df * full(d.S)
 
-function _logpdf{T<:Real}(W::Wishart, X::DenseMatrix{T})
-    Xchol = trycholfact(X)
-    if size(X) == size(W) && isApproxSymmmetric(X) && isa(Xchol, Cholesky)
-        d = dim(W)
-        logd = -lognorm(W)
-        logd += 0.5 * (W.nu - d - 1.0) * logdet(Xchol)
-        logd -= 0.5 * trace(W.Schol \ X)
-        return logd
-    else
-        return -Inf
+function meanlogdet(d::Wishart) 
+    p = dim(d)
+    df = d.df
+    v = logdet(d.S) + p * logtwo
+    for i = 1:p
+        v += digamma(0.5 * (df - (i - 1)))
     end
+    return v
 end
 
-function rand(w::Wishart)
-    p = size(w.Schol, 1)
-    X = zeros(p, p)
-    for ii in 1:p
-        X[ii, ii] = sqrt(rand(Chisq(w.nu - ii + 1)))
-    end
-    if p > 1
-        for col in 2:p
-            for row in 1:(col - 1)
-                X[row, col] = randn()
-            end
-        end
-    end
-    Z = X * w.Schol[:U]
-    return At_mul_B(Z, Z)
+function entropy(d::Wishart)
+    p = dim(d)
+    df = d.df
+    d.c0 - 0.5 * (df - p - 1) * meanlogdet(d) + 0.5 * df * p
 end
 
-function entropy(W::Wishart)
-    d = dim(W)
-    return lognorm(W) - (W.nu - d - 1) / 2 * expected_logdet(W) + W.nu * d / 2
+
+#### Evaluation
+
+function _logpdf(d::Wishart, X::DenseMatrix{Float64})
+    Xcf = cholfact(X)
+    df = d.df
+    p = dim(d)
+    0.5 * ((df - (p + 1)) * logdet(Xcf) - trace(d.S \ X)) - d.c0
 end
 
-var(w::Wishart) = error("Not yet implemented")
+
+#### Sampling
+
+function rand(d::Wishart)
+    Z = unwhiten!(d.S, _wishart_genA(dim(d), d.df))
+    A_mul_Bt(Z, Z)
+end
+
+function _wishart_genA(p::Int, df::Float64)
+    # Generate the matrix A in the Bartlett decomposition
+    #
+    #   A is a lower triangular matrix, with
+    #   
+    #       A(i, j) ~ sqrt of Chisq(df - i + 1) when i == j
+    #               ~ Normal()                  when i > j
+    #
+    A = zeros(p, p)
+    for i = 1:p
+        @inbounds A[i,i] = sqrt(rand(Chisq(df - i + 1.0)))
+    end
+    for j = 1:p-1, i = j+1:p
+        @inbounds A[i,j] = randn()
+    end
+    return A
+end
diff --git a/src/utils.jl b/src/utils.jl
index bca33d72e..a41a2dc7f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -158,3 +158,29 @@ function simpson(f::AbstractVector{Float64}, h::Float64)
     return s * h / 3.0
 end
 
+
+# because X == X' keeps failing due to floating point nonsense
+function isApproxSymmmetric(a::Matrix{Float64})
+    tmp = true
+    for j in 2:size(a, 1)
+        for i in 1:(j - 1)
+            tmp &= abs(a[i, j] - a[j, i]) < 1e-8
+        end
+    end
+    return tmp
+end
+
+# because isposdef keeps giving the wrong answer for samples
+# from Wishart and InverseWisharts
+hasCholesky(a::Matrix{Float64}) = isa(trycholfact(a), Cholesky)
+
+function trycholfact(a::Matrix{Float64})
+    try cholfact(a)
+    catch e
+        return e
+    end
+end
+
+
+
+
diff --git a/test/runtests.jl b/test/runtests.jl
index 58de8585e..fe0c5f84f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,7 +17,6 @@ tests = [
     "conjugates",
     "conjugates_normal",
     "conjugates_mvnormal",
-    "wishart",
     "mixture",
     "gradlogpdf"]
 
diff --git a/test/wishart.jl b/test/wishart.jl
deleted file mode 100644
index a5665674c..000000000
--- a/test/wishart.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-# Tests on Wishart distributions
-
-using Distributions
-using Base.Test
-
-V = [[2. 1.], [1. 2.]]
-W = Wishart(3., V)
-
-# logdet
-
-@test_approx_eq expected_logdet(W) 1.9441809588650447
-
-# entropy
-
-@test_approx_eq entropy(W) 7.178942679971454

From 19b8a3e84816d761158f167972501ae795fba7eb Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 8 Nov 2014 12:47:55 +0800
Subject: [PATCH 2/3] new implementation of InverseWishart

---
 src/matrix/inversewishart.jl | 128 ++++++++++++++++++-----------------
 src/matrix/wishart.jl        |  12 +++-
 2 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/src/matrix/inversewishart.jl b/src/matrix/inversewishart.jl
index 69b1536dc..18659968f 100644
--- a/src/matrix/inversewishart.jl
+++ b/src/matrix/inversewishart.jl
@@ -1,81 +1,85 @@
-##############################################################################
+# Inverse Wishart distribution
 #
-# Inverse Wishart Distribution
+#   following Wikipedia parametrization
 #
-#  Parameterized such that E(X) = Psi / (nu - p - 1)
-#  See the riwish and diwish function of R's MCMCpack
-#
-##############################################################################
-
-immutable InverseWishart <: ContinuousMatrixDistribution
-    nu::Float64
-    Psichol::Cholesky{Float64}
-    function InverseWishart(n::Real, Pc::Cholesky{Float64})
-        if n > size(Pc, 1) - 1
-            new(float64(n), Pc)
-        else
-            error("Inverse Wishart parameters must be df > p - 1")
-        end
-    end
-end
 
-dim(d::InverseWishart) = size(d.Psichol, 1)
-size(d::InverseWishart) = size(d.Psichol)
+immutable InverseWishart{ST<:AbstractPDMat} <: ContinuousMatrixDistribution
+    df::Float64     # degree of freedom
+    Ψ::ST           # scale matrix
+    c0::Float64     # log of normalizing constant
+end
 
-show(io::IO, d::InverseWishart) = show_multline(io, d, [(:nu, d.nu), (:Psi, full(d.Psichol))])
+#### Constructors
 
-function InverseWishart(nu::Real, Psi::Matrix{Float64})
-    InverseWishart(float64(nu), cholfact(Psi))
+function InverseWishart{ST<:AbstractPDMat}(df::Real, Ψ::ST)
+    p = dim(Ψ)
+    df > p - 1 || error("df should be greater than dim - 1.")
+    InverseWishart{ST}(df, Ψ, _invwishart_c0(df, Ψ))
 end
 
-function insupport(IW::InverseWishart, X::Matrix{Float64})
-    return size(X) == size(IW) && isApproxSymmmetric(X) && hasCholesky(X)
-end
-# This just checks if X could come from any Inverse-Wishart
-function insupport(::Type{InverseWishart}, X::Matrix{Float64})
-    return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && hasCholesky(X)
+InverseWishart(df::Real, Ψ::Matrix{Float64}) = InverseWishart(df, PDMat(Ψ))
+
+InverseWishart(df::Real, Ψ::Cholesky) = InverseWishart(df, PDMat(Ψ))
+
+function _invwishart_c0(df::Float64, Ψ::AbstractPDMat)
+    h_df = df / 2
+    p = dim(Ψ)
+    h_df * (p * logtwo - logdet(Ψ)) + lpgamma(p, h_df)
 end
 
-function mean(IW::InverseWishart)
-    if IW.nu > size(IW.Psichol, 1) + 1
-        return 1.0 / (IW.nu - size(IW.Psichol, 1) - 1.0) *
-               (IW.Psichol[:U]' * IW.Psichol[:U])
+
+#### Properties
+
+insupport(::Type{InverseWishart}, X::Matrix{Float64}) = isposdef(X)
+insupport(d::InverseWishart, X::Matrix{Float64}) = size(X) == size(d) && isposdef(X)
+
+dim(d::InverseWishart) = dim(d.Ψ)
+size(d::InverseWishart) = (p = dim(d); (p, p))
+
+
+#### Show
+
+show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, full(d.Ψ))])
+
+
+#### Statistics
+
+function mean(d::InverseWishart)
+    df = d.df
+    p = dim(d)
+    r = df - (p + 1)
+    if r > 0.0
+        return full(d.Ψ) * (1.0 / r)
     else
-        error("mean only defined for nu > p + 1")
+        error("mean only defined for df > p + 1")
     end
 end
 
-function _logpdf{T<:Real}(IW::InverseWishart, X::DenseMatrix{T})
-    Xchol = trycholfact(X)
-    if size(X) == size(IW) && isApproxSymmmetric(X) && isa(Xchol, Cholesky)
-        p = size(X, 1)
-        logd::Float64 = IW.nu * p / 2.0 * log(2.0)
-        logd += lpgamma(p, IW.nu / 2.0)
-        logd -= IW.nu / 2.0 * logdet(IW.Psichol)
-        logd = -logd
-        logd -= 0.5 * (IW.nu + p + 1.0) * logdet(Xchol)
-        logd -= 0.5 * trace(inv(Xchol) * (IW.Psichol[:U]' * IW.Psichol[:U]))
-        return logd
-    else
-        return -Inf
-    end
+mode(d::InverseWishart) = d.Ψ * inv(d.df + dim(d) + 1.0)
+
+
+#### Evaluation
+
+function _logpdf(d::InverseWishart, X::DenseMatrix{Float64})
+    p = dim(d)
+    df = d.df
+    Xcf = cholfact(X)
+    # we use the fact: trace(Ψ * inv(X)) = trace(inv(X) * Ψ) = trace(X \ Ψ)
+    Ψ = full(d.Ψ)
+    -0.5 * ((df + p + 1) * logdet(Xcf) + trace(Xcf \ Ψ)) - d.c0
 end
 
-# rand(Wishart(nu, Psi^-1))^-1 is an sample from an
-#  inverse wishart(nu, Psi). there is actually some wacky
-#  behavior here where inv of the Cholesky returns the 
-#  inverse of the original matrix, in this case we're getting
-#  Psi^-1 like we want
-rand(IW::InverseWishart) = inv(rand(Wishart(IW.nu, inv(IW.Psichol))))
-
-function rand!(IW::InverseWishart, X::Array{Matrix{Float64}})
-    Psiinv = inv(IW.Psichol)
-    W = Wishart(IW.nu, Psiinv)
-    X = rand!(W, X)
+_logpdf{T<:Real}(d::InverseWishart, X::DenseMatrix{T}) = _logpdf(d, float64(X))
+
+
+#### Sampling
+
+rand(d::InverseWishart) = inv(rand(Wishart(d.df, inv(d.Ψ))))
+
+function _rand!{M<:Matrix}(d::InverseWishart, X::AbstractArray{M})
+    wd = Wishart(d.df, inv(d.Ψ))
     for i in 1:length(X)
-        X[i] = inv(X[i])
+        X[i] = inv(rand(wd))
     end
     return X
 end
-
-var(IW::InverseWishart) = error("Not yet implemented")
diff --git a/src/matrix/wishart.jl b/src/matrix/wishart.jl
index a1d8dd789..0e5c3a2e5 100644
--- a/src/matrix/wishart.jl
+++ b/src/matrix/wishart.jl
@@ -46,6 +46,15 @@ show(io::IO, d::Wishart) = show_multline(io, d, [(:df, d.df), (:S, full(d.S))])
 
 mean(d::Wishart) = d.df * full(d.S)
 
+function mode(d::Wishart)
+    r = d.df - dim(d) - 1.0
+    if r > 0.0
+        return full(d.S) * r
+    else
+        error("mode is only defined when df > p + 1")
+    end
+end
+
 function meanlogdet(d::Wishart) 
     p = dim(d)
     df = d.df
@@ -66,12 +75,13 @@ end
 #### Evaluation
 
 function _logpdf(d::Wishart, X::DenseMatrix{Float64})
-    Xcf = cholfact(X)
     df = d.df
     p = dim(d)
+    Xcf = cholfact(X)
     0.5 * ((df - (p + 1)) * logdet(Xcf) - trace(d.S \ X)) - d.c0
 end
 
+_logpdf{T<:Real}(d::Wishart, X::DenseMatrix{T}) = _logpdf(d, float64(X))
 
 #### Sampling
 

From 3c12980d997968bfe3d1660a0e5fd764bf8cdc68 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 8 Nov 2014 12:52:53 +0800
Subject: [PATCH 3/3] remove unused simpson function

---
 src/utils.jl | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index a41a2dc7f..b09bfa75e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -145,20 +145,6 @@ macro checkinvlogcdf(lp,ex)
     :($lp <= zero($lp) ? $ex : NaN)
 end
 
-# Simple method for integration
-function simpson(f::AbstractVector{Float64}, h::Float64)
-    n = length(f)
-    isodd(n) || error("The length of the input vector must be odd.")
-    s = f[1]
-    t = -1
-    for i = 2:2:n-1
-        @inbounds s += (4 * f[i] + 2 * f[i+1])
-    end
-    s += f[n]
-    return s * h / 3.0
-end
-
-
 # because X == X' keeps failing due to floating point nonsense
 function isApproxSymmmetric(a::Matrix{Float64})
     tmp = true