Fix deprecations #1690

Merged · 2 commits · Aug 27, 2021
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "Turing"
uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
version = "0.17.3"
version = "0.17.4"

[deps]
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
26 changes: 12 additions & 14 deletions benchmarks/benchmarks.jl
@@ -1,5 +1,5 @@
using Turing, BenchmarkTools, BenchmarkHelper

using LinearAlgebra

## Dummy benchmarks

@@ -8,7 +8,7 @@ BenchmarkSuite["dummy"] = BenchmarkGroup(["dummy"])
data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]


@model constrained_test(obs) = begin
@model function constrained_test(obs)
p ~ Beta(2,2)
for i = 1:length(obs)
obs[i] ~ Bernoulli(p)
@@ -17,22 +17,22 @@ data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]
end


BenchmarkSuite["dummy"]["dummy"] = @benchmarkable sample(constrained_test($data), HMC(0.01, 2), 2000)
BenchmarkSuite["dummy"]["dummy"] = @benchmarkable sample($(constrained_test(data)), $(HMC(0.01, 2)), 2000)


## gdemo

BenchmarkSuite["gdemo"] = BenchmarkGroup(["gdemo"])

@model gdemo(x, y) = begin
@model function gdemo(x, y)
s² ~ InverseGamma(2, 3)
m ~ Normal(0, sqrt(s²))
x ~ Normal(m, sqrt(s²))
y ~ Normal(m, sqrt(s²))
return s², m
end

BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample(gdemo(1.5, 2.0), HMC(0.01, 2), 2000)
BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample($(gdemo(1.5, 2.0)), $(HMC(0.01, 2)), 2000)


##
@@ -42,24 +42,22 @@ BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample(gdemo(1.5, 2.0), HMC(0.01
BenchmarkSuite["mnormal"] = BenchmarkGroup(["mnormal"])

# Define the target distribution and its gradient
const D = 10

@model target(dim) = begin
@model function target(dim)
Θ = Vector{Real}(undef, dim)
θ ~ MvNormal(zeros(D), ones(dim))
θ ~ MvNormal(zeros(dim), I)
end

# Sampling parameter settings
dim = 10
n_samples = 100_000
n_adapts = 2_000

BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample(target($D), HMC(0.1, 5), $n_samples)
BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)

## MvNormal: ForwardDiff vs BackwardDiff (Tracker)

using LinearAlgebra

@model mdemo(d, N) = begin
@model function mdemo(d, N)
Θ = Vector(undef, N)
for n=1:N
Θ[n] ~ d
@@ -72,9 +70,9 @@ d = MvNormal(zeros(dim2), A)

# ForwardDiff
Turing.setadbackend(:forwarddiff)
BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample(mdemo($d, 1), HMC(0.1, 5), 5000)
BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)


# BackwardDiff
Turing.setadbackend(:reversediff)
BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample(mdemo($d, 1), HMC(0.1, 5), 5000)
BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
2 changes: 1 addition & 1 deletion benchmarks/nuts/hlr.jl
@@ -17,7 +17,7 @@ x, y = readlrdata()

σ² ~ Exponential(θ)
α ~ Normal(0, sqrt(σ²))
β ~ MvNormal(zeros(D), ones(D)*sqrt(σ²))
β ~ MvNormal(zeros(D), σ² * I)

for n = 1:N
y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α)
2 changes: 1 addition & 1 deletion benchmarks/nuts/lr.jl
@@ -16,7 +16,7 @@ X, Y = readlrdata()
N,D = size(x)

α ~ Normal(0, σ)
β ~ MvNormal(zeros(D), ones(D)*σ)
β ~ MvNormal(zeros(D), σ^2 * I)

for n = 1:N
y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α)
4 changes: 2 additions & 2 deletions docs/src/for-developers/compiler.md
@@ -187,7 +187,7 @@ without any default values. Finally, in the new function body a `model::Model` w
In order to track random variables in the sampling process, `Turing` uses the `VarName` struct which acts as a random variable identifier generated at runtime. The `VarName` of a random variable is generated from the expression on the LHS of a `~` statement when the symbol on the LHS is in the set `P` of unobserved random variables. Every `VarName` instance has a type parameter `sym` which is the symbol of the Julia variable in the model that the random variable belongs to. For example, `x[1] ~ Normal()` will generate an instance of `VarName{:x}` assuming `x` is an unobserved random variable. Every `VarName` also has a field `indexing`, which stores the indices required to access the random variable from the Julia variable indicated by `sym` as a tuple of tuples. Each element of the tuple thereby contains the indices of one indexing operation (`VarName` also supports hierarchical arrays and range indexing). Some examples:
- `x ~ Normal()` will generate a `VarName(:x, ())`.
- `x[1] ~ Normal()` will generate a `VarName(:x, ((1,),))`.
- `x[:,1] ~ MvNormal(zeros(2))` will generate a `VarName(:x, ((Colon(), 1),))`.
- `x[:,1] ~ MvNormal(zeros(2), I)` will generate a `VarName(:x, ((Colon(), 1),))`.
- `x[:,1][1+1] ~ Normal()` will generate a `VarName(:x, ((Colon(), 1), (2,)))`.

The easiest way to manually construct a `VarName` is to use the `@varname` macro on an indexing expression, which will take the `sym` value from the actual variable name, and put the index values appropriately into the constructor.
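For illustration, a minimal sketch of constructing `VarName`s with the macro (assuming the tuple-of-tuples `indexing` representation described above; the exact form may differ between DynamicPPL versions):

```julia
using DynamicPPL: @varname

vn1 = @varname(x)          # VarName(:x, ())
vn2 = @varname(x[1])       # VarName(:x, ((1,),))
vn3 = @varname(x[:, 1])    # VarName(:x, ((Colon(), 1),))
```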
@@ -227,7 +227,7 @@ not. Let `md` be an instance of `Metadata`:
- `md.flags` is a dictionary of true/false flags. `md.flags[flag][md.idcs[vn]]` is the
value of `flag` corresponding to `vn`.

Note that in order to make `md::Metadata` type stable, all the `md.vns` must have the same symbol and distribution type. However, one can have a single Julia variable, e.g. `x`, that is a matrix or a hierarchical array sampled in partitions, e.g. `x[1][:] ~ MvNormal(zeros(2), 1.0); x[2][:] ~ MvNormal(ones(2), 1.0)`. The symbol `x` can still be managed by a single `md::Metadata` without hurting the type stability since all the distributions on the RHS of `~` are of the same type.
Note that in order to make `md::Metadata` type stable, all the `md.vns` must have the same symbol and distribution type. However, one can have a single Julia variable, e.g. `x`, that is a matrix or a hierarchical array sampled in partitions, e.g. `x[1][:] ~ MvNormal(zeros(2), I); x[2][:] ~ MvNormal(ones(2), I)`. The symbol `x` can still be managed by a single `md::Metadata` without hurting the type stability since all the distributions on the RHS of `~` are of the same type.

However, in `Turing` models one cannot have this restriction, so we must use a type unstable `Metadata` if we want to use one `Metadata` instance for the whole model. This is what `UntypedVarInfo` does. A type unstable `Metadata` will still work but will have inferior performance.
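As a hedged sketch of the partitioned case described above (model name and shapes are illustrative), one symbol can be covered by a single type-stable `Metadata` because every right-hand side has the same distribution type:

```julia
using Turing, LinearAlgebra

@model function partitioned()
    # One Julia variable `x`, sampled in two partitions; both RHS
    # distributions are `MvNormal`s of the same type, so all the
    # `VarName{:x}` entries can live in one type-stable `Metadata`.
    x = Vector{Vector{Float64}}(undef, 2)
    x[1] ~ MvNormal(zeros(2), I)
    x[2] ~ MvNormal(ones(2), I)
    return x
end
```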

2 changes: 1 addition & 1 deletion docs/src/for-developers/interface.md
@@ -90,7 +90,7 @@ end

# Default constructors.
MetropolisHastings(init_θ::Real) = MetropolisHastings(init_θ, Normal(0,1))
MetropolisHastings(init_θ::Vector{<:Real}) = MetropolisHastings(init_θ, MvNormal(length(init_θ),1))
MetropolisHastings(init_θ::Vector{<:Real}) = MetropolisHastings(init_θ, MvNormal(zero(init_θ), I))
```

Above, we have defined a sampler that stores the initial parameterization of the prior,
4 changes: 2 additions & 2 deletions docs/src/using-turing/advanced.md
@@ -108,14 +108,14 @@ using Turing
using LinearAlgebra

@model function demo(x)
m ~ MvNormal(length(x))
m ~ MvNormal(zero(x), I)
if dot(m, x) < 0
Turing.@addlogprob! -Inf
# Exit the model evaluation early
return
end

x ~ MvNormal(m, 1.0)
x ~ MvNormal(m, I)
return
end
```
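For reference, a minimal usage sketch of the model above; the data and the sampler choice are placeholders, not part of the original documentation:

```julia
model = demo(randn(3))
# Draws with dot(m, x) < 0 contribute -Inf to the log joint and are
# effectively rejected by the sampler.
chain = sample(model, MH(), 1_000)
```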
14 changes: 7 additions & 7 deletions docs/src/using-turing/performancetips.md
@@ -16,9 +16,9 @@ The following example:

```julia
@model function gmodel(x)
m ~ Normal()
m ~ Normal()
for i = 1:length(x)
x[i] ~ Normal(m, 0.2)
x[i] ~ Normal(m, 0.2)
end
end
```
@@ -28,8 +28,8 @@ can be directly expressed more efficiently using a simple transformation:
using FillArrays

@model function gmodel(x)
m ~ Normal()
x ~ MvNormal(Fill(m, length(x)), 0.2)
m ~ Normal()
x ~ MvNormal(Fill(m, length(x)), 0.04 * I)
end
```
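Note that the second argument of `MvNormal` here is a covariance, so the standard deviation `0.2` from the loop version enters as `0.2^2 = 0.04`. A quick sanity check of the equivalence (a sketch; the values are arbitrary):

```julia
using Distributions, FillArrays, LinearAlgebra

m, x = 0.3, randn(5)
# Product of independent Normal(m, 0.2) densities equals the joint
# MvNormal density with covariance 0.2^2 * I.
lp_loop = sum(logpdf.(Normal.(Fill(m, length(x)), 0.2), x))
lp_mv   = logpdf(MvNormal(Fill(m, length(x)), 0.04 * I), x)
lp_loop ≈ lp_mv  # true
```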

@@ -62,7 +62,7 @@ The following example with abstract types
end

a = x * params
y ~ MvNormal(a, 1.0)
y ~ MvNormal(a, I)
end
```

@@ -77,7 +77,7 @@ can be transformed into the following representation with concrete types:
end

a = x * params
y ~ MvNormal(a, 1.0)
y ~ MvNormal(a, I)
end
```

@@ -87,7 +87,7 @@ Alternatively, you could use `filldist` in this example:
@model function tmodel(x, y)
params ~ filldist(truncated(Normal(), 0, Inf), size(x, 2))
a = x * params
y ~ MvNormal(a, 1.0)
y ~ MvNormal(a, I)
end
```
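`filldist` builds one product distribution from identical components, which keeps the prior type-stable. A small sketch of its behaviour (assuming the `filldist` exported by Turing via DistributionsAD):

```julia
using Turing, Distributions

d = filldist(truncated(Normal(), 0, Inf), 3)
rand(d)  # 3-element vector of positive draws
# The joint log-density is the sum of the identical component log-densities.
logpdf(d, ones(3)) ≈ 3 * logpdf(truncated(Normal(), 0, Inf), 1.0)  # true
```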

8 changes: 6 additions & 2 deletions src/inference/hmc.jl
@@ -459,8 +459,12 @@ function gen_metric(dim::Int, spl::Sampler{<:AdaptiveHamiltonian}, state)
return AHMC.renew(state.hamiltonian.metric, AHMC.getM⁻¹(state.adaptor.pc))
end

make_ahmc_kernel(alg::HMC, ϵ) = AHMC.StaticTrajectory(AHMC.Leapfrog(ϵ), alg.n_leapfrog)
make_ahmc_kernel(alg::HMCDA, ϵ) = AHMC.HMCDA(AHMC.Leapfrog(ϵ), alg.λ)
function make_ahmc_kernel(alg::HMC, ϵ)
return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedNSteps(alg.n_leapfrog)))
end
function make_ahmc_kernel(alg::HMCDA, ϵ)
return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedIntegrationTime(alg.λ)))
end
make_ahmc_kernel(alg::NUTS, ϵ) = AHMC.NUTS(AHMC.Leapfrog(ϵ), alg.max_depth, alg.Δ_max)

####
2 changes: 1 addition & 1 deletion test/Project.toml
@@ -34,7 +34,7 @@ AdvancedPS = "0.2"
AdvancedVI = "0.1"
Clustering = "0.14"
CmdStan = "6.0.8"
Distributions = "< 0.25.11"
Distributions = "0.25"
DistributionsAD = "0.6.3"
DynamicHMC = "2.1.6, 3.0"
DynamicPPL = "0.14"
2 changes: 1 addition & 1 deletion test/core/ad.jl
@@ -112,7 +112,7 @@
params = TV(undef, 2)
@. params ~ Normal(0, 1)

x ~ MvNormal(params, 1)
x ~ MvNormal(params, I)
end

function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext)
22 changes: 11 additions & 11 deletions test/inference/gibbs_conditional.jl
@@ -72,26 +72,26 @@
K = 2 # number of clusters
π = fill(1/K, K) # uniform cluster weights
m = 0.5 # prior mean of μₖ
σ_μ = 2.0 # prior variance of μₖ
σ_x = 0.1 # observation variance
σ²_μ = 4.0 # prior variance of μₖ
σ²_x = 0.01 # observation variance
N = 20 # number of observations

# We generate data
μ_data = rand(Normal(m, σ_μ), K)
μ_data = rand(Normal(m, sqrt(σ²_μ)), K)
z_data = rand(Categorical(π), N)
x_data = rand(MvNormal(μ_data[z_data], σ_x))
x_data = rand(MvNormal(μ_data[z_data], σ²_x * I))

@model function mixture(x)
μ ~ $(MvNormal(fill(m, K), σ_μ))
μ ~ $(MvNormal(fill(m, K), σ²_μ * I))
z ~ $(filldist(Categorical(π), N))
x ~ MvNormal(μ[z], $(σ_x))
x ~ MvNormal(μ[z], $(σ²_x * I))
return x
end
model = mixture(x_data)

# Conditional distribution ``z | μ, x``
# see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf
cond_z = let x=x_data, log_π=log.(π), σ_x=σ_x
cond_z = let x=x_data, log_π=log.(π), σ_x=sqrt(σ²_x)
c -> begin
dists = map(x) do xi
logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi)
@@ -103,7 +103,7 @@

# Conditional distribution ``μ | z, x``
# see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf
cond_μ = let K=K, x_data=x_data, inv_σ_μ2=inv(σ_μ^2), inv_σ_x2=inv(σ_x^2)
cond_μ = let K=K, x_data=x_data, inv_σ²_μ=inv(σ²_μ), inv_σ²_x=inv(σ²_x)
c -> begin
# Convert cluster assignments to one-hot encodings
z_onehot = c.z .== (1:K)'
@@ -112,10 +112,10 @@
n = vec(sum(z_onehot; dims=1))

# Compute mean and variance of the conditional distribution
μ_var = @. inv(inv_σ_x2 * n + inv_σ_μ2)
μ_mean = (z_onehot' * x_data) .* inv_σ_x2 .* μ_var
μ_var = @. inv(inv_σ²_x * n + inv_σ²_μ)
μ_mean = (z_onehot' * x_data) .* inv_σ²_x .* μ_var

return MvNormal(μ_mean, μ_var)
return MvNormal(μ_mean, Diagonal(μ_var))
end
end

18 changes: 9 additions & 9 deletions test/inference/hmc.jl
@@ -181,23 +181,23 @@

@turing_testset "Regression tests" begin
# https://github.com/TuringLang/DynamicPPL.jl/issues/27
@model function mwe(::Type{T}=Float64) where {T<:Real}
@model function mwe1(::Type{T}=Float64) where {T<:Real}
m = Matrix{T}(undef, 2, 3)
@. m ~ MvNormal(zeros(2), 1)
m .~ MvNormal(zeros(2), I)
end
@test sample(mwe(), HMC(0.2, 4), 1_000) isa Chains
@test sample(mwe1(), HMC(0.2, 4), 1_000) isa Chains

@model function mwe(::Type{T} = Matrix{Float64}) where T
@model function mwe2(::Type{T} = Matrix{Float64}) where T
m = T(undef, 2, 3)
@. m ~ MvNormal(zeros(2), 1)
m .~ MvNormal(zeros(2), I)
end
@test sample(mwe(), HMC(0.2, 4), 1_000) isa Chains
@test sample(mwe2(), HMC(0.2, 4), 1_000) isa Chains

# https://github.com/TuringLang/Turing.jl/issues/1308
@model function mwe(::Type{T} = Array{Float64}) where T
@model function mwe3(::Type{T} = Array{Float64}) where T
m = T(undef, 2, 3)
@. m ~ MvNormal(zeros(2), 1)
m .~ MvNormal(zeros(2), I)
end
@test sample(mwe(), HMC(0.2, 4), 1_000) isa Chains
@test sample(mwe3(), HMC(0.2, 4), 1_000) isa Chains
end
end
8 changes: 4 additions & 4 deletions test/inference/mh.jl
@@ -47,7 +47,7 @@

# Test MH shape passing.
@turing_testset "shape" begin
@model M(mu, sigma, observable) = begin
@model function M(mu, sigma, observable)
z ~ MvNormal(mu, sigma)

m = Array{Float64}(undef, 1, 2)
@@ -64,7 +64,7 @@
2.0 ~ Normal(m[1], s)
end

model = M(zeros(2), ones(2), 1)
model = M(zeros(2), I, 1)
sampler = Inference.Sampler(MH(), model)

dt, vt = Inference.dist_val_tuple(sampler, Turing.VarInfo(model))
@@ -116,7 +116,7 @@
# Turing model
@model function twomeans(x, y)
# Set Priors
μ ~ MvNormal(2, 3)
μ ~ MvNormal(zeros(2), 9 * I)
σ ~ filldist(Exponential(1), 2)

# Distributions of supplied data
@@ -188,7 +188,7 @@
# Link if proposal is `AdvancedHM.RandomWalkProposal`
vi = deepcopy(vi_base)
d = length(vi_base[DynamicPPL.SampleFromPrior()])
alg = MH(AdvancedMH.RandomWalkProposal(MvNormal(d, 1.0)))
alg = MH(AdvancedMH.RandomWalkProposal(MvNormal(zeros(d), I)))
spl = DynamicPPL.Sampler(alg)
Turing.Inference.maybe_link!(vi, spl, alg.proposals)
@test DynamicPPL.islinked(vi, spl)