From b63b9c66d4041a0918fb5e73e705497d4537e526 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 14 Jul 2021 12:46:59 +0100 Subject: [PATCH 01/13] updated HMC implementation according to new AHMC interface --- src/inference/gibbs.jl | 2 +- src/inference/hmc.jl | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/inference/gibbs.jl b/src/inference/gibbs.jl index 6f1507805..1d4fa6f64 100644 --- a/src/inference/gibbs.jl +++ b/src/inference/gibbs.jl @@ -126,7 +126,7 @@ function gibbs_state( state.z.θ .= θ_old z = state.z - return HMCState(varinfo, state.i, state.traj, hamiltonian, z, state.adaptor) + return HMCState(varinfo, state.i, state.kernel, hamiltonian, z, state.adaptor) end """ diff --git a/src/inference/hmc.jl b/src/inference/hmc.jl index ffdbf7b71..aedaf3a7e 100644 --- a/src/inference/hmc.jl +++ b/src/inference/hmc.jl @@ -4,14 +4,14 @@ struct HMCState{ TV<:AbstractVarInfo, - TTraj<:AHMC.AbstractTrajectory, + TKernel<:AHMC.HMCKernel, THam<:AHMC.Hamiltonian, PhType<:AHMC.PhasePoint, TAdapt<:AHMC.Adaptation.AbstractAdaptor, } vi::TV i::Int - traj::TTraj + kernel::TKernel hamiltonian::THam z::PhType adaptor::TAdapt @@ -190,18 +190,18 @@ function DynamicPPL.initialstep( ϵ = spl.alg.ϵ end - # Generate a trajectory. - traj = gen_traj(spl.alg, ϵ) + # Generate a kernel. + kernel = make_ahmc_kernel(spl.alg, ϵ) # Create initial transition and state. # Already perform one step since otherwise we don't get any statistics. - t = AHMC.step(rng, hamiltonian, traj, z) + t = AHMC.transition(rng, hamiltonian, kernel, z) # Adaptation adaptor = AHMCAdaptor(spl.alg, hamiltonian.metric; ϵ=ϵ) if spl.alg isa AdaptiveHamiltonian - hamiltonian, traj, _ = - AHMC.adapt!(hamiltonian, traj, adaptor, + hamiltonian, kernel, _ = + AHMC.adapt!(hamiltonian, kernel, adaptor, 1, nadapts, t.z.θ, t.stat.acceptance_rate) end @@ -215,7 +215,7 @@ function DynamicPPL.initialstep( end transition = HMCTransition(vi, t) - state = HMCState(vi, 1, traj, hamiltonian, t.z, adaptor) + state = HMCState(vi, 1, kernel, hamiltonian, t.z, adaptor) return transition, state end @@ -234,16 +234,16 @@ function AbstractMCMC.step( # Compute transition. hamiltonian = state.hamiltonian z = state.z - t = AHMC.step(rng, hamiltonian, state.traj, z) + t = AHMC.transition(rng, hamiltonian, state.kernel, z) # Adaptation i = state.i + 1 if spl.alg isa AdaptiveHamiltonian - hamiltonian, traj, _ = - AHMC.adapt!(hamiltonian, state.traj, state.adaptor, + hamiltonian, kernel, _ = + AHMC.adapt!(hamiltonian, state.kernel, state.adaptor, i, nadapts, t.z.θ, t.stat.acceptance_rate) else - traj = state.traj + kernel = state.kernel end # Update variables @@ -255,7 +255,7 @@ function AbstractMCMC.step( # Compute next transition and state. 
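For orientation, the renamed AdvancedHMC 0.3 calls this patch adopts, condensed into one sketch. Only names introduced by the patch itself appear; `rng`, `hamiltonian`, `z`, `ϵ`, `adaptor`, and `nadapts` are assumed to be set up as in `DynamicPPL.initialstep` above:

    kernel = make_ahmc_kernel(spl.alg, ϵ)             # returns an AHMC.HMCKernel (previously gen_traj returned a trajectory)
    t = AHMC.transition(rng, hamiltonian, kernel, z)  # previously AHMC.step(rng, hamiltonian, traj, z)
    hamiltonian, kernel, _ = AHMC.adapt!(hamiltonian, kernel, adaptor,
                                         1, nadapts, t.z.θ, t.stat.acceptance_rate)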
transition = HMCTransition(vi, t) - newstate = HMCState(vi, i, traj, hamiltonian, t.z, state.adaptor) + newstate = HMCState(vi, i, kernel, hamiltonian, t.z, state.adaptor) return transition, newstate end @@ -459,9 +459,9 @@ function gen_metric(dim::Int, spl::Sampler{<:AdaptiveHamiltonian}, state) return AHMC.renew(state.hamiltonian.metric, AHMC.getM⁻¹(state.adaptor.pc)) end -gen_traj(alg::HMC, ϵ) = AHMC.StaticTrajectory(AHMC.Leapfrog(ϵ), alg.n_leapfrog) -gen_traj(alg::HMCDA, ϵ) = AHMC.HMCDA(AHMC.Leapfrog(ϵ), alg.λ) -gen_traj(alg::NUTS, ϵ) = AHMC.NUTS(AHMC.Leapfrog(ϵ), alg.max_depth, alg.Δ_max) +make_ahmc_kernel(alg::HMC, ϵ) = AHMC.StaticTrajectory(AHMC.Leapfrog(ϵ), alg.n_leapfrog) +make_ahmc_kernel(alg::HMCDA, ϵ) = AHMC.HMCDA(AHMC.Leapfrog(ϵ), alg.λ) +make_ahmc_kernel(alg::NUTS, ϵ) = AHMC.NUTS(AHMC.Leapfrog(ϵ), alg.max_depth, alg.Δ_max) #### #### Compiler interface, i.e. tilde operators. @@ -584,8 +584,8 @@ function HMCState( ϵ = spl.alg.ϵ end - # Generate a trajectory. - traj = gen_traj(spl.alg, ϵ) + # Generate a kernel. + kernel = make_ahmc_kernel(spl.alg, ϵ) # Generate a phasepoint. Replaced during sample_init! h, t = AHMC.sample_init(rng, h, θ_init) # this also ensure AHMC has the same dim as θ. @@ -593,5 +593,5 @@ function HMCState( # Unlink everything. invlink!(vi, spl) - return HMCState(vi, 0, 0, traj, h, AHMCAdaptor(spl.alg, metric; ϵ=ϵ), t.z) + return HMCState(vi, 0, 0, kernel.τ, h, AHMCAdaptor(spl.alg, metric; ϵ=ϵ), t.z) end From c77a928510ebe3e475a57f11a8efb8e605db6023 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 15 Jul 2021 15:31:00 +0100 Subject: [PATCH 02/13] bump compat bound for AdvancedHMC --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d37f3196f..e84960507 100644 --- a/Project.toml +++ b/Project.toml @@ -34,7 +34,7 @@ ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" [compat] AbstractMCMC = "3.2" -AdvancedHMC = "0.2.24" +AdvancedHMC = "0.3.0" AdvancedMH = "0.6" AdvancedPS = "0.2.4" AdvancedVI = "0.1" From 7242b024de4e34fbedc4cd76871cca68820e33dd Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 15 Jul 2021 15:31:33 +0100 Subject: [PATCH 03/13] bumped patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e84960507..a7d365d9f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Turing" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.16.5" +version = "0.16.6" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" From e7b4d2b18b631d21140213b1ed2c8f24e8bf8656 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 16 Jul 2021 10:52:01 +0100 Subject: [PATCH 04/13] disable GMM Gibbs conditional test to see if it fixes CI --- test/inference/gibbs_conditional.jl | 164 ++++++++++++++-------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/test/inference/gibbs_conditional.jl b/test/inference/gibbs_conditional.jl index eaee4bb7f..70789a4aa 100644 --- a/test/inference/gibbs_conditional.jl +++ b/test/inference/gibbs_conditional.jl @@ -61,86 +61,86 @@ check_gdemo(chain) end - @turing_testset "GMM" begin - # We consider the model - # ```math - # μₖ ~ Normal(m, σ_μ), k = 1, …, K, - # zᵢ ~ Categorical(π), i = 1, …, N, - # xᵢ ~ Normal(μ_{zᵢ}, σₓ), i = 1, …, N, - # ``` - # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: - K = 2 # number of clusters - π = fill(1/K, K) # uniform cluster weights - m = 0.5 # prior mean of μₖ - σ_μ = 
2.0 # prior variance of μₖ - σ_x = 0.1 # observation variance - N = 20 # number of observations - - # We generate data - μ_data = rand(Normal(m, σ_μ), K) - z_data = rand(Categorical(π), N) - x_data = rand(MvNormal(μ_data[z_data], σ_x)) - - @model function mixture(x) - μ ~ $(MvNormal(fill(m, K), σ_μ)) - z ~ $(filldist(Categorical(π), N)) - x ~ MvNormal(μ[z], $(σ_x)) - return x - end - model = mixture(x_data) - - # Conditional distribution ``z | μ, x`` - # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_z = let x=x_data, log_π=log.(π), σ_x=σ_x - c -> begin - dists = map(x) do xi - logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) - return Categorical(softmax!(logp)) - end - return arraydist(dists) - end - end - - # Conditional distribution ``μ | z, x`` - # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_μ = let K=K, x_data=x_data, inv_σ_μ2=inv(σ_μ^2), inv_σ_x2=inv(σ_x^2) - c -> begin - # Convert cluster assignments to one-hot encodings - z_onehot = c.z .== (1:K)' - - # Count number of observations in each cluster - n = vec(sum(z_onehot; dims=1)) - - # Compute mean and variance of the conditional distribution - μ_var = @. inv(inv_σ_x2 * n + inv_σ_μ2) - μ_mean = (z_onehot' * x_data) .* inv_σ_x2 .* μ_var - - return MvNormal(μ_mean, μ_var) - end - end - - estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) - function estimatez(chain, var, range) - z = Int.(Array(group(chain, var))) - return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) - end - - lμ_data, uμ_data = extrema(μ_data) - - # Compare three Gibbs samplers - sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) - sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) - sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) - for sampler in (sampler1, sampler2, sampler3) - chain = sample(model, sampler, 10_000) - - μ_hat = estimate(chain, :μ) - lμ_hat, uμ_hat = extrema(μ_hat) - @test isapprox([lμ_data, uμ_data], [lμ_hat, uμ_hat], atol=0.1) - - z_hat = estimatez(chain, :z, 1:2) - ari, _, _, _ = randindex(z_data, Int.(z_hat)) - @test isapprox(ari, 1, atol=0.1) - end - end + # @turing_testset "GMM" begin + # # We consider the model + # # ```math + # # μₖ ~ Normal(m, σ_μ), k = 1, …, K, + # # zᵢ ~ Categorical(π), i = 1, …, N, + # # xᵢ ~ Normal(μ_{zᵢ}, σₓ), i = 1, …, N, + # # ``` + # # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: + # K = 2 # number of clusters + # π = fill(1/K, K) # uniform cluster weights + # m = 0.5 # prior mean of μₖ + # σ_μ = 2.0 # prior variance of μₖ + # σ_x = 0.1 # observation variance + # N = 20 # number of observations + + # # We generate data + # μ_data = rand(Normal(m, σ_μ), K) + # z_data = rand(Categorical(π), N) + # x_data = rand(MvNormal(μ_data[z_data], σ_x)) + + # @model function mixture(x) + # μ ~ $(MvNormal(fill(m, K), σ_μ)) + # z ~ $(filldist(Categorical(π), N)) + # x ~ MvNormal(μ[z], $(σ_x)) + # return x + # end + # model = mixture(x_data) + + # # Conditional distribution ``z | μ, x`` + # # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf + # cond_z = let x=x_data, log_π=log.(π), σ_x=σ_x + # c -> begin + # dists = map(x) do xi + # logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) + # return Categorical(softmax!(logp)) + # end + # return arraydist(dists) + # end + # end + + # # Conditional distribution ``μ | z, x`` + # # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf + # cond_μ = let 
K=K, x_data=x_data, inv_σ_μ2=inv(σ_μ^2), inv_σ_x2=inv(σ_x^2) + # c -> begin + # # Convert cluster assignments to one-hot encodings + # z_onehot = c.z .== (1:K)' + + # # Count number of observations in each cluster + # n = vec(sum(z_onehot; dims=1)) + + # # Compute mean and variance of the conditional distribution + # μ_var = @. inv(inv_σ_x2 * n + inv_σ_μ2) + # μ_mean = (z_onehot' * x_data) .* inv_σ_x2 .* μ_var + + # return MvNormal(μ_mean, μ_var) + # end + # end + + # estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) + # function estimatez(chain, var, range) + # z = Int.(Array(group(chain, var))) + # return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) + # end + + # lμ_data, uμ_data = extrema(μ_data) + + # # Compare three Gibbs samplers + # sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) + # sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) + # sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) + # for sampler in (sampler1, sampler2, sampler3) + # chain = sample(model, sampler, 10_000) + + # μ_hat = estimate(chain, :μ) + # lμ_hat, uμ_hat = extrema(μ_hat) + # @test isapprox([lμ_data, uμ_data], [lμ_hat, uμ_hat], atol=0.1) + + # z_hat = estimatez(chain, :z, 1:2) + # ari, _, _, _ = randindex(z_data, Int.(z_hat)) + # @test isapprox(ari, 1, atol=0.1) + # end + # end end From df9a050d6a244a48d19026916f0827cfc70a81e9 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 16 Jul 2021 23:28:08 +0100 Subject: [PATCH 05/13] include tests again --- test/inference/gibbs_conditional.jl | 164 ++++++++++++++-------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/test/inference/gibbs_conditional.jl b/test/inference/gibbs_conditional.jl index 70789a4aa..eaee4bb7f 100644 --- a/test/inference/gibbs_conditional.jl +++ b/test/inference/gibbs_conditional.jl @@ -61,86 +61,86 @@ check_gdemo(chain) end - # @turing_testset "GMM" begin - # # We consider the model - # # ```math - # # μₖ ~ Normal(m, σ_μ), k = 1, …, K, - # # zᵢ ~ Categorical(π), i = 1, …, N, - # # xᵢ ~ Normal(μ_{zᵢ}, σₓ), i = 1, …, N, - # # ``` - # # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: - # K = 2 # number of clusters - # π = fill(1/K, K) # uniform cluster weights - # m = 0.5 # prior mean of μₖ - # σ_μ = 2.0 # prior variance of μₖ - # σ_x = 0.1 # observation variance - # N = 20 # number of observations - - # # We generate data - # μ_data = rand(Normal(m, σ_μ), K) - # z_data = rand(Categorical(π), N) - # x_data = rand(MvNormal(μ_data[z_data], σ_x)) - - # @model function mixture(x) - # μ ~ $(MvNormal(fill(m, K), σ_μ)) - # z ~ $(filldist(Categorical(π), N)) - # x ~ MvNormal(μ[z], $(σ_x)) - # return x - # end - # model = mixture(x_data) - - # # Conditional distribution ``z | μ, x`` - # # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - # cond_z = let x=x_data, log_π=log.(π), σ_x=σ_x - # c -> begin - # dists = map(x) do xi - # logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) - # return Categorical(softmax!(logp)) - # end - # return arraydist(dists) - # end - # end - - # # Conditional distribution ``μ | z, x`` - # # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - # cond_μ = let K=K, x_data=x_data, inv_σ_μ2=inv(σ_μ^2), inv_σ_x2=inv(σ_x^2) - # c -> begin - # # Convert cluster assignments to one-hot encodings - # z_onehot = c.z .== (1:K)' - - # # Count number of observations in each cluster - # n = vec(sum(z_onehot; dims=1)) - - # # Compute mean and 
variance of the conditional distribution - # μ_var = @. inv(inv_σ_x2 * n + inv_σ_μ2) - # μ_mean = (z_onehot' * x_data) .* inv_σ_x2 .* μ_var - - # return MvNormal(μ_mean, μ_var) - # end - # end - - # estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) - # function estimatez(chain, var, range) - # z = Int.(Array(group(chain, var))) - # return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) - # end - - # lμ_data, uμ_data = extrema(μ_data) - - # # Compare three Gibbs samplers - # sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) - # sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) - # sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) - # for sampler in (sampler1, sampler2, sampler3) - # chain = sample(model, sampler, 10_000) - - # μ_hat = estimate(chain, :μ) - # lμ_hat, uμ_hat = extrema(μ_hat) - # @test isapprox([lμ_data, uμ_data], [lμ_hat, uμ_hat], atol=0.1) - - # z_hat = estimatez(chain, :z, 1:2) - # ari, _, _, _ = randindex(z_data, Int.(z_hat)) - # @test isapprox(ari, 1, atol=0.1) - # end - # end + @turing_testset "GMM" begin + # We consider the model + # ```math + # μₖ ~ Normal(m, σ_μ), k = 1, …, K, + # zᵢ ~ Categorical(π), i = 1, …, N, + # xᵢ ~ Normal(μ_{zᵢ}, σₓ), i = 1, …, N, + # ``` + # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: + K = 2 # number of clusters + π = fill(1/K, K) # uniform cluster weights + m = 0.5 # prior mean of μₖ + σ_μ = 2.0 # prior variance of μₖ + σ_x = 0.1 # observation variance + N = 20 # number of observations + + # We generate data + μ_data = rand(Normal(m, σ_μ), K) + z_data = rand(Categorical(π), N) + x_data = rand(MvNormal(μ_data[z_data], σ_x)) + + @model function mixture(x) + μ ~ $(MvNormal(fill(m, K), σ_μ)) + z ~ $(filldist(Categorical(π), N)) + x ~ MvNormal(μ[z], $(σ_x)) + return x + end + model = mixture(x_data) + + # Conditional distribution ``z | μ, x`` + # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf + cond_z = let x=x_data, log_π=log.(π), σ_x=σ_x + c -> begin + dists = map(x) do xi + logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) + return Categorical(softmax!(logp)) + end + return arraydist(dists) + end + end + + # Conditional distribution ``μ | z, x`` + # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf + cond_μ = let K=K, x_data=x_data, inv_σ_μ2=inv(σ_μ^2), inv_σ_x2=inv(σ_x^2) + c -> begin + # Convert cluster assignments to one-hot encodings + z_onehot = c.z .== (1:K)' + + # Count number of observations in each cluster + n = vec(sum(z_onehot; dims=1)) + + # Compute mean and variance of the conditional distribution + μ_var = @. 
inv(inv_σ_x2 * n + inv_σ_μ2) + μ_mean = (z_onehot' * x_data) .* inv_σ_x2 .* μ_var + + return MvNormal(μ_mean, μ_var) + end + end + + estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) + function estimatez(chain, var, range) + z = Int.(Array(group(chain, var))) + return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) + end + + lμ_data, uμ_data = extrema(μ_data) + + # Compare three Gibbs samplers + sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) + sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) + sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) + for sampler in (sampler1, sampler2, sampler3) + chain = sample(model, sampler, 10_000) + + μ_hat = estimate(chain, :μ) + lμ_hat, uμ_hat = extrema(μ_hat) + @test isapprox([lμ_data, uμ_data], [lμ_hat, uμ_hat], atol=0.1) + + z_hat = estimatez(chain, :z, 1:2) + ari, _, _, _ = randindex(z_data, Int.(z_hat)) + @test isapprox(ari, 1, atol=0.1) + end + end end From 2982c0eb9f302e4ca4cdb8861453f5fec7fcdb87 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 00:32:05 +0100 Subject: [PATCH 06/13] dont test non-AD samplers for every AD backend --- test/inference/gibbs_conditional.jl | 4 ++-- test/runtests.jl | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/test/inference/gibbs_conditional.jl b/test/inference/gibbs_conditional.jl index eaee4bb7f..d103000f0 100644 --- a/test/inference/gibbs_conditional.jl +++ b/test/inference/gibbs_conditional.jl @@ -128,10 +128,10 @@ lμ_data, uμ_data = extrema(μ_data) # Compare three Gibbs samplers - sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) + # sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) - for sampler in (sampler1, sampler2, sampler3) + for sampler in (sampler2, sampler3) chain = sample(model, sampler, 10_000) μ_hat = estimate(chain, :μ) diff --git a/test/runtests.jl b/test/runtests.jl index f9fa2cb70..0fa857dee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -46,22 +46,27 @@ include("test_utils/AllUtils.jl") include("core/ad.jl") end + @testset "samplers (without AD)" begin + include("inference/AdvancedSMC.jl") + include("inference/emcee.jl") + include("inference/ess.jl") + include("inference/is.jl") + end + Turing.setrdcache(false) for adbackend in (:forwarddiff, :tracker, :reversediff) Turing.setadbackend(adbackend) + @info "Testing $(adbackend)" + start = time() @testset "inference: $adbackend" begin @testset "samplers" begin include("inference/gibbs.jl") include("inference/gibbs_conditional.jl") include("inference/hmc.jl") - include("inference/is.jl") - include("inference/mh.jl") - include("inference/ess.jl") - include("inference/emcee.jl") - include("inference/AdvancedSMC.jl") include("inference/Inference.jl") include("contrib/inference/dynamichmc.jl") include("contrib/inference/sghmc.jl") + include("inference/mh.jl") end end @@ -72,6 +77,11 @@ include("test_utils/AllUtils.jl") @testset "modes" begin include("modes/ModeEstimation.jl") end + + # Useful for + # a) discovering performance regressions, + # b) figuring out why CI is timing out. 
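(With Julia's default logger, the `@info` line that follows prints along the lines of `[ Info: Tests for forwarddiff took 912.3 seconds`; the backend name and duration shown here are hypothetical.)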
+ @info "Tests for $(adbackend) took $(time() - start) seconds" end @testset "variational optimisers" begin include("variational/optimisers.jl") From 63c1ec01201c8ae712fac71a6bb566ab76ee0144 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 01:18:41 +0100 Subject: [PATCH 07/13] added back a test --- test/inference/gibbs_conditional.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/inference/gibbs_conditional.jl b/test/inference/gibbs_conditional.jl index d103000f0..078c29c47 100644 --- a/test/inference/gibbs_conditional.jl +++ b/test/inference/gibbs_conditional.jl @@ -128,7 +128,7 @@ lμ_data, uμ_data = extrema(μ_data) # Compare three Gibbs samplers - # sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) + sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) for sampler in (sampler2, sampler3) From 0f3ec25340c28877591eedff3f46902813278bcc Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 01:19:03 +0100 Subject: [PATCH 08/13] added back a test --- test/inference/gibbs_conditional.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/inference/gibbs_conditional.jl b/test/inference/gibbs_conditional.jl index 078c29c47..eaee4bb7f 100644 --- a/test/inference/gibbs_conditional.jl +++ b/test/inference/gibbs_conditional.jl @@ -131,7 +131,7 @@ sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ)) sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ)) sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ)) - for sampler in (sampler2, sampler3) + for sampler in (sampler1, sampler2, sampler3) chain = sample(model, sampler, 10_000) μ_hat = estimate(chain, :μ) From 31018c7af98f20ff1c63e1ad64de745a3a1e37dc Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 14:09:38 +0100 Subject: [PATCH 09/13] removed some redundant tests and fixed a typo --- test/inference/Inference.jl | 2 +- test/inference/gibbs.jl | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/test/inference/Inference.jl b/test/inference/Inference.jl index 52e2e1a76..6d6955664 100644 --- a/test/inference/Inference.jl +++ b/test/inference/Inference.jl @@ -1,4 +1,4 @@ -@testset "io.jl" begin +@testset "inference.jl" begin # Only test threading if 1.3+. 
if VERSION > v"1.2" @testset "threaded sampling" begin diff --git a/test/inference/gibbs.jl b/test/inference/gibbs.jl index a9425b630..9077aecbe 100644 --- a/test/inference/gibbs.jl +++ b/test/inference/gibbs.jl @@ -50,19 +50,10 @@ chain = sample(gdemo(1.5, 2.0), alg, 5_000) check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) - setadsafe(true) - Random.seed!(200) gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2)) chain = sample(MoGtest_default, gibbs, 5_000) check_MoGtest_default(chain, atol=0.15) - - setadsafe(false) - - Random.seed!(200) - gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), ESS(:mu1), ESS(:mu2)) - chain = sample(MoGtest_default, gibbs, 5_000) - check_MoGtest_default(chain, atol=0.1) end @turing_testset "transitions" begin From fc87e420e20bcbe8b401fc5da4fdbe7884d04a33 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 14:09:49 +0100 Subject: [PATCH 10/13] added macro timed_testset --- test/test_utils/staging.jl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/test_utils/staging.jl b/test/test_utils/staging.jl index 15d5853d0..9768c2962 100644 --- a/test/test_utils/staging.jl +++ b/test/test_utils/staging.jl @@ -39,7 +39,7 @@ end macro stage_testset(stage_string::String, args...) if do_test(stage_string) - return esc(:(@testset($(args...)))) + return esc(:(@timed_testset($(args...)))) end end @@ -50,3 +50,13 @@ end macro turing_testset(args...) esc(:(@stage_testset "test" $(args...))) end + +macro timed_testset(name, args...) + expr = quote + start = time() + @testset($name, $(args...)) + let name = $name + @info "$(name): $(time() - start)s" + end + end +end From 65e735ce1395ef77c7ad3a7d608cf42032db8619 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 15:22:05 +0100 Subject: [PATCH 11/13] upper-bound Distributions.jl apparently fixes the test-freeze --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index efb9fa08c..822435eed 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -34,7 +34,7 @@ AdvancedPS = "0.2" AdvancedVI = "0.1" Clustering = "0.14" CmdStan = "6.0.8" -Distributions = "0.23.8, 0.24, 0.25" +Distributions = "0.23.8 - 0.25.10" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" DynamicPPL = "0.12" From fec5e1b439e76a439d72e813c39805102a01cf72 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 15:25:41 +0100 Subject: [PATCH 12/13] hyphen compat specifies arent compatible with Julia 1.3 --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 822435eed..6d7ba0b91 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -34,7 +34,7 @@ AdvancedPS = "0.2" AdvancedVI = "0.1" Clustering = "0.14" CmdStan = "6.0.8" -Distributions = "0.23.8 - 0.25.10" +Distributions = "< 0.25.11" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" DynamicPPL = "0.12" From 604e3e23476192bdb69ebd1ad1542bd88a3df692 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Sat, 17 Jul 2021 15:55:53 +0100 Subject: [PATCH 13/13] removed the timed_testset stuff --- test/test_utils/staging.jl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/test/test_utils/staging.jl b/test/test_utils/staging.jl index 9768c2962..15d5853d0 100644 --- a/test/test_utils/staging.jl +++ b/test/test_utils/staging.jl @@ -39,7 +39,7 @@ end macro stage_testset(stage_string::String, args...) 
if do_test(stage_string) - return esc(:(@timed_testset($(args...)))) + return esc(:(@testset($(args...)))) end end @@ -50,13 +50,3 @@ end macro turing_testset(args...) esc(:(@stage_testset "test" $(args...))) end - -macro timed_testset(name, args...) - expr = quote - start = time() - @testset($name, $(args...)) - let name = $name - @info "$(name): $(time() - start)s" - end - end -end
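A closing note on the two compat spellings tried in PATCH 11 and PATCH 12: both impose the same upper bound, but only the hyphen range also keeps an explicit lower bound, so `"< 0.25.11"` additionally admits every release below 0.23.8. Per the PATCH 12 message, the hyphen syntax had to go regardless, because Julia 1.3's Pkg cannot parse it. Side by side (one line or the other, not both, since TOML keys must be unique):

    [compat]
    Distributions = "0.23.8 - 0.25.10"   # range [0.23.8, 0.25.10]; rejected by Julia 1.3's Pkg
    Distributions = "< 0.25.11"          # same upper bound, no lower bound; parses on Julia 1.3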