Merge pull request #2 from FluxML/ap/diffeqflux

DiffEqFlux Benchmarks
FluxML · Apr 7, 2021 · 2132a29 · 2132a29
2 parents 9615844 + 566f68e
commit 2132a29
Show file tree

Hide file tree

Showing 7 changed files with 561 additions and 37 deletions.
diff --git a/Manifest.toml b/Manifest.toml
diff --git a/Project.toml b/Project.toml
@@ -6,8 +6,16 @@ version = "0.1.0"
 [deps]
 BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
 ObjectDetector = "3dfc1049-5314-49cf-8447-288dfd02f9fb"
+OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/src/FluxBench.jl b/src/FluxBench.jl
@@ -1,9 +1,12 @@
 module FluxBench
 
-using Flux, Metalhead, ObjectDetector
+using Flux, Metalhead, ObjectDetector, DiffEqFlux
+using OrdinaryDiffEq, StochasticDiffEq, Distributions
 using BenchmarkTools, TimerOutputs
 using HTTP, JSON, FileIO
 using Flux.CUDA
+using Statistics
+using Zygote
 # using Torch - If we want to compare progress
 
 const MODELS = (ResNet, DenseNet, GoogleNet, VGG19, SqueezeNet)
@@ -12,6 +15,7 @@ SUITE = BenchmarkGroup()
 
 include("benchmarkutils.jl")
 include("packages/objectdetector.jl")
+include("packages/diffeqflux.jl")
 include("bench.jl")
 
 results = run(SUITE, verbose = true)

diff --git a/src/bench.jl b/src/bench.jl
@@ -2,10 +2,6 @@
 
 group = addgroup!(SUITE, "Metalhead")
 
-function fw(m, ip)
-    CUDA.@sync m(ip)
-end
-
 function benchmark_cu(io, model, batchsize = 64)
   resnet = model
   ip = rand(Float32, 224, 224, 3, batchsize)
@@ -23,10 +19,6 @@ function benchmark_cu(io, model, batchsize = 64)
   # write(io, "\n\n")
 end
 
-function bw(m, ip)
-  gs = CUDA.@sync gradient((m, x) -> sum(m(x)), m, ip)
-end
-
 function benchmark_bw_cu(io, model, batchsize = 64)
   resnet = model
   ip = rand(Float32, 224, 224, 3, batchsize)
@@ -57,9 +49,22 @@ function bench()
     # end
   end
 
-  for model in [ObjectDetector.YOLO.v3_608_COCO, ObjectDetector.v3_tiny_416_COCO]
-    for batchsize in [1, 3]
-      objectdetector_add_yolo_fw(model=model, batchsize=batchsize)
-    end
+  # ObjectDetector
+  # `objectdetector_add_yolo_fw(model = ..., batchsize = ...)` declares both
+  # parameters as positional optionals, so they are passed positionally here;
+  # the keyword form has no matching method.
+  for model in [ObjectDetector.YOLO.v3_608_COCO, ObjectDetector.v3_tiny_416_COCO], batchsize in [1, 3]
+    objectdetector_add_yolo_fw(model, batchsize)
+  end
+
+  # DiffEqFlux
+  ## NeuralODE
+  for tol in (1f-3, 1f-5, 1f-8), b in (4, 16, 64, 256)
+    diffeqflux_add_neuralode(tol, tol, tol > 1f-8 ? Tsit5() : Vern7(), b)
+  end
+  ## NeuralSDE
+  for b in (4, 16, 64), traj in (1, 10, 32)
+    diffeqflux_add_neuralsde(b, traj)
+  end
+  ## FFJORD
+  # The signature is `diffeqflux_add_ffjord(ndims, batchsize)`: the data
+  # dimensionality goes first, the batch size second.
+  for b in (4, 16, 64, 256), ndims in (2, 4, 8)
+    diffeqflux_add_ffjord(ndims, b)
   end
 end
diff --git a/src/benchmarkutils.jl b/src/benchmarkutils.jl
@@ -39,3 +39,13 @@ function flatten(results, prefix = "")
     end
   end
 end
+
+# Forward pass: call `m` on `ip` inside `CUDA.@sync` and return whatever the
+# call produced.
+fw(m, ip) = CUDA.@sync m(ip)
+
+# Forward + backward pass: differentiate the summed output of `m(ip)` with
+# respect to both the model and the input, inside `CUDA.@sync`, and return
+# the resulting gradients.
+function bw(m, ip)
+  summed_output = (model, x) -> sum(model(x))
+  return CUDA.@sync gradient(summed_output, m, ip)
+end
diff --git a/src/packages/diffeqflux.jl b/src/packages/diffeqflux.jl
@@ -0,0 +1,88 @@
+# Benchmark group for the DiffEqFlux suite. The `diffeqflux_add_*` functions in
+# this file store their benchmarks through the global name `group`.
+# NOTE(review): `group` is a non-const global that bench.jl also assigns (to the
+# "Metalhead" group), and bench.jl is included after this file. Benchmarks added
+# once `bench()` runs presumably land in whichever group was bound last —
+# confirm that this is intended.
+group = addgroup!(SUITE, "DiffEqFlux")
+
+# Register forward- and backward-pass benchmarks for a NeuralODE classifier
+# fed with random 784 x batchsize Float32 input.
+#
+# Arguments (all positional, all optional):
+#   abstol, reltol - tolerances forwarded to the NeuralODE layer
+#   solver         - ODE solver instance handed to the NeuralODE layer
+#   batchsize      - number of columns of the random input
+#
+# Both benchmarks are stored in the global `group`; the value returned is the
+# backward-pass benchmark (the last one registered).
+function diffeqflux_add_neuralode(abstol = 1f-3, reltol = 1f-3, solver = Tsit5(), batchsize = 256)
+  # `Flux.flatten` is qualified on purpose: benchmarkutils.jl defines its own
+  # `flatten(results, prefix = "")`, and a bare `flatten` would refer to that
+  # helper instead of the Flux layer.
+  down = Chain(Flux.flatten, Dense(784, 512, tanh))
+  nn = Chain(Dense(512, 256, tanh),
+             Dense(256, 256, tanh),
+             Dense(256, 512, tanh))
+  nn_ode = f -> NeuralODE(f, (0.f0, 1.f0), solver,
+                          save_everystep = false,
+                          reltol = reltol, abstol = abstol,
+                          save_start = false)
+  fc  = Chain(Dense(512, 10))
+
+  # Move the solver output through `gpu` and reshape it to its first two
+  # dimensions so the classifier head receives a matrix.
+  function diffeqarray_to_array(x)
+    xarr = gpu(x)
+    return reshape(xarr, size(xarr)[1:2])
+  end
+
+  # Assemble the full pipeline with every trainable layer passed through
+  # `gpu` (not only the ODE network), so no CPU-resident weights are applied
+  # to the GPU input built in `setup`.
+  build_model() = Chain(gpu(down), nn_ode(gpu(nn)), diffeqarray_to_array, gpu(fc))
+
+  ip = rand(Float32, 784, batchsize)
+
+  group["DiffEqFlux - Forward Pass - NeuralODE with abstol $abstol, reltol $reltol, batchsize $batchsize, and solver $solver"] = @benchmarkable(
+    fw(model, gip),
+    setup = (model = $build_model(); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+
+  group["DiffEqFlux - Backward Pass - NeuralODE with abstol $abstol, reltol $reltol, batchsize $batchsize, and solver $solver"] = @benchmarkable(
+    bw(model, gip),
+    setup = (model = $build_model(); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+end
+
+# Register forward- and backward-pass benchmarks for a NeuralDSDE layer.
+#
+# Arguments (positional, optional):
+#   batchsize     - number of distinct random 2-element input columns
+#   ntrajectories - how many times each input column is repeated; the repeats
+#                   are averaged again after the solve
+#
+# Both benchmarks are stored in the global `group`; the value returned is the
+# backward-pass benchmark (the last one registered).
+function diffeqflux_add_neuralsde(batchsize = 16, ntrajectories = 100)
+  diffusion = Chain(Dense(2, 8, tanh), Dense(8, 2))
+  drift = Chain(Dense(2, 32, tanh), Dense(32, 32, tanh), Dense(32, 2))
+  nn_sde = (f, g) -> NeuralDSDE(f, g, (0.0f0, 1.0f0), SOSRI(), abstol = 1f-1, reltol = 1f-1)
+
+  # Average the repeated trajectories of every sample.
+  # `ip` repeats each column `ntrajectories` times back to back, so splitting
+  # the second axis into (ntrajectories, remainder) groups the repeats of one
+  # sample along dimension 2. The averaged result is returned as a matrix with
+  # `size(xarr, 1)` rows. Using `:` for the remaining extent keeps the element
+  # counts consistent for any `ntrajectories`; reshaping back to `size(xarr)`
+  # only matched when `ntrajectories == 1`.
+  function sdesol_to_array(x)
+    xarr = gpu(x)
+    nrows = size(xarr, 1)
+    averaged = mean(reshape(xarr, nrows, ntrajectories, :), dims = 2)
+    return reshape(averaged, nrows, :)
+  end
+
+  # Both networks are passed through `gpu`: the drift and the diffusion have
+  # to live on the same device as the input built in `setup`.
+  build_model() = Chain(nn_sde(gpu(drift), gpu(diffusion)), sdesol_to_array)
+
+  ip = repeat(rand(Float32, 2, batchsize), inner = (1, ntrajectories))
+
+  group["DiffEqFlux - Forward Pass - NeuralSDE with batchsize $batchsize, and ntrajectories $ntrajectories"] = @benchmarkable(
+    fw(model, gip),
+    setup = (model = $build_model(); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+
+  group["DiffEqFlux - Backward Pass - NeuralSDE with batchsize $batchsize, and ntrajectories $ntrajectories"] = @benchmarkable(
+    bw(model, gip),
+    setup = (model = $build_model(); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+end
+
+# Register forward-pass, backward-pass and sampling benchmarks for an FFJORD
+# layer.
+#
+# Arguments (positional, optional) — note the order, `ndims` comes first:
+#   ndims     - number of rows of the random input; also sets the layer widths
+#               of the network (hidden width is `ndims * 8`)
+#   batchsize - number of columns of the random input; also used as the number
+#               of samples drawn in the sampling benchmark
+#
+# All three benchmarks are stored in the global `group`; the value returned is
+# the sampling benchmark (the last one registered).
+function diffeqflux_add_ffjord(ndims = 2, batchsize = 256)
+  nn = Chain(Dense(ndims, ndims * 8, tanh), Dense(ndims * 8, ndims * 8, tanh), Dense(ndims * 8, ndims * 8, tanh), Dense(ndims * 8, ndims))
+  cnf_ffjord = f -> FFJORD(f, (0.0f0, 1.0f0), Tsit5(), monte_carlo = true)
+  # Reduce the FFJORD output to a scalar: negated mean of its first component.
+  ffjordsol_to_logpx(x) = -mean(x[1])[1]
+
+  ip = rand(Float32, ndims, batchsize)
+
+  nsamples = batchsize
+  # Draw `nsamples` points from the layer's base distribution and integrate
+  # them through the model over the reversed time span (1.0, 0.0).
+  # Element type and array type of the helper buffers are taken from
+  # `Z_samples`; no data array named `X` exists in this file.
+  function sample_from_learned_model(cnf)
+    pz = cnf.basedist
+    Z_samples = cu(rand(pz, nsamples))
+    # `e` is created before the closure that captures it.
+    e = cu(randn(eltype(Z_samples), size(Z_samples)))
+    ffjord_ = (u, p, t) -> DiffEqFlux.ffjord(u, p, t, cnf.re, e, false, false)
+    # Extra zero row appended to the state; excluded from differentiation.
+    _z = Zygote.@ignore similar(Z_samples, 1, size(Z_samples, 2))
+    Zygote.@ignore fill!(_z, 0.0f0)
+    prob = ODEProblem{false}(ffjord_, vcat(Z_samples, _z), (1.0, 0.0), cnf.p)
+    # Keep the final saved state and drop the appended last row again.
+    return solve(prob, cnf.args...; sensealg = InterpolatingAdjoint(), cnf.kwargs...)[1:end-1, :, end]
+  end
+
+  group["DiffEqFlux - Forward Pass - FFJORD with batchsize $batchsize, and ndims $ndims"] = @benchmarkable(
+    fw(model, gip),
+    setup = (nn_gpu = $nn |> gpu; model = Chain($cnf_ffjord(nn_gpu), $ffjordsol_to_logpx); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+
+  group["DiffEqFlux - Backward Pass - FFJORD with batchsize $batchsize, and ndims $ndims"] = @benchmarkable(
+    bw(model, gip),
+    setup = (nn_gpu = $nn |> gpu; model = Chain($cnf_ffjord(nn_gpu), $ffjordsol_to_logpx); gip = $ip |> gpu),
+    teardown = (GC.gc(); CUDA.reclaim()))
+
+  # `fw(sampler, model)` simply evaluates `sampler(model)` under `CUDA.@sync`.
+  group["DiffEqFlux - Sampling - FFJORD with nsamples $nsamples, and ndims $ndims"] = @benchmarkable(
+    fw(sampler, model),
+    setup = (nn_gpu = $nn |> gpu; model = $cnf_ffjord(nn_gpu); sampler = $sample_from_learned_model),
+    teardown = (GC.gc(); CUDA.reclaim()))
+end
diff --git a/src/packages/objectdetector.jl b/src/packages/objectdetector.jl
@@ -12,4 +12,4 @@ function objectdetector_add_yolo_fw(model = YOLO.v3_608_COCO, batchsize = 1)
   group["ObjectDetector - $model with batchsize $batchsize"] = b = @benchmarkable(
     yolomod(batch, detectThresh=0.5, overlapThresh=0.8),
     teardown=(GC.gc(); CUDA.reclaim()))
-end
+end