Skip to content

Commit

Permalink
Merge pull request #2 from FluxML/ap/diffeqflux
Browse files Browse the repository at this point in the history
DiffEqFlux Benchmarks
  • Loading branch information
DhairyaLGandhi authored Apr 7, 2021
2 parents 9615844 + 566f68e commit 2132a29
Show file tree
Hide file tree
Showing 7 changed files with 561 additions and 37 deletions.
455 changes: 432 additions & 23 deletions Manifest.toml

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@ version = "0.1.0"
[deps]
BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
ObjectDetector = "3dfc1049-5314-49cf-8447-288dfd02f9fb"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
6 changes: 5 additions & 1 deletion src/FluxBench.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
module FluxBench

using Flux, Metalhead, ObjectDetector
using Flux, Metalhead, ObjectDetector, DiffEqFlux
using OrdinaryDiffEq, StochasticDiffEq, Distributions
using BenchmarkTools, TimerOutputs
using HTTP, JSON, FileIO
using Flux.CUDA
using Statistics
using Zygote
# using Torch - If we want to compare progress

const MODELS = (ResNet, DenseNet, GoogleNet, VGG19, SqueezeNet)
Expand All @@ -12,6 +15,7 @@ SUITE = BenchmarkGroup()

include("benchmarkutils.jl")
include("packages/objectdetector.jl")
include("packages/diffeqflux.jl")
include("bench.jl")

results = run(SUITE, verbose = true)
Expand Down
29 changes: 17 additions & 12 deletions src/bench.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

# Create the "Metalhead" benchmark group inside SUITE and bind it to the global `group`.
# NOTE(review): the package files included before this one (e.g. packages/diffeqflux.jl)
# also assign the same global `group`; because this file is included last, this binding
# is the one their functions see when they are called later — confirm that is intended.
group = addgroup!(SUITE, "Metalhead")

"""
    fw(m, ip)

Apply `m` to `ip` inside `CUDA.@sync` and return the result of the call.
"""
function fw(m, ip)
    out = CUDA.@sync m(ip)
    return out
end

function benchmark_cu(io, model, batchsize = 64)
resnet = model
ip = rand(Float32, 224, 224, 3, batchsize)
Expand All @@ -23,10 +19,6 @@ function benchmark_cu(io, model, batchsize = 64)
# write(io, "\n\n")
end

"""
    bw(m, ip)

Differentiate `sum(m(ip))` with respect to both `m` and `ip` inside `CUDA.@sync`
and return the gradients produced by `gradient`.
"""
function bw(m, ip)
    # Scalar loss: the sum of the model output.
    loss = (model, x) -> sum(model(x))
    return CUDA.@sync gradient(loss, m, ip)
end

function benchmark_bw_cu(io, model, batchsize = 64)
resnet = model
ip = rand(Float32, 224, 224, 3, batchsize)
Expand Down Expand Up @@ -57,9 +49,22 @@ function bench()
# end
end

# Register one YOLO forward benchmark per (model, batchsize) pair.
# `objectdetector_add_yolo_fw` takes `model` and `batchsize` as positional arguments
# (with defaults), so they must be passed positionally; keyword syntax raises a
# MethodError. Both constructors are referenced through the `YOLO` submodule.
for model in (ObjectDetector.YOLO.v3_608_COCO, ObjectDetector.YOLO.v3_tiny_416_COCO)
    for batchsize in (1, 3)
        objectdetector_add_yolo_fw(model, batchsize)
    end
end
# ObjectDetector
# Register one YOLO forward benchmark per (model, batchsize) pair.
# `objectdetector_add_yolo_fw` takes `model` and `batchsize` as positional arguments
# (with defaults), so they must be passed positionally; keyword syntax raises a
# MethodError. Both constructors are referenced through the `YOLO` submodule.
for model in (ObjectDetector.YOLO.v3_608_COCO, ObjectDetector.YOLO.v3_tiny_416_COCO),
    batchsize in (1, 3)

    objectdetector_add_yolo_fw(model, batchsize)
end

# DiffEqFlux
## NeuralODE: sweep the tolerance (outer loop) and the batch size (inner loop).
## The same value is used for both abstol and reltol.
for tol in (1f-3, 1f-5, 1f-8)
    # Tsit5 while the tolerance is above 1f-8, Vern7 otherwise.
    solver = tol > 1f-8 ? Tsit5() : Vern7()
    for b in (4, 16, 64, 256)
        diffeqflux_add_neuralode(tol, tol, solver, b)
    end
end
## NeuralSDE: sweep the batch size (outer loop) and the trajectory count (inner loop).
for b in (4, 16, 64)
    for traj in (1, 10, 32)
        diffeqflux_add_neuralsde(b, traj)
    end
end
## FFJORD: sweep the batch size (outer loop) and the data dimensionality (inner loop).
## `diffeqflux_add_ffjord` is declared as (ndims, batchsize), so `ndims` goes first;
## passing (b, ndims) would benchmark the two quantities swapped.
for b in (4, 16, 64, 256), ndims in (2, 4, 8)
    diffeqflux_add_ffjord(ndims, b)
end
end
10 changes: 10 additions & 0 deletions src/benchmarkutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,13 @@ function flatten(results, prefix = "")
end
end
end

"""
    fw(m, ip)

Do a forward pass: call `m(ip)` wrapped in `CUDA.@sync` and return its result.
"""
fw(m, ip) = CUDA.@sync m(ip)

"""
    bw(m, ip)

Do a forward + backward pass: take the gradient of `sum(m(ip))` with respect to both
`m` and `ip`, wrapped in `CUDA.@sync`, and return what `gradient` returns.
"""
bw(m, ip) = CUDA.@sync gradient((model, x) -> sum(model(x)), m, ip)
88 changes: 88 additions & 0 deletions src/packages/diffeqflux.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Create the "DiffEqFlux" benchmark group inside SUITE and bind it to the global `group`.
# NOTE(review): `group` is a plain (non-const) global that src/bench.jl rebinds with
# `group = addgroup!(SUITE, "Metalhead")`, and bench.jl is included after this file.
# The functions below read `group` when they are called, so their benchmarks appear to
# end up in whichever group was assigned last rather than in "DiffEqFlux" — confirm,
# and consider a file-specific constant instead.
group = addgroup!(SUITE, "DiffEqFlux")

"""
    diffeqflux_add_neuralode(abstol = 1f-3, reltol = 1f-3, solver = Tsit5(), batchsize = 256)

Register a forward-pass and a backward-pass benchmark for a NeuralODE model in `group`.

The model is `Dense(784, 512)` -> NeuralODE over a 512 -> 256 -> 256 -> 512 network ->
`Dense(512, 10)`, evaluated on a random `Float32` input of size `(784, batchsize)`.

# Arguments
- `abstol`, `reltol`: tolerances forwarded to the `NeuralODE` constructor.
- `solver`: ODE solver forwarded to the `NeuralODE` constructor.
- `batchsize`: number of columns of the random input.

Returns the backward-pass benchmark (the value of the last assignment).
"""
function diffeqflux_add_neuralode(abstol = 1f-3, reltol = 1f-3, solver = Tsit5(), batchsize = 256)
    # `Flux.flatten` is qualified on purpose: benchmarkutils.jl defines this module's own
    # `flatten(results, prefix)`, so a bare `flatten` would not refer to the Flux helper.
    down = Chain(Flux.flatten, Dense(784, 512, tanh))
    nn = Chain(Dense(512, 256, tanh),
               Dense(256, 256, tanh),
               Dense(256, 512, tanh))
    # Only the final state is saved (no start point, no intermediate steps).
    nn_ode = f -> NeuralODE(f, (0.f0, 1.f0), solver,
                            save_everystep = false,
                            reltol = reltol, abstol = abstol,
                            save_start = false)
    fc = Chain(Dense(512, 10))

    # Turn the ODE solution into a plain array and keep only its first two dimensions.
    function diffeqarray_to_array(x)
        xarr = gpu(x)
        return reshape(xarr, size(xarr)[1:2])
    end

    ip = rand(Float32, 784, batchsize)

    # Every layer is moved with `gpu` in `setup`, not just the ODE network: the input
    # `gip` is moved too, so `down` and `fc` must live on the same device as the data.
    group["DiffEqFlux - Forward Pass - NeuralODE with abstol $abstol, reltol $reltol, batchsize $batchsize, and solver $solver"] = @benchmarkable(
        fw(model, gip),
        setup = (nn_gpu = $nn |> gpu; down_gpu = $down |> gpu; fc_gpu = $fc |> gpu; model = Chain(down_gpu, $nn_ode(nn_gpu), $diffeqarray_to_array, fc_gpu); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))

    group["DiffEqFlux - Backward Pass - NeuralODE with abstol $abstol, reltol $reltol, batchsize $batchsize, and solver $solver"] = @benchmarkable(
        bw(model, gip),
        setup = (nn_gpu = $nn |> gpu; down_gpu = $down |> gpu; fc_gpu = $fc |> gpu; model = Chain(down_gpu, $nn_ode(nn_gpu), $diffeqarray_to_array, fc_gpu); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))
end

"""
    diffeqflux_add_neuralsde(batchsize = 16, ntrajectories = 100)

Register a forward-pass and a backward-pass benchmark for a `NeuralDSDE` model in `group`.

The drift network maps 2 -> 32 -> 32 -> 2 and the diffusion network 2 -> 8 -> 2. The
input is a random `Float32` matrix of size `(2, batchsize)` in which every column is
repeated `ntrajectories` times.

# Arguments
- `batchsize`: number of distinct input columns.
- `ntrajectories`: number of copies of each input column.

Returns the backward-pass benchmark (the value of the last assignment).
"""
function diffeqflux_add_neuralsde(batchsize = 16, ntrajectories = 100)
    diffusion = Chain(Dense(2, 8, tanh), Dense(8, 2))
    drift = Chain(Dense(2, 32, tanh), Dense(32, 32, tanh), Dense(32, 2))
    nn_sde = (f, g) -> NeuralDSDE(f, g, (0.0f0, 1.0f0), SOSRI(), abstol = 1f-1, reltol = 1f-1)

    # Average the solution over the trajectory copies.
    # NOTE(review): the inner reshape uses `size(xarr, 2)` as the last dimension and the
    # outer reshape restores `size(xarr)`; the element counts only match for particular
    # solution shapes. The shape returned by the SDE solve is not visible here — confirm
    # that this reduction is what is intended.
    function sdesol_to_array(x)
        xarr = gpu(x)
        return reshape(mean(reshape(xarr, size(xarr, 1), ntrajectories, size(xarr, 2)), dims = 2), size(xarr))
    end

    ip = repeat(rand(Float32, 2, batchsize), inner = (1, ntrajectories))

    # Both networks are moved with `gpu` in `setup`, so that drift, diffusion and the
    # input `gip` all live on the same device.
    group["DiffEqFlux - Forward Pass - NeuralSDE with batchsize $batchsize, and ntrajectories $ntrajectories"] = @benchmarkable(
        fw(model, gip),
        setup = (drift_gpu = $drift |> gpu; diffusion_gpu = $diffusion |> gpu; model = Chain($nn_sde(drift_gpu, diffusion_gpu), $sdesol_to_array); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))

    group["DiffEqFlux - Backward Pass - NeuralSDE with batchsize $batchsize, and ntrajectories $ntrajectories"] = @benchmarkable(
        bw(model, gip),
        setup = (drift_gpu = $drift |> gpu; diffusion_gpu = $diffusion |> gpu; model = Chain($nn_sde(drift_gpu, diffusion_gpu), $sdesol_to_array); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))
end

"""
    diffeqflux_add_ffjord(ndims = 2, batchsize = 256)

Register three FFJORD benchmarks in `group`: a forward pass, a backward pass, and
sampling from the model by solving the flow from `t = 1` to `t = 0`.

The network maps `ndims` -> `8ndims` -> `8ndims` -> `8ndims` -> `ndims`, and the input is
a random `Float32` matrix of size `(ndims, batchsize)`. The number of generated samples
equals `batchsize`.

# Arguments
- `ndims`: dimensionality of the data (first argument).
- `batchsize`: number of input columns and number of samples drawn (second argument).

Returns the sampling benchmark (the value of the last assignment).
"""
function diffeqflux_add_ffjord(ndims = 2, batchsize = 256)
    hidden = ndims * 8
    nn = Chain(Dense(ndims, hidden, tanh), Dense(hidden, hidden, tanh),
               Dense(hidden, hidden, tanh), Dense(hidden, ndims))
    cnf_ffjord = f -> FFJORD(f, (0.0f0, 1.0f0), Tsit5(), monte_carlo = true)
    # Reduce the first element of the FFJORD output to a scalar: its negated mean.
    ffjordsol_to_logpx(x) = -mean(x[1])[1]

    ip = rand(Float32, ndims, batchsize)

    nsamples = batchsize
    # Draw `nsamples` points from the base distribution of `cnf` and push them through
    # the flow. Array type and element type are taken from `Z_samples`; the previous
    # code read them from a variable `X` that is not defined anywhere in this function.
    function sample_from_learned_model(cnf)
        pz = cnf.basedist
        Z_samples = cu(rand(pz, nsamples))
        # `e` is created before the closure that captures it.
        e = cu(randn(eltype(Z_samples), size(Z_samples)))
        ffjord_ = (u, p, t) -> DiffEqFlux.ffjord(u, p, t, cnf.re, e, false, false)
        # Extra zero row appended to the state; excluded from differentiation.
        _z = Zygote.@ignore similar(Z_samples, 1, size(Z_samples, 2))
        Zygote.@ignore fill!(_z, 0.0f0)
        prob = ODEProblem{false}(ffjord_, vcat(Z_samples, _z), (1.0, 0.0), cnf.p)
        sol = solve(prob, cnf.args...; sensealg = InterpolatingAdjoint(), cnf.kwargs...)
        # Drop the appended row and keep only the last saved time point.
        return sol[1:end-1, :, end]
    end

    group["DiffEqFlux - Forward Pass - FFJORD with batchsize $batchsize, and ndims $ndims"] = @benchmarkable(
        fw(model, gip),
        setup = (nn_gpu = $nn |> gpu; model = Chain($cnf_ffjord(nn_gpu), $ffjordsol_to_logpx); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))

    group["DiffEqFlux - Backward Pass - FFJORD with batchsize $batchsize, and ndims $ndims"] = @benchmarkable(
        bw(model, gip),
        setup = (nn_gpu = $nn |> gpu; model = Chain($cnf_ffjord(nn_gpu), $ffjordsol_to_logpx); gip = $ip |> gpu),
        teardown = (GC.gc(); CUDA.reclaim()))

    # `fw(sampler, model)` calls `sampler(model)`, i.e. the sampling routine on the flow.
    group["DiffEqFlux - Sampling - FFJORD with nsamples $nsamples, and ndims $ndims"] = @benchmarkable(
        fw(sampler, model),
        setup = (nn_gpu = $nn |> gpu; model = $cnf_ffjord(nn_gpu); sampler = $sample_from_learned_model),
        teardown = (GC.gc(); CUDA.reclaim()))
end
2 changes: 1 addition & 1 deletion src/packages/objectdetector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ function objectdetector_add_yolo_fw(model = YOLO.v3_608_COCO, batchsize = 1)
group["ObjectDetector - $model with batchsize $batchsize"] = b = @benchmarkable(
yolomod(batch, detectThresh=0.5, overlapThresh=0.8),
teardown=(GC.gc(); CUDA.reclaim()))
end
end

0 comments on commit 2132a29

Please sign in to comment.