From 4860245c80f77476f5c93d10a5a72af19899798a Mon Sep 17 00:00:00 2001 From: skyleaworlder <870033938@qq.com> Date: Wed, 19 Jul 2023 06:53:19 +0000 Subject: [PATCH] refact: split nnlib benchmarks --- benchmark/benchmark/flux.jl | 2 +- benchmark/benchmark/nnlib.jl | 261 +---------------------- benchmark/benchmark/nnlib/activations.jl | 12 ++ benchmark/benchmark/nnlib/attention.jl | 26 +++ benchmark/benchmark/nnlib/conv.jl | 58 +++++ benchmark/benchmark/nnlib/dropout.jl | 20 ++ benchmark/benchmark/nnlib/gemm.jl | 26 +++ benchmark/benchmark/nnlib/pooling.jl | 38 ++++ benchmark/benchmark/nnlib/softmax.jl | 23 ++ benchmark/benchmark/nnlib/upsample.jl | 35 +++ 10 files changed, 247 insertions(+), 254 deletions(-) create mode 100644 benchmark/benchmark/nnlib/activations.jl create mode 100644 benchmark/benchmark/nnlib/attention.jl create mode 100644 benchmark/benchmark/nnlib/conv.jl create mode 100644 benchmark/benchmark/nnlib/dropout.jl create mode 100644 benchmark/benchmark/nnlib/gemm.jl create mode 100644 benchmark/benchmark/nnlib/pooling.jl create mode 100644 benchmark/benchmark/nnlib/softmax.jl create mode 100644 benchmark/benchmark/nnlib/upsample.jl diff --git a/benchmark/benchmark/flux.jl b/benchmark/benchmark/flux.jl index 757d941..ab2da5f 100644 --- a/benchmark/benchmark/flux.jl +++ b/benchmark/benchmark/flux.jl @@ -2,4 +2,4 @@ using Flux SUITE["flux"] = BenchmarkGroup() -register_benchmark("FLUXML_BENCHMARK_FLUX_MLP", "benchmark/flux/mlp.jl") +register_benchmark("FLUXML_BENCHMARK_FLUX_MLP", "flux/mlp.jl") diff --git a/benchmark/benchmark/nnlib.jl b/benchmark/benchmark/nnlib.jl index 2fa63b5..f4f2548 100644 --- a/benchmark/benchmark/nnlib.jl +++ b/benchmark/benchmark/nnlib.jl @@ -1,258 +1,13 @@ using NNlib using NNlib.ChainRulesCore: rrule -using Random SUITE["nnlib"] = BenchmarkGroup() -########## activations ############ -SUITE["nnlib"]["activations"] = BenchmarkGroup() -for et in (Float64, Float32, Float16,) - et_suite = BenchmarkGroup() - 
SUITE["nnlib"]["activations"][string(et)] = et_suite - let x = rand(et, 1024, 1024), y = similar(x) - for f in NNlib.ACTIVATIONS - act = @eval($f) - et_suite[string(f)] = @benchmarkable broadcast!($act, $y, $x) - end - end -end - - -########## softmax ############ -SUITE["nnlib"]["softmax"] = BenchmarkGroup() -for (fn!, fn_bw) in [(softmax!, NNlib.∇softmax_data), (logsoftmax!, NNlib.∇logsoftmax_data)] - fn_suite = BenchmarkGroup() - SUITE["nnlib"]["softmax"][rstrip(string(fn!), '!')] = fn_suite - let SIZES = [ - (12288, 2048, 1), (4096, 4096, 2), (4096, 2048, 2), (2048, 2048, 2), - (1024, 2048, 4), (768, 1024, 4), (512, 784, 8), (128, 384, 8), - ] - for et in (Float32, Float16,) - et_suite = BenchmarkGroup("fw" => BenchmarkGroup(), "bw" => BenchmarkGroup()) - fn_suite[string(et)] = et_suite - for sz in SIZES - x = randn(et, sz) - y = similar(x) - dy = zero(x) - fn!(y, x) - et_suite["fw"][string(sz)] = @benchmarkable $fn!($y, $x) - et_suite["bw"][string(sz)] = @benchmarkable $fn_bw($dy, $y) - end - end - end -end - - -########## conv ############ -SUITE["nnlib"]["conv"] = BenchmarkGroup() -for rank in (3, 2, 1,), N in (512, 256,), K in (3,), - C_in in (1,), C_out in (1,), - stride in (1,), dilation in (1,), padding in (2, 0,) - - size_suite = BenchmarkGroup() - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ] = size_suite - - conv_items = [ - (NNlib.conv_direct!, NNlib.∇conv_data_direct!, NNlib.∇conv_filter_direct!, DenseConvDims, "direct"), - (NNlib.conv_im2col!, NNlib.∇conv_data_im2col!, NNlib.∇conv_filter_im2col!, DenseConvDims, "im2col"), - (NNlib.depthwiseconv_direct!, NNlib.∇depthwiseconv_data_direct!, NNlib.∇depthwiseconv_filter_direct!, DepthwiseConvDims, "direct"), - (NNlib.depthwiseconv_im2col!, NNlib.∇depthwiseconv_data_im2col!, NNlib.∇depthwiseconv_filter_im2col!, DepthwiseConvDims, "im2col"), - ] - - for (conv!, ∇conv_data!, ∇conv_filter!, cdimT, _) in conv_items - 
conv_suite = BenchmarkGroup() - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ][rstrip(string(conv!), '!')] = conv_suite - - for et in (Float32, Float64) - et_suite = BenchmarkGroup() - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ][rstrip(string(conv!), '!')][string(et)] = et_suite - - x = zeros(et, repeat([N], rank)..., C_in, 1) - w = (cdimT == DenseConvDims) ? - zeros(et, repeat([K], rank)..., C_in, C_out) : - zeros(et, repeat([K], rank)..., C_out, C_in) - - cdims = try - cdimT(x, w; stride = stride, dilation = dilation, padding = padding) - catch - continue - end - - y = (cdimT == DenseConvDims) ? - zeros(et, NNlib.output_size(cdims)..., C_out, 1) : - zeros(et, NNlib.output_size(cdims)..., C_out*C_in, 1) - - dx, dy, dw = similar(x), similar(y), similar(w) - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ][rstrip(string(conv!), '!')][string(et)]["conv"] = @benchmarkable $(conv!)($y, $x, $w, $cdims) - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ][rstrip(string(conv!), '!')][string(et)]["data"] = @benchmarkable $(∇conv_data!)($dx, $y, $w, $cdims) - SUITE["nnlib"]["conv"][ - "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" - ][rstrip(string(conv!), '!')][string(et)]["filter"] = @benchmarkable $(∇conv_filter!)($dw, $x, $y, $cdims) - end - end -end - - -########## pooling ############ -SUITE["nnlib"]["pooling"] = BenchmarkGroup() -for rank in (3, 2, 1,), N in (512, 256,), K in (4, 2,), stride in (4, 2, 1,) - size_suite = BenchmarkGroup() - SUITE["nnlib"]["pooling"]["$(rank+2)-N($N)-K($K)-stride($stride)"] = size_suite - - x = zeros(Float32, repeat([N], rank)..., 1, 1) - pdims = PoolDims(x, K; 
stride = stride) - y = zeros(Float32, NNlib.output_size(pdims)..., 1, 1) - dx, dy = similar(x), similar(y) - - pooling_items = [ - (NNlib.maxpool!, NNlib.∇maxpool!, "maxpool"), - (NNlib.meanpool!, NNlib.∇meanpool!, "meanpool"), - (NNlib.lpnormpool!, NNlib.∇lpnormpool!, "lpnormpool"), - ] - - for (pool, ∇pool, name) in pooling_items - pooling_suite = BenchmarkGroup() - SUITE["nnlib"]["pooling"][ - "$(rank+2)-N($N)-K($K)-stride($stride)" - ]["$(name)$(rank)d-direct"] = pooling_suite - SUITE["nnlib"]["pooling"][ - "$(rank+2)-N($N)-K($K)-stride($stride)" - ]["$(name)$(rank)d-direct"]["pool"] = @benchmarkable $pool( - $y, $x, $pdims; p = ($name == "lpnormpool") ? 2 : nothing) - SUITE["nnlib"]["pooling"][ - "$(rank+2)-N($N)-K($K)-stride($stride)" - ]["$(name)$(rank)d-direct"]["data"] = @benchmarkable $(∇pool)( - $dx, $dy, $y, $x, $pdims; p = ($name == "lpnormpool") ? 2 : nothing) - end - - if NNlib.is_nnpack_available() && NNlib.nnpack_supported_operation(pdims) - SUITE["nnlib"]["pooling"][ - "$(rank+2)-N($N)-K($K)-stride($stride)" - ]["maxpool$(rank)d-nnpack"]["pool"] = @benchmarkable NNlib.maxpool_nnpack!($y, $x, $pdims) - end -end - - -########## dropout ############ -SUITE["nnlib"]["dropout"] = BenchmarkGroup() -for rank in (1, 2, 3,), N in (128, 512, 1024,) - size_suite = BenchmarkGroup() - SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"] = size_suite - - x = ones(Float32, repeat([N], rank)..., 1, 1) - y = zeros(Float32, repeat([N], rank)..., 1, 1) - p = 0.2 - - dropout_suite = BenchmarkGroup() - dropout_suite["with-colon"] = @benchmarkable dropout($x, $p) - dropout_suite["with-dim"] = @benchmarkable dropout($x, $p; dims = 1) - SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"]["dropout"] = dropout_suite - - dropout!_suite = BenchmarkGroup() - dropout!_suite["with-colon"] = @benchmarkable dropout!($y, $x, $p) - dropout!_suite["with-dim"] = @benchmarkable dropout!($y, $x, $p; dims = 1) - SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"]["dropout!"] = dropout!_suite -end - - 
-########## upsample ############ -SUITE["nnlib"]["upsample"] = BenchmarkGroup() -SUITE["nnlib"]["upsample"]["linear"] = BenchmarkGroup() -for rank in (3, 2, 1,), et in (Float32, Float16,) - et_suite = BenchmarkGroup("fw" => BenchmarkGroup(), "bw" => BenchmarkGroup()) - SUITE["nnlib"]["upsample"]["linear"][string(et)] = et_suite - - inputs_sizes = [ - (1024, (0.5, 2), false), (256, 8, false), - (256, 4, true), (128, (1, 2), false), (128, 2, true), - ] - for (sz, scale, ac) in inputs_sizes - x = ones(et, repeat([sz], rank)..., 1, 1) - et_suite["fw"][ - "$(rank+2)-N($sz)-scale($scale)" - ] = @benchmarkable upsample_linear($x, $scale; align_corners = $ac) - et_suite["bw"][ - "$(rank+2)-N($sz)-scale($scale)" - ] = @benchmarkable ∇upsample_linear($x; - size = (typeof($scale) <: Tuple) ? - floor.(Integer, $sz .* $scale) : - ntuple(_ -> floor(Integer, $sz * $scale), $rank), - align_corners = $ac) - end -end - -SUITE["nnlib"]["upsample"]["nearest"] = BenchmarkGroup() -for rank in (3, 2, 1,), N in (1024, 512, 128,) - et_suite = BenchmarkGroup() - for et in (Float64, Float32, Float16,) - x = zeros(Float32, repeat([N], rank)..., 1, 1) - et_suite[string(et)] = @benchmarkable upsample_nearest($x; size = (repeat([$N * 10], $rank)..., 1, 1)) - end - SUITE["nnlib"]["upsample"]["nearest"]["$(rank+2)-N($N)"] = et_suite -end - - -########## gemm ############ -SUITE["nnlib"]["gemm"] = BenchmarkGroup() -for et in (Float32, Float64) - et_suite = BenchmarkGroup( - "gemm!" => BenchmarkGroup(), - "batched_gemm!" => BenchmarkGroup()) - SUITE["nnlib"]["gemm"][string(et)] = et_suite - - # transA and transB are not of the main varaints. - # gemm! meets some memory problem, not included here. 
- input_items = [ - (Val(false), Val(false), 'N', 'N', 1024, 1024, 1024, et(0.5), et(0.0)), - (Val(false), Val(false), 'N', 'N', 512, 512, 128, et(0.5), et(1.0)), - (Val(false), Val(false), 'N', 'N', 80, 40, 100, et(1.0), et(0.0)), - ] - for (transA, transB, transA_ch, transB_ch, M, N, K, alpha, beta) in input_items - bA = ones(et, M, N, 1) - bB = ones(et, N, K, 1) - bC = zeros(et, M, K, 1) - et_suite["batched_gemm!"][ - "trans($transA_ch,$transB_ch)-M($M)-N($N)-K($K)-alpha($alpha)-beta($beta)" - ] = @benchmarkable NNlib.batched_gemm!( - $transA_ch, $transB_ch, - $alpha, $bA, $bB, $beta, $bC) - end -end - - -########## attention ############ -SUITE["nnlib"]["attention"] = BenchmarkGroup() -for et in (Float16, Float64) - et_suite = BenchmarkGroup( - "attention" => BenchmarkGroup(), "score" => BenchmarkGroup()) - SUITE["nnlib"]["attention"][string(et)] = et_suite - - input_items = [ - ((16,128,8), (16,512,8), (32,512,8), (512,128), 4), - ((64,64,16), (64,64,16), (64,64,16), (64,64), 4), - ((8,6,1), (8,10,1), (4,10,1), nothing, 1), - ] - for (q_sz, k_sz, v_sz, bias_sz, nheads) in input_items - q, q_score = rand(et, q_sz...), rand(et, 8, q_sz...) - k, k_score = rand(et, k_sz...), rand(et, 8, k_sz...) - v = rand(et, v_sz...) - bias = isnothing(bias_sz) ? nothing : rand(et, bias_sz...) - mask = isnothing(bias_sz) ? nothing : rand(Bool, bias_sz...) 
- et_suite["attention"][ - "q($q_sz)-k($k_sz)-v($v_sz)-bias($bias_sz)-nheads($nheads)" - ] = @benchmarkable dot_product_attention($q, $k, $v, $bias; nheads = $nheads) - et_suite["score"][ - "q(8, $q_sz)-k(8, $k_sz)-bias($bias_sz)-nheads($nheads)" - ] = @benchmarkable dot_product_attention_scores($q_score, $k_score, $bias; mask = $mask) - end -end +register_benchmark("FLUXML_BENCHMARK_NNLIB_ACTIVATIONS", "nnlib/activations.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_SOFTMAX", "nnlib/softmax.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_CONV", "nnlib/conv.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_POOLING", "nnlib/pooling.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_DROPOUT", "nnlib/dropout.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_UPSAMPLE", "nnlib/upsample.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_GEMM", "nnlib/gemm.jl") +register_benchmark("FLUXML_BENCHMARK_NNLIB_ATTENTION", "nnlib/attention.jl") diff --git a/benchmark/benchmark/nnlib/activations.jl b/benchmark/benchmark/nnlib/activations.jl new file mode 100644 index 0000000..5133e86 --- /dev/null +++ b/benchmark/benchmark/nnlib/activations.jl @@ -0,0 +1,12 @@ +########## activations ############ +SUITE["nnlib"]["activations"] = BenchmarkGroup() +for et in (Float64, Float32, Float16,) + et_suite = BenchmarkGroup() + SUITE["nnlib"]["activations"][string(et)] = et_suite + let x = rand(et, 1024, 1024), y = similar(x) + for f in NNlib.ACTIVATIONS + act = @eval($f) + et_suite[string(f)] = @benchmarkable broadcast!($act, $y, $x) + end + end +end diff --git a/benchmark/benchmark/nnlib/attention.jl b/benchmark/benchmark/nnlib/attention.jl new file mode 100644 index 0000000..50a00e0 --- /dev/null +++ b/benchmark/benchmark/nnlib/attention.jl @@ -0,0 +1,26 @@ +########## attention ############ +SUITE["nnlib"]["attention"] = BenchmarkGroup() +for et in (Float16, Float64) + et_suite = BenchmarkGroup( + "attention" => BenchmarkGroup(), "score" => BenchmarkGroup()) + 
SUITE["nnlib"]["attention"][string(et)] = et_suite + + input_items = [ + ((16,128,8), (16,512,8), (32,512,8), (512,128), 4), + ((64,64,16), (64,64,16), (64,64,16), (64,64), 4), + ((8,6,1), (8,10,1), (4,10,1), nothing, 1), + ] + for (q_sz, k_sz, v_sz, bias_sz, nheads) in input_items + q, q_score = rand(et, q_sz...), rand(et, 8, q_sz...) + k, k_score = rand(et, k_sz...), rand(et, 8, k_sz...) + v = rand(et, v_sz...) + bias = isnothing(bias_sz) ? nothing : rand(et, bias_sz...) + mask = isnothing(bias_sz) ? nothing : rand(Bool, bias_sz...) + et_suite["attention"][ + "q($q_sz)-k($k_sz)-v($v_sz)-bias($bias_sz)-nheads($nheads)" + ] = @benchmarkable dot_product_attention($q, $k, $v, $bias; nheads = $nheads) + et_suite["score"][ + "q(8, $q_sz)-k(8, $k_sz)-bias($bias_sz)-nheads($nheads)" + ] = @benchmarkable dot_product_attention_scores($q_score, $k_score, $bias; mask = $mask) + end +end diff --git a/benchmark/benchmark/nnlib/conv.jl b/benchmark/benchmark/nnlib/conv.jl new file mode 100644 index 0000000..9b571f4 --- /dev/null +++ b/benchmark/benchmark/nnlib/conv.jl @@ -0,0 +1,58 @@ +########## conv ############ +SUITE["nnlib"]["conv"] = BenchmarkGroup() +for rank in (3, 2, 1,), N in (512, 256,), K in (3,), + C_in in (1,), C_out in (1,), + stride in (1,), dilation in (1,), padding in (2, 0,) + + size_suite = BenchmarkGroup() + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ] = size_suite + + conv_items = [ + (NNlib.conv_direct!, NNlib.∇conv_data_direct!, NNlib.∇conv_filter_direct!, DenseConvDims, "direct"), + (NNlib.conv_im2col!, NNlib.∇conv_data_im2col!, NNlib.∇conv_filter_im2col!, DenseConvDims, "im2col"), + (NNlib.depthwiseconv_direct!, NNlib.∇depthwiseconv_data_direct!, NNlib.∇depthwiseconv_filter_direct!, DepthwiseConvDims, "direct"), + (NNlib.depthwiseconv_im2col!, NNlib.∇depthwiseconv_data_im2col!, NNlib.∇depthwiseconv_filter_im2col!, DepthwiseConvDims, "im2col"), + ] + + for (conv!, 
∇conv_data!, ∇conv_filter!, cdimT, _) in conv_items + conv_suite = BenchmarkGroup() + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ][rstrip(string(conv!), '!')] = conv_suite + + for et in (Float32, Float64) + et_suite = BenchmarkGroup() + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ][rstrip(string(conv!), '!')][string(et)] = et_suite + + x = zeros(et, repeat([N], rank)..., C_in, 1) + w = (cdimT == DenseConvDims) ? + zeros(et, repeat([K], rank)..., C_in, C_out) : + zeros(et, repeat([K], rank)..., C_out, C_in) + + cdims = try + cdimT(x, w; stride = stride, dilation = dilation, padding = padding) + catch + continue + end + + y = (cdimT == DenseConvDims) ? + zeros(et, NNlib.output_size(cdims)..., C_out, 1) : + zeros(et, NNlib.output_size(cdims)..., C_out*C_in, 1) + + dx, dy, dw = similar(x), similar(y), similar(w) + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ][rstrip(string(conv!), '!')][string(et)]["conv"] = @benchmarkable $(conv!)($y, $x, $w, $cdims) + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ][rstrip(string(conv!), '!')][string(et)]["data"] = @benchmarkable $(∇conv_data!)($dx, $y, $w, $cdims) + SUITE["nnlib"]["conv"][ + "$(rank+2)-N($N)-K($K)-in($C_in)-out($C_out)-stride($stride)-dilation($dilation)-padding($padding)" + ][rstrip(string(conv!), '!')][string(et)]["filter"] = @benchmarkable $(∇conv_filter!)($dw, $x, $y, $cdims) + end + end +end diff --git a/benchmark/benchmark/nnlib/dropout.jl b/benchmark/benchmark/nnlib/dropout.jl new file mode 100644 index 0000000..90dd838 --- /dev/null +++ b/benchmark/benchmark/nnlib/dropout.jl @@ -0,0 +1,20 @@ +########## dropout ############ +SUITE["nnlib"]["dropout"] = BenchmarkGroup() +for rank 
in (1, 2, 3,), N in (128, 512, 1024,) + size_suite = BenchmarkGroup() + SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"] = size_suite + + x = ones(Float32, repeat([N], rank)..., 1, 1) + y = zeros(Float32, repeat([N], rank)..., 1, 1) + p = 0.2 + + dropout_suite = BenchmarkGroup() + dropout_suite["with-colon"] = @benchmarkable dropout($x, $p) + dropout_suite["with-dim"] = @benchmarkable dropout($x, $p; dims = 1) + SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"]["dropout"] = dropout_suite + + dropout!_suite = BenchmarkGroup() + dropout!_suite["with-colon"] = @benchmarkable dropout!($y, $x, $p) + dropout!_suite["with-dim"] = @benchmarkable dropout!($y, $x, $p; dims = 1) + SUITE["nnlib"]["dropout"]["$(rank+2)-N($N)"]["dropout!"] = dropout!_suite +end diff --git a/benchmark/benchmark/nnlib/gemm.jl b/benchmark/benchmark/nnlib/gemm.jl new file mode 100644 index 0000000..9741902 --- /dev/null +++ b/benchmark/benchmark/nnlib/gemm.jl @@ -0,0 +1,26 @@ +########## gemm ############ +SUITE["nnlib"]["gemm"] = BenchmarkGroup() +for et in (Float32, Float64) + et_suite = BenchmarkGroup( + "gemm!" => BenchmarkGroup(), + "batched_gemm!" => BenchmarkGroup()) + SUITE["nnlib"]["gemm"][string(et)] = et_suite + + # transA and transB are not of the main variants. + # gemm! meets some memory problem, not included here. 
+ input_items = [ + (Val(false), Val(false), 'N', 'N', 1024, 1024, 1024, et(0.5), et(0.0)), + (Val(false), Val(false), 'N', 'N', 512, 512, 128, et(0.5), et(1.0)), + (Val(false), Val(false), 'N', 'N', 80, 40, 100, et(1.0), et(0.0)), + ] + for (transA, transB, transA_ch, transB_ch, M, N, K, alpha, beta) in input_items + bA = ones(et, M, N, 1) + bB = ones(et, N, K, 1) + bC = zeros(et, M, K, 1) + et_suite["batched_gemm!"][ + "trans($transA_ch,$transB_ch)-M($M)-N($N)-K($K)-alpha($alpha)-beta($beta)" + ] = @benchmarkable NNlib.batched_gemm!( + $transA_ch, $transB_ch, + $alpha, $bA, $bB, $beta, $bC) + end +end diff --git a/benchmark/benchmark/nnlib/pooling.jl b/benchmark/benchmark/nnlib/pooling.jl new file mode 100644 index 0000000..6f4bfd7 --- /dev/null +++ b/benchmark/benchmark/nnlib/pooling.jl @@ -0,0 +1,38 @@ +########## pooling ############ +SUITE["nnlib"]["pooling"] = BenchmarkGroup() +for rank in (3, 2, 1,), N in (512, 256,), K in (4, 2,), stride in (4, 2, 1,) + size_suite = BenchmarkGroup() + SUITE["nnlib"]["pooling"]["$(rank+2)-N($N)-K($K)-stride($stride)"] = size_suite + + x = zeros(Float32, repeat([N], rank)..., 1, 1) + pdims = PoolDims(x, K; stride = stride) + y = zeros(Float32, NNlib.output_size(pdims)..., 1, 1) + dx, dy = similar(x), similar(y) + + pooling_items = [ + (NNlib.maxpool!, NNlib.∇maxpool!, "maxpool"), + (NNlib.meanpool!, NNlib.∇meanpool!, "meanpool"), + (NNlib.lpnormpool!, NNlib.∇lpnormpool!, "lpnormpool"), + ] + + for (pool, ∇pool, name) in pooling_items + pooling_suite = BenchmarkGroup() + SUITE["nnlib"]["pooling"][ + "$(rank+2)-N($N)-K($K)-stride($stride)" + ]["$(name)$(rank)d-direct"] = pooling_suite + SUITE["nnlib"]["pooling"][ + "$(rank+2)-N($N)-K($K)-stride($stride)" + ]["$(name)$(rank)d-direct"]["pool"] = @benchmarkable $pool( + $y, $x, $pdims; p = ($name == "lpnormpool") ? 
2 : nothing) + SUITE["nnlib"]["pooling"][ + "$(rank+2)-N($N)-K($K)-stride($stride)" + ]["$(name)$(rank)d-direct"]["data"] = @benchmarkable $(∇pool)( + $dx, $dy, $y, $x, $pdims; p = ($name == "lpnormpool") ? 2 : nothing) + end + + if NNlib.is_nnpack_available() && NNlib.nnpack_supported_operation(pdims) + SUITE["nnlib"]["pooling"][ + "$(rank+2)-N($N)-K($K)-stride($stride)" + ]["maxpool$(rank)d-nnpack"]["pool"] = @benchmarkable NNlib.maxpool_nnpack!($y, $x, $pdims) + end +end diff --git a/benchmark/benchmark/nnlib/softmax.jl b/benchmark/benchmark/nnlib/softmax.jl new file mode 100644 index 0000000..a8b5abe --- /dev/null +++ b/benchmark/benchmark/nnlib/softmax.jl @@ -0,0 +1,23 @@ +########## softmax ############ +SUITE["nnlib"]["softmax"] = BenchmarkGroup() +for (fn!, fn_bw) in [(softmax!, NNlib.∇softmax_data), (logsoftmax!, NNlib.∇logsoftmax_data)] + fn_suite = BenchmarkGroup() + SUITE["nnlib"]["softmax"][rstrip(string(fn!), '!')] = fn_suite + let SIZES = [ + (12288, 2048, 1), (4096, 4096, 2), (4096, 2048, 2), (2048, 2048, 2), + (1024, 2048, 4), (768, 1024, 4), (512, 784, 8), (128, 384, 8), + ] + for et in (Float32, Float16,) + et_suite = BenchmarkGroup("fw" => BenchmarkGroup(), "bw" => BenchmarkGroup()) + fn_suite[string(et)] = et_suite + for sz in SIZES + x = randn(et, sz) + y = similar(x) + dy = zero(x) + fn!(y, x) + et_suite["fw"][string(sz)] = @benchmarkable $fn!($y, $x) + et_suite["bw"][string(sz)] = @benchmarkable $fn_bw($dy, $y) + end + end + end +end diff --git a/benchmark/benchmark/nnlib/upsample.jl b/benchmark/benchmark/nnlib/upsample.jl new file mode 100644 index 0000000..2f54c89 --- /dev/null +++ b/benchmark/benchmark/nnlib/upsample.jl @@ -0,0 +1,35 @@ +########## upsample ############ +SUITE["nnlib"]["upsample"] = BenchmarkGroup() +SUITE["nnlib"]["upsample"]["linear"] = BenchmarkGroup() +for rank in (3, 2, 1,), et in (Float32, Float16,) + et_suite = BenchmarkGroup("fw" => BenchmarkGroup(), "bw" => BenchmarkGroup()) + 
SUITE["nnlib"]["upsample"]["linear"][string(et)] = et_suite + + inputs_sizes = [ + (1024, (0.5, 2), false), (256, 8, false), + (256, 4, true), (128, (1, 2), false), (128, 2, true), + ] + for (sz, scale, ac) in inputs_sizes + x = ones(et, repeat([sz], rank)..., 1, 1) + et_suite["fw"][ + "$(rank+2)-N($sz)-scale($scale)" + ] = @benchmarkable upsample_linear($x, $scale; align_corners = $ac) + et_suite["bw"][ + "$(rank+2)-N($sz)-scale($scale)" + ] = @benchmarkable ∇upsample_linear($x; + size = (typeof($scale) <: Tuple) ? + floor.(Integer, $sz .* $scale) : + ntuple(_ -> floor(Integer, $sz * $scale), $rank), + align_corners = $ac) + end +end + +SUITE["nnlib"]["upsample"]["nearest"] = BenchmarkGroup() +for rank in (3, 2, 1,), N in (1024, 512, 128,) + et_suite = BenchmarkGroup() + for et in (Float64, Float32, Float16,) + x = zeros(Float32, repeat([N], rank)..., 1, 1) + et_suite[string(et)] = @benchmarkable upsample_nearest($x; size = (repeat([$N * 10], $rank)..., 1, 1)) + end + SUITE["nnlib"]["upsample"]["nearest"]["$(rank+2)-N($N)"] = et_suite +end