diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 52a371d..922d5aa 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -17,4 +17,4 @@ StatsBase = "0.34" CairoMakie = "0.12" PyCall = "1.96" BenchmarkTools = "1.6" -ChunkSplitter = "2" +ChunkSplitter = "3" diff --git a/benchmark/benchmark_comparison_non_stream_WWR.jl b/benchmark/benchmark_comparison_non_stream_WWR.jl index 3d45414..885e044 100644 --- a/benchmark/benchmark_comparison_non_stream_WWR.jl +++ b/benchmark/benchmark_comparison_non_stream_WWR.jl @@ -64,7 +64,7 @@ function weighted_reservoir_sample_parallel_1_pass(rngs, a, ws, n) nt = Threads.nthreads() ss = Vector{Vector{eltype(a)}}(undef, nt) w_sums = Vector{Float64}(undef, nt) - chunks_inds = chunks(a; n=nt) + chunks_inds = index_chunks(a; n=nt) Threads.@threads for (i, inds) in enumerate(chunks_inds) s = weighted_reservoir_sample_seq(rngs[i], @view(a[inds]), @view(ws[inds]), n) ss[i], w_sums[i] = s @@ -84,7 +84,7 @@ end function weighted_reservoir_sample_parallel_2_pass(rngs, a, ws, n) nt = Threads.nthreads() - chunks_inds = chunks(a; n=nt) + chunks_inds = index_chunks(a; n=nt) w_sums = Vector{Float64}(undef, nt) Threads.@threads for (i, inds) in enumerate(chunks_inds) w_sums[i] = sum(@view(ws[inds])) @@ -102,7 +102,7 @@ end function sample_parallel_2_pass(rngs, a, ws, n) nt = Threads.nthreads() - chunks_inds = chunks(a; n=nt) + chunks_inds = index_chunks(a; n=nt) w_sums = Vector{Float64}(undef, nt) Threads.@threads for (i, inds) in enumerate(chunks_inds) w_sums[i] = sum(@view(ws[inds])) @@ -190,26 +190,28 @@ def sample_times_numpy(): """ times_numpy = py"sample_times_numpy()" -f = Figure(backgroundcolor = RGBf(0.98, 0.98, 0.98), size = (1100, 700)); +f = Figure(backgroundcolor = RGBf(0.98, 0.98, 0.98), size = (700, 600), dpi=1200); ax1 = Axis(f[1, 1], yscale=log10, xscale=log10, yminorticksvisible = true, yminorgridvisible = true, - yminorticks = IntervalsBetween(10)) + yminorticks = IntervalsBetween(10), xticklabelsize=15, yticklabelsize=15, titlesize=16, + xlabelsize=17, ylabelsize=17,) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_numpy[3:end], label = "numpy.choice sequential", marker = :circle, markersize = 12, linestyle = :dot) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_other[3:end], label = "StatsBase.sample sequential", marker = :rect, markersize = 12, linestyle = :dot) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_other_parallel[3:end], label = "StatsBase.sample parallel (2 passes)", marker = :diamond, markersize = 12, linestyle = :dot) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_single_thread[3:end], label = "WRSWR-SKIP sequential", marker = :hexagon, markersize = 12, linestyle = :dot) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_multi_thread[3:end], label = "WRSWR-SKIP parallel (1 pass)", marker = :cross, markersize = 12, linestyle = :dot) -scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_multi_thread_2[3:end], label = "WRSWR-SKIP parallel (2 passes)", marker = :xcross, markersize = 12, linestyle = :dot) -Legend(f[2,1], ax1, labelsize=10, framevisible = false, orientation = :horizontal) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_numpy[3:end]./10^3, label = "numpy.choice sequential", marker = :circle, markersize = 12, linestyle = :dot) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_other[3:end]./10^3, label = "StatsBase.sample sequential", marker = :rect, markersize = 12, linestyle = :dot) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_other_parallel[3:end]./10^3, label = "StatsBase.sample parallel (2 passes)", marker = :diamond, markersize = 12, linestyle = :dot) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_single_thread[3:end]./10^3, label = "WRSWR-SKIP sequential", marker = :hexagon, markersize = 12, linestyle = :dot) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_multi_thread[3:end]./10^3, label = "WRSWR-SKIP parallel (1 pass)", marker = :cross, markersize = 12, linestyle = :dot) +scatterlines!(ax1, [10^i/10^8 for i in 2:7], times_multi_thread_2[3:end]./10^3, label = "WRSWR-SKIP parallel (2 passes)", marker = :xcross, markersize = 12, linestyle = :dot) +Legend(f[2,1], ax1, labelsize=12, framevisible = false, orientation = :horizontal, nbanks = 3) ax1.xtickformat = x -> string.(round.(x.*100, digits=10)) .* "%" ax1.title = "Comparison between weighted sampling algorithms in a non-streaming context" ax1.xticks = [10^(i)/10^8 for i in 2:7] +ax1.yticks = [10^float(i) for i in -1:1] ax1.xlabel = "sample ratio" -ax1.ylabel = "time (ms)" +ax1.ylabel = "time (s)" f save("comparison_WRSWR_SKIP_alg_no_stream.png", f) diff --git a/benchmark/benchmark_comparison_stream_WWR.jl b/benchmark/benchmark_comparison_stream_WWR.jl index 675a063..307d86e 100644 --- a/benchmark/benchmark_comparison_stream_WWR.jl +++ b/benchmark/benchmark_comparison_stream_WWR.jl @@ -85,41 +85,43 @@ end using CairoMakie -f = Figure(backgroundcolor = RGBf(0.98, 0.98, 0.98), size = (1100, 700)); - -f.title = "Comparison between AExpJ-WR and WRSWR-SKIP Algorithms" +f = Figure(backgroundcolor = RGBf(0.98, 0.98, 0.98), size = (700, 400), dpi=1200); ax1 = Axis(f[1, 1], yscale=log10, xscale=log10, - yminorticksvisible = true, yminorgridvisible = true, - yminorticks = IntervalsBetween(10)) + yminorticksvisible = true, yminorgridvisible = true, xlabelsize=16, ylabelsize=16, + yminorticks = IntervalsBetween(10), xticklabelsize=11, titlesize=16) ax2 = Axis(f[1, 2], yscale=log10, xscale=log10, + yminorticksvisible = true, yminorgridvisible = true, xlabelsize=16, + yminorticks = IntervalsBetween(10), xticklabelsize=11, titlesize=16) +ax3 = Axis(f[1, 3], yscale=log10, xscale=log10, xlabelsize=16, yminorticksvisible = true, yminorgridvisible = true, - yminorticks = IntervalsBetween(10)) -ax3 = Axis(f[1, 3], yscale=log10, xscale=log10, - yminorticksvisible = true, yminorgridvisible = true, - yminorticks = IntervalsBetween(10)) + yminorticks = IntervalsBetween(10), xticklabelsize=11, titlesize=16) + +linkyaxes!(ax1, ax2, ax3) -#ax4 = Axis(f[2, 1]) +hideydecorations!(ax2, grid=false, minorgrid=false) +hideydecorations!(ax3, grid=false, minorgrid=false) for x in benchs - label = x[1] == :wv_const ? (x[2] == AlgAExpJWR() ? "ExpJ-WR" : "WRSWR-SKIP") : "" + label = x[1] == :wv_const ? (x[2] == AlgAExpJWR() ? "A-ExpJ-WR" : "WRSWR-SKIP") : "" ax = x[1] == :wv_decr ? ax1 : (x[1] == :wv_const ? ax2 : ax3) marker = x[2] == AlgAExpJWR() ? :circle : (:xcross) - scatterlines!(ax, [10^i/10^8 for i in 3:7], x[3] ./ 10^6, marker = marker, + scatterlines!(ax, [10^i/10^8 for i in 4:7], x[3][2:end] ./ 10^9, marker = marker, label = label, markersize = 12, linestyle = :dot) end -Legend(ax4, labelsize=10, framevisible = false, orientation = :horizontal) +Legend(f[2,:], ax2, labelsize=12, markersize=2, framevisible=false, orientation = :horizontal) +rowsize!(f.layout, 1, Relative(4/5)) for ax in [ax1, ax2, ax3] ax.xtickformat = x -> string.(round.(x.*100, digits=10)) .* "%" #ax.ytickformat = y -> y .* "^" ax.title = ax == ax1 ? "decreasing weights" : (ax == ax2 ? "constant weights" : "increasing weights") - ax.xticks = [10^(i)/10^8 for i in 3:7] - ax.yticks = [10^i for i in 2:4] + ax.xticks = [10^(i)/10^8 for i in 4:7] + ax.yticks = [10^float(i) for i in -1:1] ax.xlabel = "sample ratio" - ax == ax1 && (ax.ylabel = "time (ms)") + ax == ax1 && (ax.ylabel = "time (s)") end save("comparison_WRSWR_SKIP_alg_stream.png", f) -f \ No newline at end of file +f