From 5b2b294344f4d02a614b00d9006e78ca23b43cc6 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 22:54:26 -0500 Subject: [PATCH 01/20] use pairwise order for mapreduce on arbitrary iterators --- base/reduce.jl | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index f9411f8b5d783..53e6091b12b78 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -277,6 +277,57 @@ end mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) +# the following mapreduce_impl is called by mapreduce for non-array iterators, and +# implements an index-free in-order pairwise strategy: +function mapreduce_impl(f, op, nt, itr) + it = iterate(itr) + it === nothing && return nt isa _InitialValue ? mapreduce_empty_iter(f, op, itr, IteratorEltype(itr)) : nt + a1, state = it + it = iterate(itr, state) + it === nothing && return nt isa _InitialValue ? mapreduce_first(f, op, a1) : op(nt, f(a1)) + a2, state = it + v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) + n = pairwise_blocksize(f, op) + v, state = _mapreduce_impl(f, op, v, itr, state, n) + while state !== nothing + v, state = _mapreduce_impl(f, op, v, itr, state, n) + n *= 2 + end + return v +end + +# apply mapreduce to at most n elements of itr, in a pairwise recursive fashion, +# returning op(nt, ...n elements...), state --- state=nothing if itr ended +function _mapreduce_impl(f, op, nt, itr, state, n) + # for the pairwise algorithm we want to reduce this block + # separately *before* combining it with nt ... try to peel + # off the first two elements to construct block's initial v + it = iterate(itr, state) + it === nothing && return nt, nothing + a1, state = it + it = iterate(itr, state) + it === nothing && return op(nt, f(a1)) + a2, state = it + v = op(f(a1), f(a2)) + + if n ≤ pairwise_blocksize(f, op) # coarsened base case + @simd for _ = 3:n + it = iterate(itr, state) + it === nothing && return op(nt, v), nothing + a, state = it + v = op(v, f(a)) + end + return op(nt, v), state + else + n >>= 1 + v, state = _mapreduce_impl(f, op, v, itr, state, n-2) + state === nothing && return op(nt, v), nothing + v, state = _mapreduce_impl(f, op, v, itr, state, n) + # break return statement into two cases to help type inference + return state === nothing ? (op(nt, v), nothing) : (op(nt, v), state) + end +end + """ mapreduce(f, op, itrs...; [init]) @@ -304,9 +355,17 @@ implementations may reuse the return value of `f` for elements that appear multi `itr`. Use [`mapfoldl`](@ref) or [`mapfoldr`](@ref) instead for guaranteed left or right associativity and invocation of `f` for every value. """ -mapreduce(f, op, itr; kw...) = mapfoldl(f, op, itr; kw...) +function mapreduce(f, op, itr; init=_InitialValue()) + if haslength(itr) && length(itr) ≤ pairwise_blocksize(f, op) + return mapfoldl(f, op, itr; init) # skip overhead for small iterators + else + return mapreduce_impl(f, op, init, itr, IteratorSize(itr)) + end +end mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) +mapreduce(f, op, itr::Union{Tuple,NamedTuple}; kw...) = mapfoldl(f, op, itr; kw...) + # Note: sum_seq usually uses four or more accumulators after partial # unrolling, so each accumulator gets at most 256 numbers pairwise_blocksize(f, op) = 1024 From 5cdca767ecd228a75ecba68435f4b1b0e4852044 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:06:11 -0500 Subject: [PATCH 02/20] tweak --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 53e6091b12b78..8326622b8bbfd 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -288,7 +288,7 @@ function mapreduce_impl(f, op, nt, itr) a2, state = it v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) n = pairwise_blocksize(f, op) - v, state = _mapreduce_impl(f, op, v, itr, state, n) + v, state = _mapreduce_impl(f, op, v, itr, state, n-2) while state !== nothing v, state = _mapreduce_impl(f, op, v, itr, state, n) n *= 2 From 92e0028996e02376ab1b6c44b09e48616e76676a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:08:20 -0500 Subject: [PATCH 03/20] fallback to mapfoldl if pairwise_blocksize < 3 --- base/reduce.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/reduce.jl b/base/reduce.jl index 8326622b8bbfd..41bb75563f284 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -288,6 +288,7 @@ function mapreduce_impl(f, op, nt, itr) a2, state = it v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) n = pairwise_blocksize(f, op) + n < 3 && return mapfoldl(f, op, itr; init=v) v, state = _mapreduce_impl(f, op, v, itr, state, n-2) while state !== nothing v, state = _mapreduce_impl(f, op, v, itr, state, n) From ba7ade96902a734a0aaf37ca716432cad39df25e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:11:37 -0500 Subject: [PATCH 04/20] comment --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 41bb75563f284..2adc0b25eaab8 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -288,7 +288,7 @@ function mapreduce_impl(f, op, nt, itr) a2, state = it v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) n = pairwise_blocksize(f, op) - n < 3 && return mapfoldl(f, op, itr; init=v) + n < 3 && return mapfoldl(f, op, itr; init=v) # not supported v, state = _mapreduce_impl(f, op, v, itr, state, n-2) while state !== nothing v, state = _mapreduce_impl(f, op, v, itr, state, n) From 52d7c753b707a9b6840ee39278e67b9e9893ee43 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:17:24 -0500 Subject: [PATCH 05/20] whoops --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 2adc0b25eaab8..613c16128f673 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -360,7 +360,7 @@ function mapreduce(f, op, itr; init=_InitialValue()) if haslength(itr) && length(itr) ≤ pairwise_blocksize(f, op) return mapfoldl(f, op, itr; init) # skip overhead for small iterators else - return mapreduce_impl(f, op, init, itr, IteratorSize(itr)) + return mapreduce_impl(f, op, init, itr) end end mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) From bc949257f00cecc2c72f7d1acf1fbdd6af3363ac Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:27:03 -0500 Subject: [PATCH 06/20] handle pairwise_blocksize < 3 --- base/reduce.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 613c16128f673..8f6241722a648 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -287,8 +287,7 @@ function mapreduce_impl(f, op, nt, itr) it === nothing && return nt isa _InitialValue ? mapreduce_first(f, op, a1) : op(nt, f(a1)) a2, state = it v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) - n = pairwise_blocksize(f, op) - n < 3 && return mapfoldl(f, op, itr; init=v) # not supported + n = max(2, pairwise_blocksize(f, op)) v, state = _mapreduce_impl(f, op, v, itr, state, n-2) while state !== nothing v, state = _mapreduce_impl(f, op, v, itr, state, n) @@ -311,7 +310,7 @@ function _mapreduce_impl(f, op, nt, itr, state, n) a2, state = it v = op(f(a1), f(a2)) - if n ≤ pairwise_blocksize(f, op) # coarsened base case + if n ≤ max(2, pairwise_blocksize(f, op)) # coarsened base case @simd for _ = 3:n it = iterate(itr, state) it === nothing && return op(nt, v), nothing From ee24cb2160498fbe204a2297b9b4fbe56cb57bd1 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 4 Dec 2023 23:40:58 -0500 Subject: [PATCH 07/20] whoops --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 8f6241722a648..198e9ec009f94 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -306,7 +306,7 @@ function _mapreduce_impl(f, op, nt, itr, state, n) it === nothing && return nt, nothing a1, state = it it = iterate(itr, state) - it === nothing && return op(nt, f(a1)) + it === nothing && return op(nt, f(a1)), nothing a2, state = it v = op(f(a1), f(a2)) From 106563f1ed5692394039b839b321fd8e579d0e97 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 5 Dec 2023 08:27:07 -0500 Subject: [PATCH 08/20] length may not be O(1) --- base/reduce.jl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 198e9ec009f94..97e0b57a2c645 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -355,13 +355,7 @@ implementations may reuse the return value of `f` for elements that appear multi `itr`. Use [`mapfoldl`](@ref) or [`mapfoldr`](@ref) instead for guaranteed left or right associativity and invocation of `f` for every value. """ -function mapreduce(f, op, itr; init=_InitialValue()) - if haslength(itr) && length(itr) ≤ pairwise_blocksize(f, op) - return mapfoldl(f, op, itr; init) # skip overhead for small iterators - else - return mapreduce_impl(f, op, init, itr) - end -end +mapreduce(f, op, itr; init=_InitialValue()) = mapreduce_impl(f, op, init, itr) mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) mapreduce(f, op, itr::Union{Tuple,NamedTuple}; kw...) = mapfoldl(f, op, itr; kw...) From 1f8d60b2fce138971b723600a19d0a97ff40f48f Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 5 Dec 2023 14:19:04 -0500 Subject: [PATCH 09/20] unify error messages --- base/reduce.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 97e0b57a2c645..b5c81abb906f5 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -369,10 +369,9 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 # handling empty arrays -_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed")) +_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer")) _empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError(""" - reducing with $f over an empty collection of element type $T is not allowed. - You may be able to prevent this error by supplying an `init` value to the reducer.""")) + reducing with $f over an empty collection of element type $T is not allowed; consider supplying `init` to the reducer""")) """ Base.reduce_empty(op, T) @@ -435,9 +434,7 @@ mapreduce_empty_iter(f, op, itr, ItrEltype) = @inline reduce_empty_iter(op, itr) = reduce_empty_iter(op, itr, IteratorEltype(itr)) @inline reduce_empty_iter(op, itr, ::HasEltype) = reduce_empty(op, eltype(itr)) -reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError(""" - reducing over an empty collection of unknown element type is not allowed. - You may be able to prevent this error by supplying an `init` value to the reducer.""")) +reduce_empty_iter(op, itr, ::EltypeUnknown) = _empty_reduce_error() # handling of single-element iterators From 5e06e36ca8af0afac05d100290ae2e1c09f69e3b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 5 Dec 2023 14:40:28 -0500 Subject: [PATCH 10/20] don't change the exception type (make sure to call reduce_empty / mapreduce_empty) --- base/reduce.jl | 2 +- test/reduce.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index b5c81abb906f5..859e1ae7718fb 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -281,7 +281,7 @@ mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Int # implements an index-free in-order pairwise strategy: function mapreduce_impl(f, op, nt, itr) it = iterate(itr) - it === nothing && return nt isa _InitialValue ? mapreduce_empty_iter(f, op, itr, IteratorEltype(itr)) : nt + it === nothing && return nt isa _InitialValue ? reduce_empty_iter(_xfadjoint(BottomRF(op), Generator(f, itr))...) : nt a1, state = it it = iterate(itr, state) it === nothing && return nt isa _InitialValue ? mapreduce_first(f, op, a1) : op(nt, f(a1)) diff --git a/test/reduce.jl b/test/reduce.jl index 2c852084de37e..d1c5474d1b35e 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -615,6 +615,7 @@ test18695(r) = sum( t^2 for t in r ) @test_throws str -> ( occursin("reducing over an empty", str) && occursin("consider supplying `init`", str) && !occursin("or defining", str)) test18695(Any[]) +@test_throws MethodError test18695(Any[]) # For Core.IntrinsicFunction @test_throws str -> ( occursin("reducing over an empty", str) && From c783ea35b28ac2f37a21eaa8306e84195cc69b6e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 5 Dec 2023 14:52:07 -0500 Subject: [PATCH 11/20] simplification --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 859e1ae7718fb..cb329e665c371 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -281,7 +281,7 @@ mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Int # implements an index-free in-order pairwise strategy: function mapreduce_impl(f, op, nt, itr) it = iterate(itr) - it === nothing && return nt isa _InitialValue ? reduce_empty_iter(_xfadjoint(BottomRF(op), Generator(f, itr))...) : nt + it === nothing && return nt isa _InitialValue ? reduce_empty_iter(_xfadjoint(op, Generator(f, itr))...) : nt a1, state = it it = iterate(itr, state) it === nothing && return nt isa _InitialValue ? mapreduce_first(f, op, a1) : op(nt, f(a1)) From f96f76aeda1173b3c27dc72ee8fab4df83a56797 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 5 Dec 2023 16:56:33 -0500 Subject: [PATCH 12/20] updated sums in doctests (more accurate!) --- base/mathconstants.jl | 4 ++-- doc/src/manual/arrays.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/mathconstants.jl b/base/mathconstants.jl index 4bb8c409acf00..223804356f770 100644 --- a/base/mathconstants.jl +++ b/base/mathconstants.jl @@ -76,7 +76,7 @@ julia> Base.MathConstants.eulergamma julia> dx = 10^-6; julia> sum(-exp(-x) * log(x) for x in dx:dx:100) * dx -0.5772078382499133 +0.5772078382090375 ``` """ γ, const eulergamma = γ @@ -109,7 +109,7 @@ julia> Base.MathConstants.catalan catalan = 0.9159655941772... julia> sum(log(x)/(1+x^2) for x in 1:0.01:10^6) * 0.01 -0.9159466120554123 +0.9159466120556188 ``` """ catalan diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index 027f9ca661443..012e83641e0bb 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -407,7 +407,7 @@ sums a series without allocating memory: ```jldoctest julia> sum(1/n^2 for n=1:1000) -1.6439345666815615 +1.6439345666815597 ``` When writing a generator expression with multiple dimensions inside an argument list, parentheses From f679885cc83661d48dd4b6edb430d46ba963ad3e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 17:03:49 -0500 Subject: [PATCH 13/20] more transducer style implementation --- base/reduce.jl | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index cb329e665c371..1ff8e959b9be9 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -277,28 +277,28 @@ end mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) -# the following mapreduce_impl is called by mapreduce for non-array iterators, and -# implements an index-free in-order pairwise strategy: -function mapreduce_impl(f, op, nt, itr) +# the following reduce_impl is called by mapreduce/reduce for non-array iterators, +# and implements an index-free in-order pairwise strategy: +function reduce_impl(op, itr, nt) it = iterate(itr) - it === nothing && return nt isa _InitialValue ? reduce_empty_iter(_xfadjoint(op, Generator(f, itr))...) : nt + it === nothing && return nt isa _InitialValue ? reduce_empty_iter(op, itr) : nt a1, state = it it = iterate(itr, state) - it === nothing && return nt isa _InitialValue ? mapreduce_first(f, op, a1) : op(nt, f(a1)) + it === nothing && return nt isa _InitialValue ? reduce_first(op, a1) : op(nt, a1) a2, state = it - v = op(nt isa _InitialValue ? f(a1) : op(nt, f(a1)), f(a2)) - n = max(2, pairwise_blocksize(f, op)) - v, state = _mapreduce_impl(f, op, v, itr, state, n-2) + v = op(nt isa _InitialValue ? a1 : op(nt, a1), a2) + n = max(2, pairwise_blocksize(op)) + v, state = reduce_impl(op, itr, v, state, n-2) while state !== nothing - v, state = _mapreduce_impl(f, op, v, itr, state, n) + v, state = reduce_impl(op, itr, v, state, n) n *= 2 end return v end -# apply mapreduce to at most n elements of itr, in a pairwise recursive fashion, +# apply reduction to at most n elements of itr, in a pairwise recursive fashion, # returning op(nt, ...n elements...), state --- state=nothing if itr ended -function _mapreduce_impl(f, op, nt, itr, state, n) +function reduce_impl(op, itr, nt, state, n) # for the pairwise algorithm we want to reduce this block # separately *before* combining it with nt ... try to peel # off the first two elements to construct block's initial v @@ -306,28 +306,30 @@ function _mapreduce_impl(f, op, nt, itr, state, n) it === nothing && return nt, nothing a1, state = it it = iterate(itr, state) - it === nothing && return op(nt, f(a1)), nothing + it === nothing && return op(nt, a1), nothing a2, state = it - v = op(f(a1), f(a2)) + v = op(a1, a2) - if n ≤ max(2, pairwise_blocksize(f, op)) # coarsened base case + if n ≤ max(2, pairwise_blocksize(op)) # coarsened base case @simd for _ = 3:n it = iterate(itr, state) it === nothing && return op(nt, v), nothing a, state = it - v = op(v, f(a)) + v = op(v, a) end return op(nt, v), state else n >>= 1 - v, state = _mapreduce_impl(f, op, v, itr, state, n-2) + v, state = reduce_impl(op, itr, v, state, n-2) state === nothing && return op(nt, v), nothing - v, state = _mapreduce_impl(f, op, v, itr, state, n) + v, state = reduce_impl(op, itr, v, state, n) # break return statement into two cases to help type inference return state === nothing ? (op(nt, v), nothing) : (op(nt, v), state) end end +mapreduce_impl(f, op, itr, nt) = reduce_impl(_xfadjoint(op, Generator(f, itr))..., nt) + """ mapreduce(f, op, itrs...; [init]) @@ -355,7 +357,7 @@ implementations may reuse the return value of `f` for elements that appear multi `itr`. Use [`mapfoldl`](@ref) or [`mapfoldr`](@ref) instead for guaranteed left or right associativity and invocation of `f` for every value. """ -mapreduce(f, op, itr; init=_InitialValue()) = mapreduce_impl(f, op, init, itr) +mapreduce(f, op, itr; init=_InitialValue()) = mapreduce_impl(f, op, itr, init) mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) mapreduce(f, op, itr::Union{Tuple,NamedTuple}; kw...) = mapfoldl(f, op, itr; kw...) @@ -367,6 +369,10 @@ pairwise_blocksize(f, op) = 1024 # This combination appears to show a benefit from a larger block size pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 +# for reduce_impl: +pairwise_blocksize(op) = pairwise_blocksize(identity, op) +pairwise_blocksize(op::MappingRF) = pairwise_blocksize(op.f, op.rf) + # handling empty arrays _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer")) From 42105bf1ba3dc5a8bc05add7f15f1061251ed0aa Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 17:04:22 -0500 Subject: [PATCH 14/20] use mapreduce_impl instead of mapfoldl in mapreduce fallbacks --- base/missing.jl | 2 +- base/reduce.jl | 2 +- base/reducedim.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/missing.jl b/base/missing.jl index 35e1b4034643c..b3e7c145d3a10 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -296,7 +296,7 @@ function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray}) something(mapreduce_impl(f, op, itr, first(inds), last(inds))) end -_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr) +_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapreduce_impl(f, op, itr, init) mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) diff --git a/base/reduce.jl b/base/reduce.jl index 1ff8e959b9be9..7520b34322942 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -506,7 +506,7 @@ end mapreduce(f, op, a::Number) = mapreduce_first(f, op, a) -_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapfoldl(f, op, A) +_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapreduce_impl(f, op, itr, _InitialValue()) """ reduce(op, itr; [init]) diff --git a/base/reducedim.jl b/base/reducedim.jl index f5a22940310cf..5254e79ddbae7 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -360,7 +360,7 @@ mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) = reduce(op, map(f, A...); kw...) _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) = - mapfoldl_impl(f, op, nt, A) + mapreduce_impl(f, op, A, nt) _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) = _mapreduce(f, op, IndexStyle(A), A) From 1e972a195947f42f5f5d97f19d3b8470656d17a9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 17:54:31 -0500 Subject: [PATCH 15/20] pairwise is not safe for arbitrary init value --- base/reduce.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 7520b34322942..62fb066f759e1 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -279,14 +279,14 @@ mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Int # the following reduce_impl is called by mapreduce/reduce for non-array iterators, # and implements an index-free in-order pairwise strategy: -function reduce_impl(op, itr, nt) +function reduce_impl(op, itr) it = iterate(itr) - it === nothing && return nt isa _InitialValue ? reduce_empty_iter(op, itr) : nt + it === nothing && return reduce_empty_iter(op, itr) a1, state = it it = iterate(itr, state) - it === nothing && return nt isa _InitialValue ? reduce_first(op, a1) : op(nt, a1) + it === nothing && return reduce_first(op, a1) a2, state = it - v = op(nt isa _InitialValue ? a1 : op(nt, a1), a2) + v = op(a1, a2) n = max(2, pairwise_blocksize(op)) v, state = reduce_impl(op, itr, v, state, n-2) while state !== nothing @@ -328,7 +328,12 @@ function reduce_impl(op, itr, nt, state, n) end end -mapreduce_impl(f, op, itr, nt) = reduce_impl(_xfadjoint(op, Generator(f, itr))..., nt) +mapreduce_impl(f::F, op::OP, itr, ::_InitialValue) where {F,OP} = reduce_impl(_xfadjoint(op, Generator(f, itr))...) + +# for an arbitrary initial value, we need to call foldl, +# because op(nt, itr[i]) may have a different type than op(nt, itr[j])) +# … it's not clear how to reliably determine this without foldl associativity. +mapreduce_impl(f::F, op::OP, itr, nt) where {F,OP} = mapfoldl_impl(f, op, nt, itr) """ mapreduce(f, op, itrs...; [init]) From c074cb9cbd0781e83e6f860ec8813088665e29e4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 18:13:04 -0500 Subject: [PATCH 16/20] make mapreduce_impl argument order consistent with mapfoldl_impl, fix bug --- base/missing.jl | 2 +- base/reduce.jl | 8 ++++---- base/reducedim.jl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/missing.jl b/base/missing.jl index b3e7c145d3a10..7b7ad50658644 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -296,7 +296,7 @@ function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray}) something(mapreduce_impl(f, op, itr, first(inds), last(inds))) end -_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapreduce_impl(f, op, itr, init) +_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapreduce_impl(f, op, _InitialValue(), itr) mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) diff --git a/base/reduce.jl b/base/reduce.jl index 62fb066f759e1..bf38c15ca64f0 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -328,12 +328,12 @@ function reduce_impl(op, itr, nt, state, n) end end -mapreduce_impl(f::F, op::OP, itr, ::_InitialValue) where {F,OP} = reduce_impl(_xfadjoint(op, Generator(f, itr))...) +mapreduce_impl(f::F, op::OP, ::_InitialValue, itr) where {F,OP} = reduce_impl(_xfadjoint(op, Generator(f, itr))...) # for an arbitrary initial value, we need to call foldl, # because op(nt, itr[i]) may have a different type than op(nt, itr[j])) # … it's not clear how to reliably determine this without foldl associativity. -mapreduce_impl(f::F, op::OP, itr, nt) where {F,OP} = mapfoldl_impl(f, op, nt, itr) +mapreduce_impl(f::F, op::OP, nt, itr) where {F,OP} = mapfoldl_impl(f, op, nt, itr) """ mapreduce(f, op, itrs...; [init]) @@ -362,7 +362,7 @@ implementations may reuse the return value of `f` for elements that appear multi `itr`. Use [`mapfoldl`](@ref) or [`mapfoldr`](@ref) instead for guaranteed left or right associativity and invocation of `f` for every value. """ -mapreduce(f, op, itr; init=_InitialValue()) = mapreduce_impl(f, op, itr, init) +mapreduce(f, op, itr; init=_InitialValue()) = mapreduce_impl(f, op, init, itr) mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) mapreduce(f, op, itr::Union{Tuple,NamedTuple}; kw...) = mapfoldl(f, op, itr; kw...) @@ -511,7 +511,7 @@ end mapreduce(f, op, a::Number) = mapreduce_first(f, op, a) -_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapreduce_impl(f, op, itr, _InitialValue()) +_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapreduce_impl(f, op, _InitialValue(), itr) """ reduce(op, itr; [init]) diff --git a/base/reducedim.jl b/base/reducedim.jl index 5254e79ddbae7..2f0d5a948d30b 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -360,7 +360,7 @@ mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) = reduce(op, map(f, A...); kw...) _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) = - mapreduce_impl(f, op, A, nt) + mapreduce_impl(f, op, nt, A) _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) = _mapreduce(f, op, IndexStyle(A), A) From 808d61c13f185f2d13232cb49574dcfdb5466e19 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 19:46:06 -0500 Subject: [PATCH 17/20] pairwise reduction seems incompatible with the transducer style --- base/reduce.jl | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index bf38c15ca64f0..11285cc52f55a 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -277,64 +277,63 @@ end mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) -# the following reduce_impl is called by mapreduce/reduce for non-array iterators, +# the following mapreduce_impl is called by mapreduce for non-array iterators, # and implements an index-free in-order pairwise strategy: -function reduce_impl(op, itr) +function mapreduce_impl(f, op, ::_InitialValue, itr) it = iterate(itr) - it === nothing && return reduce_empty_iter(op, itr) + it === nothing && return reduce_empty_iter(_xfadjoint(op, Generator(f, itr))...) a1, state = it it = iterate(itr, state) - it === nothing && return reduce_first(op, a1) + it === nothing && return mapreduce_first(f, op, a1) a2, state = it - v = op(a1, a2) - n = max(2, pairwise_blocksize(op)) - v, state = reduce_impl(op, itr, v, state, n-2) + v = op(f(a1), f(a2)) + n = max(2, pairwise_blocksize(f, op)) + v, state = _mapreduce_impl(f, op, itr, v, state, n) while state !== nothing - v, state = reduce_impl(op, itr, v, state, n) + v, state = _mapreduce_impl(f, op, itr, v, state, n) n *= 2 end return v end -# apply reduction to at most n elements of itr, in a pairwise recursive fashion, +# apply reduction to at most ≈ n elements of itr, in a pairwise recursive fashion, # returning op(nt, ...n elements...), state --- state=nothing if itr ended -function reduce_impl(op, itr, nt, state, n) +function _mapreduce_impl(f, op, itr, nt, state, n) # for the pairwise algorithm we want to reduce this block - # separately *before* combining it with nt ... try to peel - # off the first two elements to construct block's initial v + # separately *before* combining it with nt, so we peel + # off the first element to initialize the block reduction: it = iterate(itr, state) it === nothing && return nt, nothing a1, state = it it = iterate(itr, state) - it === nothing && return op(nt, a1), nothing + it === nothing && return op(nt, f(a1)), nothing a2, state = it - v = op(a1, a2) + v = op(f(a1), f(a2)) - if n ≤ max(2, pairwise_blocksize(op)) # coarsened base case + if n ≤ max(2, pairwise_blocksize(f, op)) # coarsened base case @simd for _ = 3:n it = iterate(itr, state) it === nothing && return op(nt, v), nothing a, state = it - v = op(v, a) + v = op(v, f(a)) end return op(nt, v), state else n >>= 1 - v, state = reduce_impl(op, itr, v, state, n-2) + v, state = _mapreduce_impl(f, op, itr, v, state, n-2) state === nothing && return op(nt, v), nothing - v, state = reduce_impl(op, itr, v, state, n) + v, state = _mapreduce_impl(f, op, itr, v, state, n) # break return statement into two cases to help type inference return state === nothing ? (op(nt, v), nothing) : (op(nt, v), state) end end -mapreduce_impl(f::F, op::OP, ::_InitialValue, itr) where {F,OP} = reduce_impl(_xfadjoint(op, Generator(f, itr))...) - # for an arbitrary initial value, we need to call foldl, # because op(nt, itr[i]) may have a different type than op(nt, itr[j])) # … it's not clear how to reliably determine this without foldl associativity. mapreduce_impl(f::F, op::OP, nt, itr) where {F,OP} = mapfoldl_impl(f, op, nt, itr) + """ mapreduce(f, op, itrs...; [init]) @@ -374,10 +373,6 @@ pairwise_blocksize(f, op) = 1024 # This combination appears to show a benefit from a larger block size pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 -# for reduce_impl: -pairwise_blocksize(op) = pairwise_blocksize(identity, op) -pairwise_blocksize(op::MappingRF) = pairwise_blocksize(op.f, op.rf) - # handling empty arrays _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer")) From d4e94202644993e5640b505f13b3980669b8f34c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 19:58:16 -0500 Subject: [PATCH 18/20] rm unnecessary specialization --- base/reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reduce.jl b/base/reduce.jl index 11285cc52f55a..7fd605c1ef889 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -331,7 +331,7 @@ end # for an arbitrary initial value, we need to call foldl, # because op(nt, itr[i]) may have a different type than op(nt, itr[j])) # … it's not clear how to reliably determine this without foldl associativity. -mapreduce_impl(f::F, op::OP, nt, itr) where {F,OP} = mapfoldl_impl(f, op, nt, itr) +mapreduce_impl(f, op, nt, itr) = mapfoldl_impl(f, op, nt, itr) """ From 43da6445549da4cf69ebff7ed99f091ec03c0684 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 20:07:01 -0500 Subject: [PATCH 19/20] cleanup/unify mapreduce_empty_iter --- base/reduce.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 7fd605c1ef889..0a29275c92db0 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -281,7 +281,7 @@ mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Int # and implements an index-free in-order pairwise strategy: function mapreduce_impl(f, op, ::_InitialValue, itr) it = iterate(itr) - it === nothing && return reduce_empty_iter(_xfadjoint(op, Generator(f, itr))...) + it === nothing && return mapreduce_empty_iter(f, op, itr) a1, state = it it = iterate(itr, state) it === nothing && return mapreduce_first(f, op, a1) @@ -434,6 +434,8 @@ mapreduce_empty(::typeof(abs2), op, T) = abs2(reduce_empty(op, T)) mapreduce_empty(f::typeof(abs), ::typeof(max), T) = abs(zero(T)) mapreduce_empty(f::typeof(abs2), ::typeof(max), T) = abs2(zero(T)) +mapreduce_empty_iter(f::F, op::OP, itr) where {F,OP} = reduce_empty_iter(_xfadjoint(op, Generator(f, itr))...) + # For backward compatibility: mapreduce_empty_iter(f, op, itr, ItrEltype) = reduce_empty_iter(MappingRF(f, op), itr, ItrEltype) @@ -485,7 +487,7 @@ function _mapreduce(f, op, ::IndexLinear, A::AbstractArrayOrBroadcasted) inds = LinearIndices(A) n = length(inds) if n == 0 - return mapreduce_empty_iter(f, op, A, IteratorEltype(A)) + return mapreduce_empty_iter(f, op, A) elseif n == 1 @inbounds a1 = A[first(inds)] return mapreduce_first(f, op, a1) From a8cf82eae7c2b9dbcda5bb3af619395b358e936d Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 6 Dec 2023 22:33:28 -0500 Subject: [PATCH 20/20] performance optimization by unrolling --- base/reduce.jl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 0a29275c92db0..0b8baa0b9c624 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -277,6 +277,11 @@ end mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) +# repeat expr n times in a block +macro _repeat(n::Int, expr) + Expr(:block, fill(esc(expr), n)...) +end + # the following mapreduce_impl is called by mapreduce for non-array iterators, # and implements an index-free in-order pairwise strategy: function mapreduce_impl(f, op, ::_InitialValue, itr) @@ -287,8 +292,15 @@ function mapreduce_impl(f, op, ::_InitialValue, itr) it === nothing && return mapreduce_first(f, op, a1) a2, state = it v = op(f(a1), f(a2)) + # unroll a few iterations to reduce overhead for small iterators: + @_repeat 14 begin + it = iterate(itr, state) + it === nothing && return v + a, state = it + v = op(v, f(a)) + end n = max(2, pairwise_blocksize(f, op)) - v, state = _mapreduce_impl(f, op, itr, v, state, n) + v, state = _mapreduce_impl(f, op, itr, v, state, n-16) while state !== nothing v, state = _mapreduce_impl(f, op, itr, v, state, n) n *= 2 @@ -330,10 +342,11 @@ end # for an arbitrary initial value, we need to call foldl, # because op(nt, itr[i]) may have a different type than op(nt, itr[j])) -# … it's not clear how to reliably determine this without foldl associativity. +# … it's not clear how to reliably do this without foldl associativity. mapreduce_impl(f, op, nt, itr) = mapfoldl_impl(f, op, nt, itr) + """ mapreduce(f, op, itrs...; [init])