Skip to content

Commit

Permalink
Merge cab4ebd into a23be08
Browse files Browse the repository at this point in the history
  • Loading branch information
nalimilan authored Jun 26, 2018
2 parents a23be08 + cab4ebd commit d201e62
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 1 deletion.
95 changes: 94 additions & 1 deletion base/missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ IteratorSize(::Type{<:SkipMissing}) = SizeUnknown()
IteratorEltype(::Type{SkipMissing{T}}) where {T} = IteratorEltype(T)
eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T))

function Base.iterate(itr::SkipMissing, state...)
function iterate(itr::SkipMissing, state...)
y = iterate(itr.x, state...)
y === nothing && return nothing
item, state = y
Expand All @@ -189,6 +189,99 @@ function Base.iterate(itr::SkipMissing, state...)
item, state
end

# Optimized mapreduce implementation
mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) = _mapreduce(f, op, IndexStyle(itr.x), itr)

"Sentinel indicating that no non-missing value was encountered."
struct AllMissing end

function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
A = itr.x
local ai
inds = LinearIndices(A)
i = first(inds)
ilast = last(inds)
while i <= ilast
@inbounds ai = A[i]
ai === missing || break
i += 1
end
i > ilast && return mapreduce_empty(f, op, eltype(itr))
a1 = ai
i += 1
while i <= ilast
@inbounds ai = A[i]
ai === missing || break
i += 1
end
i > ilast && return mapreduce_first(f, op, a1)
# We know A contains at least two non-missing entries, therefore AllMissing() is not
# a possible return value: the check provides that information to inference
s = mapreduce_impl(f, op, itr, first(inds), last(inds))
s isa AllMissing && error("got AllMissing for an array with non-missing values")
return s
end

_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr)

mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))

@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
ifirst::Integer, ilast::Integer, blksize::Int)
A = itr.x
if ifirst == ilast
@inbounds a1 = A[ifirst]
if a1 === missing
return AllMissing()
else
return mapreduce_first(f, op, a1)
end
elseif ifirst + blksize > ilast
# sequential portion
local ai
i = ifirst
while i <= ilast
@inbounds ai = A[i]
ai === missing || break
i += 1
end
i > ilast && return AllMissing()
a1 = ai::eltype(itr)
i += 1
while i <= ilast
@inbounds ai = A[i]
ai === missing || break
i += 1
end
i > ilast && return mapreduce_first(f, op, a1)
a2 = ai::eltype(itr)
# Unexpectedly, the following assertion allows SIMD instructions to be emitted
A[i]::eltype(itr)
i += 1
v = op(f(a1), f(a2))
@simd for i = i:ilast
@inbounds ai = A[i]
if ai !== missing
v = op(v, f(ai))
end
end
return v
else
# pairwise portion
imid = (ifirst + ilast) >> 1
v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
if v1 isa AllMissing
return v2
elseif v2 isa AllMissing
return v1
else
return op(v1, v2)
end
end
end

"""
coalesce(x, y...)
Expand Down
51 changes: 51 additions & 0 deletions test/missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,57 @@ end
@test eltype(x) === Any
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}

@testset "mapreduce" begin
# Vary size to test splitting blocks with several configurations of missing values
for T in (Int, Float64),
A in (rand(T, 10), rand(T, 1000), rand(T, 10000))
if T === Int
@test sum(A) === sum(skipmissing(A)) ===
reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A))
else
@test sum(A) sum(skipmissing(A)) ===
reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A))
end
@test mapreduce(cos, *, A) mapreduce(cos, *, skipmissing(A))

B = Vector{Union{T,Missing}}(A)
replace!(x -> rand(Bool) ? x : missing, B)
if T === Int
@test sum(collect(skipmissing(B))) === sum(skipmissing(B)) ===
reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B))
else
@test sum(collect(skipmissing(B))) sum(skipmissing(B)) ===
reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B))
end
@test mapreduce(cos, *, collect(skipmissing(A))) mapreduce(cos, *, skipmissing(A))

# Test block full of missing values
B[1:length(B)÷2] .= missing
if T === Int
@test sum(collect(skipmissing(B))) == sum(skipmissing(B)) ==
reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B))
else
@test sum(collect(skipmissing(B))) sum(skipmissing(B)) ==
reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B))
end

@test mapreduce(cos, *, collect(skipmissing(A))) mapreduce(cos, *, skipmissing(A))
end

# Patterns that exercize code paths for inputs with 1 or 2 non-missing values
@test sum(skipmissing([1, missing, missing, missing])) === 1
@test sum(skipmissing([missing, missing, missing, 1])) === 1
@test sum(skipmissing([1, missing, missing, missing, 2])) === 3
@test sum(skipmissing([missing, missing, missing, 1, 2])) === 3

for n in 0:3
itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n)))
@test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0
@test_throws ArgumentError reduce(x -> x/2, itr)
@test_throws ArgumentError mapreduce(x -> x/2, +, itr)
end
end
end

@testset "coalesce" begin
Expand Down

0 comments on commit d201e62

Please sign in to comment.