Skip to content

Commit

Permalink
Merge pull request #23528 from JuliaLang/rf/few-set-methods
Browse files Browse the repository at this point in the history
add few missing methods for set-like operations
  • Loading branch information
rfourquet authored Dec 20, 2017
2 parents eb0d9e4 + 6bf6973 commit 7b1c06a
Show file tree
Hide file tree
Showing 9 changed files with 325 additions and 255 deletions.
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,20 @@ Library improvements
linear-to-cartesian conversion ([#24715])
- It has a new constructor taking an array

* several missing set-like operations have been added ([#23528]):
`union`, `intersect`, `symdiff`, `setdiff` are now implemented for
all collections with arbitrarily many arguments, as well as the
mutating counterparts (`union!` etc.). The performance is also
much better in many cases. Note that this change is slightly
breaking: all the non-mutating functions always return a new
object even if only one argument is passed. Moreover the semantics
of `intersect` and `symdiff` is changed for vectors:
+ `intersect` doesn't preserve the multiplicity anymore (use `filter` for
the old behavior)
+ `symdiff` has been made consistent with the corresponding methods for
other containers, by taking the multiplicity of the arguments into account.
Use `unique` to get the old behavior.

* The type `LinearIndices` has been added, providing conversion from
cartesian indices to linear indices using the normal indexing operation. ([#24715])

Expand Down
6 changes: 4 additions & 2 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,10 @@ julia> empty([1.0, 2.0, 3.0], String)
0-element Array{String,1}
```
"""
empty(a::AbstractVector) = empty(a, eltype(a))
empty(a::AbstractVector, ::Type{T}) where {T} = Vector{T}()
empty(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}()

# like empty, but should return a mutable collection, a Vector by default
emptymutable(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}()

## from general iterable to any array

Expand Down
116 changes: 35 additions & 81 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2209,99 +2209,53 @@ function filter!(f, a::AbstractVector)
return a
end

function filter(f, a::Vector)
r = Vector{eltype(a)}()
for ai in a
if f(ai)
push!(r, ai)
end
end
return r
end
filter(f, a::Vector) = mapfilter(f, push!, a, similar(a, 0))

# set-like operators for vectors
# These are moderately efficient, preserve order, and remove dupes.

function intersect(v1, vs...)
ret = Vector{promote_eltype(v1, vs...)}()
for v_elem in v1
inall = true
for vsi in vs
if !in(v_elem, vsi)
inall=false; break
end
end
if inall
push!(ret, v_elem)
end
_unique_filter!(pred, update!, state) = function (x)
if pred(x, state)
update!(state, x)
true
else
false
end
ret
end

function union(vs...)
ret = Vector{promote_eltype(vs...)}()
seen = Set()
for v in vs
for v_elem in v
if !in(v_elem, seen)
push!(ret, v_elem)
push!(seen, v_elem)
end
end
# Build a filter predicate that accepts `x` only when it is not yet in `seen`,
# and records every accepted element in `seen` so later duplicates are rejected.
_grow_filter!(seen) = _unique_filter!(∉, push!, seen)
# Build a filter predicate that accepts `x` only while it is still in `keep`,
# and removes every accepted element from `keep` so it is accepted at most once.
_shrink_filter!(keep) = _unique_filter!(∈, pop!, keep)

function _grow!(pred!, v::AbstractVector, itrs)
filter!(pred!, v) # uniquify v
foldl(v, itrs) do v, itr
mapfilter(pred!, push!, itr, v)
end
ret
end
# setdiff only accepts two args

"""
setdiff(a, b)
union!(v::AbstractVector{T}, itrs...) where {T} =
_grow!(_grow_filter!(sizehint!(Set{T}(), length(v))), v, itrs)

Construct the set of elements in `a` but not `b`. Maintains order with arrays. Note that
both arguments must be collections, and both will be iterated over. In particular,
`setdiff(set,element)` where `element` is a potential member of `set`, will not work in
general.
symdiff!(v::AbstractVector{T}, itrs...) where {T} =
_grow!(_shrink_filter!(symdiff!(Set{T}(), v, itrs...)), v, itrs)

# Examples
```jldoctest
julia> setdiff([1,2,3],[3,4,5])
2-element Array{Int64,1}:
1
2
```
"""
function setdiff(a, b)
args_type = promote_type(eltype(a), eltype(b))
bset = Set(b)
ret = Vector{args_type}()
seen = Set{eltype(a)}()
for a_elem in a
if !in(a_elem, seen) && !in(a_elem, bset)
push!(ret, a_elem)
push!(seen, a_elem)
end
end
ret
# Shared in-place implementation behind `intersect!` and `setdiff!` on vectors.
# `shrinker!` is invoked as `shrinker!(seen, itrs...)`; `itrs` is a collection of
# iterables that is splatted into that call.
function _shrink!(shrinker!, v::AbstractVector, itrs)
seen = Set{eltype(v)}()
# First pass: filter `v` in place while filling `seen` (the same pattern is
# labelled "uniquify v" in `_grow!`).
filter!(_grow_filter!(seen), v)
# Apply the set-level operation to `seen` using the other iterables; this must
# happen after `seen` has been filled and before the second pass below.
shrinker!(seen, itrs...)
# Second pass over `v`. `_in` is defined elsewhere -- presumably a membership
# predicate, so only elements still in `seen` survive (TODO confirm).
filter!(_in(seen), v)
end
# symdiff is associative, so a relatively clean
# way to implement this is by using setdiff and union, and
# recursing. Has the advantage of keeping order, too, but
# not as fast as other methods that make a single pass and
# store counts with a Dict.
symdiff(a) = a
symdiff(a, b) = union(setdiff(a,b), setdiff(b,a))
"""
symdiff(a, b, rest...)

Construct the symmetric difference of elements in the passed in sets or arrays.
Maintains order with arrays.
intersect!(v::AbstractVector, itrs...) = _shrink!(intersect!, v, itrs)
setdiff!( v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs)

# Examples
```jldoctest
julia> symdiff([1,2,3],[3,4,5],[4,5,6])
3-element Array{Int64,1}:
1
2
6
```
"""
symdiff(a, b, rest...) = symdiff(a, symdiff(b, rest...))
vectorfilter(f, v::AbstractVector) = filter(f, v) # TODO: do we want this special case?
vectorfilter(f, v) = [x for x in v if f(x)]

# Shared implementation behind the non-mutating `intersect` and `setdiff`.
# `shrinker!` is invoked as `shrinker!(Set(itr), itrs...)`; `itrs` is a collection
# of iterables that is splatted into that call.
function _shrink(shrinker!, itr, itrs)
# Start from a fresh `Set` of `itr`'s elements, so `itr` itself is not the
# object handed to `shrinker!`.
keep = shrinker!(Set(itr), itrs...)
# Filter `itr` with a predicate built over `keep`. `_shrink_filter!` passes
# `pop!` as its update step, so each accepted element is removed from `keep`.
vectorfilter(_shrink_filter!(keep), itr)
end

intersect(itr, itrs...) = _shrink(intersect!, itr, itrs)
setdiff( itr, itrs...) = _shrink(setdiff!, itr, itrs)
43 changes: 9 additions & 34 deletions base/bitset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ BitSet(itr) = union!(BitSet(), itr)

eltype(::Type{BitSet}) = Int
similar(s::BitSet) = BitSet()

empty(s::BitSet, ::Type{Int}=Int) = BitSet()
emptymutable(s::BitSet, ::Type{Int}=Int) = BitSet()

copy(s1::BitSet) = copy!(BitSet(), s1)
copymutable(s::BitSet) = copy(s)

"""
copy!(dst, src)
Expand Down Expand Up @@ -253,49 +258,17 @@ isempty(s::BitSet) = _check0(s.bits, 1, length(s.bits))

# Mathematical set functions: union!, intersect!, setdiff!, symdiff!

union(s::BitSet) = copy(s)
union(s1::BitSet, s2::BitSet) = union!(copy(s1), s2)
union(s1::BitSet, ss::BitSet...) = union(s1, union(ss...))
union(s::BitSet, ns) = union!(copy(s), ns)
union!(s::BitSet, ns) = (for n in ns; push!(s, n); end; s)
union(s::BitSet, sets...) = union!(copy(s), sets...)
union!(s1::BitSet, s2::BitSet) = _matched_map!(|, s1, s2)

intersect(s1::BitSet) = copy(s1)
intersect(s1::BitSet, ss::BitSet...) = intersect(s1, intersect(ss...))
function intersect(s1::BitSet, ns)
s = BitSet()
for n in ns
n in s1 && push!(s, n)
end
s
end
intersect(s1::BitSet, s2::BitSet) =
length(s1.bits) < length(s2.bits) ? intersect!(copy(s1), s2) : intersect!(copy(s2), s1)
"""
intersect!(s1::BitSet, s2::BitSet)

Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed, `s1`
will be expanded to the size of `s2`.
"""
intersect!(s1::BitSet, s2::BitSet) = _matched_map!(&, s1, s2)

setdiff(s::BitSet, ns) = setdiff!(copy(s), ns)
setdiff!(s::BitSet, ns) = (for n in ns; delete!(s, n); end; s)
setdiff!(s1::BitSet, s2::BitSet) = _matched_map!((p, q) -> p & ~q, s1, s2)

symdiff(s::BitSet, ns) = symdiff!(copy(s), ns)
"""
symdiff!(s, itr)
For each element in `itr`, destructively toggle its inclusion in set `s`.
"""
symdiff!(s::BitSet, ns) = (for n in ns; int_symdiff!(s, n); end; s)
"""
symdiff!(s, n)
The set `s` is destructively modified to toggle the inclusion of integer `n`.
"""
symdiff!(s::BitSet, n::Integer) = int_symdiff!(s, n)
symdiff!(s::BitSet, ns) = foldl(int_symdiff!, s, ns)

function int_symdiff!(s::BitSet, n::Integer)
n0 = _check_bitset_bounds(n)
Expand All @@ -306,6 +279,8 @@ end

symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2)

filter!(f, s::BitSet) = unsafe_filter!(f, s)

@inline in(n::Int, s::BitSet) = _bits_getindex(s.bits, n, s.offset)
@inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false

Expand Down
Loading

0 comments on commit 7b1c06a

Please sign in to comment.