Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up permsort by utilizing stability of the default sorting algorithm #47587

Draft
wants to merge 41 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
42c70a1
initial functionality
Oct 17, 2022
61e4006
support 5- and 3-argument sort! for backwards compatability
Oct 29, 2022
901182c
test for bug that slipped through test suite
Oct 17, 2022
e032ba6
fix bug
Oct 17, 2022
e6cfee0
make send_to_end more human friendly (and less compiler friendly! int…
Oct 20, 2022
f160582
Give each sorting pass and DEFAULT_STABLE a docstring
Oct 29, 2022
15a4484
add tests and fix typos they unveiled
Oct 30, 2022
d82b090
avoid potential name conflict
Nov 1, 2022
029cbae
switch to custom keyword handling
Nov 1, 2022
d3bdca3
remove InsertionSortAlg and MergeSortAlg
Nov 2, 2022
2232cac
better algorithm display
Nov 2, 2022
a574c7f
stop passing U around
Nov 2, 2022
05de36e
remove lenm1
Nov 6, 2022
70290d6
fix unexpected allocations in Radix Sort
Nov 7, 2022
f06de10
fix doctests? I have no idea how
Nov 7, 2022
38f4512
support and test backwards compatability with packages that depend in…
Nov 9, 2022
383b9d2
Merge branch 'master' into sort-dispatch
Nov 9, 2022
d8ae968
improve extensibility tests
Nov 10, 2022
c633419
overhall scratch space handling
Nov 11, 2022
32a6f54
Merge branch 'master' into sort-dispatch
LilithHafner Nov 14, 2022
a2c2646
Consistency with other constructors
Nov 15, 2022
71e8fa1
Introduce PermUnstable to speed up sortperm
petvana Nov 15, 2022
812c917
Fix a mistake
petvana Nov 15, 2022
e752ea7
pass around even fewer easily computed things in kw to reduce load on…
Nov 18, 2022
15666f2
Merge branch 'master' into sort-dispatch
LilithHafner Nov 18, 2022
04399d9
Merge branch 'sort-dispatch' into pv/PermUnstable-v4
petvana Nov 18, 2022
34621c7
Merge branch 'pv/PermUnstable-v4' into pv/PermUnstable-v5
petvana Dec 5, 2022
7e6f103
Restore the PR
petvana Dec 5, 2022
77b2b08
Rename to PermFast
petvana Dec 5, 2022
36d3ff3
Introduce send_to_end_stable!
petvana Dec 5, 2022
1fe68d9
Fix spacing
petvana Dec 5, 2022
dd1d89b
Fix trailing whitespace
petvana Dec 5, 2022
91c2d2a
Merge branch 'master' into pv/PermUnstable-v4
petvana Dec 5, 2022
20ddeb4
Small fixes
petvana Dec 6, 2022
c14432b
Fix sortperm!
petvana Dec 6, 2022
176d779
Merge branch 'master' into pv/PermUnstable-v4
petvana Dec 13, 2022
a2f9710
Commit suggestion from review
petvana Dec 14, 2022
2d2cf4d
De-duplicate code
petvana Dec 14, 2022
ef8e8eb
Merge branch 'pv/PermUnstable-v4' of github.com:petvana/julia into pv…
petvana Dec 14, 2022
3b972eb
Re-use scratch array
petvana Dec 14, 2022
9b5be34
Merge branch 'master' into pv/PermUnstable-v4
petvana Dec 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion base/ordering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import .Base:

export # not exported by Base
Ordering, Forward, Reverse,
By, Lt, Perm,
By, Lt, Perm, PermFast,
ReverseOrdering, ForwardOrdering,
DirectOrdering,
lt, ord, ordtype
Expand Down Expand Up @@ -106,8 +106,21 @@ struct Perm{O<:Ordering,V<:AbstractVector} <: Ordering
data::V
end

"""
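    PermFast(order::Ordering, data::AbstractVector)

`Ordering` on the indices of `data` where `i` is less than `j` if `data[i]` is
less than `data[j]` according to `order`. In the case that `data[i]` and
`data[j]` are equal, neither index is less than the other: unlike `Perm`, ties
are not broken by comparing the indices, so the resulting order of such indices
is left to the sorting algorithm. Skipping the tie-break makes each comparison
cheaper than with `Perm`; it is designed to be used with a stable sorting
algorithm.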
"""
struct PermFast{O<:Ordering,V<:AbstractVector} <: Ordering
order::O  # ordering used to compare the elements of `data`
data::V  # vector whose elements are compared; the ordering itself is on its indices
end

# Reversing a wrapper ordering rebuilds the same wrapper around the reversed inner
# ordering; the `by` function or `data` vector is passed through unchanged.
ReverseOrdering(by::By) = By(by.by, ReverseOrdering(by.order))
ReverseOrdering(perm::Perm) = Perm(ReverseOrdering(perm.order), perm.data)
# Reversing a `PermFast` yields a `PermFast` (not a `Perm`).
ReverseOrdering(perm::PermFast) = PermFast(ReverseOrdering(perm.order), perm.data)

"""
lt(o::Ordering, a, b)
Expand All @@ -125,6 +138,12 @@ lt(o::Lt, a, b) = o.lt(a,b)
(lt(p.order, da, db)::Bool) | (!(lt(p.order, db, da)::Bool) & (a < b))
end

# Compare the indices `a` and `b` by the elements of `p.data` they refer to, using
# `p.order`. The indices themselves are never compared, so there is no tie-break.
@propagate_inbounds function lt(p::PermFast, a::Integer, b::Integer)
    lhs, rhs = p.data[a], p.data[b]
    return lt(p.order, lhs, rhs)::Bool
end

_ord(lt::typeof(isless), by::typeof(identity), order::Ordering) = order
_ord(lt::typeof(isless), by, order::Ordering) = By(by, order)

Expand Down
82 changes: 68 additions & 14 deletions base/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -560,13 +560,49 @@ elements that are not
# `ReverseOrdering` method of `send_to_end!`; returns a `(lo, hi)` tuple whose second
# entry is always the incoming `hi`.
@inline function send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function
    if end_stable
        # Partition `v[lo:hi]` with the negated predicate; the new lower bound is one
        # past the index that call returns.
        return (send_to_end!(!f, v; lo, hi) + 1, hi)
    else
        # Partition a reversed view of `v[lo:hi]` and map the returned view index back
        # to an index into `v`.
        return (hi - send_to_end!(f, view(v, hi:-1:lo)) + 1, hi)
    end
end

"""
    send_to_end_stable!(f::Function, v::AbstractVector, out::AbstractVector; [lo, hi])

Copy the elements of `v` into `out` so that every element for which `f` returns `true` is
placed at the end of `out`, and return the index of the last element for which `f` returns `false`.

`send_to_end_stable!(f, v, out; lo, hi)` is equivalent to `send_to_end_stable!(f, view(v, lo:hi), view(out, lo:hi))+lo-1`

Preserves the relative order of the elements within each of the two groups.
"""
function send_to_end_stable!(f::F, v::AbstractVector, out::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function
offset = 0
@inbounds begin
while lo <= hi
x = v[lo]
fx = f(x)::Bool
out[(fx ? hi : lo) - offset] = x
offset += fx
lo += 1
end
end

# This is similar to the partition function
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have code re-use; the loop here is almost exactly the same as the loops in partition!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice. However, partition! leaves the pivot in place and splits the remaining elements into two parts around it. Thus, sharing the loop seems challenging.

pivot_index = lo-offset-1
# out[<=pivot_index] <* f(x) = false
# out[>pivot_index] >* f(x) = true

# Make the results stable
reverse!(out, pivot_index+1, hi)
return pivot_index
end

# Ordering-aware wrappers around the three-argument `send_to_end_stable!`. Each returns
# a `(lo, hi)` tuple computed from the index that method returns.
@inline function send_to_end_stable!(f::F, v::AbstractVector, out::AbstractVector, ::ForwardOrdering; lo, hi) where F <: Function
    last_false = send_to_end_stable!(f, v, out; lo, hi)
    return (lo, last_false)
end
@inline function send_to_end_stable!(f::F, v::AbstractVector, out::AbstractVector, ::ReverseOrdering; lo, hi) where F <: Function
    # Run on reversed views of both vectors, then translate the returned view index
    # back to an index into the parent vectors.
    pivot = send_to_end_stable!(f, view(v, hi:-1:lo), view(out, hi:-1:lo))
    return (hi - pivot + 1, hi)
end

function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw)
@getkw lo hi
if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering
lo, hi = send_to_end!(ismissing, v, o; lo, hi)
_sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi))
elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering &&
elseif eltype(v) <: Integer && (o isa Perm || o isa PermT) && o.order isa DirectOrdering &&
nonmissingtype(eltype(o.data)) != eltype(o.data) &&
all(i === j for (i,j) in zip(v, eachindex(o.data)))
# TODO make this branch known at compile time
Expand All @@ -590,7 +626,8 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw)
hi = hi_i
end

_sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi))
PermT = o isa PermFast ? PermFast : Perm
_sort!(v, a.next, PermT(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi))
else
_sort!(v, a.next, o, kw)
end
Expand Down Expand Up @@ -618,26 +655,39 @@ after_zero(::ForwardOrdering, x) = !signbit(x)
after_zero(::ReverseOrdering, x) = signbit(x)
is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T)
function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw)
@getkw lo hi
@getkw lo hi scratch
if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering
lo, hi = send_to_end!(isnan, v, o, true; lo, hi)
iv = reinterpret(UIntType(eltype(v)), v)
j = send_to_end!(x -> after_zero(o, x), v; lo, hi)
scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j))
if scratch === nothing # Union split
_sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch))
_sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, nothing))
else
_sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LilithHafner Btw, is scratch type-stable here, i.e., can compiler infer that scratch cannot be Nothing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recall that removing the if statement entirely introduced dynamic dispatch. IIUC, all of type inference is an implementation detail, so I'm not totally sure, but I believe that we need an if statement to force union splitting, though it works well whether we use _sort!(..., nothing) or _sort!(..., scratch) because the compiler can determine the type of scratch at compile time.

end
elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data))
lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi)
elseif eltype(v) <: Integer && (o isa Perm || o isa PermFast) && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data))
if o isa Perm
petvana marked this conversation as resolved.
Show resolved Hide resolved
lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi)
j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi)
PermT = Perm
kw = (;kw..., lo, hi=j)
else
scratch, t = make_scratch(scratch, eltype(v), hi-lo+1)
lo2, hi2 = send_to_end_stable!(i -> isnan(@inbounds o.data[i]), v, scratch, o.order; lo, hi)
ran = lo < lo2 ? (lo:lo2-1) : (hi2+1:hi)
v[ran] = view(scratch, ran)
lo, hi = lo2, hi2
j = send_to_end_stable!(i -> after_zero(o.order, @inbounds o.data[i]), scratch, v; lo, hi)
PermT = PermFast
kw = (;kw..., lo, hi=j, scratch=scratch)
end
ip = reinterpret(UIntType(eltype(o.data)), o.data)
j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi)
scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j))
scratch = _sort!(v, a.next, Perm(Reverse, ip), kw)
if scratch === nothing # Union split
_sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch))
_sort!(v, a.next, PermT(Forward, ip), (;kw..., lo=j+1, hi, nothing))
else
_sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch))
_sort!(v, a.next, PermT(Forward, ip), (;kw..., lo=j+1, hi, scratch))
end
else
_sort!(v, a.next, o, kw)
Expand Down Expand Up @@ -1579,7 +1629,11 @@ function _sortperm(A::AbstractArray; alg, order, scratch, dims...)
end
end
ix = copymutable(LinearIndices(A))
sort!(ix; alg, order = Perm(order, vec(A)), scratch, dims...)
if alg == DEFAULT_STABLE
sort!(ix; alg, order = PermFast(order, vec(A)), scratch, dims...)
else
sort!(ix; alg, order = Perm(order, vec(A)), scratch, dims...)
end
end


Expand Down Expand Up @@ -1636,11 +1690,11 @@ julia> sortperm!(p, A; dims=2); p
if !initialized
ix .= LinearIndices(A)
end

PermT = alg == DEFAULT_STABLE ? PermFast : Perm
if rev === true
sort!(ix; alg, order=Perm(ord(lt, by, true, order), vec(A)), scratch, dims...)
sort!(ix; alg, order = PermT(ord(lt, by, rev, order), vec(A)), scratch, dims...)
else
sort!(ix; alg, order=Perm(ord(lt, by, nothing, order), vec(A)), scratch, dims...)
sort!(ix; alg, order = PermT(ord(lt, by, nothing, order), vec(A)), scratch, dims...)
end
end

Expand Down