Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

radix sort fallback to Base on small input #51

1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "1.0.1"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"

[compat]
julia = "1"
Expand Down
32 changes: 29 additions & 3 deletions src/SortingAlgorithms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ __precompile__()
module SortingAlgorithms

using DataStructures
using OffsetArrays
using Base.Sort
using Base.Order

Expand Down Expand Up @@ -61,9 +62,34 @@ uint_mapping(o::Lt, x ) = error("uint_mapping does not work with general L
const RADIX_SIZE = 11
const RADIX_MASK = 0x7FF

function sort!(vs::AbstractVector, lo::Int, hi::Int, ::RadixSortAlg, o::Ordering, ts=similar(vs))
# Input checking
if lo >= hi; return vs; end
# Fallback to default algorithm for short vectors as radix sort is slower for them
# The threshold has been chosen because radix sort allocates an array of that size
# and validated by benchmarks
const RADIX_SMALL_THRESHOLD = 2^RADIX_SIZE


LilithHafner marked this conversation as resolved.
Show resolved Hide resolved
function sort!(vs::AbstractVector, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering)
if hi <= lo
vs
elseif hi - lo < RADIX_SMALL_THRESHOLD
sort!(vs, lo, hi, Base.Sort.defalg(vs), o)
else
_sort!(vs, lo, hi, a, o, similar(vs, lo:hi))
end
end
function sort!(vs::AbstractVector{T}, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering, ts::AbstractVector{T}) where T
if hi <= lo
vs
elseif hi - lo < RADIX_SMALL_THRESHOLD
checkbounds(ts, lo:hi) # Consistently throw an error for insufficient ts.
sort!(vs, lo, hi, Base.Sort.defalg(vs), o)
else
_sort!(vs, lo, hi, a, o, ts)
end
end
function _sort!(vs::AbstractVector{T}, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering, ts::AbstractVector{T}) where T
checkbounds(vs, lo:hi)
checkbounds(ts, lo:hi)
Comment on lines +69 to +90
Copy link
Contributor

@nalimilan nalimilan Oct 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't you simplify all this by taking the following approach (not tested)?

Suggested change
function sort!(vs::AbstractVector, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering)
if hi <= lo
vs
elseif hi - lo < RADIX_SMALL_THRESHOLD
sort!(vs, lo, hi, Base.Sort.defalg(vs), o)
else
_sort!(vs, lo, hi, a, o, similar(vs, lo:hi))
end
end
function sort!(vs::AbstractVector{T}, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering, ts::AbstractVector{T}) where T
if hi <= lo
vs
elseif hi - lo < RADIX_SMALL_THRESHOLD
checkbounds(ts, lo:hi) # Consistently throw an error for insufficient ts.
sort!(vs, lo, hi, Base.Sort.defalg(vs), o)
else
_sort!(vs, lo, hi, a, o, ts)
end
end
function _sort!(vs::AbstractVector{T}, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering, ts::AbstractVector{T}) where T
checkbounds(vs, lo:hi)
checkbounds(ts, lo:hi)
function sort!(vs::AbstractVector{T}, lo::Int, hi::Int, a::RadixSortAlg, o::Ordering,
ts::Union{AbstractVector{T}, Nothing}=nothing) where T
if hi <= lo
return vs
elseif hi - lo < RADIX_SMALL_THRESHOLD
return sort!(vs, lo, hi, Base.Sort.defalg(vs), o)
elseif ts === nothing
ts = similar(vs, lo:hi)
end
checkbounds(vs, lo:hi)
checkbounds(ts, lo:hi)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I didn't want to expose sort!(... ::Nothing) for potential compatibility issues, but if this is internal &/or we are okay with declaring sort!(... ::Nothing), then that would be simpler. Is it okay to expand the interface in this case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also put the type check before the fallback & add a ts boundscheck on the fallback if ts is provided to give consistent errors. (Not require users to test large arrays to ensure they are getting errors)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you think it's okay to expand the interface in this way, I'll make the changes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nalimilan, do you think it's okay to expand the interface in this way?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I think that's fine.


# Make sure we're sorting a bits type
T = Base.Order.ordtype(o, vs)
Expand Down
74 changes: 37 additions & 37 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,44 @@ using Test
using StatsBase
using Random

a = rand(1:10000, 1000)

for alg in [TimSort, HeapSort, RadixSort]
b = sort(a, alg=alg)
@test issorted(b)
ix = sortperm(a, alg=alg)
b = a[ix]
@test issorted(b)
@test a[ix] == b

b = sort(a, alg=alg, rev=true)
@test issorted(b, rev=true)
ix = sortperm(a, alg=alg, rev=true)
b = a[ix]
@test issorted(b, rev=true)
@test a[ix] == b

b = sort(a, alg=alg, by=x->1/x)
@test issorted(b, by=x->1/x)
ix = sortperm(a, alg=alg, by=x->1/x)
b = a[ix]
@test issorted(b, by=x->1/x)
@test a[ix] == b

c = copy(a)
permute!(c, ix)
@test c == b

invpermute!(c, ix)
@test c == a

if alg != RadixSort # RadixSort does not work with Lt orderings
c = sort(a, alg=alg, lt=(>))
for a in [rand(1:10000, 2200), rand(1:10000, 1000)]
for alg in [TimSort, HeapSort, RadixSort]
b = sort(a, alg=alg)
@test issorted(b)
ix = sortperm(a, alg=alg)
b = a[ix]
@test issorted(b)
@test a[ix] == b

b = sort(a, alg=alg, rev=true)
@test issorted(b, rev=true)
ix = sortperm(a, alg=alg, rev=true)
b = a[ix]
@test issorted(b, rev=true)
@test a[ix] == b

b = sort(a, alg=alg, by=x->1/x)
@test issorted(b, by=x->1/x)
ix = sortperm(a, alg=alg, by=x->1/x)
b = a[ix]
@test issorted(b, by=x->1/x)
@test a[ix] == b

c = copy(a)
permute!(c, ix)
@test c == b

invpermute!(c, ix)
@test c == a

if alg != RadixSort # RadixSort does not work with Lt orderings
c = sort(a, alg=alg, lt=(>))
@test b == c
end

c = sort(a, alg=alg, by=x->1/x)
@test b == c
end

c = sort(a, alg=alg, by=x->1/x)
@test b == c
end

randnans(n) = reinterpret(Float64,[rand(UInt64)|0x7ff8000000000000 for i=1:n])
Expand All @@ -54,7 +54,7 @@ end

Random.seed!(0xdeadbeef)

for n in [0:10..., 100, 101, 1000, 1001]
for n in [0:10..., 100, 101, 1000, 1001, 2200, 2201]
r = 1:10
v = rand(1:10,n)
h = fit(Histogram, v, r)
Expand Down