From 5dfd6c959fb0f08e4fa9dd838d9b480d37ca8998 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner <60898866+LilithHafner@users.noreply.github.com> Date: Sat, 2 Apr 2022 21:26:38 -0500 Subject: [PATCH] Add radix sort (#44230) * Add radix sort --- base/sort.jl | 351 ++++++++++++++++++++++++++++++++++++++++++++---- test/sorting.jl | 157 ++++++++++++++++++++-- 2 files changed, 468 insertions(+), 40 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 981eea35d96ab..c8cacb9770c54 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -10,8 +10,8 @@ using .Base: copymutable, LinearIndices, length, (:), iterate, AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline, AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !, extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!, - length, resize!, fill, Missing, require_one_based_indexing, keytype, - UnitRange, max, min + length, resize!, fill, Missing, require_one_based_indexing, keytype, UnitRange, + min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned using .Base: >>>, !== @@ -426,6 +426,22 @@ struct InsertionSortAlg <: Algorithm end struct QuickSortAlg <: Algorithm end struct MergeSortAlg <: Algorithm end +""" + AdaptiveSort(fallback) + +Indicate that a sorting function should use the fastest available algorithm. + +Adaptive sort will use the algorithm specified by `fallback` for types and orders that are +not [`UIntMappable`](@ref). Otherwise, it will typically use: + * Insertion sort for short vectors + * Radix sort for long vectors + * Counting sort for vectors of integers spanning a short range + +Adaptive sort is guaranteed to be stable if the fallback algorithm is stable. +""" +struct AdaptiveSort{Fallback <: Algorithm} <: Algorithm + fallback::Fallback +end """ PartialQuickSort{T <: Union{Integer,OrdinalRange}} @@ -451,7 +467,7 @@ end Indicate that a sorting function should use the insertion sort algorithm. Insertion sort traverses the collection one element at a time, inserting each element into its correct, sorted position in -the output list. +the output vector. Characteristics: * *stable*: preserves the ordering of elements which @@ -495,8 +511,8 @@ Characteristics: """ const MergeSort = MergeSortAlg() -const DEFAULT_UNSTABLE = QuickSort -const DEFAULT_STABLE = MergeSort +const DEFAULT_UNSTABLE = AdaptiveSort(QuickSort) +const DEFAULT_STABLE = AdaptiveSort(MergeSort) const SMALL_ALGORITHM = InsertionSort const SMALL_THRESHOLD = 20 @@ -652,13 +668,202 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, return v end +# This is a stable least significant bit first radix sort. +# +# That is, it first sorts the entire vector by the last chunk_size bits, then by the second +# to last chunk_size bits, and so on. Stability means that it will not reorder two elements +# that compare equal. This is essential so that the order introduced by earlier, +# less significant passes is preserved by later passes. +# +# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it +# * counts the number of entries that fall into each bucket +# * uses those counts to compute the indices to move elements of those buckets into +# * moves elements into the computed indices in the swap array +# * switches the swap and working array +# +# In the case of an odd number of passes, the returned vector will === the input vector t, +# not v. This is one of the many reasons radix_sort! is not exported. 
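As a concrete illustration of the passes described above, here is a minimal, self-contained sketch of one stable counting pass over an 8-bit chunk. It is not part of the patch: the helper name `single_radix_pass` and the fixed chunk size of 8 are assumptions for illustration; it only mirrors the bucketing logic of `radix_sort!` below on a toy input.

# One stable counting pass over the 8-bit chunk selected by `shift`.
function single_radix_pass(v::Vector{UInt16}, shift::Integer)
    mask = UInt(1) << 8 - 0x1             # 0xff, one 8-bit chunk
    counts = zeros(UInt, Int(mask) + 2)   # counts[2:end] will hold bucket sizes
    for x in v
        counts[(x >> shift)&mask + 2] += 1
    end
    counts[1] = 1                         # target index of the first bucket
    cumsum!(counts, counts)               # turn bucket sizes into target indices
    t = similar(v)
    for x in v                            # move each element into its bucket's
        i = (x >> shift)&mask + 1         # next free slot in t, preserving the
        t[counts[i]] = x                  # order of equal chunks (stability)
        counts[i] += 1
    end
    t
end

# single_radix_pass(UInt16[0x0201, 0x0102, 0x0202, 0x0101], 0) groups by the low
# byte, giving [0x0201, 0x0101, 0x0102, 0x0202]; a second pass with shift = 8 then
# completes the sort, because each pass preserves the order produced by earlier passes.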
+function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, + t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned + # bits is unsigned for performance reasons. + mask = UInt(1) << chunk_size - 0x1 + counts = Vector{UInt}(undef, mask+2) + + @inbounds for shift in 0:chunk_size:bits-1 + + # counts[2:mask+2] will store the number of elements that fall into each bucket. + # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff. + counts .= 0 + for k in lo:hi + x = v[k] # lookup the element + i = (x >> shift)&mask + 2 # compute its bucket's index for this pass + counts[i] += 1 # increment that bucket's count + end + + counts[1] = lo # set target index for the first bucket + cumsum!(counts, counts) # set target indices for subsequent buckets + # counts[1:mask+1] now stores indices where the first member of each bucket + # belongs, not the number of elements in each bucket. We will put the first element + # of bucket 0x00 in t[counts[1]], the next element of bucket 0x00 in t[counts[1]+1], + # and the last element of bucket 0x00 in t[counts[2]-1]. + + for k in lo:hi + x = v[k] # lookup the element + i = (x >> shift)&mask + 1 # compute its bucket's index for this pass + j = counts[i] # lookup the target index + t[j] = x # put the element where it belongs + counts[i] = j + 1 # increment the target index for the next + end # ↳ element in this bucket + + v, t = t, v # swap the now sorted destination vector t back into primary vector v + + end + + v +end +function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) + # chunk_size is the number of bits to radix over at once. + # We need to allocate an array of size 2^chunk size, and on the other hand the higher + # the chunk size the fewer passes we need. Theoretically, chunk size should be based on + # the Lambert W function applied to length. Empirically, we use this heuristic: + guess = min(10, log(maybe_unsigned(hi-lo))*3/4+3) + # TODO the maximum chunk size should be based on archetecture cache size. + + # We need iterations * chunk size ≥ bits, and these cld's + # make an effort to get iterations * chunk size ≈ bits + UInt8(cld(bits, cld(bits, guess))) +end + +# For AbstractVector{Bool}, counting sort is always best. +# This is an implementation of counting sort specialized for Bools. +function sort!(v::AbstractVector{<:Bool}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering) + first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v + count = 0 + @inbounds for i in lo:hi + if v[i] == first + count += 1 + end + end + @inbounds v[lo:lo+count-1] .= first + @inbounds v[lo+count:hi] .= !first + v +end + +maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt +maybe_unsigned(x::BitSigned) = unsigned(x) +function _extrema(v::AbstractArray, lo::Integer, hi::Integer, o::Ordering) + mn = mx = v[lo] + @inbounds for i in (lo+1):hi + vi = v[i] + lt(o, vi, mn) && (mn = vi) + lt(o, mx, vi) && (mx = vi) + end + mn, mx +end +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering) + # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! + # so we skip straight to the fallback algorithm which is comparison based. 
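+    # (this happens, for example, when sorting with an arbitrary `by` transformation,
+    # which produces a `By` ordering, or when sorting an element type for which no
+    # uint mapping is defined)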
+ U = UIntMappable(eltype(v), o) + U === nothing && return sort!(v, lo, hi, a.fallback, o) + + # to avoid introducing excessive detection costs for the trivial sorting problem + # and to avoid overflow, we check for small inputs before any other runtime checks + hi <= lo && return v + lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow + # only count sort on a short range can compete with insertion sort when lenm1 < 40 + # and the optimization is not worth the detection cost, so we use insertion sort. + lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large + # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted + # input and this guarantees presorted input will always be efficiently handled + issorted(view(v, lo:hi), o) && return v + + # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) + if lenm1 >= 500 && issorted(view(v, lo:hi), ReverseOrdering(o)) + reverse!(view(v, lo:hi)) + return v + end + + # UInt128 does not support fast bit shifting so we never + # dispatch to radix sort but we may still perform count sort + if sizeof(U) > 8 + if eltype(v) <: Integer && o isa DirectOrdering + v_min, v_max = _extrema(v, lo, hi, Forward) + v_range = maybe_unsigned(v_max-v_min) + v_range == 0 && return v # all same + + # we know lenm1 ≥ 40, so this will never underflow. + # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback + if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small + return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi) + end + end + return sort!(v, lo, hi, a.fallback, o) + end + + v_min, v_max = _extrema(v, lo, hi, o) + lt(o, v_min, v_max) || return v # all same + if eltype(v) <: Integer && o isa DirectOrdering + R = o === Reverse + v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min) + if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small + return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi) + end + end + + u_min, u_max = uint_map(v_min, o), uint_map(v_max, o) + u_range = maybe_unsigned(u_max-u_min) + if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small + u = uint_map!(v, lo, hi, o) + sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi) + return uint_unmap!(v, u, lo, hi, o) + end + + # if u's range is small, then once we subtract out v_min, we'll get a vector like + # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009] + # where we only need to radix over the last few bits (5, in the example). + bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) + + # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). Radix sort + # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1 + # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff + # lenm1 < 3bits. + # Insertion < Radix + # lenm1^2 < 3 * bits * lenm1 + # lenm1 < 3bits + if lenm1 < 3bits + # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort. + alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM + return sort!(v, lo, hi, alg, o) + end + + # At this point, we are committed to radix sort. + u = uint_map!(v, lo, hi, o) + + # we subtract u_min to avoid radixing over unnecessary bits. 
For example, + # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] + # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with + # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and + # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] + # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. + # the overhead for this subtraction is small enough that it is worthwhile in many cases. + + # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100 + @inbounds for i in lo:hi + u[i] -= u_min + end + + u2 = radix_sort!(u, lo, hi, bits, similar(u)) + uint_unmap!(v, u2, lo, hi, o, u_min) +end ## generic sorting methods ## defalg(v::AbstractArray) = DEFAULT_STABLE defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE -defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE -defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE +defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation +defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation function sort!(v::AbstractVector, alg::Algorithm, order::Ordering) inds = axes(v,1) @@ -711,31 +916,20 @@ function sort!(v::AbstractVector; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) - ordr = ord(lt,by,rev,order) - if (ordr === Forward || ordr === Reverse) && eltype(v)<:Integer - n = length(v) - if n > 1 - min, max = extrema(v) - (diff, o1) = sub_with_overflow(max, min) - (rangelen, o2) = add_with_overflow(diff, oneunit(diff)) - if !o1 && !o2 && rangelen < div(n,2) - return sort_int_range!(v, rangelen, min, ordr === Reverse ? reverse : identity) - end - end - end - sort!(v, alg, ordr) + sort!(v, alg, ord(lt,by,rev,order)) end # sort! for vectors of few unique integers -function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse) +function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse, + lo=firstindex(x), hi=lastindex(x)) offs = 1 - minval counts = fill(0, rangelen) - @inbounds for i = eachindex(x) + @inbounds for i = lo:hi counts[x[i] + offs] += 1 end - idx = firstindex(x) + idx = lo @inbounds for i = maybereverse(1:rangelen) lastidx = idx + counts[i] - 1 val = i-offs @@ -1109,15 +1303,104 @@ function sort!(A::AbstractArray; A end + +## uint mapping to allow radix sorting primitives other than UInts ## + +""" + UIntMappable(T::Type, order::Ordering) + +Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and +[`uint_unmap`](@ref) are implemented. + +If either is not implemented, return `nothing`. +""" +UIntMappable(T::Type, order::Ordering) = nothing + +""" + uint_map(x, order::Ordering)::Unsigned + +Map `x` to an un unsigned integer, maintaining sort order. + +The map should be reversible with [`uint_unmap`](@ref), so `isless(order, a, b)` must be +a linear ordering for `a, b <: typeof(x)`. Satisfies +`isless(order, a, b) === (uint_map(a, order) < uint_map(b, order))` +and `x === uint_unmap(typeof(x), uint_map(x, order), order)` + +See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref) +""" +function uint_map end + +""" + uint_unmap(T::Type, u::Unsigned, order::Ordering) + +Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies +`x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`. 
+ +See also: [`uint_map`](@ref) [`UIntMappable`](@ref) +""" +function uint_unmap end + + +### Primitive Types + +# Integers +uint_map(x::Unsigned, ::ForwardOrdering) = x +uint_unmap(::Type{T}, u::T, ::ForwardOrdering) where T <: Unsigned = u + +uint_map(x::Signed, ::ForwardOrdering) = + unsigned(xor(x, typemin(x))) +uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed = + xor(signed(u), typemin(T)) + +# unsigned(Int) is not available during bootstrapping. +for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)] + @eval UIntMappable(::Type{<:Union{$U, $S}}, ::ForwardOrdering) = $U +end + +# Floats are not UIntMappable under regular orderings because they fail on NaN edge cases. +# uint mappings for floats are defined in Float, where the Left and Right orderings +# guarantee that there are no NaN values + +# Chars +uint_map(x::Char, ::ForwardOrdering) = reinterpret(UInt32, x) +uint_unmap(::Type{Char}, u::UInt32, ::ForwardOrdering) = reinterpret(Char, u) +UIntMappable(::Type{Char}, ::ForwardOrdering) = UInt32 + +### Reverse orderings +uint_map(x, rev::ReverseOrdering) = ~uint_map(x, rev.fwd) +uint_unmap(T::Type, u::Unsigned, rev::ReverseOrdering) = uint_unmap(T, ~u, rev.fwd) +UIntMappable(T::Type, order::ReverseOrdering) = UIntMappable(T, order.fwd) + + +### Vectors + +# Convert v to unsigned integers in place, maintaining sort order. +function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering) + u = reinterpret(UIntMappable(eltype(v), order), v) + @inbounds for i in lo:hi + u[i] = uint_map(v[i], order) + end + u +end + +function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer, + order::Ordering, offset::U=zero(U)) where U <: Unsigned + @inbounds for i in lo:hi + v[i] = uint_unmap(eltype(v), u[i]+offset, order) + end + v +end + + ## fast clever sorting for floats ## module Float using ..Sort using ...Order -using ..Base: @inbounds, AbstractVector, Vector, last, axes, Missing +using ..Base: @inbounds, AbstractVector, Vector, last, axes, Missing, Type, reinterpret import Core.Intrinsics: slt_int -import ..Sort: sort! 
+import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering const Floats = Union{Float32,Float64} @@ -1140,6 +1423,18 @@ right(o::Perm) = Perm(right(o.order), o.data) lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x) lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y) +uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x) +uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u) +uint_map(x::Float32, ::Right) = reinterpret(UInt32, x) +uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u) +UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32 + +uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x) +uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u) +uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) +uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) +UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 + isnan(o::DirectOrdering, x::Floats) = (x!=x) isnan(o::DirectOrdering, x::Missing) = false isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) @@ -1221,6 +1516,10 @@ issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x)) issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering) + # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). + # The overhead is O(n). For n < 10, it's not worth it. + length(v) < 10 && return sort!(v, first(axes(v,1)), last(axes(v,1)), SMALL_ALGORITHM, o) + i, j = lo, hi = specials2end!(v,a,o) @inbounds while true while i <= j && issignleft(o,v[i]); i += 1; end @@ -1240,7 +1539,7 @@ fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = sort!(v::FPSortable, a::Algorithm, o::DirectOrdering) = fpsort!(v, a, o) -sort!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}) = +sort!(v::AbstractVector{<:Union{Signed, Unsigned}}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}) = fpsort!(v, a, o) end # module Sort.Float diff --git a/test/sorting.jl b/test/sorting.jl index 86479eca6cc78..2cb4eec93b380 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -265,7 +265,8 @@ Base.step(r::ConstantRange) = 0 @test searchsortedlast(r, UInt(1), Forward) == 5 a = rand(1:10000, 1000) - for alg in [InsertionSort, MergeSort] + for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE] + b = sort(a, alg=alg) @test issorted(b) @@ -330,15 +331,17 @@ Base.step(r::ConstantRange) = 0 end @testset "unstable algorithms" begin - b = sort(a, alg=QuickSort) - @test issorted(b) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) - b = sort(a, alg=QuickSort, rev=true) - @test issorted(b, rev=true) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) - b = sort(a, alg=QuickSort, by=x->1/x) - @test issorted(b, by=x->1/x) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) + for alg in [QuickSort, Base.DEFAULT_UNSTABLE] + b = sort(a, alg=alg) + @test issorted(b) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) + b = sort(a, alg=alg, rev=true) + @test issorted(b, rev=true) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) + b = sort(a, alg=alg, by=x->1/x) + @test issorted(b, by=x->1/x) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) + end end end @testset "insorted" begin @@ -464,7 +467,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort] + for alg in [MergeSort, 
Base.DEFAULT_STABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -477,7 +480,7 @@ end end # unstable algorithms - for alg in [QuickSort, PartialQuickSort(1:n)] + for alg in [QuickSort, PartialQuickSort(1:n), Base.DEFAULT_UNSTABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -509,8 +512,9 @@ end v = randn_with_nans(n,0.1) # TODO: alg = PartialQuickSort(n) fails here - for alg in [InsertionSort, QuickSort, MergeSort], + for alg in [InsertionSort, QuickSort, MergeSort, Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], rev in [false,true] + alg === InsertionSort && n >= 3000 && continue # test float sorting with NaNs s = sort(v, alg=alg, rev=rev) @test issorted(s, rev=rev) @@ -570,7 +574,7 @@ end @test all(issorted, [sp[inds.==x] for x in 1:200]) end - for alg in [InsertionSort, MergeSort] + for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE] sp = sortperm(inds, alg=alg) @test all(issorted, [sp[inds.==x] for x in 1:200]) end @@ -685,4 +689,129 @@ end @test searchsortedlast(o, 1.5) == -1 end +function adaptive_sort_test(v; trusted=InsertionSort, kw...) + sm = sum(hash.(v)) + truth = sort!(deepcopy(v); alg=trusted, kw...) + return ( + v === sort!(v; kw...) && + issorted(v; kw...) && + sum(hash.(v)) == sm && + all(v .=== truth)) +end +@testset "AdaptiveSort" begin + len = 70 + + @testset "Bool" begin + @test sort([false, true, false]) == [false, false, true] + @test sort([false, true, false], by=x->0) == [false, true, false] + @test sort([false, true, false], rev=true) == [true, false, false] + end + + @testset "fallback" begin + @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback + @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort) + end + + @test adaptive_sort_test(rand(Int, 20)) # InsertionSort + + @testset "large eltype" begin + for rev in [true, false] + @test adaptive_sort_test(rand(Int128, len), rev=rev) # direct ordered int + @test adaptive_sort_test(fill(rand(UInt128), len), rev=rev) # all same + @test adaptive_sort_test(rand(Int128.(1:len), len), rev=rev) # short int range + end + end + + @test adaptive_sort_test(fill(rand(), len)) # All same + + @testset "count sort" begin + @test adaptive_sort_test(rand(1:20, len)) + @test adaptive_sort_test(rand(1:20, len), rev=true) + end + + @testset "post-serialization count sort" begin + v = reinterpret(Float64, rand(1:20, len)) + @test adaptive_sort_test(copy(v)) + @test adaptive_sort_test(copy(v), rev=true) + end + + @testset "presorted" begin + @test adaptive_sort_test(sort!(rand(len))) + @test adaptive_sort_test(sort!(rand(Float32, len), rev=true)) + @test adaptive_sort_test(vcat(sort!(rand(Int16, len)), Int16(0))) + @test adaptive_sort_test(vcat(sort!(rand(UInt64, len), rev=true), 0)) + end + + @testset "lenm1 < 3bits fallback" begin + @test adaptive_sort_test(rand(len)) # InsertionSort + @test adaptive_sort_test(rand(130)) # QuickSort + end + + @test adaptive_sort_test(rand(1000)) # RadixSort +end + +@testset "uint mappings" begin + + #Construct value lists + floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, + prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))] + for T in [Float16, Float32, Float64]] + + ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1] + for T in Base.BitInteger_types] + + char = Char['\n', ' ', Char(0), Char(8), Char(17), typemax(Char)] + + vals = vcat(floats, ints, [char]) + + #Add random values + 
UIntN(::Val{1}) = UInt8 + UIntN(::Val{2}) = UInt16 + UIntN(::Val{4}) = UInt32 + UIntN(::Val{8}) = UInt64 + UIntN(::Val{16}) = UInt128 + map(vals) do x + T = eltype(x) + U = UIntN(Val(sizeof(T))) + append!(x, rand(T, 4)) + append!(x, reinterpret.(T, rand(U, 4))) + if T <: AbstractFloat + mask = reinterpret(U, T(NaN)) + append!(x, reinterpret.(T, mask .| rand(U, 4))) + end + end + + for x in vals + T = eltype(x) + U = UIntN(Val(sizeof(T))) + for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)] + if order isa Base.Order.By || T === Float16 || + ((T <: AbstractFloat) == (order isa DirectOrdering)) + @test Base.Sort.UIntMappable(T, order) === nothing + continue + end + + @test Base.Sort.UIntMappable(T, order) === U + x2 = deepcopy(x) + u = Base.Sort.uint_map!(x2, 1, length(x), order) + @test eltype(u) === U + @test all(Base.Sort.uint_map.(x, (order,)) .=== u) + mn = rand(U) + u .-= mn + @test x2 === Base.Sort.uint_unmap!(x2, u, 1, length(x), order, mn) + @test all(x2 .=== x) + + for a in x + for b in x + if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right() + # Left and Right orderings guarantee homogeneous sign and no NaNs + (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue + end + @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order)) + end + end + end + end +end + end
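A minimal usage sketch of the uint-mapping round trip that the tests above exercise, assuming a Julia build containing this patch. `uint_map`, `uint_unmap`, and `UIntMappable` are internal, unexported helpers, so importing them from `Base.Sort` is an assumption about internals rather than public API; the example values match the `Int32[3, -1, 2]` example in the comments of `sort!` above.

import Base.Sort: uint_map, uint_unmap, UIntMappable
using Base.Order: Forward

x = Int32[3, -1, 2]
@assert UIntMappable(Int32, Forward) === UInt32

# the mapping preserves sort order ...
u = uint_map.(x, (Forward,))     # UInt32[0x80000003, 0x7fffffff, 0x80000002]
@assert sortperm(u) == sortperm(x)

# ... and round-trips back to the original values
@assert uint_unmap.(Int32, u, (Forward,)) == x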