From 7457402e18721ba169d73204c2d02f768a7e0301 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 29 Oct 2022 11:51:35 +0600 Subject: [PATCH] Give each sorting pass and DEFAULT_STABLE a docstring --- base/sort.jl | 286 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 210 insertions(+), 76 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index db18d6c832a24..6cc4c1707098a 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -412,10 +412,15 @@ insorted(x, r::AbstractRange) = in(x, r) abstract type Algorithm end +""" + MissingOptimization(next) <: Algorithm -# -# Missing values always go at the end -# +Filter out missing values. + +Missing values are placed after other values according to `DirectOrdering`s. This pass puts +them there and passes on a view into the original vector that excludes the missing values. +This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`. +""" struct MissingOptimization{T <: Algorithm} <: Algorithm next::T end @@ -496,10 +501,16 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering; end +""" + IEEEFloatOptimization(next) <: Algorithm -# -# fast clever sorting for floats -# +Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. + +IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as +unsigned integers with the bits with a few exceptions. This pass handles those exceptions. + +This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`. +""" struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm next::T end @@ -532,10 +543,14 @@ function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering; end +""" + BoolOptimization(next) <: Algorithm + +Sort `AbstractVector{Bool}`s using a specialized version of counting sort. -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools. 
-# Accepts unused buffer to avoid method ambiguity. +Accesses each element at most twice (one read and one write), and performs at most two +comparisons. +""" struct BoolOptimization{T <: Algorithm} <: Algorithm next::T end @@ -554,10 +569,15 @@ function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering; lo::In end +""" + IsUIntMappable(yes, no) <: Algorithm -# -# -# +Determines if the elements of a vector can be mapped to unsigned integers while preserving +their order under the specified ordering. + +If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that +the elements may be mapped to. Otherwise dispatch to the `no` algorithm. +""" struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm yes::T no::U @@ -572,10 +592,12 @@ function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering; end +""" + Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm -# -# -# +Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` +algorithm. +""" struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm small::T big::U @@ -591,27 +613,21 @@ function _sort!(v::AbstractVector, a::Small{N}, o::Ordering; end - -# -# -# struct InsertionSortAlg <: Algorithm end - """ - InsertionSort + InsertionSort -Indicate that a sorting function should use the insertion sort algorithm. +Use the insertion sort algorithm. Insertion sort traverses the collection one element at a time, inserting each element into its correct, sorted position in the output vector. Characteristics: - * *stable*: preserves the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters - which ignores case). - * *in-place* in memory. - * *quadratic performance* in the number of elements to be sorted: - it is well-suited to small collections but should not be used for large ones. +* *stable*: preserves the ordering of elements which compare equal +(e.g. "a" and "A" in a sort of letters which ignores case). 
+* *in-place* in memory. +* *quadratic performance* in the number of elements to be sorted: +it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() const SMALL_ALGORITHM = InsertionSort @@ -635,24 +651,24 @@ function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering; end +""" + CheckSorted(next) <: Algorithm -# -# -# +Check if the input is already sorted and for large inputs, also check if it is +reverse-sorted. The reverse-sorted check is unstable. +""" struct CheckSorted{T <: Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; lo=firstindex(v), hi=lastindex(v), lenm1 = hi-lo, kw...) # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled + # arrays it is essentially free (<1%). _issorted(v, lo, hi, o) && return v - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) + # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) - # If reversing is valid, do so. This does not violate stability - # because being UIntMappable implies a linear order. + # If reversing is valid, do so. This violates stability. reverse!(v, lo, hi) return v end @@ -661,10 +677,14 @@ function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; end +""" + ComputeExtrema(next) <: Algorithm -# -# Prerequisite: region to be sorted [lo, hi] is nonempty -# +Compute the extrema of the input under the provided order. + +If the minimum is no less than the maximum, then the input is already sorted. Otherwise, +dispatch to the `next` algorithm. 
+""" struct ComputeExtrema{T <: Algorithm} <: Algorithm next::T end @@ -684,10 +704,16 @@ function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering; end +""" + ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm -# -# Consider counting sort -# +If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to +the `next` algorithm. + +For most types, the threshold is if the range is shorter than half the length, but for types +larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is +much more generous. +""" struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm counting::T next::U @@ -707,10 +733,15 @@ end _sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) +""" + CountingSort <: Algorithm -# -# Counting sort -# +Use the counting sort algorithm. + +`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and +space. It counts the number of occurrences of each value in the input and then iterates +through those counts repopulating the input with the values in sorted order. +""" struct CountingSort <: Algorithm end maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) @@ -738,10 +769,12 @@ function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering; end +""" + ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm -# -# Consider radix sort -# +If the number of bits in the input's range is small enough and the input supports efficient +bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. +""" struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm radix::T next::U @@ -760,10 +793,27 @@ function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering; end +""" + RadixSort <: Algorithm -# -# Radix sort -# +Use the radix sort algorithm. 
+ +`RadixSort` is a stable least significant bit first radix sort algorithm that runs in +`O(length * log(range))` time and linear space. + +It first sorts the entire vector by the last `chunk_size` bits, then by the second +to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements +that compare equal. This is essential so that the order introduced by earlier, +less significant passes is preserved by later passes. + +Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, it + * counts the number of entries that fall into each bucket + * uses those counts to compute the indices to move elements of those buckets into + * moves elements into the computed indices in the swap array + * switches the swap and working array + +`chunk_size` is larger for larger inputs and determined by an empirical heuristic. +""" struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, @@ -803,17 +853,13 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; end - -# -# Quicksort -# """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm Indicate that a sorting function should use the partial quick sort algorithm. -Partial quick sort finds and sorts the elements that would end up in positions -`lo:hi` using [`QuickSort`](@ref). +Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using +[`QuickSort`](@ref). 
It is recursive and uses the `next` algorithm for small chunks Characteristics: * *stable*: preserves the ordering of elements which compare equal @@ -929,10 +975,15 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering; end +""" + StableCheckSorted(next) <: Algorithm -# -# StableCheckSorted -# +Check if an input is sorted and/or reverse-sorted. + +The definition of reverse-sorted is that for every pair of adjacent elements, the latter is +less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs +of elements that compare equal. +""" struct StableCheckSorted{T<:Algorithm} <: Algorithm next::T end @@ -949,19 +1000,6 @@ function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering; end -# This is a stable least significant bit first radix sort. -# -# That is, it first sorts the entire vector by the last chunk_size bits, then by the second -# to last chunk_size bits, and so on. Stability means that it will not reorder two elements -# that compare equal. This is essential so that the order introduced by earlier, -# less significant passes is preserved by later passes. -# -# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it -# * counts the number of entries that fall into each bucket -# * uses those counts to compute the indices to move elements of those buckets into -# * moves elements into the computed indices in the swap array -# * switches the swap and working array -# # In the case of an odd number of passes, the returned vector will === the input vector t, # not v. This is one of the many reasons radix_sort! is not exported. 
function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, @@ -1034,17 +1072,113 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) true end + ## default sorting policy ## -InitialOptimizations(x) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(x)))) +""" + InitialOptimizations(next) <: Algorithm + +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. + +`InitialOptimizations` is an implementation detail and subject to change or removal in +future versions of Julia. + +If `next` is stable, then `InitialOptimizations(next)` is also stable. + +The specific optimizations attempted by `InitialOptimizations` are +[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to +[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). +""" +InitialOptimizations(next) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(next)))) +""" + DEFAULT_STABLE + +The default sorting algorithm. + +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. + +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help +for the current dispatch system. + +## Extended Help + +`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid +of Radix, Insertion, Counting, and Quick sorts. + +We begin with MissingOptimization because it has no runtime cost when it is not +triggered and can enable other optimizations to be applied later. For example, +BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after +[`MissingOptimization`](@ref) is applied, that input will be converted into an +`AbstractVector{Bool}`. 
+ +We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not +triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s +is quite easy). + +Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch +occurs before the [`IEEEFloatOptimization`](@ref) pass because the +[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs. + +To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref). + +After these optimizations, we branch on whether radix sort and related algorithms can be +applied to the input vector and ordering. We conduct this branch by testing if +`UIntMappable(v, order) !== nothing`. That is, we see if we know of a reversible mapping +from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after +the initial optimizations because they can change the input vector's type and ordering to +make them `UIntMappable`. + +If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch +to [`QuickSort`](@ref). + +Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then +perform a presorted check ([`CheckSorted`](@ref)). + +We check for short inputs before performing the presorted check to avoid the overhead of the +check for small inputs. Because the alternate dispatch is to [`InsertionSort`](@ref) which +has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small +inputs. + +We check if the input is reverse-sorted for long vectors (more than 500 elements) because +the check is essentially free unless the input is almost entirely reverse sorted. 
+ +Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms +a [total order](https://en.wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to +perform an unstable sort because no two elements can compare equal unless they _are_ equal, +in which case switching them is undetectable. We utilize this fact to perform a more +aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`. + +After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the +input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until +it is necessary. + +Next, we [`ConsiderCountingSort`](@ref). If the range of the input is small compared to its +length, we apply [`CountingSort`](@ref). + +Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, +but we consider the number of _bits_ in the range, rather than the range itself. +Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this +stage. + +Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and +otherwise we dispatch to [`QuickSort`](@ref). +""" const DEFAULT_STABLE = InitialOptimizations(IsUIntMappable( Small{40}(CheckSorted(ComputeExtrema(ConsiderCountingSort(ConsiderRadixSort(Small{80}(QuickSort)))))), StableCheckSorted(QuickSort))) +""" + DEFAULT_UNSTABLE + +An efficient sorting algorithm. + +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. +""" const DEFAULT_UNSTABLE = DEFAULT_STABLE const SMALL_THRESHOLD = 20 - defalg(v::AbstractArray) = DEFAULT_STABLE defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation