-
Notifications
You must be signed in to change notification settings - Fork 0
/
quicksort.singeli
169 lines (149 loc) · 4.9 KB
/
quicksort.singeli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
include './partition'
include './median'
include './xorshift'
include './arith' # Logs and square roots
def base_cases{src:*T, dst:*T, aux:*T, n:U, aux_bytes:usize, min:T} = {
# Short array
if (n < 32) {
sort_lt32{dst, src, n}
return{}
}
# Distribution base cases
if (isint{T}) {
range := dist{min, dst->n}
if (U^~(range/4) < n and U^~range < aux_bytes/bytes{U}) {
# Always sort in place on dst
# Counting sort could have a different src/dst, but if src isn't
# dst then it's equal to aux, and we need that space
if (src != dst) set{dst, src, n}
count_sort{dst, u32<~n, *u32~~aux, min, u32<~range + 1}
return{}
}
if (width{T} == 32 and n <= (1<<16) and range < (1<<16)) {
radpack32(*u32~~src, *u32~~dst, u32<~n, *void~~aux, u32~~min)
return{}
}
}
}
# Robin Hood approval state
def RH_UNTRIED = 0
def RH_APPROVED = 2
def get_pivot{array:pT, n:U, getaux, sort, proc_pivots, rh_state} = {
# log_2 of minimum size for sampling
sl0:U = 8
# Output array and index
arr:=array; ind:U = 0
if (n <= 192) {
ind = locate_3_median{array, n}
} else if (rh_state!=RH_UNTRIED and n <= 1024) {
ind = locate_3of3_pseudomedian{array, n}
} else if (rh_state!=RH_UNTRIED and n <= 1 << (sl0 = 14)) {
ind = locate_5of3_pseudomedian{array, n}
} else {
aux := getaux{}
# gap is the expected distance between adjacent samples
# We'll get about n/gap samples
log2:U = floor_log2{n, sl0}
gap_min := 1 << (log2 / 2)
gap := sqrt_approx{n, gap_min}
# Collect samples with split xorshift and add to aux
aux1 := aux
def add{ind} = { aux1 <- array->ind; ++aux1 }
mask := gap_min - 1
def add3 = make_split_xorshift{tup{13,17,5}, n, mask, add}
i:U = 0; while (i < n - (mask + 2 * gap)) add3{i, gap}
ns := aux1 - aux
sort{aux, ns, *void~~aux1, ns*bytes{eltype{pT}}}
proc_pivots{aux, ns}
arr = aux
ind = ns / 2
}
arr -> ind
}
# Fluxsort recurrence: partition, handle right then left side
# src may be equal to dst or aux
def flux_recur{recur, tailcall, piv:T, src:*T, dst:*T, aux:*T, n:U, aux_bytes:usize, min:T, rh_state:u8} = {
# Partition: left side directly in dst with length l, includes pivots
l:U = 0
rsrc := aux
if (LIKELY{n < 1<<14}) {
l = flux_partition{src, <=, piv, dst, aux, n}
} else {
# Previous partitions must have used fulcrum too, so we haven't yet
# touched aux, and src==dst
l = fulcrum_partition{dst, <=, piv, aux, n} # (unstable)
rsrc = dst + l
}
r := n - l # Values on the right
m := l # Values not on the right
# Never recurse on a partition with more than this many values
most := n - n/16
# If most values end up on the left, they're probably mostly pivots
if (l > most) {
# Partition again with pivots on the right
# This bounds performance by O(k*n) for only k unique values
l = flux_partition{dst, <, piv, dst, aux+r, m}
set{dst+l, aux+r, m-l} # Should probably write a reverse partition to avoid this
}
# Sort the right-hand side, moving it from rsrc to dst
rdst := dst + m
if (r > most) { # Unbalanced
set{rdst, rsrc, r}
piposort{T}(rdst, r, rsrc)
} else {
recur{rsrc, rdst, aux, r, aux_bytes, piv, rh_state}
}
if (l > most) { # Unbalanced
piposort{T}(dst, l, aux)
return{}
}
# Left-hand side by tail call
src = dst
n = l
}
flux_loop{U, T}(src:*T, dst:*T, aux:*T, n:U, aux_bytes:usize, min:T, rh_state:u8) : void = {
while (u1~~1) {
base_cases{src, dst, aux, n, aux_bytes, min}
# Find pivot and check for RH sorting
def getaux{} = { a:=aux; if (a==src) a=dst; a }
def proc_pivots{pivots, num} = {
if (isint{T} and n<=1<<17 and rh_state!=RH_APPROVED) {
# 1 if tried and not approved, 2 for RH_APPROVED
rh_state = 1 + u8^~checkdist{pivots, num, min, n, dist{U, min, dst->n}}
}
}
def fn{gen} = bind{call, gen{U,T}}
piv := get_pivot{src, n, getaux, fn{flux_sort}, proc_pivots, rh_state}
if (isint{T} and rh_state == RH_APPROVED) {
try_rh{src, dst, n, aux, aux_bytes, min, dst->n, merge_chk, merge_from{merge_chk}}
}
# Then finish sorting; loop around for tail call
flux_recur{fn{flux_loop},1, piv, src,dst,aux,n,aux_bytes, min,rh_state}
}
}
flux_sort{U, T}(x:*T, n:U, aux:*void, aux_bytes:usize) : void = {
if (n <= 192) {
piposort{T}(x, n, *T~~aux)
return{}
}
# Find the minimum value and index of last maximum
def block = 1024
min := x->0; max := min
i:U = 0; imax:=i
do {
i0 := i
i += block; if (i > n) i = n
blm := x->i0
@for (x over j from i0 to i) {
if (x < min) min=x
if (x > blm) blm=x
}
if (blm >= max) { max=blm; imax=i; } # Save block index; refine later
} while (i < n)
if (min == max) return{}
do { --imax } while (x->imax < max)
x <-{imax} x->(n-1) # (unstable)
x <-{n-1} max
# Now sort
flux_loop{U,T}(x, x, *T~~aux, n-1, aux_bytes, min, 0)
}