-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdqsort.html
350 lines (330 loc) · 18.2 KB
/
pdqsort.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="F3real" />
<meta name="keywords" content="sorting,algorithm" />
<meta name="description" content="Quick intro to pdqsort" />
<title>Pdqsort - EnSec blog</title>
<link href="https://f3real.github.io/theme/css/combined.css" rel="stylesheet" />
<!-- Feeds -->
</head>
<body data-spy="scroll" data-target="#scrollspy">
<div id="wrapper">
<!-- Sidebar -->
<nav id="sidebar-wrapper-small" class="twitchy-background">
<ul id="accordion-small" class="sidebar-nav sidebar-nav-small">
<li>
<a href="https://f3real.github.io" title="EnSec blog" class="collapsed">
<span class="fas fa-home"></span>
</a>
</li>
<li class="nav-divider"></li>
<li>
<a href="https://f3real.github.io/archives.html" title="Recent Articles" class="collapsed">
<span class="fas fa-th-list"></span>
</a>
</li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion-small" href="#collapse-social-small" title="Social" class="collapsed">
<i class="fas fa-users padding-small"></i>
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-social-small" class="collapse ">
<li>
<a href="https://github.com/F3real" title="Github"><i class="fab fa-github-square padding-small"></i></a>
</li>
<li>
<a href="https://www.linkedin.com/in/stefan-ili%C4%87-61a004111" title="Linkedin"><i class="fab fa-linkedin padding-small"></i></a>
</li>
</ul></li>
<li class="nav-divider"></li>
<li>
<a href="#" title="Back to top" class="collapsed">
<span class="fas fa-arrow-up"></span>
</a>
</li>
</ul>
</nav>
<nav id="sidebar-wrapper" class="twitchy-background">
<ul id="accordion" class="sidebar-nav">
<li class="sidebar-brand">
<a href="https://f3real.github.io/">
<span class="fas fa-home padding-small"></span>
EnSec blog
</a>
</li>
<li>
<a href="https://f3real.github.io/archives.html">
<span class="fas fa-th-list padding-small"></span>
Archives
</a>
</li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion" href="#collapse-social">
<i class="fas fa-users padding-small"></i>
Contact
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-social" class="sidebar_submenu collapse ">
<li>
<a href="https://github.com/F3real" title="Github">
<i class="fab fa-github-square padding-small"></i>
Github
</a>
</li>
<li>
<a href="https://www.linkedin.com/in/stefan-ili%C4%87-61a004111" title="Linkedin">
<i class="fab fa-linkedin padding-small"></i>
Linkedin
</a>
</li>
</ul></li>
<li class="nav-divider"></li>
<li class="panel anti-panel"><ul id="collapse-pages" class="sidebar_submenu collapse ">
</ul></li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion" href="#collapse-categories">
<i class="fas fa-folder-open padding-small"></i>
Categories
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-categories" class="sidebar_submenu collapse ">
<li >
<a href="https://f3real.github.io/category/ctf.html">
<i class="fas fa-folder-open padding-small"></i>
ctf
<span class="badge badge-secondary float-right categorybadge">28</span>
</a>
</li>
<li class="active">
<a href="https://f3real.github.io/category/misc.html">
<i class="fas fa-folder-open padding-small"></i>
misc
<span class="badge badge-secondary float-right categorybadge">15</span>
</a>
</li>
<li >
<a href="https://f3real.github.io/category/reversing.html">
<i class="fas fa-folder-open padding-small"></i>
reversing
<span class="badge badge-secondary float-right categorybadge">6</span>
</a>
</li>
<li >
<a href="https://f3real.github.io/category/tutorial.html">
<i class="fas fa-folder-open padding-small"></i>
tutorial
<span class="badge badge-secondary float-right categorybadge">5</span>
</a>
</li>
</ul></li>
</ul>
</nav>
<!-- /#sidebar-wrapper -->
<!-- open/close sidebar -->
<button onclick="toggleMenu();return false;" class="btn btn-primary" id="menu-toggle">
<span id="right-arrow" class="fas fa-chevron-right" title="expand sidebar"></span>
<span id="left-arrow" class="fas fa-chevron-left" title="minimize sidebar"></span>
</button>
<!-- /open/close sidebar -->
<!-- Page Content -->
<div id="page-content-wrapper">
<div class="container-fluid">
<section id="content">
<article>
<div class="row">
<div class="col-lg-10">
<header class="page-header">
<h1>
<a href="https://f3real.github.io/pdqsort.html"
rel="bookmark"
title="Permalink to Pdqsort">
Pdqsort
</a>
<small>
<div class="post-info">
<div class="publish-info-block">
<small>
<span class="published">
<i class="fa fa-calendar padding-small"></i><time datetime="2019-05-22T10:01:00+02:00"> Wed 22 May 2019</time>
</span>
<span class="category">
<i class="fa fa-folder-open padding-small"></i><a href="https://f3real.github.io/category/misc.html">misc</a>
</span>
<span class="tags">
<i class="fa fa-tags padding-small"></i>
<a href="https://f3real.github.io/tag/sorting.html">sorting</a> / <a href="https://f3real.github.io/tag/algorithm.html">algorithm</a> </span>
</small>
</div>
</div><!-- /.post-info --> </small>
</h1>
</header>
</div>
</div>
<div class="row">
<div class="col-lg-10">
<div class="entry-content">
<p>Pdqsort (pattern-defeating quicksort) is another interesting sorting algorithm; it was originally made as a replacement for C++ <code>std::sort</code>. It is also a relatively new algorithm, made around 2015. Pdqsort is implemented in Boost and in the Rust standard library (<code>sort_unstable</code>).</p>
<div class="highlight"><pre><span></span><code>Best: O(n)
Average: O(n log n)
Worst: O(n log n)
Memory: O(log n)
</code></pre></div>
<p>Like timsort, pdqsort is also a hybrid algorithm. It uses insertion sort, heap sort and quicksort. Since it uses quicksort it is also unstable.</p>
<p>Let's take a high-level overview of the Rust implementation:</p>
<p>The main function of the algorithm is <code>recurse</code>. It calculates the pivot using the median of medians (or a simple median of three if the slice length is below 50). After that, <code>recurse</code> splits/partitions the slice in two: the left side with elements smaller than the pivot and the right side with elements bigger than the pivot.</p>
<p>The function <code>recurse</code> is then recursively called on the smaller of the two parts to reduce recursion depth, while it keeps looping on the bigger part.</p>
<p>While all of this is happening, <code>recurse</code> also tracks the state of partitions:</p>
<ul>
<li>if they were balanced</li>
<li>if they were likely sorted</li>
<li>if slice was already partitioned</li>
<li>recursion depth</li>
</ul>
<p>If the partition wasn't balanced, the algorithm will attempt to break patterns by randomly swapping 4 elements. This is checked based on the index of the element in the middle (last element in the left partition) and the total length of the slice.</p>
<div class="highlight"><pre><span></span><code><span class="n">cmp</span>::<span class="n">min</span><span class="p">(</span><span class="n">mid</span><span class="p">,</span><span class="w"> </span><span class="n">len</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">mid</span><span class="p">)</span><span class="w"> </span><span class="o">>=</span><span class="w"> </span><span class="n">len</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mi">8</span><span class="p">;</span>
</code></pre></div>
<p>If the partition is likely sorted, the algorithm will try to do a partial insertion sort (max 5 pairs swapped) to speed up the sorting of the partition. This is decided based on the number of swaps performed when choosing the pivot element.</p>
<p>In case recursion depth starts growing, pdqsort switches to heapsort to ensure <code>O(n log n)</code> worst-case. Max recursion depth is calculated based on array length.</p>
<p>Also, similar to timsort, if the slice/partition is short (len &lt;= 20), the algorithm will switch to insertion sort.</p>
<p>Another optimization pdqsort does is detecting whether there are many equal elements; in that case, it performs special partitioning. This happens if we select a pivot equal to the previous one. In this case, all elements equal to the pivot will be put in the left partition.</p>
<p>Partitioning itself is done in blocks, in a branchless manner, using work from <a href="http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf">BlockQuicksort: Avoiding Branch Mispredictions in Quicksort</a> as another optimization.</p>
<p>We avoid branch misses by casting a boolean to an int (SETcc instructions). This gives better performance than unpredictable branches (like in the sorting case).</p>
<p>Example of branchless comparison:</p>
<div class="highlight"><pre><span></span><code><span class="k">for</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="k">in</span><span class="w"> </span><span class="mi">0</span><span class="o">..</span><span class="n">block_l</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">unsafe</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Set index of element that should be swapped.</span>
<span class="w"> </span><span class="o">*</span><span class="n">end_l</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">u8</span><span class="p">;</span>
<span class="w"> </span><span class="cm">/* Increment pointer to end element(end_l) conditionally.</span>
<span class="cm"> In case it's not incremented in next iteration we will overwrite index we set in previous line.</span>
<span class="cm"> */</span>
<span class="w"> </span><span class="n">end_l</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">end_l</span><span class="p">.</span><span class="n">offset</span><span class="p">(</span><span class="o">!</span><span class="n">is_less</span><span class="p">(</span><span class="o">&*</span><span class="n">elem</span><span class="p">,</span><span class="w"> </span><span class="n">pivot</span><span class="p">)</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">isize</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// Increment pointer to next element in slice.</span>
<span class="w"> </span><span class="n">elem</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">elem</span><span class="p">.</span><span class="n">offset</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</code></pre></div>
<p>If the algorithm seems interesting and you want to dive into more detail, I suggest you also look at the following resources:</p>
<p><a href="https://github.com/rust-lang/rust/blob/master/src/libcore/slice/sort.rs">Rust stdlib implementation</a></p>
<p><a href="https://github.com/orlp/pdqsort">Original C++ implementation made by Orson</a></p>
<p><a href="https://drive.google.com/file/d/0B1-vl-dPgKm_T0Fxeno1a0lGT0E/view">Draft algorithm paper</a></p>
</div>
<footer class="text-right">
<p>- F3real</p>
</footer>
<div id="show-comments" class="span7 text-center">
<a href="https://f3real.github.io/pdqsort.html#disqus_thread"
data-disqus-identifier="pdqsort"
class="btn btn-primary twitchy-background">Show Comments</a>
</div>
<section id="comments" class="comments hidden">
<hr/>
<h2>Comments</h2>
<div id="disqus_thread"></div>
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by
Disqus.</a></noscript>
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
</section>
</div>
</div>
</article>
</section>
<footer>
<hr>
<div class="row">
<div class="col-lg-10 text-center">
<p><small>
Built by <a href="http://docs.getpelican.com/en/latest">Pelican</a> / <a href="https://github.com/F3real/pelican-twitchy">pelican-twitchy</a>
· © 2024 F3real
</small></p>
</div>
</div>
</footer> </div>
</div>
<!-- /#page-content-wrapper -->
</div>
<!-- /#wrapper -->
<!-- disqus -->
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
// These stay as global `var`s on purpose: they are the configuration for the
// Disqus embed script injected below (presumably read from the global scope —
// confirm against the Disqus docs before changing how they are declared).
var disqus_shortname = 'https-f3real-github-io'; // required: replace example with your forum shortname
var disqus_identifier = 'pdqsort';
var disqus_url = 'https://f3real.github.io/pdqsort.html';
var disqus_config = function () {
  this.language = "en";
};
var commentsDiv = document.getElementById('show-comments');
// Comments are loaded lazily: the embed script is only injected once the
// "Show Comments" area is clicked.
commentsDiv.onclick = function() {
  /* * * DON'T EDIT BELOW THIS LINE * * */
  (function () {
    var dsq = document.createElement('script');
    dsq.type = 'text/javascript';
    dsq.async = true;
    // Explicit https: a protocol-relative URL ('//…') inherits the scheme of
    // the current page and therefore fails to load when the page is opened
    // from file:// (e.g. a local preview of the generated site).
    dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
    (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
  })();
  // Hide the clicked element so the embed cannot be requested a second time.
  this.style.display = 'none';
};
</script>
<!-- /disqus -->
<script>
// Layout elements shared by the sidebar functions further down in this script.
const wrapper = document.getElementById('wrapper');
const sidebarBig = document.getElementById('sidebar-wrapper');
const sidebarSmall = document.getElementById('sidebar-wrapper-small');
// Every link marked data-toggle="collapse"; its href ("#some-id") names the
// element whose visibility it controls.
const triggers = Array.from(document.querySelectorAll('[data-toggle="collapse"]'));
// forEach instead of `for (var i …)` so no loop counter leaks into the global scope.
triggers.forEach((trigger) => {
  trigger.addEventListener('click', (ev) => {
    // The href is only a pointer to the target id; never navigate to it.
    ev.preventDefault();
    const targetId = (trigger.getAttribute('href') || '').replace('#', '');
    const target = document.getElementById(targetId);
    // Resolve the target before touching any classes: a missing href or a
    // dangling id must not throw halfway through and leave the trigger marked
    // as toggled while nothing was actually shown or hidden.
    if (!target) {
      return;
    }
    trigger.classList.toggle('collapsed');
    target.classList.toggle('show');
  }, false);
});
// Make the large sidebar (#sidebar-wrapper) visible and hide the small one
// (#sidebar-wrapper-small).
function showBigNav() {
  sidebarSmall.style.display = 'none';
  sidebarBig.style.display = 'block';
}
// Make the small sidebar (#sidebar-wrapper-small) visible and hide the large
// one (#sidebar-wrapper).
function showSmallNav() {
  sidebarSmall.style.display = 'block';
  sidebarBig.style.display = 'none';
}
// Matches while the viewport is at least 768px wide.
const mediaQuery = window.matchMedia('(min-width:768px)');
// Runs whenever the viewport crosses the 768px breakpoint.
mediaQuery.onchange = (event) => {
  const classes = wrapper.classList;
  // A manual toggle is dropped when the breakpoint is crossed; in that case
  // only the class is removed and the sidebars are left as they are.
  if (classes.contains('toggled')) {
    classes.remove('toggled');
    return;
  }
  // No manual toggle active: wide viewport gets the large sidebar, narrow
  // viewport gets the small one.
  const showNav = event.matches ? showBigNav : showSmallNav;
  showNav();
};
// Pick the sidebar to display from the viewport width and the manual toggle.
// On a wide viewport the small sidebar is shown only while 'toggled' is set;
// on a narrow viewport the meaning of 'toggled' is inverted.
function setNavbar() {
  const toggled = wrapper.classList.contains('toggled');
  const useSmallNav = mediaQuery.matches ? toggled : !toggled;
  if (useSmallNav) {
    showSmallNav();
    return;
  }
  showBigNav();
}
// Click handler for the #menu-toggle button (wired up through its inline
// onclick attribute, so this must stay a global function): flips the
// 'toggled' class on the wrapper and re-evaluates which sidebar is shown.
function toggleMenu() {
  const classes = wrapper.classList;
  classes.toggle('toggled');
  setNavbar();
}
// Choose the initial sidebar once the page has finished loading.
window.onload = setNavbar;
</script>
<script data-goatcounter="https://f3real.goatcounter.com/count"
async src="//gc.zgo.at/count.js"></script>
</body>
</html>