-
Notifications
You must be signed in to change notification settings - Fork 2
/
home.html
825 lines (711 loc) · 32.7 KB
/
home.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<link rel="icon" type="image/png" href="assets/img/logo.png">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>NeuroBPredict</title>
<!-- Single viewport declaration (previously two conflicting ones); pinch-zoom is left enabled for accessibility -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Web fonts: loaded over HTTPS so they are not blocked as mixed content when the page itself is served over HTTPS -->
<link href="https://fonts.googleapis.com/css?family=Arvo" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=PT+Sans" rel="stylesheet">
<!-- Bootstrap core CSS -->
<link href="assets/css/bootstrap.min.css" rel="stylesheet">
<!-- Animation library for notifications -->
<link href="assets/css/animate.min.css" rel="stylesheet">
<!-- Light Bootstrap Table core CSS -->
<link href="assets/css/light-bootstrap-dashboard.css" rel="stylesheet">
<!-- CSS for Demo Purpose, don't include it in your project -->
<link href="assets/css/demo.css" rel="stylesheet">
<!-- Fonts and icons (remote sheets use HTTPS for the same mixed-content reason as above) -->
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=Roboto:400,700,300" rel="stylesheet">
<link href="assets/css/pe-icon-7-stroke.css" rel="stylesheet">
</head>
<body>
<div class="wrapper">
<div class="sidebar" data-color="purple" data-image="assets/img/sidebar-5.jpg">
<div class="sidebar-wrapper">
<div class="logo">
<a href="#" class="simple-text">
NeuroBPredict
</a>
</div>
<ul class="nav">
<li class="active">
<a href="home.html">
<i class="pe-7s-news-paper"></i>
<p>Description</p>
</a>
</li>
<li>
<a href="assets/figures/bubblesort.html">
<i class="pe-7s-graph"></i>
<p>Figures</p>
</a>
</li>
</ul>
</div>
</div>
<div class="main-panel">
<div class="content">
<div class="col-md-12">
<h1 class="col-md-12"> NeuroBPredict </h1>
<div class="tab">
<button class="tablinks" onclick="openCity(event, 'BranchPredictor')">
Branch Predictor
</button>
<button class="tablinks" onclick="openCity(event, 'StaticBP')" >
StaticBP
</button>
<button class="tablinks" onclick="openCity(event, 'LocalBP')" >
LocalBP
</button>
<button class="tablinks" onclick="openCity(event, 'TournamentBP')" >
TournamentBP
</button>
<button class="tablinks" onclick="openCity(event, 'BimodalBP')" >
BimodalBP
</button>
<button class="tablinks" onclick="openCity(event, 'LTAGE')" >
LTAGE
</button>
<button class="tablinks" onclick="openCity(event, 'NeuralBP')" >
NeuralBP
</button>
<button class="tablinks" onclick="openCity(event, 'NeuralPathBP')"> NeuralPathBP
</button>
</div>
<div id="BranchPredictor" class="tabcontent">
<h2 class="col-md-12"> Branch Predictor </h2><hr>
<p>
Modern CPUs have grown significantly more complex than they used to be, as manufacturers like AMD and Intel now grow unable to achieve speedups simply by smashing more transistors on a single chip. This limitation is largely due to physical constraints, i.e. due to quantum issues that become prevalent at size regimes these chips are dealing with. As a result, such manufacturers have looked to more creative ways of achieving speedups, largely in the form of increasing parallelism. This has emerged in the forms of multi-core processor designs and also branch prediction.
<br><br>
Branch prediction has become an essential part of the CPU pipeline, particularly to increase ILP (instruction-level parallelism). Specifically, branch predictors are responsible for pre-fetching instructions for decoding/execution at conditional branch locations, i.e. points in the code where it is impossible to know which instruction to fetch next. In other words, the CPU guesses the outcome of the branch and fetches instructions accordingly, hoping to avoid having to wait for the conditional branch to finish executing before it can fetch/process further instructions. Modern branch predictors typically integrate global history and local history, which respectively correspond to applying the result of previous branch outcomes in the overall program and at particular values for the program counter (i.e. by line number in the code). However, with the rise of neural nets, it was inevitable that they too would be tested in this regime.
<br><br>
The main issue with neural net implementations is the latency, i.e. how long it would take for the prediction to occur relative to the time spent. Although they might offer a few extra percentage points of accuracy, perceptrons were largely dismissed as unlikely to be adopted, seeing as modern methods already achieve accuracies of upwards of 90-95%. However, recent research suggests otherwise, indicating that perceptrons may in fact be comparable in both accuracy and latency to the methods that are currently most widespread. We explored several branch predictor designs across different executables and across ISAs, i.e. ARM and x86, all through the gem5 simulator environment.
</p>
</div>
</div>
<div id="StaticBP" class="tabcontent">
<h2 class="col-md-12"> StaticBP </h2><hr>
<p> The class of branch predictors can be subdivided into two major classes: "static" and "dynamic." Static branch predictors are the simplest variant of branch predictors. As suggested by the name, these predictors essentially guess the same result regardless of their performance on past conditional cases. That is, a static predictor will either always predict "true" whenever it encounters a branch (i.e. will inform the CPU to directly fetch instructions for the "true" case) or always predict "false." There are some variants thereof, e.g. alternating between the two, but these two are clearly the most sensible and straightforward to implement. Surprisingly, static branch predictors work quite well in practice, coupled with their zero latency, since no computation must be performed prior to fetching instructions. Thus, static BPs form a solid baseline from which to evaluate dynamic BPs. </p>
<hr>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>330833</td>
<td>1934</td>
<td>192.72</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>127705</td>
<td>25611</td>
<td>74.40</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>57430</td>
<td>12348</td>
<td>30.76</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>10607</td>
<td>1889</td>
<td>32.09</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>20943</td>
<td>2852</td>
<td>58.84</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>580458</td>
<td>3029</td>
<td>946.32</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>268830</td>
<td>1998</td>
<td>100.58</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>299392</td>
<td>13046</td>
<td>116.42</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>10813</td>
<td>1997</td>
<td>31.88</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>209757</td>
<td>32586</td>
<td>82.68</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="LocalBP" class="tabcontent">
<h2 class="col-md-12"> LocalBP </h2><hr>
<p> Dynamic BPs, on the other hand, typically integrate either global history or local history, or a combination of the two. The LocalBP chooses to exclusively make use of the local history for its predictions, meaning that we have a table (i.e. an array) indexed by a hash of the program counter. This predictor makes use of a saturated counter, which is a commonly integrated structure in branch predictors that prevents a single change in conditional outcome from completely throwing off future predictions. Specifically, this "saturated counter" is essentially a Markov Model with four states, called "strongly accepting," "weakly accepting," "weakly rejecting," and "strongly rejecting," where the accepting states both correspond to when the BP predicts the conditional will go along the true branch and the rejecting states the opposite.
<br><br>
Thus, each of the distinct local program counter hashes corresponds to a different saturated state machine. These predictors typically perform well when different conditionals across the program have distinct behaviors, although they suffer the downside of not capturing any overall trends in the program (e.g. if most conditionals are true). </p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>198284</td>
<td>1486</td>
<td>91.44</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>104713</td>
<td>8770</td>
<td>55.55</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>55185</td>
<td>5713</td>
<td>31.15</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>9072</td>
<td>1345</td>
<td>32.73</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>15798</td>
<td>2538</td>
<td>58.68</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>42277</td>
<td>2149</td>
<td>32.41</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>210521</td>
<td>4005</td>
<td>59.36</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>217449</td>
<td>5352</td>
<td>63.20</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>9286</td>
<td>1522</td>
<td>31.99</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>58558</td>
<td>1653</td>
<td>81.71</td>
</tr>
<tr>
<td>Treesort.txt</td>
<td>368046</td>
<td>14900</td>
<td>114.14</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="TournamentBP" class="tabcontent">
<h2 class="col-md-12"> TournamentBP </h2><hr>
<p> Unlike the previous example, the tournament branch predictor makes use of several of the previous branches. That is, it makes correlated predictions based on the last m branches. In particular, each of the local branches has an indicator that keeps track of which of the past m branches is most correlated with outcomes, making it simply an extension of the LocalBP, since tournament can easily capture if the most recent branch is most heavily correlated with the outcome.
Unlike the single local branch predictor, this predictor is capable of making predictions that depend on results further back than the most recent branch, meaning it effectively extends the scope of what is "significant" in making a prediction. Of course, with this extended input domain comes the difficulty of including factors that may not be relevant yet may seem to be in a training sample, along with increased prediction/training latency.
</p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>69726</td>
<td>3685</td>
<td>89.56</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>112205</td>
<td>26699</td>
<td>74.81</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>50478</td>
<td>13169</td>
<td>31.48</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>9214</td>
<td>2903</td>
<td>32.33</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>11094</td>
<td>5669</td>
<td>58.21</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>2317</td>
<td>1367</td>
<td>0.65</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>113906</td>
<td>16731</td>
<td>59.00</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>274604</td>
<td>93990</td>
<td>71.36</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>9372</td>
<td>2978</td>
<td>32.55</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>15795</td>
<td>3116</td>
<td>81.96</td>
</tr>
<tr>
<td>Treesort.txt</td>
<td>119107</td>
<td>29582</td>
<td>33.24</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="BimodalBP" class="tabcontent">
<h2 class="col-md-12"> BimodalBP </h2><hr>
<p> Unlike both the tournament and local branch predictors, the Bimodal branch predictor, as suggested by the name, maintains both a global and a local history of conditional branches down which we have travelled and also maintains a "choosing" parameter that determines which, between the global and local histories, is more directly correlated with the outputs. In that way, future predictions by this bimodal predictor can make use of both the global and local structures through the program. That is, we can detect if some global structure is emerging (e.g. alternating between true/false loops) as well as local constructs, further increasing the scope. These branch predictors tend to do quite well in practice and have become largely, in one variant or another, the de facto standard of branch predictors. </p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>158021</td>
<td>2135</td>
<td>89.81</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>132635</td>
<td>37877</td>
<td>74.11</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>64185</td>
<td>19558</td>
<td>31.55</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>11714</td>
<td>1623</td>
<td>32.41</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>17935</td>
<td>4053</td>
<td>58.64</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>2447</td>
<td>260</td>
<td>0.34</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>189028</td>
<td>53924</td>
<td>59.32</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>267541</td>
<td>85479</td>
<td>63.54</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>12137</td>
<td>1739</td>
<td>32.15</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>52133</td>
<td>2006</td>
<td>82.27</td>
</tr>
<tr>
<td>Treesort.txt</td>
<td>1619</td>
<td>80</td>
<td>0.15</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="LTAGE" class="tabcontent">
<h2 class="col-md-12"> LTAGE </h2><hr>
<p> Similar to how neural networks grew so prevalent in recent times, the LTAGE branch predictor was essentially born out of online branch predictor competitions. That is, competitions where the goal was to most accurately predict conditional outcomes produced by CPU dumps (with penalties associated to latency). LTAGE is very much an extension of the local branch predictor concept. In particular, with local BP, the main issue arises in determining how large a history the predictor should consider. If it considers too much, the latency may be too large and the predictor may pick up trends that are simply artifacts of the training set. If it is taken too small, the BP will not have much information off of which it can make predictions, meaning it will essentially reduce down to simple/random chance. Thus, the LTAGE concept is maintaining a variable length local history for different branches, seeing which one most correlates with branches actually taken, which allows the predictor to essentially sidestep the issue of presetting a size of history to use for predictions. You can more thoroughly explore this architecture through its paper at: <a href="https://www.jilp.org/vol9/v9paper6.pdf">https://www.jilp.org/vol9/v9paper6.pdf</a> </p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>36086</td>
<td>2209</td>
<td>91.27</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>145920</td>
<td>48768</td>
<td>74.91</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>64545</td>
<td>21220</td>
<td>32.43</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>5908</td>
<td>1758</td>
<td>32.35</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>10587</td>
<td>3823</td>
<td>59.51</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>104092</td>
<td>53946</td>
<td>453.51</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>10384</td>
<td>3833</td>
<td>60.73</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>190634</td>
<td>33496</td>
<td>63.22</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>6261</td>
<td>1959</td>
<td>31.65</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>6500</td>
<td>1954</td>
<td>82.14</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="NeuralBP" class="tabcontent">
<h2 class="col-md-12"> NeuralBP </h2><hr>
<p> Of course, as previously mentioned, with the rise of neural nets, it was inevitable that they too would be applied to branch prediction. The first attempt to apply neural nets to this regime was as follows: the main issue with applying complicated networks is that, since these predictors are extremely low level (on the CPU), they cannot afford the overhead necessary to perform the predictions. In other words, the "neural net" scope must be greatly limited so as to allow the predictions to be made in a reasonable time frame. It turns out that this structure is simply a one layer network of neurons, i.e. a series of weights associated to different points in the program, set up in a table-like fashion.
<br><br>
In other words, different program counters (similar to the local branch predictor) hash to different locations in the weights matrix, where each row corresponds to a fixed hashed pc value. The input to this neuron is a vector of the global history of conditional outcomes at the point of prediction in the program (similar to the TournamentBP), meaning that a weighted linear combination thereof is used to predict the final outcome of the branch. Thus, this algorithm very closely resembles that of the TournamentBP with the exception of how it is updated, which is per the standard neural update rules. More recent modifications, however, have been made to this predictor, described below. This work was made possible by the assistance of the description in <a href="https://www.microarch.org/micro36/html/pdf/jimenez-FastPath.pdf">https://www.microarch.org/micro36/html/pdf/jimenez-FastPath.pdf</a>. </p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>185036</td>
<td>3312</td>
<td>133.41</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>132996</td>
<td>29265</td>
<td>129.98</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>63890</td>
<td>15752</td>
<td>53.99</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>8604</td>
<td>2259</td>
<td>39.19</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>13140</td>
<td>5125</td>
<td>71.05</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>675985</td>
<td>204267</td>
<td>1202.73</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>205150</td>
<td>19174</td>
<td>92.66</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>253347</td>
<td>82381</td>
<td>103.60</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>9082</td>
<td>2631</td>
<td>37.24</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>97834</td>
<td>18770</td>
<td>119.72</td>
</tr>
<tr>
<td>Treesort.txt</td>
<td>432152</td>
<td>70188</td>
<td>198.38</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="NeuralPathBP" class="tabcontent">
<h2 class="col-md-12"> NeuralPathBP </h2><hr>
<p> The fast Neural Path BP is one of the state-of-the-art predictors that brought neural perceptron design back into focus in the realm of BPs. The algorithm that was implemented here is more thoroughly described in: <a href="https://www.microarch.org/micro36/html/pdf/jimenez-FastPath.pdf">https://www.microarch.org/micro36/html/pdf/jimenez-FastPath.pdf</a>. As a brief overview, this predictor essentially integrates the neural structure of the previous algorithm with the "paths taken" in the program. That is, we maintain a global history of the conditional addresses, which captures how we are jumping around the program, which serves the purpose of being the input vector combined as a weighted sum by the corresponding weights of the vectors. Thus, unlike the previous algorithm, this updates several rows, presumably to capture how the same location in the program may be correlated with many subsequent conditional outcomes, which this version can directly capture in its updates. </p>
<h4 class="title"> Branch Prediction Results </h4>
<div class="content table-responsive table-full-width">
<table class="table table-hover table-striped">
<thead>
<tr>
<th scope="col">Executable</th>
<th scope="col">Conditional Accuracy</th>
<th scope="col">Indirect Accuracy</th>
<th scope="col">Latency</th>
</tr>
</thead>
<tbody>
<tr>
<td>Bubblesort.txt</td>
<td>333474</td>
<td>2804</td>
<td>325.93</td>
</tr>
<tr>
<td>ConnCompMedium.txt</td>
<td>157877</td>
<td>34015</td>
<td>180.54</td>
</tr>
<tr>
<td>ConnCompSmall.txt</td>
<td>62784</td>
<td>14209</td>
<td>75.27</td>
</tr>
<tr>
<td>IntMM.txt</td>
<td>9765</td>
<td>1816</td>
<td>40.77</td>
</tr>
<tr>
<td>Oscar.txt</td>
<td>16688</td>
<td>3920</td>
<td>70.07</td>
</tr>
<tr>
<td>Perm.txt</td>
<td>124110</td>
<td>1980</td>
<td>151.88</td>
</tr>
<tr>
<td>Puzzle.txt</td>
<td>620337</td>
<td>139201</td>
<td>1412.84</td>
</tr>
<tr>
<td>Queens.txt</td>
<td>234622</td>
<td>3367</td>
<td>188.27</td>
</tr>
<tr>
<td>Quicksort.txt</td>
<td>281145</td>
<td>24475</td>
<td>209.31</td>
</tr>
<tr>
<td>RealMM.txt</td>
<td>10046</td>
<td>2145</td>
<td>40.74</td>
</tr>
<tr>
<td>Towers.txt</td>
<td>128113</td>
<td>1919</td>
<td>159.47</td>
</tr>
<tr>
<td>Treesort.txt</td>
<td>319174</td>
<td>2453</td>
<td>280.58</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</body>
<!-- Core JS Files -->
<script src="assets/js/jquery-1.10.2.js" type="text/javascript"></script>
<script src="assets/js/bootstrap.min.js" type="text/javascript"></script>
<!-- Checkbox, Radio & Switch Plugins -->
<script src="assets/js/bootstrap-checkbox-radio-switch.js"></script>
<!-- Charts Plugin -->
<script src="assets/js/chartist.min.js"></script>
<!-- Notifications Plugin -->
<script src="assets/js/bootstrap-notify.js"></script>
<!-- Google Maps Plugin -->
<script type="text/javascript" src="https://maps.googleapis.com/maps/api/js?sensor=false"></script>
<!-- Light Bootstrap Table Core javascript and methods for Demo purpose -->
<script src="assets/js/light-bootstrap-dashboard.js"></script>
<!-- Light Bootstrap Table DEMO methods, don't include it in your project! -->
<script src="assets/js/demo.js"></script>
<script>
/**
 * Tab switcher used by the "tablinks" buttons.
 *
 * Hides every element carrying the "tabcontent" class, strips the " active"
 * marker from every "tablinks" element, then reveals the element whose id is
 * `cityName` and appends " active" to the element that dispatched the event.
 *
 * @param {Event} evt - event whose currentTarget is the button being activated
 * @param {string} cityName - id of the tab panel to display
 */
function openCity(evt, cityName) {
  var panels = document.getElementsByClassName("tabcontent");
  var buttons = document.getElementsByClassName("tablinks");

  // Collapse all panels so only the requested one ends up visible.
  Array.prototype.forEach.call(panels, function (panel) {
    panel.style.display = "none";
  });

  // Clear the highlight from whichever button currently carries it.
  Array.prototype.forEach.call(buttons, function (button) {
    button.className = button.className.replace(" active", "");
  });

  // Reveal the requested panel and highlight the button that triggered the switch.
  var selectedPanel = document.getElementById(cityName);
  selectedPanel.style.display = "block";
  evt.currentTarget.className += " active";
}
// Initial page state: reveal the introductory "BranchPredictor" panel and
// highlight its tab button. This runs at load time, outside any click handler,
// so there is no event object available; referencing one here would throw a
// ReferenceError. The button is looked up directly instead: the
// "Branch Predictor" button is the first element with class "tablinks".
document.getElementById("BranchPredictor").style.display = "block";
var defaultTabLink = document.getElementsByClassName("tablinks")[0];
if (defaultTabLink) {
  defaultTabLink.className += " active";
}
</script>
</html>