-
Notifications
You must be signed in to change notification settings - Fork 7
/
dispatch_example.cpp
108 lines (82 loc) · 3.72 KB
/
dispatch_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/************************* dispatch_example.cpp ****************************
Author: Agner Fog
Date created: 2012-05-30
Last modified: 2017-07-01
Version: 1.40
Project: vector class library
Description: Example of automatic CPU dispatching
# Example of compiling this with GCC compiler:
# Compile dispatch_example.cpp four times for different instruction sets:
# Compile for SSE4.1
g++ -O3 -msse4.1 -std=c++0x -c dispatch_example.cpp -od5.o
# Compile for AVX2
g++ -O3 -mavx2 -mfma -std=c++0x -c dispatch_example.cpp -od8.o
# Compile for AVX512
g++ -O3 -mavx512f -mfma -mavx512vl -mavx512bw -mavx512dq -std=c++0x -c dispatch_example.cpp -od10.o
# The last compilation uses the lowest supported instruction set (SSE2)
# This includes the main program, and links all versions together:
g++ -O3 -msse2 -std=c++0x -otest dispatch_example.cpp instrset_detect.cpp d5.o d8.o d10.o
# Run the program
./test
(c) Copyright 2012-2019 Agner Fog.
Apache License version 2.0 or later.
******************************************************************************/
#include <stdio.h>
#define MAX_VECTOR_SIZE 512
#include "vectorclass.h"
// Define function type
// Change this to fit your purpose. Should not contain vector types:
typedef float MyFuncType(float*);
// function prototypes for each version
MyFuncType myfunc, myfunc_SSE2, myfunc_SSE41, myfunc_AVX2, myfunc_AVX512, myfunc_dispatch;
// Define function name depending on which instruction set we compile for
#if INSTRSET >= 10 // AVX512VL
#define FUNCNAME myfunc_AVX512
#elif INSTRSET >= 8 // AVX2
#define FUNCNAME myfunc_AVX2
#elif INSTRSET >= 5 // SSE4.1
#define FUNCNAME myfunc_SSE41
#elif INSTRSET == 2
#define FUNCNAME myfunc_SSE2 // SSE2
#else
#error Unsupported instruction set
#endif
// Dispatched version of the function. Compile this once for each instruction set:
float FUNCNAME (float * f) {
// This example adds 16 floats
Vec16f a; // vector of 16 floats
a.load(f); // load array into vector
return horizontal_add(a); // return sum of 16 elements
}
#if INSTRSET == 2
// make dispatcher in only the lowest of the compiled versions
// This function pointer initially points to the dispatcher.
// After the first call it points to the selected version:
MyFuncType * myfunc_pointer = &myfunc_dispatch; // function pointer
// Dispatcher
float myfunc_dispatch(float * f) {
int iset = instrset_detect(); // Detect supported instruction set
if (iset >= 10) myfunc_pointer = &myfunc_AVX512; // AVX512 version
else if (iset >= 8) myfunc_pointer = &myfunc_AVX2; // AVX2 version
else if (iset >= 5) myfunc_pointer = &myfunc_SSE41; // SSE4.1 version
else if (iset >= 2) myfunc_pointer = &myfunc_SSE2; // SSE2 version
else {
// Error: lowest instruction set not supported
fprintf(stderr, "\nError: Instruction set SSE2 not supported on this computer");
return 0.f;
}
// continue in dispatched version of the function
return (*myfunc_pointer)(f);
}
// Entry to dispatched function call
inline float myfunc(float * f) {
return (*myfunc_pointer)(f); // go to dispatched version
}
// Example: main calls myfunc
int main() {
float a[16]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; // array of 16 floats
float sum = myfunc(a); // call function with dispatching
printf("\nsum = %8.2f \n", sum); // print result
return 0;
}
#endif // INSTRSET == 2