-
Notifications
You must be signed in to change notification settings - Fork 2
/
sleefsseavx.h
108 lines (85 loc) · 2.44 KB
/
sleefsseavx.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include <immintrin.h>
#include <stdint.h>
/*
 * SSE2 back end: 128-bit vectors, i.e. 2 doubles or 4 floats / 4 int32s each.
 *
 * The section is skipped when ENABLE_AVX is defined. The AVX section of this
 * header defines the same macros, typedefs and helper functions with
 * different types, and GCC/Clang predefine __SSE2__ whenever AVX code
 * generation is enabled, so testing __SSE2__ alone would compile both
 * sections and fail on the conflicting definitions.
 */
#if defined(__SSE2__) && !defined(ENABLE_AVX)
#define VECTLENDP 2 /* double lanes per vdouble */
#define VECTLENSP 4 /* float lanes per vfloat */

typedef __m128d vdouble; /* 2 x double */
typedef __m128i vint;    /* integer vector; NOTE(review): presumably int32 lanes paired with vdouble - confirm */
typedef __m128 vfloat;   /* 4 x float */
typedef __m128i vint2;   /* integer vector moved as 4 x int32_t by vloadui2/vstoreui2 */
typedef __m128i vmask;   /* NOTE(review): presumably a per-lane bit mask; not used in this header */

/* Load 2 doubles from p[0..1]; p does not have to be 16-byte aligned. */
static vdouble vloadu(double *p) { return _mm_loadu_pd(p); }

/* Store the 2 doubles of v to p[0..1]; p does not have to be 16-byte aligned. */
static void vstoreu(double *p, vdouble v) { _mm_storeu_pd(p, v); }

/* Load 4 floats from p[0..3]; p does not have to be 16-byte aligned. */
static vfloat vloaduf(float *p) { return _mm_loadu_ps(p); }

/* Store the 4 floats of v to p[0..3]; p does not have to be 16-byte aligned. */
static void vstoreuf(float *p, vfloat v) { _mm_storeu_ps(p, v); }

/*
 * Load 4 int32_t values from p[0..3] (unaligned). vint2 is __m128i in this
 * section, so the intrinsic's result is returned without a cast.
 */
static vint2 vloadui2(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }

/* Store the 4 int32_t values of v to p[0..3] (unaligned). */
static void vstoreui2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
#endif
#ifdef ENABLE_AVX
/* AVX back end: 256-bit floating-point vectors, 128-bit integer vectors. */
#define VECTLENDP 4 /* double lanes per vdouble */
#define VECTLENSP 8 /* float lanes per vfloat */

typedef __m256d vdouble; /* 4 x double */
typedef __m256 vfloat;   /* 8 x float */
typedef __m128i vint;    /* 128-bit integer vector */

/*
 * Eight int32 lanes kept as two 128-bit halves: vloadui2/vstoreui2 map
 * x to p[0..3] and y to p[4..7].
 */
typedef struct {
  vint x;
  vint y;
} vint2;
static vdouble vloadu(double *p) { return _mm256_loadu_pd(p); }
/*
 * Store the 4 doubles of v to p[0..3]; p does not have to be 32-byte aligned.
 *
 * The intrinsic is called as a plain statement: ISO C forbids `return expr;`
 * in a function returning void, even when expr itself has type void.
 */
static void vstoreu(double *p, vdouble v) { _mm256_storeu_pd(p, v); }
static vfloat vloaduf(float *p) { return _mm256_loadu_ps(p); }
/*
 * Store the 8 floats of v to p[0..7]; p does not have to be 32-byte aligned.
 *
 * The intrinsic is called as a plain statement: ISO C forbids `return expr;`
 * in a function returning void, even when expr itself has type void.
 */
static void vstoreuf(float *p, vfloat v) { _mm256_storeu_ps(p, v); }
/*
 * Load 8 int32_t values from p[0..7] (unaligned) as two 128-bit halves:
 * p[0..3] go to the x member, p[4..7] to the y member.
 */
static vint2 vloadui2(int32_t *p) {
  __m128i *lo = (__m128i *)p;
  __m128i *hi = (__m128i *)(p + 4);
  vint2 pair;

  pair.x = _mm_loadu_si128(lo);
  pair.y = _mm_loadu_si128(hi);
  return pair;
}
/*
 * Store 8 int32_t values to p[0..7] (unaligned): the x member goes to
 * p[0..3], the y member to p[4..7].
 */
static void vstoreui2(int32_t *p, vint2 v) {
  __m128i *lo = (__m128i *)p;
  __m128i *hi = (__m128i *)(p + 4);

  _mm_storeu_si128(lo, v.x);
  _mm_storeu_si128(hi, v.y);
}
#endif
/*
 * Pair of double-precision vectors; the return type of xsincos().
 * NOTE(review): which member holds the sine and which the cosine is not
 * visible in this header - confirm against the implementation.
 */
typedef struct {
  vdouble x;
  vdouble y;
} vdouble2;
/*
 * Double-precision vector math API. These are declarations only; the
 * definitions are not part of this header.
 *
 * NOTE(review): the names mirror <math.h> with an `x` prefix (xsin ~ sin,
 * xatan2 ~ atan2, ...), which suggests that each routine applies that
 * function independently to every lane of its vdouble argument(s). Confirm
 * this, plus accuracy and special-value behaviour, against the implementation.
 */
/* Takes an integer vector q alongside x. */
vdouble xldexp(vdouble x, vint q);
/* Returns an integer vector rather than a vdouble. */
vint xilogb(vdouble d);
vdouble xsin(vdouble d);
vdouble xcos(vdouble d);
/* Returns two vectors packed in a vdouble2. */
vdouble2 xsincos(vdouble d);
vdouble xtan(vdouble d);
vdouble xasin(vdouble s);
vdouble xacos(vdouble s);
vdouble xatan(vdouble s);
/* Two-argument form; note the (y, x) parameter order. */
vdouble xatan2(vdouble y, vdouble x);
vdouble xlog(vdouble d);
vdouble xexp(vdouble d);
vdouble xpow(vdouble x, vdouble y);
vdouble xsinh(vdouble d);
vdouble xcosh(vdouble d);
vdouble xtanh(vdouble d);
vdouble xasinh(vdouble s);
vdouble xacosh(vdouble s);
vdouble xatanh(vdouble s);
vdouble xcbrt(vdouble d);
vdouble xexp2(vdouble a);
vdouble xexp10(vdouble a);
vdouble xexpm1(vdouble a);
vdouble xlog10(vdouble a);
vdouble xlog1p(vdouble a);
// Single-precision API: the declarations below operate on vfloat.
/*
 * Pair of single-precision vectors; the return type of xsincosf().
 * NOTE(review): which member holds the sine and which the cosine is not
 * visible in this header - confirm against the implementation.
 */
typedef struct {
  vfloat x;
  vfloat y;
} vfloat2;
/*
 * Single-precision vector math API. These are declarations only; the
 * definitions are not part of this header.
 *
 * NOTE(review): the names mirror the <math.h> float functions with an `x`
 * prefix (xsinf ~ sinf, xatan2f ~ atan2f, ...), which suggests that each
 * routine applies that function independently to every lane of its vfloat
 * argument(s). Confirm against the implementation.
 */
vfloat xsinf(vfloat d);
vfloat xcosf(vfloat d);
/* Returns two vectors packed in a vfloat2. */
vfloat2 xsincosf(vfloat d);
vfloat xtanf(vfloat d);
vfloat xasinf(vfloat s);
vfloat xacosf(vfloat s);
vfloat xatanf(vfloat s);
/* Two-argument form; note the (y, x) parameter order. */
vfloat xatan2f(vfloat y, vfloat x);
vfloat xlogf(vfloat d);
/* NOTE(review): how this `0` variant differs from xlogf is not evident from this header - confirm. */
vfloat xlogf0(vfloat d);
vfloat xexpf(vfloat d);
vfloat xcbrtf(vfloat s);