forked from xtcyclist/Phi-Benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfmadd.c
129 lines (121 loc) · 3.1 KB
/
fmadd.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#define _GNU_SOURCE
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include "immintrin.h"
/*
 * ONE: a block of three 512-bit fused multiply-adds that overwrites a, b and c
 * in turn.  Each statement reads the value produced by the statement before it,
 * so the three operations form a serial dependency chain.  The single-precision
 * intrinsic is used unless DOUBLE is defined at compile time, in which case the
 * double-precision one is used.  The macro expects variables named a, b and c
 * of the matching vector type to be in scope at the expansion site.
 */
#ifndef DOUBLE
#define ONE {a=_mm512_fmadd_ps(a,b,c);b=_mm512_fmadd_ps(a,b,c);c=_mm512_fmadd_ps(a,b,c);}
#else
#define ONE {a=_mm512_fmadd_pd(a,b,c);b=_mm512_fmadd_pd(a,b,c);c=_mm512_fmadd_pd(a,b,c);}
#endif
/*
 * Textual repetition helpers: TEN expands to 10 copies of ONE, HUN to 100 and
 * THO to 1000.  Only HUN is referenced by the code visible in this file.
 */
#define TEN ONE ONE ONE ONE ONE ONE ONE ONE ONE ONE
#define HUN TEN TEN TEN TEN TEN TEN TEN TEN TEN TEN
#define THO HUN HUN HUN HUN HUN HUN HUN HUN HUN HUN
double d_bi_start_sec; /**< start time: seconds value that bi_gettimeofday_improved() subtracts from tv_sec */
/* Current gettimeofday() reading expressed as seconds, with the microsecond
 * field added as a fraction. */
static double bi_gettimeofday() {
    struct timeval now;
    double whole_seconds;
    double fraction;

    gettimeofday(&now, NULL);
    whole_seconds = (double) now.tv_sec;
    fraction = (double) now.tv_usec * 1.0e-6;
    return whole_seconds + fraction;
}
/* Same reading as gettimeofday(), but with d_bi_start_sec subtracted from the
 * seconds field before the microsecond fraction is added. */
static double bi_gettimeofday_improved() {
    struct timeval now;
    double seconds_since_start;
    double fraction;

    gettimeofday(&now, NULL);
    seconds_since_start = (double) now.tv_sec - d_bi_start_sec;
    fraction = (double) now.tv_usec * 1.0e-6;
    return seconds_since_start + fraction;
}
/* Timer entry point: a function pointer initialised to
 * bi_gettimeofday_improved.  It is not called by the code visible in this file. */
double (*bi_gettime)() = bi_gettimeofday_improved;
/*
 * Return the current wall-clock time in seconds, including the microsecond
 * fraction reported by gettimeofday().
 *
 * Returns 0.0 if gettimeofday() reports failure, so callers never see a value
 * built from an uninitialised timeval.
 */
double mysecond(void)
{
    struct timeval tp = {0, 0};

    /* POSIX leaves the behaviour unspecified when the timezone argument is
     * non-NULL, and the argument is obsolete, so NULL is passed. */
    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }
    return (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;
}
/* Per-thread argument record handed to thread() through pthread_create(). */
typedef struct
{
float *mem;                 /* where the thread stores its final vector; main() passes a 16-float slice of a 64-byte-aligned array */
void **ptr_per_core;        /* not read or written by the code visible in this file */
int tid;                    /* thread index assigned by main(); only tid 0 prints the result */
int threads;                /* total thread count, used to scale the reported FLOP rate */
long size;                  /* not read or written by the code visible in this file */
double time;                /* not read or written by the code visible in this file */
pthread_barrier_t *barrier; /* barrier shared by all threads; the waits on it in thread() are commented out */
}argc_t;
/*
 * Worker routine: time a fixed-length chain of 512-bit fused multiply-adds.
 *
 * parm points to this thread's argc_t.  The timed region is 100 loop
 * iterations of HUN, i.e. 100 * 100 expansions of ONE, each holding three FMAs.
 * The final vector is stored to arg->mem, which must be 64-byte aligned for
 * the aligned store; presumably the store also keeps the optimiser from
 * discarding the chain.  The thread with tid 0 prints a GFLOPS figure that
 * scales its own elapsed time by the total thread count.
 *
 * The barrier in arg->barrier is not waited on, so threads are not
 * synchronised around the timed region.
 *
 * Always returns NULL.
 */
void *thread (void *parm)
{
    argc_t *arg = (argc_t *) parm;
    double start, stop;
    int i;
#ifndef DOUBLE
    __m512 a, b, c;
    a = _mm512_set4_ps(1.1, 2.1, 3.1, 4.1);
    b = _mm512_set4_ps(1.2, 2.2, 3.2, 4.2);
    c = _mm512_set4_ps(1.3, 2.3, 3.3, 4.3);
#else
    __m512d a, b, c;
    a = _mm512_set4_pd(1.1, 2.1, 3.1, 4.1);
    b = _mm512_set4_pd(1.2, 2.2, 3.2, 4.2);
    c = _mm512_set4_pd(1.3, 2.3, 3.3, 4.3);
#endif

    start = mysecond();
    for (i = 0; i < 100; ++i)
    {
        HUN
    }
    stop = mysecond();

#ifndef DOUBLE
    _mm512_store_ps(arg->mem, c);
#else
    _mm512_store_pd(arg->mem, c);
#endif

    if (arg->tid == 0)
    {
        /* Elements per 512-bit vector: 16 floats or 8 doubles. */
#ifndef DOUBLE
        const double lanes = 16.0;
#else
        const double lanes = 8.0;
#endif
        /* threads * loop iterations * ONEs per HUN * FMAs per ONE
         * * 2 flops per FMA * lanes, computed in double so the count cannot
         * overflow int. */
        const double flops = (double) arg->threads * 100 * 100 * 3 * 2 * lanes;
        const double flops_per_sec = flops / (stop - start);

        printf ("#threads: %d, GFLOPS %f \n", arg->threads, flops_per_sec / 1000000000);
    }
    /* A pthread start routine must return a value; falling off the end of a
     * non-void function leaves the thread's exit value indeterminate. */
    return NULL;
}
/*
 * Entry point.  Usage: <program> <threads>
 *
 * Starts <threads> worker threads (1..500), pins thread i to CPU i, and waits
 * for all of them to finish.  Each worker gets a 16-float, 64-byte-aligned
 * slice of a shared buffer for its result store.
 *
 * Returns 0 on success.  Returns 1 on a missing or invalid argument, or when
 * the barrier or a thread cannot be created; threads already started are
 * still joined before returning.
 */
int main (int argc, char **argv)
{
    enum { MAX_THREADS = 500, FLOATS_PER_THREAD = 16 };
    pthread_t tid[MAX_THREADS];
    argc_t info[MAX_THREADS] = {0};
    float mem[MAX_THREADS * FLOATS_PER_THREAD] __attribute__((aligned(64)));
    pthread_attr_t attr;
    pthread_barrier_t barrier;
    cpu_set_t set;
    char *end;
    long requested;
    int threads;
    int created;
    int i;
    int rc;
    int status = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <threads>\n", argc > 0 ? argv[0] : "fmadd");
        return 1;
    }

    /* strtol reports malformed input via the end pointer; an out-of-range
     * value saturates and is rejected by the bounds check below. */
    requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || requested < 1 || requested > MAX_THREADS)
    {
        fprintf(stderr, "thread count must be an integer between 1 and %d\n", MAX_THREADS);
        return 1;
    }
    threads = (int) requested;

    rc = pthread_barrier_init(&barrier, NULL, (unsigned) threads);
    if (rc != 0)
    {
        fprintf(stderr, "pthread_barrier_init: %s\n", strerror(rc));
        return 1;
    }

    for (created = 0; created < threads; ++created)
    {
        i = created;
        info[i].tid = i;
        info[i].threads = threads;
        info[i].barrier = &barrier;
        info[i].mem = &mem[i * FLOATS_PER_THREAD];

        pthread_attr_init(&attr);
        CPU_ZERO(&set);
        CPU_SET(i, &set); /* pin the thread to the corresponding core */
        pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &set);
        rc = pthread_create(&tid[i], &attr, thread, (void *) &info[i]);
        pthread_attr_destroy(&attr);
        if (rc != 0)
        {
            /* Stop here so that only valid thread handles are joined below. */
            fprintf(stderr, "pthread_create (thread %d): %s\n", i, strerror(rc));
            status = 1;
            break;
        }
    }

    for (i = 0; i < created; ++i)
    {
        pthread_join(tid[i], NULL);
    }

    pthread_barrier_destroy(&barrier);
    return status;
}