Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query AVX2 and AVX512VL support when selecting x86 kernels #1949

Merged
merged 7 commits into from
Jan 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ matrix:

- &test-macos
os: osx
osx_image: xcode8
osx_image: xcode8.3
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
- brew update
Expand Down
2 changes: 1 addition & 1 deletion common_x86_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
: "0" (op), "c"(0));
#endif
}

Expand Down
1 change: 1 addition & 0 deletions cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
#define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)
#define HAVE_AVX2 (1 << 22)

#define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2
Expand Down
134 changes: 76 additions & 58 deletions cpuid_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
#else
__asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
#endif
}

Expand Down Expand Up @@ -211,6 +211,44 @@ int support_avx(){
#endif
}

int support_avx2(){
#ifndef NO_AVX2
int eax, ebx, ecx=0, edx;
int ret=0;

if (!support_avx)
return 0;
cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 0)
ret=1; //OS supports AVX2
return ret;
#else
return 0;
#endif
}

int support_avx512(){
#ifndef NO_AVX512
int eax, ebx, ecx, edx;
int ret=0;

if (!support_avx)
return 0;
cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & 32) != 32){
ret=0; //OS does not even support AVX2
}
if((ebx & (1<<31)) != 0){
xgetbv(0, &eax, &edx);
if((eax & 0xe0) == 0xe0)
ret=1; //OS supports AVX512VL
}
return ret;
#else
return 0;
#endif
}


int get_vendor(void){
int eax, ebx, ecx, edx;
Expand Down Expand Up @@ -294,6 +332,8 @@ int get_cputype(int gettype){
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
#ifndef NO_AVX
if (support_avx()) feature |= HAVE_AVX;
if (support_avx2()) feature |= HAVE_AVX2;
if (support_avx512()) feature |= HAVE_AVX512VL;
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
#endif

Expand Down Expand Up @@ -1228,22 +1268,18 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM;
case 12:
case 15:
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 13:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
Expand All @@ -1252,33 +1288,27 @@ int get_cpuname(void){
switch (model) {
case 5:
case 6:
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 7:
case 15:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 14:
//Skylake
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 12:
Expand All @@ -1292,46 +1322,36 @@ int get_cpuname(void){
switch (model) {
case 6:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 5:
// Skylake X
#ifndef NO_AVX512
return CPUTYPE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
#endif
case 14:
// Skylake
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 7:
// Xeon Phi Knights Landing
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 12:
Expand All @@ -1342,30 +1362,24 @@ int get_cpuname(void){
case 6:
switch (model) {
case 6: // Cannon Lake
#ifndef NO_AVX512
return CPUTYPE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
#endif
}
break;
case 9:
case 8:
switch (model) {
case 14: // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return CPUTYPE_HASWELL;
#else
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
Expand Down Expand Up @@ -2112,6 +2126,8 @@ void get_cpuconfig(void){
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
Expand Down Expand Up @@ -2180,6 +2196,8 @@ void get_sse(void){
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
Expand Down
Loading