diff --git a/cpu/cpu.go b/cpu/cpu.go
index 02609d5b2..9c105f23a 100644
--- a/cpu/cpu.go
+++ b/cpu/cpu.go
@@ -72,6 +72,9 @@ var X86 struct {
 	HasSSSE3            bool // Supplemental streaming SIMD extension 3
 	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
 	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
+	HasAVXIFMA          bool // Advanced vector extension Integer Fused Multiply Add
+	HasAVXVNNI          bool // Advanced vector extension Vector Neural Network Instructions
+	HasAVXVNNIInt8      bool // Advanced vector extension Vector Neural Network Int8 instructions
 	_                   CacheLinePad
 }
 
diff --git a/cpu/cpu_test.go b/cpu/cpu_test.go
index 7a9bac7e5..dd493ece8 100644
--- a/cpu/cpu_test.go
+++ b/cpu/cpu_test.go
@@ -41,6 +41,41 @@ func TestAVX512HasAVX2AndAVX(t *testing.T) {
 	}
 }
 
+// TestAVX512BF16HasAVX512 checks that, on amd64, HasAVX512BF16 is only
+// reported when HasAVX512 is reported as well.
+func TestAVX512BF16HasAVX512(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		if cpu.X86.HasAVX512BF16 && !cpu.X86.HasAVX512 {
+			t.Fatal("HasAVX512 expected true, got false")
+		}
+	}
+}
+
+// TestAVXVNNIHasAVX checks that HasAVXVNNI is only reported together with HasAVX.
+func TestAVXVNNIHasAVX(t *testing.T) {
+	if cpu.X86.HasAVXVNNI && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+}
+
+// TestAVXIFMAHasAVX checks that HasAVXIFMA is only reported together with
+// HasAVX. HasAVXVNNI is deliberately not required: archInit reads it from its
+// own CPUID bit, independently of HasAVXIFMA.
+func TestAVXIFMAHasAVX(t *testing.T) {
+	if cpu.X86.HasAVXIFMA && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+}
+
+// TestAVXVNNIInt8HasAVX checks that HasAVXVNNIInt8 is only reported together
+// with HasAVX. HasAVXVNNI is deliberately not required: archInit reads it from
+// its own CPUID bit, independently of HasAVXVNNIInt8.
+func TestAVXVNNIInt8HasAVX(t *testing.T) {
+	if cpu.X86.HasAVXVNNIInt8 && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+}
+
 func TestARM64minimalFeatures(t *testing.T) {
 	if runtime.GOARCH != "arm64" || runtime.GOOS == "ios" {
 		return
diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go
index 600a68078..1e642f330 100644
--- a/cpu/cpu_x86.go
+++ b/cpu/cpu_x86.go
@@ -53,6 +53,9 @@ func initOptions() {
 		{Name: "sse41", Feature: &X86.HasSSE41},
 		{Name: "sse42", Feature: &X86.HasSSE42},
 		{Name: "ssse3", Feature: &X86.HasSSSE3},
+		{Name: "avxifma", Feature: &X86.HasAVXIFMA},
+		{Name: "avxvnni", Feature: &X86.HasAVXVNNI},
+		{Name: "avxvnniint8", Feature: &X86.HasAVXVNNIInt8},
 
 		// These capabilities should always be enabled on amd64:
 		{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
@@ -106,7 +109,7 @@ func archInit() {
 		return
 	}
 
-	_, ebx7, ecx7, edx7 := cpuid(7, 0)
+	eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
 	X86.HasBMI1 = isSet(3, ebx7)
 	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
 	X86.HasBMI2 = isSet(8, ebx7)
@@ -134,14 +137,26 @@ func archInit() {
 		X86.HasAVX512VAES = isSet(9, ecx7)
 		X86.HasAVX512VBMI2 = isSet(6, ecx7)
 		X86.HasAVX512BITALG = isSet(12, ecx7)
-
-		eax71, _, _, _ := cpuid(7, 1)
-		X86.HasAVX512BF16 = isSet(5, eax71)
 	}
 
 	X86.HasAMXTile = isSet(24, edx7)
 	X86.HasAMXInt8 = isSet(25, edx7)
 	X86.HasAMXBF16 = isSet(22, edx7)
+
+	// These features depend on the second level of extended features
+	// (CPUID leaf 7, sub-leaf 1), which is only queried when EAX of
+	// sub-leaf 0 reports that sub-leaf 1 exists.
+	if eax7 >= 1 {
+		eax71, _, _, edx71 := cpuid(7, 1)
+		if X86.HasAVX512 {
+			X86.HasAVX512BF16 = isSet(5, eax71)
+		}
+		if X86.HasAVX {
+			X86.HasAVXIFMA = isSet(23, eax71)
+			X86.HasAVXVNNI = isSet(4, eax71)
+			X86.HasAVXVNNIInt8 = isSet(4, edx71)
+		}
+	}
 }
 
 func isSet(bitpos uint, value uint32) bool {