diff --git a/cpu/cpu.go b/cpu/cpu.go index 4756ad5f7..8fa707aa4 100644 --- a/cpu/cpu.go +++ b/cpu/cpu.go @@ -103,6 +103,7 @@ var ARM64 struct { HasASIMDDP bool // Advanced SIMD double precision instruction set HasSHA512 bool // SHA512 hardware implementation HasSVE bool // Scalable Vector Extensions + HasSVE2 bool // Scalable Vector Extensions 2 HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 _ CacheLinePad } diff --git a/cpu/cpu_arm64.go b/cpu/cpu_arm64.go index f3eb993bf..0e27a21e1 100644 --- a/cpu/cpu_arm64.go +++ b/cpu/cpu_arm64.go @@ -28,6 +28,7 @@ func initOptions() { {Name: "sm3", Feature: &ARM64.HasSM3}, {Name: "sm4", Feature: &ARM64.HasSM4}, {Name: "sve", Feature: &ARM64.HasSVE}, + {Name: "sve2", Feature: &ARM64.HasSVE2}, {Name: "crc32", Feature: &ARM64.HasCRC32}, {Name: "atomics", Feature: &ARM64.HasATOMICS}, {Name: "asimdhp", Feature: &ARM64.HasASIMDHP}, @@ -164,6 +165,15 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { switch extractBits(pfr0, 32, 35) { case 1: ARM64.HasSVE = true + + parseARM64SVERegister(getzfr0()) + } +} + +func parseARM64SVERegister(zfr0 uint64) { + switch extractBits(zfr0, 0, 3) { + case 1: + ARM64.HasSVE2 = true } } diff --git a/cpu/cpu_arm64.s b/cpu/cpu_arm64.s index fcb9a3888..22cc99844 100644 --- a/cpu/cpu_arm64.s +++ b/cpu/cpu_arm64.s @@ -29,3 +29,11 @@ TEXT ·getpfr0(SB),NOSPLIT,$0-8 WORD $0xd5380400 MOVD R0, ret+0(FP) RET + +// func getzfr0() uint64 +TEXT ·getzfr0(SB),NOSPLIT,$0-8 + // get SVE Feature Register 0 into x0 + // mrs x0, ID_AA64ZFR0_EL1 = d5380480 + WORD $0xd5380480 + MOVD R0, ret+0(FP) + RET diff --git a/cpu/cpu_gc_arm64.go b/cpu/cpu_gc_arm64.go index a8acd3e32..6ac6e1efb 100644 --- a/cpu/cpu_gc_arm64.go +++ b/cpu/cpu_gc_arm64.go @@ -9,3 +9,4 @@ package cpu func getisar0() uint64 func getisar1() uint64 func getpfr0() uint64 +func getzfr0() uint64 diff --git a/cpu/cpu_linux_arm64.go b/cpu/cpu_linux_arm64.go index a968b80fa..3d386d0fc 100644 --- a/cpu/cpu_linux_arm64.go +++ b/cpu/cpu_linux_arm64.go @@ -35,6 +35,8 @@ const ( hwcap_SHA512 = 1 << 21 hwcap_SVE = 1 << 22 hwcap_ASIMDFHM = 1 << 23 + + hwcap2_SVE2 = 1 << 1 ) // linuxKernelCanEmulateCPUID reports whether we're running @@ -104,6 +106,9 @@ func doinit() { ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) ARM64.HasSVE = isSet(hwCap, hwcap_SVE) ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) + + // HWCAP2 feature bits + ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) } func isSet(hwc uint, value uint) bool {