From c49c7a675a2c8ee7591a2d6243255813856b65ce Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 1 Feb 2021 00:45:41 -0800 Subject: [PATCH] runtime: save R15 before checking AVX state When in dynlink mode, reading a global can clobber R15. Just to be safe, save R15 before checking the AVX state to see if we need to VZEROUPPER or not. This could cause a problem in buildmodes that aren't supported yet. Change-Id: I8fda62d3fbe808584774fa5e8d9810a4612a84e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/288452 Trust: Keith Randall Reviewed-by: Cherry Zhang --- src/runtime/mkpreempt.go | 19 +++++++++++++------ src/runtime/preempt_amd64.s | 10 +++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 3069d6ed04e4db..3a9e6cc4780450 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -230,12 +230,16 @@ func genAMD64() { if reg == "SP" || reg == "BP" { continue } - if strings.HasPrefix(reg, "X") { - l.add("MOVUPS", reg, 16) - } else { + if !strings.HasPrefix(reg, "X") { l.add("MOVQ", reg, 8) } } + lSSE := layout{stack: l.stack, sp: "SP"} + for _, reg := range regNamesAMD64 { + if strings.HasPrefix(reg, "X") { + lSSE.add("MOVUPS", reg, 16) + } + } // TODO: MXCSR register? @@ -244,10 +248,12 @@ func genAMD64() { p("// Save flags before clobbering them") p("PUSHFQ") p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP") - p("ADJSP $%d", l.stack) + p("ADJSP $%d", lSSE.stack) p("// But vet doesn't know ADJSP, so suppress vet stack checking") p("NOP SP") + l.save() + // Apparently, the signal handling code path in darwin kernel leaves // the upper bits of Y registers in a dirty state, which causes // many SSE operations (128-bit and narrower) become much slower. @@ -259,10 +265,11 @@ func genAMD64() { p("VZEROUPPER") p("#endif") - l.save() + lSSE.save() p("CALL ·asyncPreempt2(SB)") + lSSE.restore() l.restore() - p("ADJSP $%d", -l.stack) + p("ADJSP $%d", -lSSE.stack) p("POPFQ") p("POPQ BP") p("RET") diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index 92c664d79abdbc..dc7af806d32f3e 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -13,11 +13,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 ADJSP $368 // But vet doesn't know ADJSP, so suppress vet stack checking NOP SP - #ifdef GOOS_darwin - CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0 - JE 2(PC) - VZEROUPPER - #endif MOVQ AX, 0(SP) MOVQ CX, 8(SP) MOVQ DX, 16(SP) @@ -32,6 +27,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVQ R13, 88(SP) MOVQ R14, 96(SP) MOVQ R15, 104(SP) + #ifdef GOOS_darwin + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0 + JE 2(PC) + VZEROUPPER + #endif MOVUPS X0, 112(SP) MOVUPS X1, 128(SP) MOVUPS X2, 144(SP)