Skip to content

Commit

Permalink
[dev.regabi] cmd/compile: reserve X15 as zero register on AMD64
Browse files Browse the repository at this point in the history
In ABIInternal, reserve X15 as constant zero, and use it to zero
memory. (Maybe there can be more use of it?)

The register is zeroed when transitioning to ABIInternal from ABI0.

Caveat: using X15 generates longer instructions than using X0.
Maybe we want to use X0?

Change-Id: I12d5ee92a01fc0b59dad4e5ab023ac71bc2a8b7d
Reviewed-on: https://go-review.googlesource.com/c/go/+/288093
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
  • Loading branch information
cherrymui committed Feb 3, 2021
1 parent bfc7418 commit 401d7e5
Show file tree
Hide file tree
Showing 13 changed files with 347 additions and 315 deletions.
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/amd64/ggen.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ var isPlan9 = objabi.GOOS == "plan9"
const (
dzBlocks = 16 // number of MOV/ADD blocks
dzBlockLen = 4 // number of clears per block
dzBlockSize = 19 // size of instructions in a single block
dzMovSize = 4 // size of single MOV instruction w/ offset
dzBlockSize = 23 // size of instructions in a single block
dzMovSize = 5 // size of single MOV instruction w/ offset
dzLeaqSize = 4 // size of single LEAQ instruction
dzClearStep = 16 // number of bytes cleared by each MOV instruction

Expand Down
43 changes: 36 additions & 7 deletions src/cmd/compile/internal/amd64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,20 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux2(&p.To, v, sc.Off())
case ssa.OpAMD64MOVOstorezero:
if s.ABI != obj.ABIInternal {
v.Fatalf("MOVOstorezero can be only used in ABIInternal functions")
}
if !base.Flag.ABIWrap {
// zeroing X15 manually if wrappers are not used
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_X15
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
Expand Down Expand Up @@ -900,6 +914,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
case ssa.OpAMD64DUFFZERO:
if s.ABI != obj.ABIInternal {
v.Fatalf("MOVOconst can be only used in ABIInternal functions")
}
if !base.Flag.ABIWrap {
// zeroing X15 manually if wrappers are not used
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
off := duffStart(v.AuxInt)
adj := duffAdj(v.AuxInt)
var p *obj.Prog
Expand All @@ -915,12 +936,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_ADDR
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = off
case ssa.OpAMD64MOVOconst:
if v.AuxInt != 0 {
v.Fatalf("MOVOconst can only do constant=0")
}
r := v.Reg()
opregreg(s, x86.AXORPS, r, r)
case ssa.OpAMD64DUFFCOPY:
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_ADDR
Expand Down Expand Up @@ -1000,7 +1015,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
q.To.Type = obj.TYPE_REG
q.To.Reg = r
}
case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
case ssa.OpAMD64CALLstatic:
if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
// zeroing X15 when entering ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
s.Call(v)
if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
// zeroing X15 when returning to ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
s.Call(v)

case ssa.OpAMD64LoweredGetCallerPC:
Expand Down Expand Up @@ -1297,6 +1322,10 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
case ssa.BlockRet:
s.Prog(obj.ARET)
case ssa.BlockRetJmp:
if s.ABI == obj.ABI0 && b.Aux.(*obj.LSym).ABI() == obj.ABIInternal {
// zeroing X15 when entering ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
p := s.Prog(obj.ARET)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/ssa/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.registers = registersAMD64[:]
c.gpRegMask = gpRegMaskAMD64
c.fpRegMask = fpRegMaskAMD64
c.specialRegMask = specialRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
Expand Down
26 changes: 13 additions & 13 deletions src/cmd/compile/internal/ssa/gen/AMD64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -361,31 +361,31 @@
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE =>
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstore destptr (MOVOconst [0]) mem))
(MOVOstorezero destptr mem))

(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE =>
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))

(Zero [16] destptr mem) && config.useSSE =>
(MOVOstore destptr (MOVOconst [0]) mem)
(MOVOstorezero destptr mem)
(Zero [32] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
(MOVOstorezero destptr mem))
(Zero [48] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem)))
(MOVOstorezero (OffPtr <destptr.Type> destptr [32])
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
(MOVOstorezero destptr mem)))
(Zero [64] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))))
(MOVOstorezero (OffPtr <destptr.Type> destptr [48])
(MOVOstorezero (OffPtr <destptr.Type> destptr [32])
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
(MOVOstorezero destptr mem))))

// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
&& s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice =>
(DUFFZERO [s] destptr (MOVOconst [0]) mem)
(DUFFZERO [s] destptr mem)

// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
Expand Down Expand Up @@ -1900,7 +1900,7 @@
&& c.Val() == 0
&& c2.Val() == 0
&& clobber(x)
=> (MOVOstore [c2.Off32()] {s} p (MOVOconst [0]) mem)
=> (MOVOstorezero [c2.Off32()] {s} p mem)

// Combine stores into larger (unaligned) stores. Little endian.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
Expand Down
41 changes: 21 additions & 20 deletions src/cmd/compile/internal/ssa/gen/AMD64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ var regNamesAMD64 = []string{
"X12",
"X13",
"X14",
"X15",
"X15", // constant 0 in ABIInternal

// If you add registers, update asyncPreempt in runtime

Expand Down Expand Up @@ -97,7 +97,8 @@ func init() {
dx = buildReg("DX")
bx = buildReg("BX")
gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14")
x15 = buildReg("X15")
gpsp = gp | buildReg("SP")
gpspsb = gpsp | buildReg("SB")
callerSave = gp | fp
Expand Down Expand Up @@ -684,19 +685,20 @@ func init() {
// Note: LEAx{1,2,4,8} must not have OpSB as either argument.

// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128", faultOnNilArg0: true, symEffect: "Read"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
{name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
{name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128", faultOnNilArg0: true, symEffect: "Read"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVOstorezero", argLength: 2, reg: regInfo{inputs: []regMask{gpspsb, 0}}, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0+auxint+aux. arg1=mem

// indexed loads/stores
{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", scale: 1, aux: "SymOff", typ: "UInt8", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
Expand Down Expand Up @@ -735,22 +737,20 @@ func init() {
{name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", scale: 8, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store 8 bytes of ... 8*arg1 ...

// arg0 = pointer to start of memory to zero
// arg1 = value to store (will always be zero)
// arg2 = mem
// arg1 = mem
// auxint = # of bytes to zero
// returns mem
{
name: "DUFFZERO",
aux: "Int64",
argLength: 3,
argLength: 2,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("X0")},
inputs: []regMask{buildReg("DI")},
clobbers: buildReg("DI"),
},
faultOnNilArg0: true,
unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts
},
{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true},

// arg0 = address of memory to zero
// arg1 = # of 8-byte words to zero
Expand Down Expand Up @@ -935,6 +935,7 @@ func init() {
regnames: regNamesAMD64,
gpregmask: gp,
fpregmask: fp,
specialregmask: x15,
framepointerreg: int8(num["BP"]),
linkreg: -1, // not used
})
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/ssa/op.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ func ClosureAuxCall(args []Param, results []Param) *AuxCall {
func (*AuxCall) CanBeAnSSAAux() {}

// OwnAuxCall returns a function's own AuxCall
func OwnAuxCall(args []Param, results []Param) *AuxCall {
func OwnAuxCall(fn *obj.LSym, args []Param, results []Param) *AuxCall {
// TODO if this remains identical to ClosureAuxCall above after new ABI is done, should deduplicate.
return &AuxCall{Fn: nil, args: args, results: results}
return &AuxCall{Fn: fn, args: args, results: results}
}

const (
Expand Down
Loading

0 comments on commit 401d7e5

Please sign in to comment.