Skip to content

Commit

Permalink
cmd/compile: ARM64 optimize []float64 and []float32 access
Browse files Browse the repository at this point in the history
Optimize load and store to []float64 and []float32.
Previously it used LSL instead of shifted register indexed load/store.

Before:

    LSL   $3, R0, R0
    FMOVD F0, (R1)(R0)

After:

    FMOVD F0, (R1)(R0<<3)

Fixes #42798

Change-Id: I0c0912140c3dce5aa6abc27097c0eb93833cc589
Reviewed-on: https://go-review.googlesource.com/c/go/+/273706
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Giovanni Bajo <rasky@develer.com>
  • Loading branch information
egonelbre authored and cherrymui committed Feb 24, 2021
1 parent 80ddc17 commit 3ee3243
Show file tree
Hide file tree
Showing 7 changed files with 396 additions and 14 deletions.
14 changes: 10 additions & 4 deletions src/cmd/compile/internal/arm64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,11 @@ func genIndexedOperand(v *ssa.Value) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
Expand Down Expand Up @@ -435,7 +437,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVHUloadidx2,
ssa.OpARM64MOVWloadidx4,
ssa.OpARM64MOVWUloadidx4,
ssa.OpARM64MOVDloadidx8:
ssa.OpARM64MOVDloadidx8,
ssa.OpARM64FMOVDloadidx8,
ssa.OpARM64FMOVSloadidx4:
p := s.Prog(v.Op.Asm())
p.From = genIndexedOperand(v)
p.To.Type = obj.TYPE_REG
Expand Down Expand Up @@ -472,7 +476,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64FMOVDstoreidx,
ssa.OpARM64MOVHstoreidx2,
ssa.OpARM64MOVWstoreidx4,
ssa.OpARM64MOVDstoreidx8:
ssa.OpARM64FMOVSstoreidx4,
ssa.OpARM64MOVDstoreidx8,
ssa.OpARM64FMOVDstoreidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG
Expand Down
18 changes: 18 additions & 0 deletions src/cmd/compile/internal/ssa/gen/ARM64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,15 @@
(MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
(MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload [int32(c)<<1] ptr mem)

(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)

(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
Expand Down Expand Up @@ -865,6 +874,15 @@
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)

(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)

(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
Expand Down
20 changes: 12 additions & 8 deletions src/cmd/compile/internal/ssa/gen/ARM64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,11 +379,13 @@ func init() {
{name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem.

// shifted register indexed load
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem.
{name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem.

{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
Expand All @@ -402,9 +404,11 @@ func init() {
{name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem.

// shifted register indexed store
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
{name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.

{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
Expand Down
56 changes: 56 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3ee3243

Please sign in to comment.