Skip to content

Commit

Permalink
cmd/compile: add patterns for bitfield opcodes on loong64
Browse files Browse the repository at this point in the history
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
                |  bench.old   |              bench.new               |
                |    sec/op    |    sec/op     vs base                |
LeadingZeros      1.0095n ± 0%   0.8011n ± 0%  -20.64% (p=0.000 n=10)
LeadingZeros8      1.201n ± 0%    1.167n ± 0%   -2.83% (p=0.000 n=10)
LeadingZeros16     1.201n ± 0%    1.167n ± 0%   -2.83% (p=0.000 n=10)
LeadingZeros32     1.201n ± 0%    1.134n ± 0%   -5.58% (p=0.000 n=10)
LeadingZeros64    0.8007n ± 0%   1.0115n ± 0%  +26.32% (p=0.000 n=10)
TrailingZeros     0.8054n ± 0%   0.8106n ± 1%   +0.65% (p=0.000 n=10)
TrailingZeros8     1.067n ± 0%    1.002n ± 1%   -6.09% (p=0.000 n=10)
TrailingZeros16   1.0540n ± 0%   0.8389n ± 0%  -20.40% (p=0.000 n=10)
TrailingZeros32   0.8014n ± 0%   0.8117n ± 0%   +1.29% (p=0.000 n=10)
TrailingZeros64   0.8015n ± 0%   0.8124n ± 1%   +1.36% (p=0.000 n=10)
OnesCount          3.418n ± 0%    3.417n ± 0%        ~ (p=0.911 n=10)
OnesCount8        0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
OnesCount16        1.440n ± 0%    1.299n ± 0%   -9.79% (p=0.000 n=10)
OnesCount32        2.969n ± 0%    2.940n ± 0%   -0.94% (p=0.000 n=10)
OnesCount64        3.563n ± 0%    3.558n ± 0%   -0.14% (p=0.000 n=10)
RotateLeft        0.6677n ± 0%   0.6670n ± 0%        ~ (p=0.055 n=10)
RotateLeft8        1.318n ± 1%    1.321n ± 0%        ~ (p=0.117 n=10)
RotateLeft16      0.8457n ± 1%   0.8442n ± 0%        ~ (p=0.325 n=10)
RotateLeft32      0.8004n ± 0%   0.8004n ± 0%        ~ (p=0.837 n=10)
RotateLeft64      0.6678n ± 0%   0.6670n ± 0%   -0.13% (p=0.000 n=10)
Reverse           0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
Reverse8          0.6989n ± 0%   0.6969n ± 1%        ~ (p=0.138 n=10)
Reverse16         0.6998n ± 1%   0.7004n ± 1%        ~ (p=0.985 n=10)
Reverse32         0.4158n ± 1%   0.4159n ± 1%        ~ (p=0.870 n=10)
Reverse64         0.4165n ± 1%   0.4194n ± 2%        ~ (p=0.093 n=10)
ReverseBytes      0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
ReverseBytes16    0.4183n ± 2%   0.4148n ± 1%        ~ (p=0.055 n=10)
ReverseBytes32    0.4143n ± 2%   0.4153n ± 1%        ~ (p=0.869 n=10)
ReverseBytes64    0.4168n ± 1%   0.4177n ± 1%        ~ (p=0.184 n=10)
Add                1.201n ± 0%    1.201n ± 0%        ~ (p=0.087 n=10)
Add32              1.603n ± 0%    1.601n ± 0%   -0.12% (p=0.000 n=10)
Add64              1.201n ± 0%    1.201n ± 0%        ~ (p=0.211 n=10)
Add64multiple      1.839n ± 0%    1.835n ± 0%   -0.24% (p=0.001 n=10)
Sub                1.202n ± 0%    1.201n ± 0%   -0.04% (p=0.033 n=10)
Sub32              2.401n ± 0%    1.601n ± 0%  -33.32% (p=0.000 n=10)
Sub64              1.201n ± 0%    1.201n ± 0%        ~ (p=1.000 n=10)
Sub64multiple      2.105n ± 0%    2.096n ± 0%   -0.40% (p=0.000 n=10)
Mul               0.8008n ± 0%   0.8004n ± 0%   -0.05% (p=0.000 n=10)
Mul32             0.8041n ± 0%   0.8014n ± 0%   -0.34% (p=0.000 n=10)
Mul64             0.8008n ± 0%   0.8004n ± 0%   -0.05% (p=0.000 n=10)
Div                8.977n ± 0%    8.945n ± 0%   -0.36% (p=0.000 n=10)
Div32              4.084n ± 0%    4.086n ± 0%        ~ (p=0.445 n=10)
Div64              9.316n ± 0%    9.301n ± 0%   -0.17% (p=0.000 n=10)
geomean            1.141n         1.117n        -2.09%

Change-Id: I4dc1eaab6728f771bc722ed331fe5c6429bd1037
Reviewed-on: https://go-review.googlesource.com/c/go/+/618475
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
sophie-zhao authored and abner-chenc committed Oct 18, 2024
1 parent ef3e1da commit e45c125
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/cmd/compile/internal/loong64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpLOONG64BSTRPICKV,
ssa.OpLOONG64BSTRPICKW:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
if v.Op == ssa.OpLOONG64BSTRPICKW {
p.From.Offset = v.AuxInt >> 5
p.AddRestSourceConst(v.AuxInt & 0x1f)
} else {
p.From.Offset = v.AuxInt >> 6
p.AddRestSourceConst(v.AuxInt & 0x3f)
}
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpLOONG64FMINF,
ssa.OpLOONG64FMIND,
ssa.OpLOONG64FMAXF,
Expand Down Expand Up @@ -334,6 +349,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpLOONG64MOVBloadidx,
ssa.OpLOONG64MOVBUloadidx,
ssa.OpLOONG64MOVHloadidx,
Expand All @@ -350,6 +366,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Index = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpLOONG64MOVBstoreidx,
ssa.OpLOONG64MOVHstoreidx,
ssa.OpLOONG64MOVWstoreidx,
Expand All @@ -363,6 +380,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()

case ssa.OpLOONG64MOVBstorezeroidx,
ssa.OpLOONG64MOVHstorezeroidx,
ssa.OpLOONG64MOVWstorezeroidx,
Expand All @@ -374,6 +392,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()

case ssa.OpLOONG64MOVBload,
ssa.OpLOONG64MOVBUload,
ssa.OpLOONG64MOVHload,
Expand Down
14 changes: 14 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,20 @@
(Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
(Rsh8x8 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))

// bitfield ops

// bstrpickv
// (x << lc) >> rc
(SRLVconst [rc] (SLLVconst [lc] x)) && lc <= rc => (BSTRPICKV [rc-lc + ((64-lc)-1)<<6] x)
// uint64(x) >> rc
(SRLVconst [rc] (MOVWUreg x)) && rc < 32 => (BSTRPICKV [rc + 31<<6] x)
(SRLVconst [rc] (MOVHUreg x)) && rc < 16 => (BSTRPICKV [rc + 15<<6] x)
(SRLVconst [rc] (MOVBUreg x)) && rc < 8 => (BSTRPICKV [rc + 7<<6] x)
// uint64(x >> rc)
(MOVWUreg (SRLVconst [rc] x)) && rc < 32 => (BSTRPICKV [rc + (31+rc)<<6] x)
(MOVHUreg (SRLVconst [rc] x)) && rc < 16 => (BSTRPICKV [rc + (15+rc)<<6] x)
(MOVBUreg (SRLVconst [rc] x)) && rc < 8 => (BSTRPICKV [rc + (7+rc)<<6] x)

// rotates
(RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
(RotateLeft8 <t> x y) => (OR <t> (SLLV <t> x (ANDconst <typ.Int64> [7] y)) (SRLV <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEGV <typ.Int64> y))))
Expand Down
6 changes: 6 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ func init() {
{name: "CMPGTF", argLength: 2, reg: fp2flags, asm: "CMPGTF", typ: "Flags"}, // flags=true if arg0 > arg1, float32
{name: "CMPGTD", argLength: 2, reg: fp2flags, asm: "CMPGTD", typ: "Flags"}, // flags=true if arg0 > arg1, float64

// bitfield ops
// for bstrpick.w msbw is auxInt>>5, lsbw is auxInt&0x1f
// for bstrpick.d msbd is auxInt>>6, lsbd is auxInt&0x3f
{name: "BSTRPICKW", argLength: 1, reg: gp11, asm: "BSTRPICKW", aux: "Int64"},
{name: "BSTRPICKV", argLength: 1, reg: gp11, asm: "BSTRPICKV", aux: "Int64"},

// moves
{name: "MOVVconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVV", typ: "UInt64", rematerializeable: true}, // auxint
{name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
Expand Down
30 changes: 30 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

120 changes: 120 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteLOONG64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e45c125

Please sign in to comment.