Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wazevo: adds support for br_table #1646

Merged
merged 6 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions internal/engine/wazevo/backend/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2785,6 +2785,69 @@ L1 (SSA Block: blk0):
str d8, [x8, #0x8]
ldr x30, [sp], #0x10
ret
`,
},
{
name: "br_table",
m: testcases.BrTable.Module,
targetIndex: 0,
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x2?, x2
orr w9?, wzr, #0x6
subs wzr, w2?, w9?
csel w10?, w9?, w2?, hs
br_table_sequence x10?, [L2, L3, L4, L5, L6, L7, L2]
L2 (SSA Block: blk6):
movz w0, #0xb, LSL 0
ret
L3 (SSA Block: blk5):
orr w0, wzr, #0xc
ret
L4 (SSA Block: blk4):
movz w0, #0xd, LSL 0
ret
L5 (SSA Block: blk3):
orr w0, wzr, #0xe
ret
L6 (SSA Block: blk2):
orr w0, wzr, #0xf
ret
L7 (SSA Block: blk1):
orr w0, wzr, #0x10
ret
`,
afterFinalizeARM64: `
L1 (SSA Block: blk0):
str x30, [sp, #-0x10]!
orr w8, wzr, #0x6
subs wzr, w2, w8
csel w8, w8, w2, hs
adr x27, #16; ldrsw x8, [x27, x8, UXTW 2]; add x27, x27, x8; br x27; [0x1c 0x28 0x34 0x40 0x4c 0x58 0x1c]
L2 (SSA Block: blk6):
movz w0, #0xb, LSL 0
ldr x30, [sp], #0x10
ret
L3 (SSA Block: blk5):
orr w0, wzr, #0xc
ldr x30, [sp], #0x10
ret
L4 (SSA Block: blk4):
movz w0, #0xd, LSL 0
ldr x30, [sp], #0x10
ret
L5 (SSA Block: blk3):
orr w0, wzr, #0xe
ldr x30, [sp], #0x10
ret
L6 (SSA Block: blk2):
orr w0, wzr, #0xf
ldr x30, [sp], #0x10
ret
L7 (SSA Block: blk1):
orr w0, wzr, #0x10
ldr x30, [sp], #0x10
ret
`,
},
} {
Expand Down
21 changes: 11 additions & 10 deletions internal/engine/wazevo/backend/compiler_lower.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,17 @@ func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
c.mach.FlushPendingInstructions()
}

_, args, target := br0.BranchData()
argExists := len(args) != 0
if argExists && br1 != nil {
panic("BUG: critical edge split failed")
}

if argExists && target.ReturnBlock() {
c.lowerFunctionReturns(args)
} else if argExists {
c.lowerBlockArguments(args, target)
if br0.Opcode() == ssa.OpcodeJump {
_, args, target := br0.BranchData()
argExists := len(args) != 0
if argExists && br1 != nil {
panic("BUG: critical edge split failed")
}
if argExists && target.ReturnBlock() {
c.lowerFunctionReturns(args)
} else if argExists {
c.lowerBlockArguments(args, target)
}
}
c.mach.FlushPendingInstructions()
}
Expand Down
45 changes: 41 additions & 4 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package arm64
import (
"fmt"
"math"
"strings"

"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
Expand All @@ -26,6 +27,7 @@ type (
rd, rm, rn, ra operand
amode addressMode
abi *abiImpl
targets []uint32
addedAfterLowering bool
}

Expand Down Expand Up @@ -69,6 +71,7 @@ var defKinds = [numInstructionKinds]defKind{
exitSequence: defKindNone,
condBr: defKindNone,
br: defKindNone,
brTableSequence: defKindNone,
cSet: defKindRD,
extend: defKindRD,
fpuCmp: defKindNone,
Expand Down Expand Up @@ -166,6 +169,7 @@ var useKinds = [numInstructionKinds]useKind{
exitSequence: useKindRN,
condBr: useKindCond,
br: useKindNone,
brTableSequence: useKindRN,
cSet: useKindNone,
extend: useKindRN,
fpuCmp: useKindRNRM,
Expand Down Expand Up @@ -490,6 +494,16 @@ func (i *instruction) asBr(target label) {
i.u1 = uint64(target)
}

// asBrTableSequence turns this instruction into a brTableSequence.
//
// indexReg is recorded as the rn operand, and targets is stored as-is (the slice is not copied).
// While the instruction is unresolved, String formats the entries of targets as labels.
func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) {
	i.targets = targets
	i.rn = operandNR(indexReg)
	i.kind = brTableSequence
}

// brTableSequenceOffsetsResolved flags this brTableSequence as having its offsets resolved.
//
// The flag is kept in u3 and exists for debugging: String checks u3 to decide whether
// the targets are printed as labels (u3 == 0) or as offsets.
func (i *instruction) brTableSequenceOffsetsResolved() {
	const offsetsResolved = 1
	i.u3 = offsetsResolved
}

func (i *instruction) brLabel() label {
return label(i.u1)
}
Expand Down Expand Up @@ -1028,8 +1042,29 @@ func (i *instruction) String() (str string) {
panic("TODO")
case word8:
panic("TODO")
case jtSequence:
panic("TODO")
case brTableSequence:
if i.u3 == 0 { // The offsets haven't been resolved yet.
labels := make([]string, len(i.targets))
for index, l := range i.targets {
labels[index] = label(l).String()
}
str = fmt.Sprintf("br_table_sequence %s, [%s]",
formatVRegSized(i.rn.nr(), 64),
strings.Join(labels, ", "),
)
} else {
// See encodeBrTableSequence for the encoding.
offsets := make([]string, len(i.targets))
for index, offset := range i.targets {
offsets[index] = fmt.Sprintf("%#x", int32(offset))
}
str = fmt.Sprintf(
`adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`,
formatVRegSized(i.rn.nr(), 64),
formatVRegSized(tmpRegVReg, 64),
offsets,
)
}
case loadAddr:
panic("TODO")
case exitSequence:
Expand Down Expand Up @@ -1208,8 +1243,8 @@ const (
word4
// word8 represents a raw 64-bit word.
word8
// jtSequence represents a jump-table sequence.
jtSequence
// brTableSequence represents a jump-table sequence.
brTableSequence
// loadAddr represents a load address instruction.
loadAddr
// exitSequence consists of multiple instructions, and exits the execution immediately.
Expand Down Expand Up @@ -1571,6 +1606,8 @@ func (i *instruction) size() int64 {
return 4 + 4 + 8
case loadFpuConst128:
return 4 + 4 + 12
case brTableSequence:
return 4*4 + int64(len(i.targets))*4
default:
return 4
}
Expand Down
74 changes: 54 additions & 20 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,11 @@ func (i *instruction) encode(c backend.Compiler) {
c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
}
case callInd:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BLR--Branch-with-Link-to-Register-
rn := regNumberInEncoding[i.rn.realReg()]
c.Emit4Bytes(
0b1101011<<25 | 0b111111<<16 | rn<<5,
)
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
c.Emit4Bytes(encodeStoreOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
c.Emit4Bytes(encodeStoreOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
case condBr:
imm19 := i.condBrOffset()
if imm19%4 != 0 {
Expand Down Expand Up @@ -212,15 +208,7 @@ func (i *instruction) encode(c backend.Compiler) {
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en
c.Emit4Bytes(0)
case adr:
// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address-
rd := regNumberInEncoding[i.rd.realReg()]
off := i.u1
if off >= 1<<20 {
panic("BUG: too large adr instruction")
}
c.Emit4Bytes(
uint32(off&0b11)<<29 | 0b1<<28 | uint32(off&0b1111111111_1111111100)<<3 | rd,
)
c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
case cSel:
c.Emit4Bytes(encodeConditionalSelect(
kind,
Expand Down Expand Up @@ -266,11 +254,22 @@ func (i *instruction) encode(c backend.Compiler) {
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u2),
))
case brTableSequence:
encodeBrTableSequence(c, i.rn.reg(), i.targets)
default:
panic(i.String())
}
}

// encodeAdr returns the 32-bit encoding of "adr rd, #offset", which forms a PC-relative address.
// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address-
//
// rd is the destination register number as it appears in the encoding, and offset is the
// byte offset from the instruction. Only offsets below 1<<20 are accepted; anything larger panics.
func encodeAdr(rd uint32, offset uint32) uint32 {
	const offsetLimit = 1 << 20
	if offset >= offsetLimit {
		panic("BUG: too large adr instruction")
	}
	// The two lowest bits of the offset go into bits 29-30 (immlo),
	// and the remaining bits 2-19 are placed starting at bit 5 (immhi).
	immlo := offset & 0b11
	immhi := (offset >> 2) & 0b11_1111111111_111111
	return immlo<<29 | 0b1<<28 | immhi<<5 | rd
}

// encodeFpuCSel encodes as "Floating-point conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
Expand Down Expand Up @@ -318,6 +317,16 @@ func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 {
return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd
}

// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in:
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
//
// rn is the number of the register holding the branch target as it appears in the encoding.
// When link is true, the opc field (starting at bit 21) is set to 0b0001, otherwise it stays zero.
func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 {
	// Fixed bits shared by both forms, plus the target register.
	encoded := uint32(0b1101011<<25|0b11111<<16) | rn<<5
	if link {
		encoded |= 0b0001 << 21
	}
	return encoded
}

// encodeMoveFromVec encodes as "Move vector element to a general-purpose register"
// (represented as `umov` when dest is 32-bit, `umov` otherwise) in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
Expand Down Expand Up @@ -481,7 +490,7 @@ func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 {
return _31to10<<10 | rn<<5 | rd
}

func encodeStoreOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 {
func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 {
var _22to31 uint32
var bits int64
switch kind {
Expand Down Expand Up @@ -1003,6 +1012,31 @@ func encodeVecMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 {
return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// brTableSequenceOffsetTableBegin is the byte offset inside the brTableSequence at which the
// offset table begins, i.e. right after the four 4-byte instructions emitted by encodeBrTableSequence.
const brTableSequenceOffsetTableBegin = 16

// encodeBrTableSequence emits the machine code for a brTableSequence: four instructions that
// load a 32-bit entry from the table and branch, followed by the table itself.
//
// index is the register holding the table index; it is overwritten by the loaded entry.
// targets holds the table entries, each emitted verbatim as one 32-bit word.
// The entry is added to the address of the table's first word to form the branch destination.
func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) {
	tmpRegNumber := regNumberInEncoding[tmp]
	indexNumber := regNumberInEncoding[index.RealReg()]

	// adr tmpReg, PC+16 (PC+16 is the address of the first label offset)
	// ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*4)), as each entry is a 4-byte word.
	// add tmpReg, tmpReg, index
	// br tmpReg
	// [offset_to_l1, offset_to_l2, ..., offset_to_lN]
	c.Emit4Bytes(encodeAdr(tmpRegNumber, brTableSequenceOffsetTableBegin))
	c.Emit4Bytes(encodeLoadOrStore(sLoad32, indexNumber,
		addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW},
	))
	c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false))
	c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false))

	// Offsets are resolved in ResolveRelativeAddress phase.
	for _, offset := range targets {
		c.Emit4Bytes(offset)
	}
}

// encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble.
func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
// Restore the FP, SP and LR, and return to the Go code:
Expand All @@ -1012,7 +1046,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
// ret ;; --> return to the Go code

restoreFp := encodeStoreOrStore(
restoreFp := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[fp],
addressMode{
Expand All @@ -1022,7 +1056,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
},
)

restoreSpToTmp := encodeStoreOrStore(
restoreSpToTmp := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[tmp],
addressMode{
Expand All @@ -1035,7 +1069,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0,
regNumberInEncoding[tmp], regNumberInEncoding[sp])

restoreLr := encodeStoreOrStore(
restoreLr := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[lr],
addressMode{
Expand Down
24 changes: 24 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package arm64

import (
"encoding/binary"
"encoding/hex"
"fmt"
"math"
"testing"

Expand Down Expand Up @@ -869,3 +871,25 @@ func Test_lowerExitWithCodeEncodingSize(t *testing.T) {
m.encode(m.perBlockHead)
require.Equal(t, exitWithCodeEncodingSize, len(compiler.Buf()))
}

// Test_encodeBrTableSequence checks that the encoded brTableSequence has the size reported by
// instruction.size, and that both the instruction prefix and the trailing table match the expected bytes.
func Test_encodeBrTableSequence(t *testing.T) {
	const (
		expectedInstructions = "9b000010765bb6b87b03168b60031fd6"
		expectedTable        = "0100000002000000030000000400000005000000"
	)

	targets := []uint32{1, 2, 3, 4, 5}
	compiler := &mockCompiler{}
	inst := &instruction{kind: brTableSequence, targets: targets}

	encodeBrTableSequence(compiler, x22VReg, inst.targets)
	buf := compiler.Buf()

	require.Equal(t, inst.size(), int64(len(buf)))
	// Everything before brTableSequenceOffsetTableBegin is the instructions, and the rest is the table.
	require.Equal(t, expectedInstructions, hex.EncodeToString(buf[:brTableSequenceOffsetTableBegin]))
	require.Equal(t, expectedTable, hex.EncodeToString(buf[brTableSequenceOffsetTableBegin:]))
}

func Test_encodeUnconditionalBranch(t *testing.T) {
buf := make([]byte, 4)

actual := encodeUnconditionalBranch(true, 4)
binary.LittleEndian.PutUint32(buf, actual)
require.Equal(t, "0x01000094", fmt.Sprintf("%#x", buf))

actual = encodeUnconditionalBranch(false, 4*1024)
binary.LittleEndian.PutUint32(buf, actual)
require.Equal(t, "0x00040014", fmt.Sprintf("%#x", buf))
}
Loading