From d5729ba3d700957f9ec9fc4f5aaa80d05a7c5513 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 10:50:05 +0900 Subject: [PATCH 1/6] wazevo: adds support for br_table Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 63 +++++++++++++ .../engine/wazevo/backend/compiler_lower.go | 21 ++--- .../engine/wazevo/backend/isa/arm64/instr.go | 28 ++++-- .../wazevo/backend/isa/arm64/lower_instr.go | 37 +++++++- .../wazevo/backend/isa/arm64/machine.go | 5 +- .../engine/wazevo/frontend/frontend_test.go | 31 +++++++ internal/engine/wazevo/frontend/lower.go | 89 +++++++++++++++---- internal/engine/wazevo/ssa/basic_block.go | 15 +++- internal/engine/wazevo/ssa/builder.go | 3 + internal/engine/wazevo/ssa/builder_test.go | 4 +- internal/engine/wazevo/ssa/instructions.go | 37 +++++++- internal/engine/wazevo/testcases/testcases.go | 63 +++++++++++++ 12 files changed, 352 insertions(+), 44 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 5654716ddf..4c9250867e 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -2785,6 +2785,69 @@ L1 (SSA Block: blk0): str d8, [x8, #0x8] ldr x30, [sp], #0x10 ret +`, + }, + { + name: "br_table", + m: testcases.BrTable.Module, + targetIndex: 0, + afterLoweringARM64: ` +L1 (SSA Block: blk0): + mov x2?, x2 + orr w9?, wzr, #0x6 + subs wzr, w2?, w9? + csel w10?, w9?, w2?, hs + br_table_sequence x10?, [L2, L3, L4, L5, L6, L7, L2] +L2 (SSA Block: blk6): + movz w0, #0xb, LSL 0 + ret +L3 (SSA Block: blk5): + orr w0, wzr, #0xc + ret +L4 (SSA Block: blk4): + movz w0, #0xd, LSL 0 + ret +L5 (SSA Block: blk3): + orr w0, wzr, #0xe + ret +L6 (SSA Block: blk2): + orr w0, wzr, #0xf + ret +L7 (SSA Block: blk1): + orr w0, wzr, #0x10 + ret +`, + afterFinalizeARM64: ` +L1 (SSA Block: blk0): + str x30, [sp, #-0x10]! 
+ orr w8, wzr, #0x6 + subs wzr, w2, w8 + csel w8, w8, w2, hs + br_table_sequence x8, [L2, L3, L4, L5, L6, L7, L2] +L2 (SSA Block: blk6): + movz w0, #0xb, LSL 0 + ldr x30, [sp], #0x10 + ret +L3 (SSA Block: blk5): + orr w0, wzr, #0xc + ldr x30, [sp], #0x10 + ret +L4 (SSA Block: blk4): + movz w0, #0xd, LSL 0 + ldr x30, [sp], #0x10 + ret +L5 (SSA Block: blk3): + orr w0, wzr, #0xe + ldr x30, [sp], #0x10 + ret +L6 (SSA Block: blk2): + orr w0, wzr, #0xf + ldr x30, [sp], #0x10 + ret +L7 (SSA Block: blk1): + orr w0, wzr, #0x10 + ldr x30, [sp], #0x10 + ret `, }, } { diff --git a/internal/engine/wazevo/backend/compiler_lower.go b/internal/engine/wazevo/backend/compiler_lower.go index afaf5a8a8d..71b7fb7776 100644 --- a/internal/engine/wazevo/backend/compiler_lower.go +++ b/internal/engine/wazevo/backend/compiler_lower.go @@ -97,16 +97,17 @@ func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { c.mach.FlushPendingInstructions() } - _, args, target := br0.BranchData() - argExists := len(args) != 0 - if argExists && br1 != nil { - panic("BUG: critical edge split failed") - } - - if argExists && target.ReturnBlock() { - c.lowerFunctionReturns(args) - } else if argExists { - c.lowerBlockArguments(args, target) + if br0.Opcode() == ssa.OpcodeJump { + _, args, target := br0.BranchData() + argExists := len(args) != 0 + if argExists && br1 != nil { + panic("BUG: critical edge split failed") + } + if argExists && target.ReturnBlock() { + c.lowerFunctionReturns(args) + } else if argExists { + c.lowerBlockArguments(args, target) + } } c.mach.FlushPendingInstructions() } diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index 7420a16318..81715edc48 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -2,10 +2,10 @@ package arm64 import ( "fmt" - "math" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" 
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "math" + "strings" ) type ( @@ -26,6 +26,7 @@ type ( rd, rm, rn, ra operand amode addressMode abi *abiImpl + targets []label addedAfterLowering bool } @@ -69,6 +70,7 @@ var defKinds = [numInstructionKinds]defKind{ exitSequence: defKindNone, condBr: defKindNone, br: defKindNone, + brTableSequence: defKindNone, cSet: defKindRD, extend: defKindRD, fpuCmp: defKindNone, @@ -166,6 +168,7 @@ var useKinds = [numInstructionKinds]useKind{ exitSequence: useKindRN, condBr: useKindCond, br: useKindNone, + brTableSequence: useKindRN, cSet: useKindNone, extend: useKindRN, fpuCmp: useKindRNRM, @@ -490,6 +493,12 @@ func (i *instruction) asBr(target label) { i.u1 = uint64(target) } +func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []label) { + i.kind = brTableSequence + i.rn = operandNR(indexReg) + i.targets = targets +} + func (i *instruction) brLabel() label { return label(i.u1) } @@ -1028,8 +1037,15 @@ func (i *instruction) String() (str string) { panic("TODO") case word8: panic("TODO") - case jtSequence: - panic("TODO") + case brTableSequence: + var labels = []string{} + for _, l := range i.targets { + labels = append(labels, l.String()) + } + str = fmt.Sprintf("br_table_sequence %s, [%s]", + formatVRegSized(i.rn.nr(), 64), + strings.Join(labels, ", "), + ) case loadAddr: panic("TODO") case exitSequence: @@ -1208,8 +1224,8 @@ const ( word4 // word8 represents a raw 64-bit word. word8 - // jtSequence represents a jump-table sequence. - jtSequence + // brTableSequence represents a jump-table sequence. + brTableSequence // loadAddr represents a load address instruction. loadAddr // exitSequence consists of multiple instructions, and exits the execution immediately. 
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 07d06c467c..99cf38d3e6 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -14,10 +14,9 @@ import ( // LowerSingleBranch implements backend.Machine. func (m *machine) LowerSingleBranch(br *ssa.Instruction) { - _, _, targetBlk := br.BranchData() - switch br.Opcode() { case ssa.OpcodeJump: + _, _, targetBlk := br.BranchData() if br.IsFallthroughJump() { return } @@ -30,12 +29,44 @@ func (m *machine) LowerSingleBranch(br *ssa.Instruction) { } m.insert(b) case ssa.OpcodeBrTable: - panic("TODO: support OpcodeBrTable") + m.lowerBrTable(br) default: panic("BUG: unexpected branch opcode" + br.Opcode().String()) } } +func (m *machine) lowerBrTable(i *ssa.Instruction) { + index, targets := i.BrTableData() + indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone) + + // Firstly, we have to do the bounds check of the index, and + // set it to the default target (sitting at the end of the list) if it's out of bounds. + + // mov maxIndexReg #maximum_index + // subs wzr, index, maxIndexReg + // csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg. + maxIndexReg := m.compiler.AllocateVReg(regalloc.RegTypeInt) + m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) + subs := m.allocateInstr() + subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) + m.insert(subs) + csel := m.allocateInstr() + adjustedIndex := m.compiler.AllocateVReg(regalloc.RegTypeInt) + csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) + m.insert(csel) + + brSequence := m.allocateInstr() + + // TODO: reuse the slice! 
+ labels := make([]label, len(targets)) + for j, target := range targets { + labels[j] = m.getOrAllocateSSABlockLabel(target) + } + + brSequence.asBrTableSequence(adjustedIndex, labels) + m.insert(brSequence) +} + // LowerConditionalBranch implements backend.Machine. func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { cval, args, targetBlk := b.BranchData() diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index 637ca30c27..18c1267aaa 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -39,8 +39,8 @@ type ( addendsWorkQueue []ssa.Value addends32 []addend32 // addends64 is used during address lowering, defined here for reuse. - addends64 []regalloc.VReg - + addends64 []regalloc.VReg + brTablesTargets [][]ssa.BasicBlock unresolvedAddressModes []*instruction // spillSlotSize is the size of the stack slot in bytes used for spilling registers. @@ -135,6 +135,7 @@ func (m *machine) Reset() { m.spillSlotSize = 0 m.unresolvedAddressModes = m.unresolvedAddressModes[:0] m.rootInstr = nil + m.brTablesTargets = m.brTablesTargets[:0] } // InitializeABI implements backend.Machine InitializeABI. 
diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index c39a17b2dd..0942582946 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -1460,6 +1460,37 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) Jump blk_ret, v19 `, }, + { + name: "br_table", m: testcases.BrTable.Module, + exp: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + BrTable v2, [blk6, blk5, blk4, blk3, blk2, blk1, blk6] + +blk1: () <-- (blk0) + v8:i32 = Iconst_32 0x10 + Return v8 + +blk2: () <-- (blk0) + v7:i32 = Iconst_32 0xf + Return v7 + +blk3: () <-- (blk0) + v6:i32 = Iconst_32 0xe + Return v6 + +blk4: () <-- (blk0) + v5:i32 = Iconst_32 0xd + Return v5 + +blk5: () <-- (blk0) + v4:i32 = Iconst_32 0xc + Return v4 + +blk6: () <-- (blk0,blk0) + v3:i32 = Iconst_32 0xb + Return v3 + `, + }, } { tc := tc diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index 51361179d4..c9d9bbdead 100644 --- a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -21,6 +21,7 @@ type ( controlFrames []controlFrame unreachable bool unreachableDepth int + tmpForBrTable []uint32 pc int } controlFrame struct { @@ -139,7 +140,7 @@ func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { for c.loweringState.pc < len(c.wasmFunctionBody) { op := c.wasmFunctionBody[c.loweringState.pc] c.lowerOpcode(op) - if debug { + if true { fmt.Println("--------- Translated " + wasm.InstructionName(op) + " --------") fmt.Println("Stack: " + c.loweringState.String()) fmt.Println(c.formatBuilder()) @@ -835,7 +836,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { } else { // unreachable. 
if state.unreachableDepth > 0 { state.unreachableDepth-- - return + return // TODO: it seems not necessary return } else { state.unreachable = false } @@ -866,14 +867,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { return } - targetFrame := state.ctrlPeekAt(int(labelIndex)) - var targetBlk ssa.BasicBlock - var argNum int - if targetFrame.isLoop() { - targetBlk, argNum = targetFrame.blk, len(targetFrame.blockType.Params) - } else { - targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) - } + targetBlk, argNum := state.brTargetArgNumFor(labelIndex) args := c.loweringState.nPeekDup(argNum) c.insertJumpToBlock(args, targetBlk) @@ -887,14 +881,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { v := state.pop() - targetFrame := state.ctrlPeekAt(int(labelIndex)) - var targetBlk ssa.BasicBlock - var argNum int - if targetFrame.isLoop() { - targetBlk, argNum = targetFrame.blk, len(targetFrame.blockType.Params) - } else { - targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) - } + targetBlk, argNum := state.brTargetArgNumFor(labelIndex) args := c.loweringState.nPeekDup(argNum) // Insert the conditional jump to the target block. @@ -909,6 +896,22 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { // Now start translating the instructions after br_if. builder.SetCurrentBlock(elseBlk) + case wasm.OpcodeBrTable: + labels := state.tmpForBrTable + labels = labels[:0] + labelCount := c.readI32u() + for i := 0; i < int(labelCount); i++ { + labels = append(labels, c.readI32u()) + } + labels = append(labels, c.readI32u()) // default label. 
+ if state.unreachable { + return + } + + index := state.pop() + c.lowerBrTable(labels, index) + state.unreachable = true + case wasm.OpcodeNop: case wasm.OpcodeReturn: results := c.loweringState.nPeekDup(c.results()) @@ -1462,3 +1465,53 @@ func cloneValuesList(in []ssa.Value) (ret []ssa.Value) { func (c *Compiler) results() int { return len(c.wasmFunctionTyp.Results) } + +func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) { + state := c.state() + builder := c.ssaBuilder + + f := state.ctrlPeekAt(int(labels[0])) + var numArgs int + if f.isLoop() { + numArgs = len(f.blockType.Params) + } else { + numArgs = len(f.blockType.Results) + } + + if numArgs == 0 { + targets := make([]ssa.BasicBlock, len(labels)) + for i, l := range labels { + targetBlk, argNum := state.brTargetArgNumFor(l) + if argNum != 0 { + // This must be handled in else block below. + panic("BUG: br_table with args must not reach here") + } + targets[i] = targetBlk + + if targetBlk.ReturnBlock() { + // TODO: even when the target block has no arguments, we have to insert the unconditional jump to the return trampoline + // if the target is return. + panic("TODO") + } + } + + // If the target block has no arguments, we can just jump to the target block. 
+ brTable := builder.AllocateInstruction() + brTable.AsBrTable(index, targets) + builder.InsertInstruction(brTable) + + } else { + panic("TODO") + } +} + +func (l *loweringState) brTargetArgNumFor(labelIndex uint32) (targetBlk ssa.BasicBlock, argNum int) { + targetFrame := l.ctrlPeekAt(int(labelIndex)) + if targetFrame.isLoop() { + targetBlk, argNum = targetFrame.blk, len(targetFrame.blockType.Params) + } else { + targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) + } + return + +} diff --git a/internal/engine/wazevo/ssa/basic_block.go b/internal/engine/wazevo/ssa/basic_block.go index 63258b29d3..6411956495 100644 --- a/internal/engine/wazevo/ssa/basic_block.go +++ b/internal/engine/wazevo/ssa/basic_block.go @@ -194,7 +194,10 @@ func (bb *basicBlock) InsertInstruction(next *Instruction) { target := next.blk.(*basicBlock) target.addPred(bb, next) case OpcodeBrTable: - panic(OpcodeBrTable) + for _, _target := range next.targets { + target := _target.(*basicBlock) + target.addPred(bb, next) + } } } @@ -252,7 +255,17 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { if bb.sealed { panic("BUG: trying to add predecessor to a sealed block: " + bb.Name()) } + pred := blk.(*basicBlock) + for i := range bb.preds { + existingPred := &bb.preds[i] + if existingPred.blk == pred && existingPred.branch != branch { + // If the target is already added, then this must come from the same BrTable, + // otherwise such redundant branch should be eliminated by the frontend. (which should be simpler). 
+ panic(fmt.Sprintf("BUG: redundant non BrTable jumps in %s whose targes are the same", bb.Name())) + } + } + bb.preds = append(bb.preds, basicBlockPredecessorInfo{ blk: pred, branch: branch, diff --git a/internal/engine/wazevo/ssa/builder.go b/internal/engine/wazevo/ssa/builder.go index 668e903cb4..63b2854438 100644 --- a/internal/engine/wazevo/ssa/builder.go +++ b/internal/engine/wazevo/ssa/builder.go @@ -696,6 +696,9 @@ func (b *builder) LayoutBlocks() { if len(blk.success) < 2 { // There won't be critical edge originating from this block. continue + } else if blk.currentInstr.opcode == OpcodeBrTable { + // We don't split critical edges here, because at the construction site of BrTable, we already split the edges. + continue } for sidx, succ := range blk.success { diff --git a/internal/engine/wazevo/ssa/builder_test.go b/internal/engine/wazevo/ssa/builder_test.go index 0381315dec..2a4f808c1b 100644 --- a/internal/engine/wazevo/ssa/builder_test.go +++ b/internal/engine/wazevo/ssa/builder_test.go @@ -176,7 +176,7 @@ func Test_maybeInvertBranch(t *testing.T) { func TestBuilder_splitCriticalEdge(t *testing.T) { b := NewBuilder().(*builder) - predBlk, dummyBlk := b.allocateBasicBlock(), b.allocateBasicBlock() + predBlk, dummyBlk, dummyBlk2 := b.allocateBasicBlock(), b.allocateBasicBlock(), b.allocateBasicBlock() predBlk.reversePostOrder = 100 b.SetCurrentBlock(predBlk) inst := b.AllocateInstruction() @@ -187,7 +187,7 @@ func TestBuilder_splitCriticalEdge(t *testing.T) { originalBrz.AsBrz(v, nil, dummyBlk) b.InsertInstruction(originalBrz) dummyJump := b.AllocateInstruction() - dummyJump.AsJump(nil, dummyBlk) + dummyJump.AsJump(nil, dummyBlk2) b.InsertInstruction(dummyJump) predInfo := &basicBlockPredecessorInfo{blk: predBlk, branch: originalBrz} diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 38987fbe39..327d80e689 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ 
b/internal/engine/wazevo/ssa/instructions.go @@ -23,6 +23,7 @@ type Instruction struct { vs []Value typ Type blk BasicBlock + targets []BasicBlock prev, next *Instruction rValue Value @@ -129,8 +130,8 @@ const ( // OpcodeBrnz branches into `blk` with `args` if the value `c` is not zero: `Brnz c, blk, args`. OpcodeBrnz - // OpcodeBrTable ... - // `BrTable x, block, JT`. + // OpcodeBrTable takes the index value `index`, and branches into `labelX`. If the `index` is out of range, + // it branches into the last labelN: `BrTable index, [label1, label2, ... labelN]`. OpcodeBrTable // OpcodeExitWithCode exit the execution immediately. @@ -863,6 +864,7 @@ var instructionSideEffects = [opcodeEnd]sideEffect{ OpcodeReturn: sideEffectTrue, OpcodeBrz: sideEffectTrue, OpcodeBrnz: sideEffectTrue, + OpcodeBrTable: sideEffectTrue, OpcodeFdiv: sideEffectFalse, OpcodeFmul: sideEffectFalse, OpcodeFmax: sideEffectFalse, @@ -948,6 +950,7 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{ OpcodeReturn: returnTypesFnNoReturns, OpcodeBrz: returnTypesFnNoReturns, OpcodeBrnz: returnTypesFnNoReturns, + OpcodeBrTable: returnTypesFnNoReturns, OpcodeUload8: returnTypesFnSingle, OpcodeUload16: returnTypesFnSingle, OpcodeUload32: returnTypesFnSingle, @@ -1228,6 +1231,16 @@ func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target Bas return } +// BrTableData returns the branch table data for this instruction necessary for backends. +func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) { + if i.opcode != OpcodeBrTable { + panic("BUG: BrTableData only available for OpcodeBrTable") + } + index = i.v + targets = i.targets + return +} + // AsJump initializes this instruction as a jump instruction with OpcodeJump. 
func (i *Instruction) AsJump(vs []Value, target BasicBlock) { i.opcode = OpcodeJump @@ -1267,6 +1280,13 @@ func (i *Instruction) AsBrnz(v Value, args []Value, target BasicBlock) { i.blk = target } +// AsBrTable initializes this instruction as a branch-table instruction with OpcodeBrTable. +func (i *Instruction) AsBrTable(index Value, targets []BasicBlock) { + i.opcode = OpcodeBrTable + i.v = index + i.targets = targets +} + // AsCall initializes this instruction as a call instruction with OpcodeCall. func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args []Value) { i.opcode = OpcodeCall @@ -1467,6 +1487,19 @@ func (i *Instruction) Format(b Builder) string { vs[idx+2] = i.vs[idx].Format(b) } instSuffix = strings.Join(vs, ", ") + case OpcodeBrTable: + // `BrTable index, [label1, label2, ... labelN]` + instSuffix = fmt.Sprintf(" %s", i.v.Format(b)) + instSuffix += ", [" + for i, target := range i.targets { + blk := target.(*basicBlock) + if i == 0 { + instSuffix += blk.Name() + } else { + instSuffix += ", " + blk.Name() + } + } + instSuffix += "]" case OpcodeIshl, OpcodeSshr, OpcodeUshr: instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) case OpcodeUndefined: diff --git a/internal/engine/wazevo/testcases/testcases.go b/internal/engine/wazevo/testcases/testcases.go index b4372e2f66..0016389a44 100644 --- a/internal/engine/wazevo/testcases/testcases.go +++ b/internal/engine/wazevo/testcases/testcases.go @@ -1275,6 +1275,69 @@ var ( }, }, } + + BrTable = TestCase{ + Module: &wasm.Module{ + TypeSection: []wasm.FunctionType{i32_i32, {}}, + ExportSection: []wasm.Export{{Name: ExportedFunctionName, Type: wasm.ExternTypeFunc, Index: 0}}, + FunctionSection: []wasm.Index{0}, + CodeSection: []wasm.Code{ + {Body: []byte{ + wasm.OpcodeBlock, 1, // Signature v_v, + wasm.OpcodeBlock, 1, // Signature v_v, + wasm.OpcodeBlock, 1, // Signature v_v, + wasm.OpcodeBlock, 1, // Signature v_v, + wasm.OpcodeBlock, 1, // Signature v_v, + wasm.OpcodeBlock, 1, // 
Signature v_v, + wasm.OpcodeLocalGet, 0, + wasm.OpcodeBrTable, + 6, // size of label vector + 0, 1, 2, 3, 4, 5, // labels. + 0, // default label + wasm.OpcodeEnd, wasm.OpcodeI32Const, 11, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 12, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 13, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 14, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 15, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 16, wasm.OpcodeReturn, + wasm.OpcodeEnd, + }}, + }, + }, + } + + BrTableWithArg = TestCase{ + Name: "br_table_with_arg", + Module: &wasm.Module{ + TypeSection: []wasm.FunctionType{i32i32_i32, v_i32}, + ExportSection: []wasm.Export{{Name: ExportedFunctionName, Type: wasm.ExternTypeFunc, Index: 0}}, + FunctionSection: []wasm.Index{0}, + CodeSection: []wasm.Code{ + {Body: []byte{ + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeBlock, 1, // Signature v_i32, + wasm.OpcodeLocalGet, 1, // Argument to each destination. + wasm.OpcodeLocalGet, 0, + wasm.OpcodeBrTable, + 6, // size of label vector + 0, 1, 2, 3, 4, 5, // labels. 
+ 0, // default label + wasm.OpcodeEnd, wasm.OpcodeI32Const, 11, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 12, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 13, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 14, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 15, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeEnd, wasm.OpcodeI32Const, 16, wasm.OpcodeI32Add, wasm.OpcodeReturn, + wasm.OpcodeUnreachable, + wasm.OpcodeEnd, + }}, + }, + }, + } ) type TestCase struct { From b8cddfd9c4bec732401bffad51a70e8a81ece131 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 11:39:38 +0900 Subject: [PATCH 2/6] wazevo: adds support for br_table Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/isa/arm64/instr.go | 9 ++- .../backend/isa/arm64/instr_encoding.go | 71 +++++++++++++------ .../backend/isa/arm64/instr_encoding_test.go | 22 ++++++ .../wazevo/backend/isa/arm64/machine.go | 4 +- .../engine/wazevo/frontend/frontend_test.go | 2 +- internal/engine/wazevo/frontend/lower.go | 3 +- 6 files changed, 83 insertions(+), 28 deletions(-) diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index 81715edc48..4439adeb5c 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -2,10 +2,11 @@ package arm64 import ( "fmt" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" "math" "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) type ( @@ -1038,7 +1039,7 @@ func (i *instruction) String() (str string) { case word8: panic("TODO") case brTableSequence: - var labels = []string{} + labels := []string{} for _, l := range i.targets { labels = 
append(labels, l.String()) } @@ -1587,6 +1588,8 @@ func (i *instruction) size() int64 { return 4 + 4 + 8 case loadFpuConst128: return 4 + 4 + 12 + case brTableSequence: + return 4*4 + int64(len(i.targets))*4 default: return 4 } diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index a027ddb202..8fcf1078d7 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -41,15 +41,11 @@ func (i *instruction) encode(c backend.Compiler) { c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder } case callInd: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BLR--Branch-with-Link-to-Register- - rn := regNumberInEncoding[i.rn.realReg()] - c.Emit4Bytes( - 0b1101011<<25 | 0b111111<<16 | rn<<5, - ) + c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - c.Emit4Bytes(encodeStoreOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - c.Emit4Bytes(encodeStoreOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) case condBr: imm19 := i.condBrOffset() if imm19%4 != 0 { @@ -212,15 +208,7 @@ func (i *instruction) encode(c backend.Compiler) { // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en c.Emit4Bytes(0) case adr: - // https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address- - rd := regNumberInEncoding[i.rd.realReg()] - off := i.u1 - if off >= 1<<20 { - panic("BUG: too large 
adr instruction") - } - c.Emit4Bytes( - uint32(off&0b11)<<29 | 0b1<<28 | uint32(off&0b1111111111_1111111100)<<3 | rd, - ) + c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) case cSel: c.Emit4Bytes(encodeConditionalSelect( kind, @@ -266,11 +254,22 @@ func (i *instruction) encode(c backend.Compiler) { regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) + case brTableSequence: + encodeBrTableSequence(c, i.rn.reg(), len(i.targets)) default: panic(i.String()) } } +// encodeAdr encodes a PC-relative ADR instruction. +// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address- +func encodeAdr(rd uint32, offset uint32) uint32 { + if offset >= 1<<20 { + panic("BUG: too large adr instruction") + } + return offset&0b11<<29 | 0b1<<28 | offset&0b1111111111_1111111100<<3 | rd +} + // encodeFpuCSel encodes as "Floating-point conditional select" in // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 { @@ -318,6 +317,16 @@ func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 { return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd } +// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in: +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 { + var opc uint32 + if link { + opc = 0b0001 + } + return 0b1101011<<25 | opc<<21 | 0b11111<<16 | rn<<5 +} + // encodeMoveFromVec encodes as "Move vector element to a general-purpose register" // (represented as `umov` when dest is 32-bit, `umov` otherwise) in // 
https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en @@ -481,7 +490,7 @@ func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 { return _31to10<<10 | rn<<5 | rd } -func encodeStoreOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 { +func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 { var _22to31 uint32 var bits int64 switch kind { @@ -1003,6 +1012,28 @@ func encodeVecMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 { return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd } +func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, N int) { + tmpRegNumber := regNumberInEncoding[tmp] + indexNumber := regNumberInEncoding[index.RealReg()] + + // adr tmpReg, PC+16 (PC+16 is the address of the first label offset) + // ldrsw index, [tmpReg, index, UXTW 2] ;; index = uint64(*(tmpReg, index*8)) + // add tmpReg, tmpReg, index + // br tmpReg + // [offset_to_l1, offset_to_l2, ..., offset_to_lN] + c.Emit4Bytes(encodeAdr(tmpRegNumber, 16)) + c.Emit4Bytes(encodeLoadOrStore(uLoad32, indexNumber, + addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW}, + )) + c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) + c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) + + // Label offsets are resolved after the whole function is compiled. + for i := 0; i < N; i++ { + c.Emit4Bytes(0) + } +} + // encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble. 
func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { // Restore the FP, SP and LR, and return to the Go code: @@ -1012,7 +1043,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { // ldr lr, [savedExecutionContextPtr, #GoReturnAddress] // ret ;; --> return to the Go code - restoreFp := encodeStoreOrStore( + restoreFp := encodeLoadOrStore( uLoad64, regNumberInEncoding[fp], addressMode{ @@ -1022,7 +1053,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { }, ) - restoreSpToTmp := encodeStoreOrStore( + restoreSpToTmp := encodeLoadOrStore( uLoad64, regNumberInEncoding[tmp], addressMode{ @@ -1035,7 +1066,7 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0, regNumberInEncoding[tmp], regNumberInEncoding[sp]) - restoreLr := encodeStoreOrStore( + restoreLr := encodeLoadOrStore( uLoad64, regNumberInEncoding[lr], addressMode{ diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go index e61f522141..deee08f0f0 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go @@ -1,7 +1,9 @@ package arm64 import ( + "encoding/binary" "encoding/hex" + "fmt" "math" "testing" @@ -869,3 +871,23 @@ func Test_lowerExitWithCodeEncodingSize(t *testing.T) { m.encode(m.perBlockHead) require.Equal(t, exitWithCodeEncodingSize, len(compiler.Buf())) } + +func Test_encodeBrTableSequence(t *testing.T) { + const N = 10 + m := &mockCompiler{} + encodeBrTableSequence(m, x22VReg, N) + require.Equal(t, (&instruction{kind: brTableSequence, targets: make([]label, N)}).size(), int64(len(m.Buf()))) + require.Equal(t, "9b000010765b76b87b03168b60031fd600000000000000000000000000000000000000000000000000000000000000000000000000000000", hex.EncodeToString(m.buf)) +} + +func Test_encodeUnconditionalBranch(t 
*testing.T) { + buf := make([]byte, 4) + + actual := encodeUnconditionalBranch(true, 4) + binary.LittleEndian.PutUint32(buf, actual) + require.Equal(t, "0x01000094", fmt.Sprintf("%#x", buf)) + + actual = encodeUnconditionalBranch(false, 4*1024) + binary.LittleEndian.PutUint32(buf, actual) + require.Equal(t, "0x00040014", fmt.Sprintf("%#x", buf)) +} diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index 18c1267aaa..814c356912 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -335,7 +335,7 @@ func (m *machine) ResolveRelativeAddresses() { offsetOfTarget := m.labelPositions[target].binaryOffset diff := offsetOfTarget - currentOffset if diff%4 != 0 { - panic("Invalid binary; offsets between b and the target must be a multiple of 4") + panic("BUG: offsets between b and the target must be a multiple of 4") } divided := diff >> 2 if divided < minSignedInt26 || divided > maxSignedInt26 { @@ -349,7 +349,7 @@ func (m *machine) ResolveRelativeAddresses() { offsetOfTarget := m.labelPositions[target].binaryOffset diff := offsetOfTarget - currentOffset if diff%4 != 0 { - panic("Invalid binary; offsets between b and the target must be a multiple of 4") + panic("BUG: offsets between b and the target must be a multiple of 4") } divided := diff >> 2 if divided < minSignedInt19 || divided > maxSignedInt19 { diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 0942582946..cf7f3b7b66 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -1489,7 +1489,7 @@ blk5: () <-- (blk0) blk6: () <-- (blk0,blk0) v3:i32 = Iconst_32 0xb Return v3 - `, +`, }, } { diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index c9d9bbdead..5b00bacdc8 100644 --- 
a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -140,7 +140,7 @@ func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { for c.loweringState.pc < len(c.wasmFunctionBody) { op := c.wasmFunctionBody[c.loweringState.pc] c.lowerOpcode(op) - if true { + if debug { fmt.Println("--------- Translated " + wasm.InstructionName(op) + " --------") fmt.Println("Stack: " + c.loweringState.String()) fmt.Println(c.formatBuilder()) @@ -1513,5 +1513,4 @@ func (l *loweringState) brTargetArgNumFor(labelIndex uint32) (targetBlk ssa.Basi targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) } return - } From 5aa4246738443d57979f381027e3290d240ac09d Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 12:43:59 +0900 Subject: [PATCH 3/6] wazevo: adds support for br_table Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 2 +- .../engine/wazevo/backend/isa/arm64/instr.go | 36 ++++++++++++++----- .../backend/isa/arm64/instr_encoding.go | 13 ++++--- .../backend/isa/arm64/instr_encoding_test.go | 10 +++--- .../wazevo/backend/isa/arm64/lower_instr.go | 4 +-- .../wazevo/backend/isa/arm64/machine.go | 8 +++++ internal/engine/wazevo/e2e_test.go | 15 ++++++++ 7 files changed, 67 insertions(+), 21 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 4c9250867e..4520ef776e 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -2823,7 +2823,7 @@ L1 (SSA Block: blk0): orr w8, wzr, #0x6 subs wzr, w2, w8 csel w8, w8, w2, hs - br_table_sequence x8, [L2, L3, L4, L5, L6, L7, L2] + adr x27, #16; ldrsw x8, [x27, x8, UXTW 2]; add x27, x27, x8; br x27; [0x1c 0x28 0x34 0x40 0x4c 0x58 0x1c] L2 (SSA Block: blk6): movz w0, #0xb, LSL 0 ldr x30, [sp], #0x10 diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go 
b/internal/engine/wazevo/backend/isa/arm64/instr.go index 4439adeb5c..7d103a8efe 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -27,7 +27,7 @@ type ( rd, rm, rn, ra operand amode addressMode abi *abiImpl - targets []label + targets []uint32 addedAfterLowering bool } @@ -494,12 +494,16 @@ func (i *instruction) asBr(target label) { i.u1 = uint64(target) } -func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []label) { +func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) { i.kind = brTableSequence i.rn = operandNR(indexReg) i.targets = targets } +func (i *instruction) brTableSequenceOffsetsResolved() { + i.u3 = 1 // indicate that the offsets are resolved, for debugging. +} + func (i *instruction) brLabel() label { return label(i.u1) } @@ -1039,14 +1043,28 @@ func (i *instruction) String() (str string) { case word8: panic("TODO") case brTableSequence: - labels := []string{} - for _, l := range i.targets { - labels = append(labels, l.String()) + if i.u3 == 0 { // The offsets haven't been resolved yet. + labels := make([]string, len(i.targets)) + for index, l := range i.targets { + labels[index] = label(l).String() + } + str = fmt.Sprintf("br_table_sequence %s, [%s]", + formatVRegSized(i.rn.nr(), 64), + strings.Join(labels, ", "), + ) + } else { + // See encodeBrTableSequence for the encoding. 
+ offsets := make([]string, len(i.targets)) + for index, offset := range i.targets { + offsets[index] = fmt.Sprintf("%#x", int32(offset)) + } + str = fmt.Sprintf( + `adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`, + formatVRegSized(i.rn.nr(), 64), + formatVRegSized(tmpRegVReg, 64), + offsets, + ) } - str = fmt.Sprintf("br_table_sequence %s, [%s]", - formatVRegSized(i.rn.nr(), 64), - strings.Join(labels, ", "), - ) case loadAddr: panic("TODO") case exitSequence: diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 8fcf1078d7..4b0c532b8f 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -255,7 +255,7 @@ func (i *instruction) encode(c backend.Compiler) { vecArrangement(i.u2), )) case brTableSequence: - encodeBrTableSequence(c, i.rn.reg(), len(i.targets)) + encodeBrTableSequence(c, i.rn.reg(), i.targets) default: panic(i.String()) } @@ -1012,7 +1012,10 @@ func encodeVecMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 { return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd } -func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, N int) { +// brTableSequenceOffsetTableBegin is the offset inside the brTableSequence where the table begins after 4 instructions +const brTableSequenceOffsetTableBegin = 16 + +func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) { tmpRegNumber := regNumberInEncoding[tmp] indexNumber := regNumberInEncoding[index.RealReg()] @@ -1022,15 +1025,15 @@ func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, N int) { // br tmpReg // [offset_to_l1, offset_to_l2, ..., offset_to_lN] c.Emit4Bytes(encodeAdr(tmpRegNumber, 16)) - c.Emit4Bytes(encodeLoadOrStore(uLoad32, indexNumber, + c.Emit4Bytes(encodeLoadOrStore(sLoad32, 
indexNumber, addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW}, )) c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) // Label offsets are resolved after the whole function is compiled. - for i := 0; i < N; i++ { - c.Emit4Bytes(0) + for _, offset := range targets { + c.Emit4Bytes(offset) } } diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go index deee08f0f0..cb8abbdd65 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go @@ -873,11 +873,13 @@ func Test_lowerExitWithCodeEncodingSize(t *testing.T) { } func Test_encodeBrTableSequence(t *testing.T) { - const N = 10 m := &mockCompiler{} - encodeBrTableSequence(m, x22VReg, N) - require.Equal(t, (&instruction{kind: brTableSequence, targets: make([]label, N)}).size(), int64(len(m.Buf()))) - require.Equal(t, "9b000010765b76b87b03168b60031fd600000000000000000000000000000000000000000000000000000000000000000000000000000000", hex.EncodeToString(m.buf)) + i := &instruction{kind: brTableSequence, targets: []uint32{1, 2, 3, 4, 5}} + encodeBrTableSequence(m, x22VReg, i.targets) + encoded := m.Buf() + require.Equal(t, i.size(), int64(len(encoded))) + require.Equal(t, "9b000010765bb6b87b03168b60031fd6", hex.EncodeToString(encoded[:brTableSequenceOffsetTableBegin])) + require.Equal(t, "0100000002000000030000000400000005000000", hex.EncodeToString(encoded[brTableSequenceOffsetTableBegin:])) } func Test_encodeUnconditionalBranch(t *testing.T) { diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 99cf38d3e6..d85453276f 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ 
b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -58,9 +58,9 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { brSequence := m.allocateInstr() // TODO: reuse the slice! - labels := make([]label, len(targets)) + labels := make([]uint32, len(targets)) for j, target := range targets { - labels[j] = m.getOrAllocateSSABlockLabel(target) + labels[j] = uint32(m.getOrAllocateSSABlockLabel(target)) } brSequence.asBrTableSequence(adjustedIndex, labels) diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index 814c356912..9911bc705f 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -359,6 +359,14 @@ func (m *machine) ResolveRelativeAddresses() { } cur.condBrOffsetResolve(diff) } + case brTableSequence: + for i := range cur.targets { + l := label(cur.targets[i]) + offsetOfTarget := m.labelPositions[l].binaryOffset + diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin) + cur.targets[i] = uint32(diff) + } + cur.brTableSequenceOffsetsResolved() } currentOffset += cur.size() } diff --git a/internal/engine/wazevo/e2e_test.go b/internal/engine/wazevo/e2e_test.go index aa031af443..ce6494c786 100644 --- a/internal/engine/wazevo/e2e_test.go +++ b/internal/engine/wazevo/e2e_test.go @@ -198,6 +198,21 @@ func TestE2E(t *testing.T) { {params: []uint64{math.MaxUint32}, expErr: "invalid table access"}, // Out of bounds. }, }, + { + name: "br_table", + m: testcases.BrTable.Module, + calls: []callCase{ + {params: []uint64{0}, expResults: []uint64{11}}, + {params: []uint64{1}, expResults: []uint64{12}}, + {params: []uint64{2}, expResults: []uint64{13}}, + {params: []uint64{3}, expResults: []uint64{14}}, + {params: []uint64{4}, expResults: []uint64{15}}, + {params: []uint64{5}, expResults: []uint64{16}}, + // Out of range --> default. 
+ {params: []uint64{6}, expResults: []uint64{11}}, + {params: []uint64{1000}, expResults: []uint64{11}}, + }, + }, } { tc := tc t.Run(tc.name, func(t *testing.T) { From 4c13bd108aa3c85667ae1b47174553cfc06a92cc Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 12:46:17 +0900 Subject: [PATCH 4/6] comment Signed-off-by: Takeshi Yoneda --- internal/engine/wazevo/backend/isa/arm64/instr_encoding.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 4b0c532b8f..52e31d7a6c 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -1020,7 +1020,7 @@ func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []ui indexNumber := regNumberInEncoding[index.RealReg()] // adr tmpReg, PC+16 (PC+16 is the address of the first label offset) - // ldrsw index, [tmpReg, index, UXTW 2] ;; index = uint64(*(tmpReg, index*8)) + // ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*4)) // add tmpReg, tmpReg, index // br tmpReg // [offset_to_l1, offset_to_l2, ..., offset_to_lN] @@ -1031,7 +1031,7 @@ func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []ui c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) - // Label offsets are resolved after the whole function is compiled. + // Offsets are resolved in ResolveRelativeAddresses phase.
for _, offset := range targets { c.Emit4Bytes(offset) } From 32bf3868845ea62f4ee1da8dcbfea096d08a19fd Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 13:02:30 +0900 Subject: [PATCH 5/6] brtable with args Signed-off-by: Takeshi Yoneda --- internal/engine/wazevo/e2e_test.go | 15 +++ .../engine/wazevo/frontend/frontend_test.go | 117 ++++++++++++++++++ internal/engine/wazevo/frontend/lower.go | 32 +++-- 3 files changed, 157 insertions(+), 7 deletions(-) diff --git a/internal/engine/wazevo/e2e_test.go b/internal/engine/wazevo/e2e_test.go index ce6494c786..335883ac3d 100644 --- a/internal/engine/wazevo/e2e_test.go +++ b/internal/engine/wazevo/e2e_test.go @@ -213,6 +213,21 @@ func TestE2E(t *testing.T) { {params: []uint64{1000}, expResults: []uint64{11}}, }, }, + { + name: "br_table_with_args", + m: testcases.BrTableWithArg.Module, + calls: []callCase{ + {params: []uint64{0, 100}, expResults: []uint64{11 + 100}}, + {params: []uint64{1, 100}, expResults: []uint64{12 + 100}}, + {params: []uint64{2, 100}, expResults: []uint64{13 + 100}}, + {params: []uint64{3, 100}, expResults: []uint64{14 + 100}}, + {params: []uint64{4, 100}, expResults: []uint64{15 + 100}}, + {params: []uint64{5, 100}, expResults: []uint64{16 + 100}}, + // Out of range --> default. 
+ {params: []uint64{6, 200}, expResults: []uint64{11 + 200}}, + {params: []uint64{1000, 300}, expResults: []uint64{11 + 300}}, + }, + }, } { tc := tc t.Run(tc.name, func(t *testing.T) { diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index cf7f3b7b66..62be89af9f 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -1489,6 +1489,123 @@ blk5: () <-- (blk0) blk6: () <-- (blk0,blk0) v3:i32 = Iconst_32 0xb Return v3 +`, + }, + { + name: "br_table_with_arg", m: testcases.BrTableWithArg.Module, + exp: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:i32) + BrTable v2, [blk7, blk8, blk9, blk10, blk11, blk12, blk13] + +blk1: (v4:i32) <-- (blk12) + v20:i32 = Iconst_32 0x10 + v21:i32 = Iadd v4, v20 + Return v21 + Exit exec_ctx, unreachable + +blk2: (v5:i32) <-- (blk11) + v18:i32 = Iconst_32 0xf + v19:i32 = Iadd v5, v18 + Return v19 + +blk3: (v6:i32) <-- (blk10) + v16:i32 = Iconst_32 0xe + v17:i32 = Iadd v6, v16 + Return v17 + +blk4: (v7:i32) <-- (blk9) + v14:i32 = Iconst_32 0xd + v15:i32 = Iadd v7, v14 + Return v15 + +blk5: (v8:i32) <-- (blk8) + v12:i32 = Iconst_32 0xc + v13:i32 = Iadd v8, v12 + Return v13 + +blk6: (v9:i32) <-- (blk7,blk13) + v10:i32 = Iconst_32 0xb + v11:i32 = Iadd v9, v10 + Return v11 + +blk7: () <-- (blk0) + Jump blk6, v3 + +blk8: () <-- (blk0) + Jump blk5, v3 + +blk9: () <-- (blk0) + Jump blk4, v3 + +blk10: () <-- (blk0) + Jump blk3, v3 + +blk11: () <-- (blk0) + Jump blk2, v3 + +blk12: () <-- (blk0) + Jump blk1, v3 + +blk13: () <-- (blk0) + Jump blk6, v3 +`, + + // TODO: these trivial two hop jumps should be optimized away. 
+ expAfterOpt: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:i32) + BrTable v2, [blk7, blk8, blk9, blk10, blk11, blk12, blk13] + +blk1: () <-- (blk12) + v20:i32 = Iconst_32 0x10 + v21:i32 = Iadd v3, v20 + Return v21 + Exit exec_ctx, unreachable + +blk2: () <-- (blk11) + v18:i32 = Iconst_32 0xf + v19:i32 = Iadd v3, v18 + Return v19 + +blk3: () <-- (blk10) + v16:i32 = Iconst_32 0xe + v17:i32 = Iadd v3, v16 + Return v17 + +blk4: () <-- (blk9) + v14:i32 = Iconst_32 0xd + v15:i32 = Iadd v3, v14 + Return v15 + +blk5: () <-- (blk8) + v12:i32 = Iconst_32 0xc + v13:i32 = Iadd v3, v12 + Return v13 + +blk6: () <-- (blk7,blk13) + v10:i32 = Iconst_32 0xb + v11:i32 = Iadd v3, v10 + Return v11 + +blk7: () <-- (blk0) + Jump blk6 + +blk8: () <-- (blk0) + Jump blk5 + +blk9: () <-- (blk0) + Jump blk4 + +blk10: () <-- (blk0) + Jump blk3 + +blk11: () <-- (blk0) + Jump blk2 + +blk12: () <-- (blk0) + Jump blk1 + +blk13: () <-- (blk0) + Jump blk6 `, }, } { diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index 5b00bacdc8..451ceb9816 100644 --- a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -1478,8 +1478,8 @@ func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) { numArgs = len(f.blockType.Results) } + targets := make([]ssa.BasicBlock, len(labels)) if numArgs == 0 { - targets := make([]ssa.BasicBlock, len(labels)) for i, l := range labels { targetBlk, argNum := state.brTargetArgNumFor(l) if argNum != 0 { @@ -1494,14 +1494,32 @@ func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) { panic("TODO") } } + } else { + // If this needs to pass arguments, we need trampoline blocks since depending on the target block structure, + // we might end up inserting moves before jumps, which cannot be done with br_table. Instead, we can do such + // per-block moves in the trampoline blocks. - // If the target block has no arguments, we can just jump to the target block. 
- brTable := builder.AllocateInstruction() - brTable.AsBrTable(index, targets) - builder.InsertInstruction(brTable) + args := c.loweringState.nPeekDup(numArgs) // Args are always on the top of the stack. + currentBlk := builder.CurrentBlock() + for i, l := range labels { + targetBlk, _ := state.brTargetArgNumFor(l) + trampoline := builder.AllocateBasicBlock() + builder.SetCurrentBlock(trampoline) + c.insertJumpToBlock(args, targetBlk) + targets[i] = trampoline + } + builder.SetCurrentBlock(currentBlk) + } - } else { - panic("TODO") + // If the target block has no arguments, we can just jump to the target block. + brTable := builder.AllocateInstruction() + brTable.AsBrTable(index, targets) + builder.InsertInstruction(brTable) + + if numArgs > 0 { + for _, trampoline := range targets { + builder.Seal(trampoline) + } } } From 6c657dfaa1e3a99449e1802c2b75bcedd7fb1326 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Tue, 22 Aug 2023 13:07:08 +0900 Subject: [PATCH 6/6] garbage Signed-off-by: Takeshi Yoneda --- internal/engine/wazevo/backend/isa/arm64/machine.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index 9911bc705f..1aa1380cdd 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -40,7 +40,6 @@ type ( addends32 []addend32 // addends64 is used during address lowering, defined here for reuse. addends64 []regalloc.VReg - brTablesTargets [][]ssa.BasicBlock unresolvedAddressModes []*instruction // spillSlotSize is the size of the stack slot in bytes used for spilling registers. @@ -135,7 +134,6 @@ func (m *machine) Reset() { m.spillSlotSize = 0 m.unresolvedAddressModes = m.unresolvedAddressModes[:0] m.rootInstr = nil - m.brTablesTargets = m.brTablesTargets[:0] } // InitializeABI implements backend.Machine InitializeABI.