diff --git a/go.mod b/go.mod index 85ae4ae007..44c2f035b9 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,9 @@ module github.com/tetratelabs/wazero go 1.17 require ( + // Test-only dependency. github.com/stretchr/testify v1.7.0 - // Once we reach some maturity, remove this dep and implement our own assembler. + // Test-only dependency. github.com/twitchyliquid64/golang-asm v0.15.1 ) diff --git a/internal/asm/amd64/assembler.go b/internal/asm/amd64/assembler.go index 782c4fa583..61c83fe2d9 100644 --- a/internal/asm/amd64/assembler.go +++ b/internal/asm/amd64/assembler.go @@ -7,32 +7,59 @@ import ( // Assembler is the interface used by the amd64 JIT compiler. type Assembler interface { asm.AssemblerBase + + // CompileJumpToMemory adds a jump-type instruction whose destination is stored in the memory address specified by `baseReg+offset`. + CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) + // CompileRegisterToRegisterWithMode adds an instruction where source and destination // are `from` and `to` registers and the instruction's "Mode" is specified by `mode`. CompileRegisterToRegisterWithMode(instruction asm.Instruction, from, to asm.Register, mode Mode) + // CompileMemoryWithIndexToRegister adds an instruction where source operand is the memory address // specified as `srcBaseReg + srcOffsetConst + srcIndex*srcScale` and destination is the register `dstReg`. // Note: srcScale must be one of 1, 2, 4, 8. - CompileMemoryWithIndexToRegister(instruction asm.Instruction, srcBaseReg asm.Register, srcOffsetConst int64, srcIndex asm.Register, srcScale int16, dstReg asm.Register) + CompileMemoryWithIndexToRegister( + instruction asm.Instruction, + srcBaseReg asm.Register, + srcOffsetConst int64, + srcIndex asm.Register, + srcScale int16, + dstReg asm.Register, + ) + // CompileRegisterToMemoryWithIndex adds an instruction where source operand is the register `srcReg`, // and the destination is the memory address specified as `dstBaseReg + dstOffsetConst + dstIndex*dstScale`. // Note: dstScale must be one of 1, 2, 4, 8. - CompileRegisterToMemoryWithIndex(instruction asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst int64, dstIndex asm.Register, dstScale int16) + CompileRegisterToMemoryWithIndex( + instruction asm.Instruction, + srcReg asm.Register, + dstBaseReg asm.Register, + dstOffsetConst int64, + dstIndex asm.Register, + dstScale int16, + ) + // CompileRegisterToConst adds an instruction where source operand is the register `srcRegister`, // and the destination is the const `value`. CompileRegisterToConst(instruction asm.Instruction, srcRegister asm.Register, value int64) asm.Node + // CompileRegisterToNone adds an instruction where source operand is the register `register`, // and there's no destination operand. CompileRegisterToNone(instruction asm.Instruction, register asm.Register) + // CompileNoneToRegister adds an instruction where destination operand is the register `register`, // and there's no source operand. CompileNoneToRegister(instruction asm.Instruction, register asm.Register) + // CompileNoneToMemory adds an instruction where destination operand is the memory address specified // as `baseReg+offset`, and there's no source operand.
CompileNoneToMemory(instruction asm.Instruction, baseReg asm.Register, offset int64) + // CompileConstToMemory adds an instruction where source operand is the constant `value` and - // the destination is the memory address sppecified as `dstbaseReg+dstOffset`. + // the destination is the memory address specified as `dstbaseReg+dstOffset`. CompileConstToMemory(instruction asm.Instruction, value int64, dstbaseReg asm.Register, dstOffset int64) asm.Node + // CompileMemoryToConst adds an instruction where source operand is the memory address, and // the destination is the constant `value`. CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset int64, value int64) asm.Node diff --git a/internal/asm/amd64/consts.go b/internal/asm/amd64/consts.go index ec0c205e4f..e09e4b23cc 100644 --- a/internal/asm/amd64/consts.go +++ b/internal/asm/amd64/consts.go @@ -3,8 +3,9 @@ package asm_amd64 import "github.com/tetratelabs/wazero/internal/asm" // AMD64-specific conditional register states. -// https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf -// https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +// +// See https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf +// See https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf const ( ConditionalRegisterStateE = asm.ConditionalRegisterStateUnset + 1 + iota // ZF equal to zero ConditionalRegisterStateNE //˜ZF not equal to zero @@ -21,10 +22,10 @@ const ( ) // AMD64-specific instructions. -// https://www.felixcloutier.com/x86/index.html // -// Note: here we do not define all of amd64 instructions, and we only define the ones used by wazero's JIT compiler. -// Note: naming convension is exactly the same as Go assembler: https://go.dev/doc/asm +// Note: This only defines amd64 instructions used by wazero's JIT compiler. +// Note: Naming conventions intentionally match the Go assembler: https://go.dev/doc/asm +// See https://www.felixcloutier.com/x86/index.html const ( NONE asm.Instruction = iota ADDL @@ -426,10 +427,10 @@ func InstructionName(instruction asm.Instruction) string { } // AMD64-specific registers. -// https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf -// https://cs.brown.edu/courses/cs033/docs/guides/x64_cheatsheet.pdf // -// Note: naming convension is exactly the same as Go assembler: https://go.dev/doc/asm +// Note: naming convention intentionally matches the Go assembler: https://go.dev/doc/asm +// See https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf +// See https://cs.brown.edu/courses/cs033/docs/guides/x64_cheatsheet.pdf const ( REG_AX asm.Register = asm.NilRegister + 1 + iota REG_CX diff --git a/internal/asm/amd64/impl.go b/internal/asm/amd64/impl.go index f57082e4a7..26a3d86088 100644 --- a/internal/asm/amd64/impl.go +++ b/internal/asm/amd64/impl.go @@ -350,10 +350,10 @@ func (a *AssemblerImpl) Encode() (err error) { } // After the padding, we can finalize the offset of this instruction in the binary. - n.OffsetInBinaryField = (uint64(a.Buf.Len())) + n.OffsetInBinaryField = uint64(a.Buf.Len()) - if err := a.EncodeNode(n); err != nil { - return fmt.Errorf("%w: %v", err, n) + if err = a.EncodeNode(n); err != nil { + return } err = a.ResolveForwardRelativeJumps(n) @@ -365,7 +365,7 @@ func (a *AssemblerImpl) Encode() (err error) { return } -// maybeNOPpadding maybe appends NOP instructions before the node `n`.
+// maybeNOPPadding appends NOP instructions before the node `n` when needed. // This is necessary to avoid Intel's jump erratum: // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf func (a *AssemblerImpl) maybeNOPPadding(n *NodeImpl) (err error) { @@ -438,7 +438,7 @@ func (a *AssemblerImpl) fusedInstructionLength(n *NodeImpl) (ret int32, err erro return } - // How to determine whether or not the instruction can be fused is described in + // How to determine whether the instruction can be fused is described in // Section 3.4.2.2 of "Intel Optimization Manual": // https://www.intel.com/content/dam/doc/manual/64-ia-32-architectures-optimization-manual.pdf isTest := inst == TESTL || inst == TESTQ @@ -450,7 +450,7 @@ func (a *AssemblerImpl) fusedInstructionLength(n *NodeImpl) (ret int32, err erro return } - // Implement the descision according to the table 3-1 in the manual. + // Implement the decision according to Table 3-1 in the manual. isAnd := inst == ANDL || inst == ANDQ if !isTest && !isAnd { if jmpInst == JMI || jmpInst == JPL || jmpInst == JPS || jmpInst == JPC { @@ -479,8 +479,7 @@ func (a *AssemblerImpl) fusedInstructionLength(n *NodeImpl) (ret int32, err erro fused.OffsetInBinaryField = savedLen + uint64(a.Buf.Len()) // Encode the node into the temporary buffer. - err = a.EncodeNode(fused) - if err != nil { + if err = a.EncodeNode(fused); err != nil { return } } @@ -520,77 +519,108 @@ func (a *AssemblerImpl) padNOP(num int) { } } -// CompileStandAlone implements asm.AssemblerBase.CompileStandAlone +// CompileStandAlone implements the same method as documented on asm.AssemblerBase. func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node { return a.newNode(instruction, OperandTypesNoneToNone) } -// CompileConstToRegister implements asm.AssemblerBase.CompileConstToRegister -func (a *AssemblerImpl) CompileConstToRegister(instruction asm.Instruction, value asm.ConstantValue, destinationReg asm.Register) (inst asm.Node) { +// CompileConstToRegister implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileConstToRegister( + instruction asm.Instruction, + value asm.ConstantValue, + destinationReg asm.Register, +) (inst asm.Node) { n := a.newNode(instruction, OperandTypesConstToRegister) n.SrcConst = value n.DstReg = destinationReg return n } -// CompileRegisterToRegister implements asm.AssemblerBase.CompileRegisterToRegister +// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase. func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) { n := a.newNode(instruction, OperandTypesRegisterToRegister) n.SrcReg = from n.DstReg = to } -// CompileMemoryToRegister implements asm.AssemblerBase.CompileMemoryToRegister -func (a *AssemblerImpl) CompileMemoryToRegister(instruction asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) { +// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
+func (a *AssemblerImpl) CompileMemoryToRegister( + instruction asm.Instruction, + sourceBaseReg asm.Register, + sourceOffsetConst asm.ConstantValue, + destinationReg asm.Register, +) { n := a.newNode(instruction, OperandTypesMemoryToRegister) n.SrcReg = sourceBaseReg n.SrcConst = sourceOffsetConst n.DstReg = destinationReg } -// CompileRegisterToMemory implements asm.AssemblerBase.CompileRegisterToMemory -func (a *AssemblerImpl) CompileRegisterToMemory(instruction asm.Instruction, sourceRegister asm.Register, destinationBaseRegister asm.Register, destinationOffsetConst asm.ConstantValue) { +// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileRegisterToMemory( + instruction asm.Instruction, + sourceRegister, destinationBaseRegister asm.Register, + destinationOffsetConst asm.ConstantValue, +) { n := a.newNode(instruction, OperandTypesRegisterToMemory) n.SrcReg = sourceRegister n.DstReg = destinationBaseRegister n.DstConst = destinationOffsetConst } -// CompileJump implements asm.AssemblerBase.CompileJump +// CompileJump implements the same method as documented on asm.AssemblerBase. func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node { return a.newNode(jmpInstruction, OperandTypesNoneToBranch) } -// CompileJumpToMemory implements asm.AssemblerBase.CompileJumpToMemory -func (a *AssemblerImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) { +// CompileJumpToMemory implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileJumpToMemory( + jmpInstruction asm.Instruction, + baseReg asm.Register, + offset asm.ConstantValue, +) { n := a.newNode(jmpInstruction, OperandTypesNoneToMemory) n.DstReg = baseReg n.DstConst = offset } -// CompileJumpToRegister implements asm.AssemblerBase.CompileJumpToRegister +// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase. func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) { n := a.newNode(jmpInstruction, OperandTypesNoneToRegister) n.DstReg = reg } -// CompileReadInstructionAddress implements asm.AssemblerBase.CompileReadInstructionAddress -func (a *AssemblerImpl) CompileReadInstructionAddress(destinationRegister asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) { +// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileReadInstructionAddress( + destinationRegister asm.Register, + beforeAcquisitionTargetInstruction asm.Instruction, +) { n := a.newNode(LEAQ, OperandTypesMemoryToRegister) n.DstReg = destinationRegister n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction } -// CompileRegisterToRegisterWithMode implements assembler.CompileRegisterToRegisterWithMode -func (a *AssemblerImpl) CompileRegisterToRegisterWithMode(instruction asm.Instruction, from, to asm.Register, mode Mode) { +// CompileRegisterToRegisterWithMode implements the same method as documented on asm_amd64.Assembler.
+func (a *AssemblerImpl) CompileRegisterToRegisterWithMode( + instruction asm.Instruction, + from, to asm.Register, + mode Mode, +) { n := a.newNode(instruction, OperandTypesRegisterToRegister) n.SrcReg = from n.DstReg = to n.Mode = mode } -// CompileMemoryWithIndexToRegister implements assembler.CompileMemoryWithIndexToRegister -func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(instruction asm.Instruction, srcBaseReg asm.Register, srcOffsetConst asm.ConstantValue, srcIndex asm.Register, srcScale int16, dstReg asm.Register) { +// CompileMemoryWithIndexToRegister implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileMemoryWithIndexToRegister( + instruction asm.Instruction, + srcBaseReg asm.Register, + srcOffsetConst asm.ConstantValue, + srcIndex asm.Register, + srcScale int16, + dstReg asm.Register, +) { n := a.newNode(instruction, OperandTypesMemoryToRegister) n.SrcReg = srcBaseReg n.SrcConst = srcOffsetConst @@ -599,8 +629,14 @@ func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(instruction asm.Instruc n.DstReg = dstReg } -// CompileRegisterToMemoryWithIndex implements assembler.CompileRegisterToMemoryWithIndex -func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(instruction asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst asm.ConstantValue, dstIndex asm.Register, dstScale int16) { +// CompileRegisterToMemoryWithIndex implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex( + instruction asm.Instruction, + srcReg, dstBaseReg asm.Register, + dstOffsetConst asm.ConstantValue, + dstIndex asm.Register, + dstScale int16, +) { n := a.newNode(instruction, OperandTypesRegisterToMemory) n.SrcReg = srcReg n.DstReg = dstBaseReg @@ -609,35 +645,48 @@ func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(instruction asm.Instruc n.DstMemScale = byte(dstScale) } -// CompileRegisterToConst implements assembler.CompileRegisterToConst -func (a *AssemblerImpl) CompileRegisterToConst(instruction asm.Instruction, srcRegister asm.Register, value asm.ConstantValue) asm.Node { +// CompileRegisterToConst implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileRegisterToConst( + instruction asm.Instruction, + srcRegister asm.Register, + value asm.ConstantValue, +) asm.Node { n := a.newNode(instruction, OperandTypesRegisterToConst) n.SrcReg = srcRegister n.DstConst = value return n } -// CompileRegisterToNone implements assembler.CompileRegisterToNone +// CompileRegisterToNone implements the same method as documented on asm_amd64.Assembler. func (a *AssemblerImpl) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) { n := a.newNode(instruction, OperandTypesRegisterToNone) n.SrcReg = register } -// CompileNoneToRegister implements assembler.CompileNoneToRegister +// CompileNoneToRegister implements the same method as documented on asm_amd64.Assembler. func (a *AssemblerImpl) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) { n := a.newNode(instruction, OperandTypesNoneToRegister) n.DstReg = register } -// CompileNoneToMemory implements assembler.CompileNoneToMemory -func (a *AssemblerImpl) CompileNoneToMemory(instruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) { +// CompileNoneToMemory implements the same method as documented on asm_amd64.Assembler.
+func (a *AssemblerImpl) CompileNoneToMemory( + instruction asm.Instruction, + baseReg asm.Register, + offset asm.ConstantValue, +) { n := a.newNode(instruction, OperandTypesNoneToMemory) n.DstReg = baseReg n.DstConst = offset } -// CompileConstToMemory implements assembler.CompileConstToMemory -func (a *AssemblerImpl) CompileConstToMemory(instruction asm.Instruction, value asm.ConstantValue, dstbaseReg asm.Register, dstOffset asm.ConstantValue) asm.Node { +// CompileConstToMemory implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileConstToMemory( + instruction asm.Instruction, + value asm.ConstantValue, + dstbaseReg asm.Register, + dstOffset asm.ConstantValue, +) asm.Node { n := a.newNode(instruction, OperandTypesConstToMemory) n.SrcConst = value n.DstReg = dstbaseReg @@ -645,8 +694,12 @@ func (a *AssemblerImpl) CompileConstToMemory(instruction asm.Instruction, value return n } -// CompileMemoryToConst implements assembler.CompileMemoryToConst -func (a *AssemblerImpl) CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset asm.ConstantValue, value asm.ConstantValue) asm.Node { +// CompileMemoryToConst implements the same method as documented on asm_amd64.Assembler. +func (a *AssemblerImpl) CompileMemoryToConst( + instruction asm.Instruction, + srcBaseReg asm.Register, + srcOffset, value asm.ConstantValue, +) asm.Node { n := a.newNode(instruction, OperandTypesMemoryToConst) n.SrcReg = srcBaseReg n.SrcConst = srcOffset @@ -1137,18 +1190,18 @@ func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) { opcode = op.i2i } - RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(opcode.srcOnModRMReg) + rexPrefix, modRM, err := n.GetRegisterToRegisterModRM(opcode.srcOnModRMReg) if err != nil { return err } - RexPrefix |= opcode.rPrefix + rexPrefix |= opcode.rPrefix if opcode.mandatoryPrefix != 0 { a.Buf.WriteByte(opcode.mandatoryPrefix) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write(opcode.opcode) @@ -1166,24 +1219,24 @@ func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) { return fmt.Errorf("%s require integer dst register but got %s", InstructionName(inst), RegisterName(n.DstReg)) } - RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(op.srcOnModRMReg) + rexPrefix, modRM, err := n.GetRegisterToRegisterModRM(op.srcOnModRMReg) if err != nil { return err } - RexPrefix |= op.rPrefix + rexPrefix |= op.rPrefix if op.isSrc8bit && REG_SP <= n.SrcReg && n.SrcReg <= REG_DI { // If an operand register is 8-bit length of SP, BP, DI, or SI register, we need to have the default prefix.
// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers - RexPrefix |= RexPrefixDefault + rexPrefix |= RexPrefixDefault } if op.mandatoryPrefix != 0 { a.Buf.WriteByte(op.mandatoryPrefix) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write(op.opcode) @@ -1200,14 +1253,14 @@ func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) { return fmt.Errorf("shifting instruction %s require integer register as dst but got %s", InstructionName(inst), RegisterName(n.SrcReg)) } - reg3bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM) + reg3bits, rexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM) if err != nil { return err } - RexPrefix |= op.rPrefix - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + rexPrefix |= op.rPrefix + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } // https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM @@ -1222,7 +1275,7 @@ func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) { } func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) { - RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() + rexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() if err != nil { return err } @@ -1236,7 +1289,7 @@ func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) { opcode = []byte{0x3b} case CMPQ: // https://www.felixcloutier.com/x86/cmp - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x3b} case MOVB: // https://www.felixcloutier.com/x86/mov @@ -1257,7 +1310,7 @@ func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) { mandatoryPrefix = 0x66 } else { // https://www.felixcloutier.com/x86/mov - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x89} } case MOVW: @@ -1273,7 +1326,7 @@ ... isShiftInstruction = true case SARQ: // https://www.felixcloutier.com/x86/sal:sar:shl:shr - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM |= 0b00_111_000 opcode = []byte{0xd3} isShiftInstruction = true @@ -1284,7 +1337,7 @@ ... isShiftInstruction = true case SHLQ: // https://www.felixcloutier.com/x86/sal:sar:shl:shr - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM |= 0b00_100_000 opcode = []byte{0xd3} isShiftInstruction = true @@ -1295,7 +1348,7 @@ ... isShiftInstruction = true case SHRQ: // https://www.felixcloutier.com/x86/sal:sar:shl:shr - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM |= 0b00_101_000 opcode = []byte{0xd3} isShiftInstruction = true @@ -1305,7 +1358,7 @@ ... isShiftInstruction = true case ROLQ: // https://www.felixcloutier.com/x86/rcl:rcr:rol:ror - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0xd3} isShiftInstruction = true case RORL: @@ -1315,7 +1368,7 @@ ... isShiftInstruction = true case RORQ: // https://www.felixcloutier.com/x86/rcl:rcr:rol:ror - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0xd3} modRM |= 0b00_001_000 isShiftInstruction = true @@ -1329,7 +1382,7 @@ func (a *AssemblerImpl) EncodeRegisterToMemory(n
*NodeImpl) (err error) { return err } - RexPrefix |= prefix + rexPrefix |= prefix modRM |= (srcReg3Bits << 3) // Place the source register on ModRM:reg } else { if n.SrcReg != REG_CX { @@ -1342,8 +1395,8 @@ func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) { a.Buf.WriteByte(mandatoryPrefix) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write(opcode) @@ -1401,7 +1454,7 @@ func (a *AssemblerImpl) EncodeRegisterToConst(n *NodeImpl) (err error) { } func (a *AssemblerImpl) encodeReadInstructionAddress(n *NodeImpl) error { - dstReg3Bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg) + dstReg3Bits, rexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg) if err != nil { return err } @@ -1431,13 +1484,13 @@ func (a *AssemblerImpl) encodeReadInstructionAddress(n *NodeImpl) error { // https://www.felixcloutier.com/x86/lea opcode := byte(0x8d) - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW // https://wiki.osdev.org/X86-64_Instruction_Encoding#64-bit_addressing modRM := 0b00_000_101 | // Indicate "LEAQ [RIP + 32bit displacement], DstReg" encoding. (dstReg3Bits << 3) // Place the DstReg on ModRM:reg. - a.Buf.Write([]byte{RexPrefix, opcode, modRM}) + a.Buf.Write([]byte{rexPrefix, opcode, modRM}) a.WriteConst(int64(0), 32) // Preserve return nil } @@ -1447,7 +1500,7 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { return a.encodeReadInstructionAddress(n) } - RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() + rexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() if err != nil { return err } @@ -1457,7 +1510,7 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { return err } - RexPrefix |= prefix + rexPrefix |= prefix modRM |= (dstReg3Bits << 3) // Place the destination register on ModRM:reg var mandatoryPrefix byte @@ -1468,18 +1521,18 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { opcode = []byte{0x03} case ADDQ: // https://www.felixcloutier.com/x86/add - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x03} case CMPL: // https://www.felixcloutier.com/x86/cmp opcode = []byte{0x39} case CMPQ: // https://www.felixcloutier.com/x86/cmp - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x39} case LEAQ: // https://www.felixcloutier.com/x86/lea - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x8d} case MOVBLSX: // https://www.felixcloutier.com/x86/movsx:movsxd @@ -1489,15 +1542,15 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { opcode = []byte{0x0f, 0xb6} case MOVBQSX: // https://www.felixcloutier.com/x86/movsx:movsxd - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x0f, 0xbe} case MOVBQZX: // https://www.felixcloutier.com/x86/movzx - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x0f, 0xb6} case MOVLQSX: // https://www.felixcloutier.com/x86/movsx:movsxd - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x63} case MOVLQZX: // https://www.felixcloutier.com/x86/mov @@ -1523,7 +1576,7 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { mandatoryPrefix = 0xf3 } else { // https://www.felixcloutier.com/x86/mov - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x8B} } case MOVWLSX: @@ -1534,15 +1587,15 @@ func (a *AssemblerImpl) 
EncodeMemoryToRegister(n *NodeImpl) (err error) { opcode = []byte{0x0f, 0xb7} case MOVWQSX: // https://www.felixcloutier.com/x86/movsx:movsxd - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x0f, 0xbf} case MOVWQZX: // https://www.felixcloutier.com/x86/movzx - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x0f, 0xb7} case SUBQ: // https://www.felixcloutier.com/x86/sub - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = []byte{0x2b} case SUBSD: // https://www.felixcloutier.com/x86/subsd @@ -1568,8 +1621,8 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { a.Buf.WriteByte(mandatoryPrefix) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write(opcode) @@ -1588,7 +1641,7 @@ func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { } func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { - regBits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM) + regBits, rexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM) if err != nil { return err } @@ -1620,16 +1673,16 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { switch inst := n.Instruction; inst { case ADDQ: // https://www.felixcloutier.com/x86/add - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW if n.DstReg == REG_AX && !isSigned8bitConst { - a.Buf.Write([]byte{RexPrefix, 0x05}) + a.Buf.Write([]byte{rexPrefix, 0x05}) } else { modRM := 0b11_000_000 | // Specifying that opeand is register. regBits if isSigned8bitConst { - a.Buf.Write([]byte{RexPrefix, 0x83, modRM}) + a.Buf.Write([]byte{rexPrefix, 0x83, modRM}) } else { - a.Buf.Write([]byte{RexPrefix, 0x81, modRM}) + a.Buf.Write([]byte{rexPrefix, 0x81, modRM}) } } if isSigned8bitConst { @@ -1639,17 +1692,17 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { } case ANDQ: // https://www.felixcloutier.com/x86/and - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW if n.DstReg == REG_AX && !isSigned8bitConst { - a.Buf.Write([]byte{RexPrefix, 0x25}) + a.Buf.Write([]byte{rexPrefix, 0x25}) } else { modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_100_000 | // AND with immediate needs "/4" extension. regBits if isSigned8bitConst { - a.Buf.Write([]byte{RexPrefix, 0x83, modRM}) + a.Buf.Write([]byte{rexPrefix, 0x83, modRM}) } else { - a.Buf.Write([]byte{RexPrefix, 0x81, modRM}) + a.Buf.Write([]byte{rexPrefix, 0x81, modRM}) } } if fitInSigned8bit(n.SrcConst) { @@ -1659,8 +1712,8 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { } case MOVL: // https://www.felixcloutier.com/x86/mov - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write([]byte{0xb8 | regBits}) a.WriteConst(n.SrcConst, 32) @@ -1668,44 +1721,44 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { // https://www.felixcloutier.com/x86/mov if FitIn32bit(n.SrcConst) { if n.SrcConst > math.MaxInt32 { - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write([]byte{0xb8 | regBits}) } else { - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM := 0b11_000_000 | // Specifying that opeand is register. 
regBits - a.Buf.Write([]byte{RexPrefix, 0xc7, modRM}) + a.Buf.Write([]byte{rexPrefix, 0xc7, modRM}) } a.WriteConst(n.SrcConst, 32) } else { - RexPrefix |= RexPrefixW - a.Buf.Write([]byte{RexPrefix, 0xb8 | regBits}) + rexPrefix |= RexPrefixW + a.Buf.Write([]byte{rexPrefix, 0xb8 | regBits}) a.WriteConst(n.SrcConst, 64) } case SHLQ: // https://www.felixcloutier.com/x86/sal:sar:shl:shr - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_100_000 | // SHL with immediate needs "/4" extension. regBits if n.SrcConst == 1 { - a.Buf.Write([]byte{RexPrefix, 0xd1, modRM}) + a.Buf.Write([]byte{rexPrefix, 0xd1, modRM}) } else { - a.Buf.Write([]byte{RexPrefix, 0xc1, modRM}) + a.Buf.Write([]byte{rexPrefix, 0xc1, modRM}) a.WriteConst(n.SrcConst, 8) } case SHRQ: // https://www.felixcloutier.com/x86/sal:sar:shl:shr - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_101_000 | // SHR with immediate needs "/5" extension. regBits if n.SrcConst == 1 { - a.Buf.Write([]byte{RexPrefix, 0xd1, modRM}) + a.Buf.Write([]byte{rexPrefix, 0xd1, modRM}) } else { - a.Buf.Write([]byte{RexPrefix, 0xc1, modRM}) + a.Buf.Write([]byte{rexPrefix, 0xc1, modRM}) a.WriteConst(n.SrcConst, 8) } case PSLLL: @@ -1713,8 +1766,8 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_110_000 | // PSLL with immediate needs "/6" extension. regBits - if RexPrefix != RexPrefixNone { - a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM}) + if rexPrefix != RexPrefixNone { + a.Buf.Write([]byte{0x66, rexPrefix, 0x0f, 0x72, modRM}) a.WriteConst(n.SrcConst, 8) } else { a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM}) @@ -1725,8 +1778,8 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_110_000 | // PSLL with immediate needs "/6" extension. regBits - if RexPrefix != RexPrefixNone { - a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM}) + if rexPrefix != RexPrefixNone { + a.Buf.Write([]byte{0x66, rexPrefix, 0x0f, 0x73, modRM}) a.WriteConst(n.SrcConst, 8) } else { a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM}) @@ -1738,8 +1791,8 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_010_000 | // PSRL with immediate needs "/2" extension. regBits - if RexPrefix != RexPrefixNone { - a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM}) + if rexPrefix != RexPrefixNone { + a.Buf.Write([]byte{0x66, rexPrefix, 0x0f, 0x72, modRM}) a.WriteConst(n.SrcConst, 8) } else { a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM}) @@ -1750,8 +1803,8 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { modRM := 0b11_000_000 | // Specifying that opeand is register. 0b00_010_000 | // PSRL with immediate needs "/2" extension. 
regBits - if RexPrefix != RexPrefixNone { - a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM}) + if rexPrefix != RexPrefixNone { + a.Buf.Write([]byte{0x66, rexPrefix, 0x0f, 0x73, modRM}) a.WriteConst(n.SrcConst, 8) } else { a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM}) @@ -1760,10 +1813,10 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { case XORL, XORQ: // https://www.felixcloutier.com/x86/xor if inst == XORQ { - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } if n.DstReg == REG_AX && !isSigned8bitConst { a.Buf.Write([]byte{0x35}) @@ -1793,7 +1846,7 @@ func (a *AssemblerImpl) EncodeMemoryToConst(n *NodeImpl) (err error) { return fmt.Errorf("too large target const %d for %s", n.DstConst, InstructionName(n.Instruction)) } - RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() + rexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() if err != nil { return err } @@ -1817,8 +1870,8 @@ func (a *AssemblerImpl) EncodeMemoryToConst(n *NodeImpl) (err error) { return errorEncodingUnsupported(n) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write([]byte{opcode, modRM}) @@ -1836,7 +1889,7 @@ func (a *AssemblerImpl) EncodeMemoryToConst(n *NodeImpl) (err error) { } func (a *AssemblerImpl) EncodeConstToMemory(n *NodeImpl) (err error) { - RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() + rexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation() if err != nil { return err } @@ -1860,15 +1913,15 @@ func (a *AssemblerImpl) EncodeConstToMemory(n *NodeImpl) (err error) { opcode = 0xc7 constWidth = 32 case MOVQ: - RexPrefix |= RexPrefixW + rexPrefix |= RexPrefixW opcode = 0xc7 constWidth = 32 default: return errorEncodingUnsupported(n) } - if RexPrefix != RexPrefixNone { - a.Buf.WriteByte(RexPrefix) + if rexPrefix != RexPrefixNone { + a.Buf.WriteByte(rexPrefix) } a.Buf.Write([]byte{opcode, modRM}) @@ -1936,7 +1989,7 @@ func (n *NodeImpl) GetMemoryLocation() (p RexPrefix, modRM byte, sbi *byte, disp return } - // Create ModR/M byte so that this instrction takes [R/M + displacement] operand if displacement !=0 + // Create ModR/M byte so that this instruction takes [R/M + displacement] operand if displacement !=0 // and otherwise [R/M]. withoutDisplacement := offset == 0 && // If the target register is R13 or BP, we have to keep [R/M + displacement] even if the value @@ -2025,6 +2078,8 @@ func (n *NodeImpl) GetMemoryLocation() (p RexPrefix, modRM byte, sbi *byte, disp return } +// GetRegisterToRegisterModRM returns the REX prefix and ModR/M byte for this node's register-to-register operands, +// with srcOnModRMReg determining whether the source register is placed on the ModRM:reg field. +// // TODO: srcOnModRMReg can be deleted after golang-asm removal. This is necessary to match our implementation // with golang-asm, but in practice, there are equivalent opcodes to always have src on ModRM:reg without ambiguity. func (n *NodeImpl) GetRegisterToRegisterModRM(srcOnModRMReg bool) (RexPrefix, modRM byte, err error) { @@ -2064,7 +2119,7 @@ func (n *NodeImpl) GetRegisterToRegisterModRM(srcOnModRMReg bool) (RexPrefix, mo } // https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM - modRM = 0b11_000_000 | // Specifying that dst opeand is register. + modRM = 0b11_000_000 | // Specifying that dst operand is register.
(reg3bits << 3) | rm3bits @@ -2093,7 +2148,10 @@ const ( registerSpecifierPositionSIBIndex ) -func register3bits(reg asm.Register, registerSpecifierPosition registerSpecifierPosition) (bits byte, prefix RexPrefix, err error) { +func register3bits( + reg asm.Register, + registerSpecifierPosition registerSpecifierPosition, +) (bits byte, prefix RexPrefix, err error) { prefix = RexPrefixNone if REG_R8 <= reg && reg <= REG_R15 || REG_X8 <= reg && reg <= REG_X15 { // https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix diff --git a/internal/asm/amd64_debug/debug_assembler.go b/internal/asm/amd64_debug/debug_assembler.go index 3333e3cfc8..c90787ff44 100644 --- a/internal/asm/amd64_debug/debug_assembler.go +++ b/internal/asm/amd64_debug/debug_assembler.go @@ -32,7 +32,7 @@ func NewDebugAssembler() (asm_amd64.Assembler, error) { return &testAssembler{a: a, goasm: goasm}, nil } -// testAssembler implements Assembler. +// testAssembler implements asm_amd64.Assembler. // This assembler ensures that our assembler produces exactly the same binary as Go's official assembler. // Disabled by default, and can be used for debugging only. // @@ -55,31 +55,31 @@ func (tn *testNode) String() string { return tn.n.String() } -// AssignJumpTarget implements asm.Node.AssignJumpTarget. +// AssignJumpTarget implements the same method as documented on asm.Node. func (tn *testNode) AssignJumpTarget(target asm.Node) { targetTestNode := target.(*testNode) tn.goasm.AssignJumpTarget(targetTestNode.goasm) tn.n.AssignJumpTarget(targetTestNode.n) } -// AssignDestinationConstant implements asm.Node.AssignDestinationConstant. +// AssignDestinationConstant implements the same method as documented on asm.Node. func (tn *testNode) AssignDestinationConstant(value asm.ConstantValue) { tn.goasm.AssignDestinationConstant(value) tn.n.AssignDestinationConstant(value) } -// AssignSourceConstant implements asm.Node.AssignSourceConstant. +// AssignSourceConstant implements the same method as documented on asm.Node. func (tn *testNode) AssignSourceConstant(value asm.ConstantValue) { tn.goasm.AssignSourceConstant(value) tn.n.AssignSourceConstant(value) } -// OffsetInBinary implements asm.Node.OffsetInBinary. +// OffsetInBinary implements the same method as documented on asm.Node. func (tn *testNode) OffsetInBinary() asm.NodeOffsetInBinary { return tn.goasm.OffsetInBinary() } -// Assemble implements Assembler.Assemble. +// Assemble implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) Assemble() ([]byte, error) { ret, err := ta.goasm.Assemble() if err != nil { @@ -99,7 +99,7 @@ func (ta *testAssembler) Assemble() ([]byte, error) { return ret, nil } -// SetJumpTargetOnNext implements Assembler.SetJumpTargetOnNext. +// SetJumpTargetOnNext implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) SetJumpTargetOnNext(nodes ...asm.Node) { for _, n := range nodes { targetTestNode := n.(*testNode) @@ -108,121 +108,167 @@ func (ta *testAssembler) SetJumpTargetOnNext(nodes ...asm.Node) { } } -// BuildJumpTable implements Assembler.BuildJumpTable. +// BuildJumpTable implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) BuildJumpTable(table []byte, initialInstructions []asm.Node) { ta.goasm.BuildJumpTable(table, initialInstructions) ta.a.BuildJumpTable(table, initialInstructions) } -// CompileStandAlone implements Assembler.CompileStandAlone. +// CompileStandAlone implements the same method as documented on asm_amd64.Assembler.
func (ta *testAssembler) CompileStandAlone(instruction asm.Instruction) asm.Node { ret := ta.goasm.CompileStandAlone(instruction) ret2 := ta.a.CompileStandAlone(instruction) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileConstToRegister implements Assembler.CompileConstToRegister. +// CompileConstToRegister implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileConstToRegister(instruction asm.Instruction, value asm.ConstantValue, destinationReg asm.Register) asm.Node { ret := ta.goasm.CompileConstToRegister(instruction, value, destinationReg) ret2 := ta.a.CompileConstToRegister(instruction, value, destinationReg) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileRegisterToRegister implements Assembler.CompileRegisterToRegister. +// CompileRegisterToRegister implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) { ta.goasm.CompileRegisterToRegister(instruction, from, to) ta.a.CompileRegisterToRegister(instruction, from, to) } -// CompileMemoryToRegister implements Assembler.CompileMemoryToRegister. -func (ta *testAssembler) CompileMemoryToRegister(instruction asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) { +// CompileMemoryToRegister implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileMemoryToRegister( + instruction asm.Instruction, + sourceBaseReg asm.Register, + sourceOffsetConst asm.ConstantValue, + destinationReg asm.Register, +) { ta.goasm.CompileMemoryToRegister(instruction, sourceBaseReg, sourceOffsetConst, destinationReg) ta.a.CompileMemoryToRegister(instruction, sourceBaseReg, sourceOffsetConst, destinationReg) } -// CompileRegisterToMemory implements Assembler.CompileRegisterToMemory. -func (ta *testAssembler) CompileRegisterToMemory(instruction asm.Instruction, sourceRegister asm.Register, destinationBaseRegister asm.Register, destinationOffsetConst asm.ConstantValue) { +// CompileRegisterToMemory implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileRegisterToMemory( + instruction asm.Instruction, + sourceRegister, destinationBaseRegister asm.Register, + destinationOffsetConst asm.ConstantValue, +) { ta.goasm.CompileRegisterToMemory(instruction, sourceRegister, destinationBaseRegister, destinationOffsetConst) ta.a.CompileRegisterToMemory(instruction, sourceRegister, destinationBaseRegister, destinationOffsetConst) } -// CompileJump implements Assembler.CompileJump. +// CompileJump implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileJump(jmpInstruction asm.Instruction) asm.Node { ret := ta.goasm.CompileJump(jmpInstruction) ret2 := ta.a.CompileJump(jmpInstruction) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileJumpToMemory implements Assembler.CompileJumpToMemory. -func (ta *testAssembler) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) { +// CompileJumpToMemory implements the same method as documented on asm_amd64.Assembler. 
+func (ta *testAssembler) CompileJumpToMemory( + jmpInstruction asm.Instruction, + baseReg asm.Register, + offset asm.ConstantValue, +) { ta.goasm.CompileJumpToMemory(jmpInstruction, baseReg, offset) ta.a.CompileJumpToMemory(jmpInstruction, baseReg, offset) } -// CompileJumpToRegister implements Assembler.CompileJumpToRegister. +// CompileJumpToRegister implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) { ta.goasm.CompileJumpToRegister(jmpInstruction, reg) ta.a.CompileJumpToRegister(jmpInstruction, reg) } -// CompileReadInstructionAddress implements Assembler.CompileReadInstructionAddress. -func (ta *testAssembler) CompileReadInstructionAddress(destinationRegister asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) { +// CompileReadInstructionAddress implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileReadInstructionAddress( + destinationRegister asm.Register, + beforeAcquisitionTargetInstruction asm.Instruction, +) { ta.goasm.CompileReadInstructionAddress(destinationRegister, beforeAcquisitionTargetInstruction) ta.a.CompileReadInstructionAddress(destinationRegister, beforeAcquisitionTargetInstruction) } -// CompileRegisterToRegisterWithMode implements Assembler.CompileRegisterToRegisterWithMode. -func (ta *testAssembler) CompileRegisterToRegisterWithMode(instruction asm.Instruction, from, to asm.Register, mode asm_amd64.Mode) { +// CompileRegisterToRegisterWithMode implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileRegisterToRegisterWithMode( + instruction asm.Instruction, + from, to asm.Register, + mode asm_amd64.Mode, +) { ta.goasm.CompileRegisterToRegisterWithMode(instruction, from, to, mode) ta.a.CompileRegisterToRegisterWithMode(instruction, from, to, mode) } -// CompileMemoryWithIndexToRegister implements Assembler.CompileMemoryWithIndexToRegister. -func (ta *testAssembler) CompileMemoryWithIndexToRegister(instruction asm.Instruction, srcBaseReg asm.Register, srcOffsetConst int64, srcIndex asm.Register, srcScale int16, dstReg asm.Register) { +// CompileMemoryWithIndexToRegister implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileMemoryWithIndexToRegister( + instruction asm.Instruction, + srcBaseReg asm.Register, + srcOffsetConst int64, + srcIndex asm.Register, + srcScale int16, + dstReg asm.Register, +) { ta.goasm.CompileMemoryWithIndexToRegister(instruction, srcBaseReg, srcOffsetConst, srcIndex, srcScale, dstReg) ta.a.CompileMemoryWithIndexToRegister(instruction, srcBaseReg, srcOffsetConst, srcIndex, srcScale, dstReg) } -// CompileRegisterToMemoryWithIndex implements Assembler.CompileRegisterToMemoryWithIndex. -func (ta *testAssembler) CompileRegisterToMemoryWithIndex(instruction asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst int64, dstIndex asm.Register, dstScale int16) { +// CompileRegisterToMemoryWithIndex implements the same method as documented on asm_amd64.Assembler. 
+func (ta *testAssembler) CompileRegisterToMemoryWithIndex( + instruction asm.Instruction, + srcReg, dstBaseReg asm.Register, + dstOffsetConst int64, + dstIndex asm.Register, + dstScale int16, +) { ta.goasm.CompileRegisterToMemoryWithIndex(instruction, srcReg, dstBaseReg, dstOffsetConst, dstIndex, dstScale) ta.a.CompileRegisterToMemoryWithIndex(instruction, srcReg, dstBaseReg, dstOffsetConst, dstIndex, dstScale) } -// CompileRegisterToConst implements Assembler.CompileRegisterToConst. -func (ta *testAssembler) CompileRegisterToConst(instruction asm.Instruction, srcRegister asm.Register, value int64) asm.Node { +// CompileRegisterToConst implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileRegisterToConst( + instruction asm.Instruction, + srcRegister asm.Register, + value int64, +) asm.Node { ret := ta.goasm.CompileRegisterToConst(instruction, srcRegister, value) ret2 := ta.a.CompileRegisterToConst(instruction, srcRegister, value) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileRegisterToNone implements Assembler.CompileRegisterToNone. +// CompileRegisterToNone implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) { ta.goasm.CompileRegisterToNone(instruction, register) ta.a.CompileRegisterToNone(instruction, register) } -// CompileNoneToRegister implements Assembler.CompileNoneToRegister. +// CompileNoneToRegister implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) { ta.goasm.CompileNoneToRegister(instruction, register) ta.a.CompileNoneToRegister(instruction, register) } -// CompileNoneToMemory implements Assembler.CompileNoneToMemory. +// CompileNoneToMemory implements the same method as documented on asm_amd64.Assembler. func (ta *testAssembler) CompileNoneToMemory(instruction asm.Instruction, baseReg asm.Register, offset int64) { ta.goasm.CompileNoneToMemory(instruction, baseReg, offset) ta.a.CompileNoneToMemory(instruction, baseReg, offset) } -// CompileConstToMemory implements Assembler.CompileConstToMemory. -func (ta *testAssembler) CompileConstToMemory(instruction asm.Instruction, value int64, dstbaseReg asm.Register, dstOffset int64) asm.Node { +// CompileConstToMemory implements the same method as documented on asm_amd64.Assembler. +func (ta *testAssembler) CompileConstToMemory( + instruction asm.Instruction, + value int64, + dstbaseReg asm.Register, + dstOffset int64, +) asm.Node { ret := ta.goasm.CompileConstToMemory(instruction, value, dstbaseReg, dstOffset) ret2 := ta.a.CompileConstToMemory(instruction, value, dstbaseReg, dstOffset) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileMemoryToConst implements Assembler.CompileMemoryToConst. -func (ta *testAssembler) CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset int64, value int64) asm.Node { +// CompileMemoryToConst implements the same method as documented on asm_amd64.Assembler. 
+func (ta *testAssembler) CompileMemoryToConst( + instruction asm.Instruction, + srcBaseReg asm.Register, + srcOffset, value int64, +) asm.Node { ret := ta.goasm.CompileMemoryToConst(instruction, srcBaseReg, srcOffset, value) ret2 := ta.a.CompileMemoryToConst(instruction, srcBaseReg, srcOffset, value) return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} diff --git a/internal/asm/amd64_debug/golang_asm.go b/internal/asm/amd64_debug/golang_asm.go index 7a50ed7810..99e9461db9 100644 --- a/internal/asm/amd64_debug/golang_asm.go +++ b/internal/asm/amd64_debug/golang_asm.go @@ -12,7 +12,7 @@ import ( "github.com/tetratelabs/wazero/internal/asm/golang_asm" ) -// assemblerGoAsmImpl implements Assembler for golang-asm library. +// assemblerGoAsmImpl implements asm_amd64.Assembler for the golang-asm library. type assemblerGoAsmImpl struct { *golang_asm.GolangAsmBaseAssembler } @@ -22,7 +22,7 @@ func newGolangAsmAssembler() (*assemblerGoAsmImpl, error) { return &assemblerGoAsmImpl{g}, err } -// CompileStandAlone implements Assembler.CompileStandAlone. +// CompileStandAlone implements the same method as documented on asm_amd64.Assembler. func (a *assemblerGoAsmImpl) CompileStandAlone(inst asm.Instruction) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] @@ -30,7 +30,7 @@ func (a *assemblerGoAsmImpl) CompileStandAlone(inst asm.Instruction) asm.Node { return golang_asm.NewGolangAsmNode(p) } -// CompileRegisterToRegister implements Assembler.CompileRegisterToRegister. +// CompileRegisterToRegister implements the same method as documented on asm_amd64.Assembler. func (a *assemblerGoAsmImpl) CompileRegisterToRegister(inst asm.Instruction, from, to asm.Register) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] @@ -41,9 +41,15 @@ func (a *assemblerGoAsmImpl) CompileRegisterToRegister(inst asm.Instruction, fro a.AddInstruction(p) } -// CompileMemoryWithIndexToRegister implements Assembler.CompileMemoryWithIndexToRegister. -func (a *assemblerGoAsmImpl) CompileMemoryWithIndexToRegister(inst asm.Instruction, - sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, sourceIndexReg asm.Register, sourceScale int16, destinationReg asm.Register) { +// CompileMemoryWithIndexToRegister implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileMemoryWithIndexToRegister( + inst asm.Instruction, + sourceBaseReg asm.Register, + sourceOffsetConst asm.ConstantValue, + sourceIndexReg asm.Register, + sourceScale int16, + destinationReg asm.Register, +) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.To.Type = obj.TYPE_REG @@ -56,8 +62,14 @@ func (a *assemblerGoAsmImpl) CompileMemoryWithIndexToRegister(inst asm.Instructi a.AddInstruction(p) } -// CompileRegisterToMemoryWithIndex implements Assembler.CompileRegisterToMemoryWithIndex. -func (a *assemblerGoAsmImpl) CompileRegisterToMemoryWithIndex(inst asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst asm.ConstantValue, dstIndexReg asm.Register, dstScale int16) { +// CompileRegisterToMemoryWithIndex implements the same method as documented on asm_amd64.Assembler.
+func (a *assemblerGoAsmImpl) CompileRegisterToMemoryWithIndex( + inst asm.Instruction, + srcReg, dstBaseReg asm.Register, + dstOffsetConst asm.ConstantValue, + dstIndexReg asm.Register, + dstScale int16, +) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.From.Type = obj.TYPE_REG @@ -70,8 +82,12 @@ func (a *assemblerGoAsmImpl) CompileRegisterToMemoryWithIndex(inst asm.Instructi a.AddInstruction(p) } -// CompileRegisterToMemory implements Assembler.CompileRegisterToMemory. -func (a *assemblerGoAsmImpl) CompileRegisterToMemory(inst asm.Instruction, sourceRegister asm.Register, destinationBaseRegister asm.Register, destinationOffsetConst asm.ConstantValue) { +// CompileRegisterToMemory implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileRegisterToMemory( + inst asm.Instruction, + sourceRegister, destinationBaseRegister asm.Register, + destinationOffsetConst asm.ConstantValue, +) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.To.Type = obj.TYPE_MEM @@ -82,8 +98,12 @@ func (a *assemblerGoAsmImpl) CompileRegisterToMemory(inst asm.Instruction, sourc a.AddInstruction(p) } -// CompileConstToRegister implements Assembler.CompileConstToRegister. -func (a *assemblerGoAsmImpl) CompileConstToRegister(inst asm.Instruction, constValue asm.ConstantValue, destinationRegister asm.Register) asm.Node { +// CompileConstToRegister implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileConstToRegister( + inst asm.Instruction, + constValue asm.ConstantValue, + destinationRegister asm.Register, +) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.From.Type = obj.TYPE_CONST @@ -94,8 +114,12 @@ func (a *assemblerGoAsmImpl) CompileConstToRegister(inst asm.Instruction, constV return golang_asm.NewGolangAsmNode(p) } -// CompileRegisterToConst implements Assembler.CompileRegisterToConst. -func (a *assemblerGoAsmImpl) CompileRegisterToConst(inst asm.Instruction, srcRegister asm.Register, constValue asm.ConstantValue) asm.Node { +// CompileRegisterToConst implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileRegisterToConst( + inst asm.Instruction, + srcRegister asm.Register, + constValue asm.ConstantValue, +) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.To.Type = obj.TYPE_CONST @@ -106,7 +130,7 @@ func (a *assemblerGoAsmImpl) CompileRegisterToConst(inst asm.Instruction, srcReg return golang_asm.NewGolangAsmNode(p) } -// CompileRegisterToNone implements Assembler.CompileRegisterToNone. +// CompileRegisterToNone implements the same method as documented on asm_amd64.Assembler. func (a *assemblerGoAsmImpl) CompileRegisterToNone(inst asm.Instruction, register asm.Register) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] @@ -116,7 +140,7 @@ func (a *assemblerGoAsmImpl) CompileRegisterToNone(inst asm.Instruction, registe a.AddInstruction(p) } -// CompileNoneToRegister implements Assembler.CompileNoneToRegister. +// CompileNoneToRegister implements the same method as documented on asm_amd64.Assembler. func (a *assemblerGoAsmImpl) CompileNoneToRegister(inst asm.Instruction, register asm.Register) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] @@ -126,8 +150,12 @@ func (a *assemblerGoAsmImpl) CompileNoneToRegister(inst asm.Instruction, registe a.AddInstruction(p) } -// CompileNoneToMemory implements Assembler.CompileNoneToMemory. 
-func (a *assemblerGoAsmImpl) CompileNoneToMemory(inst asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) { +// CompileNoneToMemory implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileNoneToMemory( + inst asm.Instruction, + baseReg asm.Register, + offset asm.ConstantValue, +) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.To.Type = obj.TYPE_MEM @@ -137,8 +165,13 @@ func (a *assemblerGoAsmImpl) CompileNoneToMemory(inst asm.Instruction, baseReg a a.AddInstruction(p) } -// CompileConstToMemory implements Assembler.CompileConstToMemory. -func (a *assemblerGoAsmImpl) CompileConstToMemory(inst asm.Instruction, constValue asm.ConstantValue, baseReg asm.Register, offset asm.ConstantValue) asm.Node { +// CompileConstToMemory implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileConstToMemory( + inst asm.Instruction, + constValue asm.ConstantValue, + baseReg asm.Register, + offset asm.ConstantValue, +) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.From.Type = obj.TYPE_CONST @@ -150,8 +183,13 @@ func (a *assemblerGoAsmImpl) CompileConstToMemory(inst asm.Instruction, constVal return golang_asm.NewGolangAsmNode(p) } -// CompileMemoryToRegister implements AssemblerBase.CompileMemoryToRegister. -func (a *assemblerGoAsmImpl) CompileMemoryToRegister(inst asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) { +// CompileMemoryToRegister implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileMemoryToRegister( + inst asm.Instruction, + sourceBaseReg asm.Register, + sourceOffsetConst asm.ConstantValue, + destinationReg asm.Register, +) { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.From.Type = obj.TYPE_MEM @@ -162,8 +200,12 @@ func (a *assemblerGoAsmImpl) CompileMemoryToRegister(inst asm.Instruction, sourc a.AddInstruction(p) } -// CompileMemoryToConst implements Assembler.CompileMemoryToConst. -func (a *assemblerGoAsmImpl) CompileMemoryToConst(inst asm.Instruction, baseReg asm.Register, offset asm.ConstantValue, constValue asm.ConstantValue) asm.Node { +// CompileMemoryToConst implements the same method as documented on asm_amd64.Assembler. +func (a *assemblerGoAsmImpl) CompileMemoryToConst( + inst asm.Instruction, + baseReg asm.Register, + offset, constValue asm.ConstantValue, +) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[inst] p.To.Type = obj.TYPE_CONST @@ -175,7 +217,7 @@ func (a *assemblerGoAsmImpl) CompileMemoryToConst(inst asm.Instruction, baseReg return golang_asm.NewGolangAsmNode(p) } -// CompileJump implements Assembler.CompileJump. +// CompileJump implements the same method as documented on asm_amd64.Assembler. func (a *assemblerGoAsmImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node { p := a.NewProg() p.As = castAsGolangAsmInstruction[jmpInstruction] @@ -184,7 +226,7 @@ func (a *assemblerGoAsmImpl) CompileJump(jmpInstruction asm.Instruction) asm.Nod return golang_asm.NewGolangAsmNode(p) } -// CompileJumpToRegister implements Assembler.CompileJumpToRegister. +// CompileJumpToRegister implements the same method as documented on asm_amd64.Assembler. 
func (a *assemblerGoAsmImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
	p := a.NewProg()
	p.As = castAsGolangAsmInstruction[jmpInstruction]
@@ -193,8 +235,12 @@ func (a *assemblerGoAsmImpl) CompileJumpToRegister(jmpInstructio
	a.AddInstruction(p)
}

-// CompileJumpToMemory implements Assembler.CompileJumpToMemory.
-func (a *assemblerGoAsmImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) {
+// CompileJumpToMemory implements the same method as documented on asm_amd64.Assembler.
+func (a *assemblerGoAsmImpl) CompileJumpToMemory(
+	jmpInstruction asm.Instruction,
+	baseReg asm.Register,
+	offset asm.ConstantValue,
+) {
	p := a.NewProg()
	p.As = castAsGolangAsmInstruction[jmpInstruction]
	p.To.Type = obj.TYPE_MEM
@@ -203,8 +249,12 @@ func (a *assemblerGoAsmImpl) CompileJumpToMemory(jmpInstruction,
	a.AddInstruction(p)
}

-// CompileRegisterToRegisterWithMode implements Assembler.CompileRegisterToRegisterWithMode.
-func (a *assemblerGoAsmImpl) CompileRegisterToRegisterWithMode(inst asm.Instruction, from, to asm.Register, mode asm_amd64.Mode) {
+// CompileRegisterToRegisterWithMode implements the same method as documented on asm_amd64.Assembler.
+func (a *assemblerGoAsmImpl) CompileRegisterToRegisterWithMode(
+	inst asm.Instruction,
+	from, to asm.Register,
+	mode asm_amd64.Mode,
+) {
	p := a.NewProg()
	p.As = castAsGolangAsmInstruction[inst]
	p.From.Type = obj.TYPE_CONST
@@ -216,8 +266,11 @@ func (a *assemblerGoAsmImpl) CompileRegisterToRegisterWithMode(inst asm.Instruct
	a.AddInstruction(p)
}

-// CompileReadInstructionAddress implements Assembler.CompileReadInstructionAddress.
-func (a *assemblerGoAsmImpl) CompileReadInstructionAddress(destinationRegister asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) {
+// CompileReadInstructionAddress implements the same method as documented on asm_amd64.Assembler.
+func (a *assemblerGoAsmImpl) CompileReadInstructionAddress(
+	destinationRegister asm.Register,
+	beforeAcquisitionTargetInstruction asm.Instruction,
+) {
	// Emit the instruction in the form of "LEA destination [RIP + offset]".
	readInstructionAddress := a.NewProg()
	readInstructionAddress.As = x86.ALEAQ
diff --git a/internal/asm/arm64/assembler.go b/internal/asm/arm64/assembler.go
index 9696bc03dd..5be6085edc 100644
--- a/internal/asm/arm64/assembler.go
+++ b/internal/asm/arm64/assembler.go
@@ -2,50 +2,65 @@ package asm_arm64

import (
	"github.com/tetratelabs/wazero/internal/asm"
-	"github.com/tetratelabs/wazero/internal/asm/golang_asm"
)

-// NewAssembler implements asm.NewAssembler and is used by default.
-// This returns an implementation of Assembler interface via our homemade assembler implementation.
-func NewAssembler(temporaryRegister asm.Register) (Assembler, error) {
-	g, err := golang_asm.NewGolangAsmBaseAssembler("arm64")
-	return &assemblerGoAsmImpl{GolangAsmBaseAssembler: g, temporaryRegister: temporaryRegister}, err
-}
-
// Assembler is the interface for the arm64-specific assembler.
type Assembler interface {
	asm.AssemblerBase
+
+	// CompileJumpToMemory adds a jump-type instruction whose destination is stored in the memory address
+	// specified by `baseReg`.
+
+	// Note: this is implemented identically to the corresponding method in asm.AssemblerBase in the homemade assembler.
+	// TODO: this will be removed after golang-asm removal.
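+	//
+	// For example, a typical (illustrative) call is `a.CompileJumpToMemory(B, reg)`, which
+	// emits an unconditional branch to the address read from the memory location held in
+	// `reg`; here `reg` stands for whichever register the caller prepared beforehand.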
+	CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register)
+
	// CompileMemoryWithRegisterOffsetToRegister adds an instruction where the source operand is the memory address
	// specified as `srcBaseReg + srcOffsetReg` and the destination is the register `dstReg`.
	CompileMemoryWithRegisterOffsetToRegister(instruction asm.Instruction, srcBaseReg, srcOffsetReg, dstReg asm.Register)
+
	// CompileRegisterToMemoryWithRegisterOffset adds an instruction where the source operand is the register `srcReg`,
	// and the destination is the memory address specified as `dstBaseReg + dstOffsetReg`.
	CompileRegisterToMemoryWithRegisterOffset(instruction asm.Instruction, srcReg, dstBaseReg, dstOffsetReg asm.Register)
+
	// CompileTwoRegistersToRegister adds an instruction where the source operands consist of two registers `src1` and `src2`,
	// and the destination is the register `dst`.
	CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register)
-	// CompileTwoRegisters adds an instruction where source operands consist of two registers `src1` and `src2`,
-	// and destination operands consist of `dst1` and `dst2` registers.
-	CompileTwoRegisters(instruction asm.Instruction, src1, src2, dst1, dst2 asm.Register)
+
+	// CompileThreeRegistersToRegister adds an instruction where the source operands consist of three registers
+	// `src1`, `src2` and `src3`, and the destination is the single register `dst`.
+	CompileThreeRegistersToRegister(instruction asm.Instruction, src1, src2, src3, dst asm.Register)
+
	// CompileTwoRegistersToNone adds an instruction where the source operands consist of two registers `src1` and `src2`,
	// and the destination operand is unspecified.
	CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register)
-	// CompileRegisterAndConstSourceToNone adds an instruction where source operands consist of one register `src` and
+
+	// CompileRegisterAndConstToNone adds an instruction where the source operands consist of one register `src` and
	// the constant `srcConst`, and the destination operand is unspecified.
-	CompileRegisterAndConstSourceToNone(instruction asm.Instruction, src asm.Register, srcConst asm.ConstantValue)
+	CompileRegisterAndConstToNone(instruction asm.Instruction, src asm.Register, srcConst asm.ConstantValue)
+
	// CompileLeftShiftedRegisterToRegister adds an instruction where the source operand is the "left shifted register"
-	// represented as `srcReg << shiftNum` and the destaintion is the register `dstReg`.
-	CompileLeftShiftedRegisterToRegister(shiftedSourceReg asm.Register, shiftNum asm.ConstantValue, srcReg, dstReg asm.Register)
+	// represented as `srcReg << shiftNum` and the destination is the register `dstReg`.
+	CompileLeftShiftedRegisterToRegister(
+		instruction asm.Instruction,
+		shiftedSourceReg asm.Register,
+		shiftNum asm.ConstantValue,
+		srcReg, dstReg asm.Register,
+	)
+
	// CompileSIMDByteToSIMDByte adds an instruction where the source and destination operands are the SIMD registers
	// specified as `srcReg.B8` and `dstReg.B8`, where the `.B8` part of the register name is called the "arrangement".
	// See https://stackoverflow.com/questions/57294672/what-is-arrangement-specifier-16b-8b-in-arm-assembly-language-instructions
	CompileSIMDByteToSIMDByte(instruction asm.Instruction, srcReg, dstReg asm.Register)
-	// CompileTwoSIMDByteToRegister adds an instruction where source operand is two SIMD registers specified as `srcReg1.B8`,
-	// and `srcReg2.B8` and the destination is the register `dstReg`.
-	CompileTwoSIMDByteToRegister(instruction asm.Instruction, srcReg1, srcReg2, dstReg asm.Register)
+
+	// CompileTwoSIMDBytesToSIMDByteRegister adds an instruction where the source operands are the two SIMD registers
+	// specified as `srcReg1.B8` and `srcReg2.B8`, and the destination is the single SIMD register `dstReg.B8`.
+	CompileTwoSIMDBytesToSIMDByteRegister(instruction asm.Instruction, srcReg1, srcReg2, dstReg asm.Register)
+
	// CompileSIMDByteToRegister adds an instruction where the source operand is the SIMD register specified as `srcReg.B8`,
	// and the destination is the register `dstReg`.
	CompileSIMDByteToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register)
+
	// CompileConditionalRegisterSet adds an instruction that sets dstReg to 1 if the condition is satisfied,
	// and to 0 otherwise.
	CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register)
diff --git a/internal/asm/arm64/consts.go b/internal/asm/arm64/consts.go
index 55e0e26c35..4009bd08d6 100644
--- a/internal/asm/arm64/consts.go
+++ b/internal/asm/arm64/consts.go
@@ -5,8 +5,9 @@ import (
)

// Arm64-specific register states.
-// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/condition-codes-1-condition-flags-and-codes
-// Note: naming convension is exactly the same as Go assembler: https://go.dev/doc/asm
+//
+// Note: Naming conventions intentionally match the Go assembler: https://go.dev/doc/asm
+// See https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/condition-codes-1-condition-flags-and-codes
const (
	COND_EQ asm.ConditionalRegisterState = asm.ConditionalRegisterStateUnset + 1 + iota
	COND_NE
@@ -27,8 +28,9 @@ const (
)

// Arm64-specific registers.
-// https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
-// Note: naming convension is exactly the same as Go assembler: https://go.dev/doc/asm
+//
+// Note: Naming conventions intentionally match the Go assembler: https://go.dev/doc/asm
+// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
const (

	// Integer registers.
@@ -103,11 +105,243 @@ const (

	// Floating point status register.
	REG_FPSR
+
+	// Assign each conditional register state a unique register ID.
+	// This reduces the size of the NodeImpl struct by avoiding a dedicated field
+	// for the conditional register state, which most nodes would not use.
+
+	REG_COND_EQ
+	REG_COND_NE
+	REG_COND_HS
+	REG_COND_LO
+	REG_COND_MI
+	REG_COND_PL
+	REG_COND_VS
+	REG_COND_VC
+	REG_COND_HI
+	REG_COND_LS
+	REG_COND_GE
+	REG_COND_LT
+	REG_COND_GT
+	REG_COND_LE
+	REG_COND_AL
+	REG_COND_NV
)

+// conditionalRegisterStateToRegister casts a conditional register state to its unique register ID.
+// See the comment on REG_COND_EQ above.
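+// For example, COND_EQ becomes REG_COND_EQ.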
+func conditionalRegisterStateToRegister(c asm.ConditionalRegisterState) asm.Register {
+	switch c {
+	case COND_EQ:
+		return REG_COND_EQ
+	case COND_NE:
+		return REG_COND_NE
+	case COND_HS:
+		return REG_COND_HS
+	case COND_LO:
+		return REG_COND_LO
+	case COND_MI:
+		return REG_COND_MI
+	case COND_PL:
+		return REG_COND_PL
+	case COND_VS:
+		return REG_COND_VS
+	case COND_VC:
+		return REG_COND_VC
+	case COND_HI:
+		return REG_COND_HI
+	case COND_LS:
+		return REG_COND_LS
+	case COND_GE:
+		return REG_COND_GE
+	case COND_LT:
+		return REG_COND_LT
+	case COND_GT:
+		return REG_COND_GT
+	case COND_LE:
+		return REG_COND_LE
+	case COND_AL:
+		return REG_COND_AL
+	case COND_NV:
+		return REG_COND_NV
+	}
+	return asm.NilRegister
+}
+
+// RegisterName returns the name of the given register, for use in debug output and error messages.
+func RegisterName(r asm.Register) string {
+	switch r {
+	case asm.NilRegister:
+		return "nil"
+	case REG_R0:
+		return "R0"
+	case REG_R1:
+		return "R1"
+	case REG_R2:
+		return "R2"
+	case REG_R3:
+		return "R3"
+	case REG_R4:
+		return "R4"
+	case REG_R5:
+		return "R5"
+	case REG_R6:
+		return "R6"
+	case REG_R7:
+		return "R7"
+	case REG_R8:
+		return "R8"
+	case REG_R9:
+		return "R9"
+	case REG_R10:
+		return "R10"
+	case REG_R11:
+		return "R11"
+	case REG_R12:
+		return "R12"
+	case REG_R13:
+		return "R13"
+	case REG_R14:
+		return "R14"
+	case REG_R15:
+		return "R15"
+	case REG_R16:
+		return "R16"
+	case REG_R17:
+		return "R17"
+	case REG_R18:
+		return "R18"
+	case REG_R19:
+		return "R19"
+	case REG_R20:
+		return "R20"
+	case REG_R21:
+		return "R21"
+	case REG_R22:
+		return "R22"
+	case REG_R23:
+		return "R23"
+	case REG_R24:
+		return "R24"
+	case REG_R25:
+		return "R25"
+	case REG_R26:
+		return "R26"
+	case REG_R27:
+		return "R27"
+	case REG_R28:
+		return "R28"
+	case REG_R29:
+		return "R29"
+	case REG_R30:
+		return "R30"
+	case REGZERO:
+		return "ZERO"
+	case REG_F0:
+		return "F0"
+	case REG_F1:
+		return "F1"
+	case REG_F2:
+		return "F2"
+	case REG_F3:
+		return "F3"
+	case REG_F4:
+		return "F4"
+	case REG_F5:
+		return "F5"
+	case REG_F6:
+		return "F6"
+	case REG_F7:
+		return "F7"
+	case REG_F8:
+		return "F8"
+	case REG_F9:
+		return "F9"
+	case REG_F10:
+		return "F10"
+	case REG_F11:
+		return "F11"
+	case REG_F12:
+		return "F12"
+	case REG_F13:
+		return "F13"
+	case REG_F14:
+		return "F14"
+	case REG_F15:
+		return "F15"
+	case REG_F16:
+		return "F16"
+	case REG_F17:
+		return "F17"
+	case REG_F18:
+		return "F18"
+	case REG_F19:
+		return "F19"
+	case REG_F20:
+		return "F20"
+	case REG_F21:
+		return "F21"
+	case REG_F22:
+		return "F22"
+	case REG_F23:
+		return "F23"
+	case REG_F24:
+		return "F24"
+	case REG_F25:
+		return "F25"
+	case REG_F26:
+		return "F26"
+	case REG_F27:
+		return "F27"
+	case REG_F28:
+		return "F28"
+	case REG_F29:
+		return "F29"
+	case REG_F30:
+		return "F30"
+	case REG_F31:
+		return "F31"
+	case REG_FPSR:
+		return "FPSR"
+	case REG_COND_EQ:
+		return "COND_EQ"
+	case REG_COND_NE:
+		return "COND_NE"
+	case REG_COND_HS:
+		return "COND_HS"
+	case REG_COND_LO:
+		return "COND_LO"
+	case REG_COND_MI:
+		return "COND_MI"
+	case REG_COND_PL:
+		return "COND_PL"
+	case REG_COND_VS:
+		return "COND_VS"
+	case REG_COND_VC:
+		return "COND_VC"
+	case REG_COND_HI:
+		return "COND_HI"
+	case REG_COND_LS:
+		return "COND_LS"
+	case REG_COND_GE:
+		return "COND_GE"
+	case REG_COND_LT:
+		return "COND_LT"
+	case REG_COND_GT:
+		return "COND_GT"
+	case REG_COND_LE:
+		return "COND_LE"
+	case REG_COND_AL:
+		return "COND_AL"
+	case REG_COND_NV:
+		return "COND_NV"
+	}
+	return "UNKNOWN"
+}
+
// Arm64-specific instructions.
// -// Note: naming convension is exactly the same as Go assembler: https://go.dev/doc/asm +// Note: This only defines arm64 instructions used by wazero's JIT compiler. +// Note: Naming conventions intentionally match the Go assembler: https://go.dev/doc/asm const ( NOP asm.Instruction = iota RET @@ -229,3 +463,247 @@ const ( VCNT VUADDLV ) + +func InstructionName(i asm.Instruction) string { + switch i { + case NOP: + return "NOP" + case RET: + return "RET" + case ADD: + return "ADD" + case ADDW: + return "ADDW" + case ADR: + return "ADR" + case AND: + return "AND" + case ANDW: + return "ANDW" + case ASR: + return "ASR" + case ASRW: + return "ASRW" + case B: + return "B" + case BEQ: + return "BEQ" + case BGE: + return "BGE" + case BGT: + return "BGT" + case BHI: + return "BHI" + case BHS: + return "BHS" + case BLE: + return "BLE" + case BLO: + return "BLO" + case BLS: + return "BLS" + case BLT: + return "BLT" + case BMI: + return "BMI" + case BNE: + return "BNE" + case BVS: + return "BVS" + case CLZ: + return "CLZ" + case CLZW: + return "CLZW" + case CMP: + return "CMP" + case CMPW: + return "CMPW" + case CSET: + return "CSET" + case EOR: + return "EOR" + case EORW: + return "EORW" + case FABSD: + return "FABSD" + case FABSS: + return "FABSS" + case FADDD: + return "FADDD" + case FADDS: + return "FADDS" + case FCMPD: + return "FCMPD" + case FCMPS: + return "FCMPS" + case FCVTDS: + return "FCVTDS" + case FCVTSD: + return "FCVTSD" + case FCVTZSD: + return "FCVTZSD" + case FCVTZSDW: + return "FCVTZSDW" + case FCVTZSS: + return "FCVTZSS" + case FCVTZSSW: + return "FCVTZSSW" + case FCVTZUD: + return "FCVTZUD" + case FCVTZUDW: + return "FCVTZUDW" + case FCVTZUS: + return "FCVTZUS" + case FCVTZUSW: + return "FCVTZUSW" + case FDIVD: + return "FDIVD" + case FDIVS: + return "FDIVS" + case FMAXD: + return "FMAXD" + case FMAXS: + return "FMAXS" + case FMIND: + return "FMIND" + case FMINS: + return "FMINS" + case FMOVD: + return "FMOVD" + case FMOVS: + return "FMOVS" + case FMULD: + return "FMULD" + case FMULS: + return "FMULS" + case FNEGD: + return "FNEGD" + case FNEGS: + return "FNEGS" + case FRINTMD: + return "FRINTMD" + case FRINTMS: + return "FRINTMS" + case FRINTND: + return "FRINTND" + case FRINTNS: + return "FRINTNS" + case FRINTPD: + return "FRINTPD" + case FRINTPS: + return "FRINTPS" + case FRINTZD: + return "FRINTZD" + case FRINTZS: + return "FRINTZS" + case FSQRTD: + return "FSQRTD" + case FSQRTS: + return "FSQRTS" + case FSUBD: + return "FSUBD" + case FSUBS: + return "FSUBS" + case LSL: + return "LSL" + case LSLW: + return "LSLW" + case LSR: + return "LSR" + case LSRW: + return "LSRW" + case MOVB: + return "MOVB" + case MOVBU: + return "MOVBU" + case MOVD: + return "MOVD" + case MOVH: + return "MOVH" + case MOVHU: + return "MOVHU" + case MOVW: + return "MOVW" + case MOVWU: + return "MOVWU" + case MRS: + return "MRS" + case MSR: + return "MSR" + case MSUB: + return "MSUB" + case MSUBW: + return "MSUBW" + case MUL: + return "MUL" + case MULW: + return "MULW" + case NEG: + return "NEG" + case NEGW: + return "NEGW" + case ORR: + return "ORR" + case ORRW: + return "ORRW" + case RBIT: + return "RBIT" + case RBITW: + return "RBITW" + case RNG: + return "RNG" + case ROR: + return "ROR" + case RORW: + return "RORW" + case SCVTFD: + return "SCVTFD" + case SCVTFS: + return "SCVTFS" + case SCVTFWD: + return "SCVTFWD" + case SCVTFWS: + return "SCVTFWS" + case SDIV: + return "SDIV" + case SDIVW: + return "SDIVW" + case SUB: + return "SUB" + case SUBS: + return "SUBS" + case SUBW: + return "SUBW" + case 
SXTB:
+		return "SXTB"
+	case SXTBW:
+		return "SXTBW"
+	case SXTH:
+		return "SXTH"
+	case SXTHW:
+		return "SXTHW"
+	case SXTW:
+		return "SXTW"
+	case UCVTFD:
+		return "UCVTFD"
+	case UCVTFS:
+		return "UCVTFS"
+	case UCVTFWD:
+		return "UCVTFWD"
+	case UCVTFWS:
+		return "UCVTFWS"
+	case UDIV:
+		return "UDIV"
+	case UDIVW:
+		return "UDIVW"
+	case UXTW:
+		return "UXTW"
+	case VBIT:
+		return "VBIT"
+	case VCNT:
+		return "VCNT"
+	case VUADDLV:
+		return "VUADDLV"
+	}
+	return "UNKNOWN"
+}
diff --git a/internal/asm/arm64/impl.go b/internal/asm/arm64/impl.go
new file mode 100644
index 0000000000..4223559702
--- /dev/null
+++ b/internal/asm/arm64/impl.go
@@ -0,0 +1,2421 @@
+package asm_arm64
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"math"
+
+	"github.com/tetratelabs/wazero/internal/asm"
+)
+
+type NodeImpl struct {
+	// NOTE: fields here are exported for testing with the arm64_debug package.
+
+	Instruction asm.Instruction
+
+	OffsetInBinaryField asm.NodeOffsetInBinary // Field suffix to dodge conflict with OffsetInBinary
+
+	// JumpTarget holds the target node in the linked list for the jump-kind instruction.
+	JumpTarget *NodeImpl
+	// Next holds the next node from this node in the assembled linked list.
+	Next *NodeImpl
+
+	Types                            OperandTypes
+	SrcReg, SrcReg2, DstReg, DstReg2 asm.Register
+	SrcConst, DstConst               asm.ConstantValue
+
+	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
+	// the read-instruction-address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
+	readInstructionAddressBeforeTargetInstruction asm.Instruction
+
+	// JumpOrigins holds all the nodes trying to jump into this node. In other words, all the nodes with .JumpTarget == this.
+	JumpOrigins map[*NodeImpl]struct{}
+}
+
+// AssignJumpTarget implements the same method as documented on asm.Node.
+func (n *NodeImpl) AssignJumpTarget(target asm.Node) {
+	n.JumpTarget = target.(*NodeImpl)
+}
+
+// AssignDestinationConstant implements the same method as documented on asm.Node.
+func (n *NodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
+	n.DstConst = value
+}
+
+// AssignSourceConstant implements the same method as documented on asm.Node.
+func (n *NodeImpl) AssignSourceConstant(value asm.ConstantValue) {
+	n.SrcConst = value
+}
+
+// OffsetInBinary implements the same method as documented on asm.Node.
+func (n *NodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
+	return n.OffsetInBinaryField
+}
+
+// String implements fmt.Stringer.
+//
+// This is for debugging purposes, and the format is similar to the AT&T assembly syntax,
+// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
+// might be enclosed in '[]' to represent a memory location, and multiple operands
+// are enclosed in `()`.
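+// For example, a memory-to-register load may render as "MOVD [R10 + 0x8], R1",
+// and a two-register comparison as "CMP (R0, R1)".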
+func (n *NodeImpl) String() (ret string) {
+	instName := InstructionName(n.Instruction)
+	switch n.Types {
+	case OperandTypesNoneToNone:
+		ret = instName
+	case OperandTypesNoneToRegister:
+		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.DstReg))
+	case OperandTypesNoneToMemory:
+		ret = fmt.Sprintf("%s [%s + 0x%x]", instName, RegisterName(n.DstReg), n.DstConst)
+	case OperandTypesNoneToBranch:
+		ret = fmt.Sprintf("%s {%v}", instName, n.JumpTarget)
+	case OperandTypesRegisterToRegister:
+		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg))
+	case OperandTypesLeftShiftedRegisterToRegister:
+		ret = fmt.Sprintf("%s (%s, %s << %d), %s", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2), n.SrcConst, RegisterName(n.DstReg))
+	case OperandTypesTwoRegistersToRegister:
+		ret = fmt.Sprintf("%s (%s, %s), %s", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2), RegisterName(n.DstReg))
+	case OperandTypesThreeRegistersToRegister:
+		ret = fmt.Sprintf("%s (%s, %s, %s), %s", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2), RegisterName(n.DstReg), RegisterName(n.DstReg2))
+	case OperandTypesTwoRegistersToNone:
+		ret = fmt.Sprintf("%s (%s, %s)", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2))
+	case OperandTypesRegisterAndConstToNone:
+		ret = fmt.Sprintf("%s (%s, 0x%x)", instName, RegisterName(n.SrcReg), n.SrcConst)
+	case OperandTypesRegisterToMemory:
+		if n.DstReg2 != asm.NilRegister {
+			ret = fmt.Sprintf("%s %s, [%s + %s]", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg), RegisterName(n.DstReg2))
+		} else {
+			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg), n.DstConst)
+		}
+	case OperandTypesMemoryToRegister:
+		if n.SrcReg2 != asm.NilRegister {
+			ret = fmt.Sprintf("%s [%s + %s], %s", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2), RegisterName(n.DstReg))
+		} else {
+			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.DstReg))
+		}
+	case OperandTypesConstToRegister:
+		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.SrcConst, RegisterName(n.DstReg))
+	case OperandTypesSIMDByteToSIMDByte:
+		ret = fmt.Sprintf("%s %s.B8, %s.B8", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg))
+	case OperandTypesSIMDByteToRegister:
+		ret = fmt.Sprintf("%s %s.B8, %s", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg))
+	case OperandTypesTwoSIMDBytesToSIMDByteRegister:
+		ret = fmt.Sprintf("%s (%s.B8, %s.B8), %s.B8", instName, RegisterName(n.SrcReg), RegisterName(n.SrcReg2), RegisterName(n.DstReg))
+	}
+	return
+}
+
+// OperandType represents where an operand is placed for an instruction.
+// Note: this is almost the same as obj.AddrType in the Go assembler.
+type OperandType byte
+
+const (
+	OperandTypeNone OperandType = iota
+	OperandTypeRegister
+	OperandTypeLeftShiftedRegister
+	OperandTypeTwoRegisters
+	OperandTypeThreeRegisters
+	OperandTypeRegisterAndConst
+	OperandTypeMemory
+	OperandTypeConst
+	OperandTypeBranch
+	OperandTypeSIMDByte
+	OperandTypeTwoSIMDBytes
+)
+
+// String implements fmt.Stringer.
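+// For example, OperandTypeMemory renders as "memory".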
+func (o OperandType) String() (ret string) {
+	switch o {
+	case OperandTypeNone:
+		ret = "none"
+	case OperandTypeRegister:
+		ret = "register"
+	case OperandTypeLeftShiftedRegister:
+		ret = "left-shifted-register"
+	case OperandTypeTwoRegisters:
+		ret = "two-registers"
+	case OperandTypeThreeRegisters:
+		ret = "three-registers"
+	case OperandTypeRegisterAndConst:
+		ret = "register-and-const"
+	case OperandTypeMemory:
+		ret = "memory"
+	case OperandTypeConst:
+		ret = "const"
+	case OperandTypeBranch:
+		ret = "branch"
+	case OperandTypeSIMDByte:
+		ret = "simd-byte"
+	case OperandTypeTwoSIMDBytes:
+		ret = "two-simd-bytes"
+	}
+	return
+}
+
+// OperandTypes represents the only combinations of two OperandTypes used by wazero.
+type OperandTypes struct{ src, dst OperandType }
+
+var (
+	OperandTypesNoneToNone                     = OperandTypes{OperandTypeNone, OperandTypeNone}
+	OperandTypesNoneToRegister                 = OperandTypes{OperandTypeNone, OperandTypeRegister}
+	OperandTypesNoneToMemory                   = OperandTypes{OperandTypeNone, OperandTypeMemory}
+	OperandTypesNoneToBranch                   = OperandTypes{OperandTypeNone, OperandTypeBranch}
+	OperandTypesRegisterToRegister             = OperandTypes{OperandTypeRegister, OperandTypeRegister}
+	OperandTypesLeftShiftedRegisterToRegister  = OperandTypes{OperandTypeLeftShiftedRegister, OperandTypeRegister}
+	OperandTypesTwoRegistersToRegister         = OperandTypes{OperandTypeTwoRegisters, OperandTypeRegister}
+	OperandTypesThreeRegistersToRegister       = OperandTypes{OperandTypeThreeRegisters, OperandTypeRegister}
+	OperandTypesTwoRegistersToNone             = OperandTypes{OperandTypeTwoRegisters, OperandTypeNone}
+	OperandTypesRegisterAndConstToNone         = OperandTypes{OperandTypeRegisterAndConst, OperandTypeNone}
+	OperandTypesRegisterToMemory               = OperandTypes{OperandTypeRegister, OperandTypeMemory}
+	OperandTypesMemoryToRegister               = OperandTypes{OperandTypeMemory, OperandTypeRegister}
+	OperandTypesConstToRegister                = OperandTypes{OperandTypeConst, OperandTypeRegister}
+	OperandTypesSIMDByteToSIMDByte             = OperandTypes{OperandTypeSIMDByte, OperandTypeSIMDByte}
+	OperandTypesSIMDByteToRegister             = OperandTypes{OperandTypeSIMDByte, OperandTypeRegister}
+	OperandTypesTwoSIMDBytesToSIMDByteRegister = OperandTypes{OperandTypeTwoSIMDBytes, OperandTypeSIMDByte}
+)
+
+// String implements fmt.Stringer.
+func (o OperandTypes) String() string {
+	return fmt.Sprintf("from:%s,to:%s", o.src, o.dst)
+}
+
+// AssemblerImpl implements Assembler.
+type AssemblerImpl struct {
+	asm.BaseAssemblerImpl
+	Root, Current     *NodeImpl
+	Buf               *bytes.Buffer
+	temporaryRegister asm.Register
+	nodeCount         int
+	pool              constPool
+}
+
+// constPool holds 32-bit constants which are referenced by the ldr(literal) instructions
+// emitted for memory access.
+type constPool struct {
+	// firstUseOffsetInBinary is the offset of the first ldr(literal) instruction
+	// which needs to access the const in this constPool.
+	firstUseOffsetInBinary *asm.NodeOffsetInBinary
+	consts                 []int32
+	// offsetFinalizedCallbacks holds the callbacks keyed on the constants.
+	// These callbacks are called when the offsets of the constants in the binary
+	// have been determined.
+	offsetFinalizedCallbacks map[int32][]func(offsetOfConstInBinary int)
+}
+
+func NewAssemblerImpl(temporaryRegister asm.Register) *AssemblerImpl {
+	return &AssemblerImpl{
+		Buf: bytes.NewBuffer(nil), temporaryRegister: temporaryRegister,
+		pool: constPool{offsetFinalizedCallbacks: map[int32][]func(int){}},
+	}
+}
+
+// newNode creates a new Node and appends it to the linked list.
+func (a *AssemblerImpl) newNode(instruction asm.Instruction, types OperandTypes) *NodeImpl {
+	n := &NodeImpl{
+		Instruction: instruction,
+		Next:        nil,
+		Types:       types,
+		JumpOrigins: map[*NodeImpl]struct{}{},
+	}
+
+	a.addNode(n)
+	return n
+}
+
+// addNode appends the new node into the linked list.
+func (a *AssemblerImpl) addNode(node *NodeImpl) {
+	a.nodeCount++
+
+	if a.Root == nil {
+		a.Root = node
+		a.Current = node
+	} else {
+		parent := a.Current
+		parent.Next = node
+		a.Current = node
+	}
+
+	for _, o := range a.SetBranchTargetOnNextNodes {
+		origin := o.(*NodeImpl)
+		origin.JumpTarget = node
+	}
+	a.SetBranchTargetOnNextNodes = nil
+}
+
+// Assemble implements asm.AssemblerBase
+func (a *AssemblerImpl) Assemble() ([]byte, error) {
+	// arm64 instructions are a fixed 4 bytes, but some nodes are encoded as multiple
+	// instructions, so we reserve 8 bytes per node as a heuristic; the resulting binary
+	// may therefore differ in size from nodeCount*8.
+	a.Buf.Grow(a.nodeCount * 8)
+
+	for n := a.Root; n != nil; n = n.Next {
+		n.OffsetInBinaryField = uint64(a.Buf.Len())
+		if err := a.EncodeNode(n); err != nil {
+			return nil, err
+		}
+		a.maybeFlushConstPool(n.Next == nil)
+	}
+
+	code := a.Bytes()
+	for _, cb := range a.OnGenerateCallbacks {
+		if err := cb(code); err != nil {
+			return nil, err
+		}
+	}
+	return code, nil
+}
+
+// maybeFlushConstPool flushes the constant pool if endOfBinary is true or a boundary condition is met.
+func (a *AssemblerImpl) maybeFlushConstPool(endOfBinary bool) {
+	if a.pool.firstUseOffsetInBinary == nil {
+		return
+	}
+
+	// If endOfBinary = true, we no longer need to emit instructions, therefore
+	// flush all the constants.
+	if endOfBinary ||
+		// Also, if the offset between the first usage of the constant pool and
+		// the first constant would exceed 2^20-1 (= 1MiB-1), which is the maximum offset
+		// for the ldr(literal) instruction, flush all the constants in the pool.
+		(a.Buf.Len()-int(*a.pool.firstUseOffsetInBinary)) >= (1<<20)-1-4 { // -4 for the unconditional branch to skip the constants.
+
+		// Before emitting the consts, we have to add a br instruction to skip over the const pool.
+		// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1123-L1129
+		skipOffset := len(a.pool.consts)
+		if endOfBinary {
+			// If this is the end of the binary, the skip branch is never taken,
+			// so the offset can be zero (which is the behavior of Go's assembler).
+			skipOffset = 0
+		}
+		a.Buf.Write([]byte{
+			byte(skipOffset),
+			byte(skipOffset >> 8),
+			byte(skipOffset >> 16),
+			0x14,
+		})
+
+		// Then add the consts into the binary.
+		for _, c := range a.pool.consts {
+			offsetOfConst := a.Buf.Len()
+			a.Buf.Write([]byte{byte(c), byte(c >> 8), byte(c >> 16), byte(c >> 24)})
+
+			// Invoke callbacks for `c` with the offset in the binary where we store `c`.
+			for _, cb := range a.pool.offsetFinalizedCallbacks[c] {
+				cb(offsetOfConst)
+			}
+		}
+
+		// After the flush, reset the constant pool.
+		a.pool = constPool{offsetFinalizedCallbacks: map[int32][]func(int){}}
+	}
+}
+
+func (a *AssemblerImpl) setConstPoolCallback(v int32, cb func(int)) {
+	a.pool.offsetFinalizedCallbacks[v] = append(a.pool.offsetFinalizedCallbacks[v], cb)
+}
+
+func (a *AssemblerImpl) addConstPool(v int32, useOffset asm.NodeOffsetInBinary) {
+	if a.pool.firstUseOffsetInBinary == nil {
+		a.pool.firstUseOffsetInBinary = &useOffset
+	}
+
+	if _, ok := a.pool.offsetFinalizedCallbacks[v]; !ok {
+		a.pool.consts = append(a.pool.consts, v)
+		a.pool.offsetFinalizedCallbacks[v] = []func(int){}
+	}
+}
+
+// Bytes returns the encoded binary.
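+// The result is padded to 16-byte alignment (see below), matching golang-asm's output.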
+//
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
+func (a *AssemblerImpl) Bytes() []byte {
+	// Pad to 16-byte alignment, to match our impl with golang-asm.
+	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L62
+	//
+	// TODO: Delete after golang-asm removal.
+	if pad := 16 - a.Buf.Len()%16; pad > 0 && pad != 16 {
+		a.Buf.Write(make([]byte, pad))
+	}
+	return a.Buf.Bytes()
+}
+
+// EncodeNode encodes the given node into the underlying buffer.
+func (a *AssemblerImpl) EncodeNode(n *NodeImpl) (err error) {
+	switch n.Types {
+	case OperandTypesNoneToNone:
+		err = a.EncodeNoneToNone(n)
+	case OperandTypesNoneToRegister, OperandTypesNoneToMemory:
+		err = a.EncodeJumpToRegister(n)
+	case OperandTypesNoneToBranch:
+		err = a.EncodeRelativeBranch(n)
+	case OperandTypesRegisterToRegister:
+		err = a.EncodeRegisterToRegister(n)
+	case OperandTypesLeftShiftedRegisterToRegister:
+		err = a.EncodeLeftShiftedRegisterToRegister(n)
+	case OperandTypesTwoRegistersToRegister:
+		err = a.EncodeTwoRegistersToRegister(n)
+	case OperandTypesThreeRegistersToRegister:
+		err = a.EncodeThreeRegistersToRegister(n)
+	case OperandTypesTwoRegistersToNone:
+		err = a.EncodeTwoRegistersToNone(n)
+	case OperandTypesRegisterAndConstToNone:
+		err = a.EncodeRegisterAndConstToNone(n)
+	case OperandTypesRegisterToMemory:
+		err = a.EncodeRegisterToMemory(n)
+	case OperandTypesMemoryToRegister:
+		err = a.EncodeMemoryToRegister(n)
+	case OperandTypesConstToRegister:
+		err = a.EncodeConstToRegister(n)
+	case OperandTypesSIMDByteToSIMDByte:
+		err = a.EncodeSIMDByteToSIMDByte(n)
+	case OperandTypesSIMDByteToRegister:
+		err = a.EncodeSIMDByteToRegister(n)
+	case OperandTypesTwoSIMDBytesToSIMDByteRegister:
+		err = a.EncodeTwoSIMDBytesToSIMDByteRegister(n)
+	default:
+		err = fmt.Errorf("encoder undefined for [%s] operand type", n.Types)
+	}
+	if err != nil {
+		err = fmt.Errorf("%w: %s", err, n) // Ensure the error is debuggable by including the node's string value.
+	}
+	return
+}
+
+// CompileStandAlone implements the same method as documented on asm.AssemblerBase.
+func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
+	return a.newNode(instruction, OperandTypesNoneToNone)
+}
+
+// CompileConstToRegister implements the same method as documented on asm.AssemblerBase.
+func (a *AssemblerImpl) CompileConstToRegister(
+	instruction asm.Instruction,
+	value asm.ConstantValue,
+	destinationReg asm.Register,
+) (inst asm.Node) {
+	n := a.newNode(instruction, OperandTypesConstToRegister)
+	n.SrcConst = value
+	n.DstReg = destinationReg
+	return n
+}
+
+// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase.
+func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
+	n := a.newNode(instruction, OperandTypesRegisterToRegister)
+	n.SrcReg = from
+	n.DstReg = to
+}
+
+// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
+func (a *AssemblerImpl) CompileMemoryToRegister(
+	instruction asm.Instruction,
+	sourceBaseReg asm.Register,
+	sourceOffsetConst asm.ConstantValue,
+	destinationReg asm.Register,
+) {
+	n := a.newNode(instruction, OperandTypesMemoryToRegister)
+	n.SrcReg = sourceBaseReg
+	n.SrcConst = sourceOffsetConst
+	n.DstReg = destinationReg
+}
+
+// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase.
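+//
+// For example, storing the value of R0 at the address [R1 + 0x10] would be requested
+// with a call like `a.CompileRegisterToMemory(MOVD, REG_R0, REG_R1, 0x10)` (an
+// illustrative call; the registers and offset are arbitrary).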
+func (a *AssemblerImpl) CompileRegisterToMemory( + instruction asm.Instruction, + sourceRegister, destinationBaseRegister asm.Register, + destinationOffsetConst asm.ConstantValue, +) { + n := a.newNode(instruction, OperandTypesRegisterToMemory) + n.SrcReg = sourceRegister + n.DstReg = destinationBaseRegister + n.DstConst = destinationOffsetConst +} + +// CompileJump implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node { + return a.newNode(jmpInstruction, OperandTypesNoneToBranch) +} + +// CompileJumpToMemory implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register) { + n := a.newNode(jmpInstruction, OperandTypesNoneToMemory) + n.DstReg = baseReg +} + +// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) { + n := a.newNode(jmpInstruction, OperandTypesNoneToRegister) + n.DstReg = reg +} + +// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase. +func (a *AssemblerImpl) CompileReadInstructionAddress( + destinationRegister asm.Register, + beforeAcquisitionTargetInstruction asm.Instruction, +) { + n := a.newNode(ADR, OperandTypesMemoryToRegister) + n.DstReg = destinationRegister + n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction +} + +// CompileMemoryWithRegisterOffsetToRegister implements Assembler.CompileMemoryWithRegisterOffsetToRegister +func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToRegister( + instruction asm.Instruction, + srcBaseReg, srcOffsetReg, dstReg asm.Register, +) { + n := a.newNode(instruction, OperandTypesMemoryToRegister) + n.DstReg = dstReg + n.SrcReg = srcBaseReg + n.SrcReg2 = srcOffsetReg +} + +// CompileRegisterToMemoryWithRegisterOffset implements Assembler.CompileRegisterToMemoryWithRegisterOffset +func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterOffset( + instruction asm.Instruction, + srcReg, dstBaseReg, dstOffsetReg asm.Register, +) { + n := a.newNode(instruction, OperandTypesRegisterToMemory) + n.SrcReg = srcReg + n.DstReg = dstBaseReg + n.DstReg2 = dstOffsetReg +} + +// CompileTwoRegistersToRegister implements Assembler.CompileTwoRegistersToRegister +func (a *AssemblerImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) { + n := a.newNode(instruction, OperandTypesTwoRegistersToRegister) + n.SrcReg = src1 + n.SrcReg2 = src2 + n.DstReg = dst +} + +// CompileThreeRegistersToRegister implements Assembler.CompileThreeRegistersToRegister +func (a *AssemblerImpl) CompileThreeRegistersToRegister( + instruction asm.Instruction, + src1, src2, src3, dst asm.Register, +) { + n := a.newNode(instruction, OperandTypesThreeRegistersToRegister) + n.SrcReg = src1 + n.SrcReg2 = src2 + n.DstReg = src3 // To minimize the size of NodeImpl struct, we reuse DstReg for the third source operand. 
+ n.DstReg2 = dst +} + +// CompileTwoRegistersToNone implements Assembler.CompileTwoRegistersToNone +func (a *AssemblerImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) { + n := a.newNode(instruction, OperandTypesTwoRegistersToNone) + n.SrcReg = src1 + n.SrcReg2 = src2 +} + +// CompileRegisterAndConstToNone implements Assembler.CompileRegisterAndConstToNone +func (a *AssemblerImpl) CompileRegisterAndConstToNone( + instruction asm.Instruction, + src asm.Register, + srcConst asm.ConstantValue, +) { + n := a.newNode(instruction, OperandTypesRegisterAndConstToNone) + n.SrcReg = src + n.SrcConst = srcConst +} + +// CompileLeftShiftedRegisterToRegister implements Assembler.CompileLeftShiftedRegisterToRegister +func (a *AssemblerImpl) CompileLeftShiftedRegisterToRegister( + instruction asm.Instruction, + shiftedSourceReg asm.Register, + shiftNum asm.ConstantValue, + srcReg, dstReg asm.Register, +) { + n := a.newNode(instruction, OperandTypesLeftShiftedRegisterToRegister) + n.SrcReg = srcReg + n.SrcReg2 = shiftedSourceReg + n.SrcConst = shiftNum + n.DstReg = dstReg +} + +// CompileSIMDByteToSIMDByte implements Assembler.CompileSIMDByteToSIMDByte +func (a *AssemblerImpl) CompileSIMDByteToSIMDByte(instruction asm.Instruction, srcReg, dstReg asm.Register) { + n := a.newNode(instruction, OperandTypesSIMDByteToSIMDByte) + n.SrcReg = srcReg + n.DstReg = dstReg +} + +// CompileTwoSIMDBytesToSIMDByteRegister implements Assembler.CompileTwoSIMDBytesToSIMDByteRegister +func (a *AssemblerImpl) CompileTwoSIMDBytesToSIMDByteRegister( + instruction asm.Instruction, + srcReg1, srcReg2, dstReg asm.Register, +) { + n := a.newNode(instruction, OperandTypesTwoSIMDBytesToSIMDByteRegister) + n.SrcReg = srcReg1 + n.SrcReg2 = srcReg2 + n.DstReg = dstReg +} + +// CompileSIMDByteToRegister implements Assembler.CompileSIMDByteToRegister +func (a *AssemblerImpl) CompileSIMDByteToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register) { + n := a.newNode(instruction, OperandTypesSIMDByteToRegister) + n.SrcReg = srcReg + n.DstReg = dstReg +} + +// CompileConditionalRegisterSet implements Assembler.CompileConditionalRegisterSet +func (a *AssemblerImpl) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) { + n := a.newNode(CSET, OperandTypesRegisterToRegister) + n.SrcReg = conditionalRegisterStateToRegister(cond) + n.DstReg = dstReg +} + +func errorEncodingUnsupported(n *NodeImpl) error { + return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.Instruction), n.Types) +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeNoneToNone(n *NodeImpl) (err error) { + if n.Instruction != NOP { + err = errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. 
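+//
+// EncodeJumpToRegister encodes RET and register-indirect B into the 4-byte
+// "unconditional branch (register)" form, with opc distinguishing RET from BR.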
+func (a *AssemblerImpl) EncodeJumpToRegister(n *NodeImpl) (err error) {
+	// "Unconditional branch (register)" in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions
+	var opc byte
+	switch n.Instruction {
+	case RET:
+		opc = 0b0010
+	case B:
+		opc = 0b0000
+	default:
+		return errorEncodingUnsupported(n)
+	}
+
+	regBits, err := intRegisterBits(n.DstReg)
+	if err != nil {
+		return fmt.Errorf("invalid destination register: %w", err)
+	}
+
+	a.Buf.Write([]byte{
+		0x00 | (regBits << 5),
+		0x00 | (regBits >> 3),
+		0b000_11111 | (opc << 5),
+		0b1101011_0 | (opc >> 3),
+	})
+	return
+}
+
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
+func (a *AssemblerImpl) EncodeRelativeBranch(n *NodeImpl) (err error) {
+	switch n.Instruction {
+	case B, BEQ, BGE, BGT, BHI, BHS, BLE, BLO, BLS, BLT, BMI, BNE, BVS:
+	default:
+		return errorEncodingUnsupported(n)
+	}
+
+	if n.JumpTarget == nil {
+		return fmt.Errorf("branch target must be set for %s", InstructionName(n.Instruction))
+	}
+
+	// At this point, we don't yet know the branch target's offset, so emit a 4-byte placeholder.
+	a.Buf.Write([]byte{0, 0, 0, 0})
+
+	a.AddOnGenerateCallBack(func(code []byte) error {
+		var condBits byte
+		const condBitsUnconditional = 0xff // Indicates that this is not a conditional jump.
+
+		// https://developer.arm.com/documentation/den0024/a/CHDEEABE
+		switch n.Instruction {
+		case B:
+			condBits = condBitsUnconditional
+		case BEQ:
+			condBits = 0b0000
+		case BGE:
+			condBits = 0b1010
+		case BGT:
+			condBits = 0b1100
+		case BHI:
+			condBits = 0b1000
+		case BHS:
+			condBits = 0b0010
+		case BLE:
+			condBits = 0b1101
+		case BLO:
+			condBits = 0b0011
+		case BLS:
+			condBits = 0b1001
+		case BLT:
+			condBits = 0b1011
+		case BMI:
+			condBits = 0b0100
+		case BNE:
+			condBits = 0b0001
+		case BVS:
+			condBits = 0b0110
+		}
+
+		branchInstOffset := int64(n.OffsetInBinary())
+		offset := int64(n.JumpTarget.OffsetInBinary()) - branchInstOffset
+		if offset%4 != 0 {
+			return errors.New("BUG: relative jump offset must be 4 bytes aligned")
+		}
+
+		branchInst := code[branchInstOffset : branchInstOffset+4]
+		if condBits == condBitsUnconditional {
+			imm26 := offset / 4
+			const maxSignedInt26 int64 = 1<<25 - 1
+			const minSignedInt26 int64 = -(1 << 25)
+			if offset < minSignedInt26 || offset > maxSignedInt26 {
+				// In theory this could happen if a Wasm binary has a huge single label (more than 128MB for a single block).
+				// In that case, we could load the offset into a register and do a register jump, but to avoid the complexity,
+				// we impose this limit for now, as that is *unlikely* to happen in practice.
+				return fmt.Errorf("relative jump offset %d/4 must be within %d and %d", offset, minSignedInt26, maxSignedInt26)
+			}
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-?lang=en
+			branchInst[0] = byte(imm26)
+			branchInst[1] = byte(imm26 >> 8)
+			branchInst[2] = byte(imm26 >> 16)
+			branchInst[3] = (byte(imm26 >> 24 & 0b000000_11)) | 0b000101_00
+		} else {
+			imm19 := offset / 4
+			const maxSignedInt19 int64 = 1<<19 - 1
+			const minSignedInt19 int64 = -(1 << 19)
+			if offset < minSignedInt19 || offset > maxSignedInt19 {
+				// This would indicate a bug in our JIT compiler, as conditional jumps are only used with small offsets
+				// (~a few bytes); if it ever happens, the JIT compiler can be fixed.
+				return fmt.Errorf("BUG: relative jump offset %d/4 (=%d) must be within %d and %d", offset, imm19, minSignedInt19, maxSignedInt19)
+			}
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally-?lang=en
+			branchInst[0] = (byte(imm19<<5) & 0b111_0_0000) | condBits
+			branchInst[1] = byte(imm19 >> 3)
+			branchInst[2] = byte(imm19 >> 11)
+			branchInst[3] = 0b01010100
+		}
+		return nil
+	})
+	return
+}
+
+func checkRegisterToRegisterType(src, dst asm.Register, requireSrcInt, requireDstInt bool) (err error) {
+	isSrcInt, isDstInt := isIntRegister(src), isIntRegister(dst)
+	if isSrcInt && !requireSrcInt {
+		err = fmt.Errorf("src requires float register but got %s", RegisterName(src))
+	} else if !isSrcInt && requireSrcInt {
+		err = fmt.Errorf("src requires int register but got %s", RegisterName(src))
+	} else if isDstInt && !requireDstInt {
+		err = fmt.Errorf("dst requires float register but got %s", RegisterName(dst))
+	} else if !isDstInt && requireDstInt {
+		err = fmt.Errorf("dst requires int register but got %s", RegisterName(dst))
+	}
+	return
+}
+
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
+func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) {
+	switch inst := n.Instruction; inst {
+	case ADD, ADDW, SUB:
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
+		var sfops byte
+		switch inst {
+		case ADD:
+			sfops = 0b100
+		case ADDW: // sfops remains 0b000 for the 32-bit variant.
+		case SUB:
+			sfops = 0b110
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+		a.Buf.Write([]byte{
+			(dstRegBits << 5) | dstRegBits,
+			(dstRegBits >> 3),
+			srcRegBits,
+			(sfops << 5) | 0b01011,
+		})
+	case CLZ, CLZW, RBIT, RBITW:
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		var sf, opcode byte
+		switch inst {
+		case CLZ:
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
+			sf, opcode = 0b1, 0b000_100
+		case CLZW:
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
+			sf, opcode = 0b0, 0b000_100
+		case RBIT:
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
+			sf, opcode = 0b1, 0b000_000
+		case RBITW:
+			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
+			sf, opcode = 0b0, 0b000_000
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+		a.Buf.Write([]byte{
+			(srcRegBits << 5) | dstRegBits,
+			opcode<<2 | (srcRegBits >> 3),
+			0b110_00000,
+			(sf << 7) | 0b0_1011010,
+		})
+	case CSET:
+		if !isConditionalRegister(n.SrcReg) {
+			return fmt.Errorf("CSET requires conditional register but got %s", RegisterName(n.SrcReg))
+		}
+
+		dstRegBits, err := intRegisterBits(n.DstReg)
+		if err != nil {
+			return err
+		}
+
+		// CSET encodes the conditional bits with its least significant bit inverted.
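+		// For example, the EQ condition code itself is 0b0000 (see the relative-branch
+		// encoding above), but REG_COND_EQ maps to 0b0001 here.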
+ // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en + // + // https://developer.arm.com/documentation/den0024/a/CHDEEABE + var conditionalBits byte + switch n.SrcReg { + case REG_COND_EQ: + conditionalBits = 0b0001 + case REG_COND_NE: + conditionalBits = 0b0000 + case REG_COND_HS: + conditionalBits = 0b0011 + case REG_COND_LO: + conditionalBits = 0b0010 + case REG_COND_MI: + conditionalBits = 0b0101 + case REG_COND_PL: + conditionalBits = 0b0100 + case REG_COND_VS: + conditionalBits = 0b0111 + case REG_COND_VC: + conditionalBits = 0b0110 + case REG_COND_HI: + conditionalBits = 0b1001 + case REG_COND_LS: + conditionalBits = 0b1000 + case REG_COND_GE: + conditionalBits = 0b1011 + case REG_COND_LT: + conditionalBits = 0b1010 + case REG_COND_GT: + conditionalBits = 0b1101 + case REG_COND_LE: + conditionalBits = 0b1100 + case REG_COND_AL: + conditionalBits = 0b1111 + case REG_COND_NV: + conditionalBits = 0b1110 + } + + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en + a.Buf.Write([]byte{ + 0b111_00000 | dstRegBits, + (conditionalBits << 4) | 0b0000_0111, + 0b100_11111, + 0b10011010, + }) + + case FABSD, FABSS, FNEGD, FNEGS, FSQRTD, FSQRTS, FCVTSD, FCVTDS, FRINTMD, FRINTMS, + FRINTND, FRINTNS, FRINTPD, FRINTPS, FRINTZD, FRINTZS: + if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, false, false); err != nil { + return + } + + srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg) + + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1 + var tp, opcode byte + switch inst { + case FABSD: + opcode, tp = 0b000001, 0b01 + case FABSS: + opcode, tp = 0b000001, 0b00 + case FNEGD: + opcode, tp = 0b000010, 0b01 + case FNEGS: + opcode, tp = 0b000010, 0b00 + case FSQRTD: + opcode, tp = 0b000011, 0b01 + case FSQRTS: + opcode, tp = 0b000011, 0b00 + case FCVTSD: + opcode, tp = 0b000101, 0b00 + case FCVTDS: + opcode, tp = 0b000100, 0b01 + case FRINTMD: + opcode, tp = 0b001010, 0b01 + case FRINTMS: + opcode, tp = 0b001010, 0b00 + case FRINTND: + opcode, tp = 0b001000, 0b01 + case FRINTNS: + opcode, tp = 0b001000, 0b00 + case FRINTPD: + opcode, tp = 0b001001, 0b01 + case FRINTPS: + opcode, tp = 0b001001, 0b00 + case FRINTZD: + opcode, tp = 0b001011, 0b01 + case FRINTZS: + opcode, tp = 0b001011, 0b00 + } + a.Buf.Write([]byte{ + (srcRegBits << 5) | dstRegBits, + (opcode << 7) | 0b0_10000_00 | (srcRegBits >> 3), + tp<<6 | 0b00_1_00000 | opcode>>1, + 0b0_00_11110, + }) + + case FADDD, FADDS, FDIVS, FDIVD, FMAXD, FMAXS, FMIND, FMINS, FMULS, FMULD: + if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, false, false); err != nil { + return + } + + srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg) + + // "Floating-point data-processing (2 source)" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1 + var tp, opcode byte + switch inst { + case FADDD: + opcode, tp = 0b0010, 0b01 + case FADDS: + opcode, tp = 0b0010, 0b00 + case FDIVD: + opcode, tp = 0b0001, 0b01 + case FDIVS: + opcode, tp = 0b0001, 0b00 + case FMAXD: + opcode, tp = 0b0100, 0b01 + case FMAXS: + opcode, tp = 0b0100, 0b00 + case FMIND: + opcode, tp = 0b0101, 0b01 + case FMINS: + opcode, tp = 0b0101, 0b00 + case FMULS: + opcode, tp = 0b0000, 0b00 + case 
FMULD:
+			opcode, tp = 0b0000, 0b01
+		}
+
+		a.Buf.Write([]byte{
+			(dstRegBits << 5) | dstRegBits,
+			opcode<<4 | 0b0000_10_00 | (dstRegBits >> 3),
+			tp<<6 | 0b00_1_00000 | srcRegBits,
+			0b0001_1110,
+		})
+
+	case FCVTZSD, FCVTZSDW, FCVTZSS, FCVTZSSW, FCVTZUD, FCVTZUDW, FCVTZUS, FCVTZUSW:
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, false, true); err != nil {
+			return
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+
+		// "Conversion between floating-point and integer" in
+		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
+		var sf, tp, opcode byte
+		switch inst {
+		case FCVTZSD: // Double to signed 64-bit.
+			sf, tp, opcode = 0b1, 0b01, 0b000
+		case FCVTZSDW: // Double to signed 32-bit.
+			sf, tp, opcode = 0b0, 0b01, 0b000
+		case FCVTZSS: // Single to signed 64-bit.
+			sf, tp, opcode = 0b1, 0b00, 0b000
+		case FCVTZSSW: // Single to signed 32-bit.
+			sf, tp, opcode = 0b0, 0b00, 0b000
+		case FCVTZUD: // Double to unsigned 64-bit.
+			sf, tp, opcode = 0b1, 0b01, 0b001
+		case FCVTZUDW: // Double to unsigned 32-bit.
+			sf, tp, opcode = 0b0, 0b01, 0b001
+		case FCVTZUS: // Single to unsigned 64-bit.
+			sf, tp, opcode = 0b1, 0b00, 0b001
+		case FCVTZUSW: // Single to unsigned 32-bit.
+			sf, tp, opcode = 0b0, 0b00, 0b001
+		}
+
+		a.Buf.Write([]byte{
+			(srcRegBits << 5) | dstRegBits,
+			0 | (srcRegBits >> 3),
+			tp<<6 | 0b00_1_11_000 | opcode,
+			sf<<7 | 0b0_0_0_11110,
+		})
+
+	case FMOVD, FMOVS:
+		isSrcInt, isDstInt := isIntRegister(n.SrcReg), isIntRegister(n.DstReg)
+		if isSrcInt && isDstInt {
+			return errors.New("FMOV requires at least one of the operands to be a floating-point register")
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMOV--register---Floating-point-Move-register-without-conversion-?lang=en
+		if !isSrcInt && !isDstInt { // Float to float.
+			var tp byte
+			if inst == FMOVD {
+				tp = 0b01
+			}
+			a.Buf.Write([]byte{
+				(srcRegBits << 5) | dstRegBits,
+				0b0_10000_00 | (srcRegBits >> 3),
+				tp<<6 | 0b00_1_00000,
+				0b000_11110,
+			})
+		} else if isSrcInt && !isDstInt { // Int to float.
+			var tp, sf byte
+			if inst == FMOVD {
+				tp, sf = 0b01, 0b1
+			}
+			a.Buf.Write([]byte{
+				(srcRegBits << 5) | dstRegBits,
+				(srcRegBits >> 3),
+				tp<<6 | 0b00_1_00_111,
+				sf<<7 | 0b0_00_11110,
+			})
+		} else { // Float to int.
+			var tp, sf byte
+			if inst == FMOVD {
+				tp, sf = 0b01, 0b1
+			}
+			a.Buf.Write([]byte{
+				(srcRegBits << 5) | dstRegBits,
+				(srcRegBits >> 3),
+				tp<<6 | 0b00_1_00_110,
+				sf<<7 | 0b0_00_11110,
+			})
+		}
+
+	case MOVD, MOVWU:
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+		if n.SrcReg == REGZERO && inst == MOVD {
+			// If this is a 64-bit mov from the zero register, we encode it as "MOVZ dst, #0".
+			// See "Move wide (immediate)" in
+			// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Immediate
+			a.Buf.Write([]byte{
+				dstRegBits,
+				0x0,
+				0b1000_0000,
+				0b1_10_10010,
+			})
+		} else {
+			// MOV can be encoded as ORR (shifted register): "ORR Wd, WZR, Wm".
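+			// In other words, the move is synthesized as dst = ZR | src.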
+			// https://developer.arm.com/documentation/100069/0609/A64-General-Instructions/MOV--register-
+			var sf byte
+			if inst == MOVD {
+				sf = 0b1
+			}
+			a.Buf.Write([]byte{
+				(zeroRegisterBits << 5) | dstRegBits,
+				(zeroRegisterBits >> 3),
+				0b000_00000 | srcRegBits,
+				sf<<7 | 0b0_01_01010,
+			})
+		}
+
+	case MRS:
+		if n.SrcReg != REG_FPSR {
+			return fmt.Errorf("MRS only supports the FPSR register as a src but got %s", RegisterName(n.SrcReg))
+		}
+
+		// For how to specify the FPSR register, see "Accessing FPSR" in:
+		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
+		dstRegBits := registerBits(n.DstReg)
+		a.Buf.Write([]byte{
+			0b001<<5 | dstRegBits,
+			0b0100<<4 | 0b0100,
+			0b0011_0000 | 0b11<<3 | 0b011,
+			0b1101_0101,
+		})
+
+	case MSR:
+		if n.DstReg != REG_FPSR {
+			return fmt.Errorf("MSR only supports the FPSR register as a dst but got %s", RegisterName(n.DstReg))
+		}
+
+		// For how to specify the FPSR register, see "Accessing FPSR" in:
+		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
+		srcRegBits := registerBits(n.SrcReg)
+		a.Buf.Write([]byte{
+			0b001<<5 | srcRegBits,
+			0b0100<<4 | 0b0100,
+			0b0001_0000 | 0b11<<3 | 0b011,
+			0b1101_0101,
+		})
+
+	case MUL, MULW:
+		// Multiplications are encoded as MADD (zero register, src, dst), i.e. dst = zero + (src * dst) = src * dst.
+		// See "Data-processing (3 source)" in
+		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		var sf byte
+		if inst == MUL {
+			sf = 0b1
+		}
+
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+
+		a.Buf.Write([]byte{
+			dstRegBits<<5 | dstRegBits,
+			zeroRegisterBits<<2 | dstRegBits>>3,
+			srcRegBits,
+			sf<<7 | 0b11011,
+		})
+
+	case NEG, NEGW:
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		// NEG is encoded as "SUB dst, XZR, src" = "dst = 0 - src".
+		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
+		var sf byte
+		if inst == NEG {
+			sf = 0b1
+		}
+
+		a.Buf.Write([]byte{
+			(zeroRegisterBits << 5) | dstRegBits,
+			(zeroRegisterBits >> 3),
+			srcRegBits,
+			sf<<7 | 0b0_10_00000 | 0b0_00_01011,
+		})
+
+	case SDIV, SDIVW, UDIV, UDIVW:
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil {
+			return
+		}
+
+		// See "Data-processing (2 source)" in
+		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
+		var sf, opcode byte
+		switch inst {
+		case SDIV:
+			sf, opcode = 0b1, 0b000011
+		case SDIVW:
+			sf, opcode = 0b0, 0b000011
+		case UDIV:
+			sf, opcode = 0b1, 0b000010
+		case UDIVW:
+			sf, opcode = 0b0, 0b000010
+		}
+
+		a.Buf.Write([]byte{
+			(dstRegBits << 5) | dstRegBits,
+			opcode<<2 | (dstRegBits >> 3),
+			0b110_00000 | srcRegBits,
+			sf<<7 | 0b0_00_11010,
+		})
+
+	case SCVTFD, SCVTFWD, SCVTFS, SCVTFWS, UCVTFD, UCVTFS, UCVTFWD, UCVTFWS:
+		srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg)
+
+		if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, false); err != nil {
+			return
+		}
+
+		// "Conversion between floating-point and
integer" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1 + var sf, tp, opcode byte + switch inst { + case SCVTFD: // 64-bit integer to double + sf, tp, opcode = 0b1, 0b01, 0b010 + case SCVTFWD: // 32-bit integer to double + sf, tp, opcode = 0b0, 0b01, 0b010 + case SCVTFS: // 64-bit integer to single + sf, tp, opcode = 0b1, 0b00, 0b010 + case SCVTFWS: // 32-bit integer to single + sf, tp, opcode = 0b0, 0b00, 0b010 + case UCVTFD: // 64-bit to double + sf, tp, opcode = 0b1, 0b01, 0b011 + case UCVTFWD: // 32-bit to double + sf, tp, opcode = 0b0, 0b01, 0b011 + case UCVTFS: // 64-bit to single + sf, tp, opcode = 0b1, 0b00, 0b011 + case UCVTFWS: // 32-bit to single + sf, tp, opcode = 0b0, 0b00, 0b011 + } + + a.Buf.Write([]byte{ + (srcRegBits << 5) | dstRegBits, + (srcRegBits >> 3), + tp<<6 | 0b00_1_00_000 | opcode, + sf<<7 | 0b0_0_0_11110, + }) + + case SXTB, SXTBW, SXTH, SXTHW, SXTW, UXTW: + if err = checkRegisterToRegisterType(n.SrcReg, n.DstReg, true, true); err != nil { + return + } + + srcRegBits, dstRegBits := registerBits(n.SrcReg), registerBits(n.DstReg) + if n.SrcReg == REGZERO { + // If the source is zero register, we encode as MOV dst, zero. + var sf byte + if inst == MOVD { + sf = 0b1 + } + a.Buf.Write([]byte{ + (zeroRegisterBits << 5) | dstRegBits, + (zeroRegisterBits >> 3), + 0b000_00000 | srcRegBits, + sf<<7 | 0b0_01_01010, + }) + return + } + + // SXTB is encoded as "SBFM Wd, Wn, #0, #7" + // https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTB + // SXTH is encoded as "SBFM Wd, Wn, #0, #15" + // https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTH + // SXTW is encoded as "SBFM Xd, Xn, #0, #31" + // https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/SXTW + + var n, sf, imms, opc byte + switch inst { + case SXTB: + n, sf, imms = 0b1, 0b1, 0x7 + case SXTBW: + n, sf, imms = 0b0, 0b0, 0x7 + case SXTH: + n, sf, imms = 0b1, 0b1, 0xf + case SXTHW: + n, sf, imms = 0b0, 0b0, 0xf + case SXTW: + n, sf, imms = 0b1, 0b1, 0x1f + } + + a.Buf.Write([]byte{ + (srcRegBits << 5) | dstRegBits, + imms<<2 | (srcRegBits >> 3), + n << 6, + sf<<7 | opc<<5 | 0b10011, + }) + default: + return errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeLeftShiftedRegisterToRegister(n *NodeImpl) (err error) { + + baseRegBits, err := intRegisterBits(n.SrcReg) + if err != nil { + return err + } + shiftTargetRegBits, err := intRegisterBits(n.SrcReg2) + if err != nil { + return err + } + dstRegBits, err := intRegisterBits(n.DstReg) + if err != nil { + return err + } + + switch n.Instruction { + case ADD: + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift + const logicalLeftShiftBits = 0b00 + if n.SrcConst < 0 || n.SrcConst > 64 { + return fmt.Errorf("shift amount must fit in unsigned 6-bit integer (0-64) but got %d", n.SrcConst) + } + shiftByte := byte(n.SrcConst) + a.Buf.Write([]byte{ + (baseRegBits << 5) | dstRegBits, + (shiftByte << 2) | (baseRegBits >> 3), + (logicalLeftShiftBits << 6) | shiftTargetRegBits, + 0b1000_1011, + }) + default: + return errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. 
+func (a *AssemblerImpl) EncodeTwoRegistersToRegister(n *NodeImpl) (err error) { + switch inst := n.Instruction; inst { + case AND, ANDW, ORR, ORRW, EOR, EORW: + // See "Logical (shifted register)" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en + srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.SrcReg), registerBits(n.SrcReg2), registerBits(n.DstReg) + var sf, opc byte + switch inst { + case AND: + sf, opc = 0b1, 0b00 + case ANDW: + sf, opc = 0b0, 0b00 + case ORR: + sf, opc = 0b1, 0b01 + case ORRW: + sf, opc = 0b0, 0b01 + case EOR: + sf, opc = 0b1, 0b10 + case EORW: + sf, opc = 0b0, 0b10 + } + a.Buf.Write([]byte{ + (srcReg2Bits << 5) | dstRegBits, + (srcReg2Bits >> 3), + srcRegBits, + sf<<7 | opc<<5 | 0b01010, + }) + case ASR, ASRW, LSL, LSLW, LSR, LSRW, ROR, RORW: + // See "Data-processing (2 source)" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en + srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.SrcReg), registerBits(n.SrcReg2), registerBits(n.DstReg) + + var sf, opcode byte + switch inst { + case ASR: + sf, opcode = 0b1, 0b001010 + case ASRW: + sf, opcode = 0b0, 0b001010 + case LSL: + sf, opcode = 0b1, 0b001000 + case LSLW: + sf, opcode = 0b0, 0b001000 + case LSR: + sf, opcode = 0b1, 0b001001 + case LSRW: + sf, opcode = 0b0, 0b001001 + case ROR: + sf, opcode = 0b1, 0b001011 + case RORW: + sf, opcode = 0b0, 0b001011 + } + a.Buf.Write([]byte{ + (srcReg2Bits << 5) | dstRegBits, + opcode<<2 | (srcReg2Bits >> 3), + 0b110_00000 | srcRegBits, + sf<<7 | 0b0_00_11010, + }) + case SDIV, SDIVW, UDIV, UDIVW: + srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.SrcReg), registerBits(n.SrcReg2), registerBits(n.DstReg) + + // See "Data-processing (2 source)" in + // https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en + var sf, opcode byte + switch inst { + case SDIV: + sf, opcode = 0b1, 0b000011 + case SDIVW: + sf, opcode = 0b0, 0b000011 + case UDIV: + sf, opcode = 0b1, 0b000010 + case UDIVW: + sf, opcode = 0b0, 0b000010 + } + + a.Buf.Write([]byte{ + (srcReg2Bits << 5) | dstRegBits, + opcode<<2 | (srcReg2Bits >> 3), + 0b110_00000 | srcRegBits, + sf<<7 | 0b0_00_11010, + }) + case SUB, SUBW: + srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.SrcReg), registerBits(n.SrcReg2), registerBits(n.DstReg) + + // See "Add/subtract (shifted register)" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en + var sf byte + if inst == SUB { + sf = 0b1 + } + + a.Buf.Write([]byte{ + (srcReg2Bits << 5) | dstRegBits, + (srcReg2Bits >> 3), + srcRegBits, + sf<<7 | 0b0_10_01011, + }) + case FSUBD, FSUBS: + srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.SrcReg), registerBits(n.SrcReg2), registerBits(n.DstReg) + + // See "Floating-point data-processing (2 source)" in + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en + var tp byte + if inst == FSUBD { + tp = 0b01 + } + a.Buf.Write([]byte{ + (srcReg2Bits << 5) | dstRegBits, + 0b0011_10_00 | (srcReg2Bits >> 3), + tp<<6 | 0b00_1_00000 | srcRegBits, + 0b0_00_11110, + }) + default: + return errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. 
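+//
+// Illustrative note (added for clarity, not part of the original change): per the
+// "Dst = Src2 - (Src1 * Src3)" convention below, e.g.
+// CompileThreeRegistersToRegister(MSUB, R0, R8, R10, R1) yields X1 = X8 - (X0 * X10).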
+func (a *AssemblerImpl) EncodeThreeRegistersToRegister(n *NodeImpl) (err error) { + switch n.Instruction { + case MSUB, MSUBW: + // Dst = Src2 - (Src1 * Src3) + // "Data-processing (3 source)" in: + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en + src1RegBits, err := intRegisterBits(n.SrcReg) + if err != nil { + return err + } + src2RegBits, err := intRegisterBits(n.SrcReg2) + if err != nil { + return err + } + src3RegBits, err := intRegisterBits(n.DstReg) + if err != nil { + return err + } + dstRegBits, err := intRegisterBits(n.DstReg2) + if err != nil { + return err + } + + var sf byte // is zero for MSUBW (32-bit MSUB). + if n.Instruction == MSUB { + sf = 0b1 + } + + a.Buf.Write([]byte{ + (src3RegBits << 5) | dstRegBits, + 0b1_0000000 | (src2RegBits << 2) | (src3RegBits >> 3), + src1RegBits, + sf<<7 | 0b00_11011, + }) + default: + return errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeTwoRegistersToNone(n *NodeImpl) (err error) { + switch n.Instruction { + case CMPW, CMP: + // Compare on two registers is an alias for "SUBS (src1, src2) ZERO" + // which can be encoded as SUBS (shifted registers) with zero shifting. + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift + src1RegBits, err := intRegisterBits(n.SrcReg) + if err != nil { + return err + } + src2RegBits, err := intRegisterBits(n.SrcReg2) + if err != nil { + return err + } + + var op byte + if n.Instruction == CMP { + op = 0b111 + } else { + op = 0b011 + } + + a.Buf.Write([]byte{ + (src2RegBits << 5) | zeroRegisterBits, + (src2RegBits >> 3), + src1RegBits, + 0b01011 | (op << 5), + }) + case FCMPS, FCMPD: + // "Floating-point compare" section in: + // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en + src1RegBits, err := floatRegisterBits(n.SrcReg) + if err != nil { + return err + } + src2RegBits, err := floatRegisterBits(n.SrcReg2) + if err != nil { + return err + } + + var ftype byte // is zero for FCMPS (single precision float compare). + if n.Instruction == FCMPD { + ftype = 0b01 + } + a.Buf.Write([]byte{ + (src2RegBits << 5) | 0b00000, + 0b001000_00 | (src2RegBits >> 3), + ftype<<6 | 0b1_00000 | src1RegBits, + 0b000_11110, + }) + default: + return errorEncodingUnsupported(n) + } + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. 
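+//
+// Illustrative note (added for clarity, not part of the original change): CMP (immediate)
+// is an alias of SUBS with the zero register as the destination, so e.g.
+// CompileRegisterAndConstToNone(CMP, R1, 0x123) is encoded as `SUBS XZR, X1, #0x123`.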
+func (a *AssemblerImpl) EncodeRegisterAndConstToNone(n *NodeImpl) (err error) {
+ if n.Instruction != CMP {
+ return errorEncodingUnsupported(n)
+ }
+
+ // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
+ if n.SrcConst < 0 || n.SrcConst > 4095 {
+ return fmt.Errorf("immediate for CMP must fit in 0 to 4095 but got %d", n.SrcConst)
+ } else if n.SrcReg == REGZERO {
+ return errors.New("zero register is not supported for CMP (immediate)")
+ }
+
+ srcRegBits, err := intRegisterBits(n.SrcReg)
+ if err != nil {
+ return err
+ }
+
+ a.Buf.Write([]byte{
+ (srcRegBits << 5) | zeroRegisterBits,
+ (byte(n.SrcConst) << 2) | (srcRegBits >> 3),
+ byte(n.SrcConst >> 6),
+ 0b111_10001,
+ })
+ return
+}
+
+func fitInSigned9Bits(v int64) bool {
+ return v >= -256 && v <= 255
+}
+
+func (a *AssemblerImpl) encodeLoadOrStoreWithRegisterOffset(
+ baseRegBits, offsetRegBits, targetRegBits byte, opcode, size, v byte) {
+ // See "Load/store register (register offset)".
+ // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
+ a.Buf.Write([]byte{
+ (baseRegBits << 5) | targetRegBits,
+ 0b011_010_00 | (baseRegBits >> 3),
+ opcode<<6 | 0b00_1_00000 | offsetRegBits,
+ size<<6 | v<<2 | 0b00_111_0_00,
+ })
+}
+
+// validateMemoryOffset validates whether the given memory offset can be encoded by this assembler.
+// In theory, the offset could be arbitrary, but for the simplicity of our homemade assembler we limit
+// it to the range that is sufficient for the JIT compiler.
+func validateMemoryOffset(offset int64) (err error) {
+ if offset > 255 && offset%8 != 0 {
+ // This is because we only have large offsets for load/store with the Wasm value stack, and its offset
+ // is always multiplied by 8 (== the size of uint64, the type of the value stack in Go).
+ err = fmt.Errorf("large memory offset (>255) must be a multiple of 8 but got %d", offset)
+ } else if offset < -256 { // 9-bit signed integer's minimum = -2^8.
+ err = fmt.Errorf("negative memory offset must be larger than or equal to -256 but got %d", offset)
+ } else if offset > 1<<31-1 {
+ return fmt.Errorf("large memory offset must be less than %d but got %d", 1<<31-1, offset)
+ }
+ return
+}
+
+// encodeLoadOrStoreWithConstOffset encodes load/store instructions with a constant offset.
+//
+// Note: Encoding strategy intentionally matches the Go assembler: https://go.dev/doc/asm
+func (a *AssemblerImpl) encodeLoadOrStoreWithConstOffset(
+ baseRegBits, targetRegBits byte,
+ offset int64,
+ opcode, size, v byte,
+ datasize, datasizeLog2 int64,
+) (err error) {
+ if err = validateMemoryOffset(offset); err != nil {
+ return
+ }
+
+ if fitInSigned9Bits(offset) {
+ // See "LDAPR/STLR (unscaled immediate)"
+ // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldapstl_unscaled
+ if offset < 0 || offset%datasize != 0 {
+ // This case is encoded as a single "unscaled signed" load/store.
+ a.Buf.Write([]byte{
+ (baseRegBits << 5) | targetRegBits,
+ byte(offset<<4) | (baseRegBits >> 3),
+ opcode<<6 | (0b00_00_11111 & byte(offset>>4)),
+ size<<6 | v<<2 | 0b00_1_11_0_00,
+ })
+ return
+ }
+ }
+
+ // At this point we can assume that the offset is positive and a multiple of datasize.
+ if offset < (1<<12)<<datasizeLog2 {
+ // This case can be encoded as a single load/store with the scaled "unsigned immediate" offset.
+ m := offset >> datasizeLog2
+ a.Buf.Write([]byte{
+ (baseRegBits << 5) | targetRegBits,
+ (byte(m) << 2) | (baseRegBits >> 3),
+ opcode<<6 | 0b00_111111&byte(m>>6),
+ size<<6 | v<<2 | 0b00_1_11_0_01,
+ })
+ return
+ }
+
+ // Otherwise, we need multiple instructions.
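+ // (Added illustration, assuming datasize=8) e.g. offset 0x10000 exceeds (1<<12)<<3,
+ // so it cannot be encoded as a single scaled 12-bit immediate: we either split it into
+ // two ADDs (the 24-bit case below) or place it in the const pool and load it via LDR (literal).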
+ tmpRegBits := registerBits(a.temporaryRegister)
+ offset32 := int32(offset)
+
+ // Go's assembler adds a const into the const pool at this point,
+ // regardless of its usage; e.g. if we enter the then-branch of the following if statement,
+ // the const is not used but it is still added to the const pool.
+ a.addConstPool(offset32, uint64(a.Buf.Len()))
+
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3529-L3532
+ // If the offset fits within 24 bits, we can load it with two ADD instructions.
+ hi := offset32 - (offset32 & (0xfff << uint(datasizeLog2)))
+ if hi&^0xfff000 == 0 {
+ var sfops byte = 0b100
+ m := ((offset32 - hi) >> datasizeLog2) & 0xfff
+ hi >>= 12
+
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3534-L3535
+ a.Buf.Write([]byte{
+ (baseRegBits << 5) | tmpRegBits,
+ (byte(hi) << 2) | (baseRegBits >> 3),
+ 0b01<<6 /* shift by 12 */ | byte(hi>>6),
+ sfops<<5 | 0b10001,
+ })
+
+ a.Buf.Write([]byte{
+ (tmpRegBits << 5) | targetRegBits,
+ (byte(m << 2)) | (tmpRegBits >> 3),
+ opcode<<6 | 0b00_111111&byte(m>>6),
+ size<<6 | v<<2 | 0b00_1_11_0_01,
+ })
+ } else {
+ // In this case we load the const via LDR (literal) into the temporary register,
+ // and the target const is placed after this instruction below.
+ loadLiteralOffsetInBinary := uint64(a.Buf.Len())
+
+ // First we emit the LDR (literal) with offset zero, as we don't yet know the const's placement in the binary.
+ // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--literal---Load-Register--literal--
+ a.Buf.Write([]byte{tmpRegBits, 0x0, 0x0, 0b00_011_0_00})
+
+ // Set the callback for the constant; the actual offset is properly filled in by the callback.
+ a.setConstPoolCallback(offset32, func(offsetOfConst int) {
+ // LDR (literal) encodes the offset divided by 4.
+ offset := (offsetOfConst - int(loadLiteralOffsetInBinary)) / 4
+ bin := a.Buf.Bytes()
+ bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
+ bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
+ bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
+ })
+
+ // Then, load/store the data using the temporary register (now holding the constant) as the register offset.
+ // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--register---Load-Register--register--
+ a.Buf.Write([]byte{
+ (baseRegBits << 5) | targetRegBits,
+ 0b011_010_00 | (baseRegBits >> 3),
+ opcode<<6 | 0b00_1_00000 | tmpRegBits,
+ size<<6 | v<<2 | 0b00_111_0_00,
+ })
+ }
+ return
+}
+
+var storeOrLoadInstructionTable = map[asm.Instruction]struct {
+ size, v byte
+ datasize, datasizeLog2 int64
+ isTargetFloat bool
+}{
+ MOVD: {size: 0b11, v: 0x0, datasize: 8, datasizeLog2: 3},
+ MOVW: {size: 0b10, v: 0x0, datasize: 4, datasizeLog2: 2},
+ MOVWU: {size: 0b10, v: 0x0, datasize: 4, datasizeLog2: 2},
+ MOVH: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1},
+ MOVHU: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1},
+ MOVB: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0},
+ MOVBU: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0},
+ FMOVD: {size: 0b11, v: 0x1, datasize: 8, datasizeLog2: 3, isTargetFloat: true},
+ FMOVS: {size: 0b10, v: 0x1, datasize: 4, datasizeLog2: 2, isTargetFloat: true},
+}
+
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
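+//
+// Illustrative note (added for clarity, not part of the original change): e.g.
+// CompileRegisterToMemory(MOVD, R0, R8, 0x8) stores the 64-bit value of R0 at [R8 + 0x8],
+// using the MOVD entry of storeOrLoadInstructionTable (size=0b11, v=0x0) with the "store" opcode 0x00.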
+func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) { + inst, ok := storeOrLoadInstructionTable[n.Instruction] + if !ok { + return errorEncodingUnsupported(n) + } + + var srcRegBits byte + if inst.isTargetFloat { + srcRegBits, err = floatRegisterBits(n.SrcReg) + } else { + srcRegBits, err = intRegisterBits(n.SrcReg) + } + if err != nil { + return + } + + baseRegBits, err := intRegisterBits(n.DstReg) + if err != nil { + return err + } + + const opcode = 0x00 // Opcode for store instructions. + if n.DstReg2 != asm.NilRegister { + offsetRegBits, err := intRegisterBits(n.DstReg2) + if err != nil { + return err + } + a.encodeLoadOrStoreWithRegisterOffset(baseRegBits, offsetRegBits, srcRegBits, opcode, inst.size, inst.v) + } else { + err = a.encodeLoadOrStoreWithConstOffset(baseRegBits, srcRegBits, n.DstConst, opcode, inst.size, inst.v, inst.datasize, inst.datasizeLog2) + } + return +} + +func (a *AssemblerImpl) encodeADR(n *NodeImpl) (err error) { + dstRegBits, err := intRegisterBits(n.DstReg) + if err != nil { + return err + } + + // At this point, we don't yet know the target instruction's offset, + // so we emit the ADR instruction with 0 offset, and replace later in the callback. + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en + a.Buf.Write([]byte{dstRegBits, 0x0, 0x0, 0b10000}) + + a.AddOnGenerateCallBack(func(code []byte) error { + // Find the target instruction node. + targetNode := n + for ; targetNode != nil; targetNode = targetNode.Next { + if targetNode.Instruction == n.readInstructionAddressBeforeTargetInstruction { + targetNode = targetNode.Next + break + } + } + + if targetNode == nil { + return fmt.Errorf("BUG: target instruction %s not found for ADR", InstructionName(n.readInstructionAddressBeforeTargetInstruction)) + } + + offset := targetNode.OffsetInBinary() - n.OffsetInBinary() + if offset > math.MaxUint8 { + // We could support up to 20-bit integer, but byte should be enough for our impl. + // If the necessity comes up, we could fix the below to support larger offsets. + return fmt.Errorf("BUG: too large offset for ADR") + } + + // Now ready to write an offset byte. + v := byte(offset) + + adrInstructionBytes := code[n.OffsetInBinary() : n.OffsetInBinary()+4] + // According to the binary format of ADR instruction in arm64: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en + // + // The 0 to 1 bits live on 29 to 30 bits of the instruction. + adrInstructionBytes[3] |= (v & 0b00000011) << 5 + // The 2 to 4 bits live on 5 to 7 bits of the instruction. + adrInstructionBytes[0] |= (v & 0b00011100) << 3 + // The 5 to 7 bits live on 8 to 10 bits of the instruction. + adrInstructionBytes[1] |= (v & 0b11100000) >> 5 + return nil + }) + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) { + if n.Instruction == ADR { + return a.encodeADR(n) + } + + inst, ok := storeOrLoadInstructionTable[n.Instruction] + if !ok { + return errorEncodingUnsupported(n) + } + + var dstRegBits byte + if inst.isTargetFloat { + dstRegBits, err = floatRegisterBits(n.DstReg) + } else { + dstRegBits, err = intRegisterBits(n.DstReg) + } + if err != nil { + return + } + baseRegBits, err := intRegisterBits(n.SrcReg) + if err != nil { + return err + } + + var opcode byte = 0b01 // Opcode for load instructions. 
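+ // (Added note) MOVW/MOVH/MOVB here are the sign-extending loads (LDRSW/LDRSH/LDRSB in ARM
+ // terms), hence the different opcode below; the "U"-suffixed variants zero-extend and keep
+ // opcode 0b01.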
+ if n.Instruction == MOVW || n.Instruction == MOVH || n.Instruction == MOVB {
+ // Sign-extending loads (i.e. the variants without the "U" suffix, other than the 64-bit MOVD) need a different opcode.
+ opcode = 0b10
+ }
+ if n.SrcReg2 != asm.NilRegister {
+ offsetRegBits, err := intRegisterBits(n.SrcReg2)
+ if err != nil {
+ return err
+ }
+ a.encodeLoadOrStoreWithRegisterOffset(baseRegBits, offsetRegBits, dstRegBits, opcode, inst.size, inst.v)
+ } else {
+ err = a.encodeLoadOrStoreWithConstOffset(baseRegBits, dstRegBits, n.SrcConst, opcode, inst.size, inst.v, inst.datasize, inst.datasizeLog2)
+ }
+ return
+}
+
+// const16bitAligned checks if the value is a 16-bit constant shifted left by a multiple of 16 bits.
+// If so, it returns the shift amount divided by 16; otherwise -1.
+func const16bitAligned(v int64) (ret int) {
+ ret = -1
+ for s := 0; s < 64; s += 16 {
+ if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
+ ret = s / 16
+ break
+ }
+ }
+ return
+}
+
+// isBitMaskImmediate determines whether the value can be encoded as a "bitmask immediate".
+//
+// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
+// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
+//
+// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
+func isBitMaskImmediate(x uint64) bool {
+ // All-zeros and all-ones values are not "bitmask immediates" by definition.
+ if x == 0 || x == 0xffff_ffff_ffff_ffff {
+ return false
+ }
+
+ switch {
+ case x != x>>32|x<<32:
+ // e = 64
+ case x != x>>16|x<<48:
+ // e = 32 (x == x>>32|x<<32).
+ // e.g. 0x00ff_ff00_00ff_ff00
+ x = uint64(int64(int32(x)))
+ case x != x>>8|x<<56:
+ // e = 16 (x == x>>16|x<<48).
+ // e.g. 0x00ff_00ff_00ff_00ff
+ x = uint64(int64(int16(x)))
+ case x != x>>4|x<<60:
+ // e = 8 (x == x>>8|x<<56).
+ // e.g. 0x0f0f_0f0f_0f0f_0f0f
+ x = uint64(int64(int8(x)))
+ default:
+ // e = 4 or 2.
+ return true
+ }
+ return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
+}
+
+// sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s).
+// For example: 0b1110 -> true, 0b1010 -> false
+func sequenceOfSetbits(x uint64) bool {
+ y := getLowestBit(x)
+ // If x is a contiguous sequence of set bits, this results in a number
+ // with only one set bit (i.e. a power of two).
+ y += x
+ return (y-1)&y == 0
+}
+
+func getLowestBit(x uint64) uint64 {
+ // See https://stackoverflow.com/questions/12247186/find-the-lowest-set-bit
+ return x & (^x + 1)
+}
+
+func (a *AssemblerImpl) addOrSub64BitRegisters(sfops byte, src1RegBits byte, src2RegBits byte) {
+ // src1Reg = src1Reg +/- src2Reg
+ a.Buf.Write([]byte{
+ (src1RegBits << 5) | src1RegBits,
+ (src1RegBits >> 3),
+ src2RegBits,
+ sfops<<5 | 0b01011,
+ })
+}
+
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
+func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) {
+ // Alias for readability.
+ c := n.SrcConst
+
+ dstRegBits, err := intRegisterBits(n.DstReg)
+ if err != nil {
+ return err
+ }
+
+ switch inst := n.Instruction; inst {
+ case ADD, SUB, SUBS:
+ var sfops byte = 0b100 // ADD
+ if inst == SUB {
+ sfops = 0b110 // SUB
+ } else if inst == SUBS {
+ sfops = 0b111 // SUBS
+ }
+
+ if c == 0 {
+ // If the constant equals zero, we encode it as ADD (register) with the zero register.
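+ // (Added example) e.g. CompileConstToRegister(ADD, 0, R5) is emitted as the
+ // register-form `ADD X5, X5, XZR` by addOrSub64BitRegisters below.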
+ a.addOrSub64BitRegisters(sfops, dstRegBits, zeroRegisterBits)
+ return
+ }
+
+ if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
+ // If the const can be represented as "imm12" or "imm12 << 12": one instruction.
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L2992
+
+ if c <= 0xfff {
+ a.Buf.Write([]byte{
+ (dstRegBits << 5) | dstRegBits,
+ (byte(c) << 2) | (dstRegBits >> 3),
+ byte(c >> 6),
+ sfops<<5 | 0b10001,
+ })
+ } else {
+ c >>= 12
+ a.Buf.Write([]byte{
+ (dstRegBits << 5) | dstRegBits,
+ (byte(c) << 2) | (dstRegBits >> 3),
+ 0b01<<6 /* shift by 12 */ | byte(c>>6),
+ sfops<<5 | 0b10001,
+ })
+ }
+ return
+ }
+
+ if t := const16bitAligned(c); t >= 0 {
+ // If the const is a 16-bit value aligned on a 16-bit boundary, e.g. 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
+ // we can load it into the temporary register with a single MOVZ (with shifting).
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
+ tmpRegBits := registerBits(a.temporaryRegister)
+
+ // MOVZ $c, tmpReg with shifting.
+ a.load16bitAlignedConst(c>>(16*t), byte(t), tmpRegBits, false, true)
+
+ // ADD/SUB tmpReg, dstReg
+ a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
+ return
+ } else if t := const16bitAligned(^c); t >= 0 {
+ // Similarly, if the bitwise inversion of the const is 16-bit aligned, load it with MOVN instead.
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
+ tmpRegBits := registerBits(a.temporaryRegister)
+
+ // MOVN $c, tmpReg with shifting.
+ a.load16bitAlignedConst((^c >> (16 * t)), byte(t), tmpRegBits, true, true)
+
+ // ADD/SUB tmpReg, dstReg
+ a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
+ return
+ }
+
+ if uc := uint64(c); isBitMaskImmediate(uc) {
+ // If the const can be represented as a "bitmask immediate", we load it via ORR into the temporary register.
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6570-L6583
+ tmpRegBits := registerBits(a.temporaryRegister)
+ // ORR $c, tmpReg
+ a.loadConstViaBitMaskImmediate(uc, tmpRegBits, true)
+
+ // ADD/SUB tmpReg, dstReg
+ a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
+ return
+ }
+
+ // If the value fits within 24 bits, we emit two ADD instructions.
+ if 0 <= c && c <= 0xffffff && inst != SUBS {
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3849-L3862
+ a.Buf.Write([]byte{
+ (dstRegBits << 5) | dstRegBits,
+ (byte(c) << 2) | (dstRegBits >> 3),
+ byte(c & 0xfff >> 6),
+ sfops<<5 | 0b10001,
+ })
+ c = c >> 12
+ a.Buf.Write([]byte{
+ (dstRegBits << 5) | dstRegBits,
+ (byte(c) << 2) | (dstRegBits >> 3),
+ 0b01_000000 /* shift by 12 */ | byte(c>>6),
+ sfops<<5 | 0b10001,
+ })
+ return
+ }
+
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3163-L3203
+ // Otherwise, we use MOVZ/MOVN (plus MOVKs) to load the const into the temporary register.
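+ // (Added example) e.g. c = 0x1234_5678_9abc_def0 has no 16-bit chunk equal to 0 or
+ // 0xffff, so load64bitConst below materializes it with one MOVZ and three MOVKs before
+ // the final ADD/SUB.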
+ tmpRegBits := registerBits(a.temporaryRegister)
+ a.load64bitConst(c, tmpRegBits)
+ a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
+ case MOVW:
+ if c == 0 {
+ a.Buf.Write([]byte{
+ (zeroRegisterBits << 5) | dstRegBits,
+ (zeroRegisterBits >> 3),
+ 0b000_00000 | zeroRegisterBits,
+ 0b0_01_01010,
+ })
+ return
+ }
+
+ // Following the logic here:
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
+ c32 := uint32(c)
+ ic := int64(c32)
+ if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
+ if isBitMaskImmediate(uint64(c)) {
+ a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, false)
+ return
+ }
+ }
+
+ if t := const16bitAligned(int64(c32)); t >= 0 {
+ // If the const is a 16-bit value aligned on a 16-bit boundary, e.g. 0xffff or 0xffff_0000,
+ // we can load it into the destination with a single MOVZ (with shifting).
+ a.load16bitAlignedConst(int64(c32)>>(16*t), byte(t), dstRegBits, false, false)
+ } else if t := const16bitAligned(int64(^c32)); t >= 0 {
+ // Similarly, if the bitwise inversion of the const is 16-bit aligned, use MOVN instead.
+ a.load16bitAlignedConst((int64(^c32) >> (16 * t)), byte(t), dstRegBits, true, false)
+ } else if isBitMaskImmediate(uint64(c)) {
+ a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, false)
+ } else {
+ // Otherwise, we use MOVZ and MOVK to load it.
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6623-L6630
+ c16 := uint16(c32)
+ // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ a.Buf.Write([]byte{
+ (byte(c16) << 5) | dstRegBits,
+ byte(c16 >> 3),
+ 1<<7 | byte(c16>>11),
+ 0b0_10_10010,
+ })
+ // MOVK: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
+ c16 = uint16(c32 >> 16)
+ if c16 != 0 {
+ a.Buf.Write([]byte{
+ (byte(c16) << 5) | dstRegBits,
+ byte(c16 >> 3),
+ 1<<7 | 0b0_01_00000 /* shift by 16 */ | byte(c16>>11),
+ 0b0_11_10010,
+ })
+ }
+ }
+ case MOVD:
+ // Following the logic here:
+ // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
+ if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
+ if isBitMaskImmediate(uint64(c)) {
+ a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, true)
+ return
+ }
+ }
+
+ if t := const16bitAligned(c); t >= 0 {
+ // If the const is a 16-bit value aligned on a 16-bit boundary, e.g. 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
+ // we can load it into the destination with a single MOVZ (with shifting).
+ a.load16bitAlignedConst(c>>(16*t), byte(t), dstRegBits, false, true)
+ } else if t := const16bitAligned(int64(^c)); t >= 0 {
+ // Similarly, if the bitwise inversion of the const is 16-bit aligned, use MOVN instead.
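+ // (Added example) e.g. c = 0xffff_ffff_ffff_1234: ^c = 0xedcb fits in the low 16 bits,
+ // so a single MOVN materializes the value.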
+ a.load16bitAlignedConst((int64(^c) >> (16 * t)), byte(t), dstRegBits, true, true)
+ } else if isBitMaskImmediate(uint64(c)) {
+ a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, true)
+ } else {
+ a.load64bitConst(c, dstRegBits)
+ }
+ case LSR:
+ if c == 0 {
+ err = errors.New("LSR with zero constant should be optimized out")
+ return
+ } else if c < 0 || c > 63 {
+ err = fmt.Errorf("LSR requires the immediate to be within 0 to 63, but got %d", c)
+ return
+ }
+
+ // LSR (immediate) is an alias of UBFM.
+ // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
+ a.Buf.Write([]byte{
+ (dstRegBits << 5) | dstRegBits,
+ 0b111111_00 | dstRegBits>>3,
+ 0b01_000000 | byte(c),
+ 0b110_10011,
+ })
+ default:
+ return errorEncodingUnsupported(n)
+ }
+ return
+}
+
+func (a *AssemblerImpl) movk(v uint64, shiftNum int, dstRegBits byte) {
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
+ a.Buf.Write([]byte{
+ (byte(v) << 5) | dstRegBits,
+ byte(v >> 3),
+ 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
+ 0b1_11_10010,
+ })
+}
+
+func (a *AssemblerImpl) movz(v uint64, shiftNum int, dstRegBits byte) {
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ a.Buf.Write([]byte{
+ (byte(v) << 5) | dstRegBits,
+ byte(v >> 3),
+ 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
+ 0b1_10_10010,
+ })
+}
+
+func (a *AssemblerImpl) movn(v uint64, shiftNum int, dstRegBits byte) {
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
+ a.Buf.Write([]byte{
+ (byte(v) << 5) | dstRegBits,
+ byte(v >> 3),
+ 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
+ 0b1_00_10010,
+ })
+}
+
+// load64bitConst loads a 64-bit constant into the register, following the same logic as the Go assembler
+// for deciding how to load large 64-bit consts.
+//
+// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
+func (a *AssemblerImpl) load64bitConst(c int64, dstRegBits byte) {
+ var bits [4]uint64
+ var zeros, negs int
+ for i := 0; i < 4; i++ {
+ bits[i] = uint64((c >> uint(i*16)) & 0xffff)
+ if v := bits[i]; v == 0 {
+ zeros++
+ } else if v == 0xffff {
+ negs++
+ }
+ }
+
+ if zeros == 3 {
+ // one MOVZ instruction.
+ for i, v := range bits {
+ if v != 0 {
+ a.movz(v, i, dstRegBits)
+ }
+ }
+
+ } else if negs == 3 {
+ // one MOVN instruction.
+ for i, v := range bits {
+ if v != 0xffff {
+ v = ^v
+ a.movn(v, i, dstRegBits)
+ }
+ }
+
+ } else if zeros == 2 {
+ // one MOVZ then one MOVK.
+ var movz bool
+ for i, v := range bits {
+ if !movz && v != 0 { // MOVZ.
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ a.movz(v, i, dstRegBits)
+ movz = true
+ } else if v != 0 {
+ a.movk(v, i, dstRegBits)
+ }
+ }
+
+ } else if negs == 2 {
+ // one MOVN then one MOVK.
+ var movn bool
+ for i, v := range bits { // Emit MOVN.
+ if !movn && v != 0xffff {
+ v = ^v
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
+ a.movn(v, i, dstRegBits)
+ movn = true
+ } else if v != 0xffff {
+ a.movk(v, i, dstRegBits)
+ }
+ }
+
+ } else if zeros == 1 {
+ // one MOVZ then two MOVKs.
+ var movz bool
+ for i, v := range bits {
+ if !movz && v != 0 { // MOVZ.
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ a.movz(v, i, dstRegBits)
+ movz = true
+ } else if v != 0 {
+ a.movk(v, i, dstRegBits)
+ }
+ }
+
+ } else if negs == 1 {
+ // one MOVN then two MOVKs.
+ var movn bool
+ for i, v := range bits { // Emit MOVN.
+ if !movn && v != 0xffff {
+ v = ^v
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
+ a.movn(v, i, dstRegBits)
+ movn = true
+ } else if v != 0xffff {
+ a.movk(v, i, dstRegBits)
+ }
+ }
+
+ } else {
+ // one MOVZ then three MOVKs.
+ var movz bool
+ for i, v := range bits {
+ if !movz && v != 0 { // MOVZ.
+ // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ a.movz(v, i, dstRegBits)
+ movz = true
+ } else if v != 0 {
+ a.movk(v, i, dstRegBits)
+ }
+ }
+
+ }
+}
+
+func (a *AssemblerImpl) load16bitAlignedConst(c int64, shiftNum byte, regBits byte, reverse bool, dst64bit bool) {
+ var lastByte byte
+ if reverse {
+ // MOVN: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
+ lastByte = 0b0_00_10010
+ } else {
+ // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
+ lastByte = 0b0_10_10010
+ }
+ if dst64bit {
+ lastByte |= 0b1 << 7
+ }
+ a.Buf.Write([]byte{
+ (byte(c) << 5) | regBits,
+ byte(c >> 3),
+ 1<<7 | (byte(shiftNum) << 5) | byte(c>>11),
+ lastByte,
+ })
+}
+
+// loadConstViaBitMaskImmediate loads the constant with ORR (bitmask immediate).
+// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--?lang=en
+func (a *AssemblerImpl) loadConstViaBitMaskImmediate(c uint64, regBits byte, dst64bit bool) {
+ var size uint32
+ switch {
+ case c != c>>32|c<<32:
+ size = 64
+ case c != c>>16|c<<48:
+ size = 32
+ c = uint64(int64(int32(c)))
+ case c != c>>8|c<<56:
+ size = 16
+ c = uint64(int64(int16(c)))
+ case c != c>>4|c<<60:
+ size = 8
+ c = uint64(int64(int8(c)))
+ case c != c>>2|c<<62:
+ size = 4
+ c = uint64(int64(c<<60) >> 60)
+ default:
+ size = 2
+ c = uint64(int64(c<<62) >> 62)
+ }
+
+ neg := false
+ if int64(c) < 0 {
+ c = ^c
+ neg = true
+ }
+
+ onesSize, nonZeroPos := getOnesSequenceSize(c)
+ if neg {
+ nonZeroPos = onesSize + nonZeroPos
+ onesSize = size - onesSize
+ }
+
+ // See the following article for understanding the encoding:
+ // https://dinfuehr.github.io/blog/encoding-of-immediate-values-on-aarch64/
+ var n byte
+ var mode = 32
+ if dst64bit && size == 64 {
+ n = 0b1
+ mode = 64
+ }
+
+ r := byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
+ s := byte((onesSize - 1) | 63&^(size<<1-1))
+
+ var sf byte
+ if dst64bit {
+ sf = 0b1
+ }
+ a.Buf.Write([]byte{
+ (zeroRegisterBits << 5) | regBits,
+ s<<2 | (zeroRegisterBits >> 3),
+ n<<6 | r,
+ sf<<7 | 0b0_01_10010,
+ })
+}
+
+func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
+ // Take 0b00111000 for example:
+ y := getLowestBit(x) // = 0b00001000
+ nonZeroPos = setBitPos(y) // = 3
+ size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b01000000) - 3 = 6 - 3 = 3
+ return
+}
+
+func setBitPos(x uint64) (ret uint32) {
+ for ; ; ret++ {
+ if x == 0b1 {
+ break
+ }
+ x = x >> 1
+ }
+ return
+}
+
+// Exported for inter-op testing with golang-asm.
+// TODO: unexport after golang-asm complete removal.
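+//
+// Illustrative note (added for clarity, not part of the original change): e.g.
+// CompileSIMDByteToSIMDByte(VCNT, F1, F2) emits `CNT V2.8B, V1.8B`, counting the set bits
+// in each byte lane of V1; combined with VUADDLV (handled below), this is how a full
+// population count can be computed.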
+func (a *AssemblerImpl) EncodeSIMDByteToSIMDByte(n *NodeImpl) (err error) { + if n.Instruction != VCNT { + return errorEncodingUnsupported(n) + } + + srcRegBits, err := floatRegisterBits(n.SrcReg) + if err != nil { + return err + } + + dstRegBits, err := floatRegisterBits(n.DstReg) + if err != nil { + return err + } + + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/CNT--Population-Count-per-byte- + a.Buf.Write([]byte{ + (srcRegBits << 5) | dstRegBits, + 0b010110_00 | srcRegBits>>3, + 0b00_100000, + 0b1110, + }) + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeSIMDByteToRegister(n *NodeImpl) (err error) { + if n.Instruction != VUADDLV { + return errorEncodingUnsupported(n) + } + + srcRegBits, err := floatRegisterBits(n.SrcReg) + if err != nil { + return err + } + + dstRegBits, err := floatRegisterBits(n.DstReg) + if err != nil { + return err + } + + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UADDLV--Unsigned-sum-Long-across-Vector- + a.Buf.Write([]byte{ + (srcRegBits << 5) | dstRegBits, + 0b001110_00 | srcRegBits>>3, + 0b00_110000, + 0b101110, + }) + return +} + +// Exported for inter-op testing with golang-asm. +// TODO: unexport after golang-asm complete removal. +func (a *AssemblerImpl) EncodeTwoSIMDBytesToSIMDByteRegister(n *NodeImpl) (err error) { + if n.Instruction != VBIT { + return errorEncodingUnsupported(n) + } + + src1RegBits, err := floatRegisterBits(n.SrcReg) + if err != nil { + return err + } + + src2RegBits, err := floatRegisterBits(n.SrcReg2) + if err != nil { + return err + } + + dstRegBits, err := floatRegisterBits(n.DstReg) + if err != nil { + return err + } + + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIT--Bitwise-Insert-if-True-?lang=en + a.Buf.Write([]byte{ + (src2RegBits << 5) | dstRegBits, + 0b000111_00 | src2RegBits>>3, + 0b101_00000 | src1RegBits, + 0b00101110, + }) + return +} + +var zeroRegisterBits byte = 0b11111 + +func isIntRegister(r asm.Register) bool { + return REG_R0 <= r && r <= REGZERO +} + +func isFloatRegister(r asm.Register) bool { + return REG_F0 <= r && r <= REG_F31 +} + +func isConditionalRegister(r asm.Register) bool { + return REG_COND_EQ <= r && r <= REG_COND_NV +} + +func intRegisterBits(r asm.Register) (ret byte, err error) { + if !isIntRegister(r) { + err = fmt.Errorf("%s is not integer", RegisterName(r)) + } else { + ret = byte((r - REG_R0)) + } + return +} + +func floatRegisterBits(r asm.Register) (ret byte, err error) { + if !isFloatRegister(r) { + err = fmt.Errorf("%s is not float", RegisterName(r)) + } else { + ret = byte((r - REG_F0)) + } + return +} + +func registerBits(r asm.Register) (ret byte) { + if isIntRegister(r) { + ret = byte((r - REG_R0)) + } else { + ret = byte((r - REG_F0)) + } + return +} diff --git a/internal/asm/arm64/impl_test.go b/internal/asm/arm64/impl_test.go new file mode 100644 index 0000000000..6f1cc2caff --- /dev/null +++ b/internal/asm/arm64/impl_test.go @@ -0,0 +1,416 @@ +package asm_arm64 + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/tetratelabs/wazero/internal/asm" +) + +func TestNodeImpl_AssignJumpTarget(t *testing.T) { + n := &NodeImpl{} + target := &NodeImpl{} + n.AssignJumpTarget(target) + require.Equal(t, n.JumpTarget, target) +} + +func TestNodeImpl_AssignDestinationConstant(t *testing.T) { + n := &NodeImpl{} + n.AssignDestinationConstant(12345) + 
require.Equal(t, int64(12345), n.DstConst) +} + +func TestNodeImpl_AssignSourceConstant(t *testing.T) { + n := &NodeImpl{} + n.AssignSourceConstant(12345) + require.Equal(t, int64(12345), n.SrcConst) +} + +func TestNodeImpl_String(t *testing.T) { + for _, tc := range []struct { + in *NodeImpl + exp string + }{ + { + in: &NodeImpl{Instruction: NOP, Types: OperandTypesNoneToNone}, + exp: "NOP", + }, + { + in: &NodeImpl{Instruction: BEQ, Types: OperandTypesNoneToRegister, DstReg: REG_R1}, + exp: "BEQ R1", + }, + { + in: &NodeImpl{Instruction: BNE, Types: OperandTypesNoneToMemory, DstReg: REG_R1, DstConst: 0x1234}, + exp: "BNE [R1 + 0x1234]", + }, + { + in: &NodeImpl{Instruction: BNE, Types: OperandTypesNoneToBranch, JumpTarget: &NodeImpl{Instruction: NOP}}, + exp: "BNE {NOP}", + }, + { + in: &NodeImpl{Instruction: ADD, Types: OperandTypesRegisterToRegister, SrcReg: REG_F0, DstReg: REG_F10}, + exp: "ADD F0, F10", + }, + { + in: &NodeImpl{Instruction: ADD, Types: OperandTypesLeftShiftedRegisterToRegister, + SrcReg: REG_R0, SrcReg2: REG_R11, SrcConst: 4, DstReg: REG_R10}, + exp: "ADD (R0, R11 << 4), R10", + }, + { + in: &NodeImpl{Instruction: ADD, Types: OperandTypesTwoRegistersToRegister, SrcReg: REG_R0, SrcReg2: REG_R8, DstReg: REG_R10}, + exp: "ADD (R0, R8), R10", + }, + { + in: &NodeImpl{Instruction: MSUB, Types: OperandTypesThreeRegistersToRegister, + SrcReg: REG_R0, SrcReg2: REG_R8, DstReg: REG_R10, DstReg2: REG_R1}, + exp: "MSUB (R0, R8, R10), R1)", + }, + { + in: &NodeImpl{Instruction: CMPW, Types: OperandTypesTwoRegistersToNone, SrcReg: REG_R0, SrcReg2: REG_R8}, + exp: "CMPW (R0, R8)", + }, + { + in: &NodeImpl{Instruction: CMP, Types: OperandTypesRegisterAndConstToNone, SrcReg: REG_R0, SrcConst: 0x123}, + exp: "CMP (R0, 0x123)", + }, + { + in: &NodeImpl{Instruction: MOVD, Types: OperandTypesRegisterToMemory, SrcReg: REG_R0, DstReg: REG_R8, DstConst: 0x123}, + exp: "MOVD R0, [R8 + 0x123]", + }, + { + in: &NodeImpl{Instruction: MOVD, Types: OperandTypesRegisterToMemory, SrcReg: REG_R0, DstReg: REG_R8, DstReg2: REG_R6}, + exp: "MOVD R0, [R8 + R6]", + }, + { + in: &NodeImpl{Instruction: MOVD, Types: OperandTypesMemoryToRegister, SrcReg: REG_R0, SrcConst: 0x123, DstReg: REG_R8}, + exp: "MOVD [R0 + 0x123], R8", + }, + { + in: &NodeImpl{Instruction: MOVD, Types: OperandTypesMemoryToRegister, SrcReg: REG_R0, SrcReg2: REG_R6, DstReg: REG_R8}, + exp: "MOVD [R0 + R6], R8", + }, + { + in: &NodeImpl{Instruction: MOVD, Types: OperandTypesConstToRegister, SrcConst: 0x123, DstReg: REG_R8}, + exp: "MOVD 0x123, R8", + }, + { + in: &NodeImpl{Instruction: VCNT, Types: OperandTypesSIMDByteToSIMDByte, SrcReg: REG_F1, DstReg: REG_F2}, + exp: "VCNT F1.B8, F2.B8", + }, + { + in: &NodeImpl{Instruction: VUADDLV, Types: OperandTypesSIMDByteToRegister, SrcReg: REG_F1, DstReg: REG_F2}, + exp: "VUADDLV F1.B8, F2", + }, + { + in: &NodeImpl{Instruction: VBIT, Types: OperandTypesTwoSIMDBytesToSIMDByteRegister, SrcReg: REG_F1, SrcReg2: REG_F2, DstReg: REG_F3}, + exp: "VBIT (F1.B8, F2.B8), F3.B8", + }, + } { + require.Equal(t, tc.exp, tc.in.String()) + } +} + +func TestAssemblerImpl_addNode(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + + root := &NodeImpl{} + a.addNode(root) + require.Equal(t, a.Root, root) + require.Equal(t, a.Current, root) + require.Nil(t, root.Next) + + next := &NodeImpl{} + a.addNode(next) + require.Equal(t, a.Root, root) + require.Equal(t, a.Current, next) + require.Equal(t, next, root.Next) + require.Nil(t, next.Next) +} + +func TestAssemblerImpl_newNode(t *testing.T) { + a := 
NewAssemblerImpl(REG_R10) + actual := a.newNode(MOVD, OperandTypesMemoryToRegister) + require.Equal(t, MOVD, actual.Instruction) + require.Equal(t, OperandTypeMemory, actual.Types.src) + require.Equal(t, OperandTypeRegister, actual.Types.dst) + require.Equal(t, actual, a.Root) + require.Equal(t, actual, a.Current) +} + +func TestAssemblerImpl_CompileStandAlone(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileStandAlone(RET) + actualNode := a.Current + require.Equal(t, RET, actualNode.Instruction) + require.Equal(t, OperandTypeNone, actualNode.Types.src) + require.Equal(t, OperandTypeNone, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileConstToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileConstToRegister(MOVD, 1000, REG_R10) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, int64(1000), actualNode.SrcConst) + require.Equal(t, REG_R10, actualNode.DstReg) + require.Equal(t, OperandTypeConst, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileRegisterToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileRegisterToRegister(MOVD, REG_R15, REG_R27) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R15, actualNode.SrcReg) + require.Equal(t, REG_R27, actualNode.DstReg) + require.Equal(t, OperandTypeRegister, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileMemoryToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileMemoryToRegister(MOVD, REG_R15, 100, REG_R27) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R15, actualNode.SrcReg) + require.Equal(t, int64(100), actualNode.SrcConst) + require.Equal(t, REG_R27, actualNode.DstReg) + require.Equal(t, OperandTypeMemory, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileRegisterToMemory(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileRegisterToMemory(MOVD, REG_R15, REG_R27, 100) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R15, actualNode.SrcReg) + require.Equal(t, REG_R27, actualNode.DstReg) + require.Equal(t, int64(100), actualNode.DstConst) + require.Equal(t, OperandTypeRegister, actualNode.Types.src) + require.Equal(t, OperandTypeMemory, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileJump(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileJump(B) + actualNode := a.Current + require.Equal(t, B, actualNode.Instruction) + require.Equal(t, OperandTypeNone, actualNode.Types.src) + require.Equal(t, OperandTypeBranch, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileJumpToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileJumpToRegister(BNE, REG_R27) + actualNode := a.Current + require.Equal(t, BNE, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.DstReg) + require.Equal(t, OperandTypeNone, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileJumpToMemory(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileJumpToMemory(BNE, REG_R27) + actualNode := a.Current + require.Equal(t, BNE, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.DstReg) + require.Equal(t, OperandTypeNone, actualNode.Types.src) + require.Equal(t, 
OperandTypeMemory, actualNode.Types.dst) +} + +func TestAssemblerImpl_CompileReadInstructionAddress(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileReadInstructionAddress(REG_R10, RET) + actualNode := a.Current + require.Equal(t, ADR, actualNode.Instruction) + require.Equal(t, REG_R10, actualNode.DstReg) + require.Equal(t, OperandTypeMemory, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) + require.Equal(t, RET, actualNode.readInstructionAddressBeforeTargetInstruction) +} + +func Test_CompileMemoryWithRegisterOffsetToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileMemoryWithRegisterOffsetToRegister(MOVD, REG_R27, REG_R10, REG_R0) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.SrcReg2) + require.Equal(t, REG_R0, actualNode.DstReg) + require.Equal(t, OperandTypeMemory, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_CompileRegisterToMemoryWithRegisterOffset(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileRegisterToMemoryWithRegisterOffset(MOVD, REG_R27, REG_R10, REG_R0) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.DstReg) + require.Equal(t, REG_R0, actualNode.DstReg2) + require.Equal(t, OperandTypeRegister, actualNode.Types.src) + require.Equal(t, OperandTypeMemory, actualNode.Types.dst) +} + +func Test_CompileTwoRegistersToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileTwoRegistersToRegister(MOVD, REG_R27, REG_R10, REG_R0) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.SrcReg2) + require.Equal(t, REG_R0, actualNode.DstReg) + require.Equal(t, OperandTypeTwoRegisters, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_CompileThreeRegistersToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileThreeRegistersToRegister(MOVD, REG_R27, REG_R10, REG_R0, REG_R28) + actualNode := a.Current + require.Equal(t, MOVD, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.SrcReg2) + require.Equal(t, REG_R0, actualNode.DstReg) + require.Equal(t, REG_R28, actualNode.DstReg2) + require.Equal(t, OperandTypeThreeRegisters, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_CompileTwoRegistersToNone(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileTwoRegistersToNone(CMP, REG_R27, REG_R10) + actualNode := a.Current + require.Equal(t, CMP, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.SrcReg2) + require.Equal(t, OperandTypeTwoRegisters, actualNode.Types.src) + require.Equal(t, OperandTypeNone, actualNode.Types.dst) +} + +func Test_CompileRegisterAndConstToNone(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileRegisterAndConstToNone(CMP, REG_R27, 10) + actualNode := a.Current + require.Equal(t, CMP, actualNode.Instruction) + require.Equal(t, REG_R27, actualNode.SrcReg) + require.Equal(t, int64(10), actualNode.SrcConst) + require.Equal(t, OperandTypeRegisterAndConst, actualNode.Types.src) + require.Equal(t, OperandTypeNone, actualNode.Types.dst) +} + +func 
Test_CompileLeftShiftedRegisterToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileLeftShiftedRegisterToRegister(ADD, REG_R27, 10, REG_R28, REG_R5) + actualNode := a.Current + require.Equal(t, ADD, actualNode.Instruction) + require.Equal(t, REG_R28, actualNode.SrcReg) + require.Equal(t, REG_R27, actualNode.SrcReg2) + require.Equal(t, int64(10), actualNode.SrcConst) + require.Equal(t, REG_R5, actualNode.DstReg) + require.Equal(t, OperandTypeLeftShiftedRegister, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_CompileSIMDByteToSIMDByte(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileSIMDByteToSIMDByte(VCNT, REG_F0, REG_F2) + actualNode := a.Current + require.Equal(t, VCNT, actualNode.Instruction) + require.Equal(t, REG_F0, actualNode.SrcReg) + require.Equal(t, REG_F2, actualNode.DstReg) + require.Equal(t, OperandTypeSIMDByte, actualNode.Types.src) + require.Equal(t, OperandTypeSIMDByte, actualNode.Types.dst) +} + +func Test_CompileTwoSIMDBytesToSIMDByteRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileTwoSIMDBytesToSIMDByteRegister(VBIT, REG_F0, REG_F10, REG_F2) + actualNode := a.Current + require.Equal(t, VBIT, actualNode.Instruction) + require.Equal(t, REG_F0, actualNode.SrcReg) + require.Equal(t, REG_F10, actualNode.SrcReg2) + require.Equal(t, REG_F2, actualNode.DstReg) + require.Equal(t, OperandTypeTwoSIMDBytes, actualNode.Types.src) + require.Equal(t, OperandTypeSIMDByte, actualNode.Types.dst) +} + +func Test_CompileSIMDByteToRegister(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileSIMDByteToRegister(VUADDLV, REG_F0, REG_F10) + actualNode := a.Current + require.Equal(t, VUADDLV, actualNode.Instruction) + require.Equal(t, REG_F0, actualNode.SrcReg) + require.Equal(t, REG_F10, actualNode.DstReg) + require.Equal(t, OperandTypeSIMDByte, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_CompileConditionalRegisterSet(t *testing.T) { + a := NewAssemblerImpl(REG_R10) + a.CompileConditionalRegisterSet(COND_NE, REG_R10) + actualNode := a.Current + require.Equal(t, CSET, actualNode.Instruction) + require.Equal(t, REG_COND_NE, actualNode.SrcReg) + require.Equal(t, REG_R10, actualNode.DstReg) + require.Equal(t, OperandTypeRegister, actualNode.Types.src) + require.Equal(t, OperandTypeRegister, actualNode.Types.dst) +} + +func Test_checkRegisterToRegisterType(t *testing.T) { + for _, tc := range []struct { + src, dst asm.Register + requireSrcInt, requireDstInt bool + expErr string + }{ + {src: REG_R10, dst: REG_R30, requireSrcInt: true, requireDstInt: true, expErr: ""}, + {src: REG_R10, dst: REG_R30, requireSrcInt: false, requireDstInt: true, expErr: "src requires float register but got R10"}, + {src: REG_R10, dst: REG_R30, requireSrcInt: false, requireDstInt: false, expErr: "src requires float register but got R10"}, + {src: REG_R10, dst: REG_R30, requireSrcInt: true, requireDstInt: false, expErr: "dst requires float register but got R30"}, + + {src: REG_R10, dst: REG_F30, requireSrcInt: true, requireDstInt: false, expErr: ""}, + {src: REG_R10, dst: REG_F30, requireSrcInt: false, requireDstInt: true, expErr: "src requires float register but got R10"}, + {src: REG_R10, dst: REG_F30, requireSrcInt: false, requireDstInt: false, expErr: "src requires float register but got R10"}, + {src: REG_R10, dst: REG_F30, requireSrcInt: true, requireDstInt: true, expErr: "dst requires int register but got F30"}, + + {src: REG_F10, dst: REG_R30, 
requireSrcInt: false, requireDstInt: true, expErr: ""},
+ {src: REG_F10, dst: REG_R30, requireSrcInt: true, requireDstInt: true, expErr: "src requires int register but got F10"},
+ {src: REG_F10, dst: REG_R30, requireSrcInt: true, requireDstInt: false, expErr: "src requires int register but got F10"},
+ {src: REG_F10, dst: REG_R30, requireSrcInt: false, requireDstInt: false, expErr: "dst requires float register but got R30"},
+
+ {src: REG_F10, dst: REG_F30, requireSrcInt: false, requireDstInt: false, expErr: ""},
+ {src: REG_F10, dst: REG_F30, requireSrcInt: true, requireDstInt: false, expErr: "src requires int register but got F10"},
+ {src: REG_F10, dst: REG_F30, requireSrcInt: true, requireDstInt: true, expErr: "src requires int register but got F10"},
+ {src: REG_F10, dst: REG_F30, requireSrcInt: false, requireDstInt: true, expErr: "dst requires int register but got F30"},
+ } {
+ actual := checkRegisterToRegisterType(tc.src, tc.dst, tc.requireSrcInt, tc.requireDstInt)
+ if tc.expErr != "" {
+ require.EqualError(t, actual, tc.expErr)
+ } else {
+ require.NoError(t, actual)
+ }
+ }
+}
+
+func Test_validateMemoryOffset(t *testing.T) {
+ for _, tc := range []struct {
+ offset int64
+ expErr string
+ }{
+ {offset: 0}, {offset: -256}, {offset: 255}, {offset: 123 * 8},
+ {offset: -257, expErr: "negative memory offset must be larger than or equal to -256 but got -257"},
+ {offset: 257, expErr: "large memory offset (>255) must be a multiple of 8 but got 257"},
+ } {
+ actual := validateMemoryOffset(tc.offset)
+ if tc.expErr == "" {
+ require.NoError(t, actual)
+ } else {
+ require.EqualError(t, actual, tc.expErr)
+ }
+ }
+}
diff --git a/internal/asm/arm64_debug/debug_assembler.go b/internal/asm/arm64_debug/debug_assembler.go
new file mode 100644
index 0000000000..ab2768c182
--- /dev/null
+++ b/internal/asm/arm64_debug/debug_assembler.go
@@ -0,0 +1,256 @@
+package arm64debug
+
+import (
+ "bytes"
+ "encoding/hex"
+ "fmt"
+
+ "github.com/tetratelabs/wazero/internal/asm"
+ asm_arm64 "github.com/tetratelabs/wazero/internal/asm/arm64"
+ "github.com/tetratelabs/wazero/internal/asm/golang_asm"
+)
+
+// NewDebugAssembler can be used to ensure that our assembler produces exactly the same binary as Go's.
+// Disabled by default, but assigning this to NewAssembler allows us to debug the assembler's bugs.
+//
+// TODO: this will be removed after golang-asm removal.
+// Note: this is intentionally exported in order to suppress a bunch of "unused" lint errors on this function, testAssembler and testNode.
+func NewDebugAssembler(temporaryRegister asm.Register) (asm_arm64.Assembler, error) {
+ goasm, err := newAssembler(temporaryRegister)
+ if err != nil {
+ return nil, err
+ }
+ a := asm_arm64.NewAssemblerImpl(temporaryRegister)
+ return &testAssembler{a: a, goasm: goasm}, nil
+}
+
+// testAssembler implements Assembler.
+// This assembler ensures that our assembler produces exactly the same binary as Go's official assembler.
+// Disabled by default, and can be used for debugging only.
+//
+// TODO: this will be removed after golang-asm removal.
+type testAssembler struct {
+ goasm *assemblerGoAsmImpl
+ a *asm_arm64.AssemblerImpl
+}
+
+// testNode implements asm.Node for usage with testAssembler.
+//
+// TODO: this will be removed after golang-asm removal.
+type testNode struct {
+ n *asm_arm64.NodeImpl
+ goasm *golang_asm.GolangAsmNode
+}
+
+// String implements fmt.Stringer.
+func (tn *testNode) String() string { + return tn.n.String() +} + +// AssignJumpTarget implements the same method as documented on asm.Node. +func (tn *testNode) AssignJumpTarget(target asm.Node) { + targetTestNode := target.(*testNode) + tn.goasm.AssignJumpTarget(targetTestNode.goasm) + tn.n.AssignJumpTarget(targetTestNode.n) +} + +// AssignDestinationConstant implements the same method as documented on asm.Node. +func (tn *testNode) AssignDestinationConstant(value asm.ConstantValue) { + tn.goasm.AssignDestinationConstant(value) + tn.n.AssignDestinationConstant(value) +} + +// AssignSourceConstant implements the same method as documented on asm.Node. +func (tn *testNode) AssignSourceConstant(value asm.ConstantValue) { + tn.goasm.AssignSourceConstant(value) + tn.n.AssignSourceConstant(value) +} + +// OffsetInBinary implements the same method as documented on asm.Node. +func (tn *testNode) OffsetInBinary() asm.NodeOffsetInBinary { + return tn.goasm.OffsetInBinary() +} + +// Assemble implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) Assemble() ([]byte, error) { + ret, err := ta.goasm.Assemble() + if err != nil { + return nil, err + } + + a, err := ta.a.Assemble() + if err != nil { + return nil, fmt.Errorf("homemade assembler failed: %w", err) + } + + if !bytes.Equal(ret, a) { + expected := hex.EncodeToString(ret) + actual := hex.EncodeToString(a) + return nil, fmt.Errorf("expected (len=%d): %s\nactual(len=%d): %s", len(expected), expected, len(actual), actual) + } + return ret, nil +} + +// SetJumpTargetOnNext implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) SetJumpTargetOnNext(nodes ...asm.Node) { + for _, n := range nodes { + targetTestNode := n.(*testNode) + ta.goasm.SetJumpTargetOnNext(targetTestNode.goasm) + ta.a.SetJumpTargetOnNext(targetTestNode.n) + } +} + +// BuildJumpTable implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) BuildJumpTable(table []byte, initialInstructions []asm.Node) { + ta.goasm.BuildJumpTable(table, initialInstructions) + ta.a.BuildJumpTable(table, initialInstructions) +} + +// CompileStandAlone implements Assembler.CompileStandAlone. +func (ta *testAssembler) CompileStandAlone(instruction asm.Instruction) asm.Node { + ret := ta.goasm.CompileStandAlone(instruction) + ret2 := ta.a.CompileStandAlone(instruction) + return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_arm64.NodeImpl)} +} + +// CompileConstToRegister implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileConstToRegister( + instruction asm.Instruction, + value asm.ConstantValue, + destinationReg asm.Register, +) asm.Node { + ret := ta.goasm.CompileConstToRegister(instruction, value, destinationReg) + ret2 := ta.a.CompileConstToRegister(instruction, value, destinationReg) + return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_arm64.NodeImpl)} +} + +// CompileRegisterToRegister implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) { + ta.goasm.CompileRegisterToRegister(instruction, from, to) + ta.a.CompileRegisterToRegister(instruction, from, to) +} + +// CompileMemoryToRegister implements the same method as documented on asm_arm64.Assembler. 
+func (ta *testAssembler) CompileMemoryToRegister( + instruction asm.Instruction, + sourceBaseReg asm.Register, + sourceOffsetConst asm.ConstantValue, + destinationReg asm.Register, +) { + ta.goasm.CompileMemoryToRegister(instruction, sourceBaseReg, sourceOffsetConst, destinationReg) + ta.a.CompileMemoryToRegister(instruction, sourceBaseReg, sourceOffsetConst, destinationReg) +} + +// CompileRegisterToMemory implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileRegisterToMemory( + instruction asm.Instruction, + sourceRegister, destinationBaseRegister asm.Register, + destinationOffsetConst asm.ConstantValue, +) { + ta.goasm.CompileRegisterToMemory(instruction, sourceRegister, destinationBaseRegister, destinationOffsetConst) + ta.a.CompileRegisterToMemory(instruction, sourceRegister, destinationBaseRegister, destinationOffsetConst) +} + +// CompileJump implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileJump(jmpInstruction asm.Instruction) asm.Node { + ret := ta.goasm.CompileJump(jmpInstruction) + ret2 := ta.a.CompileJump(jmpInstruction) + return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_arm64.NodeImpl)} +} + +// CompileJumpToMemory implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register) { + ta.goasm.CompileJumpToMemory(jmpInstruction, baseReg) + ta.a.CompileJumpToMemory(jmpInstruction, baseReg) +} + +// CompileJumpToRegister implements the same method as documented on asm_arm64.Assembler. +func (ta *testAssembler) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) { + ta.goasm.CompileJumpToRegister(jmpInstruction, reg) + ta.a.CompileJumpToRegister(jmpInstruction, reg) +} + +// CompileReadInstructionAddress implements the same method as documented on asm_arm64.Assembler. 
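+// Roughly, this emits code that loads into destinationRegister the absolute address of the instruction located by scanning forward for beforeAcquisitionTargetInstruction; see the golang-asm implementation in golang_asm.go below for the offset-patching details.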
+func (ta *testAssembler) CompileReadInstructionAddress( + destinationRegister asm.Register, + beforeAcquisitionTargetInstruction asm.Instruction, +) { + ta.goasm.CompileReadInstructionAddress(destinationRegister, beforeAcquisitionTargetInstruction) + ta.a.CompileReadInstructionAddress(destinationRegister, beforeAcquisitionTargetInstruction) +} + +func (ta *testAssembler) CompileMemoryWithRegisterOffsetToRegister( + instruction asm.Instruction, + srcBaseReg, srcOffsetReg, dstReg asm.Register, +) { + ta.goasm.CompileMemoryWithRegisterOffsetToRegister(instruction, srcBaseReg, srcOffsetReg, dstReg) + ta.a.CompileMemoryWithRegisterOffsetToRegister(instruction, srcBaseReg, srcOffsetReg, dstReg) +} + +func (ta *testAssembler) CompileRegisterToMemoryWithRegisterOffset( + instruction asm.Instruction, + srcReg, dstBaseReg, dstOffsetReg asm.Register, +) { + ta.goasm.CompileRegisterToMemoryWithRegisterOffset(instruction, srcReg, dstBaseReg, dstOffsetReg) + ta.a.CompileRegisterToMemoryWithRegisterOffset(instruction, srcReg, dstBaseReg, dstOffsetReg) +} + +func (ta *testAssembler) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) { + ta.goasm.CompileTwoRegistersToRegister(instruction, src1, src2, dst) + ta.a.CompileTwoRegistersToRegister(instruction, src1, src2, dst) +} + +func (ta *testAssembler) CompileThreeRegistersToRegister( + instruction asm.Instruction, + src1, src2, dst1, dst2 asm.Register, +) { + ta.goasm.CompileThreeRegistersToRegister(instruction, src1, src2, dst1, dst2) + ta.a.CompileThreeRegistersToRegister(instruction, src1, src2, dst1, dst2) +} + +func (ta *testAssembler) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) { + ta.goasm.CompileTwoRegistersToNone(instruction, src1, src2) + ta.a.CompileTwoRegistersToNone(instruction, src1, src2) +} + +func (ta *testAssembler) CompileRegisterAndConstToNone( + instruction asm.Instruction, + src asm.Register, + srcConst asm.ConstantValue, +) { + ta.goasm.CompileRegisterAndConstToNone(instruction, src, srcConst) + ta.a.CompileRegisterAndConstToNone(instruction, src, srcConst) +} + +func (ta *testAssembler) CompileLeftShiftedRegisterToRegister( + instruction asm.Instruction, + shiftedSourceReg asm.Register, + shiftNum asm.ConstantValue, + srcReg, dstReg asm.Register, +) { + ta.goasm.CompileLeftShiftedRegisterToRegister(instruction, shiftedSourceReg, shiftNum, srcReg, dstReg) + ta.a.CompileLeftShiftedRegisterToRegister(instruction, shiftedSourceReg, shiftNum, srcReg, dstReg) +} + +func (ta *testAssembler) CompileSIMDByteToSIMDByte(instruction asm.Instruction, srcReg, dstReg asm.Register) { + ta.goasm.CompileSIMDByteToSIMDByte(instruction, srcReg, dstReg) + ta.a.CompileSIMDByteToSIMDByte(instruction, srcReg, dstReg) +} + +func (ta *testAssembler) CompileTwoSIMDBytesToSIMDByteRegister( + instruction asm.Instruction, + srcReg1, srcReg2, dstReg asm.Register, +) { + ta.goasm.CompileTwoSIMDBytesToSIMDByteRegister(instruction, srcReg1, srcReg2, dstReg) + ta.a.CompileTwoSIMDBytesToSIMDByteRegister(instruction, srcReg1, srcReg2, dstReg) +} + +func (ta *testAssembler) CompileSIMDByteToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register) { + ta.goasm.CompileSIMDByteToRegister(instruction, srcReg, dstReg) + ta.a.CompileSIMDByteToRegister(instruction, srcReg, dstReg) +} + +func (ta *testAssembler) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) { + ta.goasm.CompileConditionalRegisterSet(cond, dstReg) + ta.a.CompileConditionalRegisterSet(cond, 
dstReg) +} diff --git a/internal/asm/arm64/golang_asm.go b/internal/asm/arm64_debug/golang_asm.go similarity index 51% rename from internal/asm/arm64/golang_asm.go rename to internal/asm/arm64_debug/golang_asm.go index 7901e0d812..187cbbe7c5 100644 --- a/internal/asm/arm64/golang_asm.go +++ b/internal/asm/arm64_debug/golang_asm.go @@ -1,4 +1,4 @@ -package asm_arm64 +package arm64debug import ( "fmt" @@ -8,16 +8,25 @@ import ( "github.com/twitchyliquid64/golang-asm/obj/arm64" "github.com/tetratelabs/wazero/internal/asm" + asm_arm64 "github.com/tetratelabs/wazero/internal/asm/arm64" "github.com/tetratelabs/wazero/internal/asm/golang_asm" ) -// assemblerGoAsmImpl implements Assembler for golang-asm library. +// newAssembler returns an implementation of asm_arm64.Assembler backed by the golang-asm library. +// It serves as the reference implementation when debugging our homemade assembler. +func newAssembler(temporaryRegister asm.Register) (*assemblerGoAsmImpl, error) { + g, err := golang_asm.NewGolangAsmBaseAssembler("arm64") + return &assemblerGoAsmImpl{GolangAsmBaseAssembler: g, temporaryRegister: temporaryRegister}, err +} + +// assemblerGoAsmImpl implements asm_arm64.Assembler using the golang-asm library. type assemblerGoAsmImpl struct { *golang_asm.GolangAsmBaseAssembler temporaryRegister asm.Register } -// CompileConstToRegister implements Assembler.CompileConstToRegisterInstruction. +// CompileConstToRegister implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileConstToRegister(instruction asm.Instruction, constValue asm.ConstantValue, destinationReg asm.Register) asm.Node { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] @@ -38,27 +47,19 @@ func (a *assemblerGoAsmImpl) CompileConstToRegister(instruction asm.Instruction, return golang_asm.NewGolangAsmNode(inst) } -// CompileMemoryToRegister implements AssemblerBase.CompileMemoryToRegister. +// CompileMemoryToRegister implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileMemoryToRegister(instruction asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) { - if sourceOffsetConst > math.MaxInt16 { - // The assembler can take care of offsets larger than 2^15-1 by emitting additional instructions to load such large offset, - // but it uses "its" temporary register which we cannot track. Therefore, we avoid directly emitting memory load with large offsets, - // but instead load the constant manually to "our" temporary register, then emit the load with it.
- a.CompileConstToRegister(MOVD, sourceOffsetConst, a.temporaryRegister) - a.CompileMemoryWithRegisterOffsetToRegister(instruction, sourceBaseReg, a.temporaryRegister, destinationReg) - } else { - inst := a.NewProg() - inst.As = castAsGolangAsmInstruction[instruction] - inst.From.Type = obj.TYPE_MEM - inst.From.Reg = castAsGolangAsmRegister[sourceBaseReg] - inst.From.Offset = sourceOffsetConst - inst.To.Type = obj.TYPE_REG - inst.To.Reg = castAsGolangAsmRegister[destinationReg] - a.AddInstruction(inst) - } + inst := a.NewProg() + inst.As = castAsGolangAsmInstruction[instruction] + inst.From.Type = obj.TYPE_MEM + inst.From.Reg = castAsGolangAsmRegister[sourceBaseReg] + inst.From.Offset = sourceOffsetConst + inst.To.Type = obj.TYPE_REG + inst.To.Reg = castAsGolangAsmRegister[destinationReg] + a.AddInstruction(inst) } -// CompileMemoryWithRegisterOffsetToRegister implements Assembler.CompileMemoryWithRegisterOffsetToRegister. +// CompileMemoryWithRegisterOffsetToRegister implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileMemoryWithRegisterOffsetToRegister(instruction asm.Instruction, sourceBaseReg, sourceOffsetReg, destinationReg asm.Register) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] @@ -71,24 +72,16 @@ func (a *assemblerGoAsmImpl) CompileMemoryWithRegisterOffsetToRegister(instructi a.AddInstruction(inst) } -// CompileRegisterToMemory implements Assembler.CompileRegisterToMemory. +// CompileRegisterToMemory implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileRegisterToMemory(instruction asm.Instruction, sourceReg asm.Register, destinationBaseReg asm.Register, destinationOffsetConst asm.ConstantValue) { - if destinationOffsetConst > math.MaxInt16 { - // The assembler can take care of offsets larger than 2^15-1 by emitting additional instructions to load such large offset, - // but we cannot track its temporary register. Therefore, we avoid directly emitting memory load with large offsets: - // load the constant manually to "our" temporary register, then emit the load with it. - a.CompileConstToRegister(MOVD, destinationOffsetConst, a.temporaryRegister) - a.CompileRegisterToMemoryWithRegisterOffset(instruction, sourceReg, destinationBaseReg, a.temporaryRegister) - } else { - inst := a.NewProg() - inst.As = castAsGolangAsmInstruction[instruction] - inst.To.Type = obj.TYPE_MEM - inst.To.Reg = castAsGolangAsmRegister[destinationBaseReg] - inst.To.Offset = destinationOffsetConst - inst.From.Type = obj.TYPE_REG - inst.From.Reg = castAsGolangAsmRegister[sourceReg] - a.AddInstruction(inst) - } + inst := a.NewProg() + inst.As = castAsGolangAsmInstruction[instruction] + inst.To.Type = obj.TYPE_MEM + inst.To.Reg = castAsGolangAsmRegister[destinationBaseReg] + inst.To.Offset = destinationOffsetConst + inst.From.Type = obj.TYPE_REG + inst.From.Reg = castAsGolangAsmRegister[sourceReg] + a.AddInstruction(inst) } // CompileRegisterToMemoryWithRegisterOffset implements Assembler.CompileRegisterToMemoryWithRegisterOffset. @@ -104,7 +97,7 @@ func (a *assemblerGoAsmImpl) CompileRegisterToMemoryWithRegisterOffset(instructi a.AddInstruction(inst) } -// CompileRegisterToRegister implements Assembler.CompileRegisterToRegister. +// CompileRegisterToRegister implements the same method as documented on asm_arm64.Assembler. 
func (a *assemblerGoAsmImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] @@ -115,7 +108,7 @@ func (a *assemblerGoAsmImpl) CompileRegisterToRegister(instruction asm.Instructi a.AddInstruction(inst) } -// CompileTwoRegistersToRegister implements Assembler.CompileTwoRegistersToRegister. +// CompileTwoRegistersToRegister implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, destination asm.Register) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] @@ -127,25 +120,25 @@ func (a *assemblerGoAsmImpl) CompileTwoRegistersToRegister(instruction asm.Instr a.AddInstruction(inst) } -// CompileTwoRegisters implements Assembler.CompileTwoRegisters. -func (a *assemblerGoAsmImpl) CompileTwoRegisters(instruction asm.Instruction, src1, src2, dst1, dst2 asm.Register) { +// CompileThreeRegistersToRegister implements the same method as documented on asm_arm64.Assembler. +func (a *assemblerGoAsmImpl) CompileThreeRegistersToRegister(instruction asm.Instruction, src1, src2, src3, dst asm.Register) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] inst.To.Type = obj.TYPE_REG - inst.To.Reg = castAsGolangAsmRegister[dst1] + inst.To.Reg = castAsGolangAsmRegister[dst] inst.From.Type = obj.TYPE_REG inst.From.Reg = castAsGolangAsmRegister[src1] inst.Reg = castAsGolangAsmRegister[src2] - inst.RestArgs = append(inst.RestArgs, obj.Addr{Type: obj.TYPE_REG, Reg: castAsGolangAsmRegister[dst2]}) + inst.RestArgs = append(inst.RestArgs, obj.Addr{Type: obj.TYPE_REG, Reg: castAsGolangAsmRegister[src3]}) a.AddInstruction(inst) } -// CompileTwoRegistersToNone implements Assembler.CompileTwoRegistersToNone. +// CompileTwoRegistersToNone implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] // TYPE_NONE indicates that this instruction doesn't have a destination. - // Note: this line is deletable as the value equals zero in anyway. + // Note: this line is deletable as the value equals zero anyway. inst.To.Type = obj.TYPE_NONE inst.From.Type = obj.TYPE_REG inst.From.Reg = castAsGolangAsmRegister[src1] @@ -153,12 +146,12 @@ func (a *assemblerGoAsmImpl) CompileTwoRegistersToNone(instruction asm.Instructi a.AddInstruction(inst) } -// CompileRegisterAndConstSourceToNone implements Assembler.CompileRegisterAndConstSourceToNone. -func (a *assemblerGoAsmImpl) CompileRegisterAndConstSourceToNone(instruction asm.Instruction, src asm.Register, srcConst asm.ConstantValue) { +// CompileRegisterAndConstToNone implements the same method as documented on asm_arm64.Assembler. +func (a *assemblerGoAsmImpl) CompileRegisterAndConstToNone(instruction asm.Instruction, src asm.Register, srcConst asm.ConstantValue) { inst := a.NewProg() inst.As = castAsGolangAsmInstruction[instruction] // TYPE_NONE indicates that this instruction doesn't have a destination. - // Note: this line is deletable as the value equals zero in anyway. + // Note: this line is deletable as the value equals zero anyway. 
inst.To.Type = obj.TYPE_NONE inst.From.Type = obj.TYPE_CONST inst.From.Offset = srcConst @@ -166,7 +159,7 @@ func (a *assemblerGoAsmImpl) CompileRegisterAndConstSourceToNone(instruction asm a.AddInstruction(inst) } -// CompileJump implements AssemblerBase.CompileJump. +// CompileJump implements the same method as documented on asm.AssemblerBase. func (a *assemblerGoAsmImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node { br := a.NewProg() br.As = castAsGolangAsmInstruction[jmpInstruction] @@ -175,17 +168,16 @@ func (a *assemblerGoAsmImpl) CompileJump(jmpInstruction asm.Instruction) asm.Nod return golang_asm.NewGolangAsmNode(br) } -// CompileJumpToMemory implements AssemblerBase.CompileJumpToMemory. -func (a *assemblerGoAsmImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) { +// CompileJumpToMemory implements the same method as documented on asm.AssemblerBase. +func (a *assemblerGoAsmImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register) { br := a.NewProg() br.As = castAsGolangAsmInstruction[jmpInstruction] br.To.Type = obj.TYPE_MEM br.To.Reg = castAsGolangAsmRegister[baseReg] - br.To.Offset = offset a.AddInstruction(br) } -// CompileJumpToRegister implements AssemblerBase.CompileJumpToRegister. +// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase. func (a *assemblerGoAsmImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) { ret := a.NewProg() ret.As = castAsGolangAsmInstruction[jmpInstruction] @@ -194,7 +186,7 @@ func (a *assemblerGoAsmImpl) CompileJumpToRegister(jmpInstruction asm.Instructio a.AddInstruction(ret) } -// CompileStandAlone implements AssemblerBase.CompileStandAlone. +// CompileStandAlone implements the same method as documented on asm.AssemblerBase. func (a *assemblerGoAsmImpl) CompileStandAlone(instruction asm.Instruction) asm.Node { prog := a.NewProg() prog.As = castAsGolangAsmInstruction[instruction] @@ -202,10 +194,10 @@ func (a *assemblerGoAsmImpl) CompileStandAlone(instruction asm.Instruction) asm. return golang_asm.NewGolangAsmNode(prog) } -// CompileLeftShiftedRegisterToRegister implements Assembler.CompileLeftShiftedRegisterToRegister. -func (a *assemblerGoAsmImpl) CompileLeftShiftedRegisterToRegister(shiftedSourceReg asm.Register, shiftNum asm.ConstantValue, srcReg, destinationReg asm.Register) { +// CompileLeftShiftedRegisterToRegister implements the same method as documented on asm_arm64.Assembler. +func (a *assemblerGoAsmImpl) CompileLeftShiftedRegisterToRegister(instruction asm.Instruction, shiftedSourceReg asm.Register, shiftNum asm.ConstantValue, srcReg, destinationReg asm.Register) { inst := a.NewProg() - inst.As = arm64.AADD + inst.As = castAsGolangAsmInstruction[instruction] inst.To.Type = obj.TYPE_REG inst.To.Reg = castAsGolangAsmRegister[destinationReg] // See https://github.com/twitchyliquid64/golang-asm/blob/v0.15.1/obj/link.go#L120-L131 @@ -215,7 +207,7 @@ func (a *assemblerGoAsmImpl) CompileLeftShiftedRegisterToRegister(shiftedSourceR a.AddInstruction(inst) } -// CompileReadInstructionAddress implements AssemblerBase.CompileReadInstructionAddress. +// CompileReadInstructionAddress implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileReadInstructionAddress(destinationReg asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) { // Emit ADR instruction to read the specified instruction's absolute address. 
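+ // The target's PC-relative offset is unknown at this point, so a post-assembly callback below patches the offset bits into the encoded ADR.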
// Note: we cannot emit the "ADR REG, $(target's offset from here)" due to the @@ -247,7 +239,7 @@ func (a *assemblerGoAsmImpl) CompileReadInstructionAddress(destinationReg asm.Re } if target == nil { - return fmt.Errorf("BUG: target instruction not found for read instruction address") + return fmt.Errorf("BUG: target instruction %s not found for read instruction address", asm_arm64.InstructionName(beforeAcquisitionTargetInstruction)) } offset := target.Pc - readAddress.Pc @@ -257,24 +249,22 @@ func (a *assemblerGoAsmImpl) CompileReadInstructionAddress(destinationReg asm.Re return fmt.Errorf("BUG: too large offset for read") } - // Now ready to write an offset byte. v := byte(offset) - // arm64 has 4-bytes = 32-bit fixed-length instruction. - adrInstructionBytes := code[readAddress.Pc : readAddress.Pc+4] + adrInst := code[readAddress.Pc : readAddress.Pc+4] // According to the binary format of ADR instruction in arm64: // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en // // The 0 to 1 bits live on 29 to 30 bits of the instruction. - adrInstructionBytes[3] |= (v & 0b00000011) << 5 + adrInst[3] |= (v & 0b00000011) << 5 // The 2 to 4 bits live on 5 to 7 bits of the instruction. - adrInstructionBytes[0] |= (v & 0b00011100) << 3 + adrInst[0] |= (v & 0b00011100) << 3 // The 5 to 7 bits live on 8 to 10 bits of the instruction. - adrInstructionBytes[1] |= (v & 0b11100000) >> 5 + adrInst[1] |= (v & 0b11100000) >> 5 return nil }) } -// CompileConditionalRegisterSet implements Assembler.CompileConditionalRegisterSet. +// CompileConditionalRegisterSet implements the same method as documented on asm_arm64.Assembler. // // We use CSET instruction to set 1 on the register if the condition satisfies: // https://developer.arm.com/documentation/100076/0100/a64-instruction-set-reference/a64-general-instructions/cset @@ -294,8 +284,8 @@ func simdRegisterForScalarFloatRegister(freg int16) int16 { return freg + (arm64.REG_F31 - arm64.REG_F0) + 1 } -// CompileTwoSIMDByteToRegister implements Assembler.CompileTwoSIMDByteToRegister. +// CompileTwoSIMDBytesToSIMDByteRegister implements the same method as documented on asm_arm64.Assembler. +func (a *assemblerGoAsmImpl) CompileTwoSIMDBytesToSIMDByteRegister(instruction asm.Instruction, srcReg1, srcReg2, dstReg asm.Register) { src1FloatReg, src2FloatReg, dstFloatReg := castAsGolangAsmRegister[srcReg1], castAsGolangAsmRegister[srcReg2], castAsGolangAsmRegister[dstReg] src1VReg, src2VReg, dstVReg := simdRegisterForScalarFloatRegister(src1FloatReg), simdRegisterForScalarFloatRegister(src2FloatReg), simdRegisterForScalarFloatRegister(dstFloatReg) @@ -312,7 +302,7 @@ func (a *assemblerGoAsmImpl) CompileTwoSIMDByteToRegister(instruction asm.Instru } -// CompileSIMDByteToSIMDByte implements Assembler.CompileSIMDByteToSIMDByte. +// CompileSIMDByteToSIMDByte implements the same method as documented on asm_arm64.Assembler.
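+// The scalar float registers are first translated to their SIMD vector aliases via simdRegisterForScalarFloatRegister above.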
func (a *assemblerGoAsmImpl) CompileSIMDByteToSIMDByte(instruction asm.Instruction, srcReg, dstReg asm.Register) { srcFloatReg, dstFloatReg := castAsGolangAsmRegister[srcReg], castAsGolangAsmRegister[dstReg] srcVReg, dstVReg := simdRegisterForScalarFloatRegister(srcFloatReg), simdRegisterForScalarFloatRegister(dstFloatReg) @@ -328,7 +318,7 @@ func (a *assemblerGoAsmImpl) CompileSIMDByteToSIMDByte(instruction asm.Instructi a.AddInstruction(inst) } -// CompileSIMDByteToRegister implements Assembler.CompileSIMDByteToRegister. +// CompileSIMDByteToRegister implements the same method as documented on asm_arm64.Assembler. func (a *assemblerGoAsmImpl) CompileSIMDByteToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register) { srcFloatReg, dstFlaotReg := castAsGolangAsmRegister[srcReg], castAsGolangAsmRegister[dstReg] srcVReg, dstVReg := simdRegisterForScalarFloatRegister(srcFloatReg), simdRegisterForScalarFloatRegister(dstFlaotReg) @@ -346,212 +336,211 @@ func (a *assemblerGoAsmImpl) CompileSIMDByteToRegister(instruction asm.Instructi // castAsGolangAsmConditionalRegister maps the conditional states to golang-asm specific conditional state register values. var castAsGolangAsmConditionalRegister = [...]int16{ - COND_EQ: arm64.COND_EQ, - COND_NE: arm64.COND_NE, - COND_HS: arm64.COND_HS, - COND_LO: arm64.COND_LO, - COND_MI: arm64.COND_MI, - COND_PL: arm64.COND_PL, - COND_VS: arm64.COND_VS, - COND_VC: arm64.COND_VC, - COND_HI: arm64.COND_HI, - COND_LS: arm64.COND_LS, - COND_GE: arm64.COND_GE, - COND_LT: arm64.COND_LT, - COND_GT: arm64.COND_GT, - COND_LE: arm64.COND_LE, - COND_AL: arm64.COND_AL, - COND_NV: arm64.COND_NV, + asm_arm64.COND_EQ: arm64.COND_EQ, + asm_arm64.COND_NE: arm64.COND_NE, + asm_arm64.COND_HS: arm64.COND_HS, + asm_arm64.COND_LO: arm64.COND_LO, + asm_arm64.COND_MI: arm64.COND_MI, + asm_arm64.COND_PL: arm64.COND_PL, + asm_arm64.COND_VS: arm64.COND_VS, + asm_arm64.COND_VC: arm64.COND_VC, + asm_arm64.COND_HI: arm64.COND_HI, + asm_arm64.COND_LS: arm64.COND_LS, + asm_arm64.COND_GE: arm64.COND_GE, + asm_arm64.COND_LT: arm64.COND_LT, + asm_arm64.COND_GT: arm64.COND_GT, + asm_arm64.COND_LE: arm64.COND_LE, + asm_arm64.COND_AL: arm64.COND_AL, + asm_arm64.COND_NV: arm64.COND_NV, } // castAsGolangAsmRegister maps the registers to golang-asm specific registers values. 
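+// It is a sparse array indexed by our register constants (small consecutive integers) rather than a map; e.g. castAsGolangAsmRegister[asm_arm64.REG_R10] yields golang-asm's arm64.REG_R10.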
var castAsGolangAsmRegister = [...]int16{ - REG_R0: arm64.REG_R0, - REG_R1: arm64.REG_R1, - REG_R2: arm64.REG_R2, - REG_R3: arm64.REG_R3, - REG_R4: arm64.REG_R4, - REG_R5: arm64.REG_R5, - REG_R6: arm64.REG_R6, - REG_R7: arm64.REG_R7, - REG_R8: arm64.REG_R8, - REG_R9: arm64.REG_R9, - REG_R10: arm64.REG_R10, - REG_R11: arm64.REG_R11, - REG_R12: arm64.REG_R12, - REG_R13: arm64.REG_R13, - REG_R14: arm64.REG_R14, - REG_R15: arm64.REG_R15, - REG_R16: arm64.REG_R16, - REG_R17: arm64.REG_R17, - REG_R18: arm64.REG_R18, - REG_R19: arm64.REG_R19, - REG_R20: arm64.REG_R20, - REG_R21: arm64.REG_R21, - REG_R22: arm64.REG_R22, - REG_R23: arm64.REG_R23, - REG_R24: arm64.REG_R24, - REG_R25: arm64.REG_R25, - REG_R26: arm64.REG_R26, - REG_R27: arm64.REG_R27, - REG_R28: arm64.REG_R28, - REG_R29: arm64.REG_R29, - REG_R30: arm64.REG_R30, - REGZERO: arm64.REGZERO, - REG_F0: arm64.REG_F0, - REG_F1: arm64.REG_F1, - REG_F2: arm64.REG_F2, - REG_F3: arm64.REG_F3, - REG_F4: arm64.REG_F4, - REG_F5: arm64.REG_F5, - REG_F6: arm64.REG_F6, - REG_F7: arm64.REG_F7, - REG_F8: arm64.REG_F8, - REG_F9: arm64.REG_F9, - REG_F10: arm64.REG_F10, - REG_F11: arm64.REG_F11, - REG_F12: arm64.REG_F12, - REG_F13: arm64.REG_F13, - REG_F14: arm64.REG_F14, - REG_F15: arm64.REG_F15, - REG_F16: arm64.REG_F16, - REG_F17: arm64.REG_F17, - REG_F18: arm64.REG_F18, - REG_F19: arm64.REG_F19, - REG_F20: arm64.REG_F20, - REG_F21: arm64.REG_F21, - REG_F22: arm64.REG_F22, - REG_F23: arm64.REG_F23, - REG_F24: arm64.REG_F24, - REG_F25: arm64.REG_F25, - REG_F26: arm64.REG_F26, - REG_F27: arm64.REG_F27, - REG_F28: arm64.REG_F28, - REG_F29: arm64.REG_F29, - REG_F30: arm64.REG_F30, - REG_F31: arm64.REG_F31, - REG_FPSR: arm64.REG_FPSR, + asm_arm64.REG_R0: arm64.REG_R0, + asm_arm64.REG_R1: arm64.REG_R1, + asm_arm64.REG_R2: arm64.REG_R2, + asm_arm64.REG_R3: arm64.REG_R3, + asm_arm64.REG_R4: arm64.REG_R4, + asm_arm64.REG_R5: arm64.REG_R5, + asm_arm64.REG_R6: arm64.REG_R6, + asm_arm64.REG_R7: arm64.REG_R7, + asm_arm64.REG_R8: arm64.REG_R8, + asm_arm64.REG_R9: arm64.REG_R9, + asm_arm64.REG_R10: arm64.REG_R10, + asm_arm64.REG_R11: arm64.REG_R11, + asm_arm64.REG_R12: arm64.REG_R12, + asm_arm64.REG_R13: arm64.REG_R13, + asm_arm64.REG_R14: arm64.REG_R14, + asm_arm64.REG_R15: arm64.REG_R15, + asm_arm64.REG_R16: arm64.REG_R16, + asm_arm64.REG_R17: arm64.REG_R17, + asm_arm64.REG_R18: arm64.REG_R18, + asm_arm64.REG_R19: arm64.REG_R19, + asm_arm64.REG_R20: arm64.REG_R20, + asm_arm64.REG_R21: arm64.REG_R21, + asm_arm64.REG_R22: arm64.REG_R22, + asm_arm64.REG_R23: arm64.REG_R23, + asm_arm64.REG_R24: arm64.REG_R24, + asm_arm64.REG_R25: arm64.REG_R25, + asm_arm64.REG_R26: arm64.REG_R26, + asm_arm64.REG_R27: arm64.REG_R27, + asm_arm64.REG_R28: arm64.REG_R28, + asm_arm64.REG_R29: arm64.REG_R29, + asm_arm64.REG_R30: arm64.REG_R30, + asm_arm64.REGZERO: arm64.REGZERO, + asm_arm64.REG_F0: arm64.REG_F0, + asm_arm64.REG_F1: arm64.REG_F1, + asm_arm64.REG_F2: arm64.REG_F2, + asm_arm64.REG_F3: arm64.REG_F3, + asm_arm64.REG_F4: arm64.REG_F4, + asm_arm64.REG_F5: arm64.REG_F5, + asm_arm64.REG_F6: arm64.REG_F6, + asm_arm64.REG_F7: arm64.REG_F7, + asm_arm64.REG_F8: arm64.REG_F8, + asm_arm64.REG_F9: arm64.REG_F9, + asm_arm64.REG_F10: arm64.REG_F10, + asm_arm64.REG_F11: arm64.REG_F11, + asm_arm64.REG_F12: arm64.REG_F12, + asm_arm64.REG_F13: arm64.REG_F13, + asm_arm64.REG_F14: arm64.REG_F14, + asm_arm64.REG_F15: arm64.REG_F15, + asm_arm64.REG_F16: arm64.REG_F16, + asm_arm64.REG_F17: arm64.REG_F17, + asm_arm64.REG_F18: arm64.REG_F18, + asm_arm64.REG_F19: arm64.REG_F19, + asm_arm64.REG_F20: 
arm64.REG_F20, + asm_arm64.REG_F21: arm64.REG_F21, + asm_arm64.REG_F22: arm64.REG_F22, + asm_arm64.REG_F23: arm64.REG_F23, + asm_arm64.REG_F24: arm64.REG_F24, + asm_arm64.REG_F25: arm64.REG_F25, + asm_arm64.REG_F26: arm64.REG_F26, + asm_arm64.REG_F27: arm64.REG_F27, + asm_arm64.REG_F28: arm64.REG_F28, + asm_arm64.REG_F29: arm64.REG_F29, + asm_arm64.REG_F30: arm64.REG_F30, + asm_arm64.REG_F31: arm64.REG_F31, + asm_arm64.REG_FPSR: arm64.REG_FPSR, } // castAsGolangAsmInstruction maps the instructions to golang-asm specific instructions values. var castAsGolangAsmInstruction = [...]obj.As{ - NOP: obj.ANOP, - RET: obj.ARET, - ADD: arm64.AADD, - ADDW: arm64.AADDW, - ADR: arm64.AADR, - AND: arm64.AAND, - ANDW: arm64.AANDW, - ASR: arm64.AASR, - ASRW: arm64.AASRW, - B: arm64.AB, - BEQ: arm64.ABEQ, - BGE: arm64.ABGE, - BGT: arm64.ABGT, - BHI: arm64.ABHI, - BHS: arm64.ABHS, - BLE: arm64.ABLE, - BLO: arm64.ABLO, - BLS: arm64.ABLS, - BLT: arm64.ABLT, - BMI: arm64.ABMI, - BNE: arm64.ABNE, - BVS: arm64.ABVS, - CLZ: arm64.ACLZ, - CLZW: arm64.ACLZW, - CMP: arm64.ACMP, - CMPW: arm64.ACMPW, - CSET: arm64.ACSET, - EOR: arm64.AEOR, - EORW: arm64.AEORW, - FABSD: arm64.AFABSD, - FABSS: arm64.AFABSS, - FADDD: arm64.AFADDD, - FADDS: arm64.AFADDS, - FCMPD: arm64.AFCMPD, - FCMPS: arm64.AFCMPS, - FCVTDS: arm64.AFCVTDS, - FCVTSD: arm64.AFCVTSD, - FCVTZSD: arm64.AFCVTZSD, - FCVTZSDW: arm64.AFCVTZSDW, - FCVTZSS: arm64.AFCVTZSS, - FCVTZSSW: arm64.AFCVTZSSW, - FCVTZUD: arm64.AFCVTZUD, - FCVTZUDW: arm64.AFCVTZUDW, - FCVTZUS: arm64.AFCVTZUS, - FCVTZUSW: arm64.AFCVTZUSW, - FDIVD: arm64.AFDIVD, - FDIVS: arm64.AFDIVS, - FMAXD: arm64.AFMAXD, - FMAXS: arm64.AFMAXS, - FMIND: arm64.AFMIND, - FMINS: arm64.AFMINS, - FMOVD: arm64.AFMOVD, - FMOVS: arm64.AFMOVS, - FMULD: arm64.AFMULD, - FMULS: arm64.AFMULS, - FNEGD: arm64.AFNEGD, - FNEGS: arm64.AFNEGS, - FRINTMD: arm64.AFRINTMD, - FRINTMS: arm64.AFRINTMS, - FRINTND: arm64.AFRINTND, - FRINTNS: arm64.AFRINTNS, - FRINTPD: arm64.AFRINTPD, - FRINTPS: arm64.AFRINTPS, - FRINTZD: arm64.AFRINTZD, - FRINTZS: arm64.AFRINTZS, - FSQRTD: arm64.AFSQRTD, - FSQRTS: arm64.AFSQRTS, - FSUBD: arm64.AFSUBD, - FSUBS: arm64.AFSUBS, - LSL: arm64.ALSL, - LSLW: arm64.ALSLW, - LSR: arm64.ALSR, - LSRW: arm64.ALSRW, - MOVB: arm64.AMOVB, - MOVBU: arm64.AMOVBU, - MOVD: arm64.AMOVD, - MOVH: arm64.AMOVH, - MOVHU: arm64.AMOVHU, - MOVW: arm64.AMOVW, - MOVWU: arm64.AMOVWU, - MRS: arm64.AMRS, - MSR: arm64.AMSR, - MSUB: arm64.AMSUB, - MSUBW: arm64.AMSUBW, - MUL: arm64.AMUL, - MULW: arm64.AMULW, - NEG: arm64.ANEG, - NEGW: arm64.ANEGW, - ORR: arm64.AORR, - ORRW: arm64.AORRW, - RBIT: arm64.ARBIT, - RBITW: arm64.ARBITW, - // RNG: arm64.ARNG, TODO!!!!!!! 
- ROR: arm64.AROR, - RORW: arm64.ARORW, - SCVTFD: arm64.ASCVTFD, - SCVTFS: arm64.ASCVTFS, - SCVTFWD: arm64.ASCVTFWD, - SCVTFWS: arm64.ASCVTFWS, - SDIV: arm64.ASDIV, - SDIVW: arm64.ASDIVW, - SUB: arm64.ASUB, - SUBS: arm64.ASUBS, - SUBW: arm64.ASUBW, - SXTB: arm64.ASXTB, - SXTBW: arm64.ASXTBW, - SXTH: arm64.ASXTH, - SXTHW: arm64.ASXTHW, - SXTW: arm64.ASXTW, - UCVTFD: arm64.AUCVTFD, - UCVTFS: arm64.AUCVTFS, - UCVTFWD: arm64.AUCVTFWD, - UCVTFWS: arm64.AUCVTFWS, - UDIV: arm64.AUDIV, - UDIVW: arm64.AUDIVW, - UXTW: arm64.AUXTW, - VBIT: arm64.AVBIT, - VCNT: arm64.AVCNT, - VUADDLV: arm64.AVUADDLV, + asm_arm64.NOP: obj.ANOP, + asm_arm64.RET: obj.ARET, + asm_arm64.ADD: arm64.AADD, + asm_arm64.ADDW: arm64.AADDW, + asm_arm64.ADR: arm64.AADR, + asm_arm64.AND: arm64.AAND, + asm_arm64.ANDW: arm64.AANDW, + asm_arm64.ASR: arm64.AASR, + asm_arm64.ASRW: arm64.AASRW, + asm_arm64.B: arm64.AB, + asm_arm64.BEQ: arm64.ABEQ, + asm_arm64.BGE: arm64.ABGE, + asm_arm64.BGT: arm64.ABGT, + asm_arm64.BHI: arm64.ABHI, + asm_arm64.BHS: arm64.ABHS, + asm_arm64.BLE: arm64.ABLE, + asm_arm64.BLO: arm64.ABLO, + asm_arm64.BLS: arm64.ABLS, + asm_arm64.BLT: arm64.ABLT, + asm_arm64.BMI: arm64.ABMI, + asm_arm64.BNE: arm64.ABNE, + asm_arm64.BVS: arm64.ABVS, + asm_arm64.CLZ: arm64.ACLZ, + asm_arm64.CLZW: arm64.ACLZW, + asm_arm64.CMP: arm64.ACMP, + asm_arm64.CMPW: arm64.ACMPW, + asm_arm64.CSET: arm64.ACSET, + asm_arm64.EOR: arm64.AEOR, + asm_arm64.EORW: arm64.AEORW, + asm_arm64.FABSD: arm64.AFABSD, + asm_arm64.FABSS: arm64.AFABSS, + asm_arm64.FADDD: arm64.AFADDD, + asm_arm64.FADDS: arm64.AFADDS, + asm_arm64.FCMPD: arm64.AFCMPD, + asm_arm64.FCMPS: arm64.AFCMPS, + asm_arm64.FCVTDS: arm64.AFCVTDS, + asm_arm64.FCVTSD: arm64.AFCVTSD, + asm_arm64.FCVTZSD: arm64.AFCVTZSD, + asm_arm64.FCVTZSDW: arm64.AFCVTZSDW, + asm_arm64.FCVTZSS: arm64.AFCVTZSS, + asm_arm64.FCVTZSSW: arm64.AFCVTZSSW, + asm_arm64.FCVTZUD: arm64.AFCVTZUD, + asm_arm64.FCVTZUDW: arm64.AFCVTZUDW, + asm_arm64.FCVTZUS: arm64.AFCVTZUS, + asm_arm64.FCVTZUSW: arm64.AFCVTZUSW, + asm_arm64.FDIVD: arm64.AFDIVD, + asm_arm64.FDIVS: arm64.AFDIVS, + asm_arm64.FMAXD: arm64.AFMAXD, + asm_arm64.FMAXS: arm64.AFMAXS, + asm_arm64.FMIND: arm64.AFMIND, + asm_arm64.FMINS: arm64.AFMINS, + asm_arm64.FMOVD: arm64.AFMOVD, + asm_arm64.FMOVS: arm64.AFMOVS, + asm_arm64.FMULD: arm64.AFMULD, + asm_arm64.FMULS: arm64.AFMULS, + asm_arm64.FNEGD: arm64.AFNEGD, + asm_arm64.FNEGS: arm64.AFNEGS, + asm_arm64.FRINTMD: arm64.AFRINTMD, + asm_arm64.FRINTMS: arm64.AFRINTMS, + asm_arm64.FRINTND: arm64.AFRINTND, + asm_arm64.FRINTNS: arm64.AFRINTNS, + asm_arm64.FRINTPD: arm64.AFRINTPD, + asm_arm64.FRINTPS: arm64.AFRINTPS, + asm_arm64.FRINTZD: arm64.AFRINTZD, + asm_arm64.FRINTZS: arm64.AFRINTZS, + asm_arm64.FSQRTD: arm64.AFSQRTD, + asm_arm64.FSQRTS: arm64.AFSQRTS, + asm_arm64.FSUBD: arm64.AFSUBD, + asm_arm64.FSUBS: arm64.AFSUBS, + asm_arm64.LSL: arm64.ALSL, + asm_arm64.LSLW: arm64.ALSLW, + asm_arm64.LSR: arm64.ALSR, + asm_arm64.LSRW: arm64.ALSRW, + asm_arm64.MOVB: arm64.AMOVB, + asm_arm64.MOVBU: arm64.AMOVBU, + asm_arm64.MOVD: arm64.AMOVD, + asm_arm64.MOVH: arm64.AMOVH, + asm_arm64.MOVHU: arm64.AMOVHU, + asm_arm64.MOVW: arm64.AMOVW, + asm_arm64.MOVWU: arm64.AMOVWU, + asm_arm64.MRS: arm64.AMRS, + asm_arm64.MSR: arm64.AMSR, + asm_arm64.MSUB: arm64.AMSUB, + asm_arm64.MSUBW: arm64.AMSUBW, + asm_arm64.MUL: arm64.AMUL, + asm_arm64.MULW: arm64.AMULW, + asm_arm64.NEG: arm64.ANEG, + asm_arm64.NEGW: arm64.ANEGW, + asm_arm64.ORR: arm64.AORR, + asm_arm64.ORRW: arm64.AORRW, + asm_arm64.RBIT: arm64.ARBIT, + asm_arm64.RBITW: arm64.ARBITW, + 
asm_arm64.ROR: arm64.AROR, + asm_arm64.RORW: arm64.ARORW, + asm_arm64.SCVTFD: arm64.ASCVTFD, + asm_arm64.SCVTFS: arm64.ASCVTFS, + asm_arm64.SCVTFWD: arm64.ASCVTFWD, + asm_arm64.SCVTFWS: arm64.ASCVTFWS, + asm_arm64.SDIV: arm64.ASDIV, + asm_arm64.SDIVW: arm64.ASDIVW, + asm_arm64.SUB: arm64.ASUB, + asm_arm64.SUBS: arm64.ASUBS, + asm_arm64.SUBW: arm64.ASUBW, + asm_arm64.SXTB: arm64.ASXTB, + asm_arm64.SXTBW: arm64.ASXTBW, + asm_arm64.SXTH: arm64.ASXTH, + asm_arm64.SXTHW: arm64.ASXTHW, + asm_arm64.SXTW: arm64.ASXTW, + asm_arm64.UCVTFD: arm64.AUCVTFD, + asm_arm64.UCVTFS: arm64.AUCVTFS, + asm_arm64.UCVTFWD: arm64.AUCVTFWD, + asm_arm64.UCVTFWS: arm64.AUCVTFWS, + asm_arm64.UDIV: arm64.AUDIV, + asm_arm64.UDIVW: arm64.AUDIVW, + asm_arm64.UXTW: arm64.AUXTW, + asm_arm64.VBIT: arm64.AVBIT, + asm_arm64.VCNT: arm64.AVCNT, + asm_arm64.VUADDLV: arm64.AVUADDLV, } diff --git a/internal/asm/arm64_debug/impl_test.go b/internal/asm/arm64_debug/impl_test.go new file mode 100644 index 0000000000..00216bca2f --- /dev/null +++ b/internal/asm/arm64_debug/impl_test.go @@ -0,0 +1,1241 @@ +package arm64debug + +import ( + "encoding/hex" + "fmt" + "math" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/tetratelabs/wazero/internal/asm" + asm_arm64 "github.com/tetratelabs/wazero/internal/asm/arm64" +) + +// TODO: Comment why tmpReg is unused. +func newGoasmAssembler(t *testing.T, _ asm.Register) asm_arm64.Assembler { + a, err := newAssembler(asm.NilRegister) + require.NoError(t, err) + a.CompileStandAlone(asm_arm64.NOP) + return a +} + +func TestAssemblerImpl_encodeNoneToNone(t *testing.T) { + t.Run("error", func(t *testing.T) { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeNoneToNone(&asm_arm64.NodeImpl{Instruction: asm_arm64.ADD}) + require.EqualError(t, err, "ADD is unsupported for from:none,to:none type") + }) + t.Run("ok", func(t *testing.T) { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeNoneToNone(&asm_arm64.NodeImpl{Instruction: asm_arm64.NOP}) + require.NoError(t, err) + + // NOP must be ignored. 
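+ // (the homemade assembler emits no machine code for it)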
+ actual := a.Buf.Bytes() + require.Len(t, actual, 0) + }) +} + +var intRegisters = []asm.Register{ + asm_arm64.REG_R0, asm_arm64.REG_R1, asm_arm64.REG_R2, asm_arm64.REG_R3, asm_arm64.REG_R4, asm_arm64.REG_R5, asm_arm64.REG_R6, + asm_arm64.REG_R7, asm_arm64.REG_R8, asm_arm64.REG_R9, asm_arm64.REG_R10, asm_arm64.REG_R11, asm_arm64.REG_R12, asm_arm64.REG_R13, + asm_arm64.REG_R14, asm_arm64.REG_R15, asm_arm64.REG_R16, asm_arm64.REG_R17, asm_arm64.REG_R18, asm_arm64.REG_R19, asm_arm64.REG_R20, + asm_arm64.REG_R21, asm_arm64.REG_R22, asm_arm64.REG_R23, asm_arm64.REG_R24, asm_arm64.REG_R25, asm_arm64.REG_R26, asm_arm64.REG_R27, + asm_arm64.REG_R28, asm_arm64.REG_R29, asm_arm64.REG_R30, +} + +func TestAssemblerImpl_EncodeJumpToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADD, Types: asm_arm64.OperandTypesNoneToRegister}, + expErr: "ADD is unsupported for from:none,to:register type", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.RET, DstReg: asm.NilRegister}, + expErr: "invalid destination register: nil is not integer", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.RET, DstReg: asm_arm64.REG_F0}, + expErr: "invalid destination register: F0 is not integer", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeJumpToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + t.Run("ok", func(t *testing.T) { + for _, inst := range []asm.Instruction{ + asm_arm64.B, + asm_arm64.RET, + } { + t.Run(asm_arm64.InstructionName(inst), func(t *testing.T) { + for _, r := range intRegisters { + t.Run(asm_arm64.RegisterName(r), func(t *testing.T) { + // TODO: remove golang-asm dependency in tests. 
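+ // Until then, golang-asm's output serves as the reference encoding that ours must reproduce byte-for-byte.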
+ goasm := newGoasmAssembler(t, asm.NilRegister) + if inst == asm_arm64.RET { + goasm.CompileJumpToRegister(inst, r) + } else { + goasm.CompileJumpToMemory(inst, r) + } + + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeJumpToRegister(&asm_arm64.NodeImpl{Instruction: inst, DstReg: r}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + }) + } + }) +} + +func TestAssemblerImpl_EncodeLeftShiftedRegisterToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.SUB, Types: asm_arm64.OperandTypesLeftShiftedRegisterToRegister, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "SUB is unsupported for from:left-shifted-register,to:register type", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADD, + SrcConst: -1, SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "shift amount must fit in unsigned 6-bit integer (0-64) but got -1", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADD, + SrcConst: -1, SrcReg: asm_arm64.REG_F0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "F0 is not integer", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADD, + SrcConst: -1, SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_F0, DstReg: asm_arm64.REG_R0}, + expErr: "F0 is not integer", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADD, + SrcConst: -1, SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_F0}, + expErr: "F0 is not integer", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeLeftShiftedRegisterToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + const inst = asm_arm64.ADD + for _, tc := range []struct { + srcReg, shiftedSrcReg, dstReg asm.Register + shiftNum int64 + }{ + { + srcReg: asm_arm64.REG_R0, + shiftedSrcReg: asm_arm64.REG_R29, + shiftNum: 1, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REG_R0, + shiftedSrcReg: asm_arm64.REG_R29, + shiftNum: 2, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REG_R0, + shiftedSrcReg: asm_arm64.REG_R29, + shiftNum: 8, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REG_R29, + shiftedSrcReg: asm_arm64.REG_R0, + shiftNum: 16, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REG_R29, + shiftedSrcReg: asm_arm64.REG_R0, + shiftNum: 64, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REGZERO, + shiftedSrcReg: asm_arm64.REG_R0, + shiftNum: 64, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REGZERO, + shiftedSrcReg: asm_arm64.REGZERO, + shiftNum: 64, + dstReg: asm_arm64.REG_R21, + }, + { + srcReg: asm_arm64.REGZERO, + shiftedSrcReg: asm_arm64.REGZERO, + shiftNum: 64, + dstReg: asm_arm64.REGZERO, + }, + } { + tc := tc + t.Run(fmt.Sprintf("src=%s,shifted_src=%s,shift_num=%d,dst=%s", + asm_arm64.RegisterName(tc.srcReg), asm_arm64.RegisterName(tc.shiftedSrcReg), + tc.shiftNum, asm_arm64.RegisterName(tc.dstReg)), func(t *testing.T) { + + // TODO: remove golang-asm dependency in tests.
+ goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileLeftShiftedRegisterToRegister(inst, tc.shiftedSrcReg, tc.shiftNum, tc.srcReg, tc.dstReg) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeLeftShiftedRegisterToRegister(&asm_arm64.NodeImpl{Instruction: inst, + SrcReg: tc.srcReg, SrcReg2: tc.shiftedSrcReg, SrcConst: tc.shiftNum, + DstReg: tc.dstReg, + }) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } +} + +func TestAssemblerImpl_EncodeTwoRegistersToNone(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.SUB, Types: asm_arm64.OperandTypesTwoRegistersToNone, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "SUB is unsupported for from:two-registers,to:none type", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.CMP, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_F0}, + expErr: "F0 is not integer", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.FCMPS, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_F0}, + expErr: "R0 is not float", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeTwoRegistersToNone(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + intRegs := []asm.Register{asm_arm64.REGZERO, asm_arm64.REG_R0, asm_arm64.REG_R10, asm_arm64.REG_R30} + floatRegs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F12, asm_arm64.REG_F31} + for _, tc := range []struct { + instruction asm.Instruction + regs []asm.Register + }{ + {instruction: asm_arm64.CMP, regs: intRegs}, + {instruction: asm_arm64.CMPW, regs: intRegs}, + {instruction: asm_arm64.FCMPD, regs: floatRegs}, + {instruction: asm_arm64.FCMPS, regs: floatRegs}, + } { + t.Run(asm_arm64.InstructionName(tc.instruction), func(t *testing.T) { + for _, src := range tc.regs { + for _, src2 := range tc.regs { + t.Run(fmt.Sprintf("src=%s,src2=%s", asm_arm64.RegisterName(src), asm_arm64.RegisterName(src2)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileTwoRegistersToNone(tc.instruction, src, src2) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeTwoRegistersToNone(&asm_arm64.NodeImpl{Instruction: tc.instruction, SrcReg: src, SrcReg2: src2}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + + } + } + }) + } +} + +func TestAssemblerImpl_EncodeThreeRegistersToRegister(t *testing.T) { + intRegs := []asm.Register{asm_arm64.REGZERO, asm_arm64.REG_R1, asm_arm64.REG_R10, asm_arm64.REG_R30} + for _, inst := range []asm.Instruction{asm_arm64.MSUB, asm_arm64.MSUBW} { + inst := inst + t.Run(asm_arm64.InstructionName(inst), func(t *testing.T) { + for _, src1 := range intRegs { + for _, src2 := range intRegs { + for _, src3 := range intRegs { + for _, dst := range intRegs { + src1, src2, src3, dst := src1, src2, src3, dst + t.Run(fmt.Sprintf("src1=%s,src2=%s,src3=%s,dst=%s", + asm_arm64.RegisterName(src1), asm_arm64.RegisterName(src2), + asm_arm64.RegisterName(src3), asm_arm64.RegisterName(dst)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileThreeRegistersToRegister(inst, src1, src2, src3, dst) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := 
asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeThreeRegistersToRegister(&asm_arm64.NodeImpl{Instruction: inst, SrcReg: src1, SrcReg2: src2, DstReg: src3, DstReg2: dst}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } + } + } + }) + } +} + +func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesRegisterToRegister, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "ADR is unsupported for from:register,to:register type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeRegisterToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + intRegs := []asm.Register{asm_arm64.REGZERO, asm_arm64.REG_R1, asm_arm64.REG_R10, asm_arm64.REG_R30} + intRegsWithoutZero := intRegs[1:] + conditionalRegs := []asm.Register{asm_arm64.REG_COND_EQ, asm_arm64.REG_COND_NE, asm_arm64.REG_COND_HS, asm_arm64.REG_COND_LO, asm_arm64.REG_COND_MI, asm_arm64.REG_COND_PL, asm_arm64.REG_COND_VS, asm_arm64.REG_COND_VC, asm_arm64.REG_COND_HI, asm_arm64.REG_COND_LS, asm_arm64.REG_COND_GE, asm_arm64.REG_COND_LT, asm_arm64.REG_COND_GT, asm_arm64.REG_COND_LE, asm_arm64.REG_COND_AL, asm_arm64.REG_COND_NV} + floatRegs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F15, asm_arm64.REG_F31} + + for _, tc := range []struct { + inst asm.Instruction + srcRegs, dstRegs []asm.Register + }{ + {inst: asm_arm64.ADD, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ADDW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SUB, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.CLZ, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.CLZW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.CSET, srcRegs: conditionalRegs, dstRegs: intRegs}, + {inst: asm_arm64.FABSS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FABSD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FNEGS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FNEGD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FSQRTD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FSQRTS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FCVTDS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FCVTSD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTMD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTMS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTND, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTNS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTPD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTPS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTZD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FRINTZS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FDIVS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FDIVD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMAXD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMAXS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMIND, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMINS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMULS, srcRegs: floatRegs, dstRegs: 
floatRegs}, + {inst: asm_arm64.FMULD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FADDD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FADDS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FCVTZSD, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZSDW, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZSS, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZSSW, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZUD, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZUDW, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZUS, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FCVTZUSW, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FMOVD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMOVS, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMOVD, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMOVS, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FMOVD, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.FMOVS, srcRegs: floatRegs, dstRegs: intRegs}, + {inst: asm_arm64.MOVD, srcRegs: intRegs, dstRegs: intRegsWithoutZero}, + {inst: asm_arm64.MOVWU, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.MRS, srcRegs: []asm.Register{asm_arm64.REG_FPSR}, dstRegs: intRegs}, + {inst: asm_arm64.MSR, srcRegs: intRegs, dstRegs: []asm.Register{asm_arm64.REG_FPSR}}, + {inst: asm_arm64.MUL, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.MULW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.NEG, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.NEGW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.RBIT, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.RBITW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SDIV, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SDIVW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.UDIV, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.UDIVW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SCVTFD, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.SCVTFWD, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.SCVTFS, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.SCVTFWS, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.UCVTFD, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.UCVTFWD, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.UCVTFS, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.UCVTFWS, srcRegs: intRegs, dstRegs: floatRegs}, + {inst: asm_arm64.SXTB, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SXTBW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SXTH, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SXTHW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SXTW, srcRegs: intRegs, dstRegs: intRegs}, + } { + + tc := tc + t.Run(asm_arm64.InstructionName(tc.inst), func(t *testing.T) { + for _, src := range tc.srcRegs { + for _, dst := range tc.dstRegs { + src, dst := src, dst + t.Run(fmt.Sprintf("src=%s,dst=%s", asm_arm64.RegisterName(src), asm_arm64.RegisterName(dst)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + if tc.inst == asm_arm64.CSET { + + goasm.CompileConditionalRegisterSet(conditionalRegisterToState(src), dst) + } else { + goasm.CompileRegisterToRegister(tc.inst, src, dst) + + } + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := 
asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeRegisterToRegister(&asm_arm64.NodeImpl{Instruction: tc.inst, SrcReg: src, DstReg: dst}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } + }) + } +} + +func TestAssemblerImpl_EncodeTwoRegistersToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesTwoRegistersToRegister, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "ADR is unsupported for from:two-registers,to:register type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeTwoRegistersToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + intRegs := []asm.Register{asm_arm64.REGZERO, asm_arm64.REG_R1, asm_arm64.REG_R10, asm_arm64.REG_R30} + floatRegs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F15, asm_arm64.REG_F31} + + for _, tc := range []struct { + inst asm.Instruction + srcRegs, dstRegs []asm.Register + }{ + {inst: asm_arm64.AND, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ANDW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ORR, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ORRW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.EOR, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.EORW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ASR, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ASRW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.LSL, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.LSLW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.LSR, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.LSRW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.ROR, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.RORW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SDIV, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SDIVW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.UDIV, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.UDIVW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SUB, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.SUBW, srcRegs: intRegs, dstRegs: intRegs}, + {inst: asm_arm64.FSUBD, srcRegs: floatRegs, dstRegs: floatRegs}, + {inst: asm_arm64.FSUBS, srcRegs: floatRegs, dstRegs: floatRegs}, + } { + tc := tc + t.Run(asm_arm64.InstructionName(tc.inst), func(t *testing.T) { + for _, src := range tc.srcRegs { + for _, src2 := range tc.srcRegs { + for _, dst := range tc.dstRegs { + src, src2, dst := src, src2, dst + t.Run(fmt.Sprintf("src=%s,src2=%s,dst=%s", asm_arm64.RegisterName(src), asm_arm64.RegisterName(src2), asm_arm64.RegisterName(dst)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileTwoRegistersToRegister(tc.inst, src, src2, dst) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeTwoRegistersToRegister(&asm_arm64.NodeImpl{Instruction: tc.inst, SrcReg: src, SrcReg2: src2, DstReg: dst}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } + } + }) + } +} + +func TestAssemblerImpl_EncodeRegisterAndConstToNone(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{
+ { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesRegisterAndConstToNone, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "ADR is unsupported for from:register-and-const,to:none type", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.CMP, Types: asm_arm64.OperandTypesRegisterAndConstToNone, + SrcReg: asm_arm64.REG_R0, SrcConst: 12345}, + expErr: "immediate for CMP must fit in 0 to 4095 but got 12345", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.CMP, Types: asm_arm64.OperandTypesRegisterAndConstToNone, + SrcReg: asm_arm64.REGZERO, SrcConst: 123}, + expErr: "zero register is not supported for CMP (immediate)", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeRegisterAndConstToNone(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + const inst = asm_arm64.CMP + for _, reg := range []asm.Register{asm_arm64.REG_R1, asm_arm64.REG_R10, asm_arm64.REG_R30} { + for _, c := range []int64{0, 10, 100, 300, 4095} { + reg, c := reg, c + t.Run(fmt.Sprintf("%s, %d", asm_arm64.RegisterName(reg), c), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileRegisterAndConstToNone(inst, reg, c) + expected, err := goasm.Assemble() + require.NoError(t, err) + if c == 0 { + // This case cannot be supported in golang-asm and it results in miscompilation. + expected[3] = 0b111_10001 + } + + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err = a.EncodeRegisterAndConstToNone(&asm_arm64.NodeImpl{Instruction: inst, SrcReg: reg, SrcConst: c}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } +} + +func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesConstToRegister, + SrcReg: asm_arm64.REG_R0, SrcReg2: asm_arm64.REG_R0, DstReg: asm_arm64.REG_R0}, + expErr: "ADR is unsupported for from:const,to:register type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeConstToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + consts64 := []int64{ + 0x1, + 0xfff, + 0xfff << 12, + 123 << 12, + (1<<15 + 1), + (1<<15 + 1) << 16, + (1<<15 + 1) << 32, + 0x0000_ffff_ffff_ffff, + -281470681743361, /* = 0xffff_0000_ffff_ffff */ + math.MinInt32 + 1, + -281474976645121, /* = 0xffff_0000_0000_ffff */ + 1<<20 + 1, + 1<<20 - 1, + 1<<23 | 0b01, + 1<<30 + 1, + 1 << 1, 1<<1 + 1, 1<<1 - 1, 1<<1 + 0xf, + 1 << 2, 1<<2 + 1, 1<<2 - 1, 1<<2 + 0xf, + 1 << 3, 1<<3 + 1, 1<<3 - 1, 1<<3 + 0xf, + 1 << 4, 1<<4 + 1, 1<<4 - 1, 1<<4 + 0xf, + 1 << 5, 1<<5 + 1, 1<<5 - 1, 1<<5 + 0xf, + 1 << 6, 1<<6 + 1, 1<<6 - 1, 1<<6 + 0xf, + 0xfff << 1, 0xfff<<1 - 1, 0xfff<<1 + 1, + 0, 1, -1, 2, 3, 10, -10, 123, -123, + math.MaxInt16, math.MaxInt32, math.MaxUint32, 0b01000000_00000010, 0xffff_0000, 0xffff_0001, 0xf00_000f, + math.MaxInt16 - 1, math.MaxInt32 - 1, math.MaxUint32 - 1, 0b01000000_00000010 - 1, 0xffff_0000 - 1, 0xffff_0001 - 1, 0xf00_000f - 1, + math.MaxInt16 + 1, 0b01000000_00001010 - 1, 0xfff_0000 - 1, 0xffe_0001 - 1, 0xe00_000f - 1, + (1<<15 + 1) << 16, 0b1_00000000_00000010, + 1 << 32, 1 << 34, 1 << 40, + 1<<32 + 1, 1<<34 + 1, 1<<40 + 1, + 1<<32 - 1, 1<<34 - 1, 1<<40 - 1, + 1<<32 + 0xf, 1<<34 + 0xf, 1<<40 + 0xf, + 1<<32 - 0xf, 1<<34 - 0xf, 1<<40 - 0xf, + math.MaxInt64, math.MinInt64, + 1<<30 + 1, + 
0x7000000010000000, + 0x7000000100000000, + 0x7000100000000000, + 87220, + (math.MaxInt16 + 2) * 8, + -281471681677793, + 3295005183, + -8543223759426509416, + -1000000000, + 0xffffff, + } + + for _, tc := range []struct { + inst asm.Instruction + consts []int64 + }{ + { + inst: asm_arm64.ADD, + consts: consts64, + }, + { + inst: asm_arm64.SUB, + consts: consts64, + }, + { + inst: asm_arm64.SUBS, + consts: consts64, + }, + { + inst: asm_arm64.MOVW, + consts: []int64{ + 1 << 1, 1<<1 + 1, 1<<1 - 1, 1<<1 + 0xf, + 1 << 2, 1<<2 + 1, 1<<2 - 1, 1<<2 + 0xf, + 1 << 3, 1<<3 + 1, 1<<3 - 1, 1<<3 + 0xf, + 1 << 4, 1<<4 + 1, 1<<4 - 1, 1<<4 + 0xf, + 1 << 5, 1<<5 + 1, 1<<5 - 1, 1<<5 + 0xf, + 1 << 6, 1<<6 + 1, 1<<6 - 1, 1<<6 + 0xf, + 0xfff << 1, 0xfff<<1 - 1, 0xfff<<1 + 1, + 0, 1, -1, 2, 3, 10, -10, 123, -123, + math.MaxInt16, math.MaxInt32, math.MaxUint32, 0b01000000_00000010, 0xffff_0000, 0xffff_0001, 0xf00_000f, + math.MaxInt16 - 1, math.MaxInt32 - 1, math.MaxUint32 - 1, 0b01000000_00000010 - 1, 0xffff_0000 - 1, 0xffff_0001 - 1, 0xf00_000f - 1, + math.MaxInt16 + 1, 0b01000000_00001010 - 1, 0xfff_0000 - 1, 0xffe_0001 - 1, 0xe00_000f - 1, + (1<<15 + 1) << 16, 0b1_00000000_00000010, + 1 << 30, 1<<30 + 1, 1<<30 - 1, 1<<30 + 0xf, 1<<30 - 0xf, + 0x7fffffffffffffff, + -(1 << 30), + }, + }, + { + inst: asm_arm64.MOVD, + consts: consts64, + }, + { + inst: asm_arm64.LSR, + consts: []int64{1, 2, 4, 16, 31, 32, 63}, + }, + } { + tc := tc + t.Run(asm_arm64.InstructionName(tc.inst), func(t *testing.T) { + for _, r := range []asm.Register{ + asm_arm64.REG_R0, asm_arm64.REG_R10, + asm_arm64.REG_R30, + } { + r := r + t.Run(asm_arm64.RegisterName(r), func(t *testing.T) { + for _, c := range tc.consts { + cs := []int64{c} + if tc.inst != asm_arm64.LSR && c != 0 { + cs = append(cs, -c) + } + for _, c := range cs { + t.Run(fmt.Sprintf("0x%x", uint64(c)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm_arm64.REG_R27) + goasm.CompileConstToRegister(tc.inst, c, r) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err = a.EncodeConstToRegister(&asm_arm64.NodeImpl{Instruction: tc.inst, SrcConst: c, DstReg: r}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } + }) + } + }) + } +} + +func TestAssemblerImpl_EncodeSIMDByteToSIMDByte(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesSIMDByteToSIMDByte}, + expErr: "ADR is unsupported for from:simd-byte,to:simd-byte type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeSIMDByteToSIMDByte(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + const inst = asm_arm64.VCNT + t.Run(asm_arm64.InstructionName(inst), func(t *testing.T) { + floatRegs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F10, asm_arm64.REG_F21, asm_arm64.REG_F31} + for _, src := range floatRegs { + for _, dst := range floatRegs { + src, dst := src, dst + t.Run(fmt.Sprintf("src=%s,dst=%s", asm_arm64.RegisterName(src), asm_arm64.RegisterName(dst)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileSIMDByteToSIMDByte(inst, src, dst) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err =
a.EncodeSIMDByteToSIMDByte(&asm_arm64.NodeImpl{Instruction: inst, SrcReg: src, DstReg: dst}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + + }) + } + } + }) +} + +func TestAssemblerImpl_EncodeSIMDByteToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesSIMDByteToRegister}, + expErr: "ADR is unsupported for from:simd-byte,to:register type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeSIMDByteToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + const inst = asm_arm64.VUADDLV + t.Run(asm_arm64.InstructionName(inst), func(t *testing.T) { + floatRegs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F10, asm_arm64.REG_F21, asm_arm64.REG_F31} + for _, src := range floatRegs { + for _, dst := range floatRegs { + src, dst := src, dst + t.Run(fmt.Sprintf("src=%s,dst=%s", asm_arm64.RegisterName(src), asm_arm64.RegisterName(dst)), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileSIMDByteToRegister(inst, src, dst) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err = a.EncodeSIMDByteToRegister(&asm_arm64.NodeImpl{Instruction: inst, SrcReg: src, DstReg: dst}) + require.NoError(t, err) + + actual := a.Bytes() + require.Equal(t, expected, actual) + + }) + } + } + }) +} + +func TestAssemblerImpl_EncodeRegisterToMemory(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.ADR, Types: asm_arm64.OperandTypesRegisterToMemory}, + expErr: "ADR is unsupported for from:register,to:memory type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeRegisterToMemory(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + offsets := []int64{ + -1, 0, 1, 2, -2, 4, -4, 0xf, -0xf, 1 << 4, 1<<4 - 1, 1<<4 + 1, -128, -256, 8 * 10, -128, + 255, 4096, 4096 << 1, 32760, 32760 * 2, 32760*2 - 8, + 32760*2 - 16, 1 << 27, 1 << 30, 1<<30 + 8, 1<<30 - 8, 1<<30 + 16, 1<<30 - 16, 1<<31 - 8, + } + intRegs := []asm.Register{ + asm_arm64.REG_R0, asm_arm64.REG_R16, + asm_arm64.REG_R30, + } + floatRegs := []asm.Register{ + asm_arm64.REG_F0, asm_arm64.REG_F10, + asm_arm64.REG_F30, + } + for _, tc := range []struct { + inst asm.Instruction + srcRegs []asm.Register + offsets []int64 + }{ + {inst: asm_arm64.MOVD, srcRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVW, srcRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVWU, srcRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVH, srcRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVB, srcRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.FMOVD, srcRegs: floatRegs, offsets: offsets}, + {inst: asm_arm64.FMOVS, srcRegs: floatRegs, offsets: offsets}, + } { + tc := tc + t.Run(asm_arm64.InstructionName(tc.inst), func(t *testing.T) { + for _, srcReg := range tc.srcRegs { + for _, baseReg := range intRegs { + t.Run("const offset", func(t *testing.T) { + for _, offset := range tc.offsets { + n := &asm_arm64.NodeImpl{Types: asm_arm64.OperandTypesRegisterToMemory, + Instruction: tc.inst, SrcReg: srcReg, DstReg: baseReg, DstConst: offset} + t.Run(n.String(), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + a := 
asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + + for _, assembler := range []asm_arm64.Assembler{goasm, a} { + assembler.CompileRegisterToMemory(n.Instruction, n.SrcReg, n.DstReg, n.DstConst) + } + + expected, err := goasm.Assemble() + require.NoError(t, err) + + actual, err := a.Assemble() + require.NoError(t, err) + + require.Equal(t, expected, actual) + }) + } + }) + t.Run("register offset", func(t *testing.T) { + for _, offsetReg := range []asm.Register{asm_arm64.REG_R8, asm_arm64.REG_R18} { + n := &asm_arm64.NodeImpl{Types: asm_arm64.OperandTypesRegisterToMemory, + Instruction: tc.inst, SrcReg: srcReg, DstReg: baseReg, DstReg2: offsetReg} + t.Run(n.String(), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileRegisterToMemoryWithRegisterOffset(n.Instruction, n.SrcReg, n.DstReg, n.DstReg2) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err = a.EncodeRegisterToMemory(n) + require.NoError(t, err) + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + }) + } + } + }) + } +} + +func TestAssemblerImpl_EncodeMemoryToRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.SUB, Types: asm_arm64.OperandTypesMemoryToRegister}, + expErr: "SUB is unsupported for from:memory,to:register type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeMemoryToRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + offsets := []int64{ + -1, 0, 1, 2, -2, 0xf, -0xf, 1 << 4, 1<<4 - 1, 1<<4 + 1, -128, -256, 8 * 10, -128, + 255, 4096, 4096 << 1, 32760, 32760 * 2, 32760*2 - 8, + 32760*2 - 16, 1 << 27, 1 << 30, 1<<30 + 8, 1<<30 - 8, 1<<30 + 16, 1<<30 - 16, 1<<31 - 8, + 1<<12<<8 + 8, + 1<<12<<8 - 8, + } + intRegs := []asm.Register{ + asm_arm64.REG_R0, asm_arm64.REG_R16, + asm_arm64.REG_R30, + } + floatRegs := []asm.Register{ + asm_arm64.REG_F0, asm_arm64.REG_F10, + asm_arm64.REG_F30, + } + for _, tc := range []struct { + inst asm.Instruction + dstRegs []asm.Register + offsets []int64 + }{ + {inst: asm_arm64.MOVD, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVW, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVWU, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVH, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVHU, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVB, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.MOVBU, dstRegs: intRegs, offsets: offsets}, + {inst: asm_arm64.FMOVD, dstRegs: floatRegs, offsets: offsets}, + {inst: asm_arm64.FMOVS, dstRegs: floatRegs, offsets: offsets}, + } { + tc := tc + t.Run(asm_arm64.InstructionName(tc.inst), func(t *testing.T) { + for _, dstReg := range tc.dstRegs { + for _, baseReg := range intRegs { + t.Run("const offset", func(t *testing.T) { + for _, offset := range tc.offsets { + n := &asm_arm64.NodeImpl{Types: asm_arm64.OperandTypesMemoryToRegister, + Instruction: tc.inst, SrcReg: baseReg, SrcConst: offset, DstReg: dstReg} + t.Run(n.String(), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + + for _, assembler := range []asm_arm64.Assembler{goasm, a} { + assembler.CompileMemoryToRegister(n.Instruction, n.SrcReg, n.SrcConst, n.DstReg) + } + + expected, err := goasm.Assemble() + require.NoError(t, err) + + actual, err := a.Assemble() + 
require.NoError(t, err) + + require.Equal(t, expected, actual) + }) + } + }) + t.Run("register offset", func(t *testing.T) { + for _, offsetReg := range []asm.Register{asm_arm64.REG_R8, asm_arm64.REG_R18} { + n := &asm_arm64.NodeImpl{Types: asm_arm64.OperandTypesMemoryToRegister, + Instruction: tc.inst, SrcReg: baseReg, SrcReg2: offsetReg, DstReg: dstReg} + t.Run(n.String(), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileMemoryWithRegisterOffsetToRegister(n.Instruction, n.SrcReg, n.SrcReg2, n.DstReg) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err = a.EncodeMemoryToRegister(n) + require.NoError(t, err) + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + }) + } + } + }) + } +} + +func TestAssemblerImpl_encodeReadInstructionAddress(t *testing.T) { + t.Run("ok", func(t *testing.T) { + const targetBeforeInstruction = asm_arm64.RET + for _, dstReg := range []asm.Register{asm_arm64.REG_R19, asm_arm64.REG_R23} { + dstReg := dstReg + t.Run(asm_arm64.RegisterName(dstReg), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + + for _, assembler := range []asm_arm64.Assembler{a, goasm} { + assembler.CompileReadInstructionAddress(dstReg, targetBeforeInstruction) + assembler.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) // Dummy + assembler.CompileJumpToRegister(targetBeforeInstruction, asm_arm64.REG_R25) + assembler.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) // Target. + } + + actual, err := a.Assemble() + require.NoError(t, err) + expected, err := goasm.Assemble() + require.NoError(t, err) + require.Equal(t, expected, actual) + }) + } + }) + t.Run("not found", func(t *testing.T) { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + a.CompileReadInstructionAddress(asm_arm64.REG_R27, asm_arm64.NOP) + a.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) + _, err := a.Assemble() + require.EqualError(t, err, "BUG: target instruction NOP not found for ADR") + }) + t.Run("offset too large", func(t *testing.T) { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + a.CompileReadInstructionAddress(asm_arm64.REG_R27, asm_arm64.RET) + a.CompileJumpToRegister(asm_arm64.RET, asm_arm64.REG_R25) + a.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) + + for n := a.Root; n != nil; n = n.Next { + n.OffsetInBinaryField = uint64(a.Buf.Len()) + + err := a.EncodeNode(n) + require.NoError(t, err) + } + + require.Len(t, a.OnGenerateCallbacks, 1) + cb := a.OnGenerateCallbacks[0] + + targetNode := a.Current + targetNode.OffsetInBinaryField = uint64(math.MaxInt64) + + err := cb(nil) + require.EqualError(t, err, "BUG: too large offset for ADR") + }) +} + +func TestAssemblerImpl_EncodeRelativeJump(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.B, Types: asm_arm64.OperandTypesNoneToBranch}, + expErr: "branch target must be set for B", + }, + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.SUB, Types: asm_arm64.OperandTypesNoneToBranch}, + expErr: "SUB is unsupported for from:none,to:branch type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeRelativeBranch(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + for _, inst := range []asm.Instruction{ + asm_arm64.B, asm_arm64.BEQ, asm_arm64.BGE, 
asm_arm64.BGT, asm_arm64.BHI, asm_arm64.BHS, + asm_arm64.BLE, asm_arm64.BLO, asm_arm64.BLS, asm_arm64.BLT, asm_arm64.BMI, asm_arm64.BNE, asm_arm64.BVS, + } { + inst := inst + t.Run(asm_arm64.InstructionName(inst), func(t *testing.T) { + for _, tc := range []struct { + forward bool + instructionsInPreamble, instructionsBeforeBranch, instructionsAfterBranch int + }{ + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 0, instructionsAfterBranch: 10}, + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 10, instructionsAfterBranch: 10}, + {forward: true, instructionsInPreamble: 123, instructionsBeforeBranch: 10, instructionsAfterBranch: 10}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 0, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 10, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 10, instructionsAfterBranch: 10}, + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 0, instructionsAfterBranch: 1000}, + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 1000, instructionsAfterBranch: 1000}, + {forward: true, instructionsInPreamble: 123, instructionsBeforeBranch: 1000, instructionsAfterBranch: 1000}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 0, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 1000, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 1000, instructionsAfterBranch: 1000}, + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 0, instructionsAfterBranch: 1234}, + {forward: true, instructionsInPreamble: 0, instructionsBeforeBranch: 1234, instructionsAfterBranch: 1234}, + {forward: true, instructionsInPreamble: 123, instructionsBeforeBranch: 1234, instructionsAfterBranch: 1234}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 0, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 1234, instructionsAfterBranch: 0}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 1234, instructionsAfterBranch: 1234}, + {forward: true, instructionsInPreamble: 123, instructionsBeforeBranch: 123, instructionsAfterBranch: 65536}, + {forward: false, instructionsInPreamble: 123, instructionsBeforeBranch: 65536, instructionsAfterBranch: 0}, + } { + t.Run(fmt.Sprintf("forward=%v(before=%d,after=%d)", tc.forward, + tc.instructionsBeforeBranch, tc.instructionsAfterBranch), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + + for _, assembler := range []asm_arm64.Assembler{a, goasm} { + for i := 0; i < tc.instructionsInPreamble; i++ { + assembler.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) + } + backwardTarget := assembler.CompileStandAlone(asm_arm64.NOP) + for i := 0; i < tc.instructionsBeforeBranch; i++ { + assembler.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) + } + br := assembler.CompileJump(inst) + for i := 0; i < tc.instructionsAfterBranch; i++ { + assembler.CompileConstToRegister(asm_arm64.MOVD, 1000, asm_arm64.REG_R10) + } + forwardTarget := assembler.CompileStandAlone(asm_arm64.NOP) + + if tc.forward { + br.AssignJumpTarget(forwardTarget) + } else { + br.AssignJumpTarget(backwardTarget) + } + } + + actual, err := a.Assemble() +
require.NoError(t, err) + expected, err := goasm.Assemble() + require.NoError(t, err) + require.Equal(t, expected, actual) + }) + } + }) + } +} + +// TestAssemblerImpl_multipleLargeOffset ensures that the const pool flushing strategy matches +// that of Go's assembler. +func TestAssemblerImpl_multipleLargeOffset(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + + for _, assembler := range []asm_arm64.Assembler{a, goasm} { + for i := 0; i < 10000; i++ { + // This will be put into the const pool, but the callback won't be set for it. + assembler.CompileRegisterToMemory(asm_arm64.MOVD, asm_arm64.REG_R11, asm_arm64.REG_R12, 0xfff0+int64(i*8)) + // This will also set the callback for it. + assembler.CompileRegisterToMemory(asm_arm64.MOVD, asm_arm64.REG_R11, asm_arm64.REG_R12, (0xfff0+int64(i*8)<<16+8)%(1<<31)) + } + } + + actual, err := a.Assemble() + require.NoError(t, err) + expected, err := goasm.Assemble() + require.NoError(t, err) + require.Equal(t, expected, actual) +} + +func TestAssemblerImpl_EncodeTwoSIMDBytesToSIMDByteRegister(t *testing.T) { + t.Run("error", func(t *testing.T) { + for _, tc := range []struct { + n *asm_arm64.NodeImpl + expErr string + }{ + { + n: &asm_arm64.NodeImpl{Instruction: asm_arm64.B, Types: asm_arm64.OperandTypesTwoSIMDBytesToSIMDByteRegister}, + expErr: "B is unsupported for from:two-simd-bytes,to:simd-byte type", + }, + } { + a := asm_arm64.NewAssemblerImpl(asm.NilRegister) + err := a.EncodeTwoSIMDBytesToSIMDByteRegister(tc.n) + require.EqualError(t, err, tc.expErr) + } + }) + + for _, inst := range []asm.Instruction{asm_arm64.VBIT} { + regs := []asm.Register{asm_arm64.REG_F0, asm_arm64.REG_F10, asm_arm64.REG_F30} + for _, src1 := range regs { + for _, src2 := range regs { + for _, dst := range regs { + n := &asm_arm64.NodeImpl{Instruction: inst, SrcReg: src1, SrcReg2: src2, DstReg: dst, + Types: asm_arm64.OperandTypesTwoSIMDBytesToSIMDByteRegister} + t.Run(n.String(), func(t *testing.T) { + goasm := newGoasmAssembler(t, asm.NilRegister) + goasm.CompileTwoSIMDBytesToSIMDByteRegister(n.Instruction, n.SrcReg, n.SrcReg2, n.DstReg) + expected, err := goasm.Assemble() + require.NoError(t, err) + + a := asm_arm64.NewAssemblerImpl(asm_arm64.REG_R27) + err = a.EncodeTwoSIMDBytesToSIMDByteRegister(n) + require.NoError(t, err) + actual := a.Bytes() + require.Equal(t, expected, actual) + }) + } + } + } + } +} + +func conditionalRegisterToState(r asm.Register) asm.ConditionalRegisterState { + switch r { + case asm_arm64.REG_COND_EQ: + return asm_arm64.COND_EQ + case asm_arm64.REG_COND_NE: + return asm_arm64.COND_NE + case asm_arm64.REG_COND_HS: + return asm_arm64.COND_HS + case asm_arm64.REG_COND_LO: + return asm_arm64.COND_LO + case asm_arm64.REG_COND_MI: + return asm_arm64.COND_MI + case asm_arm64.REG_COND_PL: + return asm_arm64.COND_PL + case asm_arm64.REG_COND_VS: + return asm_arm64.COND_VS + case asm_arm64.REG_COND_VC: + return asm_arm64.COND_VC + case asm_arm64.REG_COND_HI: + return asm_arm64.COND_HI + case asm_arm64.REG_COND_LS: + return asm_arm64.COND_LS + case asm_arm64.REG_COND_GE: + return asm_arm64.COND_GE + case asm_arm64.REG_COND_LT: + return asm_arm64.COND_LT + case asm_arm64.REG_COND_GT: + return asm_arm64.COND_GT + case asm_arm64.REG_COND_LE: + return asm_arm64.COND_LE + case asm_arm64.REG_COND_AL: + return asm_arm64.COND_AL + case asm_arm64.REG_COND_NV: + return asm_arm64.COND_NV + } + return asm.ConditionalRegisterStateUnset +} diff --git a/internal/asm/assembler.go
b/internal/asm/assembler.go index 4c6680fbe9..0f3d347312 100644 --- a/internal/asm/assembler.go +++ b/internal/asm/assembler.go @@ -21,22 +21,26 @@ type Instruction byte // register's states. type ConditionalRegisterState byte -// ConditionalRegisterStateUnset is the only architecture-independent conditinal state, and -// can be used to indicate that no conditional state is specificed. +// ConditionalRegisterStateUnset is the only architecture-independent conditional state, and +// can be used to indicate that no conditional state is specified. const ConditionalRegisterStateUnset ConditionalRegisterState = 0 // Node represents a node in the linked list of assembled operations. type Node interface { fmt.Stringer + // AssignJumpTarget assigns the given target node as the destination of // jump instruction for this Node. AssignJumpTarget(target Node) - // AssignDestinationConstant assigns the given constnat as the destination + + // AssignDestinationConstant assigns the given constant as the destination // of the instruction for this node. AssignDestinationConstant(value ConstantValue) - // AssignSourceConstant assigns the given constnat as the source + + // AssignSourceConstant assigns the given constant as the source // of the instruction for this node. AssignSourceConstant(value ConstantValue) + // OffsetInBinary returns the offset of this node in the assembled binary. OffsetInBinary() NodeOffsetInBinary } @@ -49,56 +53,74 @@ type ConstantValue = int64 // AssemblerBase is the common interface for assemblers among multiple architectures. // -// Note: some of them can be implemented in a arch-independent way, but not all can be +// Note: some of them can be implemented in an arch-independent way, but not all can be // implemented as such. However, we intentionally put such arch-dependant methods here // in order to provide the common documentation interface. +// // Note: this interface is coupled and heavily influenced by golang-asm's API (i.e. Go's official assembler). // Therefore, we will do the refactoring after golang-asm removal. type AssemblerBase interface { // Assemble produces the final binary for the assembled operations. Assemble() ([]byte, error) - // SetJumpTargetOnNext instructs the assmembler that the next node must be - // assigned to the given nodes's jump destination. + + // SetJumpTargetOnNext instructs the assembler that the next node must be + // assigned as the jump destination of the given nodes. SetJumpTargetOnNext(nodes ...Node) + // BuildJumpTable calculates the offsets between the first instruction `initialInstructions[0]` - // and others (e.g. initialInstructions[3]), and wrote the calcualted offsets into pre-allocated - // `table` slice in litte endian. + // and others (e.g. initialInstructions[3]), and writes the calculated offsets into the pre-allocated + // `table` slice in little endian. // // TODO: This can be hidden into assembler implementation after golang-asm removal. BuildJumpTable(table []byte, initialInstructions []Node) + // CompileStandAlone adds an instruction to take no arguments. CompileStandAlone(instruction Instruction) Node + // CompileConstToRegister adds an instruction where source operand is `value` as constant and destination is `destinationReg` register. CompileConstToRegister(instruction Instruction, value ConstantValue, destinationReg Register) Node + // CompileRegisterToRegister adds an instruction where source and destination operands are registers.
CompileRegisterToRegister(instruction Instruction, from, to Register) + // CompileMemoryToRegister adds an instruction where source operands is the memory address specified by `sourceBaseReg+sourceOffsetConst` // and the destination is `destinationReg` register. - CompileMemoryToRegister(instruction Instruction, sourceBaseReg Register, sourceOffsetConst ConstantValue, destinationReg Register) + CompileMemoryToRegister( + instruction Instruction, + sourceBaseReg Register, + sourceOffsetConst ConstantValue, + destinationReg Register, + ) + // CompileRegisterToMemory adds an instruction where source operand is `sourceRegister` register and the destination is the // memory address specified by `destinationBaseRegister+destinationOffsetConst`. - CompileRegisterToMemory(instruction Instruction, sourceRegister Register, destinationBaseRegister Register, destinationOffsetConst ConstantValue) + CompileRegisterToMemory( + instruction Instruction, + sourceRegister Register, + destinationBaseRegister Register, + destinationOffsetConst ConstantValue, + ) + // CompileJump adds jump-type instruction and returns the corresponding Node in the assembled linked list. CompileJump(jmpInstruction Instruction) Node - // CompileJumpToMemory adds jump-type instruction whose destination is stored in the memory address specified by `baseReg+offset`, - // and returns the corresponding Node in the assembled linked list. - CompileJumpToMemory(jmpInstruction Instruction, baseReg Register, offset ConstantValue) + // CompileJumpToRegister adds jump-type instruction whose destination is the memory address specified by `reg` register. CompileJumpToRegister(jmpInstruction Instruction, reg Register) + // CompileReadInstructionAddress adds an ADR instruction to set the absolute address of "target instruction" // into destinationRegister. "target instruction" is specified by beforeTargetInst argument and // the target is determined by "the instruction right after beforeTargetInst type". // - // For example, if beforeTargetInst == RET and we have the instruction sequence like - // ADR -> X -> Y -> ... -> RET -> MOV, then the ADR instruction emitted by this function set the absolute - // address of MOV instruction into the destination register. + // For example, if `beforeTargetInst == RET` and we have the instruction sequence like + // `ADR -> X -> Y -> ... -> RET -> MOV`, then the `ADR` instruction emitted by this function sets the absolute + // address of the `MOV` instruction into the destination register. CompileReadInstructionAddress(destinationRegister Register, beforeAcquisitionTargetInstruction Instruction) } // JumpTableMaximumOffset represents the limit on the size of jump table in bytes. -// When users try loading an extremely large webassembly binary which contains a br_table +// When users try loading an extremely large WebAssembly binary which contains a br_table // statement with approximately 4294967296 (2^32) targets. Realistically speaking, that kind of binary -// could result in more than ten giga bytes of native JITed code where we have to care about +// could result in more than ten gigabytes of native JITed code where we have to care about // huge stacks whose height might exceed 32-bit range, and such huge stack doesn't work with the // current implementation.
const JumpTableMaximumOffset = math.MaxUint32 diff --git a/internal/asm/golang_asm/golang_asm.go b/internal/asm/golang_asm/golang_asm.go index 6906590332..4a6bff132d 100644 --- a/internal/asm/golang_asm/golang_asm.go +++ b/internal/asm/golang_asm/golang_asm.go @@ -24,23 +24,23 @@ func (n *GolangAsmNode) String() string { return n.prog.String() } -// OffsetInBinary implements Node.OffsetInBinary. +// OffsetInBinary implements the same method as documented on asm.Node. func (n *GolangAsmNode) OffsetInBinary() asm.NodeOffsetInBinary { return asm.NodeOffsetInBinary(n.prog.Pc) } -// AssignJumpTarget implements Node.AssignJumpTarget. +// AssignJumpTarget implements the same method as documented on asm.Node. func (n *GolangAsmNode) AssignJumpTarget(target asm.Node) { b := target.(*GolangAsmNode) n.prog.To.SetTarget(b.prog) } -// AssignDestinationConstant implements Node.AssignDestinationConstant. +// AssignDestinationConstant implements the same method as documented on asm.Node. func (n *GolangAsmNode) AssignDestinationConstant(value asm.ConstantValue) { n.prog.To.Offset = value } -// AssignSourceConstant implements Node.AssignSourceConstant. +// AssignSourceConstant implements the same method as documented on asm.Node. func (n *GolangAsmNode) AssignSourceConstant(value asm.ConstantValue) { n.prog.From.Offset = value } @@ -48,9 +48,11 @@ func (n *GolangAsmNode) AssignSourceConstant(value asm.ConstantValue) { // GolangAsmBaseAssembler implements *part of* AssemblerBase for golang-asm library. type GolangAsmBaseAssembler struct { b *goasm.Builder - // setBranchTargetOnNextInstructions holds branch kind instructions (BR, conditional BR, etc) + + // setBranchTargetOnNextNodes holds branch kind instructions (BR, conditional BR, etc) // where we want to set the next coming instruction as the destination of these BR instructions. setBranchTargetOnNextNodes []asm.Node + // onGenerateCallbacks holds the callbacks which are called after generating native code. onGenerateCallbacks []func(code []byte) error } @@ -63,7 +65,7 @@ func NewGolangAsmBaseAssembler(arch string) (*GolangAsmBaseAssembler, error) { return &GolangAsmBaseAssembler{b: b}, nil } -// Assemble implements AssemblerBase.Assemble +// Assemble implements the same method as documented on asm.AssemblerBase. func (a *GolangAsmBaseAssembler) Assemble() ([]byte, error) { code := a.b.Assemble() for _, cb := range a.onGenerateCallbacks { @@ -74,17 +76,17 @@ func (a *GolangAsmBaseAssembler) Assemble() ([]byte, error) { return code, nil } -// SetJumpTargetOnNext implements AssemblerBase.SetJumpTargetOnNext +// SetJumpTargetOnNext implements the same method as documented on asm.AssemblerBase. func (a *GolangAsmBaseAssembler) SetJumpTargetOnNext(nodes ...asm.Node) { a.setBranchTargetOnNextNodes = append(a.setBranchTargetOnNextNodes, nodes...) } -// AddOnGenerateCallBack implements AssemblerBase.AddOnGenerateCallBack +// AddOnGenerateCallBack implements the same method as documented on asm.AssemblerBase. func (a *GolangAsmBaseAssembler) AddOnGenerateCallBack(cb func([]byte) error) { a.onGenerateCallbacks = append(a.onGenerateCallbacks, cb) } -// BuildJumpTable implements AssemblerBase.BuildJumpTable +// BuildJumpTable implements the same method as documented on asm.AssemblerBase. func (a *GolangAsmBaseAssembler) BuildJumpTable(table []byte, labelInitialInstructions []asm.Node) { a.AddOnGenerateCallBack(func(code []byte) error { // Build the offset table for each target. 
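For clarity, here is a minimal sketch of the offset-table construction that BuildJumpTable documents above: each entry is the distance from the first target's offset, written as four little-endian bytes into the pre-allocated table. The node type and the main function are illustrative stand-ins, not the package's real implementation.

package main

import (
	"encoding/binary"
	"fmt"
)

// node is a stand-in for asm.Node; only the offset in the binary matters here.
type node struct{ offsetInBinary uint64 }

// buildJumpTable writes, for each target, its distance from targets[0]
// as a 4-byte little-endian value into the pre-allocated table.
func buildJumpTable(table []byte, targets []node) {
	base := targets[0].offsetInBinary
	for i, t := range targets {
		binary.LittleEndian.PutUint32(table[i*4:], uint32(t.offsetInBinary-base))
	}
}

func main() {
	targets := []node{{0x100}, {0x140}, {0x1a0}}
	table := make([]byte, len(targets)*4)
	buildJumpTable(table, targets)
	fmt.Printf("% x\n", table) // 00 00 00 00 40 00 00 00 a0 00 00 00
}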
diff --git a/internal/asm/impl.go b/internal/asm/impl.go index 6629504cd4..4fec01493a 100644 --- a/internal/asm/impl.go +++ b/internal/asm/impl.go @@ -10,10 +10,11 @@ import ( // Note: When possible, add code here instead of in architecture-specific files to reduce drift: // As this is internal, exporting symbols only to reduce duplication is ok. type BaseAssemblerImpl struct { - // SetBranchTargetOnNextNodes holds branch kind instructions (BR, conditional BR, etc) + // SetBranchTargetOnNextNodes holds branch kind instructions (BR, conditional BR, etc.) // where we want to set the next coming instruction as the destination of these BR instructions. SetBranchTargetOnNextNodes []Node - // onGenerateCallbacks holds the callbacks which are called after generating native code. + + // OnGenerateCallbacks holds the callbacks which are called after generating native code. OnGenerateCallbacks []func(code []byte) error } diff --git a/internal/u64/u64.go b/internal/u64/u64.go index a47d9d607f..65c7cd124b 100644 --- a/internal/u64/u64.go +++ b/internal/u64/u64.go @@ -1,6 +1,6 @@ package u64 -// LeBytes returns a byte array corresponding to the 8 bytes in the uint64 in little-endian byte order. +// LeBytes returns a byte slice corresponding to the 8 bytes in the uint64 in little-endian byte order. func LeBytes(v uint64) []byte { return []byte{ byte(v), diff --git a/internal/wasm/jit/engine.go b/internal/wasm/jit/engine.go index 6d6618ff27..63f64f9089 100644 --- a/internal/wasm/jit/engine.go +++ b/internal/wasm/jit/engine.go @@ -373,7 +373,7 @@ func (e *engine) NewModuleEngine(name string, importedFunctions, moduleFunctions } if err != nil { me.Close() // safe because the reference to me was never leaked. - return nil, fmt.Errorf("function[%d/%d] %w", i, len(moduleFunctions)-1, err) + return nil, fmt.Errorf("function[%s(%d/%d)] %w", f.DebugName, i, len(moduleFunctions)-1, err) } // As this uses mmap, we need a finalizer in case moduleEngine.Close was never called. Regardless, we need a @@ -786,14 +786,7 @@ func (ce *callEngine) builtinFunctionMemoryGrow(mem *wasm.MemoryInstance) { ce.moduleContext.memoryElement0Address = bufSliceHeader.Data } -// golang-asm is not goroutine-safe so we take lock until we complete the compilation. 
-// TODO: delete after https://github.com/tetratelabs/wazero/issues/233 -var assemblerMutex = &sync.Mutex{} - func compileHostFunction(f *wasm.FunctionInstance) (*compiledFunction, error) { - assemblerMutex.Lock() - defer assemblerMutex.Unlock() - compiler, err := newCompiler(f, nil) if err != nil { return nil, err @@ -822,9 +815,6 @@ func compileHostFunction(f *wasm.FunctionInstance) (*compiledFunction, error) { } func compileWasmFunction(f *wasm.FunctionInstance) (*compiledFunction, error) { - assemblerMutex.Lock() - defer assemblerMutex.Unlock() - ir, err := wazeroir.Compile(f) if err != nil { return nil, fmt.Errorf("failed to lower to wazeroir: %w", err) diff --git a/internal/wasm/jit/engine_test.go b/internal/wasm/jit/engine_test.go index 6f3fdd5613..2b08c73edf 100644 --- a/internal/wasm/jit/engine_test.go +++ b/internal/wasm/jit/engine_test.go @@ -183,7 +183,7 @@ func TestJIT_EngineCompile_Errors(t *testing.T) { } _, err = e.NewModuleEngine(t.Name(), importedFunctions, moduleFunctions, nil, nil) - require.EqualError(t, err, "function[2/2] failed to lower to wazeroir: handling instruction: apply stack failed for call: reading immediates: EOF") + require.EqualError(t, err, "function[invalid code(2/2)] failed to lower to wazeroir: handling instruction: apply stack failed for call: reading immediates: EOF") // On the compilation failure, all the compiled functions including succeeded ones must be released. require.Len(t, e.compiledFunctions, len(importedFunctions)) diff --git a/internal/wasm/jit/jit_controlflow_test.go b/internal/wasm/jit/jit_controlflow_test.go index 31a5b31b03..62e96ad1b0 100644 --- a/internal/wasm/jit/jit_controlflow_test.go +++ b/internal/wasm/jit/jit_controlflow_test.go @@ -15,13 +15,11 @@ func TestCompiler_compileHostFunction(t *testing.T) { env := newJITEnvironment() compiler := env.requireNewCompiler(t, newCompiler, nil) - // The assembler skips the first instruction so we intentionally add const op here, which is ignored. + // The golang-asm assembler skips the first instruction, so we emit NOP here which is ignored. // TODO: delete after #233 - err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 1}) - require.NoError(t, err) - compiler.valueLocationStack().pop() + compiler.compileNOP() - err = compiler.compileHostFunction() + err := compiler.compileHostFunction() require.NoError(t, err) // Generate and run the code under test. 
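As an aside on the NewModuleEngine change above, here is a small self-contained sketch of the new error format: the function's DebugName is added next to the index so the failing function is identifiable, and %w keeps the underlying error unwrappable. The variable values below are illustrative only.

package main

import (
	"errors"
	"fmt"
)

func main() {
	// Illustrative inner error; the real one comes from wazeroir compilation.
	compileErr := errors.New("failed to lower to wazeroir: reading immediates: EOF")
	debugName, i, last := "invalid code", 2, 2

	// Same format string as engine.go above; %w keeps compileErr unwrappable.
	err := fmt.Errorf("function[%s(%d/%d)] %w", debugName, i, last, compileErr)

	fmt.Println(err)                        // function[invalid code(2/2)] failed to lower to wazeroir: ...
	fmt.Println(errors.Is(err, compileErr)) // true
}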
diff --git a/internal/wasm/jit/jit_impl_amd64_test.go b/internal/wasm/jit/jit_impl_amd64_test.go index 290c8b7763..3d1b3cb38f 100644 --- a/internal/wasm/jit/jit_impl_amd64_test.go +++ b/internal/wasm/jit/jit_impl_amd64_test.go @@ -363,3 +363,7 @@ func (c *amd64Compiler) setStackPointerCeil(v uint64) { func (c *amd64Compiler) setValueLocationStack(s *valueLocationStack) { c.locationStack = s } + +func (a *amd64Compiler) compileNOP() { + a.assembler.CompileStandAlone(amd64.NOP) +} diff --git a/internal/wasm/jit/jit_impl_arm64.go b/internal/wasm/jit/jit_impl_arm64.go index 7318dc2194..9494178398 100644 --- a/internal/wasm/jit/jit_impl_arm64.go +++ b/internal/wasm/jit/jit_impl_arm64.go @@ -39,18 +39,13 @@ type arm64Compiler struct { } func newArm64Compiler(f *wasm.FunctionInstance, ir *wazeroir.CompilationResult) (compiler, error) { - b, err := arm64.NewAssembler(arm64ReservedRegisterForTemporary) - if err != nil { - return nil, err - } - c := &arm64Compiler{ + return &arm64Compiler{ f: f, - assembler: b, + assembler: arm64.NewAssemblerImpl(arm64ReservedRegisterForTemporary), locationStack: newValueLocationStack(), ir: ir, labels: map[string]*arm64LabelInfo{}, - } - return c, nil + }, nil } var ( @@ -68,11 +63,11 @@ var ( // * REG_R18 is reserved as a platform register, and we don't use it in JIT. // * REG_R28 is reserved for Goroutine by Go runtime, and we don't use it in JIT. arm64UnreservedGeneralPurposeIntRegisters = []asm.Register{ // nolint - arm64.REG_R4, arm64.REG_R5, arm64.REG_R6, arm64.REG_R7, arm64.REG_R8, + arm64.REG_R3, arm64.REG_R4, arm64.REG_R5, arm64.REG_R6, arm64.REG_R7, arm64.REG_R8, arm64.REG_R9, arm64.REG_R10, arm64.REG_R11, arm64.REG_R12, arm64.REG_R13, arm64.REG_R14, arm64.REG_R15, arm64.REG_R16, arm64.REG_R17, arm64.REG_R19, arm64.REG_R20, arm64.REG_R21, arm64.REG_R22, arm64.REG_R23, arm64.REG_R24, - arm64.REG_R25, arm64.REG_R26, arm64.REG_R27, arm64.REG_R29, arm64.REG_R30, + arm64.REG_R25, arm64.REG_R26, arm64.REG_R29, arm64.REG_R30, } ) @@ -83,7 +78,7 @@ const ( arm64ReservedRegisterForStackBasePointerAddress asm.Register = arm64.REG_R1 // arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr). arm64ReservedRegisterForMemory asm.Register = arm64.REG_R2 - arm64ReservedRegisterForTemporary asm.Register = arm64.REG_R3 + arm64ReservedRegisterForTemporary asm.Register = arm64.REG_R27 ) const ( @@ -315,6 +310,7 @@ func (c *arm64Compiler) compileReturnFunction() error { ) // "callFrameStackTopAddressRegister = tmpReg + callFramePointerReg << ${callFrameDataSizeMostSignificantSetBit}" c.assembler.CompileLeftShiftedRegisterToRegister( + arm64.ADD, callFramePointerReg, callFrameDataSizeMostSignificantSetBit, tmpReg, callFrameStackTopAddressRegister, @@ -351,7 +347,7 @@ func (c *arm64Compiler) compileReturnFunction() error { // "rb.caller" is below the top address. callFrameStackTopAddressRegister, -(callFrameDataSize - callFrameReturnAddressOffset), tmpReg) - c.assembler.CompileJumpToMemory(arm64.B, tmpReg, 0) + c.assembler.CompileJumpToMemory(arm64.B, tmpReg) c.markRegisterUnused(tmpRegs...) return nil @@ -742,7 +738,7 @@ func (c *arm64Compiler) compileBrTable(o *wazeroir.OperationBrTable) error { c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register) // If the value exceeds the length, we will branch into the default target (corresponding to len(o.Targets) index). 
brDefaultIndex := c.assembler.CompileJump(arm64.BLO) - c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register) + c.assembler.CompileRegisterToRegister(arm64.MOVWU, tmpReg, index.register) c.assembler.SetJumpTargetOnNext(brDefaultIndex) // We prepare the static data which holds the offset of @@ -782,7 +778,7 @@ func (c *arm64Compiler) compileBrTable(o *wazeroir.OperationBrTable) error { ) // "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])" - c.assembler.CompileLeftShiftedRegisterToRegister(index.register, 2, tmpReg, index.register) + c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register) // "index.register = *index.register (== offsetData[offset])" c.assembler.CompileMemoryToRegister(arm64.MOVW, index.register, 0, index.register) @@ -795,7 +791,7 @@ func (c *arm64Compiler) compileBrTable(o *wazeroir.OperationBrTable) error { // So we could achieve the br_table jump by adding them and jump into the resulting address. c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register) - c.assembler.CompileJumpToMemory(arm64.B, index.register, 0) + c.assembler.CompileJumpToMemory(arm64.B, index.register) // We no longer need the index's register, so mark it unused. c.markRegisterUnused(index.register) @@ -999,7 +995,7 @@ func (c *arm64Compiler) compileCallImpl(index wasm.Index, compiledFunctionAddres compiledFunctionRegister, compiledFunctionCodeInitialAddressOffset, tmp) - c.assembler.CompileJumpToMemory(arm64.B, tmp, 0) + c.assembler.CompileJumpToMemory(arm64.B, tmp) // All the registers used are temporary so we mark them unused. c.markRegisterUnused(freeRegisters...) @@ -1040,6 +1036,7 @@ func (c *arm64Compiler) compileCalcCallFrameStackTopAddress(callFrameStackPointe destinationRegister) // "destinationRegister += callFrameStackPointerRegister << $callFrameDataSizeMostSignificantSetBit" c.assembler.CompileLeftShiftedRegisterToRegister( + arm64.ADD, callFrameStackPointerRegister, callFrameDataSizeMostSignificantSetBit, destinationRegister, destinationRegister, @@ -1097,6 +1094,7 @@ func (c *arm64Compiler) compileCallIndirect(o *wazeroir.OperationCallIndirect) e // Here we left shifting by 4 in order to get the offset in bytes, // and the table element type is interface which is 16 bytes (two pointers). 
c.assembler.CompileLeftShiftedRegisterToRegister( + arm64.ADD, offset.register, 4, tmp, offset.register, @@ -1668,7 +1666,8 @@ func (c *arm64Compiler) compileRem(o *wazeroir.OperationRem) error { // [result: x2=quotient, x3=remainder] // c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg) - c.assembler.CompileTwoRegisters(msubInst, divisorReg, dividendReg, resultReg, resultReg) + // ResultReg = dividendReg - (divisorReg * resultReg) + c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg) c.markRegisterUnused(dividend.register, divisor.register) c.pushValueLocationOnRegister(resultReg) @@ -1993,7 +1992,7 @@ func (c *arm64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error { // // mov x0, -9223372036854775808 // fmov d2, x0 - // vbit v0.8b, v1.8b, v2.8b + // vbit v0.8b, v1.8b, v2.8b // // "mov freg, -9223372036854775808 (stored at ce.minimum64BitSignedInt)" c.assembler.CompileMemoryToRegister( @@ -2011,7 +2010,7 @@ func (c *arm64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error { // * https://github.com/golang/go/blob/739328c694d5e608faa66d17192f0a59f6e01d04/src/cmd/compile/internal/arm64/ssa.go#L972 // // "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "inserting 64th bit of x2 into x1". - c.assembler.CompileTwoSIMDByteToRegister(arm64.VBIT, freg, x2.register, x1.register) + c.assembler.CompileTwoSIMDBytesToSIMDByteRegister(arm64.VBIT, freg, x2.register, x1.register) c.markRegisterUnused(x2.register) c.pushValueLocationOnRegister(x1.register) @@ -2020,7 +2019,7 @@ func (c *arm64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error { // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture. func (c *arm64Compiler) compileI32WrapFromI64() error { - return c.compileSimpleUnop(arm64.MOVW) + return c.compileSimpleUnop(arm64.MOVWU) } // compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture. @@ -2066,7 +2065,7 @@ func (c *arm64Compiler) compileITruncFromF(o *wazeroir.OperationITruncFromF) err c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.REG_FPSR, arm64ReservedRegisterForTemporary) // Check if the conversion was undefined by comparing the status with 1. // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register - c.assembler.CompileRegisterAndConstSourceToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1) + c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1) brOK := c.assembler.CompileJump(arm64.BNE) @@ -2190,7 +2189,7 @@ func (c *arm64Compiler) compileExtend(o *wazeroir.OperationExtend) error { if o.Signed { return c.compileSimpleUnop(arm64.SXTW) } else { - return c.compileSimpleUnop(arm64.UXTW) + return c.compileSimpleUnop(arm64.MOVWU) } } @@ -2731,7 +2730,7 @@ func (c *arm64Compiler) compileCallGoFunction(jitStatus jitCallStatusCode, built // Set the address of callFrameStack[ce.callFrameStackPointer] into currentCallFrameTopAddressRegister. c.compileCalcCallFrameStackTopAddress(currentCallFrameStackPointerRegister, currentCallFrameTopAddressRegister) - // Set the return address (after RET in c.exit below) into returnAddressRegister. + // Set the return address (after RET in c.compileExitFromNativeCode below) into returnAddressRegister. c.assembler.CompileReadInstructionAddress(returnAddressRegister, arm64.RET) // Write returnAddressRegister into callFrameStack[ce.callFrameStackPointer-1].returnAddress. 
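The new comment in compileRem above describes how the remainder is recovered from the quotient with a multiply-subtract: resultReg = dividendReg - divisorReg*quotient. A quick plain-Go check of that identity (arm64's SDIV, like Go's division, truncates toward zero):

package main

import "fmt"

// rem mirrors the SDIV+MSUB sequence: divide to get the truncated quotient,
// then multiply-subtract to recover the remainder.
func rem(dividend, divisor int64) int64 {
	quotient := dividend / divisor     // SDIV: truncates toward zero
	return dividend - divisor*quotient // MSUB: dividend - divisor*quotient
}

func main() {
	fmt.Println(rem(7, 3), 7%3)   // 1 1
	fmt.Println(rem(-7, 3), -7%3) // -1 -1
}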
@@ -3011,7 +3010,7 @@ func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() // Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress + arm64ReservedRegisterForTemporary << 3" // where we shift tmpReg by 3 because stack pointer is an index in the []uint64 // so we must multiply the value by the size of uint64 = 8 bytes. - c.assembler.CompileLeftShiftedRegisterToRegister( + c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, 3, arm64ReservedRegisterForStackBasePointerAddress, arm64ReservedRegisterForStackBasePointerAddress) } diff --git a/internal/wasm/jit/jit_impl_arm64_test.go b/internal/wasm/jit/jit_impl_arm64_test.go index 3ecc05fe06..45fa1724be 100644 --- a/internal/wasm/jit/jit_impl_arm64_test.go +++ b/internal/wasm/jit/jit_impl_arm64_test.go @@ -6,88 +6,40 @@ import ( "github.com/stretchr/testify/require" arm64 "github.com/tetratelabs/wazero/internal/asm/arm64" - "github.com/tetratelabs/wazero/internal/wazeroir" ) func TestArm64Compiler_readInstructionAddress(t *testing.T) { - t.Run("target instruction not found", func(t *testing.T) { - env := newJITEnvironment() - compiler := env.requireNewCompiler(t, newArm64Compiler, nil).(*arm64Compiler) - - err := compiler.compilePreamble() - require.NoError(t, err) - - // Set the acquisition target instruction to the one after JMP. - compiler.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.B) - - compiler.compileExitFromNativeCode(jitCallStatusCodeReturned) - - // If generate the code without JMP after compileReadInstructionAddress, - // the call back added must return error. - _, _, _, err = compiler.compile() - require.Error(t, err) - require.Contains(t, err.Error(), "target instruction not found") - }) - t.Run("too large offset", func(t *testing.T) { - env := newJITEnvironment() - compiler := env.requireNewCompiler(t, newArm64Compiler, nil).(*arm64Compiler) - - err := compiler.compilePreamble() - require.NoError(t, err) - - // Set the acquisition target instruction to the one after RET. - compiler.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET) - - // Add many instruction between the target and compileReadInstructionAddress. - for i := 0; i < 100; i++ { - err = compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 10}) - require.NoError(t, err) - } - - compiler.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary) - - err = compiler.compileReturnFunction() - require.NoError(t, err) - - // If generate the code with too many instruction between ADR and - // the target, compile must fail. - _, _, _, err = compiler.compile() - require.Error(t, err) - require.Contains(t, err.Error(), "too large offset") - }) - t.Run("ok", func(t *testing.T) { - env := newJITEnvironment() - compiler := env.requireNewCompiler(t, newArm64Compiler, nil).(*arm64Compiler) - - err := compiler.compilePreamble() - require.NoError(t, err) - - // Set the acquisition target instruction to the one after RET, - // and read the absolute address into destinationRegister. - const addressReg = arm64ReservedRegisterForTemporary - compiler.assembler.CompileReadInstructionAddress(addressReg, arm64.RET) - - // Branch to the instruction after RET below via the absolute - // address stored in destinationRegister. - compiler.assembler.CompileJumpToMemory(arm64.B, addressReg, 0) - - // If we fail to branch, we reach here and exit with unreachable status, - // so the assertion would fail. 
- compiler.compileExitFromNativeCode(jitCallStatusCodeUnreachable) - - // This could be the read instruction target as this is the - // right after RET. Therefore, the branch instruction above - // must target here. - err = compiler.compileReturnFunction() - require.NoError(t, err) - - code, _, _, err := compiler.compile() - require.NoError(t, err) - - env.exec(code) - - require.Equal(t, jitCallStatusCodeReturned, env.jitStatus()) - }) + env := newJITEnvironment() + compiler := env.requireNewCompiler(t, newArm64Compiler, nil).(*arm64Compiler) + + err := compiler.compilePreamble() + require.NoError(t, err) + + // Set the acquisition target instruction to the one after RET, + // and read the absolute address into destinationRegister. + const addressReg = arm64ReservedRegisterForTemporary + compiler.assembler.CompileReadInstructionAddress(addressReg, arm64.RET) + + // Branch to the instruction after RET below via the absolute + // address stored in destinationRegister. + compiler.assembler.CompileJumpToMemory(arm64.B, addressReg) + + // If we fail to branch, we reach here and exit with unreachable status, + // so the assertion would fail. + compiler.compileExitFromNativeCode(jitCallStatusCodeUnreachable) + + // This could be the read instruction target as this is + // right after RET. Therefore, the branch instruction above + // must target here. + err = compiler.compileReturnFunction() + require.NoError(t, err) + + code, _, _, err := compiler.compile() + require.NoError(t, err) + + env.exec(code) + + require.Equal(t, jitCallStatusCodeReturned, env.jitStatus()) } // compile implements compilerImpl.valueLocationStack for the amd64 architecture. @@ -109,3 +61,7 @@ func (c *arm64Compiler) setStackPointerCeil(v uint64) { func (c *arm64Compiler) setValueLocationStack(s *valueLocationStack) { c.locationStack = s } + +func (a *arm64Compiler) compileNOP() { + a.assembler.CompileStandAlone(arm64.NOP) +} diff --git a/internal/wasm/jit/jit_initialization_test.go b/internal/wasm/jit/jit_initialization_test.go index 37af4be962..cc0fd6ebc5 100644 --- a/internal/wasm/jit/jit_initialization_test.go +++ b/internal/wasm/jit/jit_initialization_test.go @@ -9,7 +9,6 @@ import ( "github.com/stretchr/testify/require" "github.com/tetratelabs/wazero/internal/wasm" - "github.com/tetratelabs/wazero/internal/wazeroir" ) func TestCompiler_compileModuleContextInitialization(t *testing.T) { @@ -73,14 +72,11 @@ func TestCompiler_compileModuleContextInitialization(t *testing.T) { me := &moduleEngine{compiledFunctions: make([]*compiledFunction, 10)} tc.moduleInstance.Engine = me - // The assembler skips the first instruction so we intentionally add const op here, which is ignored. + // The golang-asm assembler skips the first instruction, so we emit NOP here which is ignored. // TODO: delete after #233 - err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 1}) - require.NoError(t, err) - loc := compiler.valueLocationStack().pop() - compiler.valueLocationStack().markRegisterUnused(loc.register) + compiler.compileNOP() - err = compiler.compileModuleContextInitialization() + err := compiler.compileModuleContextInitialization() require.NoError(t, err) require.Empty(t, compiler.valueLocationStack().usedRegisters) @@ -124,13 +120,11 @@ func TestCompiler_compileMaybeGrowValueStack(t *testing.T) { env := newJITEnvironment() compiler := env.requireNewCompiler(t, newCompiler, nil) - // The assembler skips the first instruction so we intentionally add const op here, which is ignored.
+ // The golang-asm assembler skips the first instruction, so we emit NOP here which is ignored. // TODO: delete after #233 - err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 1}) - require.NoError(t, err) - compiler.valueLocationStack().pop() + compiler.compileNOP() - err = compiler.compileMaybeGrowValueStack() + err := compiler.compileMaybeGrowValueStack() require.NoError(t, err) require.NotNil(t, compiler.getOnStackPointerCeilDeterminedCallBack()) @@ -155,13 +149,11 @@ func TestCompiler_compileMaybeGrowValueStack(t *testing.T) { env := newJITEnvironment() compiler := env.requireNewCompiler(t, newCompiler, nil) - // The assembler skips the first instruction so we intentionally add const op here, which is ignored. + // The golang-asm assembler skips the first instruction, so we emit NOP here which is ignored. // TODO: delete after #233 - err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 1}) - require.NoError(t, err) - compiler.valueLocationStack().pop() + compiler.compileNOP() - err = compiler.compileMaybeGrowValueStack() + err := compiler.compileMaybeGrowValueStack() require.NoError(t, err) // On the return from grow value stack, we simply return. diff --git a/internal/wasm/jit/jit_numeric_test.go b/internal/wasm/jit/jit_numeric_test.go index 1274ded45c..091b399239 100644 --- a/internal/wasm/jit/jit_numeric_test.go +++ b/internal/wasm/jit/jit_numeric_test.go @@ -885,7 +885,7 @@ func TestCompiler_compile_Min_Max_Copysign(t *testing.T) { }, }, { - name: "max-32-bit", + name: "copysign-32-bit", is32bit: true, setupFunc: func(t *testing.T, compiler compilerImpl) { err := compiler.compileCopysign(&wazeroir.OperationCopysign{Type: wazeroir.Float32}) diff --git a/internal/wasm/jit/jit_test.go b/internal/wasm/jit/jit_test.go index 113d52731f..6bc7a4f07b 100644 --- a/internal/wasm/jit/jit_test.go +++ b/internal/wasm/jit/jit_test.go @@ -141,16 +141,11 @@ func (j *jitEnv) exec(code []byte) { type newTestCompiler func(f *wasm.FunctionInstance, ir *wazeroir.CompilationResult) (compiler, error) func (j *jitEnv) requireNewCompiler(t *testing.T, fn newTestCompiler, functype *wasm.FunctionType) compilerImpl { - // golang-asm is not goroutine-safe so we take lock until we complete the compilation. - // TODO: delete after https://github.com/tetratelabs/wazero/issues/233 - assemblerMutex.Lock() - requireSupportedOSArch(t) c, err := fn( &wasm.FunctionInstance{Module: j.moduleInstance, Kind: wasm.FunctionKindWasm, Type: functype}, &wazeroir.CompilationResult{LabelCallers: map[string]uint32{}}, ) - t.Cleanup(func() { assemblerMutex.Unlock() }) require.NoError(t, err) ret, ok := c.(compilerImpl) @@ -172,6 +167,7 @@ type compilerImpl interface { setValueLocationStack(*valueLocationStack) compileEnsureOnGeneralPurposeRegister(loc *valueLocation) error compileModuleContextInitialization() error + compileNOP() } const defaultMemoryPageNumInTest = 1
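Finally, a stand-alone sketch of the compileNOP workaround these test changes rely on: golang-asm drops the first emitted instruction, so the tests emit one ignorable NOP before the code under test (to be deleted after #233). All types and names below are illustrative stand-ins, not wazero's real interfaces.

package main

import "fmt"

type assembler interface{ CompileStandAlone(inst string) }

// printAssembler mimics the golang-asm quirk of ignoring the first instruction.
type printAssembler struct{ skippedFirst bool }

func (p *printAssembler) CompileStandAlone(inst string) {
	if !p.skippedFirst {
		p.skippedFirst = true
		return // the first instruction is silently dropped
	}
	fmt.Println(inst)
}

type compiler struct{ asm assembler }

// compileNOP emits a placeholder so the dropped-first-instruction quirk
// consumes the NOP rather than real code, as in the tests above.
func (c *compiler) compileNOP() { c.asm.CompileStandAlone("NOP") }

func main() {
	c := &compiler{asm: &printAssembler{}}
	c.compileNOP()                  // absorbed by the quirk
	c.asm.CompileStandAlone("MOVD") // survives: prints MOVD
}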