diff --git a/internal/asm/arm64/consts.go b/internal/asm/arm64/consts.go index 76cc3fbd5e..f95de788e9 100644 --- a/internal/asm/arm64/consts.go +++ b/internal/asm/arm64/consts.go @@ -786,6 +786,36 @@ const ( FCMGT // FCMGE is the FCMGE(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en FCMGE + // VFMUL is the FMUL(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMUL--vector---Floating-point-Multiply--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFMUL + // VFDIV is the FDIV(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FDIV--vector---Floating-point-Divide--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFDIV + // VFSQRT is the FSQRT(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSQRT--vector---Floating-point-Square-Root--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFSQRT + // VFMIN is the FMIN(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMIN--vector---Floating-point-minimum--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFMIN + // VFMAX is the FMAX(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFMAX + // VFABS is the FABS(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FABS--vector---Floating-point-Absolute-value--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. 
+ VFABS + // VFRINTP is the FRINTP(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTP--vector---Floating-point-Round-to-Integral--toward-Plus-infinity--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFRINTP + // VFRINTM is the FRINTM(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTM--vector---Floating-point-Round-to-Integral--toward-Minus-infinity--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFRINTM + // VFRINTZ is the FRINTZ(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTZ--vector---Floating-point-Round-to-Integral--toward-Zero--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFRINTZ + // VFRINTN is the FRINTN(vector) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTN--vector---Floating-point-Round-to-Integral--to-nearest-with-ties-to-even--vector--?lang=en + // Note: prefixed by V to distinguish from the non-vector variant. + VFRINTN // instructionEnd is always placed at the bottom of this iota definition to be used in the test. 
instructionEnd @@ -1204,6 +1234,26 @@ func InstructionName(i asm.Instruction) string { return "FCMGT" case FCMGE: return "FCMGE" + case VFMUL: + return "VFMUL" + case VFDIV: + return "VFDIV" + case VFSQRT: + return "VFSQRT" + case VFMIN: + return "VFMIN" + case VFMAX: + return "VFMAX" + case VFABS: + return "VFABS" + case VFRINTP: + return "VFRINTP" + case VFRINTM: + return "VFRINTM" + case VFRINTZ: + return "VFRINTZ" + case VFRINTN: + return "VFRINTN" } panic(fmt.Errorf("unknown instruction %d", i)) } diff --git a/internal/asm/arm64/impl.go b/internal/asm/arm64/impl.go index 8cd8c3590f..bac696b431 100644 --- a/internal/asm/arm64/impl.go +++ b/internal/asm/arm64/impl.go @@ -2899,283 +2899,260 @@ func (a *AssemblerImpl) EncodeStaticConstToVectorRegister(n *NodeImpl) (err erro // advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD two-register miscellaneous" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDTwoRegisterMisc = map[asm.Instruction]struct { - U, Opcode byte - // TODO: extract common implementation of qAndSizeResolver. 
- qAndSizeResolver func(arrangement VectorArrangement) (Q, Size byte, err error) + U, opcode byte + qAndSize map[VectorArrangement]qAndSize }{ // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NOT--Bitwise-NOT--vector--?lang=en - NOT: {U: 0b1, Opcode: 0b00101, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b00 - switch arrangement { - case VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(NOT)) - } - return - }}, + NOT: {U: 0b1, opcode: 0b00101, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement8B: {size: 0b00, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FNEG--vector---Floating-point-Negate--vector--?lang=en - VFNEG: {U: 0b1, Opcode: 0b01111, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b00 - switch arrangement { - case VectorArrangement4S: - size, Q = 0b10, 0b1 - case VectorArrangement2S: - size, Q = 0b10, 0b0 - case VectorArrangement2D: - size, Q = 0b11, 0b1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VFNEG)) - } - return + VFNEG: {U: 0b1, opcode: 0b01111, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + VectorArrangement2D: {size: 0b11, q: 0b1}, + }, + }, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FABS--vector---Floating-point-Absolute-value--vector--?lang=en + VFABS: {U: 0, opcode: 0b01111, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b11, q: 0b1}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + }}, + // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSQRT--vector---Floating-point-Square-Root--vector--?lang=en + VFSQRT: {U: 1, opcode: 0b11111, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b11, q: 0b1}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + }}, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTM--vector---Floating-point-Round-to-Integral--toward-Minus-infinity--vector--?lang=en + VFRINTM: {U: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b01, q: 0b1}, + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + }}, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTN--vector---Floating-point-Round-to-Integral--to-nearest-with-ties-to-even--vector--?lang=en + VFRINTN: {U: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b01, q: 0b1}, + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + }}, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTP--vector---Floating-point-Round-to-Integral--toward-Plus-infinity--vector--?lang=en + VFRINTP: {U: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b11, q: 0b1}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + }}, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTZ--vector---Floating-point-Round-to-Integral--toward-Zero--vector--?lang=en + VFRINTZ: {U: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b11, q: 0b1}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, }}, } // advancedSIMDAcrossLanes holds information to encode instructions as "Advanced 
SIMD three same" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDThreeSame = map[asm.Instruction]struct { - U, Opcode byte - qAndSizeResolver func(arrangement VectorArrangement) (Q, Size byte, err error) + U, Opcode byte + qAndSize map[VectorArrangement]qAndSize }{ // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/AND--vector---Bitwise-AND--vector--?lang=en - VAND: {U: 0b0, Opcode: 0b00011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b00 - switch arrangement { - case VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(AND)) - } - return - }}, + VAND: {U: 0b0, Opcode: 0b00011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement8B: {size: 0b00, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BSL--Bitwise-Select-?lang=en - BSL: {U: 0b1, Opcode: 0b00011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b01 - switch arrangement { - case VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(BSL)) - } - return - }}, + BSL: {U: 0b1, Opcode: 0b00011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b01, q: 0b1}, + VectorArrangement8B: {size: 0b01, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EOR--vector---Bitwise-Exclusive-OR--vector--?lang=en - EOR: {U: 0b1, Opcode: 0b00011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b00 - switch arrangement { - case 
VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(BSL)) - } - return - }}, + EOR: {U: 0b1, Opcode: 0b00011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement8B: {size: 0b00, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ORR--vector--register---Bitwise-inclusive-OR--vector--register--?lang=en - VORR: {U: 0b0, Opcode: 0b00011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b10 - switch arrangement { - case VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VORR)) - } - return - }}, + VORR: {U: 0b0, Opcode: 0b00011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b10, q: 0b1}, + VectorArrangement8B: {size: 0b10, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIC--vector--register---Bitwise-bit-Clear--vector--register--?lang=en - BIC: {U: 0b0, Opcode: 0b00011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - size = 0b01 - switch arrangement { - case VectorArrangement16B: - Q = 0b1 - case VectorArrangement8B: - Q = 0b0 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(BIC)) - } - return - }}, + BIC: {U: 0b0, Opcode: 0b00011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b01, q: 0b1}, + VectorArrangement8B: {size: 0b01, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en - VFADDS: {U: 0b0, Opcode: 0b11010, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err 
error) { - switch arrangement { - case VectorArrangement2S: - size, Q = 0b00, 0 - case VectorArrangement4S: - size, Q = 0b00, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VFADDS)) - } - return - }}, + VFADDS: {U: 0b0, Opcode: 0b11010, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en - VFADDD: {U: 0b0, Opcode: 0b11010, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - case VectorArrangement2D: - size, Q = 0b01, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VFADDD)) - } - return - }}, + VFADDD: {U: 0b0, Opcode: 0b11010, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en - VFSUBS: {U: 0b0, Opcode: 0b11010, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - case VectorArrangement2S: - size, Q = 0b10, 0 - case VectorArrangement4S: - size, Q = 0b10, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VFSUBS)) - } - return - }}, + VFSUBS: {U: 0b0, Opcode: 0b11010, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en - VFSUBD: {U: 0b0, Opcode: 0b11010, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - 
case VectorArrangement2D: - size, Q = 0b11, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(VFSUBD)) - } - return - }}, + VFSUBD: {U: 0b0, Opcode: 0b11010, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement2D: {size: 0b11, q: 0b1}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAXP--Unsigned-Maximum-Pairwise-?lang=en - UMAXP: {U: 0b1, Opcode: 0b10100, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + UMAXP: {U: 0b1, Opcode: 0b10100, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--register---Compare-bitwise-Equal--vector--?lang=en - CMEQ: {U: 0b1, Opcode: 0b10001, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + CMEQ: {U: 0b1, Opcode: 0b10001, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/ADDP--vector- - VADDP: {U: 0b0, Opcode: 0b10111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + VADDP: {U: 0b0, Opcode: 0b10111, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector---Add--vector--?lang=en - VADD: {U: 0, Opcode: 0b10000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + VADD: {U: 0, Opcode: 0b10000, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SUB--vector---Subtract--vector--?lang=en - VSUB: {U: 1, Opcode: 0b10000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + VSUB: {U: 1, Opcode: 0b10000, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en - SSHL: {U: 0, Opcode: 0b01000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + SSHL: {U: 0, Opcode: 0b01000, qAndSize: defaultQAndSize}, // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en - USHL: {U: 0b1, Opcode: 0b01000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + USHL: {U: 0b1, Opcode: 0b01000, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en - CMGT: {U: 0b0, Opcode: 0b00110, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + CMGT: {U: 0b0, Opcode: 0b00110, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en - CMHI: {U: 0b1, Opcode: 0b00110, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + CMHI: {U: 0b1, Opcode: 0b00110, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en - CMGE: {U: 0b0, Opcode: 0b00111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + CMGE: {U: 0b0, Opcode: 0b00111, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en - CMHS: {U: 0b1, Opcode: 0b00111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver}, + CMHS: {U: 0b1, Opcode: 0b00111, qAndSize: defaultQAndSize}, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en - FCMEQ: {U: 0b0, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - case VectorArrangement4S: - size, Q = 0b00, 1 - case VectorArrangement2S: - size, Q = 0b00, 0 - case VectorArrangement2D: - size, Q = 0b01, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), 
InstructionName(FCMEQ)) - } - return - }}, + FCMEQ: {U: 0b0, Opcode: 0b11100, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en - FCMGT: {U: 0b1, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - case VectorArrangement4S: - size, Q = 0b10, 1 - case VectorArrangement2S: - size, Q = 0b10, 0 - case VectorArrangement2D: - size, Q = 0b11, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(FCMGT)) - } - return - }}, + FCMGT: {U: 0b1, Opcode: 0b11100, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + VectorArrangement2D: {size: 0b11, q: 0b1}, + }, + }, // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en - FCMGE: {U: 0b1, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) { - switch arrangement { - case VectorArrangement4S: - size, Q = 0b00, 1 - case VectorArrangement2S: - size, Q = 0b00, 0 - case VectorArrangement2D: - size, Q = 0b01, 1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(FCMGE)) - } - return - }}, -} - -func advancedSIMDThreeSameDefaultResolver(arrangement VectorArrangement) (Q, size byte, err error) { - // TODO: simply use arrangementSizeQ as the resolver after refactoring other call-site of arrangementSizeQ. 
- size, Q = arrangementSizeQ(arrangement) - return + FCMGE: {U: 0b1, Opcode: 0b11100, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMIN--vector---Floating-point-minimum--vector--?lang=en + VFMIN: {U: 0b0, Opcode: 0b11110, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + VectorArrangement2D: {size: 0b11, q: 0b1}, + }, + }, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en + VFMAX: {U: 0b0, Opcode: 0b11110, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMUL--vector---Floating-point-Multiply--vector--?lang=en + VFMUL: {U: 0b1, Opcode: 0b11011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FDIV--vector---Floating-point-Divide--vector--?lang=en + VFDIV: {U: 0b1, Opcode: 0b11111, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement4S: {size: 0b00, q: 0b1}, + VectorArrangement2S: {size: 0b00, q: 0b0}, + VectorArrangement2D: {size: 0b01, q: 0b1}, + }, + }, +} + +// qAndSize is a pair of "Q" and "size" that appear in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +type qAndSize struct{ q, size byte } + +// defaultQAndSize maps a vector 
arrangement to the default qAndSize which is encoded by many instructions. +var defaultQAndSize = map[VectorArrangement]qAndSize{ + VectorArrangement8B: {size: 0b00, q: 0b0}, + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement4H: {size: 0b01, q: 0b0}, + VectorArrangement8H: {size: 0b01, q: 0b1}, + VectorArrangement2S: {size: 0b10, q: 0b0}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + VectorArrangement1D: {size: 0b11, q: 0b0}, + VectorArrangement2D: {size: 0b11, q: 0b1}, } // advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD across lanes" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDAcrossLanes = map[asm.Instruction]struct { U, Opcode byte - // TODO: extract common implementation of qAndSizeResolver. - qAndSizeResolver func(arrangement VectorArrangement) (Q, Size byte, err error) + qAndSize map[VectorArrangement]qAndSize }{ // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDV--Add-across-Vector-?lang=en - ADDV: {U: 0b0, Opcode: 0b11011, qAndSizeResolver: func(arrangement VectorArrangement) (Q, Size byte, err error) { - switch arrangement { - case VectorArrangement16B: - Size, Q = 0b00, 0b1 - case VectorArrangement8B: - Size, Q = 0b00, 0b0 - case VectorArrangement8H: - Size, Q = 0b01, 0b1 - case VectorArrangement4H: - Size, Q = 0b01, 0b0 - case VectorArrangement4S: - Size, Q = 0b10, 0b1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(ADDV)) - } - return - }}, + ADDV: {U: 0b0, Opcode: 0b11011, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement8B: {size: 0b00, q: 0b0}, + VectorArrangement8H: {size: 0b01, q: 0b1}, + VectorArrangement4H: {size: 0b01, q: 0b0}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + }, + }, // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMINV--Unsigned-Minimum-across-Vector-?lang=en - UMINV: {U: 0b1, Opcode: 0b11010, qAndSizeResolver: func(arrangement VectorArrangement) (Q, Size byte, err error) { - switch arrangement { - case VectorArrangement16B: - Size, Q = 0b00, 0b1 - case VectorArrangement8B: - Size, Q = 0b00, 0b0 - case VectorArrangement8H: - Size, Q = 0b01, 0b1 - case VectorArrangement4H: - Size, Q = 0b01, 0b0 - case VectorArrangement4S: - Size, Q = 0b10, 0b1 - default: - err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(UMINV)) - } - return - }}, + UMINV: {U: 0b1, Opcode: 0b11010, + qAndSize: map[VectorArrangement]qAndSize{ + VectorArrangement16B: {size: 0b00, q: 0b1}, + VectorArrangement8B: {size: 0b00, q: 0b0}, + VectorArrangement8H: {size: 0b01, q: 0b1}, + VectorArrangement4H: {size: 0b01, q: 0b0}, + VectorArrangement4S: {size: 0b10, q: 0b1}, + }, + }, } // advancedSIMDScalarPairwise holds information to encode instructions as "Advanced SIMD scalar pairwise" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDScalarPairwise = map[asm.Instruction]struct { - U, Opcode byte - sizeResolver func(arrangement VectorArrangement) (Size byte) + U, Opcode byte + size map[VectorArrangement]byte }{ // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDP--scalar---Add-Pair-of-elements--scalar--?lang=en - ADDP: {U: 0b0, Opcode: 0b11011, sizeResolver: func(arrangement VectorArrangement) (size byte) { - size = 0b11 - return - }}, + ADDP: {U: 0b0, Opcode: 0b11011, size: map[VectorArrangement]byte{VectorArrangement2D: 0b11}}, } // advancedSIMDCopy holds information to encode instructions as "Advanced SIMD copy" in @@ -3325,38 +3302,20 @@ var advancedSIMDCopy = map[asm.Instruction]struct { // 
https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDTableLookup = map[asm.Instruction]struct { op, op2, Len byte - qResolver func(arr VectorArrangement) (q byte) + q map[VectorArrangement]byte }{ - TBL1: {op: 0, op2: 0, Len: 0b00, qResolver: func(arr VectorArrangement) (q byte) { - switch arr { - case VectorArrangement16B: - q = 0b1 - case VectorArrangement8B: - q = 0b0 - } - return - }}, - TBL2: {op: 0, op2: 0, Len: 0b01, qResolver: func(arr VectorArrangement) (q byte) { - switch arr { - case VectorArrangement16B: - q = 0b1 - case VectorArrangement8B: - q = 0b0 - } - return - }}, + TBL1: {op: 0, op2: 0, Len: 0b00, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}}, + TBL2: {op: 0, op2: 0, Len: 0b01, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}}, } // advancedSIMDScalarTwoRegisterMisc holds information to encode instructions as "Advanced SIMD scalar two-register miscellaneous" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en var advancedSIMDScalarTwoRegisterMisc = map[asm.Instruction]struct { - U, opcode byte - qAndSizeResolver func(arr VectorArrangement) (q, size byte) + U, opcode byte + qAndSize map[VectorArrangement]qAndSize }{ - CMEQZERO: {U: 0b0, opcode: 0b01001, qAndSizeResolver: func(arr VectorArrangement) (q, size byte) { - size, q = arrangementSizeQ(arr) - return - }}, + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--zero---Compare-bitwise-Equal-to-zero--vector--?lang=en + CMEQZERO: {U: 0b0, opcode: 0b01001, qAndSize: defaultQAndSize}, } // advancedSIMDShiftByImmediate holds information to encode instructions as "Advanced SIMD shift by immediate" in @@ -3490,7 +3449,10 @@ func (a *AssemblerImpl) EncodeVectorRegisterToVectorRegister(n *NodeImpl) (err 
e if scalarPairwise, ok := advancedSIMDScalarPairwise[n.Instruction]; ok { // See "Advanced SIMD scalar pairwise" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en - size := scalarPairwise.sizeResolver(n.VectorArrangement) + size, ok := scalarPairwise.size[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) + } a.Buf.Write([]byte{ (srcVectorRegBits << 5) | dstVectorRegBits, scalarPairwise.Opcode<<4 | 1<<3 | srcVectorRegBits>>3, @@ -3503,47 +3465,49 @@ func (a *AssemblerImpl) EncodeVectorRegisterToVectorRegister(n *NodeImpl) (err e if twoRegMisc, ok := advancedSIMDTwoRegisterMisc[n.Instruction]; ok { // See "Advanced SIMD two-register miscellaneous" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en - q, size, err := twoRegMisc.qAndSizeResolver(n.VectorArrangement) - if err != nil { - return err + qs, ok := twoRegMisc.qAndSize[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) } a.Buf.Write([]byte{ (srcVectorRegBits << 5) | dstVectorRegBits, - twoRegMisc.Opcode<<4 | 0b1<<3 | srcVectorRegBits>>3, - size<<6 | 0b1<<5 | twoRegMisc.Opcode>>4, - q<<6 | twoRegMisc.U<<5 | 0b01110, + twoRegMisc.opcode<<4 | 0b1<<3 | srcVectorRegBits>>3, + qs.size<<6 | 0b1<<5 | twoRegMisc.opcode>>4, + qs.q<<6 | twoRegMisc.U<<5 | 0b01110, }) return nil } if threeSame, ok := advancedSIMDThreeSame[n.Instruction]; ok { - q, size, err := threeSame.qAndSizeResolver(n.VectorArrangement) - if err != nil { - return err + qs, ok := threeSame.qAndSize[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) } - 
a.encodeAdvancedSIMDThreeSame(srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.Opcode, size, q, threeSame.U) + a.encodeAdvancedSIMDThreeSame(srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.Opcode, qs.size, qs.q, threeSame.U) return nil } if acrossLanes, ok := advancedSIMDAcrossLanes[n.Instruction]; ok { // See "Advanced SIMD across lanes" in // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en - q, size, err := acrossLanes.qAndSizeResolver(n.VectorArrangement) - if err != nil { - return err + qs, ok := acrossLanes.qAndSize[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) } a.Buf.Write([]byte{ (srcVectorRegBits << 5) | dstVectorRegBits, acrossLanes.Opcode<<4 | 0b1<<3 | srcVectorRegBits>>3, - size<<6 | 0b11000<<1 | acrossLanes.Opcode>>4, - q<<6 | acrossLanes.U<<5 | 0b01110, + qs.size<<6 | 0b11000<<1 | acrossLanes.Opcode>>4, + qs.q<<6 | acrossLanes.U<<5 | 0b01110, }) return nil } if lookup, ok := advancedSIMDTableLookup[n.Instruction]; ok { - q := lookup.qResolver(n.VectorArrangement) - + q, ok := lookup.q[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) + } a.Buf.Write([]byte{ (srcVectorRegBits << 5) | dstVectorRegBits, lookup.Len<<5 | lookup.op<<4 | srcVectorRegBits>>3, @@ -3554,12 +3518,15 @@ func (a *AssemblerImpl) EncodeVectorRegisterToVectorRegister(n *NodeImpl) (err e } if scalaTwoMisc, ok := advancedSIMDScalarTwoRegisterMisc[n.Instruction]; ok { - q, size := scalaTwoMisc.qAndSizeResolver(n.VectorArrangement) + qs, ok := scalaTwoMisc.qAndSize[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) + } a.Buf.Write([]byte{ (dstVectorRegBits << 5) | 
dstVectorRegBits, 0b100110<<2 | dstVectorRegBits>>3, - size<<6 | 0b1<<5, - q<<6 | scalaTwoMisc.U<<5 | 0b01001110, + qs.size<<6 | 0b1<<5, + qs.q<<6 | scalaTwoMisc.U<<5 | 0b01001110, }) return } @@ -3604,11 +3571,11 @@ func (a *AssemblerImpl) encodeTwoVectorRegistersToVectorRegister(n *NodeImpl) (e } if threeSame, ok := advancedSIMDThreeSame[n.Instruction]; ok { - q, size, err := threeSame.qAndSizeResolver(n.VectorArrangement) - if err != nil { - return err + qs, ok := threeSame.qAndSize[n.VectorArrangement] + if !ok { + return fmt.Errorf("unsupported vector arrangement %s for %s", n.VectorArrangement, InstructionName(n.Instruction)) } - a.encodeAdvancedSIMDThreeSame(srcRegBits, srcRegBits2, dstRegBits, threeSame.Opcode, size, q, threeSame.U) + a.encodeAdvancedSIMDThreeSame(srcRegBits, srcRegBits2, dstRegBits, threeSame.Opcode, qs.size, qs.q, threeSame.U) return nil } diff --git a/internal/asm/arm64/impl_test.go b/internal/asm/arm64/impl_test.go index 690e231965..b1977aeef9 100644 --- a/internal/asm/arm64/impl_test.go +++ b/internal/asm/arm64/impl_test.go @@ -780,7 +780,6 @@ func TestAssemblerImpl_EncodeMemoryToVectorRegister(t *testing.T) { }, exp: []byte{0x6a, 0x0, 0x0, 0x18, 0xc1, 0x6b, 0xea, 0x3c, 0x0, 0x0, 0x0, 0x14, 0x4, 0x0, 0x2, 0x0}, }, - // LD1R { name: "ld1r {v11.8b}, [x12]", n: &NodeImpl{ @@ -895,7 +894,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x39, 0x2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x39, 0x2, 0x4e}, }, { inst: ADDV, @@ -903,7 +902,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, arr: VectorArrangement16B, - exp: []byte{0x4a, 0xb8, 0x31, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xb8, 0x31, 0x4e}, }, { inst: VORR, @@ -911,7 +910,7 @@ func 
TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0xa2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0xa2, 0x4e}, }, { inst: VORR, @@ -919,7 +918,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, arr: VectorArrangement8B, - exp: []byte{0x4a, 0x1d, 0xa2, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0xa2, 0xe}, }, { name: "fadd v10.2d, v10.2d, v2.2d", @@ -927,7 +926,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFADDD, arr: VectorArrangement2D, - exp: []byte{0x4a, 0xd5, 0x62, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xd5, 0x62, 0x4e}, }, { name: "fadd v10.4s, v10.4s, v2.4s", @@ -935,7 +934,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFADDS, arr: VectorArrangement4S, - exp: []byte{0x4a, 0xd5, 0x22, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xd5, 0x22, 0x4e}, }, { name: "fsub v10.2d, v10.2d, v2.2d", @@ -943,7 +942,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFSUBD, arr: VectorArrangement2D, - exp: []byte{0x4a, 0xd5, 0xe2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xd5, 0xe2, 0x4e}, }, { name: "fsub v10.4s, v10.4s, v2.4s", @@ -951,14 +950,14 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFSUBS, arr: VectorArrangement4S, - exp: []byte{0x4a, 0xd5, 0xa2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xd5, 0xa2, 0x4e}, }, { name: "ushll v10.8h, v2.8b, #0", x1: RegV2, x2: RegV10, inst: USHLLIMM, - exp: []byte{0x4a, 0xa4, 0x8, 0x2f, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa4, 0x8, 0x2f}, arr: VectorArrangement8B, }, { @@ -966,7 +965,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: USHLLIMM, - exp: []byte{0x4a, 0xa4, 0xf, 0x2f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa4, 0xf, 0x2f}, arr: VectorArrangement8B, c: 7, }, @@ -975,7 +974,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x8, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x8, 0x4f}, arr: VectorArrangement16B, c: 8, }, @@ -984,7 +983,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0xd, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0xd, 0x4f}, arr: VectorArrangement16B, c: 3, }, @@ -993,7 +992,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0xf, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0xf, 0x4f}, arr: VectorArrangement16B, c: 1, }, @@ -1002,7 +1001,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0xd, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0xd, 0xf}, arr: VectorArrangement8B, c: 3, }, @@ -1011,7 +1010,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x10, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x10, 0x4f}, arr: VectorArrangement8H, c: 16, }, @@ -1020,7 +1019,7 @@ func 
TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x11, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x11, 0x4f}, arr: VectorArrangement8H, c: 15, }, @@ -1029,7 +1028,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x1d, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x1d, 0x4f}, arr: VectorArrangement8H, c: 3, }, @@ -1038,7 +1037,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x11, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x11, 0xf}, arr: VectorArrangement4H, c: 15, }, @@ -1047,7 +1046,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x20, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x20, 0xf}, arr: VectorArrangement2S, c: 32, }, @@ -1056,7 +1055,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x21, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x21, 0xf}, arr: VectorArrangement2S, c: 31, }, @@ -1065,7 +1064,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x39, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x39, 0xf}, arr: VectorArrangement2S, c: 7, }, @@ -1074,7 +1073,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x39, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x39, 0x4f}, arr: VectorArrangement4S, c: 7, }, @@ -1083,7 +1082,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x41, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x41, 0x4f}, arr: VectorArrangement2D, c: 63, }, @@ -1092,7 +1091,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x5f, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x5f, 0x4f}, arr: VectorArrangement2D, c: 33, }, @@ -1101,7 +1100,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHR, - exp: []byte{0x4a, 0x4, 0x7f, 0x4f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x7f, 0x4f}, arr: VectorArrangement2D, c: 1, }, @@ -1110,18 +1109,14 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHLLIMM, - exp: []byte{ - 0x4a, 0xa4, 0x8, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, - arr: VectorArrangement8B, + exp: []byte{0x4a, 0xa4, 0x8, 0xf}, + arr: VectorArrangement8B, }, { name: "sshll v10.8h, v2.8b, #7", x1: RegV2, x2: RegV10, - inst: SSHLLIMM, exp: []byte{ - 0x4a, 0xa4, 0xf, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, + inst: SSHLLIMM, exp: []byte{0x4a, 0xa4, 0xf, 0xf}, arr: VectorArrangement8B, c: 7, }, @@ -1130,49 +1125,41 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x1: RegV2, x2: RegV10, inst: SSHLLIMM, - exp: []byte{ - 0x4a, 0xa4, 0x10, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, - arr: VectorArrangement4H, + exp: []byte{0x4a, 0xa4, 0x10, 0xf}, + arr: VectorArrangement4H, }, { name: "sshll v10.4s, v2.4h, #0xf", x1: 
RegV2, x2: RegV10, inst: SSHLLIMM, - exp: []byte{ - 0x4a, 0xa4, 0x1f, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, - arr: VectorArrangement4H, - c: 15, + exp: []byte{0x4a, 0xa4, 0x1f, 0xf}, + arr: VectorArrangement4H, + c: 15, }, { name: "sshll v10.2d, v2.2s, #0", x1: RegV2, x2: RegV10, inst: SSHLLIMM, - exp: []byte{ - 0x4a, 0xa4, 0x20, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, - arr: VectorArrangement2S, + exp: []byte{0x4a, 0xa4, 0x20, 0xf}, + arr: VectorArrangement2S, }, { name: "sshll v10.2d, v2.2s, #0x1f", x1: RegV2, x2: RegV10, inst: SSHLLIMM, - exp: []byte{ - 0x4a, 0xa4, 0x3f, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - }, - arr: VectorArrangement2S, - c: 31, + exp: []byte{0x4a, 0xa4, 0x3f, 0xf}, + arr: VectorArrangement2S, + c: 31, }, { x1: RegV2, x2: RegV10, name: "ins v10.s[2], v2.s[1]", inst: INSELEM, - exp: []byte{0x4a, 0x24, 0x14, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x24, 0x14, 0x6e}, arr: VectorArrangementS, srcIndex: 1, dstIndex: 2, @@ -1182,7 +1169,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "ins v10.s[0], v2.s[3]", inst: INSELEM, - exp: []byte{0x4a, 0x64, 0x4, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x64, 0x4, 0x6e}, arr: VectorArrangementS, srcIndex: 3, dstIndex: 0, @@ -1192,7 +1179,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "ins v10.b[0], v2.b[0xf]", inst: INSELEM, - exp: []byte{0x4a, 0x7c, 0x1, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x7c, 0x1, 0x6e}, arr: VectorArrangementB, srcIndex: 15, dstIndex: 0, @@ -1202,7 +1189,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "ins v10.d[1], v2.d[0]", inst: INSELEM, - exp: []byte{0x4a, 0x4, 0x18, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x18, 0x6e}, arr: VectorArrangementD, srcIndex: 0, dstIndex: 1, @@ -1212,7 +1199,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "dup v10.2d, v2.d[0]", inst: DUPELEM, - exp: []byte{0x4a, 0x4, 0x8, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x8, 0x4e}, arr: VectorArrangementD, srcIndex: 0, }, @@ -1221,7 +1208,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "dup v10.2d, v2.d[1]", inst: DUPELEM, - exp: []byte{0x4a, 0x4, 0x18, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x18, 0x4e}, arr: VectorArrangementD, srcIndex: 1, }, @@ -1230,7 +1217,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "dup v10.4s, v2.s[3]", inst: DUPELEM, - exp: []byte{0x4a, 0x4, 0x1c, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x1c, 0x4e}, arr: VectorArrangementS, srcIndex: 3, }, @@ -1239,7 +1226,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "dup v10.8h, v2.h[7]", inst: DUPELEM, - exp: []byte{0x4a, 0x4, 0x1e, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x1e, 0x4e}, arr: VectorArrangementH, srcIndex: 7, }, @@ -1248,7 +1235,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "dup v10.16b, v2.b[0xf]", inst: DUPELEM, - exp: []byte{0x4a, 0x4, 0x1f, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x4, 0x1f, 0x4e}, arr: VectorArrangementB, srcIndex: 15, }, @@ -1257,7 +1244,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "umaxp v10.16b, v10.16b, v2.16b", inst: UMAXP, - exp: []byte{0x4a, 0xa5, 0x22, 0x6e, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa5, 0x22, 0x6e}, arr: VectorArrangement16B, }, { @@ -1265,7 +1252,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "umaxp v10.8h, v10.8h, v2.8h", inst: UMAXP, - exp: []byte{0x4a, 0xa5, 0x62, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa5, 0x62, 0x6e}, arr: VectorArrangement8H, }, { @@ -1273,7 +1260,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "umaxp v10.4s, v10.4s, v2.4s", inst: UMAXP, - exp: []byte{0x4a, 0xa5, 0xa2, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa5, 0xa2, 0x6e}, arr: VectorArrangement4S, }, { @@ -1281,14 +1268,15 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV11, name: "addp d11, v11.2d", inst: ADDP, - exp: []byte{0x6b, 0xb9, 0xf1, 0x5e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + arr: VectorArrangement2D, + exp: []byte{0x6b, 0xb9, 0xf1, 0x5e}, }, { x1: RegV2, x2: RegV10, name: "addp v10.16b, v10.16b, v2.16b", inst: VADDP, - exp: []byte{0x4a, 0xbd, 0x22, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xbd, 0x22, 0x4e}, arr: VectorArrangement16B, }, { @@ -1296,7 +1284,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "addp v10.8h, v10.8h, v2.8h", inst: VADDP, - exp: []byte{0x4a, 0xbd, 0x62, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xbd, 0x62, 0x4e}, arr: VectorArrangement8H, }, { @@ -1304,7 +1292,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "addp v10.4s, v10.4s, v2.4s", inst: VADDP, - exp: []byte{0x4a, 0xbd, 0xa2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xbd, 0xa2, 0x4e}, arr: VectorArrangement4S, }, { @@ 
-1312,7 +1300,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "uminv b10, v2.16b", inst: UMINV, - exp: []byte{0x4a, 0xa8, 0x31, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa8, 0x31, 0x6e}, arr: VectorArrangement16B, }, { @@ -1320,7 +1308,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "uminv h10, v2.8h", inst: UMINV, - exp: []byte{0x4a, 0xa8, 0x71, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa8, 0x71, 0x6e}, arr: VectorArrangement8H, }, { @@ -1328,7 +1316,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "uminv s10, v2.4s", inst: UMINV, - exp: []byte{0x4a, 0xa8, 0xb1, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xa8, 0xb1, 0x6e}, arr: VectorArrangement4S, }, { @@ -1337,7 +1325,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { name: "cmeq v10.2d, v10.2d, v2.2d", arr: VectorArrangement2D, inst: CMEQ, - exp: []byte{0x4a, 0x8d, 0xe2, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x8d, 0xe2, 0x6e}, }, { x1: RegRZR, @@ -1345,7 +1333,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { name: "cmeq v30.2d, v30.2d, #0", inst: CMEQZERO, arr: VectorArrangement2D, - exp: []byte{0xde, 0x9b, 0xe0, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x9b, 0xe0, 0x4e}, }, { name: "tbl v1.8b, {v0.16b}, v1.8b", @@ -1353,7 +1341,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV1, inst: TBL1, arr: VectorArrangement8B, - exp: []byte{0x1, 0x0, 0x1, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x1, 0x0, 0x1, 0xe}, }, { name: "tbl v1.16b, {v0.16b}, v1.16b", @@ -1361,7 +1349,7 @@ func 
TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV1, inst: TBL1, arr: VectorArrangement16B, - exp: []byte{0x1, 0x0, 0x1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x1, 0x0, 0x1, 0x4e}, }, { name: "tbl v30.8b, {v0.16b, v1.16b}, v30.8b", @@ -1369,7 +1357,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV30, inst: TBL2, arr: VectorArrangement8B, - exp: []byte{0x1e, 0x20, 0x1e, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x1e, 0x20, 0x1e, 0xe}, }, { name: "tbl v1.16b, {v31.16b, v0.16b}, v1.16b", @@ -1377,14 +1365,14 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV1, inst: TBL2, arr: VectorArrangement16B, - exp: []byte{0xe1, 0x23, 0x1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe1, 0x23, 0x1, 0x4e}, }, { x1: RegV2, x2: RegV10, name: "add v10.4s, v10.4s, v2.4s", inst: VADD, - exp: []byte{0x4a, 0x85, 0xa2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x85, 0xa2, 0x4e}, arr: VectorArrangement4S, }, { @@ -1392,7 +1380,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "add v10.2d, v10.2d, v2.2d", inst: VADD, - exp: []byte{0x4a, 0x85, 0xe2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x85, 0xe2, 0x4e}, arr: VectorArrangement2D, }, { @@ -1400,7 +1388,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "sub v10.8h, v10.8h, v2.8h", inst: VSUB, - exp: []byte{0x4a, 0x85, 0x62, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x85, 0x62, 0x6e}, arr: VectorArrangement8H, }, { @@ -1408,7 +1396,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV30, name: "sub v30.16b, v30.16b, v29.16b", inst: VSUB, - exp: 
[]byte{0xde, 0x87, 0x3d, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x87, 0x3d, 0x6e}, arr: VectorArrangement16B, }, { @@ -1417,7 +1405,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: BIC, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0x62, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x62, 0x4e}, }, { name: "eor v10.16b, v10.16b, v2.16b", @@ -1425,7 +1413,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: EOR, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0x22, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x22, 0x6e}, }, { name: "bsl v10.16b, v10.16b, v2.16b", @@ -1433,7 +1421,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: BSL, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0x62, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x62, 0x6e}, }, { name: "bsl v10.16b, v10.16b, v2.16b", @@ -1441,7 +1429,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: BSL, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0x62, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x62, 0x6e}, }, { name: "and v10.16b, v10.16b, v2.16b", @@ -1449,7 +1437,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VAND, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x1d, 0x22, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x22, 0x4e}, }, { // mvn is an alias of NOT: https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MVN--Bitwise-NOT--vector---an-alias-of-NOT-?lang=en @@ -1458,7 +1446,7 @@ func 
TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: NOT, arr: VectorArrangement16B, - exp: []byte{0x4a, 0x58, 0x20, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x58, 0x20, 0x6e}, }, { name: "fneg v10.2d, v2.2d", @@ -1466,7 +1454,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFNEG, arr: VectorArrangement2D, - exp: []byte{0x4a, 0xf8, 0xe0, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xf8, 0xe0, 0x6e}, }, { name: "fneg v10.4s, v2.4s", @@ -1474,14 +1462,14 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, inst: VFNEG, arr: VectorArrangement4S, - exp: []byte{0x4a, 0xf8, 0xa0, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xf8, 0xa0, 0x6e}, }, { x1: RegV2, x2: RegV10, name: "sshl v10.2d, v10.2d, v2.2d", inst: SSHL, - exp: []byte{0x4a, 0x45, 0xe2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x45, 0xe2, 0x4e}, arr: VectorArrangement2D, }, { @@ -1489,7 +1477,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV30, name: "sshl v30.4s, v30.4s, v25.4s", inst: SSHL, - exp: []byte{0xde, 0x47, 0xb9, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x47, 0xb9, 0x4e}, arr: VectorArrangement4S, }, { @@ -1497,7 +1485,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV10, name: "ushl v10.8h, v10.8h, v2.8h", inst: USHL, - exp: []byte{0x4a, 0x45, 0x62, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x45, 0x62, 0x6e}, arr: VectorArrangement8H, }, { @@ -1505,9 +1493,153 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { x2: RegV30, name: "ushl v30.16b, v30.16b, v25.16b", inst: USHL, - exp: []byte{0xde, 0x47, 0x39, 
0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x47, 0x39, 0x6e}, arr: VectorArrangement16B, }, + { + x1: RegV25, + x2: RegV30, + name: "fabs v30.4s, v25.4s", + inst: VFABS, + exp: []byte{0x3e, 0xfb, 0xa0, 0x4e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "fabs v30.2s, v25.2s", + inst: VFABS, + exp: []byte{0x3e, 0xfb, 0xa0, 0xe}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "fabs v30.2d, v25.2d", + inst: VFABS, + exp: []byte{0x3e, 0xfb, 0xe0, 0x4e}, + arr: VectorArrangement2D, + }, + { + x1: RegV25, + x2: RegV30, + name: "fsqrt v30.4s, v25.4s", + inst: VFSQRT, + exp: []byte{0x3e, 0xfb, 0xa1, 0x6e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "fsqrt v30.2s, v25.2s", + inst: VFSQRT, + exp: []byte{0x3e, 0xfb, 0xa1, 0x2e}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "fsqrt v30.2d, v25.2d", + inst: VFSQRT, + exp: []byte{0x3e, 0xfb, 0xe1, 0x6e}, + arr: VectorArrangement2D, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintm v30.4s, v25.4s", + inst: VFRINTM, + exp: []byte{0x3e, 0x9b, 0x21, 0x4e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintm v30.2s, v25.2s", + inst: VFRINTM, + exp: []byte{0x3e, 0x9b, 0x21, 0xe}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintm v30.2d, v25.2d", + inst: VFRINTM, + exp: []byte{0x3e, 0x9b, 0x61, 0x4e}, + arr: VectorArrangement2D, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintn v30.4s, v25.4s", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x21, 0x4e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintn v30.2s, v25.2s", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x21, 0xe}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintn v30.2d, v25.2d", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x61, 0x4e}, + arr: VectorArrangement2D, + }, + { + x1: RegV25, + x2: RegV30, + 
name: "frintp v30.4s, v25.4s", + inst: VFRINTP, + exp: []byte{0x3e, 0x8b, 0xa1, 0x4e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintp v30.2s, v25.2s", + inst: VFRINTP, + exp: []byte{0x3e, 0x8b, 0xa1, 0xe}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintp v30.2d, v25.2d", + inst: VFRINTP, + exp: []byte{0x3e, 0x8b, 0xe1, 0x4e}, + arr: VectorArrangement2D, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintp v30.4s, v25.4s", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x21, 0x4e}, + arr: VectorArrangement4S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintp v30.2s, v25.2s", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x21, 0xe}, + arr: VectorArrangement2S, + }, + { + x1: RegV25, + x2: RegV30, + name: "frintp v30.2d, v25.2d", + inst: VFRINTN, + exp: []byte{0x3e, 0x8b, 0x61, 0x4e}, + arr: VectorArrangement2D, + }, } for _, tt := range tests { @@ -1524,9 +1656,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToVectorRegister(t *testing.T) { DstVectorIndex: tc.dstIndex, }) require.NoError(t, err) - actual, err := a.Assemble() - require.NoError(t, err) - + actual := a.Buf.Bytes() require.Equal(t, tc.exp, actual, hex.EncodeToString(actual)) }) } @@ -1548,7 +1678,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementB, SrcVectorIndex: 15, }, - exp: []byte{0xa, 0x3c, 0x1f, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xa, 0x3c, 0x1f, 0xe}, }, { name: "mov w10, v0.s[3]", @@ -1559,7 +1689,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementS, SrcVectorIndex: 3, }, - exp: []byte{0xa, 0x3c, 0x1c, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xa, 0x3c, 0x1c, 0xe}, }, { name: "mov x5, v30.d[1]", @@ -1570,7 +1700,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementD, 
SrcVectorIndex: 1, }, - exp: []byte{0xc5, 0x3f, 0x18, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xc5, 0x3f, 0x18, 0x4e}, }, { name: "smov w10, v0.b[0xf]", @@ -1581,7 +1711,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementB, SrcVectorIndex: 15, }, - exp: []byte{0xa, 0x2c, 0x1f, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xa, 0x2c, 0x1f, 0xe}, }, { name: "smov w10, v0.b[0]", @@ -1592,7 +1722,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementB, SrcVectorIndex: 0, }, - exp: []byte{0xa, 0x2c, 0x1, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xa, 0x2c, 0x1, 0xe}, }, { name: "smov w1, v30.h[7]", @@ -1603,7 +1733,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementH, SrcVectorIndex: 7, }, - exp: []byte{0xc1, 0x2f, 0x1e, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xc1, 0x2f, 0x1e, 0xe}, }, { name: "smov w1, v30.h[0]", @@ -1614,7 +1744,7 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { VectorArrangement: VectorArrangementH, SrcVectorIndex: 0, }, - exp: []byte{0xc1, 0x2f, 0x2, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xc1, 0x2f, 0x2, 0xe}, }, } @@ -1624,9 +1754,8 @@ func TestAssemblerImpl_EncodeVectorRegisterToRegister(t *testing.T) { a := NewAssemblerImpl(asm.NilRegister) err := a.EncodeVectorRegisterToRegister(tc.n) require.NoError(t, err) - actual, err := a.Assemble() - require.NoError(t, err) + actual := a.Buf.Bytes() require.Equal(t, tc.exp, actual, hex.EncodeToString(actual)) }) } @@ -1647,7 +1776,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV10, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0x5e, 0x1d, 0xa1, 0x4e, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x5e, 0x1d, 0xa1, 0x4e}, }, { name: "orr v30.8b, v10.8b, v1.8b", @@ -1658,7 +1787,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV10, VectorArrangement: VectorArrangement8B, }, - exp: []byte{0x5e, 0x1d, 0xa1, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x5e, 0x1d, 0xa1, 0xe}, }, { name: "bsl v0.8b, v15.8b, v1.8b", @@ -1669,7 +1798,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement8B, }, - exp: []byte{0xe0, 0x1d, 0x61, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x1d, 0x61, 0x2e}, }, { name: "zip1 v0.4s, v15.4s, v1.4s", @@ -1680,7 +1809,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0xe0, 0x39, 0x81, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x39, 0x81, 0x4e}, }, { name: "zip1 v0.2d, v15.2d, v1.2d", @@ -1691,7 +1820,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0xe0, 0x39, 0xc1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x39, 0xc1, 0x4e}, }, { name: "ext v0.16b, v15.16b, v1.16b, #0xf", @@ -1703,7 +1832,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcConst: 0xf, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xe0, 0x79, 0x1, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x79, 0x1, 0x6e}, }, { name: "ext v0.16b, v15.16b, v1.16b, #8", @@ -1715,7 +1844,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcConst: 8, VectorArrangement: VectorArrangement16B, 
}, - exp: []byte{0xe0, 0x41, 0x1, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x41, 0x1, 0x6e}, }, { name: "ext v0.16b, v15.16b, v1.16b, #0", @@ -1727,7 +1856,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcConst: 0, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xe0, 0x1, 0x1, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x1, 0x1, 0x6e}, }, { name: "ext v0.8b, v15.8b, v1.8b, #7", @@ -1739,7 +1868,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcConst: 7, VectorArrangement: VectorArrangement8B, }, - exp: []byte{0xe0, 0x39, 0x1, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x39, 0x1, 0x2e}, }, { name: "cmeq v0.8b, v15.8b, v1.8b", @@ -1750,7 +1879,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement8B, }, - exp: []byte{0xe0, 0x8d, 0x21, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x8d, 0x21, 0x2e}, }, { name: "cmgt v0.16b, v15.16b, v1.16b", @@ -1761,7 +1890,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xe0, 0x35, 0x21, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x35, 0x21, 0x4e}, }, { name: "cmhi v0.8h, v15.8h, v1.8h", @@ -1772,7 +1901,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement8H, }, - exp: []byte{0xe0, 0x35, 0x61, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x35, 0x61, 0x6e}, }, { name: "cmhi v0.4h, v15.4h, v1.4h", @@ -1783,7 +1912,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, 
VectorArrangement: VectorArrangement4H, }, - exp: []byte{0xe0, 0x35, 0x61, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x35, 0x61, 0x2e}, }, { name: "cmge v0.4s, v15.4s, v1.4s", @@ -1794,7 +1923,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0xe0, 0x3d, 0xa1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x3d, 0xa1, 0x4e}, }, { name: "cmge v0.2s, v15.2s, v1.2s", @@ -1805,7 +1934,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV15, VectorArrangement: VectorArrangement2S, }, - exp: []byte{0xe0, 0x3d, 0xa1, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xe0, 0x3d, 0xa1, 0xe}, }, { name: "cmhs v30.2d, v4.2d, v11.2d", @@ -1816,7 +1945,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0x9e, 0x3c, 0xeb, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0x3c, 0xeb, 0x6e}, }, { name: "fcmeq v30.2d, v4.2d, v11.2d", @@ -1827,7 +1956,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0x9e, 0xe4, 0x6b, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x6b, 0x4e}, }, { name: "fcmeq v30.4s, v4.4s, v11.4s", @@ -1838,7 +1967,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0x9e, 0xe4, 0x2b, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x2b, 0x4e}, }, { name: "fcmeq v30.2s, v4.2s, v11.2s", @@ -1849,7 +1978,7 @@ func 
TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2S, }, - exp: []byte{0x9e, 0xe4, 0x2b, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x2b, 0xe}, }, { name: "fcmgt v30.2d, v4.2d, v11.2d", @@ -1860,7 +1989,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0x9e, 0xe4, 0xeb, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0xeb, 0x6e}, }, { name: "fcmgt v30.4s, v4.4s, v11.4s", @@ -1871,7 +2000,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0x9e, 0xe4, 0xab, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0xab, 0x6e}, }, { name: "fcmgt v30.2s, v4.2s, v11.2s", @@ -1882,7 +2011,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2S, }, - exp: []byte{0x9e, 0xe4, 0xab, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0xab, 0x2e}, }, { name: "fcmge v30.2d, v4.2d, v11.2d", @@ -1893,7 +2022,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0x9e, 0xe4, 0x6b, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x6b, 0x6e}, }, { name: "fcmge v30.4s, v4.4s, v11.4s", @@ -1904,7 +2033,7 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0x9e, 0xe4, 0x2b, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x2b, 0x6e}, }, { name: "fcmge 
v30.2s, v4.2s, v11.2s", @@ -1915,7 +2044,139 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { SrcReg2: RegV4, VectorArrangement: VectorArrangement2S, }, - exp: []byte{0x9e, 0xe4, 0x2b, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x9e, 0xe4, 0x2b, 0x2e}, + }, + { + name: "fdiv v30.4s, v4.4s, v11.4s", + n: &NodeImpl{ + Instruction: VFDIV, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement4S, + }, + exp: []byte{0x9e, 0xfc, 0x2b, 0x6e}, + }, + { + name: "fdiv v30.2s, v4.2s, v11.2s", + n: &NodeImpl{ + Instruction: VFDIV, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2S, + }, + exp: []byte{0x9e, 0xfc, 0x2b, 0x2e}, + }, + { + name: "fdiv v30.2d, v4.2d, v11.2d", + n: &NodeImpl{ + Instruction: VFDIV, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2D, + }, + exp: []byte{0x9e, 0xfc, 0x6b, 0x6e}, + }, + { + name: "fmul v30.4s, v4.4s, v11.4s", + n: &NodeImpl{ + Instruction: VFMUL, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement4S, + }, + exp: []byte{0x9e, 0xdc, 0x2b, 0x6e}, + }, + { + name: "fmul v30.2s, v4.2s, v11.2s", + n: &NodeImpl{ + Instruction: VFMUL, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2S, + }, + exp: []byte{0x9e, 0xdc, 0x2b, 0x2e}, + }, + { + name: "fmul v30.2d, v4.2d, v11.2d", + n: &NodeImpl{ + Instruction: VFMUL, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2D, + }, + exp: []byte{0x9e, 0xdc, 0x6b, 0x6e}, + }, + { + name: "fmin v30.4s, v4.4s, v11.4s", + n: &NodeImpl{ + Instruction: VFMIN, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement4S, + }, + exp: []byte{0x9e, 0xf4, 0xab, 0x4e}, + }, + { + name: "fmin v30.2s, v4.2s, v11.2s", + n: &NodeImpl{ + Instruction: VFMIN, + DstReg: 
RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2S, + }, + exp: []byte{0x9e, 0xf4, 0xab, 0xe}, + }, + { + name: "fmin v30.2d, v4.2d, v11.2d", + n: &NodeImpl{ + Instruction: VFMIN, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2D, + }, + exp: []byte{0x9e, 0xf4, 0xeb, 0x4e}, + }, + { + name: "fmax v30.4s, v4.4s, v11.4s", + n: &NodeImpl{ + Instruction: VFMAX, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement4S, + }, + exp: []byte{0x9e, 0xf4, 0x2b, 0x4e}, + }, + { + name: "fmax v30.2s, v4.2s, v11.2s", + n: &NodeImpl{ + Instruction: VFMAX, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2S, + }, + exp: []byte{0x9e, 0xf4, 0x2b, 0xe}, + }, + { + name: "fmax v30.2d, v4.2d, v11.2d", + n: &NodeImpl{ + Instruction: VFMAX, + DstReg: RegV30, + SrcReg: RegV11, + SrcReg2: RegV4, + VectorArrangement: VectorArrangement2D, + }, + exp: []byte{0x9e, 0xf4, 0x6b, 0x4e}, }, } @@ -1925,9 +2186,8 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) { a := NewAssemblerImpl(asm.NilRegister) err := a.encodeTwoVectorRegistersToVectorRegister(tc.n) require.NoError(t, err) - actual, err := a.Assemble() - require.NoError(t, err) + actual := a.Buf.Bytes() require.Equal(t, tc.exp, actual, hex.EncodeToString(actual)) }) } @@ -1947,7 +2207,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 1, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0x3, 0x0, 0x12, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x3, 0x0, 0x12}, }, { name: "and w30, w30, #7", @@ -1957,7 +2217,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x7, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0xb, 0x0, 0x12, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0xb, 0x0, 0x12}, }, { name: "and 
w30, w30, #0xf", @@ -1967,7 +2227,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0xf, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0xf, 0x0, 0x12, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0xf, 0x0, 0x12}, }, { name: "and w30, w30, #0x1f", @@ -1977,7 +2237,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x1f, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0x13, 0x0, 0x12, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x13, 0x0, 0x12}, }, { name: "and w30, w30, #0x3f", @@ -1987,7 +2247,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x3f, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0x17, 0x0, 0x12, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x17, 0x0, 0x12}, }, { name: "and x30, x30, #1", @@ -1997,7 +2257,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 1, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0x3, 0x40, 0x92, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x3, 0x40, 0x92}, }, { name: "and x30, x30, #7", @@ -2007,7 +2267,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x7, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0xb, 0x40, 0x92, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0xb, 0x40, 0x92}, }, { name: "and x30, x30, #0xf", @@ -2017,7 +2277,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0xf, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0xf, 0x40, 0x92, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0xf, 0x40, 0x92}, }, { name: "and x30, x30, #0x1f", @@ -2027,7 +2287,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x1f, VectorArrangement: 
VectorArrangement16B, }, - exp: []byte{0xde, 0x13, 0x40, 0x92, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x13, 0x40, 0x92}, }, { name: "and x30, x30, #0x3f", @@ -2037,7 +2297,7 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { SrcConst: 0x3f, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0xde, 0x17, 0x40, 0x92, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0xde, 0x17, 0x40, 0x92}, }, } @@ -2047,9 +2307,8 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { a := NewAssemblerImpl(asm.NilRegister) err := a.EncodeConstToRegister(tc.n) require.NoError(t, err) - actual, err := a.Assemble() - require.NoError(t, err) + actual := a.Buf.Bytes() require.Equal(t, tc.exp, actual, hex.EncodeToString(actual)) }) } @@ -2070,7 +2329,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { SrcReg: RegR10, VectorArrangement: VectorArrangementD, }, - exp: []byte{0x4a, 0x1d, 0x8, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x8, 0x4e}, }, { name: "ins v10.d[1], x10", @@ -2081,7 +2340,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { VectorArrangement: VectorArrangementD, DstVectorIndex: 1, }, - exp: []byte{0x4a, 0x1d, 0x18, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0x1d, 0x18, 0x4e}, }, { name: "dup v10.2d, x10", @@ -2091,7 +2350,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { DstReg: RegV10, VectorArrangement: VectorArrangement2D, }, - exp: []byte{0x4a, 0xd, 0x8, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x4a, 0xd, 0x8, 0x4e}, }, { name: "dup v1.4s, w30", @@ -2101,7 +2360,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { DstReg: RegV1, VectorArrangement: VectorArrangement4S, }, - exp: []byte{0xc1, 0xf, 0x4, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0}, + exp: []byte{0xc1, 0xf, 0x4, 0x4e}, }, { name: "dup v30.8h, w1", @@ -2111,7 +2370,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { DstReg: RegV30, VectorArrangement: VectorArrangement8H, }, - exp: []byte{0x3e, 0xc, 0x2, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x3e, 0xc, 0x2, 0x4e}, }, { name: "dup v30.16b, w1", @@ -2121,7 +2380,7 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { DstReg: RegV30, VectorArrangement: VectorArrangement16B, }, - exp: []byte{0x3e, 0xc, 0x1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + exp: []byte{0x3e, 0xc, 0x1, 0x4e}, }, } @@ -2131,9 +2390,8 @@ func TestAssemblerImpl_EncodeRegisterToVectorRegister(t *testing.T) { a := NewAssemblerImpl(asm.NilRegister) err := a.EncodeRegisterToVectorRegister(tc.n) require.NoError(t, err) - actual, err := a.Assemble() - require.NoError(t, err) + actual := a.Buf.Bytes() require.Equal(t, tc.exp, actual, hex.EncodeToString(actual)) }) } diff --git a/internal/engine/compiler/compiler_vec_test.go b/internal/engine/compiler/compiler_vec_test.go index 2aaa90ad70..3ff2d34db9 100644 --- a/internal/engine/compiler/compiler_vec_test.go +++ b/internal/engine/compiler/compiler_vec_test.go @@ -3473,10 +3473,6 @@ func TestCompiler_compileV128AvgrU(t *testing.T) { } func TestCompiler_compileV128Sqrt(t *testing.T) { - if runtime.GOARCH != "amd64" { - // TODO: implement on amd64. - t.Skip() - } tests := []struct { name string @@ -3843,11 +3839,6 @@ func TestCompiler_compileV128Abs(t *testing.T) { } func TestCompiler_compileV128Div(t *testing.T) { - if runtime.GOARCH != "amd64" { - // TODO: implement on amd64. - t.Skip() - } - tests := []struct { name string shape wazeroir.Shape @@ -4635,11 +4626,6 @@ func TestCompiler_compileV128Popcnt(t *testing.T) { } func TestCompiler_compileV128Round(t *testing.T) { - if runtime.GOARCH != "amd64" { - // TODO: implement on amd64. 
- t.Skip() - } - tests := []struct { name string shape wazeroir.Shape diff --git a/internal/engine/compiler/impl_vec_arm64.go b/internal/engine/compiler/impl_vec_arm64.go index f7a9a97a64..94d655f927 100644 --- a/internal/engine/compiler/impl_vec_arm64.go +++ b/internal/engine/compiler/impl_vec_arm64.go @@ -628,7 +628,7 @@ func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.OperationV128AllTrue) (er c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v, arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone) c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v, - arm64.VectorArrangementNone, arm64.VectorIndexNone, arm64.VectorIndexNone) + arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone) c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v) c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ) } else { @@ -1023,27 +1023,83 @@ func (c *arm64Compiler) compileV128SubSat(o *wazeroir.OperationV128SubSat) error // compileV128Mul implements compiler.compileV128Mul for arm64. func (c *arm64Compiler) compileV128Mul(o *wazeroir.OperationV128Mul) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + var inst asm.Instruction + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + inst = arm64.VFMUL + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + inst = arm64.VFMUL + default: + // TODO: support for integer mul. + return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + } + return c.compileV128x2BinOp(inst, arr) } // compileV128Div implements compiler.compileV128Div for arm64. 
func (c *arm64Compiler) compileV128Div(o *wazeroir.OperationV128Div) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + var inst asm.Instruction + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + inst = arm64.VFDIV + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + inst = arm64.VFDIV + } + return c.compileV128x2BinOp(inst, arr) } // compileV128Neg implements compiler.compileV128Neg for arm64. func (c *arm64Compiler) compileV128Neg(o *wazeroir.OperationV128Neg) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + var inst asm.Instruction + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + inst = arm64.VFNEG + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + inst = arm64.VFNEG + default: + // TODO: support for integer neg. + return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + } + return c.compileV128UniOp(inst, arr) } // compileV128Sqrt implements compiler.compileV128Sqrt for arm64. func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.OperationV128Sqrt) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + } + return c.compileV128UniOp(arm64.VFSQRT, arr) } // compileV128Abs implements compiler.compileV128Abs for arm64. 
func (c *arm64Compiler) compileV128Abs(o *wazeroir.OperationV128Abs) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var inst asm.Instruction + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + inst = arm64.VFABS + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + inst = arm64.VFABS + arr = arm64.VectorArrangement2D + default: + // TODO: support for integer abs. + return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + } + return c.compileV128UniOp(inst, arr) } // compileV128Popcnt implements compiler.compileV128Popcnt for arm64. @@ -1053,12 +1109,38 @@ func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.OperationV128Popcnt) error // compileV128Min implements compiler.compileV128Min for arm64. func (c *arm64Compiler) compileV128Min(o *wazeroir.OperationV128Min) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var inst asm.Instruction + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + inst = arm64.VFMIN + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + inst = arm64.VFMIN + arr = arm64.VectorArrangement2D + default: + // TODO: support for integer min. + return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + } + return c.compileV128x2BinOp(inst, arr) } // compileV128Max implements compiler.compileV128Max for arm64. func (c *arm64Compiler) compileV128Max(o *wazeroir.OperationV128Max) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var inst asm.Instruction + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + inst = arm64.VFMAX + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + inst = arm64.VFMAX + arr = arm64.VectorArrangement2D + default: + // TODO: support for integer max. 
+ return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + } + return c.compileV128x2BinOp(inst, arr) } // compileV128AvgrU implements compiler.compileV128AvgrU for arm64. @@ -1078,22 +1160,50 @@ func (c *arm64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error { // compileV128Ceil implements compiler.compileV128Ceil for arm64. func (c *arm64Compiler) compileV128Ceil(o *wazeroir.OperationV128Ceil) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + } + return c.compileV128UniOp(arm64.VFRINTP, arr) } // compileV128Floor implements compiler.compileV128Floor for arm64. func (c *arm64Compiler) compileV128Floor(o *wazeroir.OperationV128Floor) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + } + return c.compileV128UniOp(arm64.VFRINTM, arr) } // compileV128Trunc implements compiler.compileV128Trunc for arm64. func (c *arm64Compiler) compileV128Trunc(o *wazeroir.OperationV128Trunc) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + } + return c.compileV128UniOp(arm64.VFRINTZ, arr) } // compileV128Nearest implements compiler.compileV128Nearest for arm64. 
func (c *arm64Compiler) compileV128Nearest(o *wazeroir.OperationV128Nearest) error { - return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind()) + var arr arm64.VectorArrangement + switch o.Shape { + case wazeroir.ShapeF32x4: + arr = arm64.VectorArrangement4S + case wazeroir.ShapeF64x2: + arr = arm64.VectorArrangement2D + } + return c.compileV128UniOp(arm64.VFRINTN, arr) } // compileV128Extend implements compiler.compileV128Extend for arm64. diff --git a/internal/integration_test/spectest/v2/spec_test.go b/internal/integration_test/spectest/v2/spec_test.go index 68a8ae7513..808a538aa1 100644 --- a/internal/integration_test/spectest/v2/spec_test.go +++ b/internal/integration_test/spectest/v2/spec_test.go @@ -26,11 +26,9 @@ func TestCompiler(t *testing.T) { spectest.Run(t, testcases, compiler.NewEngine, enabledFeatures, func(jsonname string) bool { switch path.Base(jsonname) { - case "simd_f32x4_arith.json", "simd_f64x2_arith.json", - "simd_i16x8_arith.json", "simd_i64x2_arith.json", "simd_i32x4_arith.json", "simd_i8x16_arith.json", + case "simd_i16x8_arith.json", "simd_i64x2_arith.json", "simd_i32x4_arith.json", "simd_i8x16_arith.json", "simd_i16x8_sat_arith.json", "simd_i8x16_sat_arith.json", "simd_i16x8_arith2.json", "simd_i8x16_arith2.json", "simd_i32x4_arith2.json", "simd_i64x2_arith2.json", - "simd_f64x2.json", "simd_f32x4.json", "simd_f32x4_rounding.json", "simd_f64x2_rounding.json", "simd_f64x2_pmin_pmax.json", "simd_f32x4_pmin_pmax.json", "simd_int_to_int_extend.json", "simd_i64x2_extmul_i32x4.json", "simd_i32x4_extmul_i16x8.json", "simd_i16x8_extmul_i8x16.json", "simd_i16x8_q15mulr_sat_s.json", "simd_i16x8_extadd_pairwise_i8x16.json", "simd_i32x4_extadd_pairwise_i16x8.json",