From 0514b8c55b3162169b75a371417b7ce9b8dd5068 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 7 Jan 2022 10:58:41 -0800 Subject: [PATCH] cranelift: port `sshr` to ISLE on x64 --- .../lower/isle/generated_code.manifest | 2 +- .../isa/aarch64/lower/isle/generated_code.rs | 19 +- cranelift/codegen/src/isa/x64/inst.isle | 100 +- cranelift/codegen/src/isa/x64/lower.isle | 132 +- cranelift/codegen/src/isa/x64/lower.rs | 183 +- cranelift/codegen/src/isa/x64/lower/isle.rs | 54 +- .../x64/lower/isle/generated_code.manifest | 6 +- .../src/isa/x64/lower/isle/generated_code.rs | 883 +++++--- cranelift/codegen/src/machinst/isle.rs | 25 +- cranelift/codegen/src/prelude.isle | 19 +- .../filetests/filetests/isa/x64/i128.clif | 1885 ++++++++--------- .../isa/x64/simd-bitwise-compile.clif | 78 +- cranelift/filetests/src/runner.rs | 2 +- 13 files changed, 1793 insertions(+), 1595 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest index e71f6812677a..f754f379a87d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle f176ef3bba99365 -src/prelude.isle babc931e5dc5b4cf +src/prelude.isle 3434bc353bb711d5 src/isa/aarch64/inst.isle 5fa80451697b084f src/isa/aarch64/lower.isle 2d2e1e076a0c8a23 diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index 6e48ebb0e13d..472281c84e81 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -20,6 +20,8 @@ pub trait Context { fn pack_value_array_2(&mut self, arg0: Value, arg1: Value) -> ValueArray2; fn unpack_value_array_3(&mut self, arg0: &ValueArray3) -> (Value, Value, Value); fn pack_value_array_3(&mut self, arg0: 
Value, arg1: Value, arg2: Value) -> ValueArray3; + fn u32_add(&mut self, arg0: u32, arg1: u32) -> u32; + fn u8_and(&mut self, arg0: u8, arg1: u8) -> u8; fn value_reg(&mut self, arg0: Reg) -> ValueRegs; fn value_regs(&mut self, arg0: Reg, arg1: Reg) -> ValueRegs; fn temp_writable_reg(&mut self, arg0: Type) -> WritableReg; @@ -32,6 +34,7 @@ pub trait Context { fn u32_as_u64(&mut self, arg0: u32) -> u64; fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits_u16(&mut self, arg0: Type) -> u16; + fn lane_type(&mut self, arg0: Type) -> Type; fn fits_in_16(&mut self, arg0: Type) -> Option; fn fits_in_32(&mut self, arg0: Type) -> Option; fn fits_in_64(&mut self, arg0: Type) -> Option; @@ -52,9 +55,9 @@ pub trait Context { fn first_result(&mut self, arg0: Inst) -> Option; fn inst_data(&mut self, arg0: Inst) -> InstructionData; fn value_type(&mut self, arg0: Value) -> Type; - fn ty_bits_mask(&mut self, arg0: Type) -> u64; fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>; fn def_inst(&mut self, arg0: Value) -> Option; + fn const_value(&mut self, arg0: Value) -> Option; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn move_wide_const_from_u64(&mut self, arg0: u64) -> Option; @@ -89,13 +92,13 @@ pub trait Context { fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift; } -/// Internal type ProducesFlags: defined at src/prelude.isle line 263. +/// Internal type ProducesFlags: defined at src/prelude.isle line 272. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlags { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 266. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 275. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlags { inst: MInst, result: Reg }, @@ -975,7 +978,7 @@ pub enum AtomicRMWOp { // Generated as internal constructor for term temp_reg. 
pub fn constructor_temp_reg(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/prelude.isle line 60. + // Rule at src/prelude.isle line 66. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -984,7 +987,7 @@ pub fn constructor_temp_reg(ctx: &mut C, arg0: Type) -> Option // Generated as internal constructor for term lo_reg. pub fn constructor_lo_reg(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/prelude.isle line 95. + // Rule at src/prelude.isle line 101. let expr0_0 = C::put_in_regs(ctx, pattern0_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -1009,7 +1012,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 276. + // Rule at src/prelude.isle line 285. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1037,7 +1040,7 @@ pub fn constructor_with_flags_1( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 284. + // Rule at src/prelude.isle line 293. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); return Some(pattern3_1); @@ -1071,7 +1074,7 @@ pub fn constructor_with_flags_2( result: pattern5_1, } = pattern4_0 { - // Rule at src/prelude.isle line 294. + // Rule at src/prelude.isle line 303. 
let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern5_0); let expr2_0 = C::emit(ctx, &pattern3_0); diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 8f94248d764c..9be471a7e288 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -35,6 +35,7 @@ (dst WritableReg) (imm u8) (size OperandSize)) + (XmmUninitializedValue (dst WritableReg)) (CmpRmiR (size OperandSize) (opcode CmpOpcode) (src RegMemImm) @@ -292,6 +293,15 @@ (Mem (addr SyntheticAmode)) (Imm (simm32 u32)))) +;; Put the given clif value into a `RegMemImm` operand. +;; +;; Asserts that the value fits into a single register, and doesn't require +;; multiple registers for its representation (like `i128` for example). +;; +;; As a side effect, this marks the value as used. +(decl put_in_reg_mem_imm (Value) RegMemImm) +(extern constructor put_in_reg_mem_imm put_in_reg_mem_imm) + (type RegMem extern (enum (Reg (reg Reg)) @@ -319,6 +329,18 @@ (enum (Imm8 (imm u8)) (Reg (reg Reg)))) +;; Put the given clif value into a `Imm8Reg` operand, masked to the bit width of +;; the given type. +;; +;; Asserts that the value fits into a single register, and doesn't require +;; multiple registers for its representation (like `i128` for example). +;; +;; As a side effect, this marks the value as used. +;; +;; This is used when lowering various shifts and rotates. +(decl put_masked_in_imm8_reg (Value Type) Imm8Reg) +(extern constructor put_masked_in_imm8_reg put_masked_in_imm8_reg) + (type CC extern (enum O NO @@ -383,9 +405,12 @@ (decl imm8_from_value (Imm8Reg) Value) (extern extractor imm8_from_value imm8_from_value) -;; Mask an `Imm8Reg.Imm8`. -(decl mask_imm8_const (Imm8Reg u64) Imm8Reg) -(extern constructor mask_imm8_const mask_imm8_const) +;; Mask a constant to the bit-width of the given type and package it into an +;; `Imm8Reg.Imm8`. 
This is used for shifts and rotates, so that we don't try and +;; shift/rotate more bits than the type has available, per Cranelift's +;; semantics. +(decl const_to_type_masked_imm8 (u64 Type) Imm8Reg) +(extern constructor const_to_type_masked_imm8 const_to_type_masked_imm8) ;; Extract a constant `RegMemImm.Imm` from a value operand. (decl simm32_from_value (RegMemImm) Value) @@ -494,6 +519,37 @@ wr)))) r)) +;; Helper for creating an SSE register holding an `i64x2` from two `i64` values. +(decl make_i64x2_from_lanes (RegMem RegMem) Reg) +(rule (make_i64x2_from_lanes lo hi) + (let ((dst_w WritableReg (temp_writable_reg $I64X2)) + (dst_r Reg (writable_reg_to_reg dst_w)) + (_0 Unit (emit (MInst.XmmUninitializedValue dst_w))) + (_1 Unit (emit (MInst.XmmRmRImm (SseOpcode.Pinsrd) + dst_r + lo + dst_w + 0 + (OperandSize.Size64)))) + (_2 Unit (emit (MInst.XmmRmRImm (SseOpcode.Pinsrd) + dst_r + hi + dst_w + 1 + (OperandSize.Size64))))) + dst_r)) + +;; Move a `RegMemImm.Reg` operand to an XMM register, if necessary. +(decl reg_mem_imm_to_xmm (RegMemImm) RegMemImm) +(rule (reg_mem_imm_to_xmm rmi @ (RegMemImm.Mem _)) rmi) +(rule (reg_mem_imm_to_xmm rmi @ (RegMemImm.Imm _)) rmi) +(rule (reg_mem_imm_to_xmm (RegMemImm.Reg r)) + (RegMemImm.Reg (gpr_to_xmm $I8X16 + (SseOpcode.Movd) + (RegMem.Reg r) + (OperandSize.Size32)))) + + ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; These constructors create SSA-style `MInst`s. It is their responsibility to @@ -1058,6 +1114,21 @@ (rule (pminud src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminud) src1 src2)) +;; Helper for creating `punpcklbw` instructions. +(decl punpcklbw (Reg RegMem) Reg) +(rule (punpcklbw src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Punpcklbw) src1 src2)) + +;; Helper for creating `punpckhbw` instructions. +(decl punpckhbw (Reg RegMem) Reg) +(rule (punpckhbw src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Punpckhbw) src1 src2)) + +;; Helper for creating `packsswb` instructions. 
+(decl packsswb (Reg RegMem) Reg) +(rule (packsswb src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Packsswb) src1 src2)) + ;; Helper for creating `MInst.XmmRmRImm` instructions. (decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg) (rule (xmm_rm_r_imm op src1 src2 imm size) @@ -1180,6 +1251,16 @@ (rule (psrlq src1 src2) (xmm_rmi_reg (SseOpcode.Psrlq) src1 src2)) +;; Helper for creating `psraw` instructions. +(decl psraw (Reg RegMemImm) Reg) +(rule (psraw src1 src2) + (xmm_rmi_reg (SseOpcode.Psraw) src1 src2)) + +;; Helper for creating `psrad` instructions. +(decl psrad (Reg RegMemImm) Reg) +(rule (psrad src1 src2) + (xmm_rmi_reg (SseOpcode.Psrad) src1 src2)) + ;; Helper for creating `MInst.MulHi` instructions. ;; ;; Returns the (lo, hi) register halves of the multiplication. @@ -1252,6 +1333,19 @@ (rule (insertps src1 src2 lane) (xmm_rm_r_imm (SseOpcode.Insertps) src1 src2 lane (OperandSize.Size32))) +;; Helper for creating `pextrd` instructions. +(decl pextrd (Type Reg u8) Reg) +(rule (pextrd ty src lane) + (let ((w_dst WritableReg (temp_writable_reg ty)) + (r_dst Reg (writable_reg_to_reg w_dst)) + (_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrd) + r_dst + (RegMem.Reg src) + w_dst + lane + (operand_size_of_type_32_64 (lane_type ty)))))) + r_dst)) + ;; Helper for creating `not` instructions. (decl not (Type Reg) Reg) (rule (not ty src) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 5647ef06dd6c..d8daeb636699 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -537,13 +537,7 @@ ;; `i64` and smaller. (rule (lower (has_type (fits_in_64 ty) (ishl src amt))) - ;; NB: Only the low bits of `amt` matter since we logically mask the shift - ;; amount to the value's bit width. 
- (let ((amt_ Reg (lo_reg amt))) - (value_reg (shl ty (put_in_reg src) (Imm8Reg.Reg amt_))))) - -(rule (lower (has_type (fits_in_64 ty) (ishl src (imm8_from_value amt)))) - (value_reg (shl ty (put_in_reg src) amt))) + (value_reg (shl ty (put_in_reg src) (put_masked_in_imm8_reg amt ty)))) ;; `i128`. @@ -582,15 +576,8 @@ ;; `i64` and smaller. (rule (lower (has_type (fits_in_64 ty) (ushr src amt))) - (let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero))) - ;; NB: Only the low bits of `amt` matter since we logically mask the - ;; shift amount to the value's bit width. - (amt_ Reg (lo_reg amt))) - (value_reg (shr ty src_ (Imm8Reg.Reg amt_))))) - -(rule (lower (has_type (fits_in_64 ty) (ushr src (imm8_from_value amt)))) (let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero)))) - (value_reg (shr ty src_ amt)))) + (value_reg (shr ty src_ (put_masked_in_imm8_reg amt ty))))) ;; `i128`. @@ -623,6 +610,109 @@ (let ((amt_ Reg (lo_reg amt))) (shr_i128 (put_in_regs src) amt_))) +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `i64` and smaller. + +(rule (lower (has_type (fits_in_64 ty) (sshr src amt))) + (let ((src_ Reg (extend_to_reg src ty (ExtendKind.Sign)))) + (value_reg (sar ty src_ (put_masked_in_imm8_reg amt ty))))) + +;; `i128`. + +(decl sar_i128 (ValueRegs Reg) ValueRegs) +(rule (sar_i128 src amt) + ;; Unpack the low/high halves of `src`. + (let ((src_lo Reg (value_regs_get src 0)) + (src_hi Reg (value_regs_get src 1)) + ;; Do a shift of each half. NB: the low half uses an unsigned shift + ;; because its MSB is not a sign bit. + (lo_shifted Reg (shr $I64 src_lo (Imm8Reg.Reg amt))) + (hi_shifted Reg (sar $I64 src_hi (Imm8Reg.Reg amt))) + ;; `src_hi << (64 - amt)` are the bits to carry over from the high + ;; half to the low half. + (carry Reg (shl $I64 src_hi (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt))))) + ;; Nullify the carry if we are shifting by a multiple of 128.
+ (carry_ Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt) + (cmove $I64 (CC.Z) (RegMem.Reg (imm $I64 0)) carry))) + ;; Add the carry into the low half. + (lo_shifted_ Reg (or $I64 lo_shifted (RegMemImm.Reg carry_))) + ;; Get all sign bits. + (sign_bits Reg (sar $I64 src_hi (Imm8Reg.Imm8 63)))) + ;; Combine the two shifted halves. However, if we are shifting by >= 64 + ;; (modulo 128), then the hi bits are all sign bits and the lo bits are + ;; what would otherwise be our hi bits. + (with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt) + (cmove $I64 (CC.Z) (RegMem.Reg lo_shifted_) hi_shifted) + (cmove $I64 (CC.Z) (RegMem.Reg hi_shifted) sign_bits)))) + +(rule (lower (has_type $I128 (sshr src amt))) + ;; NB: Only the low bits of `amt` matter since we logically mask the shift + ;; amount to the value's bit width. + (let ((amt_ Reg (lo_reg amt))) + (sar_i128 (put_in_regs src) amt_))) + +;; SSE. + +;; Since the x86 instruction set does not have an 8x16 shift instruction and the +;; approach used for `ishl` and `ushr` cannot be easily used (the masks do not +;; preserve the sign), we use a different approach here: separate the low and +;; high lanes, shift them separately, and merge them into the final result. +;; +;; Visually, this looks like the following, where `src.i8x16 = [s0, s1, ..., +;; s15]`: +;; +;; lo.i16x8 = [(s0, s0), (s1, s1), ..., (s7, s7)] +;; shifted_lo.i16x8 = shift each lane of `lo` +;; hi.i16x8 = [(s8, s8), (s9, s9), ..., (s15, s15)] +;; shifted_hi.i16x8 = shift each lane of `hi` +;; result = [s0'', s1'', ..., s15''] +(rule (lower (has_type $I8X16 (sshr src amt @ (value_type amt_ty)))) + (let ((src_ Reg (put_in_reg src)) + ;; In order for `packsswb` later to only use the high byte of each + ;; 16x8 lane, we shift right an extra 8 bits, relying on `psraw` to + ;; fill in the upper bits appropriately.
+ (amt_ RegMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt))) + (lo Reg (punpcklbw src_ (RegMem.Reg src_))) + (shifted_lo Reg (psraw lo amt_)) + (hi Reg (punpckhbw src_ (RegMem.Reg src_))) + (shifted_hi Reg (psraw hi amt_))) + (value_reg (packsswb shifted_lo (RegMem.Reg shifted_hi))))) + +(decl sshr_i8x16_bigger_shift (Type RegMemImm) RegMemImm) +(rule (sshr_i8x16_bigger_shift _ty (RegMemImm.Imm i)) + (RegMemImm.Imm (u32_add i 8))) +(rule (sshr_i8x16_bigger_shift ty (RegMemImm.Reg r)) + (reg_mem_imm_to_xmm (RegMemImm.Reg (add ty r (RegMemImm.Imm 8))))) +(rule (sshr_i8x16_bigger_shift ty rmi @ (RegMemImm.Mem _m)) + (reg_mem_imm_to_xmm (RegMemImm.Reg (add ty (imm ty 8) rmi)))) + +;; `sshr.{i16x8,i32x4}` can be a simple `psra{w,d}`, we just have to make sure +;; that if the shift amount is in a register, it is in an XMM register. +(rule (lower (has_type $I16X8 (sshr src amt))) + (value_reg (psraw (put_in_reg src) + (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))) +(rule (lower (has_type $I32X4 (sshr src amt))) + (value_reg (psrad (put_in_reg src) + (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))) + +;; The `sshr.i64x2` CLIF instruction has no single x86 instruction in the older +;; feature sets. Newer ones like AVX512VL + AVX512F include `vpsraq`, a 128-bit +;; instruction that would fit here, but this backend does not currently have +;; support for EVEX encodings. To remedy this, we extract each 64-bit lane to a +;; GPR, shift each using a scalar instruction, and insert the shifted values +;; back in the `dst` XMM register. +;; +;; (TODO: when EVEX support is available, add an alternate lowering here). 
+(rule (lower (has_type $I64X2 (sshr src amt))) + (let ((src_ Reg (put_in_reg src)) + (lo Reg (pextrd $I64 src_ 0)) + (hi Reg (pextrd $I64 src_ 1)) + (amt_ Imm8Reg (put_masked_in_imm8_reg amt $I64)) + (shifted_lo Reg (sar $I64 lo amt_)) + (shifted_hi Reg (sar $I64 hi amt_))) + (value_reg (make_i64x2_from_lanes (RegMem.Reg shifted_lo) + (RegMem.Reg shifted_hi))))) ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i16` and `i8`: we need to extend the shift amount, or mask the @@ -632,8 +722,10 @@ (let ((amt_ Reg (extend_to_reg amt $I32 (ExtendKind.Zero)))) (value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_))))) -(rule (lower (has_type (ty_8_or_16 ty) (rotl src (imm8_from_value amt)))) - (value_reg (m_rotl ty (put_in_reg src) (mask_imm8_const amt (ty_bits_mask ty))))) +(rule (lower (has_type (ty_8_or_16 ty) (rotl src (const_value amt)))) + (value_reg (m_rotl ty + (put_in_reg src) + (const_to_type_masked_imm8 amt ty)))) ;; `i64` and `i32`: we can rely on x86's rotate-amount masking since ;; we operate on the whole register. @@ -644,8 +736,10 @@ (let ((amt_ Reg (lo_reg amt))) (value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_))))) -(rule (lower (has_type (ty_32_or_64 ty) (rotl src (imm8_from_value amt)))) - (value_reg (m_rotl ty (put_in_reg src) amt))) +(rule (lower (has_type (ty_32_or_64 ty) (rotl src (const_value amt)))) + (value_reg (m_rotl ty + (put_in_reg src) + (const_to_type_masked_imm8 amt ty)))) ;; `i128`. 
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 5b4ed7fbbd79..6fd6a995b912 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1538,13 +1538,18 @@ fn lower_insn_to_regs>( | Opcode::Umin | Opcode::Bnot | Opcode::Bitselect - | Opcode::Vselect => implemented_in_isle(ctx), + | Opcode::Vselect + | Opcode::Sshr => implemented_in_isle(ctx), - Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => { + Opcode::Ishl | Opcode::Ushr | Opcode::Rotl | Opcode::Rotr => { let dst_ty = ctx.output_ty(insn, 0); debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty); if !dst_ty.is_vector() && dst_ty.bits() <= 64 { + if op != Opcode::Rotr { + implemented_in_isle(ctx); + } + // Scalar shifts on x86 have various encodings: // - shift by one bit, e.g. `SAL r/m8, 1` (not used here) // - shift by an immediate amount, e.g. `SAL r/m8, imm8` @@ -1557,10 +1562,6 @@ fn lower_insn_to_regs>( OperandSize::Size32, extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32), ), - Opcode::Sshr => ( - OperandSize::Size32, - extend_input_to_reg(ctx, inputs[0], ExtSpec::SignExtendTo32), - ), Opcode::Rotl | Opcode::Rotr => ( OperandSize::from_ty(dst_ty), put_input_in_reg(ctx, inputs[0]), @@ -1590,7 +1591,6 @@ fn lower_insn_to_regs>( let shift_kind = match op { Opcode::Ishl => ShiftKind::ShiftLeft, Opcode::Ushr => ShiftKind::ShiftRightLogical, - Opcode::Sshr => ShiftKind::ShiftRightArithmetic, Opcode::Rotl => ShiftKind::RotateLeft, Opcode::Rotr => ShiftKind::RotateRight, _ => unreachable!(), @@ -1608,50 +1608,8 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]); match op { - Opcode::Ishl => { - emit_shl_i128(ctx, src, dst, amt_src); - } - Opcode::Ushr => { - emit_shr_i128(ctx, src, dst, amt_src, /* is_signed = */ false); - } - Opcode::Sshr => { - emit_shr_i128(ctx, src, dst, amt_src, /* is_signed = */ true); - } - Opcode::Rotl => { - // (mov tmp, src) - // (shl.i128 tmp, amt) 
- // (mov dst, src) - // (ushr.i128 dst, 128-amt) - // (or dst, tmp) - let tmp = ctx.alloc_tmp(types::I128); - emit_shl_i128(ctx, src, tmp, amt_src); - let inv_amt = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::imm(OperandSize::Size64, 128, inv_amt)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Sub, - RegMemImm::reg(amt_src), - inv_amt, - )); - emit_shr_i128( - ctx, - src, - dst, - inv_amt.to_reg(), - /* is_signed = */ false, - ); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp.regs()[0].to_reg()), - dst.regs()[0], - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp.regs()[1].to_reg()), - dst.regs()[1], - )); + Opcode::Ishl | Opcode::Ushr | Opcode::Rotl => { + implemented_in_isle(ctx); } Opcode::Rotr => { // (mov tmp, src) @@ -1808,127 +1766,6 @@ fn lower_insn_to_regs>( _ => SseOpcode::Pand, }; ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::from(mask_value), dst)); - } else if dst_ty == types::I8X16 && op == Opcode::Sshr { - // Since the x86 instruction set does not have an 8x16 shift instruction and the approach used for - // `ishl` and `ushr` cannot be easily used (the masks do not preserve the sign), we use a different - // approach here: separate the low and high lanes, shift them separately, and merge them into the final - // result. 
Visually, this looks like the following, where `src.i8x16 = [s0, s1, ..., s15]: - // low.i16x8 = [(s0, s0), (s1, s1), ..., (s7, s7)] - // shifted_low.i16x8 = shift each lane of `low` - // high.i16x8 = [(s8, s8), (s9, s9), ..., (s15, s15)] - // shifted_high.i16x8 = shift each lane of `high` - // dst.i8x16 = [s0'', s1'', ..., s15''] - let src = put_input_in_reg(ctx, inputs[0]); - let shift_by = input_to_reg_mem_imm(ctx, inputs[1]); - let shift_by_ty = ctx.input_ty(insn, 1); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - // In order for PACKSSWB later to only use the high byte of each 16x8 lane, we shift right an extra 8 - // bits, relying on PSRAW to fill in the upper bits appropriately. - let bigger_shift_by = match shift_by { - // When we know the shift amount at compile time, we add the extra shift amount statically. - RegMemImm::Imm { simm32 } => RegMemImm::imm(simm32 + 8), - // Otherwise we add instructions to add the extra shift amount and move the value into an XMM - // register. - RegMemImm::Reg { reg } => { - let bigger_shift_by_gpr = ctx.alloc_tmp(shift_by_ty).only_reg().unwrap(); - ctx.emit(Inst::mov_r_r(OperandSize::Size64, reg, bigger_shift_by_gpr)); - - let size = if shift_by_ty == types::I64 { - OperandSize::Size64 - } else { - OperandSize::Size32 - }; - let imm = RegMemImm::imm(8); - ctx.emit(Inst::alu_rmi_r( - size, - AluRmiROpcode::Add, - imm, - bigger_shift_by_gpr, - )); - - let bigger_shift_by_xmm = ctx.alloc_tmp(dst_ty).only_reg().unwrap(); - ctx.emit(Inst::gpr_to_xmm( - SseOpcode::Movd, - RegMem::from(bigger_shift_by_gpr), - OperandSize::Size32, - bigger_shift_by_xmm, - )); - RegMemImm::reg(bigger_shift_by_xmm.to_reg()) - } - RegMemImm::Mem { .. } => unimplemented!("load shift amount to XMM register"), - }; - - // Unpack and shift the lower lanes of `src` into the `dst` register. 
- ctx.emit(Inst::gen_move(dst, src, dst_ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Punpcklbw, RegMem::from(dst), dst)); - ctx.emit(Inst::xmm_rmi_reg( - SseOpcode::Psraw, - bigger_shift_by.clone(), - dst, - )); - - // Unpack and shift the upper lanes of `src` into a temporary register, `upper_lanes`. - let upper_lanes = ctx.alloc_tmp(dst_ty).only_reg().unwrap(); - ctx.emit(Inst::gen_move(upper_lanes, src, dst_ty)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Punpckhbw, - RegMem::from(upper_lanes), - upper_lanes, - )); - ctx.emit(Inst::xmm_rmi_reg( - SseOpcode::Psraw, - bigger_shift_by, - upper_lanes, - )); - - // Merge the upper and lower shifted lanes into `dst`. - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Packsswb, - RegMem::from(upper_lanes), - dst, - )); - } else if dst_ty == types::I64X2 && op == Opcode::Sshr { - // The `sshr.i8x16` CLIF instruction has no single x86 instruction in the older feature sets; newer ones - // like AVX512VL + AVX512F include VPSRAQ, a 128-bit instruction that would fit here, but this backend - // does not currently have support for EVEX encodings (TODO when EVEX support is available, add an - // alternate lowering here). To remedy this, we extract each 64-bit lane to a GPR, shift each using a - // scalar instruction, and insert the shifted values back in the `dst` XMM register. - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::gen_move(dst, src, dst_ty)); - - // Extract the upper and lower lanes into temporary GPRs. - let lower_lane = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - emit_extract_lane(ctx, src, lower_lane, 0, types::I64); - let upper_lane = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - emit_extract_lane(ctx, src, upper_lane, 1, types::I64); - - // Shift each value. 
- let mut shift = |reg: Writable| { - let kind = ShiftKind::ShiftRightArithmetic; - if let Some(shift_by) = ctx.get_input_as_source_or_const(insn, 1).constant { - // Mask the shift amount according to Cranelift's semantics. - let shift_by = (shift_by as u8) & (types::I64.bits() as u8 - 1); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - kind, - Some(shift_by), - reg, - )); - } else { - let dynamic_shift_by = put_input_in_reg(ctx, inputs[1]); - let w_rcx = Writable::from_reg(regs::rcx()); - ctx.emit(Inst::mov_r_r(OperandSize::Size64, dynamic_shift_by, w_rcx)); - ctx.emit(Inst::shift_r(OperandSize::Size64, kind, None, reg)); - }; - }; - shift(lower_lane); - shift(upper_lane); - - // Insert the scalar values back into the `dst` vector. - emit_insert_lane(ctx, RegMem::from(lower_lane), dst, 0, types::I64); - emit_insert_lane(ctx, RegMem::from(upper_lane), dst, 1, types::I64); } else { // For the remaining packed shifts not covered above, x86 has implementations that can either: // - shift using an immediate @@ -1940,13 +1777,11 @@ fn lower_insn_to_regs>( types::I16X8 => match op { Opcode::Ishl => SseOpcode::Psllw, Opcode::Ushr => SseOpcode::Psrlw, - Opcode::Sshr => SseOpcode::Psraw, _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), }, types::I32X4 => match op { Opcode::Ishl => SseOpcode::Pslld, Opcode::Ushr => SseOpcode::Psrld, - Opcode::Sshr => SseOpcode::Psrad, _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), }, types::I64X2 => match op { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index bebedc324028..18260e802086 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -69,6 +69,31 @@ where OperandSize::from_ty(ty) } + fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm { + let inputs = self.lower_ctx.get_value_as_source_or_const(val); + + if let Some(c) = inputs.constant { + if let Some(imm) = to_simm32(c 
as i64) { + return imm; + } + + // Generate constants fresh at each use to minimize long-range + // register pressure. + let ty = self.value_type(val); + return RegMemImm::reg(generated_code::constructor_imm(self, ty, c).unwrap()); + } + + if let Some((src_insn, 0)) = inputs.inst { + if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) { + self.lower_ctx.sink_inst(src_insn); + let amode = lower_to_amode(self.lower_ctx, addr_input, offset); + return RegMemImm::mem(amode); + } + } + + RegMemImm::reg(self.put_in_reg(val)) + } + fn put_in_reg_mem(&mut self, val: Value) -> RegMem { let inputs = self.lower_ctx.get_value_as_source_or_const(val); @@ -90,6 +115,23 @@ where RegMem::reg(self.put_in_reg(val)) } + fn put_masked_in_imm8_reg(&mut self, val: Value, ty: Type) -> Imm8Reg { + let inputs = self.lower_ctx.get_value_as_source_or_const(val); + + if let Some(c) = inputs.constant { + let mask = 1_u64 + .checked_shl(ty.bits() as u32) + .map_or(u64::MAX, |x| x - 1); + return Imm8Reg::Imm8 { + imm: (c & mask) as u8, + }; + } + + Imm8Reg::Reg { + reg: self.put_in_regs(val).regs()[0], + } + } + #[inline] fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 { imm.encode() @@ -131,12 +173,12 @@ where } #[inline] - fn mask_imm8_const(&mut self, imm8: &Imm8Reg, mask: u64) -> Imm8Reg { - match imm8 { - &Imm8Reg::Reg { reg } => Imm8Reg::Reg { reg }, - &Imm8Reg::Imm8 { imm } => Imm8Reg::Imm8 { - imm: imm & (mask as u8), - }, + fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Reg { + let mask = 1_u64 + .checked_shl(ty.bits() as u32) + .map_or(u64::MAX, |x| x - 1); + Imm8Reg::Imm8 { + imm: (c & mask) as u8, } } diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index e3cdbcd263f1..092ecdd4b011 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 
@@ src/clif.isle f176ef3bba99365 -src/prelude.isle babc931e5dc5b4cf -src/isa/x64/inst.isle bc5fc626492752c8 -src/isa/x64/lower.isle 33e94300f4c08455 +src/prelude.isle 3434bc353bb711d5 +src/isa/x64/inst.isle c16462cc359dd466 +src/isa/x64/lower.isle c6147937da7f0412 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index f2f8dd0ca2ca..8247ba368b59 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -20,6 +20,8 @@ pub trait Context { fn pack_value_array_2(&mut self, arg0: Value, arg1: Value) -> ValueArray2; fn unpack_value_array_3(&mut self, arg0: &ValueArray3) -> (Value, Value, Value); fn pack_value_array_3(&mut self, arg0: Value, arg1: Value, arg2: Value) -> ValueArray3; + fn u32_add(&mut self, arg0: u32, arg1: u32) -> u32; + fn u8_and(&mut self, arg0: u8, arg1: u8) -> u8; fn value_reg(&mut self, arg0: Reg) -> ValueRegs; fn value_regs(&mut self, arg0: Reg, arg1: Reg) -> ValueRegs; fn temp_writable_reg(&mut self, arg0: Type) -> WritableReg; @@ -32,6 +34,7 @@ pub trait Context { fn u32_as_u64(&mut self, arg0: u32) -> u64; fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits_u16(&mut self, arg0: Type) -> u16; + fn lane_type(&mut self, arg0: Type) -> Type; fn fits_in_16(&mut self, arg0: Type) -> Option; fn fits_in_32(&mut self, arg0: Type) -> Option; fn fits_in_64(&mut self, arg0: Type) -> Option; @@ -52,21 +55,23 @@ pub trait Context { fn first_result(&mut self, arg0: Inst) -> Option; fn inst_data(&mut self, arg0: Inst) -> InstructionData; fn value_type(&mut self, arg0: Value) -> Type; - fn ty_bits_mask(&mut self, arg0: Type) -> u64; fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>; fn def_inst(&mut self, arg0: Value) -> Option; + fn const_value(&mut self, arg0: Value) -> Option; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn 
operand_size_of_type_32_64(&mut self, arg0: Type) -> OperandSize; fn raw_operand_size_of_type(&mut self, arg0: Type) -> OperandSize; + fn put_in_reg_mem_imm(&mut self, arg0: Value) -> RegMemImm; fn put_in_reg_mem(&mut self, arg0: Value) -> RegMem; + fn put_masked_in_imm8_reg(&mut self, arg0: Value, arg1: Type) -> Imm8Reg; fn encode_fcmp_imm(&mut self, arg0: &FcmpImm) -> u8; fn xmm0(&mut self) -> WritableReg; fn avx512vl_enabled(&mut self, arg0: Type) -> Option<()>; fn avx512dq_enabled(&mut self, arg0: Type) -> Option<()>; fn avx512f_enabled(&mut self, arg0: Type) -> Option<()>; fn imm8_from_value(&mut self, arg0: Value) -> Option; - fn mask_imm8_const(&mut self, arg0: &Imm8Reg, arg1: u64) -> Imm8Reg; + fn const_to_type_masked_imm8(&mut self, arg0: u64, arg1: Type) -> Imm8Reg; fn simm32_from_value(&mut self, arg0: Value) -> Option; fn simm32_from_imm64(&mut self, arg0: Imm64) -> Option; fn sinkable_load(&mut self, arg0: Value) -> Option; @@ -77,19 +82,19 @@ pub trait Context { fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8; } -/// Internal type ProducesFlags: defined at src/prelude.isle line 263. +/// Internal type ProducesFlags: defined at src/prelude.isle line 272. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlags { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 266. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 275. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlags { inst: MInst, result: Reg }, } -/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 416. +/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 441. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum ExtendKind { Sign, @@ -99,7 +104,7 @@ pub enum ExtendKind { // Generated as internal constructor for term temp_reg. pub fn constructor_temp_reg(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/prelude.isle line 60. 
+ // Rule at src/prelude.isle line 66. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -108,7 +113,7 @@ pub fn constructor_temp_reg(ctx: &mut C, arg0: Type) -> Option // Generated as internal constructor for term lo_reg. pub fn constructor_lo_reg(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/prelude.isle line 95. + // Rule at src/prelude.isle line 101. let expr0_0 = C::put_in_regs(ctx, pattern0_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -133,7 +138,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 276. + // Rule at src/prelude.isle line 285. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -161,7 +166,7 @@ pub fn constructor_with_flags_1( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 284. + // Rule at src/prelude.isle line 293. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); return Some(pattern3_1); @@ -195,7 +200,7 @@ pub fn constructor_with_flags_2( result: pattern5_1, } = pattern4_0 { - // Rule at src/prelude.isle line 294. + // Rule at src/prelude.isle line 303. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern5_0); let expr2_0 = C::emit(ctx, &pattern3_0); @@ -212,22 +217,22 @@ pub fn constructor_operand_size_bits(ctx: &mut C, arg0: &OperandSize let pattern0_0 = arg0; match pattern0_0 { &OperandSize::Size8 => { - // Rule at src/isa/x64/inst.isle line 90. + // Rule at src/isa/x64/inst.isle line 91. let expr0_0: u16 = 8; return Some(expr0_0); } &OperandSize::Size16 => { - // Rule at src/isa/x64/inst.isle line 91. + // Rule at src/isa/x64/inst.isle line 92. let expr0_0: u16 = 16; return Some(expr0_0); } &OperandSize::Size32 => { - // Rule at src/isa/x64/inst.isle line 92. 
+ // Rule at src/isa/x64/inst.isle line 93. let expr0_0: u16 = 32; return Some(expr0_0); } &OperandSize::Size64 => { - // Rule at src/isa/x64/inst.isle line 93. + // Rule at src/isa/x64/inst.isle line 94. let expr0_0: u16 = 64; return Some(expr0_0); } @@ -248,12 +253,12 @@ pub fn constructor_extend_to_reg( let pattern2_0 = arg1; if pattern2_0 == pattern1_0 { let pattern4_0 = arg2; - // Rule at src/isa/x64/inst.isle line 428. + // Rule at src/isa/x64/inst.isle line 453. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 431. + // Rule at src/isa/x64/inst.isle line 456. let expr0_0 = C::ty_bits_u16(ctx, pattern1_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern2_0); let expr2_0 = constructor_operand_size_bits(ctx, &expr1_0)?; @@ -277,7 +282,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 451. + // Rule at src/isa/x64/inst.isle line 476. let expr0_0 = constructor_movsx(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -285,7 +290,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 447. + // Rule at src/isa/x64/inst.isle line 472. let expr0_0 = constructor_movzx(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -298,17 +303,17 @@ pub fn constructor_extend( pub fn constructor_sse_xor_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 458. + // Rule at src/isa/x64/inst.isle line 483. let expr0_0 = SseOpcode::Xorps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 459. + // Rule at src/isa/x64/inst.isle line 484. 
let expr0_0 = SseOpcode::Xorpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 460. + // Rule at src/isa/x64/inst.isle line 485. let expr0_0 = SseOpcode::Pxor; return Some(expr0_0); } @@ -325,7 +330,7 @@ pub fn constructor_sse_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 464. + // Rule at src/isa/x64/inst.isle line 489. let expr0_0 = constructor_sse_xor_op(ctx, pattern0_0)?; let expr1_0 = constructor_xmm_rm_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -335,40 +340,40 @@ pub fn constructor_sse_xor( pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 473. + // Rule at src/isa/x64/inst.isle line 498. let expr0_0 = SseOpcode::Cmpps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 474. + // Rule at src/isa/x64/inst.isle line 499. let expr0_0 = SseOpcode::Cmppd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { if pattern1_0 == 8 { if pattern1_1 == 16 { - // Rule at src/isa/x64/inst.isle line 469. + // Rule at src/isa/x64/inst.isle line 494. let expr0_0 = SseOpcode::Pcmpeqb; return Some(expr0_0); } } if pattern1_0 == 16 { if pattern1_1 == 8 { - // Rule at src/isa/x64/inst.isle line 470. + // Rule at src/isa/x64/inst.isle line 495. let expr0_0 = SseOpcode::Pcmpeqw; return Some(expr0_0); } } if pattern1_0 == 32 { if pattern1_1 == 4 { - // Rule at src/isa/x64/inst.isle line 471. + // Rule at src/isa/x64/inst.isle line 496. let expr0_0 = SseOpcode::Pcmpeqd; return Some(expr0_0); } } if pattern1_0 == 64 { if pattern1_1 == 2 { - // Rule at src/isa/x64/inst.isle line 472. + // Rule at src/isa/x64/inst.isle line 497. 
let expr0_0 = SseOpcode::Pcmpeqq; return Some(expr0_0); } @@ -380,7 +385,7 @@ pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 488. + // Rule at src/isa/x64/inst.isle line 513. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); let expr2_0: Type = I32X4; @@ -396,6 +401,79 @@ pub fn constructor_vector_all_ones(ctx: &mut C, arg0: Type) -> Optio return Some(expr1_0); } +// Generated as internal constructor for term make_i64x2_from_lanes. +pub fn constructor_make_i64x2_from_lanes( + ctx: &mut C, + arg0: &RegMem, + arg1: &RegMem, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 524. + let expr0_0: Type = I64X2; + let expr1_0 = C::temp_writable_reg(ctx, expr0_0); + let expr2_0 = C::writable_reg_to_reg(ctx, expr1_0); + let expr3_0 = MInst::XmmUninitializedValue { dst: expr1_0 }; + let expr4_0 = C::emit(ctx, &expr3_0); + let expr5_0 = SseOpcode::Pinsrd; + let expr6_0: u8 = 0; + let expr7_0 = OperandSize::Size64; + let expr8_0 = MInst::XmmRmRImm { + op: expr5_0, + src1: expr2_0, + src2: pattern0_0.clone(), + dst: expr1_0, + imm: expr6_0, + size: expr7_0, + }; + let expr9_0 = C::emit(ctx, &expr8_0); + let expr10_0 = SseOpcode::Pinsrd; + let expr11_0: u8 = 1; + let expr12_0 = OperandSize::Size64; + let expr13_0 = MInst::XmmRmRImm { + op: expr10_0, + src1: expr2_0, + src2: pattern1_0.clone(), + dst: expr1_0, + imm: expr11_0, + size: expr12_0, + }; + let expr14_0 = C::emit(ctx, &expr13_0); + return Some(expr2_0); +} + +// Generated as internal constructor for term reg_mem_imm_to_xmm. +pub fn constructor_reg_mem_imm_to_xmm( + ctx: &mut C, + arg0: &RegMemImm, +) -> Option { + let pattern0_0 = arg0; + match pattern0_0 { + &RegMemImm::Imm { simm32: pattern1_0 } => { + // Rule at src/isa/x64/inst.isle line 545. 
+ return Some(pattern0_0.clone()); + } + &RegMemImm::Reg { reg: pattern1_0 } => { + // Rule at src/isa/x64/inst.isle line 546. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Movd; + let expr2_0 = RegMem::Reg { reg: pattern1_0 }; + let expr3_0 = OperandSize::Size32; + let expr4_0 = constructor_gpr_to_xmm(ctx, expr0_0, &expr1_0, &expr2_0, &expr3_0)?; + let expr5_0 = RegMemImm::Reg { reg: expr4_0 }; + return Some(expr5_0); + } + &RegMemImm::Mem { + addr: ref pattern1_0, + } => { + // Rule at src/isa/x64/inst.isle line 544. + return Some(pattern0_0.clone()); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term alu_rmi_r. pub fn constructor_alu_rmi_r( ctx: &mut C, @@ -408,7 +486,7 @@ pub fn constructor_alu_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 512. + // Rule at src/isa/x64/inst.isle line 568. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::AluRmiR { @@ -433,7 +511,7 @@ pub fn constructor_add( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 520. + // Rule at src/isa/x64/inst.isle line 576. let expr0_0 = AluRmiROpcode::Add; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -449,7 +527,7 @@ pub fn constructor_add_with_flags( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 528. + // Rule at src/isa/x64/inst.isle line 584. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Add; @@ -478,7 +556,7 @@ pub fn constructor_adc( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 539. + // Rule at src/isa/x64/inst.isle line 595. 
let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Adc; @@ -507,7 +585,7 @@ pub fn constructor_sub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 550. + // Rule at src/isa/x64/inst.isle line 606. let expr0_0 = AluRmiROpcode::Sub; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -523,7 +601,7 @@ pub fn constructor_sub_with_flags( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 558. + // Rule at src/isa/x64/inst.isle line 614. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sub; @@ -552,7 +630,7 @@ pub fn constructor_sbb( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 569. + // Rule at src/isa/x64/inst.isle line 625. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sbb; @@ -581,7 +659,7 @@ pub fn constructor_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 580. + // Rule at src/isa/x64/inst.isle line 636. let expr0_0 = AluRmiROpcode::Mul; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -597,7 +675,7 @@ pub fn constructor_m_and( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 591. + // Rule at src/isa/x64/inst.isle line 647. 
let expr0_0 = AluRmiROpcode::And; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -613,7 +691,7 @@ pub fn constructor_or( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 599. + // Rule at src/isa/x64/inst.isle line 655. let expr0_0 = AluRmiROpcode::Or; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -629,7 +707,7 @@ pub fn constructor_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 607. + // Rule at src/isa/x64/inst.isle line 663. let expr0_0 = AluRmiROpcode::Xor; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -641,7 +719,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == I64 { let pattern2_0 = arg1; if let Some(pattern3_0) = C::nonzero_u64_fits_in_u32(ctx, pattern2_0) { - // Rule at src/isa/x64/inst.isle line 636. + // Rule at src/isa/x64/inst.isle line 692. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = OperandSize::Size32; @@ -658,7 +736,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F32 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 665. + // Rule at src/isa/x64/inst.isle line 721. let expr0_0: Type = F32; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = C::writable_reg_to_reg(ctx, expr1_0); @@ -673,7 +751,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::emit(ctx, &expr5_0); return Some(expr2_0); } - // Rule at src/isa/x64/inst.isle line 624. + // Rule at src/isa/x64/inst.isle line 680. 
let expr0_0: Type = F32; let expr1_0 = SseOpcode::Movd; let expr2_0: Type = I32; @@ -686,7 +764,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F64 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 677. + // Rule at src/isa/x64/inst.isle line 733. let expr0_0: Type = F64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = C::writable_reg_to_reg(ctx, expr1_0); @@ -701,7 +779,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::emit(ctx, &expr5_0); return Some(expr2_0); } - // Rule at src/isa/x64/inst.isle line 628. + // Rule at src/isa/x64/inst.isle line 684. let expr0_0: Type = F64; let expr1_0 = SseOpcode::Movq; let expr2_0: Type = I64; @@ -714,7 +792,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 655. + // Rule at src/isa/x64/inst.isle line 711. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); let expr2_0 = constructor_sse_xor_op(ctx, pattern0_0)?; @@ -731,7 +809,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option } let pattern1_0 = arg1; if pattern1_0 == 0 { - // Rule at src/isa/x64/inst.isle line 642. + // Rule at src/isa/x64/inst.isle line 698. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -747,7 +825,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::emit(ctx, &expr5_0); return Some(expr1_0); } - // Rule at src/isa/x64/inst.isle line 617. + // Rule at src/isa/x64/inst.isle line 673. 
let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Imm { @@ -772,7 +850,7 @@ pub fn constructor_shift_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 690. + // Rule at src/isa/x64/inst.isle line 746. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::raw_operand_size_of_type(ctx, pattern0_0); let expr2_0 = MInst::ShiftR { @@ -797,7 +875,7 @@ pub fn constructor_m_rotl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 701. + // Rule at src/isa/x64/inst.isle line 757. let expr0_0 = ShiftKind::RotateLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -813,7 +891,7 @@ pub fn constructor_shl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 706. + // Rule at src/isa/x64/inst.isle line 762. let expr0_0 = ShiftKind::ShiftLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -829,7 +907,7 @@ pub fn constructor_shr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 711. + // Rule at src/isa/x64/inst.isle line 767. let expr0_0 = ShiftKind::ShiftRightLogical; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -845,7 +923,7 @@ pub fn constructor_sar( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 716. + // Rule at src/isa/x64/inst.isle line 772. 
let expr0_0 = ShiftKind::ShiftRightArithmetic; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -863,7 +941,7 @@ pub fn constructor_cmp_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 721. + // Rule at src/isa/x64/inst.isle line 777. let expr0_0 = MInst::CmpRmiR { size: pattern0_0.clone(), opcode: pattern1_0.clone(), @@ -888,7 +966,7 @@ pub fn constructor_cmp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 730. + // Rule at src/isa/x64/inst.isle line 786. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -904,7 +982,7 @@ pub fn constructor_test( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 735. + // Rule at src/isa/x64/inst.isle line 791. let expr0_0 = CmpOpcode::Test; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -922,7 +1000,7 @@ pub fn constructor_cmove( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 740. + // Rule at src/isa/x64/inst.isle line 796. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Cmove { @@ -950,7 +1028,7 @@ pub fn constructor_movzx( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 748. + // Rule at src/isa/x64/inst.isle line 804. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::MovzxRmR { ext_mode: pattern1_0.clone(), @@ -972,7 +1050,7 @@ pub fn constructor_movsx( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 755. 
+ // Rule at src/isa/x64/inst.isle line 811. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::MovsxRmR { ext_mode: pattern1_0.clone(), @@ -996,7 +1074,7 @@ pub fn constructor_xmm_rm_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 762. + // Rule at src/isa/x64/inst.isle line 818. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::XmmRmR { op: pattern1_0.clone(), @@ -1013,7 +1091,7 @@ pub fn constructor_xmm_rm_r( pub fn constructor_paddb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 769. + // Rule at src/isa/x64/inst.isle line 825. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1024,7 +1102,7 @@ pub fn constructor_paddb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_paddw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 774. + // Rule at src/isa/x64/inst.isle line 830. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1035,7 +1113,7 @@ pub fn constructor_paddw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_paddd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 779. + // Rule at src/isa/x64/inst.isle line 835. 
let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Paddd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1046,7 +1124,7 @@ pub fn constructor_paddd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_paddq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 784. + // Rule at src/isa/x64/inst.isle line 840. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Paddq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1057,7 +1135,7 @@ pub fn constructor_paddq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_paddsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 789. + // Rule at src/isa/x64/inst.isle line 845. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1068,7 +1146,7 @@ pub fn constructor_paddsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_paddsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 794. + // Rule at src/isa/x64/inst.isle line 850. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1079,7 +1157,7 @@ pub fn constructor_paddsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_paddusb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 799. + // Rule at src/isa/x64/inst.isle line 855. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1090,7 +1168,7 @@ pub fn constructor_paddusb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_paddusw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 804. + // Rule at src/isa/x64/inst.isle line 860. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1101,7 +1179,7 @@ pub fn constructor_paddusw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_psubb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 809. + // Rule at src/isa/x64/inst.isle line 865. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1112,7 +1190,7 @@ pub fn constructor_psubb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_psubw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 814. + // Rule at src/isa/x64/inst.isle line 870. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1123,7 +1201,7 @@ pub fn constructor_psubw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_psubd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 819. + // Rule at src/isa/x64/inst.isle line 875. 
let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Psubd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1134,7 +1212,7 @@ pub fn constructor_psubd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_psubq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 824. + // Rule at src/isa/x64/inst.isle line 880. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Psubq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1145,7 +1223,7 @@ pub fn constructor_psubq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_psubsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 829. + // Rule at src/isa/x64/inst.isle line 885. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1156,7 +1234,7 @@ pub fn constructor_psubsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_psubsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 834. + // Rule at src/isa/x64/inst.isle line 890. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1167,7 +1245,7 @@ pub fn constructor_psubsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_psubusb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 839. + // Rule at src/isa/x64/inst.isle line 895. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1178,7 +1256,7 @@ pub fn constructor_psubusb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_psubusw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 844. + // Rule at src/isa/x64/inst.isle line 900. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1189,7 +1267,7 @@ pub fn constructor_psubusw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pavgb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 849. + // Rule at src/isa/x64/inst.isle line 905. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pavgb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1200,7 +1278,7 @@ pub fn constructor_pavgb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_pavgw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 854. + // Rule at src/isa/x64/inst.isle line 910. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pavgw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1211,7 +1289,7 @@ pub fn constructor_pavgw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_pand(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 859. + // Rule at src/isa/x64/inst.isle line 915. 
let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Pand; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1222,7 +1300,7 @@ pub fn constructor_pand(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Op pub fn constructor_andps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 864. + // Rule at src/isa/x64/inst.isle line 920. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Andps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1233,7 +1311,7 @@ pub fn constructor_andps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_andpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 869. + // Rule at src/isa/x64/inst.isle line 925. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Andpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1244,7 +1322,7 @@ pub fn constructor_andpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_por(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 874. + // Rule at src/isa/x64/inst.isle line 930. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Por; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1255,7 +1333,7 @@ pub fn constructor_por(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Opt pub fn constructor_orps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 879. + // Rule at src/isa/x64/inst.isle line 935. 
let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Orps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1266,7 +1344,7 @@ pub fn constructor_orps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Op pub fn constructor_orpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 884. + // Rule at src/isa/x64/inst.isle line 940. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Orpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1277,7 +1355,7 @@ pub fn constructor_orpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Op pub fn constructor_pxor(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 889. + // Rule at src/isa/x64/inst.isle line 945. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pxor; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1288,7 +1366,7 @@ pub fn constructor_pxor(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Op pub fn constructor_xorps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 894. + // Rule at src/isa/x64/inst.isle line 950. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Xorps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1299,7 +1377,7 @@ pub fn constructor_xorps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_xorpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 899. + // Rule at src/isa/x64/inst.isle line 955. 
let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Xorpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1310,7 +1388,7 @@ pub fn constructor_xorpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_pmullw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 904. + // Rule at src/isa/x64/inst.isle line 960. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmullw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1321,7 +1399,7 @@ pub fn constructor_pmullw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmulld(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 909. + // Rule at src/isa/x64/inst.isle line 965. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulld; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1332,7 +1410,7 @@ pub fn constructor_pmulld(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmulhw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 914. + // Rule at src/isa/x64/inst.isle line 970. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulhw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1343,7 +1421,7 @@ pub fn constructor_pmulhw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmulhuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 919. + // Rule at src/isa/x64/inst.isle line 975. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulhuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1354,7 +1432,7 @@ pub fn constructor_pmulhuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmuldq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 924. + // Rule at src/isa/x64/inst.isle line 980. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmuldq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1365,7 +1443,7 @@ pub fn constructor_pmuldq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmuludq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 929. + // Rule at src/isa/x64/inst.isle line 985. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Pmuludq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1376,7 +1454,7 @@ pub fn constructor_pmuludq(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_punpckhwd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 934. + // Rule at src/isa/x64/inst.isle line 990. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Punpckhwd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1387,7 +1465,7 @@ pub fn constructor_punpckhwd(ctx: &mut C, arg0: Reg, arg1: &RegMem) pub fn constructor_punpcklwd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 939. + // Rule at src/isa/x64/inst.isle line 995. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Punpcklwd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1398,7 +1476,7 @@ pub fn constructor_punpcklwd(ctx: &mut C, arg0: Reg, arg1: &RegMem) pub fn constructor_andnps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 944. + // Rule at src/isa/x64/inst.isle line 1000. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Andnps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1409,7 +1487,7 @@ pub fn constructor_andnps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_andnpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 949. + // Rule at src/isa/x64/inst.isle line 1005. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Andnpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1420,7 +1498,7 @@ pub fn constructor_andnpd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pandn(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 954. + // Rule at src/isa/x64/inst.isle line 1010. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Pandn; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1431,17 +1509,17 @@ pub fn constructor_pandn(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_sse_blend_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 958. + // Rule at src/isa/x64/inst.isle line 1014. let expr0_0 = SseOpcode::Blendvps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 959. + // Rule at src/isa/x64/inst.isle line 1015. 
let expr0_0 = SseOpcode::Blendvpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 960. + // Rule at src/isa/x64/inst.isle line 1016. let expr0_0 = SseOpcode::Pblendvb; return Some(expr0_0); } @@ -1452,17 +1530,17 @@ pub fn constructor_sse_blend_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 963. + // Rule at src/isa/x64/inst.isle line 1019. let expr0_0 = SseOpcode::Movaps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 964. + // Rule at src/isa/x64/inst.isle line 1020. let expr0_0 = SseOpcode::Movapd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 965. + // Rule at src/isa/x64/inst.isle line 1021. let expr0_0 = SseOpcode::Movdqa; return Some(expr0_0); } @@ -1481,7 +1559,7 @@ pub fn constructor_sse_blend( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 969. + // Rule at src/isa/x64/inst.isle line 1025. let expr0_0 = C::xmm0(ctx); let expr1_0 = constructor_sse_mov_op(ctx, pattern0_0)?; let expr2_0 = MInst::XmmUnaryRmR { @@ -1505,7 +1583,7 @@ pub fn constructor_blendvpd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 981. + // Rule at src/isa/x64/inst.isle line 1037. let expr0_0 = C::xmm0(ctx); let expr1_0 = SseOpcode::Movapd; let expr2_0 = RegMem::Reg { reg: pattern2_0 }; @@ -1525,7 +1603,7 @@ pub fn constructor_blendvpd( pub fn constructor_movsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 993. + // Rule at src/isa/x64/inst.isle line 1049. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1536,7 +1614,7 @@ pub fn constructor_movsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_movlhps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 998. + // Rule at src/isa/x64/inst.isle line 1054. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movlhps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1547,7 +1625,7 @@ pub fn constructor_movlhps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1003. + // Rule at src/isa/x64/inst.isle line 1059. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1558,7 +1636,7 @@ pub fn constructor_pmaxsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1008. + // Rule at src/isa/x64/inst.isle line 1064. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1569,7 +1647,7 @@ pub fn constructor_pmaxsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1013. + // Rule at src/isa/x64/inst.isle line 1069. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1580,7 +1658,7 @@ pub fn constructor_pmaxsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1018. + // Rule at src/isa/x64/inst.isle line 1074. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1591,7 +1669,7 @@ pub fn constructor_pminsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1023. + // Rule at src/isa/x64/inst.isle line 1079. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1602,7 +1680,7 @@ pub fn constructor_pminsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1028. + // Rule at src/isa/x64/inst.isle line 1084. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1613,7 +1691,7 @@ pub fn constructor_pminsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1033. + // Rule at src/isa/x64/inst.isle line 1089. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1624,7 +1702,7 @@ pub fn constructor_pmaxub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1038. + // Rule at src/isa/x64/inst.isle line 1094. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1635,7 +1713,7 @@ pub fn constructor_pmaxuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1043. + // Rule at src/isa/x64/inst.isle line 1099. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1646,7 +1724,7 @@ pub fn constructor_pmaxud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1048. + // Rule at src/isa/x64/inst.isle line 1104. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1657,7 +1735,7 @@ pub fn constructor_pminub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1053. + // Rule at src/isa/x64/inst.isle line 1109. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1668,13 +1746,46 @@ pub fn constructor_pminuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1058. + // Rule at src/isa/x64/inst.isle line 1114. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; return Some(expr2_0); } +// Generated as internal constructor for term punpcklbw. +pub fn constructor_punpcklbw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1119. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Punpcklbw; + let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term punpckhbw. +pub fn constructor_punpckhbw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1124. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Punpckhbw; + let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term packsswb. +pub fn constructor_packsswb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1129. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Packsswb; + let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; + return Some(expr2_0); +} + // Generated as internal constructor for term xmm_rm_r_imm. 
pub fn constructor_xmm_rm_r_imm( ctx: &mut C, @@ -1689,7 +1800,7 @@ pub fn constructor_xmm_rm_r_imm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1063. + // Rule at src/isa/x64/inst.isle line 1134. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmRImm { @@ -1717,7 +1828,7 @@ pub fn constructor_palignr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1075. + // Rule at src/isa/x64/inst.isle line 1146. let expr0_0 = SseOpcode::Palignr; let expr1_0 = constructor_xmm_rm_r_imm( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -1735,7 +1846,7 @@ pub fn constructor_pshufd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1084. + // Rule at src/isa/x64/inst.isle line 1155. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = C::writable_reg_to_reg(ctx, expr1_0); @@ -1760,7 +1871,7 @@ pub fn constructor_xmm_unary_rm_r( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1097. + // Rule at src/isa/x64/inst.isle line 1168. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUnaryRmR { @@ -1776,7 +1887,7 @@ pub fn constructor_xmm_unary_rm_r( // Generated as internal constructor for term pmovsxbw. pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1104. + // Rule at src/isa/x64/inst.isle line 1175. let expr0_0 = SseOpcode::Pmovsxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1785,7 +1896,7 @@ pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &RegMem) -> Option(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1109. 
+ // Rule at src/isa/x64/inst.isle line 1180. let expr0_0 = SseOpcode::Pmovzxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1794,7 +1905,7 @@ pub fn constructor_pmovzxbw(ctx: &mut C, arg0: &RegMem) -> Option(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1114. + // Rule at src/isa/x64/inst.isle line 1185. let expr0_0 = SseOpcode::Pabsb; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1803,7 +1914,7 @@ pub fn constructor_pabsb(ctx: &mut C, arg0: &RegMem) -> Option // Generated as internal constructor for term pabsw. pub fn constructor_pabsw(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1119. + // Rule at src/isa/x64/inst.isle line 1190. let expr0_0 = SseOpcode::Pabsw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1812,7 +1923,7 @@ pub fn constructor_pabsw(ctx: &mut C, arg0: &RegMem) -> Option // Generated as internal constructor for term pabsd. pub fn constructor_pabsd(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1124. + // Rule at src/isa/x64/inst.isle line 1195. let expr0_0 = SseOpcode::Pabsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1826,7 +1937,7 @@ pub fn constructor_xmm_unary_rm_r_evex( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1129. + // Rule at src/isa/x64/inst.isle line 1200. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUnaryRmREvex { @@ -1842,7 +1953,7 @@ pub fn constructor_xmm_unary_rm_r_evex( // Generated as internal constructor for term vpabsq. 
pub fn constructor_vpabsq(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1136. + // Rule at src/isa/x64/inst.isle line 1207. let expr0_0 = Avx512Opcode::Vpabsq; let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1858,7 +1969,7 @@ pub fn constructor_xmm_rm_r_evex( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1141. + // Rule at src/isa/x64/inst.isle line 1212. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmREvex { @@ -1876,7 +1987,7 @@ pub fn constructor_xmm_rm_r_evex( pub fn constructor_vpmullq(ctx: &mut C, arg0: &RegMem, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1153. + // Rule at src/isa/x64/inst.isle line 1224. let expr0_0 = Avx512Opcode::Vpmullq; let expr1_0 = constructor_xmm_rm_r_evex(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1892,7 +2003,7 @@ pub fn constructor_xmm_rmi_reg( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1160. + // Rule at src/isa/x64/inst.isle line 1231. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmiReg { @@ -1910,7 +2021,7 @@ pub fn constructor_xmm_rmi_reg( pub fn constructor_psllq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1170. + // Rule at src/isa/x64/inst.isle line 1241. 
let expr0_0 = SseOpcode::Psllq; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1920,7 +2031,7 @@ pub fn constructor_psllq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1175. + // Rule at src/isa/x64/inst.isle line 1246. let expr0_0 = SseOpcode::Psrld; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1930,12 +2041,32 @@ pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrlq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1180. + // Rule at src/isa/x64/inst.isle line 1251. let expr0_0 = SseOpcode::Psrlq; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); } +// Generated as internal constructor for term psraw. +pub fn constructor_psraw(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1256. + let expr0_0 = SseOpcode::Psraw; + let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; + return Some(expr1_0); +} + +// Generated as internal constructor for term psrad. +pub fn constructor_psrad(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1261. + let expr0_0 = SseOpcode::Psrad; + let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; + return Some(expr1_0); +} + // Generated as internal constructor for term mul_hi. 
pub fn constructor_mul_hi( ctx: &mut C, @@ -1948,7 +2079,7 @@ pub fn constructor_mul_hi( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1187. + // Rule at src/isa/x64/inst.isle line 1268. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::temp_writable_reg(ctx, pattern0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -1977,7 +2108,7 @@ pub fn constructor_mulhi_u( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1203. + // Rule at src/isa/x64/inst.isle line 1284. let expr0_0: bool = false; let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1993,7 +2124,7 @@ pub fn constructor_cmpps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1208. + // Rule at src/isa/x64/inst.isle line 1289. let expr0_0 = SseOpcode::Cmpps; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -2012,7 +2143,7 @@ pub fn constructor_cmppd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1221. + // Rule at src/isa/x64/inst.isle line 1302. let expr0_0 = SseOpcode::Cmppd; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -2033,7 +2164,7 @@ pub fn constructor_gpr_to_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1230. + // Rule at src/isa/x64/inst.isle line 1311. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::GprToXmm { op: pattern1_0.clone(), @@ -2056,7 +2187,7 @@ pub fn constructor_pinsrb( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1237. + // Rule at src/isa/x64/inst.isle line 1318. 
let expr0_0 = SseOpcode::Pinsrb; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2074,7 +2205,7 @@ pub fn constructor_pinsrw( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1242. + // Rule at src/isa/x64/inst.isle line 1323. let expr0_0 = SseOpcode::Pinsrw; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2094,7 +2225,7 @@ pub fn constructor_pinsrd( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1247. + // Rule at src/isa/x64/inst.isle line 1328. let expr0_0 = SseOpcode::Pinsrd; let expr1_0 = constructor_xmm_rm_r_imm( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2112,7 +2243,7 @@ pub fn constructor_insertps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1252. + // Rule at src/isa/x64/inst.isle line 1333. let expr0_0 = SseOpcode::Insertps; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2120,11 +2251,35 @@ pub fn constructor_insertps( return Some(expr2_0); } +// Generated as internal constructor for term pextrd. +pub fn constructor_pextrd(ctx: &mut C, arg0: Type, arg1: Reg, arg2: u8) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 1338. + let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); + let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); + let expr2_0 = SseOpcode::Pextrd; + let expr3_0 = RegMem::Reg { reg: pattern1_0 }; + let expr4_0 = C::lane_type(ctx, pattern0_0); + let expr5_0 = C::operand_size_of_type_32_64(ctx, expr4_0); + let expr6_0 = MInst::XmmRmRImm { + op: expr2_0, + src1: expr1_0, + src2: expr3_0, + dst: expr0_0, + imm: pattern2_0, + size: expr5_0, + }; + let expr7_0 = C::emit(ctx, &expr6_0); + return Some(expr1_0); +} + // Generated as internal constructor for term not. 
pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1257. + // Rule at src/isa/x64/inst.isle line 1351. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Not { @@ -2177,7 +2332,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1031. + // Rule at src/isa/x64/lower.isle line 1125. let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; return Some(expr0_0); } @@ -2366,7 +2521,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 719. + // Rule at src/isa/x64/lower.isle line 813. let expr0_0 = C::put_in_regs(ctx, pattern7_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -2457,7 +2612,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 652. + // Rule at src/isa/x64/lower.isle line 746. let expr0_0 = C::put_in_regs(ctx, pattern7_0); let expr1_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr2_0 = constructor_shl_i128(ctx, expr0_0, expr1_0)?; @@ -2474,7 +2629,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 574. + // Rule at src/isa/x64/lower.isle line 568. 
let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr1_0 = C::put_in_regs(ctx, pattern7_0); let expr2_0 = constructor_shl_i128(ctx, expr1_0, expr0_0)?; @@ -2483,12 +2638,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 620. + // Rule at src/isa/x64/lower.isle line 607. let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr1_0 = C::put_in_regs(ctx, pattern7_0); let expr2_0 = constructor_shr_i128(ctx, expr1_0, expr0_0)?; return Some(expr2_0); } + &Opcode::Sshr => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 649. + let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; + let expr1_0 = C::put_in_regs(ctx, pattern7_0); + let expr2_0 = constructor_sar_i128(ctx, expr1_0, expr0_0)?; + return Some(expr2_0); + } _ => {} } } @@ -2497,7 +2661,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1028. + // Rule at src/isa/x64/lower.isle line 1122. let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; return Some(expr0_0); } @@ -2542,7 +2706,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1132. + // Rule at src/isa/x64/lower.isle line 1226. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsb(ctx, expr0_0, &expr1_0)?; @@ -2552,7 +2716,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1154. + // Rule at src/isa/x64/lower.isle line 1248. 
let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminub(ctx, expr0_0, &expr1_0)?; @@ -2562,7 +2726,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1121. + // Rule at src/isa/x64/lower.isle line 1215. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsb(ctx, expr0_0, &expr1_0)?; @@ -2572,13 +2736,33 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1143. + // Rule at src/isa/x64/lower.isle line 1237. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxub(ctx, expr0_0, &expr1_0)?; let expr3_0 = C::value_reg(ctx, expr2_0); return Some(expr3_0); } + &Opcode::Sshr => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + let pattern8_0 = C::value_type(ctx, pattern7_1); + // Rule at src/isa/x64/lower.isle line 670. 
+ let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = + constructor_sshr_i8x16_bigger_shift(ctx, pattern8_0, &expr1_0)?; + let expr3_0 = RegMem::Reg { reg: expr0_0 }; + let expr4_0 = constructor_punpcklbw(ctx, expr0_0, &expr3_0)?; + let expr5_0 = constructor_psraw(ctx, expr4_0, &expr2_0)?; + let expr6_0 = RegMem::Reg { reg: expr0_0 }; + let expr7_0 = constructor_punpckhbw(ctx, expr0_0, &expr6_0)?; + let expr8_0 = constructor_psraw(ctx, expr7_0, &expr2_0)?; + let expr9_0 = RegMem::Reg { reg: expr8_0 }; + let expr10_0 = constructor_packsswb(ctx, expr5_0, &expr9_0)?; + let expr11_0 = C::value_reg(ctx, expr10_0); + return Some(expr11_0); + } _ => {} } } @@ -2587,7 +2771,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 974. + // Rule at src/isa/x64/lower.isle line 1068. let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsb(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -2608,7 +2792,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1135. + // Rule at src/isa/x64/lower.isle line 1229. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsw(ctx, expr0_0, &expr1_0)?; @@ -2618,7 +2802,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1157. + // Rule at src/isa/x64/lower.isle line 1251. 
let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminuw(ctx, expr0_0, &expr1_0)?; @@ -2628,7 +2812,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1124. + // Rule at src/isa/x64/lower.isle line 1218. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsw(ctx, expr0_0, &expr1_0)?; @@ -2638,13 +2822,24 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1146. + // Rule at src/isa/x64/lower.isle line 1240. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxuw(ctx, expr0_0, &expr1_0)?; let expr3_0 = C::value_reg(ctx, expr2_0); return Some(expr3_0); } + &Opcode::Sshr => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 692. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psraw(ctx, expr0_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } _ => {} } } @@ -2653,7 +2848,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 977. + // Rule at src/isa/x64/lower.isle line 1071. 
let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsw(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -2674,7 +2869,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1138. + // Rule at src/isa/x64/lower.isle line 1232. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsd(ctx, expr0_0, &expr1_0)?; @@ -2684,7 +2879,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1160. + // Rule at src/isa/x64/lower.isle line 1254. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminud(ctx, expr0_0, &expr1_0)?; @@ -2694,7 +2889,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1127. + // Rule at src/isa/x64/lower.isle line 1221. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsd(ctx, expr0_0, &expr1_0)?; @@ -2704,13 +2899,24 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1149. + // Rule at src/isa/x64/lower.isle line 1243. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxud(ctx, expr0_0, &expr1_0)?; let expr3_0 = C::value_reg(ctx, expr2_0); return Some(expr3_0); } + &Opcode::Sshr => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 695. 
+ let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psrad(ctx, expr0_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } _ => {} } } @@ -2719,7 +2925,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 980. + // Rule at src/isa/x64/lower.isle line 1074. let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsd(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -2731,24 +2937,54 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + if let &Opcode::Sshr = &pattern5_0 { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 707. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0: Type = I64; + let expr2_0: u8 = 0; + let expr3_0 = constructor_pextrd(ctx, expr1_0, expr0_0, expr2_0)?; + let expr4_0: Type = I64; + let expr5_0: u8 = 1; + let expr6_0 = constructor_pextrd(ctx, expr4_0, expr0_0, expr5_0)?; + let expr7_0: Type = I64; + let expr8_0 = C::put_masked_in_imm8_reg(ctx, pattern7_1, expr7_0); + let expr9_0: Type = I64; + let expr10_0 = constructor_sar(ctx, expr9_0, expr3_0, &expr8_0)?; + let expr11_0: Type = I64; + let expr12_0 = constructor_sar(ctx, expr11_0, expr6_0, &expr8_0)?; + let expr13_0 = RegMem::Reg { reg: expr10_0 }; + let expr14_0 = RegMem::Reg { reg: expr12_0 }; + let expr15_0 = + constructor_make_i64x2_from_lanes(ctx, &expr13_0, &expr14_0)?; + let expr16_0 = C::value_reg(ctx, expr15_0); + return Some(expr16_0); + } } + &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } => { + if let &Opcode::Iabs = &pattern5_0 { + // Rule at src/isa/x64/lower.isle line 1088. 
+ let expr0_0 = C::put_in_reg(ctx, pattern5_1); + let expr1_0: Type = I64X2; + let expr2_0: u64 = 0; + let expr3_0 = constructor_imm(ctx, expr1_0, expr2_0)?; + let expr4_0 = RegMem::Reg { reg: expr0_0 }; + let expr5_0 = constructor_psubq(ctx, expr3_0, &expr4_0)?; + let expr6_0 = RegMem::Reg { reg: expr0_0 }; + let expr7_0 = constructor_blendvpd(ctx, expr5_0, &expr6_0, expr5_0)?; + let expr8_0 = C::value_reg(ctx, expr7_0); + return Some(expr8_0); + } + } + _ => {} } } if pattern2_0 == F32X4 { @@ -2759,7 +2995,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::BandNot = &pattern4_0 { let (pattern6_0, pattern6_1) = C::unpack_value_array_2(ctx, &pattern4_1); - // Rule at src/isa/x64/lower.isle line 967. + // Rule at src/isa/x64/lower.isle line 1061. let expr0_0 = C::put_in_reg(ctx, pattern6_1); let expr1_0 = C::put_in_reg_mem(ctx, pattern6_0); let expr2_0 = constructor_sse_and_not(ctx, pattern2_0, expr0_0, &expr1_0)?; @@ -2838,7 +3074,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern9_0, pattern9_1) = C::unpack_value_array_2(ctx, &pattern7_1); - // Rule at src/isa/x64/lower.isle line 662. + // Rule at src/isa/x64/lower.isle line 756. let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern9_1); let expr2_0 = constructor_pavgb(ctx, expr0_0, &expr1_0)?; @@ -2966,7 +3202,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern9_0, pattern9_1) = C::unpack_value_array_2(ctx, &pattern7_1); - // Rule at src/isa/x64/lower.isle line 666. + // Rule at src/isa/x64/lower.isle line 760. 
let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern9_1); let expr2_0 = constructor_pavgw(ctx, expr0_0, &expr1_0)?; @@ -3079,7 +3315,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1, pattern7_2) = C::unpack_value_array_3(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1041. + // Rule at src/isa/x64/lower.isle line 1135. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg(ctx, pattern7_1); let expr2_0 = RegMem::Reg { reg: expr0_0 }; @@ -4026,7 +4262,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1, pattern7_2) = C::unpack_value_array_3(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1055. + // Rule at src/isa/x64/lower.isle line 1149. let expr0_0 = C::put_in_reg_mem(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = C::put_in_reg(ctx, pattern7_2); @@ -4044,7 +4280,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1036. + // Rule at src/isa/x64/lower.isle line 1130. 
let expr0_0 = C::put_in_reg(ctx, pattern5_1); let expr1_0 = constructor_vector_all_ones(ctx, pattern2_0)?; let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4171,7 +4407,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - if let Some(pattern8_0) = C::imm8_from_value(ctx, pattern7_1) { - // Rule at src/isa/x64/lower.isle line 545. - let expr0_0 = C::put_in_reg(ctx, pattern7_0); - let expr1_0 = - constructor_shl(ctx, pattern3_0, expr0_0, &pattern8_0)?; - let expr2_0 = C::value_reg(ctx, expr1_0); - return Some(expr2_0); - } // Rule at src/isa/x64/lower.isle line 539. - let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; - let expr1_0 = C::put_in_reg(ctx, pattern7_0); - let expr2_0 = Imm8Reg::Reg { reg: expr0_0 }; - let expr3_0 = constructor_shl(ctx, pattern3_0, expr1_0, &expr2_0)?; - let expr4_0 = C::value_reg(ctx, expr3_0); - return Some(expr4_0); + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_masked_in_imm8_reg(ctx, pattern7_1, pattern3_0); + let expr2_0 = constructor_shl(ctx, pattern3_0, expr0_0, &expr1_0)?; + let expr3_0 = C::value_reg(ctx, expr2_0); + return Some(expr3_0); } &Opcode::Ushr => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - if let Some(pattern8_0) = C::imm8_from_value(ctx, pattern7_1) { - // Rule at src/isa/x64/lower.isle line 591. - let expr0_0 = ExtendKind::Zero; - let expr1_0 = constructor_extend_to_reg( - ctx, pattern7_0, pattern3_0, &expr0_0, - )?; - let expr2_0 = - constructor_shr(ctx, pattern3_0, expr1_0, &pattern8_0)?; - let expr3_0 = C::value_reg(ctx, expr2_0); - return Some(expr3_0); - } - // Rule at src/isa/x64/lower.isle line 584. + // Rule at src/isa/x64/lower.isle line 578. 
let expr0_0 = ExtendKind::Zero; let expr1_0 = constructor_extend_to_reg(ctx, pattern7_0, pattern3_0, &expr0_0)?; - let expr2_0 = constructor_lo_reg(ctx, pattern7_1)?; - let expr3_0 = Imm8Reg::Reg { reg: expr2_0 }; - let expr4_0 = constructor_shr(ctx, pattern3_0, expr1_0, &expr3_0)?; - let expr5_0 = C::value_reg(ctx, expr4_0); - return Some(expr5_0); + let expr2_0 = C::put_masked_in_imm8_reg(ctx, pattern7_1, pattern3_0); + let expr3_0 = constructor_shr(ctx, pattern3_0, expr1_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } + &Opcode::Sshr => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 617. + let expr0_0 = ExtendKind::Sign; + let expr1_0 = + constructor_extend_to_reg(ctx, pattern7_0, pattern3_0, &expr0_0)?; + let expr2_0 = C::put_masked_in_imm8_reg(ctx, pattern7_1, pattern3_0); + let expr3_0 = constructor_sar(ctx, pattern3_0, expr1_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); } _ => {} } @@ -4450,7 +4677,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1015. + // Rule at src/isa/x64/lower.isle line 1109. let expr0_0 = C::put_in_reg(ctx, pattern5_1); let expr1_0 = constructor_not(ctx, pattern3_0, expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -4485,14 +4712,15 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/lower.isle line 551. + // Rule at src/isa/x64/lower.isle line 545. let expr0_0: usize = 0; let expr1_0 = C::value_regs_get(ctx, pattern0_0, expr0_0); let expr2_0: usize = 1; @@ -4694,7 +4921,7 @@ pub fn constructor_shr_i128( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/lower.isle line 598. + // Rule at src/isa/x64/lower.isle line 585. 
let expr0_0: usize = 0; let expr1_0 = C::value_regs_get(ctx, pattern0_0, expr0_0); let expr2_0: usize = 1; @@ -4748,6 +4975,110 @@ pub fn constructor_shr_i128( return Some(expr49_0); } +// Generated as internal constructor for term sar_i128. +pub fn constructor_sar_i128( + ctx: &mut C, + arg0: ValueRegs, + arg1: Reg, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/lower.isle line 624. + let expr0_0: usize = 0; + let expr1_0 = C::value_regs_get(ctx, pattern0_0, expr0_0); + let expr2_0: usize = 1; + let expr3_0 = C::value_regs_get(ctx, pattern0_0, expr2_0); + let expr4_0: Type = I64; + let expr5_0 = Imm8Reg::Reg { reg: pattern1_0 }; + let expr6_0 = constructor_shr(ctx, expr4_0, expr1_0, &expr5_0)?; + let expr7_0: Type = I64; + let expr8_0 = Imm8Reg::Reg { reg: pattern1_0 }; + let expr9_0 = constructor_sar(ctx, expr7_0, expr3_0, &expr8_0)?; + let expr10_0: Type = I64; + let expr11_0: Type = I64; + let expr12_0: Type = I64; + let expr13_0: u64 = 64; + let expr14_0 = constructor_imm(ctx, expr12_0, expr13_0)?; + let expr15_0 = RegMemImm::Reg { reg: pattern1_0 }; + let expr16_0 = constructor_sub(ctx, expr11_0, expr14_0, &expr15_0)?; + let expr17_0 = Imm8Reg::Reg { reg: expr16_0 }; + let expr18_0 = constructor_shl(ctx, expr10_0, expr3_0, &expr17_0)?; + let expr19_0 = OperandSize::Size64; + let expr20_0: u32 = 127; + let expr21_0 = RegMemImm::Imm { simm32: expr20_0 }; + let expr22_0 = constructor_test(ctx, &expr19_0, &expr21_0, pattern1_0)?; + let expr23_0: Type = I64; + let expr24_0 = CC::Z; + let expr25_0: Type = I64; + let expr26_0: u64 = 0; + let expr27_0 = constructor_imm(ctx, expr25_0, expr26_0)?; + let expr28_0 = RegMem::Reg { reg: expr27_0 }; + let expr29_0 = constructor_cmove(ctx, expr23_0, &expr24_0, &expr28_0, expr18_0)?; + let expr30_0 = constructor_with_flags_1(ctx, &expr22_0, &expr29_0)?; + let expr31_0: Type = I64; + let expr32_0 = RegMemImm::Reg { reg: expr30_0 }; + let expr33_0 = constructor_or(ctx, expr31_0, 
expr6_0, &expr32_0)?; + let expr34_0: Type = I64; + let expr35_0: u8 = 63; + let expr36_0 = Imm8Reg::Imm8 { imm: expr35_0 }; + let expr37_0 = constructor_sar(ctx, expr34_0, expr3_0, &expr36_0)?; + let expr38_0 = OperandSize::Size64; + let expr39_0: u32 = 64; + let expr40_0 = RegMemImm::Imm { simm32: expr39_0 }; + let expr41_0 = constructor_test(ctx, &expr38_0, &expr40_0, pattern1_0)?; + let expr42_0: Type = I64; + let expr43_0 = CC::Z; + let expr44_0 = RegMem::Reg { reg: expr33_0 }; + let expr45_0 = constructor_cmove(ctx, expr42_0, &expr43_0, &expr44_0, expr9_0)?; + let expr46_0: Type = I64; + let expr47_0 = CC::Z; + let expr48_0 = RegMem::Reg { reg: expr9_0 }; + let expr49_0 = constructor_cmove(ctx, expr46_0, &expr47_0, &expr48_0, expr37_0)?; + let expr50_0 = constructor_with_flags_2(ctx, &expr41_0, &expr45_0, &expr49_0)?; + return Some(expr50_0); +} + +// Generated as internal constructor for term sshr_i8x16_bigger_shift. +pub fn constructor_sshr_i8x16_bigger_shift( + ctx: &mut C, + arg0: Type, + arg1: &RegMemImm, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + match pattern1_0 { + &RegMemImm::Imm { simm32: pattern2_0 } => { + // Rule at src/isa/x64/lower.isle line 683. + let expr0_0: u32 = 8; + let expr1_0 = C::u32_add(ctx, pattern2_0, expr0_0); + let expr2_0 = RegMemImm::Imm { simm32: expr1_0 }; + return Some(expr2_0); + } + &RegMemImm::Reg { reg: pattern2_0 } => { + // Rule at src/isa/x64/lower.isle line 685. + let expr0_0: u32 = 8; + let expr1_0 = RegMemImm::Imm { simm32: expr0_0 }; + let expr2_0 = constructor_add(ctx, pattern0_0, pattern2_0, &expr1_0)?; + let expr3_0 = RegMemImm::Reg { reg: expr2_0 }; + let expr4_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr3_0)?; + return Some(expr4_0); + } + &RegMemImm::Mem { + addr: ref pattern2_0, + } => { + // Rule at src/isa/x64/lower.isle line 687. 
+ let expr0_0: u64 = 8; + let expr1_0 = constructor_imm(ctx, pattern0_0, expr0_0)?; + let expr2_0 = constructor_add(ctx, pattern0_0, expr1_0, pattern1_0)?; + let expr3_0 = RegMemImm::Reg { reg: expr2_0 }; + let expr4_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr3_0)?; + return Some(expr4_0); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term sse_and_not. pub fn constructor_sse_and_not( ctx: &mut C, @@ -4759,21 +5090,21 @@ pub fn constructor_sse_and_not( if pattern0_0 == F32X4 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 956. + // Rule at src/isa/x64/lower.isle line 1050. let expr0_0 = constructor_andnps(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } if pattern0_0 == F64X2 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 957. + // Rule at src/isa/x64/lower.isle line 1051. let expr0_0 = constructor_andnpd(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 958. + // Rule at src/isa/x64/lower.isle line 1052. let expr0_0 = constructor_pandn(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } @@ -4783,7 +5114,7 @@ pub fn constructor_sse_and_not( // Generated as internal constructor for term i128_not. pub fn constructor_i128_not(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/lower.isle line 1021. + // Rule at src/isa/x64/lower.isle line 1115. let expr0_0 = C::put_in_regs(ctx, pattern0_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -4810,7 +5141,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1075. + // Rule at src/isa/x64/lower.isle line 1169. 
let expr0_0 = constructor_pinsrb(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -4818,7 +5149,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1078. + // Rule at src/isa/x64/lower.isle line 1172. let expr0_0 = constructor_pinsrw(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -4826,7 +5157,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1081. + // Rule at src/isa/x64/lower.isle line 1175. let expr0_0 = OperandSize::Size32; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -4835,7 +5166,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1084. + // Rule at src/isa/x64/lower.isle line 1178. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -4844,7 +5175,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1087. + // Rule at src/isa/x64/lower.isle line 1181. let expr0_0 = C::sse_insertps_lane_imm(ctx, pattern4_0); let expr1_0 = constructor_insertps(ctx, pattern2_0, pattern3_0, expr0_0)?; return Some(expr1_0); @@ -4855,7 +5186,7 @@ pub fn constructor_vec_insert_lane( if let &RegMem::Reg { reg: pattern4_0 } = pattern3_0 { let pattern5_0 = arg3; if pattern5_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1108. + // Rule at src/isa/x64/lower.isle line 1202. 
let expr0_0 = RegMem::Reg { reg: pattern4_0 }; let expr1_0 = constructor_movsd(ctx, pattern2_0, &expr0_0)?; return Some(expr1_0); @@ -4863,7 +5194,7 @@ pub fn constructor_vec_insert_lane( } let pattern4_0 = arg3; if pattern4_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1109. + // Rule at src/isa/x64/lower.isle line 1203. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern3_0)?; let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4871,7 +5202,7 @@ pub fn constructor_vec_insert_lane( return Some(expr3_0); } if pattern4_0 == 1 { - // Rule at src/isa/x64/lower.isle line 1117. + // Rule at src/isa/x64/lower.isle line 1211. let expr0_0 = constructor_movlhps(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 1fe9b067fb33..2a13827ce4cb 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -95,11 +95,6 @@ macro_rules! isle_prelude_methods { ty.bits().try_into().unwrap() } - #[inline] - fn ty_bits_mask(&mut self, ty: Type) -> u64 { - (1 << (self.ty_bits(ty) as u64)) - 1 - } - #[inline] fn ty_bits_u16(&mut self, ty: Type) -> u16 { ty.bits() @@ -260,6 +255,26 @@ macro_rules! 
isle_prelude_methods { n => Some(n as u64), } } + + #[inline] + fn u32_add(&mut self, a: u32, b: u32) -> u32 { + a.wrapping_add(b) + } + + #[inline] + fn u8_and(&mut self, a: u8, b: u8) -> u8 { + a & b + } + + #[inline] + fn lane_type(&mut self, ty: Type) -> Type { + ty.lane_type() + } + + #[inline] + fn const_value(&mut self, val: Value) -> Option { + self.lower_ctx.get_value_as_source_or_const(val).constant + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 9c1d5b72be10..26b298a5a227 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -38,6 +38,12 @@ (type ValueList (primitive ValueList)) (type ValueRegs (primitive ValueRegs)) +(decl u32_add (u32 u32) u32) +(extern constructor u32_add u32_add) + +(decl u8_and (u8 u8) u8) +(extern constructor u8_and u8_and) + ;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type Reg (primitive Reg)) @@ -146,6 +152,10 @@ (decl ty_bits_u16 (Type) u16) (extern constructor ty_bits_u16 ty_bits_u16) +;; Get the type of each lane in the given type. +(decl lane_type (Type) Type) +(extern constructor lane_type lane_type) + ;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; An extractor that only matches types that can fit in 16 bits. @@ -242,11 +252,6 @@ (and (result_type ty) inst)) -;; Return a bitmask that will mask off a count to be within `ty`'s -;; bit-width. Used for shifts/rotates. -(decl ty_bits_mask (Type) u64) -(extern constructor ty_bits_mask ty_bits_mask) - ;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given ;; type. Will only match when there is more than one lane. (decl multi_lane (u8 u16) Type) @@ -256,6 +261,10 @@ (decl def_inst (Inst) Value) (extern extractor def_inst def_inst) +;; Extract a constant `u64` from the given value. 
+(decl const_value (u64) Value) +(extern extractor const_value const_value) + ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Newtype wrapper around `MInst` for instructions that are used for their diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 3d1c82c37868..cf80b73dd963 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,221 +1,283 @@ -test compile precise-output +test compile set enable_llvm_abi_extensions=true target x86_64 function %f0(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): + v2 = iadd v0, v1 +; nextln: addq %rdx, %rdi +; nextln: adcq %rcx, %rsi + return v2 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addq %rdx, %rdi -; Inst 3: adcq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} - function %f1(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): + v2 = isub v0, v1 +; nextln: subq %rdx, %rdi +; nextln: sbbq %rcx, %rsi + return v2 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq %rdx, %rdi -; Inst 3: sbbq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} - function %f2(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): + v2 = band v0, v1 +; nextln: andq %rdx, %rdi +; nextln: andq %rcx, %rsi + return v2 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andq %rdx, %rdi -; Inst 3: andq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} - function %f3(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): + v2 = bor v0, v1 +; nextln: orq %rdx, %rdi +; nextln: orq %rcx, %rsi + return v2 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orq %rdx, %rdi -; Inst 3: orq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} - function %f4(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): + v2 = bxor v0, v1 +; nextln: xorq %rdx, %rdi +; nextln: xorq %rcx, %rsi + return v2 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorq %rdx, %rdi -; Inst 3: xorq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} - function %f5(i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): + v1 = bnot v0 +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rsi +; nextln: notq %rsi +; nextln: movq %rax, %rdi +; nextln: notq %rdi + return v1 +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: notq %rsi -; Inst 5: movq %rax, %rdi -; Inst 6: notq %rdi -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdi, %rdx -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} - function %f6(i128, i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): +; v0 in rdi:rsi, v1 in rdx:rcx + v2 = imul v0, v1 +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rsi +; nextln: imulq %rcx, %rsi +; nextln: imulq %rdx, %rax +; nextln: addq %rax, %rsi +; nextln: movq %rdi, %rax +; nextln: mul %rdx +; nextln: addq %rdx, %rsi +; nextln: movq %rsi, %rdx + return v2 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: imulq %rcx, %rsi -; Inst 5: imulq %rdx, %rax -; Inst 6: addq %rax, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: mul %rdx -; Inst 9: addq %rdx, %rsi -; Inst 10: movq %rsi, %rdx -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} - function %f7(i64, i64) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i64, v1: i64): v2 = iconcat.i64 v0, v1 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx + return v2 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} - function %f8(i128) -> i64, i64 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1, v2 = isplit.i128 v0 +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx + return v1, v2 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} - function %f9(i128, i128) -> b1 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i128): v2 = icmp eq v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setz %r8b +; nextln: andq %rax, %r8 +; nextln: andq $$1, %r8 +; nextln: setnz %al + v3 = icmp ne v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setnz %al +; nextln: cmpq %rdx, %rdi +; nextln: setnz %r8b +; nextln: orq %rax, %r8 +; nextln: andq $$1, %r8 +; nextln: setnz %r8b + v4 = icmp slt v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setl %r9b +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setb %r10b +; nextln: andq %rax, %r10 +; nextln: orq %r9, %r10 +; nextln: andq $$1, %r10 +; nextln: setnz %r9b + v5 = icmp sle v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setl %r10b +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setbe %r11b +; nextln: andq %rax, %r11 +; nextln: orq %r10, %r11 +; nextln: andq $$1, %r11 +; nextln: setnz %r10b + v6 = icmp sgt v0, v1 - v7 = icmp sge v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setnle %r11b +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setnbe %r12b +; nextln: andq %rax, %r12 +; nextln: orq %r11, %r12 +; nextln: andq $$1, %r12 +; nextln: setnz %r11b + + v7 = icmp sge v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setnle %r12b +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setnb %r13b +; nextln: andq %rax, %r13 +; nextln: orq %r12, %r13 +; nextln: andq $$1, %r13 +; nextln: setnz %r12b + v8 = icmp ult v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setb %r13b +; nextln: setz %al +; nextln: cmpq %rdx, %rdi +; nextln: setb %r14b +; nextln: andq %rax, %r14 +; nextln: orq %r13, %r14 +; nextln: andq $$1, %r14 +; nextln: setnz %r13b + v9 = icmp ule v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setb %r14b +; nextln: setz %al +; 
nextln: cmpq %rdx, %rdi +; nextln: setbe %bl +; nextln: andq %rax, %rbx +; nextln: orq %r14, %rbx +; nextln: andq $$1, %rbx +; nextln: setnz %r14b + v10 = icmp ugt v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setnbe %bl +; nextln: setz %r15b +; nextln: cmpq %rdx, %rdi +; nextln: setnbe %al +; nextln: andq %r15, %rax +; nextln: orq %rbx, %rax +; nextln: andq $$1, %rax +; nextln: setnz %bl + v11 = icmp uge v0, v1 +; check: cmpq %rcx, %rsi +; nextln: setnbe %sil +; nextln: setz %cl +; nextln: cmpq %rdx, %rdi +; nextln: setnb %dil +; nextln: andq %rcx, %rdi +; nextln: orq %rsi, %rdi +; nextln: andq $$1, %rdi +; nextln: setnz %sil + v12 = band v2, v3 v13 = band v4, v5 v14 = band v6, v7 @@ -225,133 +287,26 @@ block0(v0: i128, v1: i128): v18 = band v14, v15 v19 = band v17, v18 v20 = band v19, v16 + return v20 +; check: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 114) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $64, %rsp -; Inst 3: movq %r12, 16(%rsp) -; Inst 4: movq %r13, 24(%rsp) -; Inst 5: movq %r14, 32(%rsp) -; Inst 6: movq %rbx, 40(%rsp) -; Inst 7: movq %r15, 48(%rsp) -; Inst 8: cmpq %rcx, %rsi -; Inst 9: setz %al -; Inst 10: cmpq %rdx, %rdi -; Inst 11: setz %r8b -; Inst 12: andq %rax, %r8 -; Inst 13: andq $1, %r8 -; Inst 14: setnz %al -; Inst 15: movq %rax, rsp(0 + virtual offset) -; Inst 16: cmpq %rcx, %rsi -; Inst 17: setnz %al -; Inst 18: cmpq %rdx, %rdi -; Inst 19: setnz %r8b -; Inst 20: orq %rax, %r8 -; Inst 21: andq $1, %r8 -; Inst 22: setnz %r8b -; Inst 23: cmpq %rcx, %rsi -; Inst 24: setl %r9b -; Inst 25: setz %al -; Inst 26: cmpq %rdx, %rdi -; Inst 27: setb %r10b -; Inst 28: andq %rax, %r10 -; Inst 29: orq %r9, %r10 -; Inst 30: andq $1, %r10 -; Inst 31: setnz %r9b -; Inst 32: cmpq %rcx, %rsi -; Inst 33: setl %r10b -; Inst 34: setz %al -; Inst 35: cmpq %rdx, %rdi -; Inst 36: setbe %r11b -; Inst 37: andq %rax, %r11 -; 
Inst 38: orq %r10, %r11 -; Inst 39: andq $1, %r11 -; Inst 40: setnz %r10b -; Inst 41: cmpq %rcx, %rsi -; Inst 42: setnle %r11b -; Inst 43: setz %al -; Inst 44: cmpq %rdx, %rdi -; Inst 45: setnbe %r12b -; Inst 46: andq %rax, %r12 -; Inst 47: orq %r11, %r12 -; Inst 48: andq $1, %r12 -; Inst 49: setnz %r11b -; Inst 50: cmpq %rcx, %rsi -; Inst 51: setnle %r12b -; Inst 52: setz %al -; Inst 53: cmpq %rdx, %rdi -; Inst 54: setnb %r13b -; Inst 55: andq %rax, %r13 -; Inst 56: orq %r12, %r13 -; Inst 57: andq $1, %r13 -; Inst 58: setnz %r12b -; Inst 59: cmpq %rcx, %rsi -; Inst 60: setb %r13b -; Inst 61: setz %al -; Inst 62: cmpq %rdx, %rdi -; Inst 63: setb %r14b -; Inst 64: andq %rax, %r14 -; Inst 65: orq %r13, %r14 -; Inst 66: andq $1, %r14 -; Inst 67: setnz %r13b -; Inst 68: cmpq %rcx, %rsi -; Inst 69: setb %r14b -; Inst 70: setz %al -; Inst 71: cmpq %rdx, %rdi -; Inst 72: setbe %bl -; Inst 73: andq %rax, %rbx -; Inst 74: orq %r14, %rbx -; Inst 75: andq $1, %rbx -; Inst 76: setnz %r14b -; Inst 77: cmpq %rcx, %rsi -; Inst 78: setnbe %bl -; Inst 79: setz %r15b -; Inst 80: cmpq %rdx, %rdi -; Inst 81: setnbe %al -; Inst 82: andq %r15, %rax -; Inst 83: orq %rbx, %rax -; Inst 84: andq $1, %rax -; Inst 85: setnz %bl -; Inst 86: cmpq %rcx, %rsi -; Inst 87: setnbe %sil -; Inst 88: setz %cl -; Inst 89: cmpq %rdx, %rdi -; Inst 90: setnb %dil -; Inst 91: andq %rcx, %rdi -; Inst 92: orq %rsi, %rdi -; Inst 93: andq $1, %rdi -; Inst 94: setnz %sil -; Inst 95: movq rsp(0 + virtual offset), %rax -; Inst 96: andl %r8d, %eax -; Inst 97: andl %r10d, %r9d -; Inst 98: andl %r12d, %r11d -; Inst 99: andl %r14d, %r13d -; Inst 100: andl %esi, %ebx -; Inst 101: andl %r9d, %eax -; Inst 102: andl %r13d, %r11d -; Inst 103: andl %r11d, %eax -; Inst 104: andl %ebx, %eax -; Inst 105: movq 16(%rsp), %r12 -; Inst 106: movq 24(%rsp), %r13 -; Inst 107: movq 32(%rsp), %r14 -; Inst 108: movq 40(%rsp), %rbx -; Inst 109: movq 48(%rsp), %r15 -; Inst 110: addq $64, %rsp -; Inst 111: movq %rbp, %rsp -; Inst 112: popq 
%rbp -; Inst 113: ret -; }} - function %f10(i128) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): brz v0, block1 +; check: cmpq $$0, %rdi +; nextln: setz %dil +; nextln: cmpq $$0, %rsi +; nextln: setz %sil +; nextln: andb %dil, %sil +; nextln: jnz label1; j label2 + jump block2 block1: @@ -361,42 +316,24 @@ block1: block2: v2 = iconst.i32 2 return v2 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setz %sil -; Inst 6: andb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; check: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f11(i128) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): brnz v0, block1 +; check: cmpq $$0, %rdi +; nextln: setnz %dil +; nextln: cmpq $$0, %rsi +; nextln: setnz %sil +; nextln: orb %dil, %sil +; nextln: jnz label1; j label2 jump block2 block1: @@ -406,408 +343,342 @@ block1: block2: v2 = iconst.i32 2 return v2 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setnz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setnz %sil -; Inst 6: orb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 
12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; check: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f12(i64) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i64): v1 = uextend.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; nextln: movq %rdi, %rsi +; nextln: xorq %rdi, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f13(i64) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i64): v1 = sextend.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; nextln: movq %rdi, %rsi +; nextln: movq %rsi, %rdi +; nextln: sarq $$63, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f14(i8) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i8): v1 = sextend.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsbq %dil, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; nextln: movsbq %dil, %rsi +; nextln: movq %rsi, %rdi +; nextln: sarq $$63, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f15(i8) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i8): v1 = uextend.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movzbq %dil, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; nextln: movzbq %dil, %rsi +; nextln: xorq %rdi, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +} function %f16(i128) -> i64 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1 = ireduce.i64 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; nextln: movq %rdi, %rax + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f17(i128) -> i8 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1 = ireduce.i8 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; nextln: movq %rdi, %rax + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f18(b1) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: b1): v1 = bint.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: andq $1, %rsi -; Inst 4: xorq %rdi, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; nextln: movq %rdi, %rsi +; nextln: andq $$1, %rsi +; nextln: xorq %rdi, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f19(i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1 = popcnt.i128 v0 return v1 + +; check: movq %rsi, %rdx +; nextln: movq %rdi, %rsi +; nextln: shrq $$1, %rsi +; nextln: movabsq $$8608480567731124087, %rcx +; nextln: andq %rcx, %rsi +; nextln: movq %rdi, %rax +; nextln: subq %rsi, %rax +; nextln: shrq $$1, %rsi +; nextln: andq %rcx, %rsi +; nextln: subq %rsi, %rax +; nextln: shrq $$1, %rsi +; nextln: andq %rcx, %rsi +; nextln: subq %rsi, %rax +; nextln: movq %rax, %rsi +; nextln: shrq $$4, %rsi +; nextln: addq %rax, %rsi +; nextln: movabsq $$1085102592571150095, %rdi +; nextln: andq %rdi, %rsi +; nextln: movabsq $$72340172838076673, %rdi +; nextln: imulq %rdi, %rsi +; nextln: shrq $$56, %rsi +; nextln: movq %rdx, %rax +; nextln: shrq $$1, %rax +; nextln: movabsq $$8608480567731124087, %rcx +; nextln: andq %rcx, %rax +; nextln: movq %rdx, %rdi +; nextln: subq %rax, %rdi +; nextln: shrq $$1, %rax +; nextln: andq %rcx, %rax +; nextln: subq %rax, %rdi +; nextln: shrq $$1, %rax +; nextln: andq %rcx, 
%rax +; nextln: subq %rax, %rdi +; nextln: movq %rdi, %rax +; nextln: shrq $$4, %rax +; nextln: addq %rdi, %rax +; nextln: movabsq $$1085102592571150095, %rdi +; nextln: andq %rdi, %rax +; nextln: movabsq $$72340172838076673, %rdi +; nextln: imulq %rdi, %rax +; nextln: shrq $$56, %rax +; nextln: addq %rax, %rsi +; nextln: xorq %rdi, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 50) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rdx -; Inst 3: movq %rdi, %rsi -; Inst 4: shrq $1, %rsi -; Inst 5: movabsq $8608480567731124087, %rcx -; Inst 6: andq %rcx, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: subq %rsi, %rax -; Inst 9: shrq $1, %rsi -; Inst 10: andq %rcx, %rsi -; Inst 11: subq %rsi, %rax -; Inst 12: shrq $1, %rsi -; Inst 13: andq %rcx, %rsi -; Inst 14: subq %rsi, %rax -; Inst 15: movq %rax, %rsi -; Inst 16: shrq $4, %rsi -; Inst 17: addq %rax, %rsi -; Inst 18: movabsq $1085102592571150095, %rdi -; Inst 19: andq %rdi, %rsi -; Inst 20: movabsq $72340172838076673, %rdi -; Inst 21: imulq %rdi, %rsi -; Inst 22: shrq $56, %rsi -; Inst 23: movq %rdx, %rax -; Inst 24: shrq $1, %rax -; Inst 25: movabsq $8608480567731124087, %rcx -; Inst 26: andq %rcx, %rax -; Inst 27: movq %rdx, %rdi -; Inst 28: subq %rax, %rdi -; Inst 29: shrq $1, %rax -; Inst 30: andq %rcx, %rax -; Inst 31: subq %rax, %rdi -; Inst 32: shrq $1, %rax -; Inst 33: andq %rcx, %rax -; Inst 34: subq %rax, %rdi -; Inst 35: movq %rdi, %rax -; Inst 36: shrq $4, %rax -; Inst 37: addq %rdi, %rax -; Inst 38: movabsq $1085102592571150095, %rdi -; Inst 39: andq %rdi, %rax -; Inst 40: movabsq $72340172838076673, %rdi -; Inst 41: imulq %rdi, %rax -; Inst 42: shrq $56, %rax -; Inst 43: addq %rax, %rsi -; Inst 44: xorq %rdi, %rdi -; Inst 45: movq %rsi, %rax -; Inst 46: movq %rdi, %rdx -; Inst 47: movq %rbp, 
%rsp -; Inst 48: popq %rbp -; Inst 49: ret -; }} function %f20(i128) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1 = bitrev.i128 v0 return v1 + +; check: movq %rdi, %rcx +; nextln: movq %rcx, %rdi +; nextln: movabsq $$6148914691236517205, %rax +; nextln: shrq $$1, %rdi +; nextln: andq %rax, %rdi +; nextln: andq %rcx, %rax +; nextln: shlq $$1, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rdi, %rcx +; nextln: movq %rcx, %rdi +; nextln: movabsq $$3689348814741910323, %rax +; nextln: shrq $$2, %rdi +; nextln: andq %rax, %rdi +; nextln: andq %rcx, %rax +; nextln: shlq $$2, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rdi, %rcx +; nextln: movq %rcx, %rdi +; nextln: movabsq $$1085102592571150095, %rax +; nextln: shrq $$4, %rdi +; nextln: andq %rax, %rdi +; nextln: andq %rcx, %rax +; nextln: shlq $$4, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rdi, %rcx +; nextln: movq %rcx, %rdi +; nextln: movabsq $$71777214294589695, %rax +; nextln: shrq $$8, %rdi +; nextln: andq %rax, %rdi +; nextln: andq %rcx, %rax +; nextln: shlq $$8, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rdi, %rcx +; nextln: movq %rcx, %rdi +; nextln: movabsq $$281470681808895, %rax +; nextln: shrq $$16, %rdi +; nextln: andq %rax, %rdi +; nextln: andq %rcx, %rax +; nextln: shlq $$16, %rax +; nextln: orq %rdi, %rax +; nextln: movq %rax, %rcx +; nextln: movl $$-1, %edi +; nextln: shrq $$32, %rcx +; nextln: andq %rdi, %rcx +; nextln: andq %rax, %rdi +; nextln: shlq $$32, %rdi +; nextln: orq %rcx, %rdi +; nextln: movq %rsi, %rcx +; nextln: movq %rcx, %rsi +; nextln: movabsq $$6148914691236517205, %rax +; nextln: shrq $$1, %rsi +; nextln: andq %rax, %rsi +; nextln: andq %rcx, %rax +; nextln: shlq $$1, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rsi, %rcx +; nextln: movq %rcx, %rsi +; nextln: movabsq $$3689348814741910323, %rax +; nextln: shrq $$2, %rsi +; nextln: andq %rax, %rsi +; nextln: andq %rcx, %rax +; nextln: shlq $$2, %rax +; nextln: movq %rax, %rcx +; 
nextln: orq %rsi, %rcx +; nextln: movq %rcx, %rsi +; nextln: movabsq $$1085102592571150095, %rax +; nextln: shrq $$4, %rsi +; nextln: andq %rax, %rsi +; nextln: andq %rcx, %rax +; nextln: shlq $$4, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rsi, %rcx +; nextln: movq %rcx, %rsi +; nextln: movabsq $$71777214294589695, %rax +; nextln: shrq $$8, %rsi +; nextln: andq %rax, %rsi +; nextln: andq %rcx, %rax +; nextln: shlq $$8, %rax +; nextln: movq %rax, %rcx +; nextln: orq %rsi, %rcx +; nextln: movq %rcx, %rsi +; nextln: movabsq $$281470681808895, %rax +; nextln: shrq $$16, %rsi +; nextln: andq %rax, %rsi +; nextln: andq %rcx, %rax +; nextln: shlq $$16, %rax +; nextln: orq %rsi, %rax +; nextln: movq %rax, %rsi +; nextln: movl $$-1, %ecx +; nextln: shrq $$32, %rsi +; nextln: andq %rcx, %rsi +; nextln: andq %rax, %rcx +; nextln: shlq $$32, %rcx +; nextln: orq %rsi, %rcx +; nextln: movq %rcx, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
101) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rcx -; Inst 3: movq %rcx, %rdi -; Inst 4: movabsq $6148914691236517205, %rax -; Inst 5: shrq $1, %rdi -; Inst 6: andq %rax, %rdi -; Inst 7: andq %rcx, %rax -; Inst 8: shlq $1, %rax -; Inst 9: movq %rax, %rcx -; Inst 10: orq %rdi, %rcx -; Inst 11: movq %rcx, %rdi -; Inst 12: movabsq $3689348814741910323, %rax -; Inst 13: shrq $2, %rdi -; Inst 14: andq %rax, %rdi -; Inst 15: andq %rcx, %rax -; Inst 16: shlq $2, %rax -; Inst 17: movq %rax, %rcx -; Inst 18: orq %rdi, %rcx -; Inst 19: movq %rcx, %rdi -; Inst 20: movabsq $1085102592571150095, %rax -; Inst 21: shrq $4, %rdi -; Inst 22: andq %rax, %rdi -; Inst 23: andq %rcx, %rax -; Inst 24: shlq $4, %rax -; Inst 25: movq %rax, %rcx -; Inst 26: orq %rdi, %rcx -; Inst 27: movq %rcx, %rdi -; Inst 28: movabsq $71777214294589695, %rax -; Inst 29: shrq $8, %rdi -; Inst 30: andq %rax, %rdi -; Inst 31: andq %rcx, %rax -; Inst 32: shlq $8, %rax -; Inst 33: movq %rax, %rcx -; Inst 34: orq %rdi, %rcx -; Inst 35: movq %rcx, %rdi -; Inst 36: movabsq $281470681808895, %rax -; Inst 37: shrq $16, %rdi -; Inst 38: andq %rax, %rdi -; Inst 39: andq %rcx, %rax -; Inst 40: shlq $16, %rax -; Inst 41: orq %rdi, %rax -; Inst 42: movq %rax, %rcx -; Inst 43: movl $-1, %edi -; Inst 44: shrq $32, %rcx -; Inst 45: andq %rdi, %rcx -; Inst 46: andq %rax, %rdi -; Inst 47: shlq $32, %rdi -; Inst 48: orq %rcx, %rdi -; Inst 49: movq %rsi, %rcx -; Inst 50: movq %rcx, %rsi -; Inst 51: movabsq $6148914691236517205, %rax -; Inst 52: shrq $1, %rsi -; Inst 53: andq %rax, %rsi -; Inst 54: andq %rcx, %rax -; Inst 55: shlq $1, %rax -; Inst 56: movq %rax, %rcx -; Inst 57: orq %rsi, %rcx -; Inst 58: movq %rcx, %rsi -; Inst 59: movabsq $3689348814741910323, %rax -; Inst 60: shrq $2, %rsi -; Inst 61: andq %rax, %rsi -; Inst 62: andq %rcx, %rax -; Inst 63: shlq $2, %rax -; Inst 64: movq %rax, %rcx -; Inst 65: orq %rsi, %rcx -; Inst 66: movq %rcx, %rsi -; Inst 67: movabsq $1085102592571150095, 
%rax -; Inst 68: shrq $4, %rsi -; Inst 69: andq %rax, %rsi -; Inst 70: andq %rcx, %rax -; Inst 71: shlq $4, %rax -; Inst 72: movq %rax, %rcx -; Inst 73: orq %rsi, %rcx -; Inst 74: movq %rcx, %rsi -; Inst 75: movabsq $71777214294589695, %rax -; Inst 76: shrq $8, %rsi -; Inst 77: andq %rax, %rsi -; Inst 78: andq %rcx, %rax -; Inst 79: shlq $8, %rax -; Inst 80: movq %rax, %rcx -; Inst 81: orq %rsi, %rcx -; Inst 82: movq %rcx, %rsi -; Inst 83: movabsq $281470681808895, %rax -; Inst 84: shrq $16, %rsi -; Inst 85: andq %rax, %rsi -; Inst 86: andq %rcx, %rax -; Inst 87: shlq $16, %rax -; Inst 88: orq %rsi, %rax -; Inst 89: movq %rax, %rsi -; Inst 90: movl $-1, %ecx -; Inst 91: shrq $32, %rsi -; Inst 92: andq %rcx, %rsi -; Inst 93: andq %rax, %rcx -; Inst 94: shlq $32, %rcx -; Inst 95: orq %rsi, %rcx -; Inst 96: movq %rcx, %rax -; Inst 97: movq %rdi, %rdx -; Inst 98: movq %rbp, %rsp -; Inst 99: popq %rbp -; Inst 100: ret -; }} +; Shifts are covered by run-tests in shift-i128-run.clif. function %f21(i128, i64) { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128, v1: i64): store.i128 v0, v1 return -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, 0(%rdx) -; Inst 3: movq %rsi, 8(%rdx) -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; check: movq %rdi, 0(%rdx) +; nextln: movq %rsi, 8(%rdx) + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f22(i64) -> i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i64): v1 = load.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi), %rsi -; Inst 3: movq 8(%rdi), %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; check: movq 0(%rdi), %rsi +; nextln: movq 8(%rdi), %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx + +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} function %f23(i128, b1) -> i128 { block0(v0: i128, v1: b1): @@ -824,48 +695,38 @@ block2(v6: i128): v7 = iconst.i128 2 v8 = iadd.i128 v6, v7 return v8 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dl -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 15) -; Inst 4: xorq %rdi, %rdi -; Inst 5: xorq %rsi, %rsi -; Inst 6: movl $1, %ecx -; Inst 7: xorq %rax, %rax -; Inst 8: addq %rcx, %rdi -; Inst 9: adcq %rax, %rsi -; Inst 10: movq %rdi, %rax -; Inst 11: movq %rsi, %rdx -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 15 .. 
26) -; Inst 15: xorq %rdi, %rdi -; Inst 16: xorq %rsi, %rsi -; Inst 17: movl $2, %ecx -; Inst 18: xorq %rax, %rax -; Inst 19: addq %rcx, %rdi -; Inst 20: adcq %rax, %rsi -; Inst 21: movq %rdi, %rax -; Inst 22: movq %rsi, %rdx -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; check: Block 0: +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: testb $$1, %dl +; nextln: jnz label1; j label2 +; check: Block 1: +; check: xorq %rdi, %rdi +; nextln: xorq %rsi, %rsi +; nextln: movl $$1, %ecx +; nextln: xorq %rax, %rax +; nextln: addq %rcx, %rdi +; nextln: adcq %rax, %rsi +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +; check: Block 2: +; check: xorq %rdi, %rdi +; nextln: xorq %rsi, %rsi +; nextln: movl $$2, %ecx +; nextln: xorq %rax, %rax +; nextln: addq %rcx, %rdi +; nextln: adcq %rax, %rsi +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +} function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -877,164 +738,121 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): v10 = iadd.i128 v6, v8 v11 = iadd.i128 v9, v10 return v11 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
31) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r9, %r11 -; Inst 6: movq 16(%rbp), %r13 -; Inst 7: movq 24(%rbp), %r12 -; Inst 8: movq 32(%rbp), %r10 -; Inst 9: movq 40(%rbp), %r9 -; Inst 10: movq 48(%rbp), %rax -; Inst 11: addq %rdx, %rdi -; Inst 12: movq %rsi, %rdx -; Inst 13: adcq %rcx, %rdx -; Inst 14: xorq %rsi, %rsi -; Inst 15: addq %r8, %r11 -; Inst 16: adcq %rsi, %r13 -; Inst 17: addq %r9, %r12 -; Inst 18: adcq %rax, %r10 -; Inst 19: addq %r11, %rdi -; Inst 20: adcq %r13, %rdx -; Inst 21: addq %rdi, %r12 -; Inst 22: adcq %rdx, %r10 -; Inst 23: movq %r12, %rax -; Inst 24: movq %r10, %rdx -; Inst 25: movq 0(%rsp), %r12 -; Inst 26: movq 8(%rsp), %r13 -; Inst 27: addq $16, %rsp -; Inst 28: movq %rbp, %rsp -; Inst 29: popq %rbp -; Inst 30: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %r12, 0(%rsp) +; nextln: movq %r13, 8(%rsp) +; nextln: movq %r9, %r11 +; nextln: movq 16(%rbp), %r13 +; nextln: movq 24(%rbp), %r12 +; nextln: movq 32(%rbp), %r10 +; nextln: movq 40(%rbp), %r9 +; nextln: movq 48(%rbp), %rax +; nextln: addq %rdx, %rdi +; nextln: movq %rsi, %rdx +; nextln: adcq %rcx, %rdx +; nextln: xorq %rsi, %rsi +; nextln: addq %r8, %r11 +; nextln: adcq %rsi, %r13 +; nextln: addq %r9, %r12 +; nextln: adcq %rax, %r10 +; nextln: addq %r11, %rdi +; nextln: adcq %r13, %rdx +; nextln: addq %rdi, %r12 +; nextln: adcq %rdx, %r10 +; nextln: movq %r12, %rax +; nextln: movq %r10, %rdx +; nextln: movq 0(%rsp), %r12 +; nextln: movq 8(%rsp), %r13 +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +} function %f25(i128) -> i128, i128, i128, i64, i128, i128 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + block0(v0: i128): v1 = ireduce.i64 v0 return v0, v0, v0, v1, v0, v0 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction 
range: 0 .. 37) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r14, 16(%rsp) -; Inst 6: movq %rbx, 24(%rsp) -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdx, %rsi -; Inst 9: movq %rdi, %r12 -; Inst 10: movq %rdi, %rcx -; Inst 11: movq %rax, %rdx -; Inst 12: movq %rdi, %r8 -; Inst 13: movq %rax, %r9 -; Inst 14: movq %rdi, %r10 -; Inst 15: movq %rax, %r11 -; Inst 16: movq %rdi, %r13 -; Inst 17: movq %rax, %r14 -; Inst 18: movq %rax, %rbx -; Inst 19: movq %rcx, %rax -; Inst 20: movq %r8, 0(%rsi) -; Inst 21: movq %r9, 8(%rsi) -; Inst 22: movq %r10, 16(%rsi) -; Inst 23: movq %r11, 24(%rsi) -; Inst 24: movq %r12, 32(%rsi) -; Inst 25: movq %r13, 40(%rsi) -; Inst 26: movq %r14, 48(%rsi) -; Inst 27: movq %rdi, 56(%rsi) -; Inst 28: movq %rbx, 64(%rsi) -; Inst 29: movq 0(%rsp), %r12 -; Inst 30: movq 8(%rsp), %r13 -; Inst 31: movq 16(%rsp), %r14 -; Inst 32: movq 24(%rsp), %rbx -; Inst 33: addq $32, %rsp -; Inst 34: movq %rbp, %rsp -; Inst 35: popq %rbp -; Inst 36: ret -; }} +; likely to change with regalloc -- just check the stores into the retval area: + +; check: movq %r8, 0(%rsi) +; nextln: movq %r9, 8(%rsi) +; nextln: movq %r10, 16(%rsi) +; nextln: movq %r11, 24(%rsi) +; nextln: movq %r12, 32(%rsi) +; nextln: movq %r13, 40(%rsi) +; nextln: movq %r14, 48(%rsi) +; nextln: movq %rdi, 56(%rsi) +; nextln: movq %rbx, 64(%rsi) + +} function %f26(i128, i128) -> i128, i128 { fn0 = %g(i128, i128) -> i128, i128 block0(v0: i128, v1: i128): v2, v3 = call fn0(v0, v1) return v2, v3 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r8, %r12 -; Inst 5: subq $16, %rsp -; Inst 6: virtual_sp_offset_adjust 16 -; Inst 7: lea 0(%rsp), %r8 -; Inst 8: load_ext_name %g+0, %rax -; Inst 9: call *%rax -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: movq 8(%rsp), %rdi -; Inst 12: addq $16, %rsp -; Inst 13: virtual_sp_offset_adjust -16 -; Inst 14: movq %rsi, 0(%r12) -; Inst 15: movq %rdi, 8(%r12) -; Inst 16: movq 0(%rsp), %r12 -; Inst 17: addq $16, %rsp -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %r12, 0(%rsp) +; nextln: movq %r8, %r12 +; nextln: subq $$16, %rsp +; nextln: virtual_sp_offset_adjust 16 +; nextln: lea 0(%rsp), %r8 +; nextln: load_ext_name %g+0, %rax +; nextln: call *%rax +; nextln: movq 0(%rsp), %rsi +; nextln: movq 8(%rsp), %rdi +; nextln: addq $$16, %rsp +; nextln: virtual_sp_offset_adjust -16 +; nextln: movq %rsi, 0(%r12) +; nextln: movq %rdi, 8(%r12) +; nextln: movq 0(%rsp), %r12 +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +} function %f27(i128) -> i128 { block0(v0: i128): v1 = clz.i128 v0 return v1 -} -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movabsq $-1, %rcx -; Inst 3: bsrq %rsi, %rax -; Inst 4: cmovzq %rcx, %rax -; Inst 5: movl $63, %esi -; Inst 6: subq %rax, %rsi -; Inst 7: movabsq $-1, %rcx -; Inst 8: bsrq %rdi, %rax -; Inst 9: cmovzq %rcx, %rax -; Inst 10: movl $63, %edi -; Inst 11: subq %rax, %rdi -; Inst 12: addq $64, %rdi -; Inst 13: cmpq $64, %rsi -; Inst 14: cmovnzq %rsi, %rdi -; Inst 15: xorq %rsi, %rsi -; Inst 16: movq %rdi, %rax -; Inst 17: movq %rsi, %rdx -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movabsq $$-1, %rcx +; nextln: bsrq %rsi, %rax +; nextln: cmovzq %rcx, %rax +; nextln: movl $$63, %esi +; nextln: subq %rax, %rsi +; nextln: movabsq $$-1, %rcx +; nextln: bsrq %rdi, %rax +; nextln: cmovzq %rcx, %rax +; nextln: movl $$63, %edi +; nextln: subq %rax, %rdi +; nextln: addq $$64, %rdi +; nextln: cmpq $$64, %rsi +; nextln: cmovnzq %rsi, %rdi +; nextln: xorq %rsi, %rsi +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +} function %f28(i128) -> i128 { block0(v0: i128): @@ -1042,30 +860,24 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
18) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movl $64, %ecx -; Inst 4: bsfq %rdi, %rsi -; Inst 5: cmovzq %rcx, %rsi -; Inst 6: movl $64, %ecx -; Inst 7: bsfq %rax, %rdi -; Inst 8: cmovzq %rcx, %rdi -; Inst 9: addq $64, %rdi -; Inst 10: cmpq $64, %rsi -; Inst 11: cmovzq %rdi, %rsi -; Inst 12: xorq %rdi, %rdi -; Inst 13: movq %rsi, %rax -; Inst 14: movq %rdi, %rdx -; Inst 15: movq %rbp, %rsp -; Inst 16: popq %rbp -; Inst 17: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rax +; nextln: movl $$64, %ecx +; nextln: bsfq %rdi, %rsi +; nextln: cmovzq %rcx, %rsi +; nextln: movl $$64, %ecx +; nextln: bsfq %rax, %rdi +; nextln: cmovzq %rcx, %rdi +; nextln: addq $$64, %rdi +; nextln: cmpq $$64, %rsi +; nextln: cmovzq %rdi, %rsi +; nextln: xorq %rdi, %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret function %f29(i8, i128) -> i8 { block0(v0: i8, v1: i128): @@ -1073,20 +885,14 @@ block0(v0: i8, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rcx -; Inst 3: shlb %cl, %dil -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rcx +; nextln: shlb %cl, %dil +; nextln: movq %rdi, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret function %f30(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1094,37 +900,31 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdi -; Inst 4: movq %rax, %rsi -; Inst 5: movq %rdx, %rcx -; Inst 6: shlq %cl, %rsi -; Inst 7: movq %rdx, %rcx -; Inst 8: shlq %cl, %rdi -; Inst 9: movl $64, %ecx -; Inst 10: subq %rdx, %rcx -; Inst 11: shrq %cl, %rax -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %rax -; Inst 15: orq %rdi, %rax -; Inst 16: testq $64, %rdx -; Inst 17: movq %rsi, %rdi -; Inst 18: cmovzq %rax, %rdi -; Inst 19: cmovzq %rsi, %rcx -; Inst 20: movq %rcx, %rax -; Inst 21: movq %rdi, %rdx -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdi +; nextln: movq %rax, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rdi +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rax +; nextln: orq %rdi, %rax +; nextln: testq $$64, %rdx +; nextln: movq %rsi, %rdi +; nextln: cmovzq %rax, %rdi +; nextln: cmovzq %rsi, %rcx +; nextln: movq %rcx, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret function %f31(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1132,36 +932,30 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rdi -; Inst 5: movq %rax, %rsi -; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %rsi -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: shlq %cl, %rax -; Inst 11: xorq %rcx, %rcx -; Inst 12: testq $127, %rdx -; Inst 13: cmovzq %rcx, %rax -; Inst 14: orq %rdi, %rax -; Inst 15: xorq %rdi, %rdi -; Inst 16: testq $64, %rdx -; Inst 17: cmovzq %rsi, %rdi -; Inst 18: cmovzq %rax, %rsi -; Inst 19: movq %rsi, %rax -; Inst 20: movq %rdi, %rdx -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rax +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rdi +; nextln: movq %rax, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rsi +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rax +; nextln: orq %rdi, %rax +; nextln: xorq %rdi, %rdi +; nextln: testq $$64, %rdx +; nextln: cmovzq %rsi, %rdi +; nextln: cmovzq %rax, %rsi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret function %f32(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1169,40 +963,31 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
28) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %r8 -; Inst 3: movq %rsi, %rdi -; Inst 4: movq %rdi, %rsi -; Inst 5: movq %rdx, %rcx -; Inst 6: sarq %cl, %rsi -; Inst 7: movq %rdx, %rcx -; Inst 8: shrq %cl, %r8 -; Inst 9: movl $64, %ecx -; Inst 10: subq %rdx, %rcx -; Inst 11: movq %rdi, %rax -; Inst 12: shlq %cl, %rax -; Inst 13: xorq %rcx, %rcx -; Inst 14: testq $127, %rdx -; Inst 15: cmovzq %rcx, %rax -; Inst 16: orq %r8, %rax -; Inst 17: sarq $63, %rdi -; Inst 18: xorq %rcx, %rcx -; Inst 19: andq $64, %rdx -; Inst 20: cmovzq %rsi, %rdi -; Inst 21: cmovzq %rax, %rcx -; Inst 22: cmovnzq %rsi, %rcx -; Inst 23: movq %rcx, %rax -; Inst 24: movq %rdi, %rdx -; Inst 25: movq %rbp, %rsp -; Inst 26: popq %rbp -; Inst 27: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %rax +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: movq %rsi, %rdi +; nextln: movq %rdx, %rcx +; nextln: sarq %cl, %rdi +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rsi, %r8 +; nextln: shlq %cl, %r8 +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %r8 +; nextln: orq %r8, %rax +; nextln: sarq $$63, %rsi +; nextln: testq $$64, %rdx +; nextln: cmovzq %rdi, %rsi +; nextln: cmovzq %rax, %rdi +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret function %f33(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1210,58 +995,53 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
46) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %r9 -; Inst 3: movq %rdx, %rcx -; Inst 4: shlq %cl, %r9 -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdx, %rcx -; Inst 7: shlq %cl, %rax -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rdi, %r10 -; Inst 11: shrq %cl, %r10 -; Inst 12: xorq %r8, %r8 -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %r8, %r10 -; Inst 15: orq %rax, %r10 -; Inst 16: testq $64, %rdx -; Inst 17: movq %r9, %rax -; Inst 18: cmovzq %r10, %rax -; Inst 19: cmovzq %r9, %r8 -; Inst 20: movl $128, %r9d -; Inst 21: subq %rdx, %r9 -; Inst 22: movq %rdi, %rdx -; Inst 23: movq %r9, %rcx -; Inst 24: shrq %cl, %rdx -; Inst 25: movq %rsi, %rdi -; Inst 26: movq %r9, %rcx -; Inst 27: shrq %cl, %rdi -; Inst 28: movl $64, %ecx -; Inst 29: subq %r9, %rcx -; Inst 30: shlq %cl, %rsi -; Inst 31: xorq %rcx, %rcx -; Inst 32: testq $127, %r9 -; Inst 33: cmovzq %rcx, %rsi -; Inst 34: orq %rdx, %rsi -; Inst 35: xorq %rcx, %rcx -; Inst 36: testq $64, %r9 -; Inst 37: cmovzq %rdi, %rcx -; Inst 38: cmovzq %rsi, %rdi -; Inst 39: orq %rdi, %r8 -; Inst 40: orq %rcx, %rax -; Inst 41: movq %rax, %rdx -; Inst 42: movq %r8, %rax -; Inst 43: movq %rbp, %rsp -; Inst 44: popq %rbp -; Inst 45: ret -; }} +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %r9 +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %r9 +; nextln: movq %rsi, %rax +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rdi, %r10 +; nextln: shrq %cl, %r10 +; nextln: xorq %r8, %r8 +; nextln: testq $$127, %rdx +; nextln: cmovzq %r8, %r10 +; nextln: orq %rax, %r10 +; nextln: testq $$64, %rdx +; nextln: movq %r9, %rax +; nextln: cmovzq %r10, %rax +; nextln: cmovzq %r9, %r8 +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; nextln: movq %rdi, %rdx +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %rdx +; nextln: movq %rsi, %rdi +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %rdi +; 
nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shlq %cl, %rsi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %r9 +; nextln: cmovzq %rcx, %rsi +; nextln: orq %rdx, %rsi +; nextln: xorq %rcx, %rcx +; nextln: testq $$64, %r9 +; nextln: cmovzq %rdi, %rcx +; nextln: cmovzq %rsi, %rdi +; nextln: orq %rdi, %r8 +; nextln: orq %rcx, %rax +; nextln: movq %rax, %rdx +; nextln: movq %r8, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + function %f34(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1269,60 +1049,53 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 50) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rax, %r9 -; Inst 4: movq %rdx, %rcx -; Inst 5: shrq %cl, %r9 -; Inst 6: movq %rdi, %rsi -; Inst 7: movq %rdx, %rcx -; Inst 8: shrq %cl, %rsi -; Inst 9: movl $64, %ecx -; Inst 10: subq %rdx, %rcx -; Inst 11: movq %rax, %r10 -; Inst 12: shlq %cl, %r10 -; Inst 13: xorq %rcx, %rcx -; Inst 14: testq $127, %rdx -; Inst 15: cmovzq %rcx, %r10 -; Inst 16: orq %rsi, %r10 -; Inst 17: xorq %rsi, %rsi -; Inst 18: xorq %r8, %r8 -; Inst 19: movq %rdx, %rcx -; Inst 20: andq $64, %rcx -; Inst 21: cmovzq %r9, %rsi -; Inst 22: cmovzq %r10, %r8 -; Inst 23: cmovnzq %r9, %r8 -; Inst 24: movl $128, %r9d -; Inst 25: subq %rdx, %r9 -; Inst 26: movq %rdi, %rdx -; Inst 27: movq %r9, %rcx -; Inst 28: shlq %cl, %rdx -; Inst 29: movq %r9, %rcx -; Inst 30: shlq %cl, %rax -; Inst 31: movl $64, %ecx -; Inst 32: subq %r9, %rcx -; Inst 33: shrq %cl, %rdi -; Inst 34: xorq %rcx, %rcx -; Inst 35: testq $127, %r9 -; Inst 36: cmovzq %rcx, %rdi -; Inst 37: orq %rax, %rdi -; Inst 38: xorq %rax, %rax -; Inst 39: andq $64, %r9 -; Inst 40: cmovzq %rdi, %rax -; Inst 41: cmovzq %rdx, %rcx -; Inst 42: cmovnzq %rdx, %rax -; Inst 43: orq %r8, %rcx -; Inst 44: orq %rsi, %rax -; Inst 45: movq %rax, %rdx -; Inst 46: movq %rcx, 
%rax -; Inst 47: movq %rbp, %rsp -; Inst 48: popq %rbp -; Inst 49: ret -; }} - +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rax +; nextln: movq %rax, %r9 +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %r9 +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rsi +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rax, %r10 +; nextln: shlq %cl, %r10 +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %r10 +; nextln: orq %rsi, %r10 +; nextln: xorq %rsi, %rsi +; nextln: xorq %r8, %r8 +; nextln: movq %rdx, %rcx +; nextln: andq $$64, %rcx +; nextln: cmovzq %r9, %rsi +; nextln: cmovzq %r10, %r8 +; nextln: cmovnzq %r9, %r8 +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; nextln: movq %rdi, %rdx +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %rdx +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shrq %cl, %rdi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %r9 +; nextln: cmovzq %rcx, %rdi +; nextln: orq %rax, %rdi +; nextln: xorq %rax, %rax +; nextln: andq $$64, %r9 +; nextln: cmovzq %rdi, %rax +; nextln: cmovzq %rdx, %rcx +; nextln: cmovnzq %rdx, %rax +; nextln: orq %r8, %rcx +; nextln: orq %rsi, %rax +; nextln: movq %rax, %rdx +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 9dcea1b5dc88..065a3c68d3bb 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -321,22 +321,23 @@ block0(v0: i32): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 15) +; (instruction range: 0 .. 
16) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(0), %xmm0 +; Inst 2: load_const VCodeConstant(0), %xmm3 ; Inst 3: addl $8, %edi -; Inst 4: movd %edi, %xmm2 -; Inst 5: movdqa %xmm0, %xmm1 -; Inst 6: punpcklbw %xmm1, %xmm1 -; Inst 7: psraw %xmm2, %xmm1 -; Inst 8: punpckhbw %xmm0, %xmm0 -; Inst 9: psraw %xmm2, %xmm0 -; Inst 10: packsswb %xmm0, %xmm1 -; Inst 11: movdqa %xmm1, %xmm0 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret +; Inst 4: movd %edi, %xmm1 +; Inst 5: movdqa %xmm3, %xmm2 +; Inst 6: punpcklbw %xmm3, %xmm2 +; Inst 7: psraw %xmm1, %xmm2 +; Inst 8: movdqa %xmm3, %xmm0 +; Inst 9: punpckhbw %xmm3, %xmm0 +; Inst 10: psraw %xmm1, %xmm0 +; Inst 11: packsswb %xmm0, %xmm2 +; Inst 12: movdqa %xmm2, %xmm0 +; Inst 13: movq %rbp, %rsp +; Inst 14: popq %rbp +; Inst 15: ret ; }} function %sshr_i8x16_imm(i8x16, i32) -> i8x16 { @@ -349,19 +350,21 @@ block0(v0: i8x16, v1: i32): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 12) +; (instruction range: 0 .. 14) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm0, %xmm1 -; Inst 3: movdqa %xmm1, %xmm0 -; Inst 4: punpcklbw %xmm0, %xmm0 -; Inst 5: psraw $11, %xmm0 -; Inst 6: punpckhbw %xmm1, %xmm1 -; Inst 7: psraw $11, %xmm1 -; Inst 8: packsswb %xmm1, %xmm0 -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret +; Inst 2: movdqa %xmm0, %xmm2 +; Inst 3: movdqa %xmm2, %xmm1 +; Inst 4: punpcklbw %xmm2, %xmm1 +; Inst 5: psraw $11, %xmm1 +; Inst 6: movdqa %xmm2, %xmm0 +; Inst 7: punpckhbw %xmm2, %xmm0 +; Inst 8: psraw $11, %xmm0 +; Inst 9: packsswb %xmm0, %xmm1 +; Inst 10: movdqa %xmm1, %xmm0 +; Inst 11: movq %rbp, %rsp +; Inst 12: popq %rbp +; Inst 13: ret ; }} function %sshr_i64x2(i64x2, i32) -> i64x2 { @@ -374,21 +377,20 @@ block0(v0: i64x2, v1: i32): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 15) +; (instruction range: 0 .. 
14) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm0, %xmm1 -; Inst 3: pextrd.w $0, %xmm0, %rsi -; Inst 4: pextrd.w $1, %xmm0, %rax -; Inst 5: movq %rdi, %rcx -; Inst 6: sarq %cl, %rsi -; Inst 7: movq %rdi, %rcx -; Inst 8: sarq %cl, %rax -; Inst 9: pinsrd.w $0, %rsi, %xmm1 -; Inst 10: pinsrd.w $1, %rax, %xmm1 -; Inst 11: movdqa %xmm1, %xmm0 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret +; Inst 2: pextrd.w $0, %xmm0, %rsi +; Inst 3: pextrd.w $1, %xmm0, %rax +; Inst 4: movq %rdi, %rcx +; Inst 5: sarq %cl, %rsi +; Inst 6: movq %rdi, %rcx +; Inst 7: sarq %cl, %rax +; Inst 8: uninit %xmm0 +; Inst 9: pinsrd.w $0, %rsi, %xmm0 +; Inst 10: pinsrd.w $1, %rax, %xmm0 +; Inst 11: movq %rbp, %rsp +; Inst 12: popq %rbp +; Inst 13: ret ; }} diff --git a/cranelift/filetests/src/runner.rs b/cranelift/filetests/src/runner.rs index d3fa7595373d..f844f4c38819 100644 --- a/cranelift/filetests/src/runner.rs +++ b/cranelift/filetests/src/runner.rs @@ -13,7 +13,7 @@ use std::path::{Path, PathBuf}; use std::time; /// Timeout in seconds when we're not making progress. -const TIMEOUT_PANIC: usize = 10; +const TIMEOUT_PANIC: usize = 60; /// Timeout for reporting slow tests without panicking. const TIMEOUT_SLOW: usize = 3;